a
    bg                     @   s   d dl Z d dlZd dlZd dlmZmZmZ d dlmZ d dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ G dd	 d	ZG d
d deZG dd deZdS )    N)AnyListOptional)urljoin)Document)HTTPBasicAuth)
BaseLoader)UnstructuredBaseLoaderc                   @   sJ   e Zd ZdZeeedddZeeeee edddZ	edd	d
Z
dS )LakeFSClientzClient for lakeFS.)lakefs_access_keylakefs_secret_keylakefs_endpointc                 C   sb   d |ddg| _t||| _z$tjt| jd| jd}|  W n ty\   t	dY n0 d S )N/apizv1/Zhealthcheckauthz<lakeFS server isn't accessible. Make sure lakeFS is running.)
join_LakeFSClient__endpointr   _LakeFSClient__authrequestsgetr   raise_for_status	Exception
ValueError)selfr   r   r   Zhealth_check r   y/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/lakefs.py__init__   s    zLakeFSClient.__init__)reporefpathpresignreturnc           
      C   sj   ||d}t j|}t| jd| d| d| }tj|| jd}|  |	 }	t
tdd |	d S )	N)prefixr!   zrepositories/z/refs/z/objects/ls?r   c                 S   s   | d | d fS )Nr    Zphysical_addressr   )resr   r   r   <lambda>1       z)LakeFSClient.ls_objects.<locals>.<lambda>results)urllibparse	urlencoder   r   r   r   r   r   jsonlistmap)
r   r   r   r    r!   ZqpZeqpZobjects_ls_endpointZolsrZ	olsr_jsonr   r   r   
ls_objects$   s    
zLakeFSClient.ls_objectsr"   c                 C   s6   | j d }tj|| jd}|  | }|d d S )Nconfigr   Zstorage_configZpre_sign_support)r   r   r   r   r   r+   )r   Zconfig_endpointresponser0   r   r   r   is_presign_supported5   s
    
z!LakeFSClient.is_presign_supportedN)__name__
__module____qualname____doc__strr   r   boolr   r.   r2   r   r   r   r   r
      s   r
   c                   @   s   e Zd ZU dZeed< eed< eed< deeeee ee ee dd	d
ZeddddZeddddZ	eddddZ
ee dddZddddZdS )LakeFSLoaderzLoad from `lakeFS`.r   r   r    Nmain )r   r   r   r   r   r    c                 C   sd   t |||| _|du s|dkr"dnt|| _|du s<|dkr@dnt|| _|du rVdnt|| _dS )a  

        :param lakefs_access_key: [required] lakeFS server's access key
        :param lakefs_secret_key: [required] lakeFS server's secret key
        :param lakefs_endpoint: [required] lakeFS server's endpoint address,
               ex: https://example.my-lakefs.com
        :param repo: [optional, default = ''] target repository
        :param ref: [optional, default = 'main'] target ref (branch name,
               tag, or commit ID)
        :param path: [optional, default = ''] target path
        Nr;   r:   )r
   _LakeFSLoader__lakefs_clientr7   r   r   r    )r   r   r   r   r   r   r    r   r   r   r   D   s    zLakeFSLoader.__init__)r    r"   c                 C   s
   || _ d S N)r    )r   r    r   r   r   set_path`   s    zLakeFSLoader.set_path)r   r"   c                 C   s
   || _ d S r=   )r   )r   r   r   r   r   set_refc   s    zLakeFSLoader.set_ref)r   r"   c                 C   s
   || _ d S r=   )r   )r   r   r   r   r   set_repof   s    zLakeFSLoader.set_repor/   c                 C   sh   |    | j }g }| jj| j| j| j|d}|D ].}t|d | j| j|d |}||	  q4|S )N)r   r   r    r!      r   )
 _LakeFSLoader__validate_instancer<   r2   r.   r   r   r    UnstructuredLakeFSLoaderextendload)r   Z	presigneddocsZobjsobjZlakefs_unstructured_loaderr   r   r   rE   i   s    
zLakeFSLoader.loadc                 C   sN   | j d u s| j dkrtd| jd u s0| jdkr8td| jd u rJtdd S )Nr;   zBno repository was provided. use `set_repo` to specify a repositoryz3no ref was provided. use `set_ref` to specify a refz6no path was provided. use `set_path` to specify a path)r   r   r   r    r   r   r   r   Z__validate_instancew   s    
z LakeFSLoader.__validate_instance)Nr:   r;   )r3   r4   r5   r6   r7   __annotations__r   r   r>   r?   r@   r   r   rE   rB   r   r   r   r   r9   =   s(   
   r9   c                       sN   e Zd ZdZdeeeeeed fddZedd	d
Z	e
dddZ  ZS )rC   z(Load from `lakeFS` as unstructured data.r:   r;   T)urlr   r   r    r!   unstructured_kwargsc                    s4   t  jf i | || _|| _|| _|| _|| _dS )zInitialize UnstructuredLakeFSLoader.

        Args:

        :param lakefs_access_key:
        :param lakefs_secret_key:
        :param lakefs_endpoint:
        :param repo:
        :param ref:
        N)superr   rJ   r   r   r    r!   )r   rJ   r   r   r    r!   rK   	__class__r   r   r      s    z!UnstructuredLakeFSLoader.__init__r/   c                 C   s   | j | j| jdS )Nr   r   r    rO   rH   r   r   r   _get_metadata   s    z&UnstructuredLakeFSLoader._get_metadatac              	   C   s   ddl m} d}| jrt }| d| jdd  }tjtj	|dd t
| j}|  t|dd	}||j W d    n1 s0    Y  ||d
W  d    S 1 s0    Y  n2| j|stdn| jt|d  }||d
S d S )Nr   )	partitionzlocal://r   T)exist_okwb)mode)filenamez>Non pre-signed URLs are supported only with 'local' blockstore)Zunstructured.partition.autorQ   r!   tempfileTemporaryDirectoryr    splitosmakedirsdirnamer   r   rJ   r   openwritecontent
startswithr   len)r   rQ   Zlocal_prefixtemp_dir	file_pathr1   fileZ
local_pathr   r   r   _get_elements   s"    
**z&UnstructuredLakeFSLoader._get_elements)r:   r;   T)r3   r4   r5   r6   r7   r8   r   r   dictrP   r   re   __classcell__r   r   rM   r   rC      s      rC   )rZ   rW   urllib.parser(   typingr   r   r   r   r   Zlangchain_core.documentsr   Zrequests.authr   Z)langchain_community.document_loaders.baser   Z1langchain_community.document_loaders.unstructuredr	   r
   r9   rC   r   r   r   r   <module>   s   .E