a
    bgY$                     @  s   d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ G d
d deeZdS )z7Loader that loads data from Sharepoint Document Library    )annotationsN)Path)AnyDictIteratorListOptional)
BaseLoader)Document)Field)O365BaseLoaderc                   @  s   e Zd ZU dZedZded< dZded< dZded	< dZ	ded
< dZ
ded< e d d Zded< dZded< eddddZddddZdddddZdddd Zdd!dd"d#ZdS )$SharePointLoaderzLoad  from `SharePoint`..strdocument_library_idNzOptional[str]folder_pathzOptional[List[str]]
object_ids	folder_idFzOptional[bool]	load_authz.credentialszo365_token.txtr   
token_pathload_extended_metadataz	List[str])returnc                 C  s   ddgS )zcReturn required scopes.
        Returns:
            List[str]: A list of required scopes.
        Z
sharepointbasic )selfr   r   }/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/sharepoint.py_scopes%   s    zSharePointLoader._scopeszIterator[Document]c                 c  s  zddl m}m} W n ty.   tdY n0 |   | j}t||s`t	d| j d| j
r.|| j
}t||st	d| j
 d| |D ]}t|jd}| jdu r| |}| jdu r| |}|d	|ji | j|D ]8}	| jdu r||	jd
< | jdu r$|	j| |	V  qq| jr|| j}t||s`t	d| j
 d| |D ]}t|jd}| jdu r| |}| jdu r| |}|d	|ji | j|D ]:}	| jdu r||	jd
< | jdu r|	j| |	V  qƐqj| jr| || jD ]}t|jd}| jdu rF| |}| jdu r\| |}| j|D ]:}	| jdu r||	jd
< | jdu r|	j| |	V  qhq| j
s| js| js| }t||st	d| |D ]}t|jd}| jdu r| |}| jdu r&| |}| j|D ]Z}
|
j|j | jdu rZ||
jd
< | jdu r|
j| |
jd	|ji |
V  q2qdS )z
        Load documents lazily. Use this when working at a large scale.
        Yields:
            Document: A document object representing the parsed blob.
        r   )DriveFolderzAO365 package not found, please install it with `pip install o365`zThere isn't a Drive with id .zThere isn't a folder with path idTZsource_full_urlauthorized_identitieszUnable to fetch root folderN)Z
O365.driver   r   ImportErrorZ_authZstorageZ	get_driver   
isinstance
ValueErrorr   Zget_item_by_pathZ_load_from_folderr   metadatagetr   r    r   get_extended_metadataupdateZweb_urlZ_blob_parserZ
lazy_parser   Zget_itemr   Z_load_from_object_idsZget_root_folder)r   r   r   driveZtarget_folderZblobfile_idZauth_identitiesZextended_metadataZparsed_blobZ	blob_partr   r   r   	lazy_load-   s    

















zSharePointLoader.lazy_loadr   )r)   r   c                 C  s   |   }|d}d| j d| d}dd| i}tjd||d}| }g }|d	D ]Z}	|	d
rZ|	d
dp|	d
dp|	d
d}
|
rZ|
d}|rZ|| qZ|S )a  
        Retrieve the access identities (user/group emails) for a given file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            List: A list of group names (email addresses) that have
                  access to the file.
        access_token(https://graph.microsoft.com/v1.0/drives//items/z/permissionsAuthorizationBearer GETheadersvalueZgrantedToV2ZsiteUserusergroupemail)_fetch_access_tokenr%   r   requestsrequestjsonappend)r   r)   datar+   urlr2   responseZaccess_listZgroup_namesZaccess_dataZ	site_datar6   r   r   r   r       s0    	


z&SharePointLoader.authorized_identitiesr   c                 C  sD   t | jdd}| }W d   n1 s,0    Y  t|}|S )z|
        Fetch the access token from the token file.
        Returns:
            The access token as a dictionary.
        zutf-8)encodingN)openr   readr:   loads)r   fsr<   r   r   r   r7      s    &
z$SharePointLoader._fetch_access_tokenr   c           	      C  s   |   }|d}d| j d| d}dd| i}tjd||d}| }|d	d
|di di dd|di dddd d |dd d}|S )a  
        Retrieve extended metadata for a file in SharePoint.
        As of today, following fields are supported in the extended metadata:
        - size: size of the source file.
        - owner: display name of the owner of the source file.
        - full_path: pretty human readable path of the source file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            dict: A dictionary containing the extended metadata of the file,
                  including size, owner, and full path.
        r+   r,   r-   z,?$select=size,createdBy,parentReference,namer.   r/   r0   r1   sizer   Z	createdByr4   ZdisplayName ZparentReferencepath:/name)rE   owner	full_path)r7   r%   r   r8   r9   r:   split)	r   r)   r<   r+   r=   r2   r>   r$   Zstaged_metadatar   r   r   r&      s<    


z&SharePointLoader.get_extended_metadata)__name__
__module____qualname____doc__r   r   __annotations__r   r   r   r   r   homer   r   propertyr   r*   r    r7   r&   r   r   r   r   r      s   
T"r   )rR   
__future__r   r:   pathlibr   typingr   r   r   r   r   r8   Zlangchain_core.document_loadersr	   Zlangchain_core.documentsr
   Zpydanticr   Z.langchain_community.document_loaders.base_o365r   r   r   r   r   r   <module>   s   