a
    bgn9                     @   s   d dl mZ d dlmZmZmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ dgZe
dd	d
dG dd deeZdS )    )Path)AnyDictListOptionalSequenceUnion)
deprecated)Document)	BaseModelmodel_validator	validator)
BaseLoaderz.https://www.googleapis.com/auth/drive.readonlyz0.0.32z1.0z,langchain_google_community.GoogleDriveLoader)ZsinceZremovalZalternative_importc                	   @   s  e Zd ZU dZe d d Zeed< e d d Zeed< e d d Z	eed< d	Z
ee ed
< d	Zeee  ed< d	Zeee  ed< dZeed< d	Zeee  ed< dZeed< d	Zeed< i Zedef ed< eddeeeef edddZedeeedddZedddZeee dd d!Z eedd"d#Z!d	d$eeee  ee d%d&d'Z"eeeeee#eee f f  d(d)d*Z$ee dd+d,Z%eee dd-d.Z&ee dd/d0Z'ee dd1d2Z(d	S )3GoogleDriveLoaderz%Load Google Docs from `Google Drive`.z.credentialsz	keys.jsonservice_account_keyzcredentials.jsoncredentials_pathz
token.json
token_pathN	folder_iddocument_idsfile_idsF	recursive
file_typesload_trashed_filesfile_loader_clsstrfile_loader_kwargsbefore)mode)valuesreturnc                    s&  | dr&| ds| dr&td| dsL| dsL| dsLtd| d}|r"| dsp| drxtddd	d
dt t  }ddd  D }ddd  D }|D ]&}||vrtd| d| d| qttdfdd  fdd|D |d< |S )zDValidate that either folder_id or document_ids is set, but not both.r   r   r   zICannot specify both folder_id and document_ids nor folder_id and file_idsz8Must specify either folder_id, document_ids, or file_idsr   zdfile_types can only be given when folder_id is given, (not when document_ids or file_ids are given).$application/vnd.google-apps.document'application/vnd.google-apps.spreadsheetapplication/pdf)ZdocumentsheetZpdfz, c                 S   s   g | ]}d | d qS ' .0xr&   r&   ~/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/googledrive.py
<listcomp>V       z5GoogleDriveLoader.validate_inputs.<locals>.<listcomp>c                 S   s   g | ]}d | d qS r$   r&   r'   r&   r&   r*   r+   W   r,   zGiven file type z) is not supported. Supported values are: z; and their full-form names: )r)   r   c                    s   |  v r |  S | S )Nr&   )r)   )type_mappingr&   r*   	full_forma   s    z4GoogleDriveLoader.validate_inputs.<locals>.full_formc                    s   g | ]} |qS r&   r&   )r(   	file_type)r.   r&   r*   r+   d   r,   )get
ValueErrorlistkeysr   joinr   )clsr   r   Zallowed_typesZshort_namesZ
full_namesr/   r&   )r.   r-   r*   validate_inputs7   sP    

z!GoogleDriveLoader.validate_inputs)vkwargsr   c                 K   s   |  std| d|S )z&Validate that credentials_path exists.zcredentials_path z does not exist)existsr1   )r5   r7   r8   r&   r&   r*   validate_credentials_pathg   s    z+GoogleDriveLoader.validate_credentials_path)r   c           
      C   sH  z@ddl m} ddlm} ddlm} ddlm} ddlm	} W n t
yZ   t
dY n0 d}| j r|jjt| jtd	S | j r|t| jt}|r|js0|r|jr|jr||  n(| j r|t| jt}|jdd
}|r0t| jd}||  W d   n1 s&0    Y  |sD|td	\}}	|S )a?  Load credentials.
        The order of loading credentials:
        1. Service account key if file exists
        2. Token path (for OAuth Client) if file exists
        3. Credentials path (for OAuth Client) if file exists
        4. Default credentials. if no credentials found, raise DefaultCredentialsError
        r   )default)Request)service_account)Credentials)InstalledAppFlowzYou must run `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib` to use the Google Drive loader.N)Zscopes)portw)Zgoogle.authr;   Zgoogle.auth.transport.requestsr<   Zgoogle.oauth2r=   Zgoogle.oauth2.credentialsr>   Zgoogle_auth_oauthlib.flowr?   ImportErrorr   r9   Zfrom_service_account_filer   SCOPESr   Zfrom_authorized_user_fileZvalidZexpiredZrefresh_tokenrefreshr   Zfrom_client_secrets_fileZrun_local_serveropenwriteto_json)
selfr;   r<   r=   r>   r?   credsZflowtoken_r&   r&   r*   _load_credentialsn   s>    	





.z#GoogleDriveLoader._load_credentials)idr   c              	   C   sL  ddl m} |  }|dd|d}| j|d }|dg }g }|D ]}|d d }	|  j||	d	 }
|
d
g }|sqJ|d }t|dd ddD ]\}}d| d|d d  |d d  d|	 |d}g }t|D ]>\}}t||kr
|| 	 nd}|
| d|	   qd|}|
t||d qqJ|S )z%Load a sheet and all tabs from an ID.r   buildsheetsZv4credentials)spreadsheetId
propertiestitle)rS   ranger      N)startz'https://docs.google.com/spreadsheets/d/z
/edit?gid=ZsheetIdz - )sourcerU   row z: 
page_contentmetadata)googleapiclient.discoveryrO   rL   Zspreadsheetsr0   executer   	enumeratelenstripappendr4   r
   )rH   rM   rO   rI   Zsheets_serviceZspreadsheetrP   Z	documentsr#   Z
sheet_nameresultr   headerirZ   r_   contentjr7   rU   r^   r&   r&   r*   _load_sheet_from_id   s<    

z%GoogleDriveLoader._load_sheet_from_idc              
   C   s,  ddl m} ddlm} ddlm} ddlm} |  }|dd|d}|	 j
|d	d
d }|	 j|dd}	| }
||
|	}d}z|du r| \}}qW nN |y } z6|jjdkrtd| ntd| W Y d}~n
d}~0 0 |
 d}d| d|
d |
d d}t||dS )zLoad a document from an ID.r   BytesIOrN   )	HttpErrorMediaIoBaseDownloaddrivev3rQ   TzmodifiedTime,name)fileIdsupportsAllDrivesfieldsz
text/plain)rs   mimeTypeFi  zFile not found: {}zAn error occurred: {}Nzutf-8z#https://docs.google.com/document/d/z/editnameZmodifiedTime)rY   rU   whenr]   )iorm   r`   rO   Zgoogleapiclient.errorsrn   googleapiclient.httprp   rL   filesr0   ra   Zexport_media
next_chunkrespstatusprintformatgetvaluedecoder
   )rH   rM   rm   rO   rn   rp   rI   servicefilerequestfh
downloaderdoner~   etextr_   r&   r&   r*   _load_document_from_id   s6    
$


z(GoogleDriveLoader._load_document_from_idr   )r   r   r   c          
         s   ddl m} |  }|dd|d}| ||} rF fdd|D }n|}g }|D ]}	|	d rh| jshqRqR|	d	 d
kr|| |	d  qR|	d	 dkr|| |	d  qR|	d	 dks| j	durR|| 
|	d  qRqR|S )zLoad documents from a folder.r   rN   rq   rr   rQ   c                    s   g | ]}|d   v r|qS )rv   r&   )r(   fr   r&   r*   r+      r,   zAGoogleDriveLoader._load_documents_from_folder.<locals>.<listcomp>Ztrashedrv   r    rM   r!   r"   N)r`   rO   rL   _fetch_files_recursiver   re   r   extendrk   r   _load_file_from_id)
rH   r   r   rO   rI   r   r{   _filesreturnsr   r&   r   r*   _load_documents_from_folder   s,    
z-GoogleDriveLoader._load_documents_from_folder)r   r   r   c                 C   sv   |  jd| dddddd }|dg }g }|D ]8}|d d	krf| jrp|| ||d
  q8|| q8|S )z+Fetch all files and subfolders recursively.r%   z' in parentsi  Tz:nextPageToken, files(id, name, mimeType, parents, trashed))qZpageSizeZincludeItemsFromAllDrivesrt   ru   r{   rv   z"application/vnd.google-apps.folderrM   )r{   r2   ra   r0   r   r   r   re   )rH   r   r   resultsr{   r   r   r&   r&   r*   r     s     
z(GoogleDriveLoader._fetch_files_recursivec                    s"    j std fdd j D S )z"Load documents from a list of IDs.zdocument_ids must be setc                    s   g | ]}  |qS r&   )r   )r(   Zdoc_idrH   r&   r*   r+   3  r,   z>GoogleDriveLoader._load_documents_from_ids.<locals>.<listcomp>)r   r1   r   r&   r   r*   _load_documents_from_ids.  s    z*GoogleDriveLoader._load_documents_from_idsc                    s6  ddl m} ddlm} ddlm} |  }|dd|d}| jdd		  | j
d
}| }|||}	d}
|
du r|	 \}}
qt| jdur|d | jf d|i| j}| }|D ]2}d d|jd< d|jvr d |jd< q|S ddlm} | }|||} fddt|jD S dS )zLoad a file from an ID.r   rl   rN   ro   rq   rr   rQ   T)rs   rt   )rs   FNr    https://drive.google.com/file/d//viewrY   rU   rw   )	PdfReaderc                    s8   g | ]0\}}t | d  d d |ddqS )r   r   rw   )rY   rU   pager]   )r
   Zextract_textr0   )r(   rh   r   r   rM   r&   r*   r+   W  s   	

z8GoogleDriveLoader._load_file_from_id.<locals>.<listcomp>)ry   rm   r`   rO   rz   rp   rL   r{   r0   ra   Z	get_mediar|   r   seekr   loadr_   ZPyPDF2r   r   rb   Zpages)rH   rM   rm   rO   rp   rI   r   r   r   r   r   r~   loaderdocsdocr   ri   Z
pdf_readerr&   r   r*   r   5  s6    



	z$GoogleDriveLoader._load_file_from_idc                 C   s2   | j stdg }| j D ]}|| | q|S )zLoad files from a list of IDs.zfile_ids must be set)r   r1   r   r   )rH   r   Zfile_idr&   r&   r*   _load_file_from_idsc  s    
z%GoogleDriveLoader._load_file_from_idsc                 C   s2   | j r| j| j | jdS | jr&|  S |  S dS )zLoad documents.r   N)r   r   r   r   r   r   r   r&   r&   r*   r   l  s    zGoogleDriveLoader.load))__name__
__module____qualname____doc__r   homer   __annotations__r   r   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r   classmethodr6   r   r:   rL   r
   rk   r   r   r   r   r   r   r   r   r&   r&   r&   r*   r      s@   
.5+'!.	r   N)pathlibr   typingr   r   r   r   r   r   Zlangchain_core._api.deprecationr	   Zlangchain_core.documentsr
   Zpydanticr   r   r   Z)langchain_community.document_loaders.baser   rC   r   r&   r&   r&   r*   <module>
   s    