a
    bg#                     @   s~   d dl Z d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZ d dlmZ d dlmZ e eZdZG dd	 d	eZdS )
    N)BytesIO)ListOptionalSequence)ElementTree)Document)
BaseLoader@   c                	   @   s   e Zd ZdZdddeeee edddZd ee	e  ee	e  ee eeee	e
 d
ddZeee	e ddddZee eee	e
 dddZeeeee
 dddZeedddZeedddZeeedddZdS )!
QuipLoaderz_Load `Quip` pages.

    Port of https://github.com/quip/quip-api/tree/master/samples/baqup
    <   F)allow_dangerous_xml_parsing)api_urlaccess_tokenrequest_timeoutr   c                C   sL   zddl m} W n ty*   tdY n0 ||||d| _|sHtddS )a  
        Args:
            api_url: https://platform.quip.com
            access_token: token of access quip API. Please refer:
                https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs
            request_timeout: timeout of request, default 60s.
            allow_dangerous_xml_parsing: Allow dangerous XML parsing, defaults to False
        r   )
QuipClientz?`quip_api` package not found, please run `pip install quip_api`)r   base_urlr   ac  The quip client uses the built-in XML parser which may causesecurity issues when parsing XML data in some cases. Please see https://docs.python.org/3/library/xml.html#xml-vulnerabilities For more information, set `allow_dangerous_xml_parsing` as True if you are sure that your distribution of the standard library is not vulnerable to XML vulnerabilities.N)quip_api.quipr   ImportErrorquip_client
ValueError)selfr   r   r   r   r    r   w/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/quip.py__init__   s    
zQuipLoader.__init__N  )
folder_ids
thread_idsmax_docsinclude_all_foldersinclude_commentsinclude_imagesreturnc           	      C   s   |s|s|st d|pg }|r8|D ]}| |d| q$|rz| j }d|v r`| |d d| d|v rz| |d d| tt|d| }| |||S )aA  
        Args:
            :param folder_ids: List of specific folder IDs to load, defaults to None
            :param thread_ids: List of specific thread IDs to load, defaults to None
            :param max_docs: Maximum number of docs to retrieve in total, defaults 1000
            :param include_all_folders: Include all folders that your access_token
                   can access, but doesn't include your private folder
            :param include_comments: Include comments, defaults to False
            :param include_images: Include images, defaults to False
        z_Must specify at least one among `folder_ids`, `thread_ids` or set `include_all`_folders as Truer   Zgroup_folder_idsZshared_folder_idsN)r   get_thread_ids_by_folder_idr   Zget_authenticated_userlistsetprocess_threads)	r   r   r   r   r   r   r    	folder_iduserr   r   r   load=   s(    


zQuipLoader.load)r&   depthr   r!   c           
      C   sH  ddl m}m} z| j|}W n |y } zT|jdkrXtd| d| d|  ntd| d| d|j  W Y d	}~d	S d	}~0  |y } z,td| d| d
|j  W Y d	}~d	S d	}~0 0 |d dd| }t	d| d|  |d D ]@}	d|	v r(| 
|	d |d | nd|	v r||	d  qd	S )z4Get thread ids by folder id and update in thread_idsr   )	HTTPError	QuipErrori  zdepth z!, Skipped over restricted folder z, z, Skipped over folder z due to unknown error Nz due to HTTP error foldertitlez	Folder %sz, Processing folder childrenr&      	thread_id)r   r*   r+   r   Z
get_foldercodeloggingwarninggetinfor"   append)
r   r&   r)   r   r*   r+   r,   er-   childr   r   r   r"   j   s<    


z&QuipLoader.get_thread_ids_by_folder_id)r   r    include_messagesr!   c                 C   s2   g }|D ]$}|  |||}|dur|| q|S )z2Process a list of thread into a list of documents.N)process_threadr6   )r   r   r    r9   docsr0   docr   r   r   r%      s    zQuipLoader.process_threads)r0   r    r9   r!   c                 C   s   | j |}|d d }|d d }|d d }|d d }t|}td| d| d| d	|  d
|v rz| j |d
 }	W nH tjj	j
y }
 z*td| d| d|
  W Y d }
~
d S d }
~
0 0 ||||d}d}|r| |	}|r|d | | }t|d
 | |dS d S )Nthreadidr-   linkZupdated_useczprocessing thread z title z link z update_ts htmlzError parsing thread  z, skipping, )r-   	update_tsr>   source z/n)Zpage_contentmetadata)r   Z
get_threadr
   _sanitize_titleloggerr5   Zparse_document_htmlxmletreeZcElementTreeZ
ParseErrorerrorprocess_thread_imagesprocess_thread_messagesr   )r   r0   r    r9   r=   r-   r?   rB   sanitized_titletreer7   rE   textr   r   r   r:      sD    



zQuipLoader.process_thread)rN   r!   c                 C   s   d}zddl m} ddlm} W n ty:   tdY n0 |dD ]}|d}|rF|dsdqF|d	\}}}}	| j	||	}
z(|
t|
 }|d
 || }W qF ty } z td|  |W Y d }~qFd }~0 0 qF|S )NrD   r   )Image)pytesseractzg`Pillow or pytesseract` package not found, please run `pip install Pillow` or `pip install pytesseract`imgsrcz/blob/
z!failed to convert image to text, )ZPILrP   rQ   r   iterr4   
startswithsplitr   Zget_blobopenr   readZimage_to_stringOSErrorrG   rJ   )r   rN   rO   rP   rQ   rR   rS   _r0   Zblob_idZblob_responseimager7   r   r   r   rK      s*    

z QuipLoader.process_thread_images)r0   r!   c                 C   s^   d }g }| j j||dd}|| |r>|d d d }qq>q|  dd |D }d|S )	Nd   )max_created_useccountZcreated_usecr/   c                 S   s   g | ]}|d  qS )rO   r   ).0messager   r   r   
<listcomp>       z6QuipLoader.process_thread_messages.<locals>.<listcomp>rU   )r   Zget_messagesextendreversejoin)r   r0   r_   messageschunkZtextsr   r   r   rL      s    
z"QuipLoader.process_thread_messages)r-   r!   c                 C   s8   t dd| }t dd|}t|tkr4|d t }|S )Nz\srA   z(?u)[^- \w.]rD   )resublen_MAXIMUM_TITLE_LENGTH)r-   rM   r   r   r   rF      s
    zQuipLoader._sanitize_title)r   )NNr   FFF)__name__
__module____qualname____doc__strr   intboolr   r   r   r(   r"   r   r%   r:   r   rK   rL   staticmethodrF   r   r   r   r   r
      sL   	 (      

.
&
,r
   )r2   rk   Zxml.etree.cElementTreerH   ior   typingr   r   r   Zxml.etree.ElementTreer   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   	getLoggerro   rG   rn   r
   r   r   r   r   <module>   s   
