a
    bg                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ eeZG dd deZdS )    N)AnyDictIteratorList)Document)	BaseModelmodel_validatorc                   @   s8  e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< d	Zeed
< dZe
ed< dZe
ed< dZe
ed< dZeed< dZeed< eddeeedddZeedddZeee dddZeee ddd Zeed!d"d#Zeee dd$d%Zeee dd&d'Zeeed(d)d*Z eeed+d,d-Z!d.S )/PubMedAPIWrappera`  
    Wrapper around PubMed API.

    This wrapper will use the PubMed API to conduct searches and fetch
    document summaries. By default, it will return the document summaries
    of the top-k results of an input search.

    Parameters:
        top_k_results: number of the top-scored document used for the PubMed tool
        MAX_QUERY_LENGTH: maximum length of the query.
          Default is 300 characters.
        doc_content_chars_max: maximum length of the document content.
          Content will be truncated if it exceeds this length.
          Default is 2000 characters.
        max_retry: maximum number of retries for a request. Default is 5.
        sleep_time: time to wait between retries.
          Default is 0.2 seconds.
        email: email address to be used for the PubMed API.
        api_key: API key to be used for the PubMed API.
    parsez;https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?base_url_esearchz:https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?base_url_efetch   	max_retryg?
sleep_time   top_k_resultsi,  MAX_QUERY_LENGTHi  doc_content_chars_maxzyour_email@example.comemail api_keybefore)mode)valuesreturnc                 C   s6   zddl }|j|d< W n ty0   tdY n0 |S )z7Validate that the python package exists in environment.r   Nr
   zZCould not import xmltodict python package. Please install it with `pip install xmltodict`.)	xmltodictr
   ImportError)clsr   r    r   r/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/utilities/pubmed.pyvalidate_environment5   s    
z%PubMedAPIWrapper.validate_environment)queryr   c              
   C   sp   z<dd |  |d| j D }|r8d|d| j ndW S  tyj } zd| W  Y d}~S d}~0 0 dS )z
        Run PubMed search and get the article meta information.
        See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
        It uses only the most informative fields of article meta information.
        c                 S   s8   g | ]0}d |d  d|d  d|d  d|d  qS )zPublished: 	Publishedz
Title: Titlez
Copyright Information: Copyright Informationz
Summary::
Summaryr   ).0resultr   r   r   
<listcomp>M   s   z(PubMedAPIWrapper.run.<locals>.<listcomp>Nz

zNo good PubMed Result was foundzPubMed exception: )loadr   joinr   	Exception)selfr!   docsexr   r   r   runD   s    zPubMedAPIWrapper.runc                 c   s   | j d ttj|h d| j d }| jdkrD|d| j 7 }tj|}|	 
d}t|}|d d }|d d	 D ]}| ||V  qd
S )z
        Search PubMed for documents matching the query.
        Return an iterator of dictionaries containing the document metadata.
        zdb=pubmed&term=z&retmode=json&retmax=z&usehistory=yr   	&api_key=utf-8ZesearchresultwebenvZidlistN)r   strurllibr
   quoter   r   requesturlopenreaddecodejsonloadsretrieve_article)r,   r!   urlr'   textZ	json_textr2   uidr   r   r   	lazy_load^   s     

zPubMedAPIWrapper.lazy_loadc                 C   s   t | |S )z
        Search PubMed for documents matching the query.
        Return a list of dictionaries containing the document metadata.
        )listr@   r,   r!   r   r   r   r)   t   s    zPubMedAPIWrapper.load)docr   c                 C   s   | d}t||dS )Nr%   )Zpage_contentmetadata)popr   )r,   rC   summaryr   r   r   _dict2document{   s    
zPubMedAPIWrapper._dict2documentc                 c   s"   | j |dD ]}| |V  qd S N)r!   )r@   rG   )r,   r!   dr   r   r   lazy_load_docs   s    zPubMedAPIWrapper.lazy_load_docsc                 C   s   t | j|dS rH   )rA   rJ   rB   r   r   r   	load_docs   s    zPubMedAPIWrapper.load_docs)r?   r2   r   c           	   
   C   s   | j d | d | }| jdkr0|d| j 7 }d}ztj|}W qW q4 tjjy } z\|jdkr|| jk rt	d| j
dd	 t| j
 |  j
d
9  _
|d7 }n|W Y d }~q4d }~0 0 q4| d}| |}| ||S )Nzdb=pubmed&retmode=xml&id=z&webenv=r   r0   r   i  zToo Many Requests, waiting for z.2fz seconds...      r1   )r   r   r4   r6   r7   error	HTTPErrorcoder   printr   timesleepr8   r9   r
   _parse_article)	r,   r?   r2   r=   retryr'   eZxml_text	text_dictr   r   r   r<      s<    


z!PubMedAPIWrapper.retrieve_article)r?   rW   r   c           	      C   s   z|d d d d }W n" t y:   |d d d }Y n0 |di dg }d	d
 |D }|rld|n2t|trz|n$t|trddd | D nd}|di }d|dd|dd|ddg}||dd||di dd|dS )NZPubmedArticleSetZPubmedArticleZMedlineCitationZArticleZPubmedBookArticleZBookDocumentZAbstractZAbstractTextc                 S   s2   g | ]*}d |v rd|v r|d  d|d   qS )z#textz@Labelz: r   )r&   txtr   r   r   r(      s   z3PubMedAPIWrapper._parse_article.<locals>.<listcomp>
c                 s   s   | ]}t |V  qd S )N)r3   )r&   valuer   r   r   	<genexpr>       z2PubMedAPIWrapper._parse_article.<locals>.<genexpr>zNo abstract availableZArticleDate-ZYearr   ZMonthZDayZArticleTitleZCopyrightInformation)r?   r#   r"   r$   r%   )KeyErrorgetr*   
isinstancer3   dictr   )	r,   r?   rW   arZabstract_textZ	summariesrF   Za_dZpub_dater   r   r   rT      s<     
zPubMedAPIWrapper._parse_articleN)"__name__
__module____qualname____doc__r   __annotations__r   r3   r   r   intr   floatr   r   r   r   r   r   classmethodr   r    r/   r   ra   r@   r   r)   r   rG   rJ   rK   r<   rT   r   r   r   r   r	      s.   

"r	   )r:   loggingrR   urllib.errorr4   urllib.parseurllib.requesttypingr   r   r   r   Zlangchain_core.documentsr   Zpydanticr   r   	getLoggerrc   loggerr	   r   r   r   r   <module>   s   
