a
    bŠÝg¤  ã                   @   sP   d dl mZmZmZmZ d dlmZ d dlmZ d dl	m
Z
 G dd„ deƒZdS )é    )ÚAnyÚIteratorÚListÚOptional)ÚDocument)Ú
BaseLoader)ÚArxivAPIWrapperc                   @   sL   e Zd ZdZdeee edœdd„Ze	e
 dœdd„Zee
 dœd	d
„ZdS )ÚArxivLoaderaö  Load a query result from `Arxiv`.
    The loader converts the original PDF format into the text.

    Setup:
        Install ``arxiv`` and ``PyMuPDF`` packages.
        ``PyMuPDF`` transforms PDF files downloaded from the arxiv.org site
        into the text format.

        .. code-block:: bash

            pip install -U arxiv pymupdf


    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import ArxivLoader

            loader = ArxivLoader(
                query="reasoning",
                # load_max_docs=2,
                # load_all_available_meta=False
            )

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python
            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }


    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                        without explicit fine-tuning...'
            }

    Use summaries of articles as docs:
        .. code-block:: python

            from langchain_community.document_loaders import ArxivLoader

            loader = ArxivLoader(
                query="reasoning"
            )

            docs = loader.get_summaries_as_docs()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Pre-trained language models (LMs) are able to perform complex reasoning
            without explicit fine-tuning
            {
                'Entry ID': 'http://arxiv.org/abs/2402.03268v2',
                'Published': datetime.date(2024, 2, 29),
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                        Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                        Wenhu Chen, William Yang Wang'
            }
    N)ÚqueryÚdoc_content_chars_maxÚkwargsc                 K   s   || _ tf d|i|¤Ž| _dS )a$  Initialize with search query to find documents in the Arxiv.
        Supports all arguments of `ArxivAPIWrapper`.

        Args:
            query: free text which used to find documents in the Arxiv
            doc_content_chars_max: cut limit for the length of a document's content
        r   N)r
   r   Úclient)Úselfr
   r   r   © r   úx/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/arxiv.pyÚ__init__ƒ   s    ÿÿzArxivLoader.__init__)Úreturnc                 c   s   | j  | j¡E dH  dS )zLazy load Arvix documentsN)r   Ú	lazy_loadr
   ©r   r   r   r   r   “   s    zArxivLoader.lazy_loadc                 C   s   | j  | j¡S )zBUses papers summaries as documents rather than source Arvix papers)r   Úget_summaries_as_docsr
   r   r   r   r   r   —   s    z!ArxivLoader.get_summaries_as_docs)N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ústrr   Úintr   r   r   r   r   r   r   r   r   r   r   r	   	   s   z ÿ
ÿr	   N)Útypingr   r   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   Z#langchain_community.utilities.arxivr   r	   r   r   r   r   Ú<module>   s   