a
    bŠÝg+  ã                   @   sD   d dl mZ d dlmZmZmZ d dlmZmZ G dd„ deƒZ	dS )é    )ÚPath)ÚAnyÚListÚUnion)ÚUnstructuredFileLoaderÚvalidate_unstructured_versionc                       sB   e Zd ZdZd	eeef eedœ‡ fdd„Ze	dœdd„Z
‡  ZS )
ÚUnstructuredMarkdownLoaderaß  Load `Markdown` files using `Unstructured`.

    You can run the loader in one of two modes: "single" and "elements".
    If you use "single" mode, the document will be returned as a single
    langchain Document object. If you use "elements" mode, the unstructured
    library will split the document into elements such as Title and NarrativeText.
    You can pass in additional unstructured kwargs after mode to apply
    different unstructured settings.

    Setup:
        Install ``langchain-community``.

        .. code-block:: bash

            pip install -U langchain-community

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import UnstructuredMarkdownLoader

            loader = UnstructuredMarkdownLoader(
                "./example_data/example.md",
                mode="elements",
                strategy="fast",
            )

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Sample Markdown Document
            {'source': './example_data/example.md', 'category_depth': 0, 'last_modified': '2024-08-14T15:04:18', 'languages': ['eng'], 'filetype': 'text/markdown', 'file_directory': './example_data', 'filename': 'example.md', 'category': 'Title', 'element_id': '3d0b313864598e704aa26c728ecb61e5'}


    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Sample Markdown Document
            {'source': './example_data/example.md', 'category_depth': 0, 'last_modified': '2024-08-14T15:04:18', 'languages': ['eng'], 'filetype': 'text/markdown', 'file_directory': './example_data', 'filename': 'example.md', 'category': 'Title', 'element_id': '3d0b313864598e704aa26c728ecb61e5'}

    References
    ----------
    https://unstructured-io.github.io/unstructured/core/partition.html#partition-md
    Úsingle)Ú	file_pathÚmodeÚunstructured_kwargsc                    s,   t |ƒ}tdƒ tƒ jf ||dœ|¤Ž dS )a*  

        Args:
            file_path: The path to the Markdown file to load.
            mode: The mode to use when loading the file. Can be one of "single",
                "multi", or "all". Default is "single".
            **unstructured_kwargs: Any kwargs to pass to the unstructured.
        z0.4.16)r
   r   N)Ústrr   ÚsuperÚ__init__)Úselfr
   r   r   ©Ú	__class__© ú{/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/markdown.pyr   K   s    z#UnstructuredMarkdownLoader.__init__)Úreturnc                 C   s"   ddl m} |f d| ji| j¤ŽS )Nr   )Úpartition_mdÚfilename)Zunstructured.partition.mdr   r
   r   )r   r   r   r   r   Ú_get_elements]   s    z(UnstructuredMarkdownLoader._get_elements)r	   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r   r   Ú__classcell__r   r   r   r   r   
   s   C ý
ür   N)
Úpathlibr   Útypingr   r   r   Z1langchain_community.document_loaders.unstructuredr   r   r   r   r   r   r   Ú<module>   s   