a
    bg=                     @   s8   d Z ddlmZmZmZ ddlmZ G dd deZdS )z+Load Documents from Docusarus Documentation    )AnyListOptional)SitemapLoaderc                       sD   e Zd ZdZd	eeee  ed fddZeedddZ	  Z
S )
DocusaurusLoadera  Load from Docusaurus Documentation.

    It leverages the SitemapLoader to loop through the generated pages of a
    Docusaurus Documentation website and extracts the content by looking for specific
    HTML tags. By default, the parser searches for the main content of the Docusaurus
    page, which is normally the <article>. You can also define your own
    custom HTML tags by providing them as a list, for example: ["div", ".main", "a"].
    N)urlcustom_html_tagskwargsc                    sH   | ds| d}|pdg| _t j|fd| dp:| ji| dS )aq  Initialize DocusaurusLoader

        Args:
            url: The base URL of the Docusaurus website.
            custom_html_tags: Optional custom html tags to extract content from pages.
            kwargs: Additional args to extend the underlying SitemapLoader, for example:
                filter_urls, blocksize, meta_function, is_local, continue_on_failure
        is_localz/sitemap.xmlzmain articleZparsing_functionN)getr   super__init___parsing_function)selfr   r   r	   	__class__ }/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/docusaurus.pyr      s    

zDocusaurusLoader.__init__)contentreturnc                 C   s8   | d| j}|D ]}||vr|  qt| S )z0Parses specific elements from a Docusaurus page.,)selectjoinr   Z	decomposestrZget_text)r   r   Zrelevant_elementselementr   r   r   r   +   s
    
z"DocusaurusLoader._parsing_function)N)__name__
__module____qualname____doc__r   r   r   r   r   r   __classcell__r   r   r   r   r      s    
r   N)r   typingr   r   r   Z,langchain_community.document_loaders.sitemapr   r   r   r   r   r   <module>   s   