a
    bgb
                     @   sf   d dl Z d dlZd dlmZ d dlmZmZmZ d dlm	Z	 d dl
mZ eeZG dd deZdS )    N)Path)DictIteratorUnion)Document)
BaseLoaderc                   @   sR   e Zd ZdZd
eeef eedf eedf eddddZe	e
 ddd	ZdS )MHTMLLoaderz)Parse `MHTML` files with `BeautifulSoup`.N )	file_pathopen_encoding	bs_kwargsget_text_separatorreturnc                 C   sT   zddl }W n ty&   tdY n0 || _|| _|du rDddi}|| _|| _dS )a  initialize with path, and optionally, file encoding to use, and any kwargs
        to pass to the BeautifulSoup object.

        Args:
            file_path: Path to file to load.
            open_encoding: The encoding to use when opening the file.
            bs_kwargs: Any kwargs to pass to the BeautifulSoup object.
            get_text_separator: The separator to use when getting the text
                from the soup.
        r   NzUbeautifulsoup4 package not found, please install it with `pip install beautifulsoup4`featuresZlxml)bs4ImportErrorr
   r   r   r   )selfr
   r   r   r   r    r   x/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/mhtml.py__init__   s    
zMHTMLLoader.__init__)r   c                 c   s   ddl m} t| jd| jd}t| }| }t	|t
sF|g}|D ]}| dkrJ|jdd }||fi | j}|| j}|jrt|jj}	nd}	t| j|	d	}
t||
d
V   W d   dS qJW d   n1 s0    Y  dS )z*Load MHTML document into document objects.r   )BeautifulSoupr)encodingz	text/htmlT)decoder	   )sourcetitle)Zpage_contentmetadataN)r   r   openr
   r   emailmessage_from_stringreadget_payload
isinstancelistget_content_typer   r   Zget_textr   r   strstringr   )r   r   fmessagepartsparthtmlZsouptextr   r   r   r   r   	lazy_load0   s&    
zMHTMLLoader.lazy_load)NNr	   )__name__
__module____qualname____doc__r   r%   r   dictr   r   r   r-   r   r   r   r   r      s      


 r   )r   loggingpathlibr   typingr   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   	getLoggerr.   loggerr   r   r   r   r   <module>   s   
