a
    bg                     @   sT   d dl Z d dlmZ d dlmZmZmZ d dlmZ d dl	m
Z
 G dd de
ZdS )    N)Path)IteratorPatternUnion)Document)
BaseLoaderc                   @   s   e Zd ZU dZedejejB Ze	e
d< deeef eedddZeed	d
dZeed	ddZeed	ddZee dddZdS )AcreomLoaderz%Load `acreom` vault from a directory.z^---\n(.*?)\n---\nFRONT_MATTER_REGEXUTF-8T)pathencodingcollect_metadatac                 C   s   || _ || _|| _dS )zInitialize the loader.N)	file_pathr   r   )selfr   r   r    r   y/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/acreom.py__init__   s    zAcreomLoader.__init__)contentreturnc                 C   sh   | j s
i S | j|}i }|rd|dd}|D ]0}d|v r2|dd\}}| || < q2q2q2|S )zEParse front matter metadata from the content and return it as a dict.   
:)r   r	   searchgroupsplitstrip)r   r   matchfront_matterlineslinekeyvaluer   r   r   _parse_front_matter    s    z AcreomLoader._parse_front_matterc                 C   s   | j s
|S | jd|S )z4Remove front matter metadata from the given content. )r   r	   subr   r   r   r   r   _remove_front_matter1   s    z!AcreomLoader._remove_front_matterc                 C   s.   t dd|}t dd|}t dd|}|S )Nz\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*r#   #z\[\[.*?\]\])rer$   r%   r   r   r   _process_acreom_content7   s    z$AcreomLoader._process_acreom_content)r   c              	   c   s   t t| jd}|D ]~}t|| jd}| }W d    n1 sH0    Y  | |}| |}| 	|}t
|jt
|d|}t||dV  qd S )Nz**/*.md)r   )sourcer   )Zpage_contentmetadata)listr   r   globopenr   readr"   r&   r)   strnamer   )r   Zpspftextr   r+   r   r   r   	lazy_load?   s    &


zAcreomLoader.lazy_loadN)r
   T)__name__
__module____qualname____doc__r(   compile	MULTILINEDOTALLr	   r   __annotations__r   r0   r   boolr   dictr"   r&   r)   r   r   r5   r   r   r   r   r   
   s   
  
r   )r(   pathlibr   typingr   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   r   r   r   r   r   <module>   s
   