a
    bgO                     @   s~   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZ eeZG dd deZdS )    N)Path)AnyDictIteratorPatternUnion)Document)
BaseLoaderc                   @   sF  e Zd ZU dZedejZee	d< edejZ
ee	d< edZee	d< edejZee	d	< ed
ejZee	d< edejZee	d< d)eeef eedddZeeef ejedddZeeeef edddZeedddZeedddZeeddd Zeedd!d"Zeedd#d$Z e!e" d%d&d'Z#d(S )*ObsidianLoaderz%Load `Obsidian` files from directory.z^---\n(.*?)\n---\nFRONT_MATTER_REGEXz	{{(.*?)}}TEMPLATE_VARIABLE_REGEXz[^\S\/]#([a-zA-Z_]+[-_/\w]*)	TAG_REGEXz^\s*(\w+)::\s*(.*)$DATAVIEW_LINE_REGEXz\[(\w+)::\s*(.*)\]DATAVIEW_INLINE_BRACKET_REGEXz\((\w+)::\s*(.*)\)DATAVIEW_INLINE_PAREN_REGEXUTF-8T)pathencodingcollect_metadatac                 C   s   || _ || _|| _dS )a%  Initialize with a path.

        Args:
            path: Path to the directory containing the Obsidian files.
            encoding: Charset encoding, defaults to "UTF-8"
            collect_metadata: Whether to collect metadata from the front matter.
                Defaults to True.
        N)	file_pathr   r   )selfr   r   r    r   {/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/obsidian.py__init__   s    zObsidianLoader.__init__)placeholdersmatchreturnc                 C   s"   dt | d}|d||< |S )z/Replace a template variable with a placeholder.Z__TEMPLATE_VAR___   )lengroup)r   r   r   placeholderr   r   r   _replace_template_var/   s    z$ObsidianLoader._replace_template_var)objr   r   c                 C   s   t |tr2| D ]\}}||d| d}qnZt |tr`| D ]\}}| ||||< qDn,t |trt|D ]\}}| ||||< qr|S )zIRestore template variables replaced with placeholders to original values.z{{z}})
isinstancestritemsreplacedict_restore_template_varslist	enumerate)r   r#   r   r!   valuekeyiitemr   r   r   r)   7   s    


z%ObsidianLoader._restore_template_vars)contentr   c                 C   s   | j s
i S | j|}|si S i }t| j|}| j||d}zDt	
|}| ||}d|v rt|d tr|d d|d< |W S  t	jjy   td i  Y S 0 dS )zEParse front matter metadata from the content and return it as a dict.r   tagsz, z Encountered non-yaml frontmatterN)r   r   search	functoolspartialr"   r   subr    yamlZ	safe_loadr)   r$   r%   splitparserZParserErrorloggerwarning)r   r0   r   r   Zreplace_template_varZfront_matter_textfront_matterr   r   r   _parse_front_matterD   s*    


z"ObsidianLoader._parse_front_matter)metadatar   c                 C   sB   i }|  D ]0\}}t|ttthv r0|||< qt|||< q|S )z4Convert a dictionary to a compatible with langchain.)r&   typer%   intfloat)r   r=   resultr-   r,   r   r   r   !_to_langchain_compatible_metadatab   s    
z0ObsidianLoader._to_langchain_compatible_metadatac                 C   s0   | j st S | j|}|s"t S dd |D S )z0Return a set of all tags in within the document.c                 S   s   h | ]}|qS r   r   ).0tagr   r   r   	<setcomp>u       z6ObsidianLoader._parse_document_tags.<locals>.<setcomp>)r   setr   findall)r   r0   r   r   r   r   _parse_document_tagsl   s    z#ObsidianLoader._parse_document_tagsc                 C   sP   | j s
i S i dd | j|D dd | j|D dd | j|D S )zWParse obsidian dataview plugin fields from the content and return it
        as a dict.c                 S   s   i | ]}|d  |d qS r   r   r   rC   r   r   r   r   
<dictcomp>~   s   z9ObsidianLoader._parse_dataview_fields.<locals>.<dictcomp>c                 S   s   i | ]}|d  |d qS rJ   r   rK   r   r   r   rL      s   c                 S   s   i | ]}|d  |d qS rJ   r   rK   r   r   r   rL      s   )r   r   rH   r   r   r   r0   r   r   r   _parse_dataview_fieldsw   s    

	
z%ObsidianLoader._parse_dataview_fieldsc                 C   s   | j s
|S | jd|S )z4Remove front matter metadata from the given content. )r   r   r5   rM   r   r   r   _remove_front_matter   s    z#ObsidianLoader._remove_front_matter)r   c           	   	   c   s   t t| jd}|D ]}t|| jd}| }W d    n1 sH0    Y  | |}| |}| 	|}| 
|}t|jt|| j| j| jd| ||}|s|drd|t|dg pg B |d< t||dV  qd S )Nz**/*.md)r   )sourcer   createdZlast_modifiedZlast_accessedr1   ,)Zpage_contentr=   )r*   r   r   globopenr   readr<   rI   rN   rP   r%   namestatst_ctimest_mtimest_atimerB   getjoinrG   r   )	r   pathsr   ftextr;   r1   Zdataview_fieldsr=   r   r   r   	lazy_load   s.    &




zObsidianLoader.lazy_loadN)r   T)$__name__
__module____qualname____doc__recompileDOTALLr   r   __annotations__r   r   	MULTILINEr   r   r   r   r%   r   boolr   r   Matchr"   r   r)   r(   r<   rB   rG   rI   rN   rP   r   r   ra   r   r   r   r   r
      s8   
  

r
   )r3   loggingrf   pathlibr   typingr   r   r   r   r   r6   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser	   	getLoggerrb   r9   r
   r   r   r   r   <module>   s   
