a
    bgf                     @  s   d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ G d	d
 d
eZdS )    )annotationsN)Path)AnyDictIterableListOptional)CallbackManagerForRetrieverRunDocument)BaseRetriever)
ConfigDictc                   @  s   e Zd ZU dZdZded< ded< dZded< dZd	ed
< eddZ	e
d(ddddd dddZe
dddddd dddZddddddZd)ddddd d!Ze
d"dd#dd$dd d%d&d'ZdS )*TFIDFRetrieverz`TF-IDF` retriever.

    Largely based on
    https://github.com/asvskartheek/Text-Retrieval/blob/master/TF-IDF%20Search%20Engine%20(SKLEARN).ipynb
    Nr   
vectorizerzList[Document]docstfidf_array   intkT)Zarbitrary_types_allowedzIterable[str]zOptional[Iterable[dict]]zOptional[Dict[str, Any]])texts	metadatastfidf_paramskwargsreturnc           	      K  s   zddl m} W n ty*   tdY n0 |p2i }|f i |}||}|p\dd |D }dd t||D }| f |||d|S )	Nr   )TfidfVectorizerzNCould not import scikit-learn, please install with `pip install scikit-learn`.c                 s  s   | ]
}i V  qd S N ).0_r   r   r/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/retrievers/tfidf.py	<genexpr>4       z,TFIDFRetriever.from_texts.<locals>.<genexpr>c                 S  s   g | ]\}}t ||d qS )Zpage_contentmetadatar
   )r   tmr   r   r   
<listcomp>5   r!   z-TFIDFRetriever.from_texts.<locals>.<listcomp>r   r   r   )Zsklearn.feature_extraction.textr   ImportErrorZfit_transformzip)	clsr   r   r   r   r   r   r   r   r   r   r   
from_texts!   s    

zTFIDFRetriever.from_texts)r   zIterable[Document])	documentsr   r   r   c                K  s.   t dd |D  \}}| jf |||d|S )Nc                 s  s   | ]}|j |jfV  qd S r   r"   )r   dr   r   r   r    @   r!   z0TFIDFRetriever.from_documents.<locals>.<genexpr>)r   r   r   )r)   r+   )r*   r,   r   r   r   r   r   r   r   from_documents8   s    zTFIDFRetriever.from_documentsstrr	   )queryrun_managerr   c                  s\   ddl m}  j|g}| j|d} fdd|  j d  d d d D }|S )Nr   )cosine_similarity)c                   s   g | ]} j | qS r   )r   )r   iselfr   r   r&   P   r!   z:TFIDFRetriever._get_relevant_documents.<locals>.<listcomp>r3   )Zsklearn.metrics.pairwiser2   r   Z	transformr   ZreshapeZargsortr   )r6   r0   r1   r2   Z	query_vecresultsZreturn_docsr   r5   r   _get_relevant_documentsE   s    ,z&TFIDFRetriever._get_relevant_documentstfidf_vectorizerNone)folder_path	file_namer   c                 C  s   zdd l }W n ty&   tdY n0 t|}|jddd || j|| d  t|| d d$}t| j| j	f| W d    n1 s0    Y  d S )Nr   BCould not import joblib, please install with `pip install joblib`.T)exist_okparents.joblib.pklwb)
joblibr(   r   mkdirdumpr   openpickler   r   )r6   r;   r<   rC   pathfr   r   r   
save_localS   s    
zTFIDFRetriever.save_localF)allow_dangerous_deserializationr<   bool)r;   rK   r<   r   c          
      C  s   zddl }W n ty&   tdY n0 |s4tdt|}||| d }t|| d d}t|\}}	W d   n1 s0    Y  | |||	dS )	a  Load the retriever from local storage.

        Args:
            folder_path: Folder path to load from.
            allow_dangerous_deserialization: Whether to allow dangerous deserialization.
                Defaults to False.
                The deserialization relies on .joblib and .pkl files, which can be
                modified to deliver a malicious payload that results in execution of
                arbitrary code on your machine. You will need to set this to `True` to
                use deserialization. If you do this, make sure you trust the source of
                the file.
            file_name: File name to load from. Defaults to "tfidf_vectorizer".

        Returns:
            TFIDFRetriever: Loaded retriever.
        r   Nr=   a  The de-serialization of this retriever is based on .joblib and .pkl files.Such files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to load this retriever. If you do this, make sure you trust the source of the file, and you are responsible for validating the file came from a trusted source.r@   rA   rbr'   )rC   r(   
ValueErrorr   loadrF   rG   )
r*   r;   rK   r<   rC   rH   r   rI   r   r   r   r   r   
load_locali   s    
,zTFIDFRetriever.load_local)NN)r9   )__name__
__module____qualname____doc__r   __annotations__r   r   r   Zmodel_configclassmethodr+   r.   r8   rJ   rP   r   r   r   r   r      s,   
   r   )
__future__r   rG   pathlibr   typingr   r   r   r   r   Zlangchain_core.callbacksr	   Zlangchain_core.documentsr   Zlangchain_core.retrieversr   Zpydanticr   r   r   r   r   r   <module>   s   