a
    bg F                     @  s   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlm Z  e!g dZ"dZ#ddddZ$G dd deZ%dS )    )annotationsN)ConfigParser)Path)AnyCallableDictIterableListOptionalTuple)Document)
Embeddingsguard_import)VectorStore)Docstore)InMemoryDocstore)maximal_marginal_relevance)angularZ	euclideanZ	manhattanZhammingdotr   r   returnc                   C  s   t dS )z1Import annoy if available, otherwise raise error.annoyr    r   r   t/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/annoy.pydependable_annoy_import   s    r   c                   @  s  e Zd ZdZdddddddd	Zed
dddZdQdddddddZddddddZdRddddddd Z	dSddddd!d"d#Z
dTddddd$d%d&ZdUddddd'd(d)d*ZdVddddd'd+d,d-ZdWddddd'd.d/d0ZdXdddd3dd'd4d5d6ZdYdddd3dd'd7d8d9Zeded:dfdd;d<dddddd d=	d>d?Zeded:dfdd<dddddd d@dAdBZeded:dfdCd<dddddd dDdEdFZdZddHdIdJdKdLZedGdMdd<dHd dNdOdPZdS )[Annoya  `Annoy` vector store.

    To use, you should have the ``annoy`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Annoy
            db = Annoy(embedding_function, index, docstore, index_to_docstore_id)

    r   r   strr   zDict[int, str]embedding_functionindexmetricdocstoreindex_to_docstore_idc                 C  s"   || _ || _|| _|| _|| _dS )z%Initialize with necessary components.Nr   )selfr   r    r!   r"   r#   r   r   r   __init__*   s
    	zAnnoy.__init__zOptional[Embeddings]r   c                 C  s   d S )Nr   r$   r   r   r   
embeddings9   s    zAnnoy.embeddingsNzIterable[str]zOptional[List[dict]]z	List[str])texts	metadataskwargsr   c                 K  s   t dd S )Nz=Annoy does not allow to add new data once the index is build.)NotImplementedError)r$   r(   r)   r*   r   r   r   	add_texts>   s    zAnnoy.add_textsz	List[int]zList[float]zList[Tuple[Document, float]])idxsdistsr   c                 C  s^   g }t ||D ]J\}}| j| }| j|}t|tsJtd| d| |||f q|S )a  Turns annoy results into a list of documents and scores.

        Args:
            idxs: List of indices of the documents in the index.
            dists: List of distances of the documents in the index.
        Returns:
            List of Documents and scores.
        Could not find document for id , got )zipr#   r"   search
isinstancer   
ValueErrorappend)r$   r-   r.   docsidxdist_iddocr   r   r   process_index_resultsH   s    

zAnnoy.process_index_results   int)	embeddingksearch_kr   c                 C  s$   | j j|||dd\}}| ||S a}  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided
        Returns:
            List of Documents most similar to the query and score for each
        TrA   Zinclude_distances)r    get_nns_by_vectorr;   )r$   r?   r@   rA   r-   r.   r   r   r   &similarity_search_with_score_by_vector\   s    
z,Annoy.similarity_search_with_score_by_vector)docstore_indexr@   rA   r   c                 C  s$   | j j|||dd\}}| ||S rB   )r    Zget_nns_by_itemr;   )r$   rF   r@   rA   r-   r.   r   r   r   %similarity_search_with_score_by_indexn   s    
z+Annoy.similarity_search_with_score_by_index)queryr@   rA   r   c                 C  s   |  |}| |||}|S )a~  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the query and score for each
        )r   rE   )r$   rH   r@   rA   r?   r6   r   r   r   similarity_search_with_score   s    
z"Annoy.similarity_search_with_scorezList[Document])r?   r@   rA   r*   r   c                 K  s   |  |||}dd |D S )a  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the embedding.
        c                 S  s   g | ]\}}|qS r   r   .0r:   _r   r   r   
<listcomp>       z5Annoy.similarity_search_by_vector.<locals>.<listcomp>)rE   )r$   r?   r@   rA   r*   docs_and_scoresr   r   r   similarity_search_by_vector   s    z!Annoy.similarity_search_by_vector)rF   r@   rA   r*   r   c                 K  s   |  |||}dd |D S )az  Return docs most similar to docstore_index.

        Args:
            docstore_index: Index of document in docstore
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the embedding.
        c                 S  s   g | ]\}}|qS r   r   rJ   r   r   r   rM      rN   z4Annoy.similarity_search_by_index.<locals>.<listcomp>)rG   )r$   rF   r@   rA   r*   rO   r   r   r   similarity_search_by_index   s    z Annoy.similarity_search_by_index)rH   r@   rA   r*   r   c                 K  s   |  |||}dd |D S )al  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the query.
        c                 S  s   g | ]\}}|qS r   r   rJ   r   r   r   rM      rN   z+Annoy.similarity_search.<locals>.<listcomp>)rI   )r$   rH   r@   rA   r*   rO   r   r   r   similarity_search   s    zAnnoy.similarity_search         ?float)r?   r@   fetch_klambda_multr*   r   c                   s   j j||ddd fdd D }ttj|gtjd|||d} fdd|D }g }	|D ]B}
j|
 }j|}t	|t
std	| d
| |	| q^|	S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            k: Number of Documents to return. Defaults to 4.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.

        Returns:
            List of Documents selected by maximal marginal relevance.
        r=   FrC   c                   s   g | ]} j |qS r   )r    Zget_item_vectorrK   ir&   r   r   rM      rN   zAAnnoy.max_marginal_relevance_search_by_vector.<locals>.<listcomp>)Zdtype)r@   rW   c                   s   g | ]}|d kr | qS )r=   r   rX   )r-   r   r   rM      rN   r/   r0   )r    rD   r   nparrayZfloat32r#   r"   r2   r3   r   r4   r5   )r$   r?   r@   rV   rW   r*   r'   Zmmr_selectedZselected_indicesr6   rY   r9   r:   r   )r-   r$   r   'max_marginal_relevance_search_by_vector   s&    

z-Annoy.max_marginal_relevance_search_by_vector)rH   r@   rV   rW   r*   r   c                 K  s    |  |}| j||||d}|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        )rW   )r   r\   )r$   rH   r@   rV   rW   r*   r?   r6   r   r   r   max_marginal_relevance_search   s
    
z#Annoy.max_marginal_relevance_searchd   zList[List[float]]r   )	r(   r'   r?   r)   r!   treesn_jobsr*   r   c                   s   |t vr td| dtt  td}	|s4tdt|d }
|	j|
|d}t|D ]\}}||| qV|j||d g }t|D ]*\}}|r|| ni }|	t
||d qd	d
 tt|D  t fdd
t|D }| |j||| S )NzUnsupported distance metric: z. Expected one of r   z/embeddings must be provided to build AnnoyIndexr   r!   )r`   )Zpage_contentmetadatac                 S  s   i | ]}|t t qS r   )r   uuiduuid4rX   r   r   r   
<dictcomp>6  rN   z Annoy.__from.<locals>.<dictcomp>c                   s   i | ]\}} | |qS r   r   )rK   rY   r:   Zindex_to_idr   r   re   8  rN   )INDEX_METRICSr4   listr   len
AnnoyIndex	enumerateZadd_itembuildr5   r   ranger   embed_query)clsr(   r'   r?   r)   r!   r_   r`   r*   r   fr    rY   ZembZ	documentstextrb   r"   r   rf   r   Z__from  s.    zAnnoy.__from)r(   r?   r)   r!   r_   r`   r*   r   c           	      K  s(   | |}| j|||||||fi |S )a  Construct Annoy wrapper from raw documents.

        Args:
            texts: List of documents to index.
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Metric to use for indexing. Defaults to "angular".
            trees: Number of trees to use for indexing. Defaults to 100.
            n_jobs: Number of jobs to use for indexing. Defaults to -1.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the Annoy database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Annoy
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                index = Annoy.from_texts(texts, embeddings)
        )Zembed_documents_Annoy__from)	ro   r(   r?   r)   r!   r_   r`   r*   r'   r   r   r   
from_texts<  s    $
zAnnoy.from_textszList[Tuple[str, List[float]]])text_embeddingsr?   r)   r!   r_   r`   r*   r   c           
      K  s:   dd |D }dd |D }	| j ||	|||||fi |S )a  Construct Annoy wrapper from embeddings.

        Args:
            text_embeddings: List of tuples of (text, embedding)
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Metric to use for indexing. Defaults to "angular".
            trees: Number of trees to use for indexing. Defaults to 100.
            n_jobs: Number of jobs to use for indexing. Defaults to -1

        This is a user friendly interface that:
            1. Creates an in memory docstore with provided embeddings
            2. Initializes the Annoy database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Annoy
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = list(zip(texts, text_embeddings))
                db = Annoy.from_embeddings(text_embedding_pairs, embeddings)
        c                 S  s   g | ]}|d  qS )r   r   rK   tr   r   r   rM     rN   z)Annoy.from_embeddings.<locals>.<listcomp>c                 S  s   g | ]}|d  qS )   r   ru   r   r   r   rM     rN   )rr   )
ro   rt   r?   r)   r!   r_   r`   r*   r(   r'   r   r   r   from_embeddingse  s    %zAnnoy.from_embeddingsFboolNone)folder_pathprefaultr   c                 C  s   t |}tj|dd t }| jj| jd|d< | jjt|d |d t	|d d&}t
| j| j|f| W d	   n1 s0    Y  d	S )
a  Save Annoy index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
            prefault: Whether to pre-load the index into memory.
        T)exist_ok)rp   r!   ANNOYindex.annoy)r|   	index.pklwbN)r   osmakedirsr   r    rp   r!   saver   openpickledumpr"   r#   )r$   r{   r|   pathconfig_objectfiler   r   r   
save_local  s    
zAnnoy.save_local)allow_dangerous_deserialization)r{   r'   r   r   c                C  s   |st dt|}td}t|d d }t|\}}}	W d   n1 sP0    Y  t|	d d }
|	d d }|j|
|d	}|t|d
  | |j	||||S )aR  Load Annoy index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embeddings: Embeddings to use when generating queries.
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        aB  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   rbNr~   rp   r!   ra   r   )
r4   r   r   r   r   loadr>   rj   r   rn   )ro   r{   r'   r   r   r   r   r"   r#   r   rp   r!   r    r   r   r   
load_local  s,    zAnnoy.load_local)N)r<   r=   )r<   r=   )r<   r=   )r<   r=   )r<   r=   )r<   r=   )r<   rS   rT   )r<   rS   rT   )F)__name__
__module____qualname____doc__r%   propertyr'   r,   r;   rE   rG   rI   rP   rQ   rR   r\   r]   classmethodDEFAULT_METRICrr   rs   rx   r   r   r   r   r   r   r      sb    
         2   "% ( +r   )&
__future__r   r   r   rc   configparserr   pathlibr   typingr   r   r   r   r	   r
   r   numpyrZ   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr   Zlangchain_core.vectorstoresr   Z!langchain_community.docstore.baser   Z&langchain_community.docstore.in_memoryr   Z&langchain_community.vectorstores.utilsr   	frozensetrg   r   r   r   r   r   r   r   <module>   s$   $