from __future__ import annotations

import operator
import pickle
import uuid
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import guard_import
from langchain_core.vectorstores import VectorStore

from langchain_community.docstore.base import AddableMixin, Docstore
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores.utils import DistanceStrategy


def normalize(x: np.ndarray) -> np.ndarray:
    """Normalize vectors to unit length."""
    x /= np.clip(np.linalg.norm(x, axis=-1, keepdims=True), 1e-12, None)
    return x


def dependable_scann_import() -> Any:
    """
    Import `scann` if available, otherwise raise error.
    """
    return guard_import("scann")


class ScaNN(VectorStore):
    """`ScaNN` vector store.

    To use, you should have the ``scann`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceEmbeddings
            from langchain_community.vectorstores import ScaNN

            model_name = "sentence-transformers/all-mpnet-base-v2"
            db = ScaNN.from_texts(
                ['foo', 'bar', 'barz', 'qux'],
                HuggingFaceEmbeddings(model_name=model_name))
            db.similarity_search('foo?', k=1)
    """

    def __init__(
        self,
        embedding: Embeddings,
        index: Any,
        docstore: Docstore,
        index_to_docstore_id: Dict[int, str],
        relevance_score_fn: Optional[Callable[[float], float]] = None,
        normalize_L2: bool = False,
        distance_strategy: DistanceStrategy = DistanceStrategy.EUCLIDEAN_DISTANCE,
        scann_config: Optional[str] = None,
    ):
        """Initialize with necessary components."""
        self.embedding = embedding
        self.index = index
        self.docstore = docstore
        self.index_to_docstore_id = index_to_docstore_id
        self.distance_strategy = distance_strategy
        self.override_relevance_score_fn = relevance_score_fn
        self._normalize_L2 = normalize_L2
        self._scann_config = scann_config

    def __add(
        self,
        texts: Iterable[str],
        embeddings: Iterable[List[float]],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        if not isinstance(self.docstore, AddableMixin):
            raise ValueError(
                "If trying to add texts, the underlying docstore should support "
                f"adding items, which {self.docstore} does not"
            )
        raise NotImplementedError("Updates are not available in ScaNN, yet.")

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.
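
        Note:
            Adding to an existing ScaNN store is not supported yet; this method
            currently raises ``NotImplementedError``. Rebuilding the store is
            the usual workaround, sketched here with hypothetical ``texts`` and
            ``embedding`` objects from the caller's code:

            .. code-block:: python

                db = ScaNN.from_texts(list(texts) + ["new text"], embedding)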

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        # Embed the texts and delegate to the shared add path.
        embeddings = self.embedding.embed_documents(list(texts))
        return self.__add(texts, embeddings, metadatas=metadatas, ids=ids, **kwargs)

    def add_embeddings(
        self,
        text_embeddings: Iterable[Tuple[str, List[float]]],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            text_embeddings: Iterable pairs of string and embedding to
                add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        if not isinstance(self.docstore, AddableMixin):
            raise ValueError(
                "If trying to add texts, the underlying docstore should support "
                f"adding items, which {self.docstore} does not"
            )
        texts, embeddings = zip(*text_embeddings)

        return self.__add(texts, embeddings, metadatas=metadatas, ids=ids, **kwargs)

    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
        """Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments that subclasses might use.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        """
        raise NotImplementedError("Deletions are not available in ScaNN, yet.")

    def similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Dict[str, Any]] = None,
        fetch_k: int = 20,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query.

        Args:
            embedding: Embedding vector to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.
            **kwargs: kwargs to be passed to similarity search. Can include:
                score_threshold: Optional, a floating point value between 0 to 1 to
                    filter the resulting set of retrieved docs

        Returns:
            List of documents most similar to the query text and L2 distance
            in float for each. Lower score represents more similarity.
        """
        vector = np.array([embedding], dtype=np.float32)
        if self._normalize_L2:
            vector = normalize(vector)
        indices, scores = self.index.search_batched(
            vector, k if filter is None else fetch_k
        )
        docs = []
        for j, i in enumerate(indices[0]):
            if i == -1:
                # An index of -1 means no result for this slot; skip it.
                continue
            _id = self.index_to_docstore_id[i]
            doc = self.docstore.search(_id)
            if not isinstance(doc, Document):
                raise ValueError(f"Could not find document for id {_id}, got {doc}")
            if filter is not None:
                filter = {
                    key: [value] if not isinstance(value, list) else value
                    for key, value in filter.items()
                }
                if all(doc.metadata.get(key) in value for key, value in filter.items()):
                    docs.append((doc, scores[0][j]))
            else:
                docs.append((doc, scores[0][j]))

        score_threshold = kwargs.get("score_threshold")
        if score_threshold is not None:
            # For similarity metrics keep scores >= threshold; for distance
            # metrics keep scores <= threshold.
            cmp = (
                operator.ge
                if self.distance_strategy
                in (DistanceStrategy.MAX_INNER_PRODUCT, DistanceStrategy.JACCARD)
                else operator.le
            )
            docs = [
                (doc, similarity)
                for doc, similarity in docs
                if cmp(similarity, score_threshold)
            ]
        return docs[:k]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        filter: Optional[Dict[str, Any]] = None,
        fetch_k: int = 20,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query.
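
        Example:
            A minimal usage sketch, assuming ``db`` was built as in the
            class-level example; under the default L2 strategy, lower scores
            mean more similar:

            .. code-block:: python

                docs_and_scores = db.similarity_search_with_score("foo?", k=2)
                for doc, score in docs_and_scores:
                    print(doc.page_content, score)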

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of documents most similar to the query text with
            L2 distance in float. Lower score represents more similarity.
        """
        embedding = self.embedding.embed_query(query)
        docs = self.similarity_search_with_score_by_vector(
            embedding,
            k,
            filter=filter,
            fetch_k=fetch_k,
            **kwargs,
        )
        return docs

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Dict[str, Any]] = None,
        fetch_k: int = 20,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the embedding.
        """
        docs_and_scores = self.similarity_search_with_score_by_vector(
            embedding,
            k,
            filter=filter,
            fetch_k=fetch_k,
            **kwargs,
        )
        return [doc for doc, _ in docs_and_scores]

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        filter: Optional[Dict[str, Any]] = None,
        fetch_k: int = 20,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query.
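
        Example:
            A sketch of metadata filtering, assuming ``db`` is an existing
            store whose documents carry a ``source`` metadata field:

            .. code-block:: python

                docs = db.similarity_search(
                    "foo?", k=1, filter={"source": "manual"}
                )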

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the query.
        """
        docs_and_scores = self.similarity_search_with_score(
            query, k, filter=filter, fetch_k=fetch_k, **kwargs
        )
        return [doc for doc, _ in docs_and_scores]

    @classmethod
    def __from(
        cls,
        texts: List[str],
        embeddings: List[List[float]],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        normalize_L2: bool = False,
        **kwargs: Any,
    ) -> ScaNN:
        scann = guard_import("scann")
        distance_strategy = kwargs.get(
            "distance_strategy", DistanceStrategy.EUCLIDEAN_DISTANCE
        )
        scann_config = kwargs.get("scann_config", None)

        vector = np.array(embeddings, dtype=np.float32)
        if normalize_L2:
            vector = normalize(vector)
        if scann_config is not None:
            index = scann.scann_ops_pybind.create_searcher(vector, scann_config)
        elif distance_strategy == DistanceStrategy.MAX_INNER_PRODUCT:
            index = (
                scann.scann_ops_pybind.builder(vector, 1, "dot_product")
                .score_brute_force()
                .build()
            )
        else:
            # Default to L2; other metric types are not initialized here.
            index = (
                scann.scann_ops_pybind.builder(vector, 1, "squared_l2")
                .score_brute_force()
                .build()
            )
        documents = []
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in texts]
        for i, text in enumerate(texts):
            metadata = metadatas[i] if metadatas else {}
            documents.append(Document(page_content=text, metadata=metadata))
        index_to_id = dict(enumerate(ids))

        if len(index_to_id) != len(documents):
            raise Exception(
                f"{len(index_to_id)} ids provided for {len(documents)} documents."
                " Each document should have an id."
            )

        docstore = InMemoryDocstore(dict(zip(index_to_id.values(), documents)))
        return cls(
            embedding,
            index,
            docstore,
            index_to_id,
            normalize_L2=normalize_L2,
            **kwargs,
        )

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> ScaNN:
        """Construct ScaNN wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the ScaNN database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import ScaNN
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                scann = ScaNN.from_texts(texts, embeddings)
        """
        embeddings = embedding.embed_documents(texts)
        return cls.__from(
            texts, embeddings, embedding, metadatas=metadatas, ids=ids, **kwargs
        )

    @classmethod
    def from_embeddings(
        cls,
        text_embeddings: List[Tuple[str, List[float]]],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> ScaNN:
        """Construct ScaNN wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the ScaNN database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import ScaNN
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = list(zip(texts, text_embeddings))
                scann = ScaNN.from_embeddings(text_embedding_pairs, embeddings)
        """
        texts = [t[0] for t in text_embeddings]
        embeddings = [t[1] for t in text_embeddings]
        return cls.__from(
            texts, embeddings, embedding, metadatas=metadatas, ids=ids, **kwargs
        )

    def save_local(self, folder_path: str, index_name: str = "index") -> None:
        """Save ScaNN index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
        """
        path = Path(folder_path)
        scann_path = path / "{index_name}.scann".format(index_name=index_name)
        scann_path.mkdir(exist_ok=True, parents=True)

        # Save the index separately since it is not picklable.
        self.index.serialize(str(scann_path))

        # Save the docstore and index_to_docstore_id together.
        with open(path / "{index_name}.pkl".format(index_name=index_name), "wb") as f:
            pickle.dump((self.docstore, self.index_to_docstore_id), f)

    @classmethod
    def load_local(
        cls,
        folder_path: str,
        embedding: Embeddings,
        index_name: str = "index",
        *,
        allow_dangerous_deserialization: bool = False,
        **kwargs: Any,
    ) -> ScaNN:
        """Load ScaNN index, docstore, and index_to_docstore_id from disk.
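
        Example:
            A round-trip sketch, assuming ``./scann_index`` was produced by
            ``save_local`` and ``embeddings`` is the same Embeddings
            implementation used to build the store:

            .. code-block:: python

                db = ScaNN.load_local(
                    "./scann_index",
                    embeddings,
                    allow_dangerous_deserialization=True,
                )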

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embedding: Embeddings to use when generating queries
            index_name: for saving with a specific index file name
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        """
        if not allow_dangerous_deserialization:
            raise ValueError(
                "The de-serialization relies loading a pickle file. "
                "Pickle files can be modified to deliver a malicious payload that "
                "results in execution of arbitrary code on your machine."
                "You will need to set `allow_dangerous_deserialization` to `True` to "
                "enable deserialization. If you do this, make sure that you "
                "trust the source of the data. For example, if you are loading a "
                "file that you created, and know that no one else has modified the "
                "file, then this is safe to do. Do not set this to `True` if you are "
                "loading a file from an untrusted source (e.g., some random site on "
                "the internet.)."
            )
        path = Path(folder_path)
        scann_path = path / "{index_name}.scann".format(index_name=index_name)
        scann_path.mkdir(exist_ok=True, parents=True)
        scann = dependable_scann_import()

        # Load the index separately since it is not picklable.
        index = scann.scann_ops_pybind.load_searcher(str(scann_path))

        # Load the docstore and index_to_docstore_id.
        with open(path / "{index_name}.pkl".format(index_name=index_name), "rb") as f:
            docstore, index_to_docstore_id = pickle.load(f)
        return cls(embedding, index, docstore, index_to_docstore_id, **kwargs)

    def _select_relevance_score_fn(self) -> Callable[[float], float]:
        """
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        """
        if self.override_relevance_score_fn is not None:
            return self.override_relevance_score_fn

        # Default strategy is to rely on the distance strategy provided in the
        # vectorstore constructor.
        if self.distance_strategy == DistanceStrategy.MAX_INNER_PRODUCT:
            return self._max_inner_product_relevance_score_fn
        elif self.distance_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE:
            return self._euclidean_relevance_score_fn
        else:
            raise ValueError(
                "Unknown distance strategy, must be cosine, max_inner_product,"
                " or euclidean"
            )

    def _similarity_search_with_relevance_scores(
        self,
        query: str,
        k: int = 4,
        filter: Optional[Dict[str, Any]] = None,
        fetch_k: int = 20,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs and their similarity scores on a scale from 0 to 1."""
        # Pop score_threshold so that it is applied to the normalized relevance
        # scores rather than to the raw distances.
        score_threshold = kwargs.pop("score_threshold", None)
        relevance_score_fn = self._select_relevance_score_fn()
        if relevance_score_fn is None:
            raise ValueError(
                "normalize_score_fn must be provided to"
                " ScaNN constructor to normalize scores"
            )
        docs_and_scores = self.similarity_search_with_score(
            query, k=k, filter=filter, fetch_k=fetch_k, **kwargs
        )
        docs_and_rel_scores = [
            (doc, relevance_score_fn(score)) for doc, score in docs_and_scores
        ]
        if score_threshold is not None:
            docs_and_rel_scores = [
                (doc, similarity)
                for doc, similarity in docs_and_rel_scores
                if similarity >= score_threshold
            ]
        return docs_and_rel_scores
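

# A minimal sketch of wiring a pre-built ScaNN searcher into this wrapper
# directly; ``ScaNN.from_texts`` performs the equivalent steps for you. The
# names ``texts`` and ``embedding`` are assumed to exist in the caller's code:
#
#     import numpy as np
#
#     vectors = np.array(embedding.embed_documents(texts), dtype=np.float32)
#     searcher = (
#         dependable_scann_import()
#         .scann_ops_pybind.builder(vectors, 1, "squared_l2")
#         .score_brute_force()
#         .build()
#     )
#     docstore = InMemoryDocstore(
#         {str(i): Document(page_content=t) for i, t in enumerate(texts)}
#     )
#     store = ScaNN(
#         embedding, searcher, docstore, {i: str(i) for i in range(len(texts))}
#     )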