from __future__ import annotations

import logging
import operator
import os
import pickle
import uuid
import warnings
from pathlib import Path
from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Sequence,
    Sized,
    Tuple,
    Union,
)

import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.runnables.config import run_in_executor
from langchain_core.vectorstores import VectorStore

from langchain_community.docstore.base import AddableMixin, Docstore
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores.utils import (
    DistanceStrategy,
    maximal_marginal_relevance,
)

logger = logging.getLogger(__name__)


def dependable_faiss_import(no_avx2: Optional[bool] = None) -> Any:
    """
    Import faiss if available, otherwise raise error.
    If the FAISS_NO_AVX2 environment variable is set, FAISS is loaded without
    AVX2 optimizations.

    Args:
        no_avx2: Load FAISS strictly with no AVX2 optimization
            so that the vectorstore is portable and compatible with other devices.
    NZFAISS_NO_AVX2r   )	swigfaisszCould not import faiss python package. Please install it with `pip install faiss-gpu` (for CUDA supported GPU) or `pip install faiss-cpu` (depending on Python version).)osenvironboolgetenvfaissr   ImportError)r   r     r"   t/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/faiss.pydependable_faiss_import'   s    

r$   strNone)xyx_namey_namer   c                 C  sZ   t | trVt |trVt| t|krVt| d| d| dt|  d| dt| d S )Nz and z% expected to be equal length but len(z)=z	 and len()
isinstancer   len
ValueError)r'   r(   r)   r*   r"   r"   r#   _len_check_if_sizedB   s    $r.   c                   @  s  e Zd ZdZddejfdddddd	d
dddZeddddZdddddZ	dddddZ
dddddZdddddZdd d!d"d#dd$d%d&Zdd d'd#ddd(d)d*Zdd d'd#ddd(d+d,Zdd-d'd#ddd.d/d0Zddd3d4d3dd5d6d7d8Zddd3d4d3dd5d6d9d:Zddd3d4d3dd5d;d<d=Zddd3d4d3dd5d;d>d?Zddd3d@d3ddAd6dBdCZddd3d4d3ddAd6dDdEZddd3d4d3ddAd;dFdGZddd3d4d3ddAd;dHdIZd1d2dJddKdd3d3dLd4d5dMdNdOZd1d2dJddKdd3d3dLd4d5dMdPdQZddd3d3dLd4ddAdRdSdTZddd3d3dLd4ddAdRdUdVZddd3d3dLd4ddAdWdXdYZddd3d3dLd4ddAdWdZd[Zdd#dd\d]d^d_Zd d`dadbdcZ e!dddejfd dddd"d#d	d
dd de	dfdgZ"e!ddddd'd#dd dhdidjZ#e!ddkddd'd#dd dhdldmZ$e!dd-ddd"d#dd dndodpZ%e!dd-ddd"d#dd dndqdrZ&dddd`dtdudvZ'e!dddwddddd	dd dxdydzZ(d{dd|d}Z)e!ddwd{ddd	dd d~ddZ*ddddZ+ddd3d4d3dd5d;ddZ,ddd3d4d3dd5d;ddZ-e.d4ddddZ/dddddZ0dS )FAISSu  FAISS vector store integration.

    See [The FAISS Library](https://arxiv.org/pdf/2401.08281) paper.

    Setup:
        Install ``langchain_community`` and ``faiss-cpu`` python packages.

        .. code-block:: bash

            pip install -qU langchain_community faiss-cpu

    Key init args — indexing params:
        embedding_function: Embeddings
            Embedding function to use.

    Key init args — client params:
        index: Any
            FAISS index to use.
        docstore: Docstore
            Docstore to use.
        index_to_docstore_id: Dict[int, str]
            Mapping of index to docstore id.

    Instantiate:
        .. code-block:: python

            import faiss
            from langchain_community.vectorstores import FAISS
            from langchain_community.docstore.in_memory import InMemoryDocstore
            from langchain_openai import OpenAIEmbeddings

            index = faiss.IndexFlatL2(len(OpenAIEmbeddings().embed_query("hello world")))

            vector_store = FAISS(
                embedding_function=OpenAIEmbeddings(),
                index=index,
                docstore=InMemoryDocstore(),
                index_to_docstore_id={}
            )
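
        A minimal variation (sketch): to use inner-product similarity instead of the
        default L2 metric, pair an ``IndexFlatIP`` index with
        ``DistanceStrategy.MAX_INNER_PRODUCT``:

        .. code-block:: python

            from langchain_community.vectorstores.utils import DistanceStrategy

            ip_index = faiss.IndexFlatIP(len(OpenAIEmbeddings().embed_query("hello world")))

            ip_store = FAISS(
                embedding_function=OpenAIEmbeddings(),
                index=ip_index,
                docstore=InMemoryDocstore(),
                index_to_docstore_id={},
                distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
            )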

    Add Documents:
        .. code-block:: python

            from langchain_core.documents import Document

            document_1 = Document(page_content="foo", metadata={"baz": "bar"})
            document_2 = Document(page_content="thud", metadata={"bar": "baz"})
            document_3 = Document(page_content="i will be deleted :(")

            documents = [document_1, document_2, document_3]
            ids = ["1", "2", "3"]
            vector_store.add_documents(documents=documents, ids=ids)

    Delete Documents:
        .. code-block:: python

            vector_store.delete(ids=["3"])

    Search:
        .. code-block:: python

            results = vector_store.similarity_search(query="thud",k=1)
            for doc in results:
                print(f"* {doc.page_content} [{doc.metadata}]")

        .. code-block:: python

            * thud [{'bar': 'baz'}]

    Search with filter:
        .. code-block:: python

            results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"})
            for doc in results:
                print(f"* {doc.page_content} [{doc.metadata}]")

        .. code-block:: python

            * thud [{'bar': 'baz'}]
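
        Filters may also be callables over a document's metadata dict, or use the
        operator syntax supported by this class (``$eq``, ``$neq``, ``$gt``, ``$gte``,
        ``$lt``, ``$lte``, ``$in``, ``$nin``, ``$and``, ``$or``, ``$not``). A small
        sketch, reusing the documents added above:

        .. code-block:: python

            results = vector_store.similarity_search(
                query="thud", k=1, filter={"bar": {"$in": ["baz", "qux"]}}
            )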

    Search with score:
        .. code-block:: python

            results = vector_store.similarity_search_with_score(query="qux",k=1)
            for doc, score in results:
                print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

        .. code-block:: python

            * [SIM=0.335304] foo [{'baz': 'bar'}]

    Async:
        .. code-block:: python

            # add documents
            # await vector_store.aadd_documents(documents=documents, ids=ids)

            # delete documents
            # await vector_store.adelete(ids=["3"])

            # search
            # results = vector_store.asimilarity_search(query="thud",k=1)

            # search with score
            results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
            for doc,score in results:
                print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

        .. code-block:: python

            * [SIM=0.335304] foo [{'baz': 'bar'}]

    Use as Retriever:
        .. code-block:: python

            retriever = vector_store.as_retriever(
                search_type="mmr",
                search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
            )
            retriever.invoke("thud")

        .. code-block:: python

            [Document(metadata={'bar': 'baz'}, page_content='thud')]
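
    Save and load:
        A minimal sketch (assumes write access to a local ``faiss_index`` folder;
        loading the pickled docstore requires explicitly opting in to
        deserialization):

        .. code-block:: python

            vector_store.save_local("faiss_index")

            loaded_store = FAISS.load_local(
                "faiss_index",
                OpenAIEmbeddings(),
                allow_dangerous_deserialization=True,
            )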

    """

    def __init__(
        self,
        embedding_function: Union[Callable[[str], List[float]], Embeddings],
        index: Any,
        docstore: Docstore,
        index_to_docstore_id: Dict[int, str],
        relevance_score_fn: Optional[Callable[[float], float]] = None,
        normalize_L2: bool = False,
        distance_strategy: DistanceStrategy = DistanceStrategy.EUCLIDEAN_DISTANCE,
    ):
        """Initialize with necessary components."""
        if not isinstance(embedding_function, Embeddings):
            logger.warning(
                "`embedding_function` is expected to be an Embeddings object, support "
                "for passing in a function will soon be removed."
            )
        self.embedding_function = embedding_function
        self.index = index
        self.docstore = docstore
        self.index_to_docstore_id = index_to_docstore_id
        self.distance_strategy = distance_strategy
        self.override_relevance_score_fn = relevance_score_fn
        self._normalize_L2 = normalize_L2
        if (
            self.distance_strategy != DistanceStrategy.EUCLIDEAN_DISTANCE
            and self._normalize_L2
        ):
            warnings.warn(
                "Normalizing L2 is not applicable for "
                f"metric type: {self.distance_strategy}"
            )

    @property
    def embeddings(self) -> Optional[Embeddings]:
        return (
            self.embedding_function
            if isinstance(self.embedding_function, Embeddings)
            else None
        )

    def _embed_documents(self, texts: List[str]) -> List[List[float]]:
        if isinstance(self.embedding_function, Embeddings):
            return self.embedding_function.embed_documents(texts)
        else:
            return [self.embedding_function(text) for text in texts]

    async def _aembed_documents(self, texts: List[str]) -> List[List[float]]:
        if isinstance(self.embedding_function, Embeddings):
            return await self.embedding_function.aembed_documents(texts)
        else:
            raise Exception(
                "`embedding_function` is expected to be an Embeddings object, support "
                "for passing in a function will soon be removed."
            )

    def _embed_query(self, text: str) -> List[float]:
        if isinstance(self.embedding_function, Embeddings):
            return self.embedding_function.embed_query(text)
        else:
            return self.embedding_function(text)

    async def _aembed_query(self, text: str) -> List[float]:
        if isinstance(self.embedding_function, Embeddings):
            return await self.embedding_function.aembed_query(text)
        else:
            raise Exception(
                "`embedding_function` is expected to be an Embeddings object, support "
                "for passing in a function will soon be removed."
            )

    def __add(
        self,
        texts: Iterable[str],
        embeddings: Iterable[List[float]],
        metadatas: Optional[Iterable[dict]] = None,
        ids: Optional[List[str]] = None,
    ) -> List[str]:
        faiss = dependable_faiss_import()
        if not isinstance(self.docstore, AddableMixin):
            raise ValueError(
                "If trying to add texts, the underlying docstore should support "
                f"adding items, which {self.docstore} does not"
            )

        _len_check_if_sized(texts, metadatas, "texts", "metadatas")
        ids = ids or [str(uuid.uuid4()) for _ in texts]
        _len_check_if_sized(texts, ids, "texts", "ids")

        _metadatas = metadatas or ({} for _ in texts)
        documents = [
            Document(id=id_, page_content=t, metadata=m)
            for id_, t, m in zip(ids, texts, _metadatas)
        ]
        _len_check_if_sized(documents, embeddings, "documents", "embeddings")

        if ids and len(ids) != len(set(ids)):
            raise ValueError("Duplicate ids found in the ids list.")

        # Add the embeddings to the FAISS index, normalizing first if configured.
        vector = np.array(embeddings, dtype=np.float32)
        if self._normalize_L2:
            faiss.normalize_L2(vector)
        self.index.add(vector)

        # Record the documents in the docstore and map index positions to ids.
        self.docstore.add({id_: doc for id_, doc in zip(ids, documents)})
        starting_len = len(self.index_to_docstore_id)
        index_to_id = {starting_len + j: id_ for j, id_ in enumerate(ids)}
        self.index_to_docstore_id.update(index_to_id)
        return ids

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        rS   rT   )listrK   _FAISS__addr?   rD   rS   rT   rp   rC   r"   r"   r#   	add_textsB  s    
zFAISS.add_textsc                   s*   t |}| |I dH }| j||||dS )a  Run more texts through the embeddings and add to the vectorstore
            asynchronously.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        texts = list(texts)
        embeddings = await self._aembed_documents(texts)
        return self.__add(texts, embeddings, metadatas=metadatas, ids=ids)

    def add_embeddings(
        self,
        text_embeddings: Iterable[Tuple[str, List[float]]],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Add the given texts and embeddings to the vectorstore.

        Args:
            text_embeddings: Iterable pairs of string and embedding to
                add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
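
        Example:
            A minimal sketch (assumes ``embeddings`` is an initialized ``Embeddings``
            instance and ``vector_store`` is an existing FAISS store):

            .. code-block:: python

                texts = ["foo", "bar"]
                vectors = embeddings.embed_documents(texts)
                ids = vector_store.add_embeddings(list(zip(texts, vectors)))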
        """
        texts, embeddings = zip(*text_embeddings)
        return self.__add(texts, embeddings, metadatas=metadatas, ids=ids)

    def similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Union[Callable, Dict[str, Any]]] = None,
        fetch_k: int = 20,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query.

        Args:
            embedding: Embedding vector to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Union[Callable, Dict[str, Any]]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.
            **kwargs: kwargs to be passed to similarity search. Can include:
                score_threshold: Optional, a floating point value between 0 to 1 to
                    filter the resulting set of retrieved docs

        Returns:
            List of documents most similar to the query text and L2 distance
            in float for each. Lower score represents more similarity.
        """
        faiss = dependable_faiss_import()
        vector = np.array([embedding], dtype=np.float32)
        if self._normalize_L2:
            faiss.normalize_L2(vector)
        scores, indices = self.index.search(vector, k if filter is None else fetch_k)
        docs = []
        if filter is not None:
            filter_func = self._create_filter_func(filter)
        for j, i in enumerate(indices[0]):
            if i == -1:
                # This happens when not enough docs are returned.
                continue
            _id = self.index_to_docstore_id[i]
            doc = self.docstore.search(_id)
            if not isinstance(doc, Document):
                raise ValueError(f"Could not find document for id {_id}, got {doc}")
            if filter is not None:
                if filter_func(doc.metadata):
                    docs.append((doc, scores[0][j]))
            else:
                docs.append((doc, scores[0][j]))

        score_threshold = kwargs.get("score_threshold")
        if score_threshold is not None:
            cmp = (
                operator.ge
                if self.distance_strategy
                in (DistanceStrategy.MAX_INNER_PRODUCT, DistanceStrategy.JACCARD)
                else operator.le
            )
            docs = [
                (doc, similarity)
                for doc, similarity in docs
                if cmp(similarity, score_threshold)
            ]
        return docs[:k]

    async def asimilarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Union[Callable, Dict[str, Any]]] = None,
        fetch_k: int = 20,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query asynchronously.

        Args:
            embedding: Embedding vector to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, Any]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.
            **kwargs: kwargs to be passed to similarity search. Can include:
                score_threshold: Optional, a floating point value between 0 to 1 to
                    filter the resulting set of retrieved docs

        Returns:
            List of documents most similar to the query text and L2 distance
            in float for each. Lower score represents more similarity.
        Nr}   r~   r   )r   r   )r?   r|   r}   r~   r   rp   r"   r"   r#   'asimilarity_search_with_score_by_vector  s    z-FAISS.asimilarity_search_with_score_by_vector)queryr}   r~   r   rp   r   c                 K  s(   |  |}| j||f||d|}|S )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of documents most similar to the query text with
            L2 distance in float. Lower score represents more similarity.
        """
        embedding = self._embed_query(query)
        docs = self.similarity_search_with_score_by_vector(
            embedding,
            k,
            filter=filter,
            fetch_k=fetch_k,
            **kwargs,
        )
        return docs

    async def asimilarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        filter: Optional[Union[Callable, Dict[str, Any]]] = None,
        fetch_k: int = 20,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query asynchronously.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of documents most similar to the query text with
            L2 distance in float. Lower score represents more similarity.
        Nr   )rR   r   r   r"   r"   r#   asimilarity_search_with_score  s    z#FAISS.asimilarity_search_with_scorezOptional[Dict[str, Any]]zList[Document]c                 K  s(   | j ||f||d|}dd |D S )aY  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the embedding.
        r   c                 S  s   g | ]\}}|qS r"   r"   rE   ra   rX   r"   r"   r#   rG   M  rH   z5FAISS.similarity_search_by_vector.<locals>.<listcomp>)r   r?   r|   r}   r~   r   rp   docs_and_scoresr"   r"   r#   similarity_search_by_vector/  s    z!FAISS.similarity_search_by_vectorc                   s.   | j ||f||d|I dH }dd |D S )ah  Return docs most similar to embedding vector asynchronously.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the embedding.
        r   Nc                 S  s   g | ]\}}|qS r"   r"   r   r"   r"   r#   rG   m  rH   z6FAISS.asimilarity_search_by_vector.<locals>.<listcomp>)r   r   r"   r"   r#   asimilarity_search_by_vectorO  s    z"FAISS.asimilarity_search_by_vectorc                 K  s(   | j ||f||d|}dd |D S )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the query.
        r   c                 S  s   g | ]\}}|qS r"   r"   r   r"   r"   r#   rG     rH   z+FAISS.similarity_search.<locals>.<listcomp>)r   r?   r   r}   r~   r   rp   r   r"   r"   r#   similarity_searcho  s    zFAISS.similarity_searchc                   s.   | j ||f||d|I dH }dd |D S )a  Return docs most similar to query asynchronously.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the query.
        r   Nc                 S  s   g | ]\}}|qS r"   r"   r   r"   r"   r#   rG     rH   z,FAISS.asimilarity_search.<locals>.<listcomp>)r   r   r"   r"   r#   asimilarity_search  s    zFAISS.asimilarity_search      ?r}   r   lambda_multr~   float)r|   r}   r   r   r~   r   c                  s`   j tj|gtjd|du r"|n|d \}}|dur |}g }	|d D ]V}
|
dkr\qN j|
 } j|}t|t	st
d| d| ||jrN|	|
 qNt|	g} fdd	|d D }ttj|gtjd|||d
}g }|D ]l}
|d |
 dkrq j|d |
  } j|}t|t	sDt
d| d| |||d |
 f q|S )az  Return docs and their similarity scores selected using the maximal marginal
            relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents and similarity scores selected by maximal marginal
                relevance and score for each.
        r`   N   r   r   r   r   c                   s$   g | ]}|d kr j t|qS )r   )r1   Zreconstructr{   )rE   r   rB   r"   r#   rG     rH   zLFAISS.max_marginal_relevance_search_with_score_by_vector.<locals>.<listcomp>)r}   r   )r1   r   rh   ri   rj   r   r3   r2   r+   r   r-   r[   r   r   )r?   r|   r}   r   r   r~   r   r   r   Zfiltered_indicesr   r   ra   rC   Zmmr_selectedr   r"   rB   r#   2max_marginal_relevance_search_with_score_by_vector  sD    



z8FAISS.max_marginal_relevance_search_with_score_by_vectorc             	     s   t d| j|||||dI dH S )a  Return docs and their similarity scores selected using the maximal marginal
            relevance asynchronously.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents and similarity scores selected by maximal marginal
                relevance and score for each.
        Nr   )r   r   )r?   r|   r}   r   r   r~   r"   r"   r#   3amax_marginal_relevance_search_with_score_by_vector  s    z9FAISS.amax_marginal_relevance_search_with_score_by_vector)r|   r}   r   r   r~   rp   r   c                 K  s"   | j |||||d}dd |D S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   c                 S  s   g | ]\}}|qS r"   r"   r   r"   r"   r#   rG   )  rH   zAFAISS.max_marginal_relevance_search_by_vector.<locals>.<listcomp>)r   r?   r|   r}   r   r   r~   rp   r   r"   r"   r#   'max_marginal_relevance_search_by_vector  s    
z-FAISS.max_marginal_relevance_search_by_vectorc                   s(   | j |||||dI dH }dd |D S )a(  Return docs selected using the maximal marginal relevance asynchronously.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   Nc                 S  s   g | ]\}}|qS r"   r"   r   r"   r"   r#   rG   J  rH   zBFAISS.amax_marginal_relevance_search_by_vector.<locals>.<listcomp>)r   r   r"   r"   r#   (amax_marginal_relevance_search_by_vector+  s
    

z.FAISS.amax_marginal_relevance_search_by_vector)r   r}   r   r   r~   rp   r   c           	      K  s*   |  |}| j|f||||d|}|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering (if needed) to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   )rQ   r   	r?   r   r}   r   r   r~   rp   r|   r   r"   r"   r#   max_marginal_relevance_searchL  s    
z#FAISS.max_marginal_relevance_searchc           	        s6   |  |I dH }| j|f||||d|I dH }|S )a+  Return docs selected using the maximal marginal relevance asynchronously.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering (if needed) to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        Nr   )rR   r   r   r"   r"   r#   amax_marginal_relevance_searchq  s    z$FAISS.amax_marginal_relevance_searchr   )rT   rp   r   c                   s   |du rt dt|| j }|r6t d| dd | j D fdd|D  | jtj	 tj
d | j|  fd	d
t| j D }dd t|D | _dS )zDelete by ID. These are the IDs in the vectorstore.

        Args:
            ids: List of ids to delete.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        NzNo ids provided to delete.zESome specified ids do not exist in the current store. Ids not found: c                 S  s   i | ]\}}||qS r"   r"   )rE   idxr\   r"   r"   r#   rb     rH   z FAISS.delete.<locals>.<dictcomp>c                   s   h | ]} | qS r"   r"   rE   r\   )reversed_indexr"   r#   	<setcomp>  rH   zFAISS.delete.<locals>.<setcomp>r`   c                   s   g | ]\}}| vr|qS r"   r"   rE   r   r\   )index_to_deleter"   r#   rG     s   z FAISS.delete.<locals>.<listcomp>c                 S  s   i | ]\}}||qS r"   r"   r   r"   r"   r#   rb     rH   T)r-   rg   
differencer3   valuesitemsr1   Z
remove_idsrh   ZfromiterZint64r2   deletesortedrl   )r?   rT   rp   Zmissing_idsZremaining_idsr"   )r   r   r#   r     s$    

zFAISS.deleter&   )targetr   c                 C  s   t | jtstdt| j}| j|j g }|j D ]:\}}|j	|}t |t
s`td||| ||f q:| jdd |D  dd |D }| j| dS )zMerge another FAISS object with the current one.

        Add the target FAISS to the current one.

        Args:
            target: FAISS object you wish to merge into the current one

        Returns:
            None.
        z'Cannot merge with this type of docstorezDocument should be returnedc                 S  s   i | ]\}}}||qS r"   r"   )rE   rX   r   ra   r"   r"   r#   rb     rH   z$FAISS.merge_from.<locals>.<dictcomp>c                 S  s   i | ]\}}}||qS r"   r"   )rE   r1   r   rX   r"   r"   r#   rb     rH   N)r+   r2   r   r-   r,   r3   r1   
merge_fromr   r   r   r   rk   rm   )r?   r   re   Z	full_infor   Z	target_idra   ro   r"   r"   r#   r     s    

zFAISS.merge_fromr   )	rD   rC   r|   rS   rT   r5   r6   rp   r   c                 K  s   t  }	|tjkr$|	t|d }
n|	t|d }
|dt }|di }| ||
||f||d|}|j||||d |S )Nr   r2   r3   )r5   r6   rq   )	r$   r   r   ZIndexFlatIPr,   ZIndexFlatL2popr   rs   )clsrD   rC   r|   rS   rT   r5   r6   rp   r    r1   r2   r3   Zvecstorer"   r"   r#   Z__from  s&    
	zFAISS.__from)rD   r|   rS   rT   rp   r   c                 K  s&   | |}| j|||f||d|S )aO  Construct FAISS wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the FAISS database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import FAISS
                from langchain_community.embeddings import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                faiss = FAISS.from_texts(texts, embeddings)
        rq   )rI   _FAISS__fromr   rD   r|   rS   rT   rp   rC   r"   r"   r#   
from_texts  s    
zFAISS.from_textsz	list[str]c                   s,   | |I dH }| j|||f||d|S )ae  Construct FAISS wrapper from raw documents asynchronously.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the FAISS database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import FAISS
                from langchain_community.embeddings import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                faiss = await FAISS.afrom_texts(texts, embeddings)
        Nrq   )rM   r   r   r"   r"   r#   afrom_texts  s    zFAISS.afrom_texts)rw   r|   rS   rT   rp   r   c                 K  s0   t | \}}| jt|t||f||d|S )a  Construct FAISS wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the FAISS database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import FAISS
                from langchain_community.embeddings import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = zip(texts, text_embeddings)
                faiss = FAISS.from_embeddings(text_embedding_pairs, embeddings)
        rq   )rf   r   rr   )r   rw   r|   rS   rT   rp   rD   rC   r"   r"   r#   from_embeddingsB  s    zFAISS.from_embeddingsc                   s   | j ||f||d|S )z:Construct FAISS wrapper from raw documents asynchronously.rq   )r   )r   rw   r|   rS   rT   rp   r"   r"   r#   afrom_embeddingsi  s    
zFAISS.afrom_embeddingsr1   )folder_path
index_namer   c                 C  s   t |}|jddd t }|| jt|| d  t|| d d$}t| j	| j
f| W d   n1 sv0    Y  dS )a  Save FAISS index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
            index_name: for saving with a specific index file name
        T)exist_okparents.faiss.pklwbN)r   mkdirr$   Zwrite_indexr1   r%   openpickledumpr2   r3   )r?   r   r   pathr    fr"   r"   r#   
save_local{  s    zFAISS.save_local)allow_dangerous_deserialization)r   rC   r   r   rp   r   c                K  s   |st dt|}t }|t|| d }t|| d d}	t|	\}
}W d   n1 sj0    Y  | |||
|fi |S )a  Load FAISS index, docstore, and index_to_docstore_id from disk.

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embeddings: Embeddings to use when generating queries
            index_name: for saving with a specific index file name
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        B  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   rbN)r-   r   r$   Z
read_indexr%   r   r   load)r   r   rC   r   r   rp   r   r    r1   r   r2   r3   r"   r"   r#   
load_local  s     zFAISS.load_localbytesc                 C  s   t | j| j| jfS )zCSerialize FAISS index, docstore, and index_to_docstore_id to bytes.)r   dumpsr1   r2   r3   rB   r"   r"   r#   serialize_to_bytes  s    zFAISS.serialize_to_bytes)
serializedrC   r   rp   r   c                K  s2   |st dt|\}}}| ||||fi |S )zGDeserialize FAISS index, docstore, and index_to_docstore_id from bytes.r   )r-   r   loads)r   r   rC   r   rp   r1   r2   r3   r"   r"   r#   deserialize_from_bytes  s    
zFAISS.deserialize_from_byteszCallable[[float], float]c                 C  sR   | j dur| j S | jtjkr"| jS | jtjkr4| jS | jtjkrF| jS t	ddS )a8  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        NzJUnknown distance strategy, must be cosine, max_inner_product, or euclidean)
r:   r6   r   r   Z%_max_inner_product_relevance_score_fnr<   Z_euclidean_relevance_score_fnZCOSINEZ_cosine_relevance_score_fnr-   rB   r"   r"   r#   _select_relevance_score_fn  s    	
z FAISS._select_relevance_score_fnc                   sH   |     du rtd| j|f|||d|} fdd|D }|S )?Return docs and their similarity scores on a scale from 0 to 1.NLrelevance_score_fn must be provided to FAISS constructor to normalize scoresr   c                   s   g | ]\}}| |fqS r"   r"   rE   ra   Zscorer4   r"   r#   rG     s   zBFAISS._similarity_search_with_relevance_scores.<locals>.<listcomp>)r   r-   r   r?   r   r}   r~   r   rp   r   Zdocs_and_rel_scoresr"   r   r#   (_similarity_search_with_relevance_scores  s$    
z.FAISS._similarity_search_with_relevance_scoresc                   sN   |     du rtd| j|f|||d|I dH } fdd|D }|S )r   Nr   r   c                   s   g | ]\}}| |fqS r"   r"   r   r   r"   r#   rG   6  s   zCFAISS._asimilarity_search_with_relevance_scores.<locals>.<listcomp>)r   r-   r   r   r"   r   r#   )_asimilarity_search_with_relevance_scores  s$    
z/FAISS._asimilarity_search_with_relevance_scores Callable[[Dict[str, Any]], bool]r~   r   c                   s   t | r| S t| ts(tdt|  ddlm}m}m}m	}m
}m} ||||||d}dd dd d}||B  tt g d	 }	d
| D ](}
|
r|
dr|
|	vrtd|
 qdddd fdddddfdd| S )a  
        Create a filter function based on the provided filter.

        Args:
            filter: A callable or a dictionary representing the filter
            conditions for documents.

        Returns:
            A function that takes Document's metadata and returns True if it
            satisfies the filter conditions, otherwise False.

        Raises:
            ValueError: If the filter is invalid or contains unsuported operators.
        z5filter must be a dict of metadata or a callable, not r   )eqr   gtr   ltne)z$eqz$neqz$gtz$ltz$gtez$ltec                 S  s   | |v S rA   r"   abr"   r"   r#   <lambda>`  rH   z+FAISS._create_filter_func.<locals>.<lambda>c                 S  s   | |vS rA   r"   r   r"   r"   r#   r   a  rH   )z$inz$nin)$and$or$not
   $&filter contains unsupported operator: r%   z%Union[Dict[str, Any], List[Any], Any]r   )field	conditionr   c                   s   t  trbg   D ]0\}}|vr4td| | |f qdddfdd}|S t  trt krt fddS  fd	dS  fd
dS )a  
            Creates a filter function based on field and condition.

            Args:
                field: The document field to filter on
                condition: Filter condition (dict for operators, list for in,
                           or direct value for equality)

            Returns:
                A filter function that takes a document and returns boolean
            r   Dict[str, Any]r   )ra   r   c                   s    |   t fddD S )aW  
                    Evaluates a document against a set of predefined operators
                    and their values. This function applies multiple
                    comparison/sequence operators to a specific field value
                    from the document. All conditions must be satisfied for the
                    function to return True.

                    Args:
                        doc (Dict[str, Any]): The document to evaluate, containing
                        key-value pairs where keys are field names and values
                        are the field values. The document must contain the field
                        being filtered.

                    Returns:
                        bool: True if the document's field value satisfies all
                            operator conditions, False otherwise.
                    c                 3  s   | ]\}}| |V  qd S rA   r"   )rE   opvalueZ	doc_valuer"   r#   rY     rH   zYFAISS._create_filter_func.<locals>.filter_func_cond.<locals>.filter_fn.<locals>.<genexpr>)r   allra   )r   	operatorsr   r#   	filter_fn  s    
zFFAISS._create_filter_func.<locals>.filter_func_cond.<locals>.filter_fnc                   s   |   v S rA   r   r   )condition_setr   r"   r#   r     rH   zEFAISS._create_filter_func.<locals>.filter_func_cond.<locals>.<lambda>c                   s   |   v S rA   r  r   r   r   r"   r#   r     rH   c                   s   |   kS rA   r  r   r  r"   r#   r     rH   )r+   dictr   r-   r   rr   r,   	frozenset)r   r   r   r   r   )
OPERATIONSSET_CONVERT_THRESHOLD)r   r  r   r   r#   filter_func_condl  s    

z3FAISS._create_filter_func.<locals>.filter_func_condr   r   c                   s   d| v r*fdd| d D fddS d| v rTfdd| d D fddS d	| v rt| d	   fd
dS fdd|   D fddS )a  
            Creates a filter function that evaluates documents against specified
            filter conditions.

            This function processes a dictionary of filter conditions and returns
            a callable that can evaluate documents against these conditions. It
            supports logical operators ($and, $or, $not) and field-level filtering.

            Args:
                filter (Dict[str, Any]): A dictionary containing filter conditions.
                Can include:
                    - Logical operators ($and, $or, $not) with lists of sub-filters
                    - Field-level conditions with comparison or sequence operators
                    - Direct field-value mappings for equality comparison

            Returns:
                Callable[[Dict[str, Any]], bool]: A function that takes a document
                (as a dictionary) and returns True if the document matches all
                filter conditions, False otherwise.
            r   c                   s   g | ]} |qS r"   r"   rE   Z
sub_filterr   r"   r#   rG     rH   zBFAISS._create_filter_func.<locals>.filter_func.<locals>.<listcomp>c                   s   t  fddD S )Nc                 3  s   | ]}| V  qd S rA   r"   rE   r   r   r"   r#   rY     rH   SFAISS._create_filter_func.<locals>.filter_func.<locals>.<lambda>.<locals>.<genexpr>r   r   filtersr   r#   r     rH   z@FAISS._create_filter_func.<locals>.filter_func.<locals>.<lambda>r   c                   s   g | ]} |qS r"   r"   r	  r
  r"   r#   rG     rH   c                   s   t  fddD S )Nc                 3  s   | ]}| V  qd S rA   r"   r  r   r"   r#   rY     rH   r  )anyr   r  r   r#   r     rH   r   c                   s
    |  S rA   r"   r   )condr"   r#   r     rH   c                   s   g | ]\}} ||qS r"   r"   )rE   r   r   )r  r"   r#   rG     s   c                   s   t  fddD S )Nc                 3  s   | ]}| V  qd S rA   r"   )rE   r   r   r"   r#   rY     rH   r  r  r   )
conditionsr   r#   r     rH   )r   )r~   )r   r  )r  r  r  r#   r     s    
z.FAISS._create_filter_func.<locals>.filter_func)callabler+   r  r-   typer   r   r   r   r   r   r   r  rr   
startswith)r~   r   r   r   r   r   r   ZCOMPARISON_OPERATORSZSEQUENCE_OPERATORSZVALID_OPERATORSr   r"   )r  r  r   r  r#   r   ;  s4    
 	4'zFAISS._create_filter_funczSequence[str]zlist[Document])rT   r   c                  s     fdd|D }dd |D S )Nc                   s   g | ]} j |qS r"   )r2   r   r   rB   r"   r#   rG     rH   z$FAISS.get_by_ids.<locals>.<listcomp>c                 S  s   g | ]}t |tr|qS r"   )r+   r   )rE   ra   r"   r"   r#   rG     rH   r"   )r?   rT   r   r"   rB   r#   
get_by_ids  s    zFAISS.get_by_ids)NN)NN)NN)NN)ry   Nrz   )ry   Nrz   )ry   Nrz   )ry   Nrz   )ry   Nrz   )ry   Nrz   )ry   Nrz   )ry   Nrz   )ry   rz   r   N)ry   rz   r   N)ry   rz   r   N)ry   rz   r   N)N)NN)NN)NN)NN)r1   )r1   )ry   Nrz   )ry   Nrz   )1__name__
__module____qualname____doc__r   r<   r@   propertyrC   rK   rO   rQ   rR   rs   ru   rv   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   classmethodr   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   r  r"   r"   r"   r#   r/   K   s
   	#  -         D   *   %   %   #   #      H*    "    $    (    %" "  $  $  &   3        r/   )N)-
__future__r   loggingr   r   r   rU   r=   pathlibr   typingr   r   r   r   r   r	   r
   r   r   r   numpyrh   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.runnables.configr   Zlangchain_core.vectorstoresr   Z!langchain_community.docstore.baser   r   Z&langchain_community.docstore.in_memoryr   Z&langchain_community.vectorstores.utilsr   r   	getLoggerr  r8   r$   r.   r/   r"   r"   r"   r#   <module>   s(   0
	