a
    bgt                     @  s4  d Z ddlmZ ddlZddlZddlZddlmZmZm	Z	m
Z
mZmZmZ ddlZddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ ed
gZd
ZdZdZee dj!Z"e#e dj!Z$ej%j!Z&ddddZ'dddddZ(dddddZ)dddddZ*dddddZ+G dd deZ,dS ) z&Wrapper around TileDB vector database.    )annotationsN)AnyDictIterableListMappingOptionalTuple)Document)
Embeddingsguard_import)VectorStore)maximal_marginal_relevanceZ	euclideanZ	documentsvectorsuint64float32r   returnc                   C  s   t dt dfS )z@Import tiledb-vector-search if available, otherwise raise error.tiledb.vector_searchtiledbr    r   r   u/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/tiledb.pydependable_tiledb_import   s    r   str)groupr   c                 C  s
   | t  jS ) Get the URI of the vector index.)VECTOR_INDEX_NAMEurir   r   r   r   get_vector_index_uri_from_group#   s    r    c                 C  s
   | t  jS )zGet the URI of the documents array from group.

    Args:
        group: TileDB group object.

    Returns:
        URI of the documents array.
    )DOCUMENTS_ARRAY_NAMEr   r   r   r   r   "get_documents_array_uri_from_group(   s    	r"   )r   r   c                 C  s   |  dt  S )r   /)r   r   r   r   r   get_vector_index_uri4   s    r%   c                 C  s   |  dt  S )z#Get the URI of the documents array.r#   )r!   r$   r   r   r   get_documents_array_uri9   s    r&   c                   @  sx  e Zd ZdZddddddddddddd	d
d	d	ddZeddddZddeddddddddddZddddddddd	ddd d!Z	ddddddddd	dd"d#d$Z
d\ddddd	d%dd&d'Zd]ddddd	d%d"d(d)Zddd*dd+dddddd	dd,d-d.Zd^dddddd	d%d,d/d0Zd_dddddd	d%d1d2d3Zed4dd5dddd6d
dd7d8d9d:Zedded;dd<d=d>d?ddd@dAddddd	d dBdCdDZd`dAdd	dEdFdGdHZdadId@dAdd	d>dJdKdLZeddedMd;dd<fd>dd@dAdddddd	d dNdOdPZedded;dd<d=dQddd@dAddddd	d dRdSdTZeedddUddddd	d	d dVdWdXZd	d7dYdZd[ZdS )bTileDBa2  TileDB vector store.

    To use, you should have the ``tiledb-vector-search`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community import TileDB
            embeddings = OpenAIEmbeddings()
            db = TileDB(embeddings, index_uri, metric)

     NF)vector_index_uridocs_array_uriconfig	timestampallow_dangerous_deserializationr   r   zOptional[Mapping[str, Any]]r   bool)		embedding	index_urimetricr)   r*   r+   r,   r-   kwargsc                K  s8  |st d|| _|j| _|| _|| _|| _tdtd }
}|j|d |	| jd}|dkrf|nt
|| _|dkr||nt|| _|  |	| jd}|jd| _|  || _| jdkr|
jjf | j| j| jd	|	| _n.| jd
kr|
jjf | j| j| jd	|	| _W d   n1 s*0    Y  dS )a  Initialize with necessary components.

        Args:
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading data using pickle.
                data can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        a  TileDB relies on pickle for serialization and deserialization. This can be dangerous if the data is intercepted and/or modified by malicious actors prior to being de-serialized. If you are sure that the data is safe from modification, you can  set allow_dangerous_deserialization=True to proceed. Loading of compromised data using pickle can result in execution of arbitrary code on your machine.r   r   Zctx_or_configrr(   
index_typeFLAT)r   r+   r,   IVF_FLATN)
ValueErrorr/   Zembed_queryembedding_functionr0   r1   r+   r   	scope_ctxGroupr    r)   r"   r*   closemetagetr5   r,   
flat_indexZ	FlatIndexvector_indexivf_flat_indexZIVFFlatIndex)selfr/   r0   r1   r)   r*   r+   r,   r-   r2   	tiledb_vsr   index_groupr   r   r   r   __init__L   sV    	

zTileDB.__init__zOptional[Embeddings]r   c                 C  s   | j S N)r/   rB   r   r   r   
embeddings   s    zTileDB.embeddings   )kfilterscore_thresholdz	List[int]zList[float]intzOptional[Dict[str, Any]]floatzList[Tuple[Document, float]])idsscoresrJ   rK   rL   r   c                  s\  t d}g }|j| jd| j| jd}t||D ]\}	}
|	dkrJ|
dkrJq.|	tkr\|
tkr\q.||	 }|du s|t|d dkrt	d|	 d| |
d	}tt|d d d
 |durtt| tj }| _|dur&dd | D }t fdd| D r4| |
f q.| |
f q.|  fdd|D }|d| S )a  Turns TileDB results into a list of documents and scores.

        Args:
            ids: List of indices of the documents in the index.
            scores: List of distances of the documents in the index.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
            score_threshold: Optional, a floating point value to filter the
                resulting set of retrieved docs
        Returns:
            List of Documents and scores.
        r   r4   r,   r+   r   NtextzCould not find document for id z, got metadata)page_contentc                 S  s&   i | ]\}}|t |ts|gn|qS r   )
isinstancelist.0keyvaluer   r   r   
<dictcomp>   s   z0TileDB.process_index_results.<locals>.<dictcomp>c                 3  s"   | ]\}} j ||v V  qd S rF   )rS   r>   rW   )
result_docr   r   	<genexpr>   s   z/TileDB.process_index_results.<locals>.<genexpr>c                   s    g | ]\}}| kr||fqS r   r   )rX   docscore)rL   r   r   
<listcomp>       z0TileDB.process_index_results.<locals>.<listcomp>)r   openr*   r,   r+   zip
MAX_UINT64MAX_FLOAT_32lenr8   r>   r
   r   pickleloadsnparraytolistastypeuint8tobytesrS   itemsallappendr<   )rB   rO   rP   rJ   rK   rL   r   docs
docs_arrayidxr_   r^   Zpickled_metadatarS   r   )r\   rL   r   process_index_results   s@    


zTileDB.process_index_results   rJ   rK   fetch_k)r/   rJ   rK   rx   r2   r   c          	      K  sz   d|v r| d}nt}| jjtt|tjgtjfd|du rN|n|i|\}}| j|d |d |||dS )a[  Return docs most similar to query.

        Args:
            embedding: Embedding vector to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.
            **kwargs: kwargs to be passed to similarity search. Can include:
                nprobe: Optional, number of partitions to check if using IVF_FLAT index
                score_threshold: Optional, a floating point value to filter the
                    resulting set of retrieved docs

        Returns:
            List of documents most similar to the query text and distance
            in float for each. Lower score represents more similarity.
        rL   rJ   Nr   rO   rP   rK   rJ   rL   )	pop	MAX_FLOATr@   queryri   rj   rl   r   ru   )	rB   r/   rJ   rK   rx   r2   rL   dir   r   r   &similarity_search_with_score_by_vector   s     
z-TileDB.similarity_search_with_score_by_vector)r|   rJ   rK   rx   r2   r   c                K  s(   |  |}| j|f|||d|}|S )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of documents most similar to the query text with
            Distance as float. Lower score represents more similarity.
        rw   )r9   r   )rB   r|   rJ   rK   rx   r2   r/   rr   r   r   r   similarity_search_with_score   s    
z#TileDB.similarity_search_with_scorezList[Document]c                 K  s(   | j |f|||d|}dd |D S )a  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the embedding.
        rw   c                 S  s   g | ]\}}|qS r   r   rX   r^   _r   r   r   r`   5  ra   z6TileDB.similarity_search_by_vector.<locals>.<listcomp>)r   )rB   r/   rJ   rK   rx   r2   docs_and_scoresr   r   r   similarity_search_by_vector  s    z"TileDB.similarity_search_by_vectorc                 K  s(   | j |f|||d|}dd |D S )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the query.
        rw   c                 S  s   g | ]\}}|qS r   r   r   r   r   r   r`   N  ra   z,TileDB.similarity_search.<locals>.<listcomp>)r   )rB   r|   rJ   rK   rx   r2   r   r   r   r   similarity_search7  s    zTileDB.similarity_search      ?rJ   rx   lambda_multrK   )r/   rJ   rx   r   rK   r2   r   c                  s   d|v r| d}nt} jjtt|tjgtjfd|du rN|n|d i|\}}	 j|	d |d ||du r|n|d |d}
 fdd|
D }t	tj|gtjd	|||d
}g }|D ]}|
|
|  q|S )az  Return docs and their similarity scores selected using the maximal marginal
            relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents and similarity scores selected by maximal marginal
                relevance and score for each.
        rL   rJ   N   r   ry   c                   s$   g | ]\}} j |jgd  qS r   )r/   embed_documentsrT   r   rG   r   r   r`   }  s   zMTileDB.max_marginal_relevance_search_with_score_by_vector.<locals>.<listcomp>dtype)rJ   r   )rz   r{   r@   r|   ri   rj   rl   r   ru   r   rq   )rB   r/   rJ   rx   r   rK   r2   rL   rP   indicesresultsrH   Zmmr_selectedr   r~   r   rG   r   2max_marginal_relevance_search_with_score_by_vectorP  s<     

z9TileDB.max_marginal_relevance_search_with_score_by_vectorc                 K  s*   | j |f||||d|}dd |D S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   c                 S  s   g | ]\}}|qS r   r   r   r   r   r   r`     ra   zBTileDB.max_marginal_relevance_search_by_vector.<locals>.<listcomp>)r   )rB   r/   rJ   rx   r   rK   r2   r   r   r   r   'max_marginal_relevance_search_by_vector  s    z.TileDB.max_marginal_relevance_search_by_vector)r|   rJ   rx   r   rK   r2   r   c           	      K  s*   |  |}| j|f||||d|}|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering (if needed) to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   )r9   r   )	rB   r|   rJ   rx   r   rK   r2   r/   rr   r   r   r   max_marginal_relevance_search  s    
z$TileDB.max_marginal_relevance_searchT)	metadatasr+   znp.dtypeNone)r0   r5   
dimensionsvector_typer   r+   r   c                C  s  t dt d }}|j|dP z|| W n* |jyZ }	 z|	W Y d }	~	n
d }	~	0 0 ||d}
t|
j}t|
j}|dkr|jj	||||d n|dkr|j
j	||||d |
j|td |jd	d
td fttjd}||}|jdtddd}|g}|r,|jdtjdd}|| |j|dd|d}|j	|| |
j|td |
  W d    n1 sx0    Y  d S )Nr   r   r3   wr6   )r   r   r   r+   r7   )nameidr      )r   domainr   rR   ZU1T)r   r   varrS   F)r   sparseZallows_duplicatesattrs)r   r:   Zgroup_createZTileDBErrorr;   r%   r   r&   r?   createrA   addr   ZDimrd   ri   r   r   DomainZAttrrm   rq   ZArraySchemaZArrayr!   r<   )clsr0   r5   r   r   r   r+   rC   r   errr   r)   docs_uridimdomZ	text_attrr   metadata_attrZschemar   r   r   r     s\    





zTileDB.creater6   r   )r   rO   r1   r5   r+   index_timestampz	List[str]zList[List[float]]zOptional[List[dict]]zOptional[List[str]])textsrH   r/   r0   r   rO   r1   r5   r+   r   r2   r   c             
   K  s  |t vr td| dtt  tdtd }}t|tj}| j|||j	d |j
|d u|	d |j|	dZ |stdt|}t|}|d u rd	d
 |D }t|tj}|jjf |||||
dkr|
nd |	d| ||d}|d u r.tjt|tjd}tt|D ]}|||< qi }t||d< |d urtjt|gtd}d}|D ](}tjt|tjd||< |d7 }qf||d< |||< W d    n1 s0    Y  W d    n1 s0    Y  | f ||||	d|S )NzUnsupported distance metric: z. Expected one of r   r   r   )r0   r5   r   r   r   r+   r3   z3embeddings must be provided to build a TileDB indexc                 S  s    g | ]}t td td qS r   r   r   randomrandintrd   rX   r   r   r   r   r`   ?  ra   z!TileDB.__from.<locals>.<listcomp>r   )r5   r0   input_vectorsexternal_idsr   r+   r   r   rR   rS   )r/   r0   r1   r+   )INDEX_METRICSr8   rV   r   ri   rj   rl   r   r   shaper   r:   r%   r&   r   Z	ingestionZingestrb   Zzerosrf   rangeemptyobject
frombufferrg   dumpsrm   )r   r   rH   r/   r0   r   rO   r1   r5   r+   r   r2   rC   r   r   r)   r   r   Ar~   datar   rS   r   r   r   Z__from  sz    	


HzTileDB.__fromzOptional[bool])rO   r,   r2   r   c                 K  s2   t |t j}| jj||dkr&|ndd dS )am  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            timestamp: Optional timestamp to delete with.
            **kwargs: Other keyword arguments that subclasses might use.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        r   N)r   r,   T)ri   rj   rl   r   r@   Zdelete_batch)rB   rO   r,   r2   r   r   r   r   deletee  s
    zTileDB.deletezIterable[str])r   r   rO   r,   r2   r   c                 K  s:  t d}| jt|}|du r.dd |D }t|tj}tjt	|dd}	t
t	|D ]}
tj||
 tjd|	|
< q^| jj|	||dkr|ndd i }t||d	< |durtjt	|gtd}d}
|D ]&}tjt|tjd||
< |
d
7 }
q||d< |j| jd|dkr|nd| jd}|||< |  |S )a  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional ids of each text object.
            timestamp: Optional timestamp to write new texts with.
            kwargs: vectorstore specific parameters

        Returns:
            List of ids from adding the texts into the vectorstore.
        r   Nc                 S  s    g | ]}t td td qS r   r   r   r   r   r   r`     ra   z$TileDB.add_texts.<locals>.<listcomp>Or   r   )r   r   r,   rR   r   rS   r   rQ   )r   r/   r   rV   ri   rj   rl   r   r   rf   r   r   r@   Zupdate_batchr   r   rg   r   rm   rb   r*   r+   r<   )rB   r   r   rO   r,   r2   r   rH   r   r   r~   rr   r   rS   rs   r   r   r   	add_textsz  s>    

zTileDB.add_textsz/tmp/tiledb_array)r   r/   r   rO   r1   r0   r5   r+   r   r2   r   c
                 K  s4   g }| |}| jf ||||||||||	d
|
S )a  Construct a TileDB index from raw documents.

        Args:
            texts: List of documents to index.
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            ids: Optional ids of each text object.
            metric: Metric to use for indexing. Defaults to "euclidean".
            index_uri: The URI to write the TileDB arrays
            index_type: Optional,  Vector index type ("FLAT", IVF_FLAT")
            config: Optional, TileDB config
            index_timestamp: Optional, timestamp to write new texts with.

        Example:
            .. code-block:: python

                from langchain_community import TileDB
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                index = TileDB.from_texts(texts, embeddings)
        
r   rH   r/   r   rO   r1   r0   r5   r+   r   )r   _TileDB__from)r   r   r/   r   rO   r1   r0   r5   r+   r   r2   rH   r   r   r   
from_texts  s     #
zTileDB.from_textszList[Tuple[str, List[float]]])text_embeddingsr/   r0   r   rO   r1   r5   r+   r   r2   r   c                K  sB   dd |D }dd |D }| j f ||||||||||	d
|
S )a  Construct TileDB index from embeddings.

        Args:
            text_embeddings: List of tuples of (text, embedding)
            embedding: Embedding function to use.
            index_uri: The URI to write the TileDB arrays
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Optional, Metric to use for indexing. Defaults to "euclidean".
            index_type: Optional, Vector index type ("FLAT", IVF_FLAT")
            config: Optional, TileDB config
            index_timestamp: Optional, timestamp to write new texts with.

        Example:
            .. code-block:: python

                from langchain_community import TileDB
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = list(zip(texts, text_embeddings))
                db = TileDB.from_embeddings(text_embedding_pairs, embeddings)
        c                 S  s   g | ]}|d  qS r   r   rX   tr   r   r   r`   	  ra   z*TileDB.from_embeddings.<locals>.<listcomp>c                 S  s   g | ]}|d  qS )r   r   r   r   r   r   r`   
  ra   r   )r   )r   r   r/   r0   r   rO   r1   r5   r+   r   r2   r   rH   r   r   r   from_embeddings  s     %zTileDB.from_embeddings)r1   r+   r,   )r0   r/   r1   r+   r,   r2   r   c                K  s   | f |||||d|S )a}  Load a TileDB index from a URI.

        Args:
            index_uri: The URI of the TileDB vector index.
            embedding: Embeddings to use when generating queries.
            metric: Optional, Metric to use for indexing. Defaults to "euclidean".
            config: Optional, TileDB config
            timestamp: Optional, timestamp to use for opening the arrays.
        )r/   r0   r1   r+   r,   r   )r   r0   r/   r1   r+   r,   r2   r   r   r   load  s    zTileDB.load)r2   r   c                 K  s   | j jf i || _ d S rF   )r@   consolidate_updates)rB   r2   r   r   r   r   7  s    zTileDB.consolidate_updates)rI   Nrv   )rI   Nrv   )rI   rv   r   N)rI   rv   r   N)Nr   )NNr   )__name__
__module____qualname____doc__rE   propertyrH   r{   ru   r   r   r   r   r   r   r   classmethodr   DEFAULT_METRICr   r   r   r   r   r   r   r   r   r   r   r'   >   s   "J=+#       >    '    % ?*Q    7&2(5 r'   )-r   
__future__r   rg   r   systypingr   r   r   r   r   r   r	   numpyri   Zlangchain_core.documentsr
   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr   Zlangchain_core.vectorstoresr   Z&langchain_community.vectorstores.utilsr   	frozensetr   r   r!   r   Ziinfor   maxrd   Zfinfore   
float_infor{   r   r    r"   r%   r&   r'   r   r   r   r   <module>   s0   $
