a
    bg#a                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d	Zd
dddZdddddZG dd deZdS )    )annotationsN)AnyCallableDictIterableListOptionalType)Document)
Embeddingsguard_import)VectorStore)maximal_marginal_relevance   r   returnc                   C  s   t dS )zImport lancedb package.lancedbr    r   r   v/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/lancedb.pyimport_lancedb   s    r   zDict[str, str]str)filterr   c                 C  s   d dd |  D S )z2Converts a dict filter to a LanceDB filter string.z AND c                 S  s    g | ]\}}| d | dqS )z = ''r   ).0kvr   r   r   
<listcomp>       z#to_lance_filter.<locals>.<listcomp>)joinitems)r   r   r   r   to_lance_filter   s    r!   c                   @  s
  e Zd ZdZdddddddddddd	ddefd
dddddddddd
dd
dddddZdZddddddZeddddZd[ddddddd d!Z	d\dd"dd#d$d%Z
d]ddd)d)d)ddd*d+d,d-Zd.d.d/d0d1Zd^dddddd2d3d4Zd_dd)d
dddd5d6d7Zd8dd9d:Zd`d;d)d<dddd=d>d?Zdad;d)d<dddd=d@dAZdbd.d)d<dddBdCdDZdcd.d)dd
d"ddEdFdGdHZddd.d)ddKd<ddEdLdMdNZded;d)ddKd<ddEdOdPdQZedfdRddSdd
ddddddddd
ddd dTdUdVZdgdd"ddddd*dWdXdYZdS )hLanceDBay  `LanceDB` vector store.

    To use, you should have ``lancedb`` python package installed.
    You can install it with ``pip install lancedb``.

    Args:
        connection: LanceDB connection to use. If not provided, a new connection
                    will be created.
        embedding: Embedding to use for the vectorstore.
        vector_key: Key to use for the vector in the database. Defaults to ``vector``.
        id_key: Key to use for the id in the database. Defaults to ``id``.
        text_key: Key to use for the text in the database. Defaults to ``text``.
        table_name: Name of the table to use. Defaults to ``vectorstore``.
        api_key: API key to use for LanceDB cloud database.
        region: Region to use for LanceDB cloud database.
        mode: Mode to use for adding data to the table. Valid values are
              ``append`` and ``overwrite``. Defaults to ``overwrite``.



    Example:
        .. code-block:: python
            vectorstore = LanceDB(uri='/lancedb', embedding_function)
            vectorstore.add_texts(['text1', 'text2'])
            result = vectorstore.similarity_search('text1')
    Nz/tmp/lancedbvectoridtextvectorstore	overwritel2zOptional[Any]zOptional[Embeddings]zOptional[str]z"Optional[Callable[[float], float]]int)
connection	embeddinguri
vector_keyid_keytext_key
table_nameapi_keyregionmodetabledistancererankerrelevance_score_fnlimitc                 C  s  t d}|| _|| _|| _|| _|dkr6|p8tdnd| _|	| _|
| _	|| _
|| _|| _d| _t||jjrv|| _n|du rd| _ntdt|tr| jdu r|drtd| jdu rtdt||jjr|| _n|t|t|jjfrtd	n`| jdu r||| _nFt|trX|drB|j|| j| jd
| _n||| _td |durz@t||jj|jjjfsJ || _t |dr|j!nd| _"W n t#y   tdY n0 n| j$|dd| _dS )z$Initialize with Lance DB vectorstorer    ZLANCE_API_KEYNz9`reranker` has to be a lancedb.rerankers.Reranker object.zdb://z&API key is required for LanceDB cloud.z#embedding object should be providedzs`connection` has to be a lancedb.db.LanceDBConnection object.                `lancedb.db.LanceTable` is deprecated.)r1   r2   z[api key provided with local uri.                            The data will be stored locallynameZremote_tablezj`table` has to be a lancedb.db.LanceTable or 
                    lancedb.remote.table.RemoteTable object.T)set_default)%r   
_embedding_vector_key_id_key	_text_keyosgetenvr1   r2   r3   r5   override_relevance_score_fnr8   
_fts_index
isinstanceZ	rerankersZReranker	_reranker
ValueErrorr   
startswithdbZLanceDBConnection_connectionZ
LanceTableconnectwarningswarnremoter4   ZRemoteTable_tablehasattrr:   _table_nameAssertionError	get_table)selfr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r   r   r   r   __init__:   sl    





zLanceDB.__init__Fr   bool)resultsscorer   c                   s   j j}d|v rdnd|v r$dnd d|v  d u s<|sZ fddttD S r|r fddttD S d S )NZ	_distanceZ_relevance_scoremetadatac                   s:   g | ]2}t j |   r.d  |  ni dqS rX   )Zpage_contentrX   r
   r?   Zas_pyr   idx)has_metadatarV   rS   r   r   r      s
   z+LanceDB.results_to_docs.<locals>.<listcomp>c                   sJ   g | ]B}t j |   r.d  |  ni d |  fqS rY   rZ   r[   r]   rV   Z	score_colrS   r   r   r      s   
)Zschemanamesrangelen)rS   rV   rW   columnsr   r^   r   results_to_docs   s    


zLanceDB.results_to_docsr   c                 C  s   | j S )N)r<   rS   r   r   r   
embeddings   s    zLanceDB.embeddingszIterable[str]zOptional[List[dict]]zOptional[List[str]]z	List[str])texts	metadatasidskwargsr   c                 K  s   g }|pdd |D }| j t|}t|D ]L\}}|| }	|rJ|| n
d|| i}
|| j|	| j|| | j|d|
i q.|  }|du r| j	j
| j|d}|| _n&| jdu r|j|| jd n
|| d| _|S )a  Turn texts into embedding and add it to the database

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            ids: Optional list of ids to associate with the texts.

        Returns:
            List of ids of the added texts.
        c                 S  s   g | ]}t t qS r   r   uuiduuid4r   _r   r   r   r      r   z%LanceDB.add_texts.<locals>.<listcomp>r$   rX   Ndata)r3   )r<   Zembed_documentslist	enumerateappendr=   r>   r?   rR   rI   create_tablerP   rN   r1   addr3   rC   )rS   rf   rg   rh   ri   docsre   r\   r%   r+   rX   tblr   r   r   	add_texts   s,    
	

zLanceDB.add_textszOptional[bool])r:   r;   r   c                 C  sN   |dur |r|| _ | j }q&|}n| j }z| j|W S  tyH   Y dS 0 dS )a  
        Fetches a table object from the database.

        Args:
            name (str, optional): The name of the table to fetch. Defaults to None
                                    and fetches current table object.
            set_default (bool, optional): Sets fetched table as the default table.
                                        Defaults to False.

        Returns:
            Any: The fetched table object.

        Raises:
            ValueError: If the specified table is not found in the database.

        N)rP   rI   Z
open_table	Exception)rS   r:   r;   _namer   r   r   rR      s    zLanceDB.get_table   `   L2zOptional[int]None)col_name
vector_colnum_partitionsnum_sub_vectorsindex_cache_sizemetricr:   r   c           	      C  s@   |  |}|r$|j|||||d n|r4|| ntddS )aO  
        Create a scalar(for non-vector cols) or a vector index on a table.
        Make sure your vector column has enough data before creating an index on it.

        Args:
            vector_col: Provide if you want to create index on a vector column.
            col_name: Provide if you want to create index on a non-vector column.
            metric: Provide the metric to use for vector index. Defaults to 'L2'
                    choice of metrics: 'L2', 'dot', 'cosine'
            num_partitions: Number of partitions to use for the index. Defaults to 256.
            num_sub_vectors: Number of sub-vectors to use for the index. Defaults to 96.
            index_cache_size: Size of the index cache. Defaults to None.
            name: Name of the table to create index on. Defaults to None.

        Returns:
            None
        )r   vector_column_namer   r   r   z%Provide either vector_col or col_nameN)rR   create_indexZcreate_scalar_indexrF   )	rS   r   r   r   r   r   r   r:   rw   r   r   r   r     s    
zLanceDB.create_indexr   )r,   r   c                 C  sB   t |d$}t| dW  d   S 1 s40    Y  dS )z!Get base64 string from image URI.rbzutf-8N)openbase64	b64encodereaddecode)rS   r,   Z
image_filer   r   r   encode_image2  s    zLanceDB.encode_image)urisrg   rh   ri   r   c                   s      } fdd|D }|du r0dd |D }d} jdurZt jdrZ jj|d}ntdg }t|D ]H\}	}
|r||	 n
d||	 i}| j|
 j||	  j	||	 d	|i qn|du rڈ j
j j|d
}| _n
|| |S )as  Run more images through the embeddings and add to the vectorstore.

        Args:
            uris List[str]: File path to the image.
            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
            ids (Optional[List[str]], optional): Optional list of IDs.

        Returns:
            List[str]: List of IDs of the added images.
        c                   s   g | ]} j |d qS ))r,   )r   )r   r,   rd   r   r   r   K  r   z&LanceDB.add_images.<locals>.<listcomp>Nc                 S  s   g | ]}t t qS r   rj   rm   r   r   r   r   N  r   embed_image)r   zEembedding object should be provided and must have embed_image method.r$   rX   ro   )rR   r<   rO   r   rF   rr   rs   r=   r>   r?   rI   rt   rP   rN   ru   )rS   r   rg   rh   ri   rw   Z	b64_textsre   rp   r\   ZembrX   r   rd   r   
add_images7  s2    


zLanceDB.add_images)queryr   r   r:   ri   r   c                 K  s   |d u r| j }| |}t|tr*t|}|dd}|dd}|d }	rx|j|| jd ||	j	||d}
n |j|| jd |j	||d}
|dkr| j
d ur|
j| j
d	 |
 }t|d
krtd |S )N	prefilterF
query_typer#   metrics)r   r   )r   hybrid)r6   r   zNo results found for the query.)r8   rR   rD   dictr!   getsearchr=   r   whererE   ZrerankZto_arrowra   rK   rL   )rS   r   r   r   r:   ri   rw   r   r   r   Zlance_queryrv   r   r   r   _queryk  s8    


zLanceDB._queryzCallable[[float], float]c                 C  sR   | j r| j S | jdkr| jS | jdkr,| jS | jdkr<| jS td| j ddS )a8  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        Zcosiner(   ipzANo supported normalization function for distance metric of type: z=.Consider providing relevance_score_fn to Chroma constructor.N)rB   r5   Z_cosine_relevance_score_fnZ_euclidean_relevance_score_fnZ%_max_inner_product_relevance_score_fnrF   rd   r   r   r   _select_relevance_score_fn  s    	


z"LanceDB._select_relevance_score_fnzList[float]zOptional[Dict[str, str]])r+   r   r   r:   ri   r   c                 K  s>   |du r| j }| j||f||d|}| j||dddS )zD
        Return documents most similar to the query vector.
        Nr   r:   rW   FrW   )r8   r   rc   pop)rS   r+   r   r   r:   ri   resr   r   r   similarity_search_by_vector  s    z#LanceDB.similarity_search_by_vectorc                   s@   |du r| j }|   | j||fddi|} fdd|D S )zZ
        Return documents most similar to the query vector with relevance scores.
        NrW   Tc                   s    g | ]\}}| t |fqS r   )float)r   docrW   r7   r   r   r     s   zMLanceDB.similarity_search_by_vector_with_relevance_scores.<locals>.<listcomp>)r8   r   r   )rS   r+   r   r   r:   ri   Zdocs_and_scoresr   r   r   1similarity_search_by_vector_with_relevance_scores  s    
z9LanceDB.similarity_search_by_vector_with_relevance_scores)r   r   r   ri   r   c                 K  s  |du r| j }|dd}|dd}|dd}| jdu rDtd|dksT|d	kr| jdu r| jdu r| |}|j| jdd
| _|d	kr| j	|}	|	|f}
n|}
| j
|
|f||d|}| j||dS tdn2| j	|}	| j
|	|fd|i|}| j||dS dS )zAReturn documents most similar to the query with relevance scores.NrW   Tr:   r   r#   z4search needs an emmbedding function to be specified.ftsr   )replacer   r   z?Full text/ Hybrid search is not supported in LanceDB Cloud yet.r   )r8   r   r<   rF   r1   rC   rR   Zcreate_fts_indexr?   embed_queryr   rc   NotImplementedError)rS   r   r   r   ri   rW   r:   r   rw   r+   r   r   r   r   r   similarity_search_with_score  s.    


z$LanceDB.similarity_search_with_scorezList[Document])r   r   r:   r   r   ri   r   c              	   K  s"   | j f |||||dd|}|S )ap  Return documents most similar to the query

        Args:
            query: String to query the vectorstore with.
            k: Number of documents to return.
            filter (Optional[Dict]): Optional filter arguments
                sql_filter(Optional[string]): SQL filter to apply to the query.
                prefilter(Optional[bool]): Whether to apply the filter prior
                                             to the vector search.
        Raises:
            ValueError: If the specified table is not found in the database.

        Returns:
            List of documents most similar to the query.
        F)r   r   r:   r   r   rW   )r   )rS   r   r   r:   r   r   ri   r   r   r   r   similarity_search  s    zLanceDB.similarity_search         ?r   )r   r   fetch_klambda_multr   ri   r   c           	      K  sD   |du r| j }| jdu r td| j|}| j|||||d}|S )a?  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        NzBFor MMR search, you must specify an embedding function oncreation.)r   r   )r8   r<   rF   r   'max_marginal_relevance_search_by_vector)	rS   r   r   r   r   r   ri   r+   rv   r   r   r   max_marginal_relevance_search  s    
z%LanceDB.max_marginal_relevance_search)r+   r   r   r   r   ri   r   c           
        sf   | j f |||d|}ttj|tjd|d  |p:| j|d | |} fddt|D }	|	S )aH  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        )r   r   r   )Zdtyper#   )r   r   c                   s   g | ]\}}| v r|qS r   r   )r   irZmmr_selectedr   r   r   l  r   zCLanceDB.max_marginal_relevance_search_by_vector.<locals>.<listcomp>)	r   r   nparrayZfloat32Z	to_pylistr8   rc   rr   )
rS   r+   r   r   r   r   ri   rV   
candidatesZselected_resultsr   r   r   r   B  s     

z/LanceDB.max_marginal_relevance_search_by_vectorzType[LanceDB]r   )clsrf   r+   rg   r*   r-   r.   r/   r0   r1   r2   r3   r5   r6   r7   ri   r   c                 K  s:   t f |||||||	|
||||d|}|j||d |S )N)r*   r+   r-   r.   r/   r0   r1   r2   r3   r5   r6   r7   )rg   )r"   rx   )r   rf   r+   rg   r*   r-   r.   r/   r0   r1   r2   r3   r5   r6   r7   ri   instancer   r   r   
from_textso  s$    zLanceDB.from_texts)rh   
delete_allr   drop_columnsr:   ri   r   c                 K  s~   |  |}|r|| n`|r>|| j dd| n<|rb| jdurVtdqz|| n|rr|d ntddS )a  
        Allows deleting rows by filtering, by ids or drop columns from the table.

        Args:
            filter: Provide a string SQL expression -  "{col} {operation} {value}".
            ids: Provide list of ids to delete from the table.
            drop_columns: Provide list of columns to drop from the table.
            delete_all: If True, delete all rows from the table.
        z
 in ('{}'),Nz;Column operations currently not supported in LanceDB Cloud.truez6Provide either filter, ids, drop_columns or delete_all)	rR   deleter>   formatr   r1   r   r   rF   )rS   rh   r   r   r   r:   ri   rw   r   r   r   r     s    
 
zLanceDB.delete)F)NN)NF)NNr{   r|   Nr}   N)NN)NNN)NNN)NNN)NN)NNNF)Nr   r   N)Nr   r   N)NNr#   r$   r%   r&   NNr'   r(   NN)NNNNN)__name__
__module____qualname____doc__	DEFAULT_KrT   rc   propertyre   rx   rR   r   r   r   r   r   r   r   r   r   r   r   classmethodr   r   r   r   r   r   r"      s   ,W"  1 #       *  7   &        +         /    -            2'     r"   )
__future__r   r   r@   rk   rK   typingr   r   r   r   r   r   r	   numpyr   Zlangchain_core.documentsr
   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr   Zlangchain_core.vectorstoresr   Z&langchain_community.vectorstores.utilsr   r   r   r!   r"   r   r   r   r   <module>   s   $