a
    bg;                     @  s   d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ eeZG dd deZdS )    )annotationsN)deepcopy)Enum)AnyIterableListOptionalTuple)Document)
Embeddings)run_in_executor)VectorStore)maximal_marginal_relevancec                   @  s  e Zd ZdZdEdddddddddZedd	d
dZdFdddddddddZedGddddddddddd dddZ	G dd de
Zdejdfdddddddd d!Zdejdfdddddd"dd#d$Zdejdfd%ddddd"d&d'd(Zdejdfd%dddddd&d)d*ZdHdd-dddd.ddd"d/d0d1ZdId%dddd3dd4d5d6Zd7dd8d9d:Zdd;d<d=d>ZdJddd?d@dAdBZdKddd?d@dCdDZdS )LRockseta  `Rockset` vector store.

    To use, you should have the `rockset` python package installed. Note that to use
    this, the collection being used must already exist in your Rockset instance.
    You must also ensure you use a Rockset ingest transformation to apply
    `VECTOR_ENFORCE` on the column being used to store `embedding_key` in the
    collection.
    See: https://rockset.com/blog/introducing-vector-search-on-rockset/ for more details

    Everything below assumes `commons` Rockset workspace.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Rockset
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            import rockset

            # Make sure you use the right host (region) for your Rockset instance
            # and APIKEY has both read-write access to your collection.

            rs = rockset.RocksetClient(host=rockset.Regions.use1a1, api_key="***")
            collection_name = "langchain_demo"
            embeddings = OpenAIEmbeddings()
            vectorstore = Rockset(rs, collection_name, embeddings,
                "description", "description_embedding")

    commonsr   r   str)client
embeddingscollection_nametext_keyembedding_key	workspacec                 C  s   zddl m} W n ty*   tdY n0 t||sHtdt| || _|| _|| _|| _	|| _
|| _z| jd W n ty   Y n0 dS )aN  Initialize with Rockset client.
        Args:
            client: Rockset client object
            collection: Rockset collection to insert docs / query
            embeddings: Langchain Embeddings object to use to generate
                        embedding for given text.
            text_key: column in Rockset collection to use to store the text
            embedding_key: column in Rockset collection to use to store the embedding.
                           Note: We must apply `VECTOR_ENFORCE()` on this column via
                           Rockset ingest transformation.

        r   )RocksetClient]Could not import rockset client python package. Please install it with `pip install rockset`.z;client should be an instance of rockset.RocksetClient, got Z	langchainN)rocksetr   ImportError
isinstance
ValueErrortype_client_collection_name_embeddings	_text_key_embedding_key
_workspaceZset_applicationAttributeError)selfr   r   r   r   r   r   r    r'   x/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/rocksetdb.py__init__1   s,    

zRockset.__init__returnc                 C  s   | j S N)r!   r&   r'   r'   r(   r   a   s    zRockset.embeddingsN    zIterable[str]zOptional[List[dict]]zOptional[List[str]]intz	List[str])texts	metadatasids
batch_sizekwargsr+   c                 K  s   g }g }t |D ]\}}	t||kr6|| |7 }g }i }
|rVt||krVt|| }
|rrt||krr|| |
d< |	|
| j< | j|	|
| j< ||
 qt|dkr|| |7 }g }|S )a  Run more texts through the embeddings and add to the vectorstore

                Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            batch_size: Send documents in batches to rockset.

        Returns:
            List of ids from adding the texts into the vectorstore.

        _idr   )		enumeratelen_write_documents_to_rocksetr   r"   r!   embed_queryr#   append)r&   r0   r1   r2   r3   r4   batchZ
stored_idsitextdocr'   r'   r(   	add_textse   s$    
zRockset.add_texts )r0   	embeddingr1   r   r   r   r   r2   r3   r4   r+   c
                 K  sX   |dusJ d|sJ d|s(J d|s4J d| |||||}| ||||	 |S )znCreate Rockset wrapper with existing texts.
        This is intended as a quicker way to get started.
        NzRockset Client cannot be NonezCollection name cannot be emptyzText key name cannot be emptyzEmbedding key cannot be empty)r?   )clsr0   rA   r1   r   r   r   r   r2   r3   r4   r   r'   r'   r(   
from_texts   s    zRockset.from_textsc                   @  s&   e Zd ZdZdZdZddddZdS )	zRockset.DistanceFunction
COSINE_SIMEUCLIDEAN_DISTDOT_PRODUCTr   r*   c                 C  s   | j dkrdS dS )NrE   ASCZDESC)valuer-   r'   r'   r(   order_by   s    
z!Rockset.DistanceFunction.order_byN)__name__
__module____qualname__rD   rE   rF   rI   r'   r'   r'   r(   DistanceFunction   s   rM      zOptional[str]zList[Tuple[Document, float]])querykdistance_func	where_strr4   r+   c                 K  s    | j | j||||fi |S )a  Perform a similarity search with Rockset

        Args:
            query (str): Text to look up documents similar to.
            distance_func (DistanceFunction): how to compute distance between two
                vectors in Rockset.
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): Metadata filters supplied as a
                SQL `where` condition string. Defaults to None.
                eg. "price<=70.0 AND brand='Nintendo'"

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection.

        Returns:
            List[Tuple[Document, float]]: List of documents with their relevance score
        )1similarity_search_by_vector_with_relevance_scoresr!   r9   r&   rO   rP   rQ   rR   r4   r'   r'   r(   'similarity_search_with_relevance_scores   s    
z/Rockset.similarity_search_with_relevance_scoreszList[Document]c                 K  s    | j | j||||fi |S )zaSame as `similarity_search_with_relevance_scores` but
        doesn't return the scores.
        )similarity_search_by_vectorr!   r9   rT   r'   r'   r(   similarity_search   s    
zRockset.similarity_searchzList[float])rA   rP   rQ   rR   r4   r+   c                 K  s&   | j ||||fi |}dd |D S )zZAccepts a query_embedding (vector), and returns documents with
        similar embeddings.c                 S  s   g | ]\}}|qS r'   r'   ).0r>   _r'   r'   r(   
<listcomp>       z7Rockset.similarity_search_by_vector.<locals>.<listcomp>)rS   )r&   rA   rP   rQ   rR   r4   Zdocs_and_scoresr'   r'   r(   rV      s    z#Rockset.similarity_search_by_vectorc              
   K  s>  d}d|v r|d }|  |||||}z| jjjd|id}W n4 tyr }	 ztd|	 g W  Y d}	~	S d}	~	0 0 g }
|jD ]}i }t|t	sJ d
t|| D ]v\}}|| jkrt|tsJ d
| jt||}q|d	krt|ts
J d

t||}q|dvr|||< q|
t||d|f q~|
S )z|Accepts a query_embedding (vector), and returns documents with
        similar embeddings along with their relevance scores.Texclude_embeddingsrO   )sqlz$Exception when querying Rockset: %s
Nz;document should be of type `dict[str,Any]`. But found: `{}`zIpage content stored in column `{}` must be of type `str`. But found: `{}`distzDComputed distance between vectors must of type `float`. But found {})r5   Z_event_time_meta)page_contentmetadata)_build_query_sqlr   ZQueriesrO   	Exceptionloggererrorresultsr   dictformatr   itemsr"   r   floatr:   r
   )r&   rA   rP   rQ   rR   r4   r\   Zq_strZquery_responseeZfinalResultZdocumentra   vr`   Zscorer'   r'   r(   rS      sT    





z9Rockset.similarity_search_by_vector_with_relevance_scores         ?)rR   rj   )rO   rP   fetch_klambda_multrR   r4   r+   c                  sx   j |}j|f||dd| fdd D }tt||||d}	|	D ]}
 |
 jj= qR fdd|	D S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            distance_func (DistanceFunction): how to compute distance between two
                vectors in Rockset.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            where_str: where clause for the sql query
        Returns:
            List of Documents selected by maximal marginal relevance.
        F)rP   rR   r\   c                   s   g | ]}|j  j qS r'   )ra   r#   )rX   r>   r-   r'   r(   rZ   S  r[   z9Rockset.max_marginal_relevance_search.<locals>.<listcomp>)rp   rP   c                   s   g | ]} | qS r'   r'   rX   r<   )initial_docsr'   r(   rZ   a  r[   )r!   r9   rV   r   nparrayra   r#   )r&   rO   rP   ro   rp   rR   r4   query_embeddingr   Zselected_indicesr<   r'   )rr   r&   r(   max_marginal_relevance_search-  s(    	z%Rockset.max_marginal_relevance_searchTbool)ru   rQ   rP   rR   r\   r+   c           	      C  s   d tt|}|j d| j d| d}|r:d| dnd}|rPd| j d	nd}d
| d| d| j d| j d| d|  dt| dS )zABuilds Rockset SQL query to query similar vectors to query_vector,(z, [z
]) as distzWHERE 
r@   z EXCEPT(z),zSELECT * z
FROM .zORDER BY dist z
LIMIT )joinmapr   rH   r#   r$   r    rI   )	r&   ru   rQ   rP   rR   r\   Zq_embedding_strZdistance_strZselect_embeddingr'   r'   r(   rb   e  s,    
zRockset._build_query_sqlz
List[dict])r;   r+   c                 C  s(   | j jj| j|| jd}dd |jD S )NZ
collectiondatar   c                 S  s   g | ]
}|j qS r'   )r5   )rX   Z
doc_statusr'   r'   r(   rZ     r[   z7Rockset._write_documents_to_rockset.<locals>.<listcomp>)r   	DocumentsZadd_documentsr    r$   r   )r&   r;   Zadd_doc_resr'   r'   r(   r8   ~  s    
z#Rockset._write_documents_to_rocksetNone)r2   r+   c                   sV   zddl m  W n ty*   tdY n0 | jjj| j fdd|D | jd dS )z1Delete a list of docs from the Rockset collectionr   DeleteDocumentsRequestDatar   c                   s   g | ]} |d qS ))idr'   rq   r   r'   r(   rZ     r[   z(Rockset.delete_texts.<locals>.<listcomp>r   N)Zrockset.modelsr   r   r   r   Zdelete_documentsr    r$   )r&   r2   r'   r   r(   delete_texts  s    
zRockset.delete_textszOptional[bool])r2   r4   r+   c              
   K  sR   z|d u rg }|  | W n2 tyL } ztd| W Y d }~dS d }~0 0 dS )Nz.Exception when deleting docs from Rockset: %s
FT)r   rc   rd   re   )r&   r2   r4   rk   r'   r'   r(   delete  s    zRockset.deletec                   s   t d | j|fi |I d H S r,   )r   r   )r&   r2   r4   r'   r'   r(   adelete  s    zRockset.adelete)r   )NNr.   )NNr@   r@   r@   Nr.   )rN   rm   rn   )rN   NT)N)N)rJ   rK   rL   __doc__r)   propertyr   r?   classmethodrC   r   rM   rD   rU   rW   rV   rS   rv   rb   r8   r   r   r   r'   r'   r'   r(   r      sd   $ 0   (       &$7   <    r   )
__future__r   loggingcopyr   enumr   typingr   r   r   r   r	   numpyrs   Zlangchain_core.documentsr
   Zlangchain_core.embeddingsr   Zlangchain_core.runnablesr   Zlangchain_core.vectorstoresr   Z&langchain_community.vectorstores.utilsr   	getLoggerrJ   rd   r   r'   r'   r'   r(   <module>   s   
