a
    bgm0                     @  s   d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ erd dlmZ d dlmZ G d	d
 d
eeZedeeef dZeeZdZG dd deZdS )    )annotationsN)Enum)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTypeVarUnion)Document)VectorStore)
Embeddings)
Collectionc                   @  s   e Zd ZdZdZdZdZdS )DocumentDBSimilarityTypez)DocumentDB Similarity Type as enumerator.ZcosineZ
dotProductZ	euclideanN)__name__
__module____qualname____doc__COSDOTZEUC r   r   y/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/vectorstores/documentdb.pyr      s
   r   DocumentDBDocumentType)bound   c                	   @  sL  e Zd ZdZdddddddddd	d
dZeddddZddddZeddddd dddZ	ddddZ
ddddZdejddfddddd d!d"d#ZdJd%d&dd'd(d)d*Zd+d,d'd-d.d/ZedKd+dd0d1dd d2d3d4ZdLd5dd6d7d8d9ZdMd:dd;d<d=ZdNd@dddAdBdCdDdEZdOd$dFddddAddBdGdHdIZd$S )PDocumentDBVectorSearcha  `Amazon DocumentDB (with MongoDB compatibility)` vector store.
    Please refer to the official Vector Search documentation for more details:
    https://docs.aws.amazon.com/documentdb/latest/developerguide/vector-search.html

    To use, you should have both:
    - the ``pymongo`` python package installed
    - a connection string and credentials associated with a DocumentDB cluster

    Example:
        . code-block:: python

            from langchain_community.vectorstores import DocumentDBVectorSearch
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            from pymongo import MongoClient

            mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
            collection = mongo_client["<db_name>"]["<collection_name>"]
            embeddings = OpenAIEmbeddings()
            vectorstore = DocumentDBVectorSearch(collection, embeddings)
    ZvectorSearchIndexZtextContentZvectorContent)
index_nametext_keyembedding_keyz"Collection[DocumentDBDocumentType]r   str)
collection	embeddingr   r   r    c                C  s*   || _ || _|| _|| _|| _tj| _dS )a  Constructor for DocumentDBVectorSearch

        Args:
            collection: MongoDB collection to add the texts to.
            embedding: Text embedding model to use.
            index_name: Name of the Vector Search index.
            text_key: MongoDB field that will contain the text
                for each document.
            embedding_key: MongoDB field that will contain the embedding
                for each document.
        N)_collection
_embedding_index_name	_text_key_embedding_keyr   r   _similarity_type)selfr"   r#   r   r   r    r   r   r   __init__B   s    zDocumentDBVectorSearch.__init__)returnc                 C  s   | j S N)r%   r*   r   r   r   
embeddings]   s    z!DocumentDBVectorSearch.embeddingsc                 C  s   | j S )zUReturns the index name

        Returns:
            Returns the index name

        )r&   r.   r   r   r   get_index_namea   s    z%DocumentDBVectorSearch.get_index_namer   )connection_string	namespacer#   kwargsr,   c           
      K  s`   zddl m} W n ty*   tdY n0 ||}|d\}}|| | }	| |	|fi |S )a  Creates an Instance of DocumentDBVectorSearch from a Connection String

        Args:
            connection_string: The DocumentDB cluster endpoint connection string
            namespace: The namespace (database.collection)
            embedding: The embedding utility
            **kwargs: Dynamic keyword arguments

        Returns:
            an instance of the vector store

        r   )MongoClientzGCould not import pymongo, please install it with `pip install pymongo`..)Zpymongor4   ImportErrorsplit)
clsr1   r2   r#   r3   r4   clientZdb_nameZcollection_namer"   r   r   r   from_connection_stringj   s    
z-DocumentDBVectorSearch.from_connection_stringboolc                 C  s6   | j  }| j}|D ]}|d}||kr dS qdS )zVerifies if the specified index name during instance
            construction exists on the collection

        Returns:
          Returns True on success and False if no such index exists
            on the collection
        nameTF)r$   Zlist_indexesr&   pop)r*   cursorr   resZcurrent_index_namer   r   r   index_exists   s    

z#DocumentDBVectorSearch.index_existsNonec                 C  s   |   r| j| j dS )zEDeletes the index specified during instance construction if it existsN)r@   r$   Z
drop_indexr&   r.   r   r   r   delete_index   s    z#DocumentDBVectorSearch.delete_indexi      @   intr   zdict[str, Any])
dimensions
similaritymef_constructionr,   c              	   C  sH   || _ | jj| j| jdid||||ddgd}| jj}||}|S )a  Creates an index using the index name specified at
            instance construction

        Args:
            dimensions: Number of dimensions for vector similarity.
                The maximum number of supported dimensions is 2000

            similarity: Similarity algorithm to use with the HNSW index.
                 Possible options are:
                    - DocumentDBSimilarityType.COS (cosine distance),
                    - DocumentDBSimilarityType.EUC (Euclidean distance), and
                    - DocumentDBSimilarityType.DOT (dot product).

            m: Specifies the max number of connections for an HNSW index.
                Large impact on memory consumption.

            ef_construction: Specifies the size of the dynamic candidate list
                for constructing the graph for HNSW index. Higher values lead
                to more accurate results but slower indexing speed.


        Returns:
            An object describing the created index

        vectorZhnsw)typerG   rF   rH   ZefConstruction)r<   keyZvectorOptions)ZcreateIndexesZindexes)r)   r$   r<   r&   r(   Zdatabasecommand)r*   rF   rG   rH   rI   Zcreate_index_commandsZcurrent_databaseZcreate_index_responsesr   r   r   create_index   s$     z#DocumentDBVectorSearch.create_indexNzIterable[str]zOptional[List[Dict[str, Any]]]r	   )texts	metadatasr3   r,   c                 K  s   | dt}|pdd |D }g }g }g }tt||D ]J\}	\}
}||
 || |	d | dkr8|| || g }g }q8|r|| || |S )N
batch_sizec                 s  s   | ]
}i V  qd S r-   r   ).0_r   r   r   	<genexpr>       z3DocumentDBVectorSearch.add_texts.<locals>.<genexpr>   r   )getDEFAULT_INSERT_BATCH_SIZE	enumeratezipappendextend_insert_texts)r*   rO   rP   r3   rQ   Z
_metadatasZtexts_batchZmetadatas_batchZ
result_idsitextmetadatar   r   r   	add_texts   s    

z DocumentDBVectorSearch.add_textsz	List[str]zList[Dict[str, Any]])rO   rP   r,   c                   s@   |sg S  j |} fddt|||D } j|}|jS )zUsed to Load Documents into the collection

        Args:
            texts: The list of documents strings to load
            metadatas: The list of metadata objects associated with each document

        Returns:

        c                   s&   g | ]\}}} j | j|i|qS r   )r'   r(   )rR   trH   r#   r.   r   r   
<listcomp>  s   z8DocumentDBVectorSearch._insert_texts.<locals>.<listcomp>)r%   Zembed_documentsrZ   r$   Zinsert_manyZinserted_ids)r*   rO   rP   r/   Z	to_insertZinsert_resultr   r.   r   r]      s    

z$DocumentDBVectorSearch._insert_textszOptional[List[dict]]z,Optional[Collection[DocumentDBDocumentType]])rO   r#   rP   r"   r3   r,   c                 K  s4   |d u rt d| ||fi |}|j||d |S )Nz*Must provide 'collection' named parameter.)rP   )
ValueErrorra   )r8   rO   r#   rP   r"   r3   Zvectorstorer   r   r   
from_texts  s
    	z!DocumentDBVectorSearch.from_textszOptional[List[str]]zOptional[bool])idsr3   r,   c                 K  s(   |d u rt d|D ]}| | qdS )Nz#No document ids provided to delete.T)rd   delete_document_by_id)r*   rf   r3   document_idr   r   r   delete  s
    zDocumentDBVectorSearch.deletezOptional[str])rh   r,   c              
   C  sh   zddl m} W n. ty> } ztd|W Y d}~n
d}~0 0 |du rPtd| jd||i dS )zjRemoves a Specific Document by Id

        Args:
            document_id: The document identifier
        r   )ObjectIdz>Unable to import bson, please install with `pip install bson`.Nz"No document id provided to delete.Z_id)Zbson.objectidrj   r6   rd   r$   Z
delete_one)r*   rh   rj   er   r   r   rg   &  s    z,DocumentDBVectorSearch.delete_document_by_id   (   zList[float]zOptional[Dict[str, Any]]zList[Document])r/   k	ef_searchfilterr,   c           
   	   C  sh   |si }d|idd|| j | j||diig}| j|}g }|D ]"}|| j}	|t|	|d q@|S )a   Returns a list of documents.

        Args:
            embeddings: The query vector
            k: the number of documents to return
            ef_search: Specifies the size of the dynamic candidate list
                that HNSW index uses during search. A higher value of
                efSearch provides better recall at cost of speed.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
        Returns:
            A list of documents closest to the query vector
        z$matchz$searchZvectorSearch)rJ   pathrG   rn   ZefSearch)Zpage_contentr`   )r(   r)   r$   Z	aggregater=   r'   r[   r   )
r*   r/   rn   ro   rp   Zpipeliner>   docsr?   r_   r   r   r    _similarity_search_without_score7  s(    z7DocumentDBVectorSearch._similarity_search_without_score)rp   )queryrn   ro   rp   r3   r,   c                K  s,   | j |}| j||||d}dd |D S )N)r/   rn   ro   rp   c                 S  s   g | ]}|qS r   r   )rR   docr   r   r   rc   t  rU   z<DocumentDBVectorSearch.similarity_search.<locals>.<listcomp>)r%   Zembed_queryrs   )r*   rt   rn   ro   rp   r3   r/   rr   r   r   r   similarity_searchg  s
    	z(DocumentDBVectorSearch.similarity_search)N)NN)N)N)rl   rm   N)rl   rm   )r   r   r   r   r+   propertyr/   r0   classmethodr:   r@   rB   r   r   rN   ra   r]   re   ri   rg   rs   rv   r   r   r   r   r   ,   sF   		A      3  r   )
__future__r   loggingenumr   typingr   r   r   r   r   r	   r
   r   r   Zlangchain_core.documentsr   Zlangchain_core.vectorstoresr   Zlangchain_core.embeddingsr   Zpymongo.collectionr   r!   r   r   	getLoggerr   loggerrX   r   r   r   r   r   <module>   s   ,
