a
    bg                     @   s   d dl mZ d dlmZmZmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ G dd deeZG dd deZdS )    )Enum)AnyDictListOptionalUnionN)CallbackManagerForRetrieverRun)Document)
Embeddings)BaseRetriever)
get_fields)
ConfigDict)maximal_marginal_relevancec                   @   s   e Zd ZdZdZdZdS )
SearchTypez-Enumerator of the types of search to perform.
similaritymmrN)__name__
__module____qualname____doc__r   r    r   r   u/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/retrievers/docarray.pyr      s   r   c                   @   s   e Zd ZU dZdZeed< eed< eed< eed< e	j
Ze	ed< dZeed	< dZee ed
< eddZeeee dddZejeeeeeef ef  dddZejee dddZejee dddZeeeef ef edddZdS )DocArrayRetrievera  `DocArray Document Indices` retriever.

    Currently, it supports 5 backends:
    InMemoryExactNNIndex, HnswDocumentIndex, QdrantDocumentIndex,
    ElasticDocIndex, and WeaviateDocumentIndex.

    Args:
        index: One of the above-mentioned index instances
        embeddings: Embedding model to represent text as vectors
        search_field: Field to consider for searching in the documents.
            Should be an embedding/vector/tensor.
        content_field: Field that represents the main content in your document schema.
            Will be used as a `page_content`. Everything else will go into `metadata`.
        search_type: Type of search to perform (similarity / mmr)
        filters: Filters applied for document retrieval.
        top_k: Number of documents to return
    Nindex
embeddingssearch_fieldcontent_fieldsearch_type   top_kfiltersT)Zarbitrary_types_allowed)queryrun_managerreturnc                C   sX   t | j|}| jtjkr*| |}n*| jtjkrB| 	|}nt
d| j d|S )zGet documents relevant for a query.

        Args:
            query: string to find relevant documents for

        Returns:
            List of relevant documents
        zSearch type z5 does not exist. Choose either 'similarity' or 'mmr'.)nparrayr   Zembed_queryr   r   r   _similarity_searchr   _mmr_search
ValueError)selfr!   r"   	query_embresultsr   r   r   _get_relevant_documents5   s    z)DocArrayRetriever._get_relevant_documents)r*   r   r#   c           	      C   s   ddl m}m} i }| j}t| j|r6| j|d< d}n"t| j|rN| j|d< n
| j|d< | jr| j j||dj	f i |j
|d}| j|}t|d	r|j}|d
| }n| jj|||dj}|S )a  
        Perform a search using the query embedding and return top_k documents.

        Args:
            query_emb: Query represented as an embedding
            top_k: Number of documents to return

        Returns:
            A list of top_k documents matching the query
        r   )ElasticDocIndexWeaviateDocumentIndexZwhere_filter r!   Zfilter_query)r!   r   )limit	documentsN)r!   r   r0   )Zdocarray.indexr-   r.   r   
isinstancer   r    Zbuild_queryfindfilterbuildZexecute_queryhasattrr1   )	r)   r*   r   r-   r.   Zfilter_argsr   r!   docsr   r   r   _searchQ   s4    



	
zDocArrayRetriever._search)r*   r#   c                    s&    j | jd} fdd|D }|S )z
        Perform a similarity search.

        Args:
            query_emb: Query represented as an embedding

        Returns:
            A list of documents most similar to the query
        r*   r   c                    s   g | ]}  |qS r   _docarray_to_langchain_doc.0docr)   r   r   
<listcomp>       z8DocArrayRetriever._similarity_search.<locals>.<listcomp>)r8   r   )r)   r*   r7   r+   r   r?   r   r&      s    
z$DocArrayRetriever._similarity_searchc                    sD   j |dd t|fdd D jd} fdd|D }|S )z
        Perform a maximal marginal relevance (mmr) search.

        Args:
            query_emb: Query represented as an embedding

        Returns:
            A list of diverse documents related to the query
           r9   c                    s,   g | ]$}t |tr| j n
t| jqS r   )r2   dictr   getattrr<   r?   r   r   r@      s   z1DocArrayRetriever._mmr_search.<locals>.<listcomp>)kc                    s   g | ]}  | qS r   r:   )r=   idxr7   r)   r   r   r@      rA   )r8   r   r   )r)   r*   Zmmr_selectedr+   r   rG   r   r'      s    


zDocArrayRetriever._mmr_search)r>   r#   c                 C   s   t |tr| nt|}| j|vr6td| j dtt |trL|| j n
t|| jd}|D ]F}t |trv|| nt||}t |tt	t
tfr`|| jkr`||j|< q`|S )a;  
        Convert a DocArray document (which also might be a dict)
        to a langchain document format.

        DocArray document can contain arbitrary fields, so the mapping is done
        in the following way:

        page_content <-> content_field
        metadata <-> all other fields excluding
            tensors and embeddings (so float, int, string)

        Args:
            doc: DocArray document

        Returns:
            Document in langchain format

        Raises:
            ValueError: If the document doesn't contain the content field
        z.Document does not contain the content field - .)Zpage_content)r2   rC   keysr   r   r(   r	   rD   strintfloatboolmetadata)r)   r>   fieldsZlc_docnamevaluer   r   r   r;      s$    

z,DocArrayRetriever._docarray_to_langchain_doc)r   r   r   r   r   r   __annotations__r
   rJ   r   r   r   r   rK   r    r   r   Zmodel_configr   r   r	   r,   r$   Zndarrayr   r   r8   r&   r'   r;   r   r   r   r   r      s*   
.r   )enumr   typingr   r   r   r   r   numpyr$   Zlangchain_core.callbacksr   Zlangchain_core.documentsr	   Zlangchain_core.embeddingsr
   Zlangchain_core.retrieversr   Zlangchain_core.utils.pydanticr   Zpydanticr   Z&langchain_community.vectorstores.utilsr   rJ   r   r   r   r   r   r   <module>   s   