a
    bg                  
   @   s   d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ eed
ddZdee eeeeee  eee  ee ddddZG dd deZdS )z7Taken from: https://docs.pinecone.io/docs/hybrid-search    N)AnyDictListOptional)CallbackManagerForRetrieverRun)Document)
Embeddings)BaseRetriever)pre_init)
ConfigDict)textreturnc                 C   s   t t| d S )zhHash a text using SHA256.

    Args:
        text: Text to hash.

    Returns:
        Hashed text.
    zutf-8)strhashlibsha256encode	hexdigest)r    r   /var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/retrievers/pinecone_hybrid_search.py	hash_text   s    	r   )contextsindex
embeddingssparse_encoderids	metadatas	namespacer   c              	   C   s8  d}t dt| |}zddlm}	 |	|}W n ty>   Y n0 |du rVdd | D }|D ]}
t|
| t| }| |
| }||
| }|r||
| ndd |D }dd t||D }||}||}|D ]}d	d |d
 D |d
< qg }t||||D ]"\}}}}|	||||d q |j
||d qZdS )a  Create an index from a list of contexts.

    It modifies the index argument in-place!

    Args:
        contexts: List of contexts to embed.
        index: Index to use.
        embeddings: Embeddings model to use.
        sparse_encoder: Sparse encoder to use.
        ids: List of ids to use for the documents.
        metadatas: List of metadata to use for the documents.
        namespace: Namespace value for index partition.
        r   )tqdmNc                 S   s   g | ]}t |qS r   )r   ).0contextr   r   r   
<listcomp>;       z create_index.<locals>.<listcomp>c                 S   s   g | ]}i qS r   r   )r   _r   r   r   r!   D   r"   c                 S   s   g | ]\}}d |i|qS )r    r   )r   r    metadatar   r   r   r!   G   s   c                 S   s   g | ]}t |qS r   floatr   s1r   r   r   r!   Q   r"   values)idZsparse_valuesr)   r$   )r   )rangelenZ	tqdm.autor   ImportErrorminzipZembed_documentsZencode_documentsappendZupsert)r   r   r   r   r   r   r   Z
batch_sizeZ	_iteratorr   iZi_endZcontext_batchZ	batch_idsZmetadata_batchmetaZdense_embedsZsparse_embedssZvectorsZdoc_idsparseZdenser$   r   r   r   create_index   sD    


r5   c                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< dZeed	< dZee ed
< edddZdee eee  eee  ee ddddZeeedddZeeeee dddZdS )PineconeHybridSearchRetrieverz#`Pinecone Hybrid Search` retriever.r   Nr   r      top_kg      ?alphar   TZforbid)Zarbitrary_types_allowedextra)textsr   r   r   r   c              	   C   s    t || j| j| j|||d d S )N)r   r   r   )r5   r   r   r   )selfr;   r   r   r   r   r   r   	add_texts{   s    z'PineconeHybridSearchRetriever.add_texts)r)   r   c                 C   s<   zddl m} ddlm} W n ty6   tdY n0 |S )z?Validate that api key and python package exists in environment.r   hybrid_convex_scale)BaseSparseEncoderzbCould not import pinecone_text python package. Please install it with `pip install pinecone_text`.)pinecone_text.hybridr?   Z(pinecone_text.sparse.base_sparse_encoderr@   r-   )clsr)   r?   r@   r   r   r   validate_environment   s    
z2PineconeHybridSearchRetriever.validate_environment)queryrun_managerkwargsr   c                K   s   ddl m} | j|}| j|}|||| j\}}dd |d D |d< | jjf ||| j	d| j
d|}g }|d D ]H}	|	d	 d
}
|	d	 }d|vrd|	v r|	d |d< |t|
|d qz|S )Nr   r>   c                 S   s   g | ]}t |qS r   r%   r'   r   r   r   r!      r"   zIPineconeHybridSearchRetriever._get_relevant_documents.<locals>.<listcomp>r)   T)ZvectorZsparse_vectorr8   Zinclude_metadatar   matchesr$   r    Zscore)Zpage_contentr$   )rA   r?   r   Zencode_queriesr   Zembed_queryr9   r   rD   r8   r   popr0   r   )r<   rD   rE   rF   r?   Z
sparse_vecZ	dense_vecresultZfinal_resultresr    r$   r   r   r   _get_relevant_documents   s,    z5PineconeHybridSearchRetriever._get_relevant_documents)NNN)__name__
__module____qualname____doc__r   __annotations__r   r   r   r8   intr9   r&   r   r   r   r   Zmodel_configr   dictr=   r
   r   rC   r   r   rK   r   r   r   r   r6   e   s4   
   

r6   )NNN)rO   r   typingr   r   r   r   Zlangchain_core.callbacksr   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.retrieversr	   Zlangchain_core.utilsr
   Zpydanticr   r   r   rR   r5   r6   r   r   r   r   <module>   s.      

K