a
    ag~)                     @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlm Z m!Z! ddl"m#Z# edZ$ededZ%ee$ e
e$ge%f ee$ dddZ&G dd deZ'dS )zo
Ensemble retriever that ensemble the results of
multiple retrievers by using weighted  Reciprocal Rank Fusion
    N)defaultdict)Hashable)chain)	AnyCallableDictIterableIteratorListOptionalTypeVarcast)#AsyncCallbackManagerForRetrieverRunCallbackManagerForRetrieverRun)Document)BaseRetrieverRetrieverLike)RunnableConfig)ensure_configpatch_config)ConfigurableFieldSpecget_unique_config_specs)model_validatorTH)bound)iterablekeyreturnc                 c   s4   t  }| D ]$}|| }|vr
|| |V  q
dS )a  Yield unique elements of an iterable based on a key function.

    Args:
        iterable: The iterable to filter.
        key: A function that returns a hashable key for each element.

    Yields:
        Unique elements of the iterable based on the key function.
    N)setadd)r   r   seenek r$   k/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain/retrievers/ensemble.pyunique_by_key(   s
    

r&   c                   @   sJ  e Zd ZU dZee ed< ee ed< dZe	ed< dZ
ee ed< eee dd	d
Zeddeeeef edddZd#eee eee dddZd$eee eee dddZeeee dddZeeee dddZddeeee ee dddZddeeee ee dddZeee  ee d d!d"ZdS )%EnsembleRetrieverae  Retriever that ensembles the multiple retrievers.

    It uses a rank fusion.

    Args:
        retrievers: A list of retrievers to ensemble.
        weights: A list of weights corresponding to the retrievers. Defaults to equal
            weighting for all retrievers.
        c: A constant added to the rank, controlling the balance between the importance
            of high-ranked items and the consideration given to lower-ranked items.
            Default is 60.
        id_key: The key in the document's metadata used to determine unique documents.
            If not specified, page_content is used.
    
retrieversweights<   cNid_key)r   c                 C   s   t dd | jD S )z+List configurable fields for this runnable.c                 s   s   | ]}|j D ]
}|V  qqd S N)config_specs).0	retrieverspecr$   r$   r%   	<genexpr>Q   s   z1EnsembleRetriever.config_specs.<locals>.<genexpr>)r   r(   selfr$   r$   r%   r.   N   s    zEnsembleRetriever.config_specsbefore)mode)valuesr   c                 C   s,   | ds(t|d }d| g| |d< |S )Nr)   r(      )getlen)clsr7   Zn_retrieversr$   r$   r%   set_weightsU   s    
zEnsembleRetriever.set_weights)inputconfigkwargsr   c           	   
   K   s   ddl m} t|}|j|dd |dd|dg | j|di | jd}|jd |fd	|d
pj|  i|}z| j	|||d}W n2 t
y } z|| |W Y d }~n d }~0 0 |j|fi | |S d S )Nr   )CallbackManager	callbacksverboseFtagsmetadatarB   Zinheritable_tagsZ
local_tagsZinheritable_metadataZlocal_metadatanamerun_namerun_managerr>   )langchain_core.callbacksr@   r   	configurer9   rC   rD   on_retriever_startget_namerank_fusion	Exceptionon_retriever_erroron_retriever_end)	r4   r=   r>   r?   r@   callback_managerrI   resultr"   r$   r$   r%   invoke]   s<    


	
zEnsembleRetriever.invokec           	   
      s   ddl m} t|}|j|dd |dd|dg | j|di | jd}|jd |fd	|d
pj|  i|I d H }z| j	|||dI d H }W n8 t
y } z ||I d H  |W Y d }~n&d }~0 0 |j|fi |I d H  |S d S )Nr   )AsyncCallbackManagerrA   rB   FrC   rD   rE   rF   rG   rH   )rJ   rU   r   rK   r9   rC   rD   rL   rM   arank_fusionrO   rP   rQ   )	r4   r=   r>   r?   rU   rR   rI   rS   r"   r$   r$   r%   ainvoke~   s@    


	zEnsembleRetriever.ainvoke)queryrI   r   c                C   s   |  ||}|S )z
        Get the relevant documents for a given query.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        )rN   r4   rX   rI   fused_documentsr$   r$   r%   _get_relevant_documents   s    z)EnsembleRetriever._get_relevant_documentsc                   s   |  ||I dH }|S )z
        Asynchronously get the relevant documents for a given query.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        N)rV   rY   r$   r$   r%   _aget_relevant_documents   s    z*EnsembleRetriever._aget_relevant_documents)r>   )rX   rI   r>   r   c                   sR    fddt | jD }tt|D ]}dd || D ||< q(| |}|S )z
        Retrieve the results of the retrievers and use rank_fusion_func to get
        the final result.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        c                    s6   g | ].\}}| t jd |d  ddqS Z
retriever_r8   )tag)rA   )rT   r   	get_childr/   ir0   r>   rX   rI   r$   r%   
<listcomp>   s   z1EnsembleRetriever.rank_fusion.<locals>.<listcomp>c                 S   s*   g | ]"}t |tr"ttt|d n|qS )page_content)
isinstancestrr   r   r/   docr$   r$   r%   rc      s   )	enumerater(   ranger:   weighted_reciprocal_rankr4   rX   rI   r>   Zretriever_docsra   rZ   r$   rb   r%   rN      s    
zEnsembleRetriever.rank_fusionc                   s^   t j fddt| jD  I dH }tt|D ]}dd || D ||< q4| |}|S )z
        Asynchronously retrieve the results of the retrievers
        and use rank_fusion_func to get the final result.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        c                    s6   g | ].\}}| t jd |d  ddqS r]   )rW   r   r_   r`   rb   r$   r%   rc     s   z2EnsembleRetriever.arank_fusion.<locals>.<listcomp>Nc                 S   s$   g | ]}t |tst|d n|qS rd   )rf   r   rh   r$   r$   r%   rc     s   )asynciogatherrj   r(   rk   r:   rl   rm   r$   rb   r%   rV      s    

zEnsembleRetriever.arank_fusion)	doc_listsr   c                    s   t |t jkrtdtt t|jD ]P\}}t|ddD ]:\}} jdu r\|jn
|j	j   ||j
  7  < qBq.t|}tt|fddd fddd	}|S )
a  
        Perform weighted Reciprocal Rank Fusion on multiple rank lists.
        You can find more details about RRF here:
        https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf

        Args:
            doc_lists: A list of rank lists, where each rank list contains unique items.

        Returns:
            list: The final aggregated list of items sorted by their weighted RRF
                    scores in descending order.
        z<Number of rank lists must be equal to the number of weights.r8   )startNc                    s    j d u r| jS | j j  S r-   r,   re   rD   ri   r3   r$   r%   <lambda>F  s    z<EnsembleRetriever.weighted_reciprocal_rank.<locals>.<lambda>Tc                    s     j d u r| jn
| jj   S r-   rr   rs   Z	rrf_scorer4   r$   r%   rt   M  s   )reverser   )r:   r)   
ValueErrorr   floatziprj   r,   re   rD   r+   r   from_iterablesortedr&   )r4   rp   Zdoc_listweightZrankri   Zall_docsZsorted_docsr$   ru   r%   rl      s0    

	

z*EnsembleRetriever.weighted_reciprocal_rank)N)N) __name__
__module____qualname____doc__r
   r   __annotations__rx   r+   intr,   r   rg   propertyr   r.   r   classmethodr   r   r<   r   r   rT   rW   r   r[   r   r\   rN   rV   rl   r$   r$   r$   r%   r'   9   sZ   
 
" 
%.-
r'   )(r   rn   collectionsr   collections.abcr   	itertoolsr   typingr   r   r   r   r	   r
   r   r   r   rJ   r   r   Zlangchain_core.documentsr   Zlangchain_core.retrieversr   r   Zlangchain_core.runnablesr   Zlangchain_core.runnables.configr   r   Zlangchain_core.runnables.utilsr   r   Zpydanticr   r   r   r&   r'   r$   r$   r$   r%   <module>   s    ,$