a
    agC                     @   s   d Z ddlmZ ddlmZmZmZmZ ddlZ	ddl
mZmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZ edddZG dd deeZG dd deZG dd deeZ G dd deeZ!dS )z@A chain for comparing the output of two models using embeddings.    )Enum)AnyDictListOptionalN)AsyncCallbackManagerForChainRunCallbackManagerForChainRun	Callbacks)
Embeddings)pre_init)
ConfigDictField)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc                  C   sV   zddl m}  W n> tyN   zddlm}  W n tyH   tdY n0 Y n0 |  S )zaCreate an Embeddings object.
    Returns:
        Embeddings: The created Embeddings object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.)langchain_openair   ImportError%langchain_community.embeddings.openair    r   z/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain/evaluation/embedding_distance/base.py_embedding_factory   s    r   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    ZcosineZ	euclideanZ	manhattanZ	chebyshevZhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr   r   r   r   r   ,   s   
r   c                   @   s:  e Zd ZU dZeedZeed< ee	j
dZe	ed< eeeef eeef dddZed	d
Zeee dddZeedddZe	edddZeejejejdddZeejejejdddZeejejejdddZeejejejdddZ eejejejdddZ!eje"dd d!Z#d"S )#_EmbeddingDistanceChainMixina0  Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
                                            for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metric)valuesr   c                 C   s   | d}g }zddlm} || W n ty:   Y n0 zddlm} || W n tyh   Y n0 |svtdt|t|rzddl}W n ty   tdY n0 |S )zValidate that the TikTok library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        r*   r   r   r   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr   r   appendr   r   
isinstancetupletiktoken)clsr-   r*   Ztypes_r   r2   r   r   r   _validate_tiktoken_installedJ   s0    


z9_EmbeddingDistanceChainMixin._validate_tiktoken_installedT)Zarbitrary_types_allowedr   c                 C   s   dgS )zgReturn the output keys of the chain.

        Returns:
            List[str]: The output keys.
        scorer   selfr   r   r   output_keys|   s    z(_EmbeddingDistanceChainMixin.output_keys)resultr   c                 C   s$   d|d i}t |v r |t  |t < |S )Nr5   r   )r7   r9   parsedr   r   r   _prepare_output   s    z,_EmbeddingDistanceChainMixin._prepare_output)metricr   c              
   C   sN   t j| jt j| jt j| jt j| jt j	| j
i}||v r<|| S td| dS )zGet the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        zInvalid metric: N)r   r#   _cosine_distancer$   _euclidean_distancer%   _manhattan_distancer&   _chebyshev_distancer'   _hamming_distance
ValueError)r7   r<   Zmetricsr   r   r   _get_metric   s    
z(_EmbeddingDistanceChainMixin._get_metric)abr   c                 C   s:   zddl m} W n ty*   tdY n0 d|| | S )zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )cosine_similarityzThe cosine_similarity function is required to compute cosine distance. Please install the langchain-community package using `pip install langchain-community`.g      ?)Zlangchain_community.utils.mathrF   r   )rD   rE   rF   r   r   r   r=      s    
z-_EmbeddingDistanceChainMixin._cosine_distancec                 C   s   t j| | S )zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        )npZlinalgZnormrD   rE   r   r   r   r>      s    z0_EmbeddingDistanceChainMixin._euclidean_distancec                 C   s   t t | | S )zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        )rG   sumabsrH   r   r   r   r?      s    z0_EmbeddingDistanceChainMixin._manhattan_distancec                 C   s   t t | | S )zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        )rG   maxrJ   rH   r   r   r   r@      s    z0_EmbeddingDistanceChainMixin._chebyshev_distancec                 C   s   t | |kS )zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        )rG   meanrH   r   r   r   rA      s    z._EmbeddingDistanceChainMixin._hamming_distance)vectorsr   c                 C   s6   |  | j}||d dd|d dd }|S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        r      )rC   r,   Zreshapeitem)r7   rM   r<   r5   r   r   r   _compute_score   s    	&z+_EmbeddingDistanceChainMixin._compute_scoreN)$r   r    r!   r"   r   r   r*   r
   __annotations__r   r#   r,   r   r   strr   r4   r   Zmodel_configpropertyr   r8   dictr;   rC   staticmethodrG   Zndarrayr=   Zfloatingr>   r?   r@   rA   floatrQ   r   r   r   r   r(   >   s.   
"-r(   c                
   @   s  e Zd ZdZeedddZeedddZee	e dddZ
deeef ee eeef d
ddZdeeef ee eeef d
ddZd	d	d	d	ddeee eee	e  eeeef  eeedddZd	d	d	d	ddeee eee	e  eeeef  eeedddZd	S )EmbeddingDistanceEvalChaina"  Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                 C   s   dS )zReturn whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        Tr   r6   r   r   r   requires_reference  s    z-EmbeddingDistanceEvalChain.requires_referencec                 C   s   d| j j dS )NZ
embedding_	_distancer,   valuer6   r   r   r   evaluation_name  s    z*EmbeddingDistanceEvalChain.evaluation_namec                 C   s   ddgS )eReturn the input keys of the chain.

        Returns:
            List[str]: The input keys.
        
prediction	referencer   r6   r   r   r   
input_keys  s    z%EmbeddingDistanceEvalChain.input_keysNinputsrun_managerr   c                 C   s0   t | j|d |d g}| |}d|iS )a0  Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r_   r`   r5   rG   arrayr*   Zembed_documentsrQ   r7   rc   rd   rM   r5   r   r   r   _call  s
    
z EmbeddingDistanceEvalChain._callc                    s:   | j |d |d gI dH }t|}| |}d|iS )a:  Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r_   r`   Nr5   r*   Zaembed_documentsrG   rf   rQ   r7   rc   rd   ZembeddedrM   r5   r   r   r   _acall-  s    


z!EmbeddingDistanceEvalChain._acallF)r`   	callbackstagsmetadatainclude_run_info)r_   r`   rl   rm   rn   ro   kwargsr   c          	      K   s"   | ||d||||d}|  |S )a  Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required)
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r_   r`   rc   rl   rm   rn   ro   r;   	r7   r_   r`   rl   rm   rn   ro   rp   r9   r   r   r   _evaluate_stringsF  s    z,EmbeddingDistanceEvalChain._evaluate_stringsc          	         s*   | j ||d||||dI dH }| |S )a  Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        rq   rr   NZacallr;   rt   r   r   r   _aevaluate_stringsh  s    z-EmbeddingDistanceEvalChain._aevaluate_strings)N)N)r   r    r!   r"   rT   boolrY   rS   r]   r   ra   r   r   r   r   rh   r   rk   r	   rU   ru   rw   r   r   r   r   rX      sb   
 

 


&
rX   c                
   @   s   e Zd ZdZeee dddZeedddZde	ee
f ee e	ee
f dd	d
Zde	ee
f ee e	ee
f dddZdddddeeeeee  ee	ee
f  ee
edddZdddddeeeeee  ee	ee
f  ee
edddZdS )"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 C   s   ddgS )r^   r_   prediction_br   r6   r   r   r   ra     s    z-PairwiseEmbeddingDistanceEvalChain.input_keysc                 C   s   d| j j dS )NZpairwise_embedding_rZ   r[   r6   r   r   r   r]     s    z2PairwiseEmbeddingDistanceEvalChain.evaluation_nameNrb   c                 C   s0   t | j|d |d g}| |}d|iS )a  Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r_   rz   r5   re   rg   r   r   r   rh     s    
z(PairwiseEmbeddingDistanceEvalChain._callc                    s:   | j |d |d gI dH }t|}| |}d|iS )a/  Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r_   rz   Nr5   ri   rj   r   r   r   rk     s    


z)PairwiseEmbeddingDistanceEvalChain._acallF)rl   rm   rn   ro   )r_   rz   rl   rm   rn   ro   rp   r   c          	      K   s"   | ||d||||d}|  |S )a  Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r_   rz   rr   rs   	r7   r_   rz   rl   rm   rn   ro   rp   r9   r   r   r   _evaluate_string_pairs  s    z9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairsc          	         s*   | j ||d||||dI dH }| |S )a  Asynchronously evaluate the embedding distance

        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to traces
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r{   rr   Nrv   r|   r   r   r   _aevaluate_string_pairs  s    z:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairs)N)N)r   r    r!   r"   rT   r   rS   ra   r]   r   r   r   r   rh   r   rk   r	   rx   rU   r}   r~   r   r   r   r   ry     sZ   	 

 


(
ry   )"r"   enumr   typingr   r   r   r   numpyrG   Z langchain_core.callbacks.managerr   r   r	   Zlangchain_core.embeddingsr
   Zlangchain_core.utilsr   Zpydanticr   r   Zlangchain.chains.baser   Zlangchain.evaluation.schemar   r   Zlangchain.schemar   r   rS   r   r(   rX   ry   r   r   r   r   <module>   s&    : 
