"""Interfaces to be implemented by general evaluators."""
from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Optional, Sequence, Tuple, Union
from warnings import warn

from langchain_core.agents import AgentAction
from langchain_core.language_models import BaseLanguageModel
from langchain_core.runnables.config import run_in_executor

from langchain.chains.base import Chain

logger = logging.getLogger(__name__)


class EvaluatorType(str, Enum):
    """The types of the evaluators."""

    QA = "qa"
    COT_QA = "cot_qa"
    CONTEXT_QA = "context_qa"
    PAIRWISE_STRING = "pairwise_string"
    SCORE_STRING = "score_string"
    LABELED_PAIRWISE_STRING = "labeled_pairwise_string"
    LABELED_SCORE_STRING = "labeled_score_string"
    AGENT_TRAJECTORY = "trajectory"
    CRITERIA = "criteria"
    LABELED_CRITERIA = "labeled_criteria"
    STRING_DISTANCE = "string_distance"
    EXACT_MATCH = "exact_match"
    REGEX_MATCH = "regex_match"
    PAIRWISE_STRING_DISTANCE = "pairwise_string_distance"
    EMBEDDING_DISTANCE = "embedding_distance"
    PAIRWISE_EMBEDDING_DISTANCE = "pairwise_embedding_distance"
    JSON_VALIDITY = "json_validity"
    JSON_EQUALITY = "json_equality"
    JSON_EDIT_DISTANCE = "json_edit_distance"
    JSON_SCHEMA_VALIDATION = "json_schema_validation"


class LLMEvalChain(Chain):
    """A base class for evaluators that use an LLM."""

    @classmethod
    @abstractmethod
    def from_llm(cls, llm: BaseLanguageModel, **kwargs: Any) -> LLMEvalChain:
        """Create a new evaluator from an LLM."""
ZeddddZdddddddZ	dS )_EvalArgsMixinz(Mixin for checking evaluation arguments.boolr   c                 C  s   dS z2Whether this evaluator requires a reference label.Fr   selfr   r   r   requires_referenceV   s    z!_EvalArgsMixin.requires_referencec                 C  s   dS )0Whether this evaluator requires an input string.Fr   r#   r   r   r   requires_input[   s    z_EvalArgsMixin.requires_inputstrc                 C  s   d| j j dS )z&Warning to show when input is ignored.zIgnoring input in , as it is not expected.	__class__r   r#   r   r   r   _skip_input_warning`   s    z"_EvalArgsMixin._skip_input_warningc                 C  s   d| j j dS )z*Warning to show when reference is ignored.zIgnoring reference in r)   r*   r#   r   r   r   _skip_reference_warninge   s    z&_EvalArgsMixin._skip_reference_warningNOptional[str]None)	referenceinputr   c                 C  sx   | j r"|du r"t| jj dn|dur:| j s:t| j | jr\|du r\t| jj dn|durt| jstt| j dS )a  Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.z requires a reference string.)r'   
ValueErrorr+   r   r   r,   r%   r-   )r$   r0   r1   r   r   r   _check_evaluation_argsl   s    
z%_EvalArgsMixin._check_evaluation_args)NN)
r   r   r   r   propertyr%   r'   r,   r-   r3   r   r   r   r   r   S   s     r   c                   @  s   e Zd ZdZeddddZeddddZed	d	d
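# --- Illustrative sketch (not part of the original module) ---------------------
# A minimal, hypothetical user of _EvalArgsMixin: declaring ``requires_input`` as
# True makes ``_check_evaluation_args`` raise a ValueError when no input is given
# and emit a warning when an unexpected reference is passed. The class name below
# is an assumption added purely for demonstration.
class _ExampleNeedsInput(_EvalArgsMixin):
    """Toy mixin user that requires an input string but no reference."""

    @property
    def requires_input(self) -> bool:
        # Opt in to input checking; requires_reference keeps its default of False.
        return True

# For instance, ``_ExampleNeedsInput()._check_evaluation_args(input="hi")`` passes,
# while ``_ExampleNeedsInput()._check_evaluation_args()`` raises ValueError.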
ddddddddZd	d	d
ddddddddZ	d	d	d
ddddddddZ
d	d	d
ddddddddZd	S )StringEvaluatorzcGrade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.r(   r!   c                 C  s   | j jS )zThe name of the evaluation.r*   r#   r   r   r   evaluation_name   s    zStringEvaluator.evaluation_namer    c                 C  s   dS r"   r   r#   r   r   r   r%      s    z"StringEvaluator.requires_referenceNr0   r1   zUnion[str, Any]zOptional[Union[str, Any]]r   dict)
predictionr0   r1   r   r   c                K  s   dS )a:  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr   r$   r9   r0   r1   r   r   r   r   _evaluate_strings   s    	z!StringEvaluator._evaluate_stringsc                  s"   t d| jf|||d|I dH S )aI  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr9   r0   r1   )r   r;   r:   r   r   r   _aevaluate_strings   s    z"StringEvaluator._aevaluate_stringsr.   c                K  s&   | j ||d | jf |||d|S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r7   r<   )r3   r;   r:   r   r   r   evaluate_strings   s    z StringEvaluator.evaluate_stringsc                  s,   | j ||d | jf |||d|I dH S )a	  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r7   r<   N)r3   r=   r:   r   r   r   aevaluate_strings   s    z!StringEvaluator.aevaluate_strings)r   r   r   r   r4   r6   r%   r   r;   r=   r>   r?   r   r   r   r   r5      s$   #r5   c                	   @  s   e Zd ZdZeddddddddddd	d
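# --- Illustrative sketch (not part of the original module) ---------------------
# A minimal, hypothetical StringEvaluator subclass showing the intended contract:
# implement ``_evaluate_strings`` and return a dict, conventionally with a "score"
# key. The class name and the exact-match scoring rule are assumptions for
# demonstration only.
class _ExampleExactMatchStringEvaluator(StringEvaluator):
    """Toy evaluator: score 1 if the prediction equals the reference exactly."""

    @property
    def requires_reference(self) -> bool:
        # Exact-match grading needs a reference label to compare against.
        return True

    def _evaluate_strings(
        self,
        *,
        prediction: Union[str, Any],
        reference: Optional[Union[str, Any]] = None,
        input: Optional[Union[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        # Binary score: 1 when the strings match exactly, 0 otherwise.
        return {"score": int(str(prediction) == str(reference))}

# e.g. ``_ExampleExactMatchStringEvaluator().evaluate_strings(
#     prediction="4", reference="4")`` would return ``{"score": 1}``.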
ZddddddddddddZddddddddddddZddddddddddddZdS )PairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nr7   r(   r.   r   r8   )r9   prediction_br0   r1   r   r   c                K  s   dS )1  Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr   r$   r9   rA   r0   r1   r   r   r   r   _evaluate_string_pairs   s    
z.PairwiseStringEvaluator._evaluate_string_pairsc                  s$   t d| jf||||d|I dH S )@  Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr9   rA   r0   r1   )r   rD   rC   r   r   r   _aevaluate_string_pairs  s    z/PairwiseStringEvaluator._aevaluate_string_pairsc                K  s(   | j ||d | jf ||||d|S )rB   r7   rF   )r3   rD   rC   r   r   r   evaluate_string_pairs/  s    z-PairwiseStringEvaluator.evaluate_string_pairsc                  s.   | j ||d | jf ||||d|I dH S )rE   r7   rF   N)r3   rG   rC   r   r   r   aevaluate_string_pairsL  s    z.PairwiseStringEvaluator.aevaluate_string_pairs)	r   r   r   r   r   rD   rG   rH   rI   r   r   r   r   r@      s   #"r@   c                	   @  s   e Zd ZdZeddddZedddd	dd
dddddZdddd	dd
dddddZdddd	dd
dddddZ	dddd	dd
dddddZ
dS )AgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.r    r!   c                 C  s   dS )r&   Tr   r#   r   r   r   r'   m  s    z'AgentTrajectoryEvaluator.requires_inputN)r0   r(   z!Sequence[Tuple[AgentAction, str]]r.   r   r8   )r9   agent_trajectoryr1   r0   r   r   c                K  s   dS )  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr   r$   r9   rK   r1   r0   r   r   r   r   _evaluate_agent_trajectoryr  s    
z3AgentTrajectoryEvaluator._evaluate_agent_trajectoryc                  s$   t d| jf||||d|I dH S )  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)r9   rK   r0   r1   )r   rN   rM   r   r   r   _aevaluate_agent_trajectory  s    z4AgentTrajectoryEvaluator._aevaluate_agent_trajectoryc                K  s(   | j ||d | jf ||||d|S )rL   r7   r9   r1   rK   r0   )r3   rN   rM   r   r   r   evaluate_agent_trajectory  s    z2AgentTrajectoryEvaluator.evaluate_agent_trajectoryc                  s.   | j ||d | jf ||||d|I dH S )rO   r7   rQ   N)r3   rP   rM   r   r   r   aevaluate_agent_trajectory  s    z3AgentTrajectoryEvaluator.aevaluate_agent_trajectory)r   r   r   r   r4   r'   r   rN   rP   rR   rS   r   r   r   r   rJ   j  s   %$rJ   )#r   