a
    `goy                     @  sh  d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZ ddlmZ ddlmZ zddlmZmZmZmZ W n* ey   ddlmZmZmZmZ Y n0 ddlZdd	lm Z  dd
l!m"Z"m#Z#m$Z$m%Z% e&e'Z(G dd deZ)G dd deddZ*G dd deZ+G dd deddZ,G dd dZ-ee+e,e.f Z/G dd deZ0ee0e.f Z1G dd de-Z2ddddZ3dZ4d d!d"d#Z5d$d$d%d&d'Z6G d(d) d)Z7d*d)d+d,d-Z8d.d/d+d0d1Z9d.d2d+d3d4Z:d5d6d7d8d9Z;eeeej% eej$ gee+e,f f eeej% eej$ gee+e,f f f Z<d.d:d+d;d<Z=dS )=z?This module contains the evaluator classes for evaluating runs.    )annotationsN)abstractmethod)
Any	AwaitableCallableDictListLiteralOptionalSequenceUnioncast)	TypedDict)schemas)	BaseModelFieldValidationError	validator)wraps)
SCORE_TYPE
VALUE_TYPEExampleRunc                   @  s"   e Zd ZU dZded< ded< dS )Categoryz$A category for categorical feedback.Optional[Union[float, int]]valuestrlabelN__name__
__module____qualname____doc____annotations__ r$   r$   l/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langsmith/evaluation/evaluator.pyr   1   s   
r   c                   @  s2   e Zd ZU dZded< ded< ded< ded< d	S )
FeedbackConfigziConfiguration to define a type of feedback.

    Applied on on the first creation of a feedback_key.
    z0Literal['continuous', 'categorical', 'freeform']typer   minmaxz%Optional[List[Union[Category, dict]]]
categoriesNr   r$   r$   r$   r%   r&   :   s
   
r&   F)totalc                   @  s   e Zd ZU dZded< dZded< dZded< dZd	ed
< dZded< e	e
dZded< dZded< dZded< dZded< dZded< G dd dZeddddd ZdS )EvaluationResultzEvaluation result.r   keyNr   scorer   r   zOptional[str]commentzOptional[Dict]
correction)default_factoryr   evaluator_infoz%Optional[Union[FeedbackConfig, dict]]feedback_configOptional[Union[uuid.UUID, str]]source_run_idtarget_run_idextrac                   @  s   e Zd ZdZdZdS )zEvaluationResult.ConfigzPydantic model configuration.FN)r   r    r!   r"   Zallow_extrar$   r$   r$   r%   Configd   s   r8   T)prec                 C  s6   d|vs|d du r2t |ttfr2td|  |S )z$Check that the value is not numeric.r.   NzJNumeric values should be provided in the 'score' field, not 'value'. Got: )
isinstanceintfloatloggerwarning)clsvvaluesr$   r$   r%   check_value_non_numerici   s    z(EvaluationResult.check_value_non_numeric)r   r    r!   r"   r#   r.   r   r/   r0   r   dictr2   r3   r5   r6   r7   r8   r   rB   r$   r$   r$   r%   r,   I   s   

r,   c                   @  s   e Zd ZU dZded< dS )EvaluationResultszqBatch evaluation results.

    This makes it easy for your evaluator to return multiple
    metrics at once.
    zList[EvaluationResult]resultsNr   r$   r$   r$   r%   rD   x   s   
rD   c                   @  s<   e Zd ZdZedddddddZdddddd	d
ZdS )RunEvaluatorzEvaluator interface class.Nr   Optional[Example]*Union[EvaluationResult, EvaluationResults]runexamplereturnc                 C  s   dS )zEvaluate an example.Nr$   selfrJ   rK   r$   r$   r%   evaluate_run   s    zRunEvaluator.evaluate_runc                   s   t  d| j||I dH S )z#Evaluate an example asynchronously.N)asyncioget_running_looprun_in_executorrO   rM   r$   r$   r%   aevaluate_run   s    
zRunEvaluator.aevaluate_run)N)N)r   r    r!   r"   r   rO   rS   r$   r$   r$   r%   rF      s     rF   c                   @  s:   e Zd ZU dZded< ded< dZded< dZd	ed
< dS )ComparisonEvaluationResultzFeedback scores for the results of comparative evaluations.

    These are generated by functions that compare two or more runs,
    returning a ranking or other feedback.
    r   r-   z'Dict[Union[uuid.UUID, str], SCORE_TYPE]scoresNr4   r5   z6Optional[Union[str, Dict[Union[uuid.UUID, str], str]]]r/   )r   r    r!   r"   r#   r5   r/   r$   r$   r$   r%   rT      s   
rT   c                      s   e Zd ZdZd)dddddZd*d	d
dddddZdd
ddddZdd
ddddZeddddZ	d+dddddd Z
d,ddd! fd"d#Zd-ddddd$d%Zd&dd'd(Z  ZS ).DynamicRunEvaluatora  A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`.

    This class is designed to be used with the `@run_evaluator` decorator, allowing
    functions that take a `Run` and an optional `Example` as arguments, and return
    an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`.

    Attributes:
        func (Callable): The function that is wrapped by this evaluator.
    NXCallable[[Run, Optional[Example]], Union[_RUNNABLE_OUTPUT, Awaitable[_RUNNABLE_OUTPUT]]]zIOptional[Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]]funcafuncc                 C  s   t |}|rt |}t||  ddlm} |durR|j|td| _t|dd| _t	
|r|durltd|j|td| _t|dd| _n4|jttttt gtf |td| _t|dd| _dS )zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns a dict or `ComparisonEvaluationResult`.
        r   run_helpersNZprocess_inputsr   rV   Func was provided as a coroutine function, but afunc was also provided. If providing both, func should be a regular function to avoid ambiguity.)_normalize_evaluator_funcr   	langsmithr\   ensure_traceable_serialize_inputsrZ   getattr_nameinspectiscoroutinefunction	TypeErrorr   r   r   r
   r   _RUNNABLE_OUTPUTrY   rN   rY   rZ   r\   r$   r$   r%   __init__   s0    
zDynamicRunEvaluator.__init__FzUnion[EvaluationResult, dict]	uuid.UUIDboolr,   )resultr5   allow_no_keyrL   c              
     s   t  tr js| _ S zd s.td  d vrD|rD| j d< t fdddD rhtd  tf i d|i W S  ty } ztd  |W Y d }~n
d }~0 0 d S )	NziExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got empty result: r-   c                 3  s   | ]}| vV  qd S Nr$   ).0krm   r$   r%   	<genexpr>       z@DynamicRunEvaluator._coerce_evaluation_result.<locals>.<genexpr>)r.   r   r/   zrExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score' or categorical 'value'; got r5   z[Expected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got )r:   r,   r5   
ValueErrorrd   allr   )rN   rm   r5   rn   er$   rr   r%   _coerce_evaluation_result   s8    

z-DynamicRunEvaluator._coerce_evaluation_resultzUnion[dict, EvaluationResults]rH   )rE   r5   rL   c                   sP   d|v r:|  } fdd|d D |d< tf i |S  jtt|ddS )NrE   c                   s   g | ]} j |d qS ))r5   )rx   )rp   rrN   r5   r$   r%   
<listcomp>  s   zBDynamicRunEvaluator._coerce_evaluation_results.<locals>.<listcomp>T)r5   rn   )copyrD   rx   r   rC   )rN   rE   r5   cpr$   rz   r%   _coerce_evaluation_results  s    
z.DynamicRunEvaluator._coerce_evaluation_resultszMUnion[EvaluationResult, EvaluationResults, dict, str, int, bool, float, list])rm   r5   rL   c                 C  s.   t |tr|js||_|S t|}| ||S ro   )r:   r,   r5   _format_evaluator_resultr~   )rN   rm   r5   r$   r$   r%   _format_result  s    
z"DynamicRunEvaluator._format_resultrL   c                 C  s
   t | dS zCheck if the evaluator function is asynchronous.

        Returns:
            bool: True if the evaluator function is asynchronous, False otherwise.
        rZ   hasattrrN   r$   r$   r%   is_async'  s    zDynamicRunEvaluator.is_asyncr   rG   rI   c                 C  s   t | ds6t }| r$tdn|| ||S t }d|j	i}t
|ddrbt|j|d< | j||||dd}| ||S )	a  Evaluate a run using the wrapped function.

        This method directly invokes the wrapped function with the provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        rY   tCannot call `evaluate_run` on an async run evaluator from within an running event loop. Use `aevaluate_run` instead.r6   
session_idN
experimentrun_idmetadataZlangsmith_extra)r   rP   get_event_loop
is_runningRuntimeErrorrun_until_completerS   uuiduuid4idrc   r   r   rY   r   )rN   rJ   rK   running_loopr5   r   rm   r$   r$   r%   rO   0  s"    

z DynamicRunEvaluator.evaluate_runrJ   rK   c                   sr   t | dst ||I dH S t }d|ji}t|ddrJt|j|d< | j	||||ddI dH }| 
||S )a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        rZ   Nr6   r   r   r   r   )r   superrS   r   r   r   rc   r   r   rZ   r   )rN   rJ   rK   r5   r   rm   	__class__r$   r%   rS   R  s    

z!DynamicRunEvaluator.aevaluate_runc                 C  s   |  ||S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        )rO   rM   r$   r$   r%   __call__m  s    zDynamicRunEvaluator.__call__r   c                 C  s   d| j  dS ))Represent the DynamicRunEvaluator object.z<DynamicRunEvaluator >rd   r   r$   r$   r%   __repr__~  s    zDynamicRunEvaluator.__repr__)N)F)N)N)N)r   r    r!   r"   rj   rx   r~   r   propertyr   rO   rS   r   r   __classcell__r$   r$   r   r%   rV      s    6 	 " rV   rW   rY   c                 C  s   t | S )zmCreate a run evaluator from a function.

    Decorator that transforms a function into a `RunEvaluator`.
    )rV   r   r$   r$   r%   run_evaluator  s    	r   i'  r   )objc                 C  s,   t | }t|tkr(|d td  d }|S )N   z...))reprlen_MAXSIZE)r   sr$   r$   r%   _maxsize_repr  s    r   rC   )inputsrL   c                 C  s&   t | d}t | d}||dS )NrJ   rK   r   )r   get)r   Zrun_truncatedZexample_truncatedr$   r$   r%   rb     s    rb   c                   @  s   e Zd ZdZd"dddddZedd	d
dZd#ddddddZd$ddddddZd%ddddddZ	dd	ddZ
edddddZdddddd d!ZdS )&DynamicComparisonRunEvaluatorz4Compare predictions (as traces) from 2 or more runs.NfCallable[[Sequence[Run], Optional[Example]], Union[_COMPARISON_OUTPUT, Awaitable[_COMPARISON_OUTPUT]]]zUOptional[Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]rX   c                 C  s   t |}|rt |}t||  ddlm} |durR|j|td| _t|dd| _t	
|r|durltd|j|td| _t|dd| _n8|jtttt tt gtf |td| _t|dd| _dS )zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns an `EvaluationResult` or `EvaluationResults`.
        r   r[   Nr]   r   rV   r^   )$_normalize_comparison_evaluator_funcr   r`   r\   ra   rb   rZ   rc   rd   re   rf   rg   r   r   r   r   r
   r   _COMPARISON_OUTPUTrY   ri   r$   r$   r%   rj     s>    

z&DynamicComparisonRunEvaluator.__init__rl   r   c                 C  s
   t | dS r   r   r   r$   r$   r%   r     s    z&DynamicComparisonRunEvaluator.is_asyncSequence[Run]rG   rT   runsrK   rL   c                 C  sl   t | ds6t }| r$tdn|| ||S t }| 	|}| j
||||dd}| |||S )zCompare runs to score preferences.

        Args:
            runs: A list of runs to compare.
            example: An optional example to be used in the evaluation.

        rY   r   r   tagsr   )r   rP   r   r   r   r   acompare_runsr   r   	_get_tagsrY   _format_results)rN   r   rK   r   r5   r   rm   r$   r$   r%   compare_runs  s"    



z*DynamicComparisonRunEvaluator.compare_runsc                   sR   t | ds| ||S t }| |}| j||||ddI dH }| |||S )a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            runs (Run): The runs to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        rZ   r   r   N)r   r   r   r   r   rZ   r   )rN   r   rK   r5   r   rm   r$   r$   r%   r     s    

z+DynamicComparisonRunEvaluator.acompare_runsc                 C  s   |  ||S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        )r   )rN   r   rK   r$   r$   r%   r     s    z&DynamicComparisonRunEvaluator.__call__r   c                 C  s   d| j  dS )r   z<DynamicComparisonRunEvaluator r   r   r   r$   r$   r%   r   /  s    z&DynamicComparisonRunEvaluator.__repr__z	List[str])r   rL   c                 C  sF   g }| D ]8}| dt|j  t|ddr| dt|j  q|S )zExtract tags from runs.zrun:r   Nzexperiment:)appendr   r   rc   r   )r   r   rJ   r$   r$   r%   r   3  s    z'DynamicComparisonRunEvaluator._get_tagsz-Union[dict, list, ComparisonEvaluationResult]rk   )rm   r5   r   rL   c              
   C  s   t |tr|js||_|S t |trDdd t||D | j|d}n0t |trbd|vrt| j|d< nd|}t|ztf i d|i|W S  ty } ztd| |W Y d }~n
d }~0 0 d S )Nc                 S  s   i | ]\}}|j |qS r$   )r   )rp   rJ   r.   r$   r$   r%   
<dictcomp>J  rt   zADynamicComparisonRunEvaluator._format_results.<locals>.<dictcomp>)rU   r-   r5   r-   zXExpected 'dict', 'list' or 'ComparisonEvaluationResult' result object. Received: result=r5   zExpected a dictionary with a 'key' and dictionary of scores mappingrun IDs to numeric scores, or ComparisonEvaluationResult object, got )	r:   rT   r5   listziprd   rC   ru   r   )rN   rm   r5   r   msgrw   r$   r$   r%   r   >  s:    



z-DynamicComparisonRunEvaluator._format_results)N)N)N)N)r   r    r!   r"   rj   r   r   r   r   r   r   staticmethodr   r   r$   r$   r$   r%   r     s    8	    
r   r   )rY   rL   c                 C  s   t | S )z.Create a comaprison evaluator from a function.)r   r   r$   r$   r%   comparison_evaluatorc  s    r   r   z|Union[Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]]c                   s  dt  }dd |j D rHtfddD s^tdkr^d d}t|ntfd	dD rd
dgkr S t  rdddd fdd}t drt	 dn|j
|_
|S dddd fdd}t drt	 dn|j
|_
|S d S )N)rJ   rK   r   outputsreference_outputsattachmentsc                 S  s&   g | ]\}}|j |j|jfv r|qS r$   kindPOSITIONAL_OR_KEYWORDPOSITIONAL_ONLYrp   pnamepr$   r$   r%   r{   |  s   z-_normalize_evaluator_func.<locals>.<listcomp>c                 3  s   | ]}| v V  qd S ro   r$   rp   r   supported_argsr$   r%   rs     rt   z,_normalize_evaluator_func.<locals>.<genexpr>   kInvalid evaluator function. Must have at least one positional argument. Supported positional arguments are . Please see https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluatorsc                 3  s   | ]}| v V  qd S ro   r$   r   r   r$   r%   rs     s   rJ   rK   r   rG   rh   rI   c                   s^   | ||r|j ni | jpi |r&|jp(i ni |r6|jp8i ni d  fddD }| I d H S )NrJ   rK   r   r   r   r   c                 3  s   | ]} | V  qd S ro   r$   rp   argZarg_mapr$   r%   rs     rt   z>_normalize_evaluator_func.<locals>.awrapper.<locals>.<genexpr>r   r   r   rJ   rK   argsrY   positional_argsr   r%   awrapper  s    z+_normalize_evaluator_func.<locals>.awrapperr   r   c                   sP   | ||r|j ni | jpi |jp i |r.|jp0i ni d  fddD }| S )Nr   c                 3  s   | ]} | V  qd S ro   r$   r   r   r$   r%   rs     rt   z=_normalize_evaluator_func.<locals>.wrapper.<locals>.<genexpr>r   r   r   r   r%   wrapper  s    z*_normalize_evaluator_func.<locals>.wrapperre   	signature
parametersitemsrv   r   ru   rf   r   rc   r   rY   sigr   r   r   r$   rY   r   r   r%   r_   m  sD    




r_   zUnion[Callable[[Sequence[Run], Optional[Example]], _COMPARISON_OUTPUT], Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]c                   s  dt  }dd |j D rHtfddD s^tdkr^d d}t|ntfd	dD rd
dgkr S t  rdddd fdd}t drt	 dn|j
|_
|S dddd fdd}t drt	 dn|j
|_
|S d S )Nr   rK   r   r   r   c                 S  s&   g | ]\}}|j |j|jfv r|qS r$   r   r   r$   r$   r%   r{     s   z8_normalize_comparison_evaluator_func.<locals>.<listcomp>c                 3  s   | ]}| v V  qd S ro   r$   r   r   r$   r%   rs     rt   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>r   r   r   c                 3  s   | ]}| v V  qd S ro   r$   r   r   r$   r%   rs     s   r   rK   r   rG   r   r   c                   sR   | ||r|j ni dd | D |r*|jp,i ni d  fddD }| I d H S )Nc                 S  s   g | ]}|j pi qS r$   r   rp   rJ   r$   r$   r%   r{     rt   zJ_normalize_comparison_evaluator_func.<locals>.awrapper.<locals>.<listcomp>r   c                 3  s   | ]} | V  qd S ro   r$   r   r   r$   r%   rs     rt   zI_normalize_comparison_evaluator_func.<locals>.awrapper.<locals>.<genexpr>r   r   r   rK   r   r   r   r%   r     s    z6_normalize_comparison_evaluator_func.<locals>.awrapperr   r   c                   sL   | ||r|j ni dd | D |r*|jp,i ni d  fddD }| S )Nc                 S  s   g | ]}|j pi qS r$   r   r   r$   r$   r%   r{     rt   zI_normalize_comparison_evaluator_func.<locals>.wrapper.<locals>.<listcomp>r   c                 3  s   | ]} | V  qd S ro   r$   r   r   r$   r%   rs     rt   zH_normalize_comparison_evaluator_func.<locals>.wrapper.<locals>.<genexpr>r   r   r   r   r%   r     s    z5_normalize_comparison_evaluator_func.<locals>.wrapperr   r   r$   r   r%   r     sD    




r   z;Union[EvaluationResults, dict, str, int, bool, float, list]zUnion[EvaluationResults, dict])rm   rL   c                 C  s   t | tttfrd| i} nx| s.td|  ndt | trdtdd | D sZtd|  dd| i} n.t | trxd| i} nt | trntd	|  | S )
Nr.   zdExpected a non-empty dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got c                 s  s   | ]}t |tV  qd S ro   )r:   rC   )rp   xr$   r$   r%   rs     rt   z+_format_evaluator_result.<locals>.<genexpr>z8Expected a list of dicts or EvaluationResults. Received .rE   r   zZExpected a dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got )	r:   rl   r<   r;   ru   r   rv   r   rC   rr   r$   r$   r%   r   
  s0    






r   SUMMARY_EVALUATOR_Tc                   s   dt  }dd |j D rHtfddD srtdkrrd d}rh|d	 d7 }t|n^tfd
dD rddgkr S dddd fdd}t drt dn|j	|_	|S d S )Nr   examplesr   r   r   c                 S  s&   g | ]\}}|j |j|jfv r|qS r$   r   r   r$   r$   r%   r{   5  s   z0_normalize_summary_evaluator.<locals>.<listcomp>c                 3  s   | ]}| v V  qd S ro   r$   r   r   r$   r%   rs   ;  rt   z/_normalize_summary_evaluator.<locals>.<genexpr>r   r   r   z Received positional arguments c                 3  s   | ]}| v V  qd S ro   r$   r   r   r$   r%   rs   G  s   r   r   zSequence[schemas.Run]zSequence[schemas.Example]rH   )r   r   rL   c                   s^   | |dd |D dd | D dd |D d  fddD }| }t |trV|S t|S )Nc                 S  s   g | ]
}|j qS r$   )r   rp   rK   r$   r$   r%   r{   S  rt   zA_normalize_summary_evaluator.<locals>.wrapper.<locals>.<listcomp>c                 S  s   g | ]}|j pi qS r$   r   r   r$   r$   r%   r{   T  rt   c                 S  s   g | ]}|j pi qS r$   r   r   r$   r$   r%   r{   U  rt   r   c                 3  s   | ]} | V  qd S ro   r$   r   r   r$   r%   rs   W  rt   z@_normalize_summary_evaluator.<locals>.wrapper.<locals>.<genexpr>)r:   r,   r   )r   r   r   rm   r   r   r%   r   M  s    
z-_normalize_summary_evaluator.<locals>.wrapperr   )
re   r   r   r   rv   r   ru   r   rc   r   )rY   r   r   r   r$   r   r%   _normalize_summary_evaluator2  s6    



r   )>r"   
__future__r   rP   re   r   abcr   typingr   r   r   r   r   r	   r
   r   r   r   Ztyping_extensionsr   r`   r   Zpydantic.v1r   r   r   r   ImportErrorZpydanticlogging	functoolsr   Zlangsmith.schemasr   r   r   r   	getLoggerr   r=   r   r&   r,   rD   rF   rC   rh   rT   r   rV   r   r   r   rb   r   r   r_   r   r   r   r   r$   r$   r$   r%   <module>   sd   0
	/ W E
SJ

