"""LLM Chains for evaluating question answering."""

from __future__ import annotations

import re
import string
from typing import Any, List, Optional, Sequence, Tuple

from langchain_core.callbacks.manager import Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from pydantic import ConfigDict

from langchain.chains.llm import LLMChain
from langchain.evaluation.qa.eval_prompt import CONTEXT_PROMPT, COT_PROMPT, PROMPT
from langchain.evaluation.schema import LLMEvalChain, StringEvaluator
from langchain.schema import RUN_KEY


def _get_score(text: str) -> Optional[Tuple[str, int]]:
    """Extract a CORRECT/INCORRECT verdict and numeric score from grader output."""
    match = re.search(r"grade:\s*(correct|incorrect)", text.strip(), re.IGNORECASE)
    if match:
        if match.group(1).upper() == "CORRECT":
            return "CORRECT", 1
        elif match.group(1).upper() == "INCORRECT":
            return "INCORRECT", 0
    try:
        # Fall back to inspecting the first and last words, stripped of punctuation.
        first_word = (
            text.strip().split()[0].translate(str.maketrans("", "", string.punctuation))
        )
        if first_word.upper() == "CORRECT":
            return "CORRECT", 1
        elif first_word.upper() == "INCORRECT":
            return "INCORRECT", 0
        last_word = (
            text.strip()
            .split()[-1]
            .translate(str.maketrans("", "", string.punctuation))
        )
        if last_word.upper() == "CORRECT":
            return "CORRECT", 1
        elif last_word.upper() == "INCORRECT":
            return "INCORRECT", 0
    except IndexError:
        pass
    return None


def _parse_string_eval_output(text: str) -> dict:
    """Parse the output text.

    Args:
        text (str): The output text to parse.

    Returns:
        dict: The parsed output, with "reasoning", "value", and "score" keys.
    """
    reasoning = text.strip()
    parsed_scores = _get_score(reasoning)
    if parsed_scores is None:
        value, score = None, None
    else:
        value, score = parsed_scores
    return {
        "reasoning": reasoning,
        "value": value,
        "score": score,
    }

class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
    """LLM Chain for evaluating question answering."""

    output_key: str = "results"

    model_config = ConfigDict(
        extra="ignore",
    )

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "correctness"

    @property
    def requires_reference(self) -> bool:
        return True

    @property
    def requires_input(self) -> bool:
        return True

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> QAEvalChain:
        """Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables
                'query', 'answer' and 'result' that will be used as the prompt
                for evaluation. Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            QAEvalChain: the loaded QA eval chain.
        """
        prompt = prompt or PROMPT
        expected_input_vars = {"query", "answer", "result"}
        if expected_input_vars != set(prompt.input_variables):
            raise ValueError(
                f"Input variables should be {expected_input_vars}, "
                f"but got {prompt.input_variables}"
            )
        return cls(llm=llm, prompt=prompt, **kwargs)

    def evaluate(
        self,
        examples: Sequence[dict],
        predictions: Sequence[dict],
        question_key: str = "query",
        answer_key: str = "answer",
        prediction_key: str = "result",
        *,
        callbacks: Callbacks = None,
    ) -> List[dict]:
        """Evaluate question answering examples and predictions."""
        inputs = [
            {
                "query": example[question_key],
                "answer": example[answer_key],
                "result": predictions[i][prediction_key],
            }
            for i, example in enumerate(examples)
        ]
        return self.apply(inputs, callbacks=callbacks)

    def _prepare_output(self, result: dict) -> dict:
        parsed_result = _parse_string_eval_output(result[self.output_key])
        if RUN_KEY in result:
            parsed_result[RUN_KEY] = result[RUN_KEY]
        return parsed_result

    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        """Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): the LLM or chain prediction to evaluate.
            reference (Optional[str], optional): the reference label
                to evaluate against.
            input (Optional[str], optional): the input to consider during evaluation.
            callbacks (Callbacks, optional): the callbacks to use for tracing.
            include_run_info (bool, optional): whether to include run info in the
                returned results.
            **kwargs: additional keyword arguments, including callbacks, tags, etc.

        Returns:
            dict: The evaluation results containing the score or value.
        """
        result = self(
            {
                "query": input,
                "answer": reference,
                "result": prediction,
            },
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = await self.acall(
            inputs={"query": input, "answer": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)
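
# Example usage sketch (not from the original source; assumes `langchain_openai`
# is installed and an API key is configured, though any chat model works):
#
#   from langchain_openai import ChatOpenAI
#
#   eval_chain = QAEvalChain.from_llm(ChatOpenAI(temperature=0))
#   examples = [{"query": "What is 2 + 2?", "answer": "4"}]
#   predictions = [{"result": "2 + 2 equals 4."}]
#   graded = eval_chain.evaluate(examples, predictions)
#   # `graded` is a list of dicts keyed by `output_key` ("results"), e.g.
#   # [{"results": "GRADE: CORRECT"}]; the exact text depends on the model.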
   @  s  e Zd ZdZeddddZeddddZedddd	Ze	d
dZ
edddddZeddddZed0dddd dddZd1dddddddd dd!d"d#Zd$d$d%d&d'Zdddd(d)dd*d*d ddd$d+d,d-Zdddd(d)dd*d*d ddd$d+d.d/ZdS )2ContextQAEvalChainz3LLM Chain for evaluating QA w/o GT based on contextr7   r8   c                 C  s   dS r9   r)   r:   r)   r)   r*   r<      s    z%ContextQAEvalChain.is_lc_serializablec                 C  s   dS )z.Whether the chain requires a reference string.Tr)   r=   r)   r)   r*   rA      s    z%ContextQAEvalChain.requires_referencec                 C  s   dS )z+Whether the chain requires an input string.Tr)   r=   r)   r)   r*   rB      s    z!ContextQAEvalChain.requires_inputr4   r5   r
   None)rF   r   c                 C  s0   h d}|t |jkr,td| d|j d S )N>   contextrH   rJ   rK   rL   )rN   rO   rP   )r;   rF   rQ   r)   r)   r*   _validate_input_vars   s    z'ContextQAEvalChain._validate_input_varsr   c                 C  s   dS )NzContextual Accuracyr)   r=   r)   r)   r*   r?      s    z"ContextQAEvalChain.evaluation_nameNr	   rC   r   rD   c                 K  s&   |pt }| | | f ||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'query', 'context' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            ContextQAEvalChain: the loaded QA eval chain.
        rM   )r   r   r;   rE   rF   rG   r)   r)   r*   rR      s    
zContextQAEvalChain.from_llmrJ   r   rH   rS   rU   r   )rV   rW   rX   context_keyrZ   rT   r   c                  s*    fddt |D }| j||dS )r[   c                   s,   g | ]$\}}| |  |  d qS )rJ   r   rH   r)   r]   r   rZ   rW   rX   r)   r*   ra     s
   
z/ContextQAEvalChain.evaluate.<locals>.<listcomp>rS   rb   )r>   rV   rW   rX   r   rZ   rT   re   r)   r   r*   rf     s    	zContextQAEvalChain.evaluater,   rg   c                 C  s&   t || j }t|v r"|t |t< |S rh   ri   rj   r)   r)   r*   rl   "  s    z"ContextQAEvalChain._prepare_outputFrm   rq   rr   c                K  s    | |||d||d}|  |S )Nr   rt   ru   rv   r)   r)   r*   rw   (  s    
	z$ContextQAEvalChain._evaluate_stringsc                  s(   | j |||d||dI d H }| |S )Nr   rx   ry   rv   r)   r)   r*   rz   =  s    

z%ContextQAEvalChain._aevaluate_strings)N)rJ   r   rH   )r{   r|   r}   r~   r   r<   r   rA   rB   r   r   r   r?   rR   rf   rl   rw   rz   r)   r)   r)   r*   r      sD       
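
# Example usage sketch (not from the original source): grading an answer against
# retrieved context rather than a gold answer, under the same chat-model
# assumption as above:
#
#   context_eval = ContextQAEvalChain.from_llm(ChatOpenAI(temperature=0))
#   graded = context_eval.evaluate(
#       examples=[{
#           "query": "Who wrote the memo?",
#           "context": "The memo was written by Dana on 3 March.",
#       }],
#       predictions=[{"result": "Dana wrote it."}],
#   )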

class CotQAEvalChain(ContextQAEvalChain):
    """LLM Chain for evaluating QA using chain of thought reasoning."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "COT Contextual Accuracy"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> CotQAEvalChain:
        """Load QA Eval Chain from LLM."""
        prompt = prompt or COT_PROMPT
        cls._validate_input_vars(prompt)
        return cls(llm=llm, prompt=prompt, **kwargs)