a
    bgk                     @   sT  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZm Z m!Z!m"Z"m#Z# e $e%Z&edddZ'ee( dddZ)ee( dddZ*de(eee+dddZ,e(e(edddZ-G dd dZ.G dd deeZ/dS )    N)deepcopy)Path)AnyDictListOptionalSequenceUnion)AgentActionAgentFinish)BaseCallbackHandler)Document)	LLMResult)get_from_dict_or_envguard_import)BaseMetadataCallbackHandlerflatten_dicthash_stringimport_pandasimport_spacyimport_textstatreturnc                   C   s   t dS )zKImport the mlflow python package and raise an error if it is not installed.mlflow)r    r   r   {/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/callbacks/mlflow_callback.pyimport_mlflow   s    r   c                   C   s   g dS )z!Get the metrics to log to MLFlow.)stepstartsendserrorstext_ctrchain_starts
chain_ends
llm_startsllm_endsllm_streamstool_starts	tool_ends
agent_endsretriever_startsretriever_endsr   r   r   r   r   mlflow_callback_metrics"   s    r,   c                   C   s   g dS )z.Get the text complexity metrics from textstat.)Zflesch_reading_easeZflesch_kincaid_gradeZ
smog_indexZcoleman_liau_indexZautomated_readability_indexZdale_chall_readability_scoreZdifficult_wordsZlinsear_write_formulaZgunning_fogZfernandez_huertaZszigriszt_pazosZgutierrez_poliniZcrawfordZgulpease_indexZosmanr   r   r   r   r   get_text_complexity_metrics7   s    r-   )textnlptextstatr   c           
         s   i }dur: fddt  D }|d|i || |durt }| }|jj|dddd}|jj|d	ddd}||d
}	||	 |S )a  Analyze text using textstat and spacy.

    Parameters:
        text (str): The text to analyze.
        nlp (spacy.lang): The spacy language model to use for visualization.
        textstat: The textstat library to use for complexity metrics calculation.

    Returns:
        (dict): A dictionary containing the complexity metrics and visualization
            files serialized to  HTML string.
    Nc                    s   i | ]}|t | qS r   )getattr.0keyr.   r0   r   r   
<dictcomp>_   s   z analyze_text.<locals>.<dictcomp>text_complexity_metricsdepFT)stylejupyterpageent)dependency_treeentities)r-   updater   Zdisplacyrender)
r.   r/   r0   respr7   spacydocZdep_outZent_outZtext_visualizationsr   r5   r   analyze_textM   s"    

rD   )prompt
generationr   c                 C   s*   |  dd}| dd}d| d| dS )zConstruct an html element from a prompt and a generation.

    Parameters:
        prompt (str): The prompt.
        generation (str): The generation.

    Returns:
        (str): The html string.
z<br>z
    <p style="color:black;">z>:</p>
    <blockquote>
      <p style="color:green;">
        z"
      </p>
    </blockquote>
    )replace)rE   rF   Zformatted_promptZformatted_generationr   r   r   )construct_html_from_prompt_and_generationw   s    	rI   c                   @   s   e Zd ZdZedddZd%eeeef ee ddddZ	dd	d
dZ
eeddddZd&eeeef eeef f ee ddddZeeef eddddZeeddddZeeddddZeeddddZeddd d!Zedd"d#d$ZdS )'MlflowLoggera  Callback Handler that logs metrics and artifacts to mlflow server.

    Parameters:
        name (str): Name of the run.
        experiment (str): Name of the experiment.
        tags (dict): Tags to be attached for the run.
        tracking_uri (str): MLflow tracking server uri.

    This handler implements the helper functions to initialize,
    log metrics and artifacts to the mlflow server.
    )kwargsc                 K   s   t  | _dtjv r@| jd | jjj | _| j	| j| _
nzt|ddd}| j| |d }r|| j|jj| _n>t|dd}| j|| _
| j
d ur| j
j| _n| j|| _| |d	 |d
 |dd  |dd| _d S )NDATABRICKS_RUNTIME_VERSIONZ
databrickstracking_uriZMLFLOW_TRACKING_URI run_idexperiment_nameZMLFLOW_EXPERIMENT_NAMErun_namerun_tagsartifacts_dir)r   r   osenvironZset_tracking_uriZtrackingZfluentZ_get_experiment_id	mlf_expidZget_experimentZmlf_expr   getZget_runinfoZexperiment_idZget_experiment_by_nameZcreate_experiment	start_rundir)selfrK   rM   rO   rP   r   r   r   __init__   s,    

zMlflowLogger.__init__N)nametagsrO   r   c                 C   sh   |du r^| dr>dtjtjtj dd}|dd | }| j j	| j
||d}|jj}|| _dS )z
        If run_id is provided, it will reuse the run with the given run_id.
        Otherwise, it starts a new run, auto generates the random suffix for name.
        Nz-%rN      )k)rQ   r^   )endswithjoinrandomchoicesstringascii_uppercasedigitsr   ZMlflowClientZ
create_runrV   rX   rO   )r[   r]   r^   rO   rnamerunr   r   r   rY      s    

zMlflowLogger.start_runr   c                 C   s   | j   dS )zTo finish the run.N)r   Zend_runr[   r   r   r   
finish_run   s    zMlflowLogger.finish_run)r4   valuer   c                 C   s   | j j||| jd dS )zTo log metric to mlflow server.rO   N)r   Z
log_metricrO   )r[   r4   rm   r   r   r   metric   s    zMlflowLogger.metricr   )datar   r   c                 C   s   | j j|| jd dS )z%To log all metrics in the input dict.rn   N)r   Zlog_metricsrO   )r[   rp   r   r   r   r   metrics   s    zMlflowLogger.metrics)rp   filenamer   c                 C   s*   | j j|tj| j| d| jd dS )z,To log the input data as json file artifact.z.jsonrn   N)r   Zlog_dictrT   pathrc   rZ   rO   )r[   rp   rr   r   r   r   jsonf   s    zMlflowLogger.jsonf)r]   	dataframer   c                 C   s   |  | d|  dS )z1To log the input pandas dataframe as a html tableZtable_N)htmlZto_html)r[   r]   ru   r   r   r   table   s    zMlflowLogger.table)rv   rr   r   c                 C   s*   | j j|tj| j| d| jd dS )z3To log the input html string as html file artifact.z.htmlrn   Nr   Zlog_textrT   rs   rc   rZ   rO   )r[   rv   rr   r   r   r   rv      s    zMlflowLogger.html)r.   rr   r   c                 C   s*   | j j|tj| j| d| jd dS )z,To log the input text as text file artifact.z.txtrn   Nrx   )r[   r.   rr   r   r   r   r.      s    zMlflowLogger.text)rs   r   c                 C   s   | j j|| jd dS )z/To upload the file from given path as artifact.rn   N)r   Zlog_artifactrO   )r[   rs   r   r   r   artifact   s    zMlflowLogger.artifact)chainr   c                 C   s   | j jj|d| jd d S )Nzlangchain-modelrn   )r   	langchainZ	log_modelrO   )r[   rz   r   r   r   langchain_artifact   s    zMlflowLogger.langchain_artifact)N)r   )__name__
__module____qualname____doc__r   r\   strr   r   rY   rl   floatro   r	   intrq   rt   rw   rv   r.   ry   r|   r   r   r   r   rJ      s(   !   rJ   c                	       s  e Zd ZdZdAee ee ee ee ee edd fddZdd	d
dZeee	f e
e e	ddddZee	ddddZee	ddddZee	ddddZeee	f eee	f e	ddddZeeee	f ee
e f e	ddddZee	ddddZeee	f ee	dd d!d"Ze	e	dd#d$d%Zee	ddd&d'Zee	dd(d)d*Zee	dd+d,d-Zee	e	d.d/d0Zeee	f ee	e	d1d2d3Zee e	e	d4d5d6Z ee	e	dd7d8Z!e	d	d9d:Z"e#d	d;d<Z$dBe	e#dd>d?d@Z%  Z&S )CMlflowCallbackHandleraO  Callback Handler that logs metrics and artifacts to mlflow server.

    Parameters:
        name (str): Name of the run.
        experiment (str): Name of the experiment.
        tags (dict): Tags to be attached for the run.
        tracking_uri (str): MLflow tracking server uri.

    This handler will utilize the associated callback method called and formats
    the input of each callback function with metadata regarding the state of LLM run,
    and adds the response to the list of records for both the {method}_records and
    action. It then logs the response to mlflow server.
    langchainrun-%r{   NrN   )r]   
experimentr^   rM   rO   rS   r   c           	         s^  t   t  t   || _|| _|p(i | _|| _|| _|| _	t
 | _t| j| j| j| j| j| j	d| _g | _d| _z
t }W n0 ty } zt|j W Y d}~n8d}~0 0 z|d| _W n ty   td Y n0 zt | _W n8 ty$ } zt|j d| _W Y d}~n
d}~0 0 dd t D | _g g g g g g g g g g g g g d| _dS )zInitialize callback handler.)rM   rP   rQ   rR   rO   rS   NZen_core_web_smzfRun `python -m spacy download en_core_web_sm` to download en_core_web_sm model for text visualization.c                 S   s   i | ]
}|d qS )r   r   r2   r   r   r   r6   =      z2MlflowCallbackHandler.__init__.<locals>.<dictcomp>)on_llm_start_recordson_llm_token_recordson_llm_end_recordson_chain_start_recordson_chain_end_recordson_tool_start_recordson_tool_end_recordson_text_recordson_agent_finish_recordson_agent_action_recordson_retriever_start_recordson_retriever_end_recordsaction_records)r   r   superr\   r]   r   r^   rM   rO   rS   tempfileTemporaryDirectorytemp_dirrJ   mlflgr   r/   r   ImportErrorloggerwarningmsgloadOSErrorr   r0   r,   rq   records)	r[   r]   r   r^   rM   rO   rS   rB   e	__class__r   r   r\     sd    



	
"
zMlflowCallbackHandler.__init__r   c                 C   s@   | j  D ]\}}d| j |< q
| j D ]\}}g | j|< q(d S )Nr   )rq   itemsr   )r[   r`   vr   r   r   _resetO  s    zMlflowCallbackHandler._reset)
serializedpromptsrK   r   c           	      K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|ddi |t| || j  | jj | j | j d d t|D ]R\}}t|}||d< | jd	 | | jd
 | | j|d| d|  qdS )zRun when LLM starts.r      r$   r   actionon_llm_startr   rE   r   r   Z
llm_start_Z_prompt_N)	rq   r?   r   r   	enumerater   r   appendrt   )	r[   r   r   rK   r$   rA   idxrE   Zprompt_respr   r   r   r   U  s    
z"MlflowCallbackHandler.on_llm_start)tokenrK   r   c                 K   s   | j d  d7  < | j d  d7  < | j d }i }|d|d || j  | jj | j | j d d | jd | | jd | | j|d	|  d
S )z#Run when LLM generates a new token.r   r   r&   on_llm_new_token)r   r   r   r   r   Zllm_new_tokens_Nrq   r?   r   r   r   rt   )r[   r   rK   r&   rA   r   r   r   r   m  s    
z&MlflowCallbackHandler.on_llm_new_token)responserK   r   c              	   K   s  | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|ddi |t|jp`i  || j  | jj | j | j d d |jD ] }t|D ]\}}t|}|t|  |t	|j
| j| jd d	|v r|d	}	| jj |	| j d d | jd
 | | jd | | j|d| d|  d|v rh|d }
| j|
dt|j
  d|v r|d }| j|dt|j
  qqdS )zRun when LLM ends running.r   r   r%   r   r   
on_llm_endr   )r/   r0   r7   r   r   Zllm_end_Z_generation_r=   zdep-r>   zent-N)rq   r?   r   Z
llm_outputr   generationsr   r   dictrD   r.   r/   r0   popr   r   rt   rv   r   )r[   r   rK   r%   rA   r   r   rF   Zgeneration_respZcomplexity_metricsr=   r>   r   r   r   r   ~  sN    


z MlflowCallbackHandler.on_llm_end)errorrK   r   c                 K   s(   | j d  d7  < | j d  d7  < dS )zRun when LLM errors.r   r   r    Nrq   r[   r   rK   r   r   r   on_llm_error  s    z"MlflowCallbackHandler.on_llm_error)r   inputsrK   r   c                 K   s  | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|ddi |t| || j  | jj | j | j d d t|trdd	d
 | D }n(t|trddd
 |D }nt	|}t
|}||d< | jd | | jd | | j|d|  dS )zRun when chain starts running.r   r   r"   r   r   on_chain_startr   ,c                 S   s   g | ]\}}| d | qS =r   r3   r`   r   r   r   r   
<listcomp>  r   z8MlflowCallbackHandler.on_chain_start.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   r   )r3   inputr   r   r   r     r   r   r   r   Zchain_start_N)rq   r?   r   r   
isinstancer   rc   r   listr   r   r   r   rt   )r[   r   r   rK   r"   rA   Zchain_inputZ
input_respr   r   r   r     s&    


z$MlflowCallbackHandler.on_chain_start)outputsrK   r   c                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }t|trhddd | D }n$t|trdtt|}nt|}|d|d	 || j  | j	j | j | j d d
 | j
d | | j
d | | j	|d|  dS )zRun when chain ends running.r   r   r#   r   r   c                 S   s   g | ]\}}| d | qS r   r   r   r   r   r   r     r   z6MlflowCallbackHandler.on_chain_end.<locals>.<listcomp>on_chain_end)r   r   r   r   r   Z
chain_end_N)rq   r   r   rc   r   r   mapr   r?   r   r   r   rt   )r[   r   rK   r#   rA   Zchain_outputr   r   r   r     s     


z"MlflowCallbackHandler.on_chain_endc                 K   s(   | j d  d7  < | j d  d7  < dS )zRun when chain errors.r   r   r    Nr   r   r   r   r   on_chain_error  s    z$MlflowCallbackHandler.on_chain_error)r   	input_strrK   r   c                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|d|d |t| || j  | jj | j | j d d | jd | | jd	 | | j|d
|  dS )zRun when tool starts running.r   r   r'   r   on_tool_start)r   r   r   r   r   Ztool_start_Nrq   r?   r   r   r   r   rt   )r[   r   r   rK   r'   rA   r   r   r   r     s    
z#MlflowCallbackHandler.on_tool_start)outputrK   r   c                 K   s   t |}| jd  d7  < | jd  d7  < | jd  d7  < | jd }i }|d|d || j | jj| j| jd d | jd | | jd	 | | j|d
|  dS )zRun when tool ends running.r   r   r(   r   on_tool_end)r   r   r   r   r   Z	tool_end_N)r   rq   r?   r   r   r   rt   )r[   r   rK   r(   rA   r   r   r   r     s    
z!MlflowCallbackHandler.on_tool_endc                 K   s(   | j d  d7  < | j d  d7  < dS )zRun when tool errors.r   r   r    Nr   r   r   r   r   on_tool_error  s    z#MlflowCallbackHandler.on_tool_error)r.   rK   r   c                 K   s   | j d  d7  < | j d  d7  < | j d }i }|d|d || j  | jj | j | j d d | jd | | jd | | j|d	|  d
S )z,
        Run when text is received.
        r   r   r!   on_text)r   r.   r   r   r   Zon_text_Nr   )r[   r.   rK   r!   rA   r   r   r   r     s    
zMlflowCallbackHandler.on_text)finishrK   r   c                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|d|jd |jd || j  | jj | j | j d d | jd	 | | jd
 | | j|d|  dS )zRun when agent ends running.r   r   r)   r   on_agent_finishr   )r   r   logr   r   r   Zagent_finish_N)rq   r?   Zreturn_valuesr   r   r   r   rt   )r[   r   rK   r)   rA   r   r   r   r   -  s     
z%MlflowCallbackHandler.on_agent_finish)r   rK   r   c                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|d|j|j|jd || j  | jj | j | j d d | jd | | jd	 | | j|d
|  dS )zRun on agent action.r   r   r'   r   on_agent_action)r   tool
tool_inputr   r   r   r   Zagent_action_N)	rq   r?   r   r   r   r   r   r   rt   )r[   r   rK   r'   rA   r   r   r   r   D  s"    
z%MlflowCallbackHandler.on_agent_action)r   queryrK   r   c                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|d|d |t| || j  | jj | j | j d d | jd | | jd	 | | j|d
|  dS )z"Run when Retriever starts running.r   r   r*   r   on_retriever_start)r   r   r   r   r   Zretriever_start_Nr   )r[   r   r   rK   r*   rA   r   r   r   r   Z  s    
z(MlflowCallbackHandler.on_retriever_start)	documentsrK   r   c                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }dd |D }|d|d || j  | jj | j | j d d	 | jd
 | | jd | | j|d|  dS )z Run when Retriever ends running.r   r   r+   r   c                 S   s(   g | ] }|j d d |j D dqS )c                 S   s8   i | ]0\}}|t |ts t|nd dd |D qS )r   c                 s   s   | ]}t |V  qd S )Nr   r3   xr   r   r   	<genexpr>  r   zOMlflowCallbackHandler.on_retriever_end.<locals>.<listcomp>.<dictcomp>.<genexpr>)r   r   r   rc   r   r   r   r   r6     s
   
zEMlflowCallbackHandler.on_retriever_end.<locals>.<listcomp>.<dictcomp>)page_contentmetadata)r   r   r   )r3   rC   r   r   r   r     s   z:MlflowCallbackHandler.on_retriever_end.<locals>.<listcomp>on_retriever_end)r   r   r   r   r   Zretriever_end_Nr   )r[   r   rK   r+   rA   Zretriever_documentsr   r   r   r   r  s    
z&MlflowCallbackHandler.on_retriever_endc                 K   s(   | j d  d7  < | j d  d7  < dS )zRun when Retriever errors.r   r   r    Nr   r   r   r   r   on_retriever_error  s    z(MlflowCallbackHandler.on_retriever_errorc           
         s4  t  }|| jd }|| jd  ddg}d|jv rD|d n*d|jv rn|d dd |d< |d || jd	d
jddid	d
}| jdurt	 ng }| j
durddgng }g d} fdd|D } ddg| | |  jd	d
jdddd	d
}|j||gd	d
}	|	ddg jdd d	d
|	d< |	S )z=Create a dataframe with all the information from the session.r   r   r   rE   r]   idc                 S   s   | d S )Nra   r   )Zid_r   r   r   <lambda>  r   zCMlflowCallbackHandler._create_session_analysis_df.<locals>.<lambda>r   )ZaxisZprompt_stepNr=   r>   )Ztoken_usage_total_tokensZtoken_usage_prompt_tokensZtoken_usage_completion_tokensc                    s   g | ]}| j v r|qS r   )columnsr   Zon_llm_end_records_dfr   r   r     s   zEMlflowCallbackHandler._create_session_analysis_df.<locals>.<listcomp>r.   Zoutput_stepr   )r   r.   c                 S   s   t | d | d S )NrE   r   )rI   )rowr   r   r   r     s   	chat_html)r   	DataFramer   r   r   applyZdropnarenamer0   r-   r/   concat)
r[   pdZon_llm_start_records_dfZllm_input_columnsZllm_input_prompts_dfZcomplexity_metrics_columnsZvisualizations_columnsZtoken_usage_columnsZllm_outputs_dfsession_analysis_dfr   r   r   _create_session_analysis_df  sb    



	


z1MlflowCallbackHandler._create_session_analysis_dfc                 C   s   t | jd S )Nr   )boolr   rk   r   r   r   _contain_llm_records  s    z*MlflowCallbackHandler._contain_llm_recordsF)langchain_assetr   r   c                 C   s|  t  }| jd|| jd  |  rv|  }|d}|jdddd}| jd|| | j	d
| d |r`dtt|v r| j| ntt| jjd	}z|| | j| W n ty<   z|| | j| W nF ty   td
 t  Y n$ ty6   td
 t  Y n0 Y n$ ty^   td
 t  Y n0 |rx| j  |   d S )Nr   r   rG   rN   T)regexZsession_analysiszlangchain.chains.llm.LLMChainz
model.jsonzCould not save model.)r   r   rw   r   r   r   r   r   rH   rv   rc   tolistr   typer|   r   r   r]   savery   
ValueErrorZ
save_agentAttributeErrorprint	traceback	print_excNotImplementedErrorrl   r   )r[   r   r   r   r   r   Zlangchain_asset_pathr   r   r   flush_tracker  sD    




z#MlflowCallbackHandler.flush_tracker)r   r{   NNNrN   )NF)'r}   r~   r   r   r   r   r   r\   r   r   r   r   r   r   r   BaseExceptionr   r   r	   r   r   r   r   r   r   r   r   r
   r   r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r   r      sf         G.
$=r   )NN)0loggingrT   rd   rf   r   r   copyr   pathlibr   typingr   r   r   r   r   r	   Zlangchain_core.agentsr
   r   Zlangchain_core.callbacksr   Zlangchain_core.documentsr   Zlangchain_core.outputsr   Zlangchain_core.utilsr   r   Z#langchain_community.callbacks.utilsr   r   r   r   r   r   	getLoggerr}   r   r   r   r,   r-   r   rD   rI   rJ   r   r   r   r   r   <module>   s:     	
  *l