a
    dgK                     @  sJ  d dl mZ d dlZd dlZd dlmZmZ d dlmZm	Z	m
Z
mZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d d	lmZmZm Z  d d
l!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z) erd dl*Z+e,e-Z.G dd dZ/dddddZ0G dd dZ1dddddZ2d?ddddddZ3dddddZ4dd d!d"d#d$Z5d%d&d'd(d)Z6d%d%d'd*d+Z7G d,d- d-eZ8ed.e8d/Z9G d0d1 d1ee9 eZ:ed2e:d/Z;G d3d4 d4ee; eZ<G d5d6 d6e<Z=G d7d8 d8ee; eZ>G d9d: d:ee; eeZ?G d;d< d<e?e: eZ@G d=d> d>eee; ZAdS )@    )annotationsN)ABCabstractmethod)
TYPE_CHECKINGAnyDictGenericListOptionalTupleTypeTypeVarUnion)Chain)LLMChain)CallbackManagerForChainRun)BasePromptTemplateChatPromptTemplateHumanMessagePromptTemplateSystemMessagePromptTemplate)	BaseModel
ConfigDictmodel_validator)_Embed)MetricsTrackerAverageMetricsTrackerRollingWindow)ModelRepository)VwLoggerc                   @  s,   e Zd ZddddZddddZeZd	S )
_BasedOnr   valuec                 C  s
   || _ d S Nr   selfr     r$   r/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_experimental/rl_chain/base.py__init__-   s    z_BasedOn.__init__strreturnc                 C  s
   t | jS r!   r'   r    r#   r$   r$   r%   __str__0   s    z_BasedOn.__str__N__name__
__module____qualname__r&   r,   __repr__r$   r$   r$   r%   r   ,   s   r   r   )anythingr)   c                 C  s   t | S )z4Wrap a value to indicate that it should be based on.)r   r2   r$   r$   r%   BasedOn6   s    r4   c                   @  s,   e Zd ZddddZddddZeZd	S )
_ToSelectFromr   r   c                 C  s
   || _ d S r!   r   r"   r$   r$   r%   r&   =   s    z_ToSelectFrom.__init__r'   r(   c                 C  s
   t | jS r!   r*   r+   r$   r$   r%   r,   @   s    z_ToSelectFrom.__str__Nr-   r$   r$   r$   r%   r5   <   s   r5   c                 C  s   t | tstdt| S )z9Wrap a value to indicate that it should be selected from.z*ToSelectFrom must be a list to select from)
isinstancelist
ValueErrorr5   r3   r$   r$   r%   ToSelectFromF   s    
r9   Fbool)r2   keepr)   c                   s   t | trtt| j dS t | tr8tt| j dS t | trT fdd| D S t | trt fdd| 	 D S t | t
r| S t
|  dS )z4Wrap a value to indicate that it should be embedded.r;   c                   s   g | ]}t | d qS r<   Embed).0vr<   r$   r%   
<listcomp>V       zEmbed.<locals>.<listcomp>c                   s   i | ]\}}|t | d qS r=   r>   )r@   krA   r<   r$   r%   
<dictcomp>X   rC   zEmbed.<locals>.<dictcomp>)r6   r5   r9   r?   r    r   r4   r7   dictitemsr   )r2   r;   r$   r<   r%   r?   N   s    




r?   c                 C  s   t | ddS )z=Wrap a value to indicate that it should be embedded and kept.Tr<   r>   r3   r$   r$   r%   EmbedAndKeep^   s    rH   z'vw.TextFormatParser'r'   zList['vw.Example'])parser	input_strr)   c                   s    fdd| dD S )z/Parse the input string into a list of examples.c                   s   g | ]}  |qS r$   )
parse_line)r@   linerI   r$   r%   rB   j   rC   zparse_lines.<locals>.<listcomp>
)split)rI   rJ   r$   rM   r%   parse_linesg   s    rP   Dict[str, Any]zTuple[Dict, Dict]inputsr)   c                   s@    fdd   D }|s"td fdd   D }||fS )z1Get the BasedOn and ToSelectFrom from the inputs.c                   s&   i | ]}t  | tr| | jqS r$   )r6   r5   r    r@   rD   rS   r$   r%   rE   o   s   z3get_based_on_and_to_select_from.<locals>.<dictcomp>z}No variables using 'ToSelectFrom' found in the inputs. Please include at least one variable containing a list to select from.c                   sB   i | ]:}t  | tr|t  | jtr2 | jn
 | jgqS r$   )r6   r   r    r7   rT   rU   r$   r%   rE   z   s   )keysr8   )rS   Zto_select_fromZbased_onr$   rU   r%   get_based_on_and_to_select_fromm   s    

rW   c                 C  sN   |   }| D ]8\}}t|ts,t|trt|jtst|j|| _q|S )a  Prepare the inputs for auto embedding.

    Go over all the inputs and if something is either wrapped in _ToSelectFrom or _BasedOn, and if their inner values are not already _Embed,
    then wrap them in EmbedAndKeep while retaining their _ToSelectFrom or _BasedOn status
    )copyrG   r6   r5   r   r    r   rH   )rS   Znext_inputsrD   rA   r$   r$   r%   prepare_inputs_for_autoembed   s    rY   c                   @  s   e Zd ZdZdS )Selectedz.Abstract class to represent the selected item.N)r.   r/   r0   __doc__r$   r$   r$   r%   rZ      s   rZ   	TSelected)boundc                   @  s4   e Zd ZU dZded< ded< d
ddddd	ZdS )Eventz%Abstract class to represent an event.rQ   rS   zOptional[TSelected]selectedNrS   r_   c                 C  s   || _ || _d S r!   r`   )r#   rS   r_   r$   r$   r%   r&      s    zEvent.__init__)N)r.   r/   r0   r[   __annotations__r&   r$   r$   r$   r%   r^      s   
r^   TEventc                   @  sh   e Zd ZdZddddZeddddd	Zedd
dddZedd
dddZd
dddZ	dS )Policyz%Abstract class to represent a policy.r   )kwargsc                 K  s   d S r!   r$   )r#   rd   r$   r$   r%   r&      s    zPolicy.__init__rb   eventr)   c                 C  s   d S r!   r$   r#   rf   r$   r$   r%   predict   s    zPolicy.predictNonec                 C  s   d S r!   r$   rg   r$   r$   r%   learn   s    zPolicy.learnc                 C  s   d S r!   r$   rg   r$   r$   r%   log   s    z
Policy.logr(   c                 C  s   d S r!   r$   r+   r$   r$   r%   save   s    zPolicy.saveN)
r.   r/   r0   r[   r&   r   rh   rj   rk   rl   r$   r$   r$   r%   rc      s   rc   c                      sn   e Zd ZdZddddddd fdd	Zd
ddddZd
ddddZd
ddddZddddZ  Z	S )VwPolicyzVowpal Wabbit policy.r   	List[str]Embedderr   r   )
model_repovw_cmdfeature_embedder	vw_loggerargsrd   c                   s6   t  j|i | || _| j|| _|| _|| _d S r!   )superr&   rp   load	workspacerr   rs   )r#   rp   rq   rr   rs   rt   rd   	__class__r$   r%   r&      s
    	zVwPolicy.__init__rb   re   c                 C  s.   dd l }|| j}| jt|| j|S Nr   )vowpal_wabbit_nextTextFormatParserrw   Zpredict_onerP   rr   format)r#   rf   vwtext_parserr$   r$   r%   rh      s
    zVwPolicy.predictri   c                 C  s:   dd l }| j|}|| j}t||}| j| d S rz   )r{   rr   r}   r|   rw   rP   Z	learn_one)r#   rf   r~   vw_exr   Zmulti_exr$   r$   r%   rj      s
    
zVwPolicy.learnc                 C  s&   | j  r"| j|}| j | d S r!   )rs   Zlogging_enabledrr   r}   rk   )r#   rf   r   r$   r$   r%   rk      s    
zVwPolicy.logr(   c                 C  s   | j | j d S r!   )rp   rl   rw   r+   r$   r$   r%   rl      s    zVwPolicy.save)
r.   r/   r0   r[   r&   rh   rj   rk   rl   __classcell__r$   r$   rx   r%   rm      s   rm   c                   @  s4   e Zd ZdZdddddZedddd	d
ZdS )ro   z(Abstract class to represent an embedder.r   )rt   rd   c                 O  s   d S r!   r$   )r#   rt   rd   r$   r$   r%   r&      s    zEmbedder.__init__rb   r'   re   c                 C  s   d S r!   r$   rg   r$   r$   r%   r}      s    zEmbedder.formatN)r.   r/   r0   r[   r&   r   r}   r$   r$   r$   r%   ro      s   ro   c                   @  s(   e Zd ZdZedddddddZd	S )
SelectionScorerzHAbstract class to grade the chosen selection or the response of the llm.rQ   r'   rb   floatrS   llm_responserf   r)   c                 C  s   d S r!   r$   )r#   rS   r   rf   r$   r$   r%   score_response   s    zSelectionScorer.score_responseN)r.   r/   r0   r[   r   r   r$   r$   r$   r%   r      s   r   c                   @  s   e Zd ZU dZded< dZded< dZded< ed	d
ddZedd
ddZ	e
ddedddddZdddddddZdS )AutoSelectionScorerzAuto selection scorer.r   	llm_chainNzUnion[BasePromptTemplate, None]promptzOptional[str]scoring_criteria_template_strr   r(   c                   C  s
   t dS )Na  PLEASE RESPOND ONLY WITH A SINGLE FLOAT AND NO OTHER TEXT EXPLANATION
                 You are a strict judge that is called on to rank a response based on                     given criteria. You must respond with your ranking by providing a                         single float within the range [0, 1], 0 being very bad                             response and 1 being very good response.)r   from_templater$   r$   r$   r%   get_default_system_prompt  s    z-AutoSelectionScorer.get_default_system_promptr   c                  C  s(   d} t | }t }t||g}|S )NzGiven this based_on "{rl_chain_selected_based_on}"             as the most important attribute, rank how good or bad this text is:                 "{rl_chain_selected}".)r   r   r   r   r   from_messages)Zhuman_templatehuman_message_promptdefault_system_promptZchat_promptr$   r$   r%   get_default_prompt  s    
z&AutoSelectionScorer.get_default_promptbefore)moderQ   r   )valuesr)   c                 C  s   | d}| d}| d}|d u r8|d u r8t }n0|d u rh|d urht|}t }t||g}||d< t||d|d< |S )Nllmr   r   )r   r   r   )	getr   r   r   r   r   r   r   r   )clsr   r   r   r   r   r   r$   r$   r%   set_prompt_and_llm_chain  s     



z,AutoSelectionScorer.set_prompt_and_llm_chainr'   r^   r   r   c              
   C  sd   | j jf d|i|}| }zt|}|W S  ty^ } ztd| W Y d }~n
d }~0 0 d S )Nr   zThe auto selection scorer did not manage to score the response, there is always the option to try again or tweak the reward prompt. Error: )r   rh   stripr   	ExceptionRuntimeError)r#   rS   r   rf   Zrankingresper$   r$   r%   r   .  s    z"AutoSelectionScorer.score_response)r.   r/   r0   r[   ra   r   r   staticmethodr   r   r   classmethodr   r   r$   r$   r$   r%   r      s   
	r   c                      s  e Zd ZU dZG dd deZded< dZded< d	ed
< ded< e Zded< dZ	ded< dZ
ded< dZded< dZded< dZded< dddedddfdddddd d!d!d"d"d#
 fd$d%Zedd&d'Zed(d)d*d+Zed(d)d,d-ZdSd.d/dd0d1d2d3Zd0d)d4d5Zd0d)d6d7Zd0d)d8d9Zd/d0d: fd;d<Zdd)d=d>Zed/d?d:d@dAZed/d?d"dBdCdDdEZedd?dBdFdGdHZed?dId?dJdKdLZdTd/dMd/dNdOdPZ edd)dQdRZ!  Z"S )URLChaina?  Chain that leverages the Vowpal Wabbit (VW) model as a learned policy
    for reinforcement learning.

    Attributes:
        - llm_chain (Chain): Represents the underlying Language Model chain.
        - prompt (BasePromptTemplate): The template for the base prompt.
        - selection_scorer (Union[SelectionScorer, None]): Scorer for the selection. Can be set to None.
        - policy (Optional[Policy]): The policy used by the chain to learn to populate a dynamic prompt.
        - auto_embed (bool): Determines if embedding should be automatic. Default is False.
        - metrics (Optional[Union[MetricsTrackerRollingWindow, MetricsTrackerAverage]]): Tracker for metrics, can be set to None.

    Initialization Attributes:
        - feature_embedder (Embedder): Embedder used for the `BasedOn` and `ToSelectFrom` inputs.
        - model_save_dir (str, optional): Directory for saving the VW model. Default is the current directory.
        - reset_model (bool): If set to True, the model starts training from scratch. Default is False.
        - vw_cmd (List[str], optional): Command line arguments for the VW model.
        - policy (Type[VwPolicy]): Policy used by the chain.
        - vw_logs (Optional[Union[str, os.PathLike]]): Path for the VW logs.
        - metrics_step (int): Step for the metrics tracker. Default is -1. If set without metrics_window_size, average metrics will be tracked, otherwise rolling window metrics will be tracked.
        - metrics_window_size (int): Window size for the metrics tracker. Default is -1. If set, rolling window metrics will be tracked.

    Notes:
        The class initializes the VW model using the provided arguments. If `selection_scorer` is not provided, a warning is logged, indicating that no reinforcement learning will occur unless the `update_with_delayed_score` method is called.
    c                   @  s@   e Zd ZdZdddddZddddd	Zdddd
dZdS )zRLChain._NoOpPolicyz$Placeholder policy that does nothingrb   r   re   c                 C  s   d S r!   r$   rg   r$   r$   r%   rh   Y  s    zRLChain._NoOpPolicy.predictri   c                 C  s   d S r!   r$   rg   r$   r$   r%   rj   \  s    zRLChain._NoOpPolicy.learnc                 C  s   d S r!   r$   rg   r$   r$   r%   rk   _  s    zRLChain._NoOpPolicy.logN)r.   r/   r0   r[   rh   rj   rk   r$   r$   r$   r%   _NoOpPolicyV  s   r   r   r   resultr'   
output_keyr   r   zUnion[SelectionScorer, None]selection_scorerrc   active_policyFr:   
auto_embedTselection_scorer_activatedZrl_chain_selectedselected_input_keyZrl_chain_selected_based_onselected_based_on_input_keyNzCOptional[Union[MetricsTrackerRollingWindow, MetricsTrackerAverage]]metricsz./ro   zOptional[List[str]]zType[Policy]z!Optional[Union[str, os.PathLike]]intr   )
rr   model_save_dirreset_modelrq   policyvw_logsmetrics_stepmetrics_window_sizert   rd   c	                   s   t  j|	i |
 | jd u r&td t| jtjrX|t	|d|d|pHg |t
|d| _|dkrpt||d| _nt|d| _d S )NzNo selection scorer provided, which means that no                     reinforcement learning will be done in the RL chain                         unless update_with_delayed_score is called.T)Zwith_historyreset)rp   rq   rr   rs   r   )stepZwindow_size)r   )ru   r&   r   loggerwarningr6   r   r   r   r   r   r   r   r   )r#   rr   r   r   rq   r   r   r   r   rt   rd   rx   r$   r%   r&   n  s&    
	
zRLChain.__init__Zforbid)Zarbitrary_types_allowedextrarn   r(   c                 C  s   g S )z1Expect input key.
        :meta private:
        r$   r+   r$   r$   r%   
input_keys  s    zRLChain.input_keysc                 C  s   | j gS )z3Expect output key.

        :meta private:
        )r   r+   r$   r$   r%   output_keys  s    zRLChain.output_keysr   rQ   ri   )scorechain_responseforce_scorer)   c                 C  s\   |   r|std| jr&| j| |d }| j||d | jj|d | jj|d dS )z
        Updates the learned policy with the score provided.
        Will raise an error if selection_scorer is set, and force_score=True was not provided during the method call
        zsThe selection scorer is set, and force_score was not set to True. Please set force_score=True to use this function.selection_metadata)rf   r   rf   N)_can_use_selection_scorerr   r   on_feedback#_call_after_scoring_before_learningr   rj   rk   )r#   r   r   r   rf   r$   r$   r%   update_with_delayed_score  s    z!RLChain.update_with_delayed_scorec                 C  s
   d| _ dS )z
        Deactivates the selection scorer, meaning that the chain will no longer attempt to use the selection scorer to score responses.
        FNr   r+   r$   r$   r%   deactivate_selection_scorer  s    z#RLChain.deactivate_selection_scorerc                 C  s
   d| _ dS )z
        Activates the selection scorer, meaning that the chain will attempt to use the selection scorer to score responses.
        TNr   r+   r$   r$   r%   activate_selection_scorer  s    z!RLChain.activate_selection_scorerc                 C  s   | j   dS )z_
        This function should be called to save the state of the learned policy model.
        N)r   rl   r+   r$   r$   r%   save_progress  s    zRLChain.save_progressrR   c                   sF   t  | | j| v s(| j| v rBtd| j d| j dd S )NzThe rl chain does not accept 'z' or 'zG' as input keys, they are reserved for internal use during auto reward.)ru   _validate_inputsr   rV   r   r8   r#   rS   rx   r$   r%   r     s    zRLChain._validate_inputsc                 C  s   | j duo| jS )zc
        Returns whether the chain can use the selection scorer to score responses or not.
        N)r   r   r+   r$   r$   r%   r     s    z!RLChain._can_use_selection_scorerrb   c                 C  s   d S r!   r$   r   r$   r$   r%   _call_before_predict  s    zRLChain._call_before_predictzTuple[Dict[str, Any], TEvent])rS   rf   
predictionr)   c                 C  s   d S r!   r$   )r#   rS   rf   r   r$   r$   r%   _call_after_predict_before_llm  s    z&RLChain._call_after_predict_before_llm)r   rf   r)   c                 C  s   d S r!   r$   )r#   r   rf   r$   r$   r%   _call_after_llm_before_scoring  s    z&RLChain._call_after_llm_before_scoringzOptional[float])rf   r   r)   c                 C  s   d S r!   r$   )r#   rf   r   r$   r$   r%   r     s    z+RLChain._call_after_scoring_before_learningz$Optional[CallbackManagerForChainRun])rS   run_managerr)   c              
   C  s  |p
t  }| j|d}| jj|d}| jr6| j  | j|||d\}}| jj	f i |d|
 i}|j|d| jd | }| jr|jd| jd |}|jd	| jd |j|d
| jd | j||d\}}d }	z|  r| jj|||d}	W n6 ty( }
 ztd|
  W Y d }
~
n
d }
~
0 0 | jrH|	d urH| j|	 | j|	|d}| jj|d | jj|d | j||diS )NrU   r   )rS   rf   r   	callbacksgreen)colorverbosez
Code: )r   z	
Answer: yellow)r   rf   )rS   r   rf   zzThe selection scorer was not able to score,                 and the chain was not able to adjust to this response, error: )r   rf   )responser   )r   Zget_noop_managerr   r   rh   r   Zon_decisionr   r   runZ	get_childZon_textr   r   r   r   r   r   r   r   infor   r   rj   rk   r   )r#   rS   r   Z_run_managerrf   r   Znext_chain_inputstoutputr   r   r$   r$   r%   _call  sJ    



zRLChain._callc                 C  s   dS )NZllm_personalizer_chainr$   r+   r$   r$   r%   _chain_type!  s    zRLChain._chain_type)F)N)#r.   r/   r0   r[   rc   r   ra   r   r   r   r   r   r   r   rm   r&   r   Zmodel_configpropertyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r$   r$   rx   r%   r   <  s^   
&& 
 3r   )F)B
__future__r   loggingosabcr   r   typingr   r   r   r   r	   r
   r   r   r   r   Zlangchain.chains.baser   Zlangchain.chains.llmr   Z langchain_core.callbacks.managerr   Zlangchain_core.promptsr   r   r   r   Zpydanticr   r   r   Z'langchain_experimental.rl_chain.helpersr   Z'langchain_experimental.rl_chain.metricsr   r   Z0langchain_experimental.rl_chain.model_repositoryr   Z)langchain_experimental.rl_chain.vw_loggerr   r{   r~   	getLoggerr.   r   r   r4   r5   r9   r?   rH   rP   rW   rY   rZ   r\   r^   rb   rc   rm   ro   r   r   r   r$   r$   r$   r%   <module>   sF   0


	+
	?