"""Beta utility functions to assist in common eval workflows.

These functions may change in the future.
    N)DefaultDictListOptionalSequenceTupleTypeVar)
evaluation)	warn_beta)Client)run_dictid_mapc                 C   sf   | d }|  D ]\}}|t|t|}q|| d< | drP|| d  | d< | dsbi | d< | S )a  Convert the IDs in the run dictionary using the provided ID map.

    Parameters:
    - run_dict (dict): The dictionary representing a run.
    - id_map (dict): The dictionary mapping old IDs to new IDs.

    Returns:
    - dict: The updated run dictionary.
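
    Example (an illustrative sketch; the IDs and dotted order are hypothetical):

    .. code-block:: python

        old_id, new_id = uuid.uuid4(), uuid.uuid4()
        run = {"dotted_order": f"20240101T000000000000Z{old_id}"}
        converted = _convert_ids(run, {old_id: new_id})
        # converted["dotted_order"] now embeds new_id, and "extra" defaults to {}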
    """
    do = run_dict["dotted_order"]
    for k, v in id_map.items():
        do = do.replace(str(k), str(v))
    run_dict["dotted_order"] = do
    if run_dict.get("parent_run_id"):
        run_dict["parent_run_id"] = id_map[run_dict["parent_run_id"]]
    if not run_dict.get("extra"):
        run_dict["extra"] = {}
    return run_dict


def _convert_root_run(root: ls_schemas.Run, run_to_example_map: dict) -> List[dict]:
    """Convert the root run and its child runs to a list of dictionaries.

    Parameters:
    - root (ls_schemas.Run): The root run to convert.
    - run_to_example_map (dict): The dictionary mapping run IDs to example IDs.

    Returns:
    - List[dict]: The list of converted run dictionaries.
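
    Example (sketch; assumes ``root`` was fetched with its child runs loaded and
    ``example_id`` is the dataset example created from this trace):

    .. code-block:: python

        copies = _convert_root_run(root, {root.id: example_id})
        # copies[0] is the root run dict with freshly minted IDs, and
        # copies[0]["reference_example_id"] == example_id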
    >   Zparent_run_idsZchild_run_idsZ
session_id)excludeidtrace_idc                    s   g | ]}t | qS r   )r   .0rr   r   r   
<listcomp>@       z%_convert_root_run.<locals>.<listcomp>r   Zreference_example_id)
uuiduuid4r   popdictr   
child_runsextendappendr   )r   r   Zruns_r   resultssrcZsrc_dictresultr   r#   r   _convert_root_run)   s     

r0   F)test_project_nameclientload_child_runsinclude_outputs)runsdataset_namer1   r2   r3   r4   r   c                   sv  | st d|   pt   j|d}|r<dd | D nd} jdd | D |dd | D |jd sr| }n fd	d| D }|pd
t jdd  }t	 j
|d}	dd |	D |	d jr|	d jn|	d j}
fdd|D } j||jd|
 dd}|D ]T}|d |d  }tjjtjjd|d< |d | |d<  jf i |d|i q |j}|S )a  Convert the following runs to a dataset + test.

    This makes it easy to sample prod runs into a new regression testing
    workflow and compare against a candidate system.

    Internally, this function does the following:
        1. Create a dataset from the provided production run inputs.
        2. Create a new test project.
        3. Clone the production runs and re-upload against the dataset.

    Parameters:
    - runs (Sequence[ls_schemas.Run]): A sequence of runs to be executed as a test.
    - dataset_name (str): The name of the dataset to associate with the test runs.
    - test_project_name (Optional[str]): The name of the test project to create.
        Defaults to a generated "prod-baseline-" name.
    - client (Optional[Client]): An optional LangSmith client instance. If not provided,
        a new client will be created.
    - load_child_runs (bool): Whether to load child runs when copying runs.
        Defaults to False.
    - include_outputs (bool): Whether to copy the run outputs into the dataset
        examples. Defaults to False.

    Returns:
    - ls_schemas.TracerSession: The project containing the cloned runs.

    Examples:
    --------
    .. code-block:: python

        import langsmith
        import random

        client = langsmith.Client()

        # Randomly sample 100 runs from a prod project
        runs = list(client.list_runs(project_name="My Project", execution_order=1))
        sampled_runs = random.sample(runs, min(len(runs), 100))

        convert_runs_to_test(sampled_runs, dataset_name="Random Runs")

        # Select runs named "extractor" whose root traces received good feedback
        runs = client.list_runs(
            project_name="<your_project>",
            filter='eq(name, "extractor")',
            trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))',
        )
        project = convert_runs_to_test(list(runs), dataset_name="Extraction Good")
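
        # Hypothetical follow-up: score the cloned project with an evaluator
        # via compute_test_metrics (defined below).
        def has_output(run, example):
            # Check that the cloned run produced any output at all.
            return {"key": "has_output", "score": int(bool(run.outputs))}

        compute_test_metrics(project.name, evaluators=[has_output])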
    """
    if not runs:
        raise ValueError(f"Expected a non-empty sequence of runs. Received: {runs}")
    client = client or rt.get_cached_client()
    ds = client.create_dataset(dataset_name=dataset_name)
    outputs = [r.outputs for r in runs] if include_outputs else None
    client.create_examples(
        inputs=[r.inputs for r in runs],
        outputs=outputs,
        source_run_ids=[r.id for r in runs],
        dataset_id=ds.id,
    )
    if not load_child_runs:
        runs_to_copy = runs
    else:
        runs_to_copy = [client.read_run(r.id, load_child_runs=True) for r in runs]
    test_project_name = test_project_name or f"prod-baseline-{uuid.uuid4().hex[:6]}"
    examples = list(client.list_examples(dataset_id=ds.id))
    run_to_example_map = {e.source_run_id: e.id for e in examples}
    dataset_version = (
        examples[0].modified_at if examples[0].modified_at else examples[0].created_at
    )
    to_create = [
        run_dict
        for root_run in runs_to_copy
        for run_dict in _convert_root_run(root_run, run_to_example_map)
    ]
    project = client.create_project(
        project_name=test_project_name,
        reference_dataset_id=ds.id,
        metadata={
            "which": "prod-baseline",
            "dataset_version": dataset_version.isoformat(),
        },
    )
    for new_run in to_create:
        # Shift each cloned run to "now" while preserving its original latency.
        latency_ = new_run["end_time"] - new_run["start_time"]
        new_run["start_time"] = datetime.datetime.now(tz=datetime.timezone.utc)
        new_run["end_time"] = new_run["start_time"] + latency_
        client.create_run(**new_run, project_name=test_project_name)
    client.update_project(
        project.id, end_time=datetime.datetime.now(datetime.timezone.utc)
    )
    return project
	rO   )r>   r2   r   c           	      C   s   |j | d}tt}g }i }|D ]4}|jd urB||j | n
|| |||j< q"| D ]\}}t|dd d|| _	q`|S )N)r>   c                 S   s   | j S N)r   )r"   r   r   r   <lambda>   r%   z%_load_nested_traces.<locals>.<lambda>)key)
Z	list_runscollectionsdefaultdictrG   r   r,   r   r   sortedr*   )	r>   r2   r5   Ztreemapr-   Zall_runsrunZrun_idr*   r   r   r   _load_nested_traces   s    

rW   TU)list1list2r   c                 C   s   t t| |S rP   )rG   	itertoolsproduct)rZ   r[   r   r   r   _outer_product   s    r^   


@warn_beta
def compute_test_metrics(
    project_name: str,
    evaluators: list,
    *,
    max_concurrency: Optional[int] = 10,
    client: Optional[Client] = None,
) -> None:
    """Compute test metrics for a given test name using a list of evaluators.

    Args:
        project_name (str): The name of the test project to evaluate.
        evaluators (list): A list of evaluators to compute metrics with.
        max_concurrency (Optional[int], optional): The maximum number of concurrent
            evaluations. Defaults to 10.
        client (Optional[Client], optional): The client to use for evaluations.
            Defaults to None.

    Returns:
        None: This function does not return any value.
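
    Example:
        A minimal sketch; the project name and evaluator are hypothetical, and
        any callable accepted by ``ls_eval.run_evaluator`` may be used instead.

        .. code-block:: python

            def fast_enough(run, example):
                # Hypothetical latency check on the replayed run.
                latency = (run.end_time - run.start_time).total_seconds()
                return {"key": "fast_enough", "score": int(latency < 5)}

            compute_test_metrics("prod-baseline-1234ab", evaluators=[fast_enough])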
    r   )ContextThreadPoolExecutorz5Evaluation not yet implemented for evaluator of type )max_workersN)	langsmithrb   
isinstancels_evalZRunEvaluatorr,   callableZrun_evaluatorNotImplementedErrortyperD   rE   rW   mapZevaluate_runzipr^   )r>   ra   r`   r2   rb   Zevaluators_funcZtracesexecutorr-   rN   r   r   r   compute_test_metrics   s(    
&rn   )(__doc__rS   rI   r\   r&   typingr   r   r   r   r   r   Zlangsmith.run_treesZ	run_treesrD   Zlangsmith.schemasZschemasZ
ls_schemasrd   r   rf   Z#langsmith._internal._beta_decoratorr	   Zlangsmith.clientr
   r)   r   ZRunr0   r   boolZTracerSessionrO   rW   rX   rY   r^   rG   intrn   r   r   r   r   <module>   sP    k&