a
    dg                     @   s   d dl Z d dlmZ d dlmZmZ d dlmZmZm	Z	 erTd dl
mZ d dlmZ eeeeef f Zeeeddd	ZeG d
d dZdee	d deedddZdS )    Ndefaultdict)	dataclassfield)TYPE_CHECKINGDictList)RecognizerResult)EngineResult)operator_namecountreturnc                 C   sT   t dd| }t dd|}| drB| drBd| d| dS | d| S dS )z(Format the operator name with the count.z[<>] z_\d+$<>_N)resub
startswithendswith)r   r   Zclean_operator_name r   /var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_experimental/data_anonymizer/deanonymizer_mapping.pyformat_duplicated_operator   s
    r   c                   @   sJ   e Zd ZU dZedd dZeed< eedddZ	ed	d
ddZ
d	S )DeanonymizerMappingzDeanonymizer mapping.c                   C   s   t dd S )Nc                   S   s   t tS N)r   strr   r   r   r   <lambda>       z.DeanonymizerMapping.<lambda>.<locals>.<lambda>r   r   r   r   r   r      r   zDeanonymizerMapping.<lambda>)default_factorymapping)r   c                 C   s   dd | j  D S )z Return the deanonymizer mapping.c                 S   s   i | ]\}}|t |qS r   )dict).0kvr   r   r   
<dictcomp>$   r   z,DeanonymizerMapping.data.<locals>.<dictcomp>)r   items)selfr   r   r   data!   s    zDeanonymizerMapping.dataN)new_mappingr   c           	      C   s   t  }| D ]\}}t| j| d }| D ]^\}}||vr0|| j|  vr0|| j| v rjt||n|}|| j| |< || |d7 }q0qdS )aJ  Update the deanonymizer mapping with new values.

        Duplicated values will not be added
        If there are multiple entities of the same type, the mapping will
        include a count to differentiate them. For example, if there are
        two names in the input text, the mapping will include NAME_1 and NAME_2.
           N)setr%   lenr   valuesr   add)	r&   r(   Zseen_valuesentity_typer,   r   keyvalueZnew_keyr   r   r   update&   s    
zDeanonymizerMapping.update)__name__
__module____qualname____doc__r   r   MappingDataType__annotations__propertyr'   r1   r   r   r   r   r      s   
r   Fr	   r
   )original_textanalyzer_resultsanonymizer_resultsis_reversedr   c                 C   s   |j dd d |jj dd d tt}tt}t||jD ]\}}| |j|j }|j}	|rr|||	 	 v }
n|||	 v }
|
rq>|j
||	 	 v s|j
||	 v rt|j
||	 d }||	  d7  < n|j
}|r||fn||f\}}|||	 |< q>|S )a]  Create or update the mapping used to anonymize and/or
     deanonymize a text.

    This method exploits the results returned by the
    analysis and anonymization processes.

    If is_reversed is True, it constructs a mapping from each original
    entity to its anonymized value.

    If is_reversed is False, it constructs a mapping from each
    anonymized entity back to its original text value.

    If there are multiple entities of the same type, the mapping will
    include a count to differentiate them. For example, if there are
    two names in the input text, the mapping will include NAME_1 and NAME_2.

    Example of mapping:
    {
        "PERSON": {
            "<original>": "<anonymized>",
            "John Doe": "Slim Shady"
        },
        "PHONE_NUMBER": {
            "111-111-1111": "555-555-5555"
        }
        ...
    }
    c                 S   s   | j S r   startdr   r   r   r   k   r   z+create_anonymizer_mapping.<locals>.<lambda>)r/   c                 S   s   | j S r   r=   r?   r   r   r   r   l   r      r)   )sortr%   r   r    intzipr>   endr.   r,   textr   )r9   r:   r;   r<   r   r   ZanalyzedZ
anonymizedoriginal_valuer.   ZcondZanonymized_valueZmapping_keyZmapping_valuer   r   r   create_anonymizer_mappingC   s6    (
rH   )F)r   collectionsr   dataclassesr   r   typingr   r   r   Zpresidio_analyzerr	   Zpresidio_anonymizer.entitiesr
   r   r6   rC   r   r   boolrH   r   r   r   r   <module>   s$   - 