a
    dg                     @   sR   d dl mZmZmZmZmZ d dlmZmZm	Z	 d dl
mZ dZG dd dZdS )    )AnyDictListSequenceUnion)GraphDocumentNodeRelationship)Documentr   c                   @   sp   e Zd ZdZdee eee eeef f eee	e	ee
dd	d	d
ZeedddZee ee dddZdS )GlinerGraphTransformerau  
    A transformer class for converting documents into graph structures
    using the GLiNER and GLiREL models.

    This class leverages GLiNER for named entity recognition and GLiREL for
    relationship extraction from text documents, converting them into a graph format.
    The extracted entities and relationships are filtered based on specified
    confidence thresholds and allowed types.

    For more details on GLiNER and GLiREL, visit their respective repositories:
      GLiNER: https://github.com/urchade/GLiNER
      GLiREL: https://github.com/jackboyla/GLiREL/tree/main

    Args:
        allowed_nodes (List[str]): A list of allowed node types for entity extraction.
        allowed_relationships (Union[List[str], Dict[str, Any]]): A list of allowed
          relationship types or a dictionary with additional configuration for
          relationship extraction.
        gliner_model (str): The name of the pretrained GLiNER model to use.
          Default is "urchade/gliner_mediumv2.1".
        glirel_model (str): The name of the pretrained GLiREL model to use.
          Default is "jackboyla/glirel_beta".
        entity_confidence_threshold (float): The confidence threshold for
          filtering extracted entities. Default is 0.1.
        relationship_confidence_threshold (float): The confidence threshold for
          filtering extracted relationships. Default is 0.1.
        device (str): The device to use for model inference ('cpu' or 'cuda').
          Default is "cpu".
        ignore_self_loops (bool): Whether to ignore relationships where the
          source and target nodes are the same. Default is True.
    urchade/gliner_mediumv2.1jackboyla/glirel_beta皙?cpuTN)	allowed_nodesallowed_relationshipsgliner_modelglirel_modelentity_confidence_threshold!relationship_confidence_thresholddeviceignore_self_loopsreturnc	                 C   s   zdd l }	W n ty&   tdY n0 zdd l}
W n tyN   tdY n0 zdd l}W n tyv   tdY n0 |d|d||d}||d}|
d	| _| jjd
|d | jjdd
|d t|trd|in|| _	|| _
|| _d S )Nr   z`Could not import gliner-spacy python package. Please install it with `pip install gliner-spacy`.zRCould not import spacy python package. Please install it with `pip install spacy`.zTCould not import gliner python package. Please install it with `pip install gliner`.   ent)r   
chunk_sizelabelsstyle	thresholdZmap_location)modelr   engliner_spacy)configglirel)afterr"   Zglirel_labels)r!   ImportErrorspacyr#   blanknlpZadd_pipe
isinstancelistr   r   r   )selfr   r   r   r   r   r   r   r   r!   r&   r#   Zgliner_configZglirel_config r,   ~/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_experimental/graph_transformers/gliner.py__init__*   sF    




zGlinerGraphTransformer.__init__)documentr   c              
      s  t | jj|j| jfgdd}dd |d d jD }dd |D }g }|d d jj}g }t }|D ]`}	t	|	d t	|	d	 |	d
 f  |vrf|
   fdd|D }
t|
dd d}|| qf|D ]}|d | jk rq|d d |d d |d d  j|d d |d d |d d  j| jr@kr@q̇fdd|D d }fdd|D d }|t|||d
 dd d qt|||dS )NT)Z	as_tuplesc                 S   s   h | ]}|j |jfqS r,   )textZlabel_).0noder,   r,   r-   	<setcomp>i       z:GlinerGraphTransformer.process_document.<locals>.<setcomp>r   c                 S   s   g | ]\}}t ||d qS ))idtype)r   )r1   Z	node_textZ
node_labelr,   r,   r-   
<listcomp>l   s
   z;GlinerGraphTransformer.process_document.<locals>.<listcomp>	head_text	tail_textlabelc                    s2   g | ]*}t |d  t |d |d f kr|qS )r8   r9   r:   )tuple)r1   relkeyr,   r-   r7      s
   c                 S   s   | d S )Nscorer,   )xr,   r,   r-   <lambda>   r4   z9GlinerGraphTransformer.process_document.<locals>.<lambda>r=   r?   Zhead_pos   Ztail_posc                    s   g | ]}|j  kr|qS r,   r5   r1   n)	source_idr,   r-   r7      r4   c                    s   g | ]}|j  kr|qS r,   rC   rD   )	target_idr,   r-   r7      r4    _)sourcetargetr6   )nodesrelationshipsrJ   )r*   r(   pipeZpage_contentr   ZentsrI   	relationssetr;   addmaxappendr   r0   r   r	   replaceupperr   )r+   r/   docsZdeduplicated_nodesrL   rM   rO   Zdeduplicated_relsseenitemZmatching_itemsZmax_itemr<   Zsource_nodeZtarget_noder,   )r>   rF   rG   r-   process_documenta   sN    

&&z'GlinerGraphTransformer.process_document)	documentsr   c                 C   s&   g }|D ]}|  |}|| q|S )a  Convert a sequence of documents into graph documents.

        Args:
            documents (Sequence[Document]): The original documents.
            kwargs: Additional keyword arguments.

        Returns:
            Sequence[GraphDocument]: The transformed documents as graphs.
        )rY   rS   )r+   rZ   resultsr/   Zgraph_documentr,   r,   r-   convert_to_graph_documents   s
    
z1GlinerGraphTransformer.convert_to_graph_documents)r   r   r   r   r   T)__name__
__module____qualname____doc__r   strr   r   r   floatboolr.   r
   r   rY   r   r\   r,   r,   r,   r-   r   	   s,   $      7Ar   N)typingr   r   r   r   r   Z)langchain_community.graphs.graph_documentr   r   r	   Zlangchain_core.documentsr
   ZDEFAULT_NODE_TYPEr   r,   r,   r,   r-   <module>   s   