a
    bgw#                     @  s   d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ ertd dlZd dlmZ dd	d
ddZG dd deZdddddZG dd deZeZdS )    )annotationsN)Path)TYPE_CHECKINGDictListOptionalUnionDocument)
BaseLoader)
EntityLikedictstr)rowreturnc                 C  s.   | d }| d }| d }| d| d| dS )zBCombine message information in a readable format ready to be used.datefromtextz on z: 

 )r   r   Zsenderr   r   r   {/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/telegram.pyconcatenate_rows   s    r   c                   @  s,   e Zd ZdZddddZdddd	Zd
S )TelegramChatFileLoaderzLoad from `Telegram chat` dump.zUnion[str, Path])pathc                 C  s
   || _ dS )zInitialize with a path.N)	file_path)selfr   r   r   r   __init__   s    zTelegramChatFileLoader.__init__List[Document]r   c                 C  sr   t | j}t|dd}t|}W d   n1 s60    Y  ddd |d D }dt|i}t||d	gS )
Load documents.utf8encodingN c                 s  s0   | ](}|d  dkrt |d trt|V  qdS )typemessager   N)
isinstancer   r   ).0r%   r   r   r   	<genexpr>'   s   z.TelegramChatFileLoader.load.<locals>.<genexpr>messagessourcepage_contentmetadata)r   r   openjsonloadjoinr   r
   )r   pfdr   r-   r   r   r   r0       s    
(
zTelegramChatFileLoader.loadN)__name__
__module____qualname____doc__r   r0   r   r   r   r   r      s   r   zUnion[str, List[str]]r   )r   r   c           	      C  s   ddl m} |dg ddd}t| tr.| g} dd | D }t|D ]\}}|d	 |jd
< qDg }|D ]d}||j}t|D ]J\}}t||jd
 |dd}|jd
  d|jd  |jd< |	| q|qd|S )zIConvert a string or list of strings to a list of Documents with metadata.r   )RecursiveCharacterTextSplitteri   )r   
.!?, r#      )
chunk_size
separatorsZchunk_overlapc                 S  s   g | ]}t |d qS ))r,   r	   )r'   pager   r   r   
<listcomp>>       z text_to_docs.<locals>.<listcomp>   rC   )rC   chunkr+   -rG   r*   )
Zlangchain_text_splittersr9   r&   r   	enumerater-   Z
split_textr,   r
   append)	r   r9   Ztext_splitterZ	page_docsidocZ
doc_chunkschunksrG   r   r   r   text_to_docs1   s*    
 rN   c                   @  sf   e Zd ZdZdddddddd	d
ZddddZdddddZddddddZddddZdS )TelegramChatApiLoaderz)Load `Telegram` chat json directory dump.Ntelegram_data.jsonzOptional[EntityLike]zOptional[int]zOptional[str]r   chat_entityapi_idapi_hashusernamer   c                 C  s"   || _ || _|| _|| _|| _dS )aI  Initialize with API parameters.

        Args:
            chat_entity: The chat entity to fetch data from.
            api_id: The API ID.
            api_hash: The API hash.
            username: The username.
            file_path: The file path to save the data to. Defaults to
                 "telegram_data.json".
        NrQ   )r   rR   rS   rT   rU   r   r   r   r   r   V   s
    zTelegramChatApiLoader.__init__Noner   c                   s   ddl m} g }|| j| j| j4 I dH p}|| j2 zJ3 dH W }|jdu}|rZ|jjnd}|	|j
|j|j |j||d q86 W d  I dH  q1 I dH s0    Y  t| jddd"}tj||dd	d
 W d   n1 s0    Y  dS )z8Fetch data from Telegram API and save it as a JSON file.r   )TelegramClientN)	sender_idr   r   
message.idis_replyreply_to_idwzutf-8r!   F   )ensure_asciiindent)Ztelethon.syncrW   rU   rS   rT   Ziter_messagesrR   Zreply_toZreply_to_msg_idrJ   rX   r   r   	isoformatidr.   r   r/   dump)r   rW   dataclientr%   rZ   r[   r3   r   r   r   fetch_data_from_telegramn   s"    
2z.TelegramChatApiLoader.fetch_data_from_telegrampd.DataFramer   )rc   r   c                   sh   dddd fdd ||d   }||d  j dgd	d td<  fd
d|d D }|S )a
  Create a dictionary of message threads from the given data.

        Args:
            data (pd.DataFrame): A DataFrame containing the conversation                 data with columns:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            dict: A dictionary where the key is the parent message ID and                 the value is a list of message IDs in ascending order.
        intrf   z	List[int])	parent_id
reply_datar   c                   s>   ||d | k d   }g }|D ]}||g || 7 }q |S )a^  
            Recursively find all replies to a given parent message ID.

            Args:
                parent_id (int): The parent message ID.
                reply_data (pd.DataFrame): A DataFrame containing reply messages.

            Returns:
                list: A list of message IDs that are replies to the parent message ID.
            r[   rY   )tolist)rh   ri   Zdirect_repliesZall_repliesZreply_id)find_repliesr   r   rk      s    z@TelegramChatApiLoader._get_message_threads.<locals>.find_repliesrZ   r[   )Zsubsetc                   s   i | ]}||g | qS r   r   )r'   rh   rk   Zreply_messagesr   r   
<dictcomp>   s   z>TelegramChatApiLoader._get_message_threads.<locals>.<dictcomp>rY   )ZdropnaZastyperg   )r   rc   Zparent_messagesmessage_threadsr   rl   r   _get_message_threads   s    z*TelegramChatApiLoader._get_message_threadszDict[int, List[int]])rn   rc   r   c                 C  s`   d}|  D ]J\}}||d | jddd  }dd |D }|d|d	 7 }q| S )
aw  
        Combine the message texts for each parent message ID based             on the list of message threads.

        Args:
            message_threads (dict): A dictionary where the key is the parent message                 ID and the value is a list of message IDs in ascending order.
            data (pd.DataFrame): A DataFrame containing the conversation data:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            str: A combined string of message texts sorted by date.
        r#   rY   r   )Zbyr   c                 S  s   g | ]}t |qS r   )r   )r'   elemr   r   r   rD      rE   z@TelegramChatApiLoader._combine_message_texts.<locals>.<listcomp>r?   z.
)itemsisinZsort_valuesrj   r1   strip)r   rn   rc   Zcombined_textrh   Zmessage_idsZmessage_textsr   r   r   _combine_message_texts   s    z,TelegramChatApiLoader._combine_message_textsr   c           
      C  s   | j durHz"ddl}|  t|   W n tyF   tdY n0 t| j}t	|dd}t
|}W d   n1 s~0    Y  zddl}W n ty   tdY n0 ||}||}| |}| ||}	t|	S )r   Nr   zy`nest_asyncio` package not found.
                    please install with `pip install nest_asyncio`
                    r    r!   zf`pandas` package not found. 
                please install with `pip install pandas`
                )rR   nest_asyncioapplyasynciorunre   ImportErrorr   r   r.   r/   r0   pandasZjson_normalizeZ	DataFramero   rt   rN   )
r   ru   r2   r3   r4   pdZnormalized_messagesZdfrn   Zcombined_textsr   r   r   r0      s.    


(



zTelegramChatApiLoader.load)NNNNrP   )	r5   r6   r7   r8   r   re   ro   rt   r0   r   r   r   r   rO   S   s        ;&rO   )
__future__r   rw   r/   pathlibr   typingr   r   r   r   r   Zlangchain_core.documentsr
   Z)langchain_community.document_loaders.baser   rz   r{   Ztelethon.hintsr   r   r   rN   rO   ZTelegramChatLoaderr   r   r   r   <module>   s   " :