a
    bg1                     @   sp   d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
 edZedZG dd de
ZdS )	    N)ListTuple)Document)
BaseLoaderzBV\w+zav[0-9]+c                   @   sT   e Zd ZdZdee eeedddZee dddZee	ee
f d	d
dZdS )BiliBiliLoaderz9
    Load fetching transcripts from BiliBili videos.
     )
video_urlssessdatabili_jctbuvid3c                 C   sZ   || _ d| _zddlm} W n ty6   tdY n0 |rV|rV|rV|j|||d| _dS )a  
        Initialize the loader with BiliBili video URLs and authentication cookies.
        if no authentication cookies are provided, the loader can't get transcripts
        and will only fetch videos info.

        Args:
            video_urls (List[str]): List of BiliBili video URLs.
            sessdata (str): SESSDATA cookie value for authentication.
            bili_jct (str): BILI_JCT cookie value for authentication.
            buvid3 (str): BUVI3 cookie value for authentication.
        Nr   )videoTrequests package not found, please install it with `pip install bilibili-api-python`)r	   r
   r   )r   
credentialbilibili_apir   ImportErrorZ
Credential)selfr   r	   r
   r   r    r   {/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/bilibili.py__init__   s    
zBiliBiliLoader.__init__)returnc                 C   s8   g }| j D ](}| |\}}t||d}|| q
|S )z
        Load and return a list of documents containing video transcripts.

        Returns:
            List[Document]: List of Document objects transcripts and metadata.
        )Zpage_contentmetadata)r   _get_bilibili_subs_and_infor   append)r   resultsurlZ
transcript
video_infodocr   r   r   load5   s    
zBiliBiliLoader.load)r   r   c                 C   s  t |}zddlm}m} W n ty8   tdY n0 |rT|j| | jd}n>t	|}|r|jt
| dd | jd}ntd| || }|d	|i | jsd
|fS |||d }|dg }	|	r|	d dd
}
|
dsd|
 }
t|
}|jdkrft|jdg }ddd |D }d|d  d|d  d| }||fS td| d|j  ntd| d d
|fS )zU
        Retrieve video information and transcript for a given BiliBili URL.
        r   )syncr   r   )bvidr      N)aidr   z(Unable to find a valid video ID in URL: r   r   ZcidZ	subtitlesZsubtitle_urlhttpzhttps:   body c                 S   s   g | ]}|d  qS )contentr   ).0cr   r   r   
<listcomp>k       z>BiliBiliLoader._get_bilibili_subs_and_info.<locals>.<listcomp>zVideo Title: titlez, description: descz

Transcript: zFailed to fetch subtitles for z. HTTP Status Code: zNo subtitles found for video: z. Returning empty transcript.)
BV_PATTERNsearchr   r   r   r   ZVideogroupr   
AV_PATTERNint
ValueErrorget_infoupdateZget_subtitleget
startswithrequestsstatus_codejsonloadsr&   joinwarningswarn)r   r   r   r   r   vr!   r   subZsub_listZsub_urlresponseZraw_sub_titlesZraw_transcriptZraw_transcript_with_meta_infor   r   r   r   D   sV    


"

z*BiliBiliLoader._get_bilibili_subs_and_infoN)r   r   r   )__name__
__module____qualname____doc__r   strr   r   r   r   dictr   r   r   r   r   r      s       r   )r9   rer<   typingr   r   r7   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   compiler-   r0   r   r   r   r   r   <module>   s   

