a
    bg                     @   sV   d dl Z d dlmZmZ d dlZd dlmZ d dlmZ e 	e
ZG dd deZdS )    N)AnyList)Document)
BaseLoaderc                   @   sZ   e Zd ZdZdeee edddZeedddZee	d	d
dZ
ee dddZdS )DiffbotLoaderzLoad `Diffbot` json file.T	api_tokenurlscontinue_on_failurec                 C   s   || _ || _|| _dS )a	  Initialize with API token, ids, and key.

        Args:
            api_token: Diffbot API token.
            urls: List of URLs to load.
            continue_on_failure: Whether to continue loading other URLs if one fails.
               Defaults to True.
        Nr   )selfr   r	   r
    r   z/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/diffbot.py__init__   s    zDiffbotLoader.__init__)diffbot_apireturnc                 C   s
   d| S )Nzhttps://api.diffbot.com/v3/r   )r   r   r   r   r   _diffbot_api_url   s    zDiffbotLoader._diffbot_api_url)urlr   c                 C   s8   |  d}| j|d}tj||dd}|jr4| S i S )z'Get Diffbot file from Diffbot REST API.Zarticle)tokenr   
   )paramstimeout)r   r   requestsgetokjson)r   r   Zdiffbot_urlr   responser   r   r   _get_diffbot_data!   s    
zDiffbotLoader._get_diffbot_data)r   c                 C   s   t  }| jD ]}zD| |}d|v r4|d d d nd}d|i}|t||d W q ty } z.| jrtd| d|  n|W Y d	}~qd	}~0 0 q|S )
z>Extract text from Diffbot on all the URLs and return Documentsobjectsr   text source)Zpage_contentmetadatazError fetching or processing z, exception: N)	listr	   r   appendr   	Exceptionr
   loggererror)r   docsr   datar   r!   er   r   r   load.   s    

zDiffbotLoader.loadN)T)__name__
__module____qualname____doc__strr   boolr   r   r   r   r   r*   r   r   r   r   r      s    
r   )loggingtypingr   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   	getLoggerr+   r%   r   r   r   r   r   <module>   s   
