a
    bg	                     @   sf   d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ eeZG dd deZdS )	zScrapfly Web Reader.    N)IteratorListLiteralOptional)
BaseLoader)Document)get_from_envc                   @   sV   e Zd ZdZdddddee ee ed ee e	dddd	Z
ee d
ddZdS )ScrapflyLoaderzTurn a url to llm accessible markdown with `Scrapfly.io`.

    For further details, visit: https://scrapfly.io/docs/sdk/python
    NmarkdownT)api_keyscrape_formatscrape_configcontinue_on_failure)r
   text)urlsr   r   r   r   returnc                C   sn   zddl m} W n ty*   tdY n0 |s8td|pDtdd}||d| _ || _|| _|| _|| _dS )	a  Initialize client.

        Args:
            urls: List of urls to scrape.
            api_key: The Scrapfly API key. If not specified must have env var
                SCRAPFLY_API_KEY set.
            scrape_format: Scrape result format, one or "markdown" or "text".
            scrape_config: Dictionary of ScrapFly scrape config object.
            continue_on_failure: Whether to continue if scraping a url fails.
        r   )ScrapflyClientzC`scrapfly` package not found, please run `pip install scrapfly-sdk`zURLs must be provided.r   ZSCRAPFLY_API_KEY)keyN)	scrapflyr   ImportError
ValueErrorr   r   r   r   r   )selfr   r   r   r   r   r    r   {/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/scrapfly.py__init__   s    
zScrapflyLoader.__init__)r   c                 c   s   ddl m} | jd ur| jni }| jD ]}z:| j ||fd| ji|}t|jd d|idV  W q& ty } z.| j	rt
d| d|  n|W Y d }~q&d }~0 0 q&d S )	Nr   )ScrapeConfigformatcontenturl)Zpage_contentmetadatazError fetching data from z, exception: )r   r   r   r   Zscraper   r   Zscrape_result	Exceptionr   loggererror)r   r   r   r   responseer   r   r   	lazy_load5   s    
zScrapflyLoader.lazy_load)__name__
__module____qualname____doc__r   strr   r   dictboolr   r   r   r%   r   r   r   r   r	      s   	"r	   )r)   loggingtypingr   r   r   r   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Zlangchain_core.utilsr   	getLogger__file__r!   r	   r   r   r   r   <module>   s   
