a
    bg                     @   sV   d Z ddlZddlmZmZmZ ddlmZ ddlm	Z	 e
eZG dd de	ZdS )z1Loader that uses unstructured to load HTML files.    N)AnyIteratorList)Document)
BaseLoaderc                	   @   sT   e Zd ZdZdee eeeeeddddZee	 dd	d
Z
ee	 dddZdS )NewsURLLoadera/  Load news articles from URLs using `Unstructured`.

    Args:
        urls: URLs to load. Each is loaded into its own document.
        text_mode: If True, extract text from URL and use that for page content.
            Otherwise, extract raw HTML.
        nlp: If True, perform NLP on the extracted contents, like providing a summary
            and extracting keywords.
        continue_on_failure: If True, continue loading documents even if
            loading fails for a particular URL.
        show_progress_bar: If True, use tqdm to show a loading progress bar. Requires
            tqdm to be installed, ``pip install tqdm``.
        **newspaper_kwargs: Any additional named arguments to pass to
            newspaper.Article().

    Example:
        .. code-block:: python

            from langchain_community.document_loaders import NewsURLLoader

            loader = NewsURLLoader(
                urls=["<url-1>", "<url-2>"],
            )
            docs = loader.load()

    Newspaper reference:
        https://newspaper.readthedocs.io/en/latest/
    TFN)urls	text_modenlpcontinue_on_failureshow_progress_barnewspaper_kwargsreturnc                 K   sX   zddl }|j| _W n ty.   tdY n0 || _|| _|| _|| _|| _|| _	dS )zInitialize with file path.r   NzMnewspaper package not found, please install it with `pip install newspaper3k`)
	newspaper__version__Z_NewsURLLoader__versionImportErrorr   r	   r
   r   r   r   )selfr   r	   r
   r   r   r   r    r   w/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/news.py__init__+   s    

zNewsURLLoader.__init__)r   c              
   C   s^   |   }| jrVzddlm} W n. tyL } ztd|W Y d }~n
d }~0 0 ||}t|S )Nr   )tqdmzPackage tqdm must be installed if show_progress_bar=True. Please install with 'pip install tqdm' or set show_progress_bar=False.)	lazy_loadr   r   r   list)r   iterr   er   r   r   loadF   s    zNewsURLLoader.loadc                 c   sp  zddl m} W n. ty> } ztd|W Y d }~n
d }~0 0 | jD ]"}z4||fi | j}|  |  | jr~|  W nR ty } z:| j	rt
d| d|  W Y d }~qFn|W Y d }~n
d }~0 0 t|ddt|dt|d	dt|d
g t|ddt|ddt|ddd}| jr.|j}n|j}| jr\t|dg |d< t|dd|d< t||dV  qFd S )Nr   )ArticlezFCannot import newspaper, please install with `pip install newspaper3k`zError fetching or processing z, exception: title urlZcanonical_linkauthorsZ	meta_langZmeta_descriptionpublish_date)r   linkr    languagedescriptionr!   keywordssummary)Zpage_contentmetadata)r   r   r   r   r   downloadparser
   	Exceptionr   loggererrorgetattrr	   texthtmlr   )r   r   r   r   Zarticler'   contentr   r   r   r   T   sD    




	zNewsURLLoader.lazy_load)TFTF)__name__
__module____qualname____doc__r   strboolr   r   r   r   r   r   r   r   r   r   r      s         r   )r4   loggingtypingr   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   	getLoggerr1   r+   r   r   r   r   r   <module>   s   
