a
    bg                     @   sf   d dl Z d dlmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ e eZG dd de
ZdS )    N)AnyIteratorListOptionalSequence)Document)
BaseLoader)NewsURLLoaderc                   @   sp   e Zd ZdZdeee  ee eeeddddZ	e
e dd	d
Zeee dddZee dddZdS )RSSFeedLoaderaQ  Load news articles from `RSS` feeds using `Unstructured`.

    Args:
        urls: URLs for RSS feeds to load. Each articles in the feed is loaded into its own document.
        opml: OPML file to load feed urls from. Only one of urls or opml should be provided.  The value
        can be a URL string, or OPML markup contents as byte or string.
        continue_on_failure: If True, continue loading documents even if
            loading fails for a particular URL.
        show_progress_bar: If True, use tqdm to show a loading progress bar. Requires
            tqdm to be installed, ``pip install tqdm``.
        **newsloader_kwargs: Any additional named arguments to pass to
            NewsURLLoader.

    Example:
        .. code-block:: python

            from langchain_community.document_loaders import RSSFeedLoader

            loader = RSSFeedLoader(
                urls=["<url-1>", "<url-2>"],
            )
            docs = loader.load()

    The loader uses feedparser to parse RSS feeds.  The feedparser library is not installed by default so you should
    install it if using this loader:
    https://pythonhosted.org/feedparser/

    If you use OPML, you should also install listparser:
    https://pythonhosted.org/listparser/

    Finally, newspaper is used to process each article:
    https://newspaper.readthedocs.io/en/latest/
    NTF)urlsopmlcontinue_on_failureshow_progress_barnewsloader_kwargsreturnc                 K   s:   |du |du krt d|| _|| _|| _|| _|| _dS )zInitialize with urls or OPML.Nz;Provide either the urls or the opml argument, but not both.)
ValueErrorr   r   r   r   r   )selfr   r   r   r   r    r   v/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/rss.py__init__/   s    	zRSSFeedLoader.__init__)r   c              
   C   s^   |   }| jrVzddlm} W n. tyL } ztd|W Y d }~n
d }~0 0 ||}t|S )Nr   )tqdmzPackage tqdm must be installed if show_progress_bar=True. Please install with 'pip install tqdm' or set show_progress_bar=False.)	lazy_loadr   r   ImportErrorlist)r   iterr   er   r   r   loadD   s    zRSSFeedLoader.loadc              
   C   sd   | j r| j S zdd l}W n. tyF } ztd|W Y d }~n
d }~0 0 || j}dd |jD S )Nr   zPackage listparser must be installed if the opml arg is used. Please install with 'pip install listparser' or use the urls arg instead.c                 S   s   g | ]
}|j qS r   )url).0feedr   r   r   
<listcomp>_       z+RSSFeedLoader._get_urls.<locals>.<listcomp>)r   
listparserr   parser   Zfeeds)r   r"   r   Zrssr   r   r   	_get_urlsR   s    zRSSFeedLoader._get_urlsc                 c   s\  zdd l }W n ty&   tdY n0 | jD ]&}z0||}t|ddrbtd| d|j W nR ty } z:| jrt	
d| d|  W Y d }~q.n|W Y d }~n
d }~0 0 zD|jD ]8}tf d|jgi| j}| d }||jd< |V  qW q. tyT } z>| jr<t	
d	|j d|  W Y d }~q.n|W Y d }~q.d }~0 0 q.d S )
Nr   zMfeedparser package not found, please install it with `pip install feedparser`ZbozoFzError fetching z, exception: r   r   zError processing entry )
feedparserr   r$   r#   getattrr   Zbozo_exception	Exceptionr   loggererrorentriesr	   linkr   r   metadata)r   r%   r   r   r   entryloaderZarticler   r   r   r   a   sB    



zRSSFeedLoader.lazy_load)NNTF)__name__
__module____qualname____doc__r   r   strboolr   r   r   r   r   propertyr$   r   r   r   r   r   r   r
      s"   $    
r
   )loggingtypingr   r   r   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   Z)langchain_community.document_loaders.newsr	   	getLoggerr/   r(   r
   r   r   r   r   <module>   s   
