a
    bg                     @   s<   d dl mZmZ d dlmZ d dlmZ G dd deZdS )    )AnyList)Document)WebBaseLoaderc                   @   sJ   e Zd ZdZee dddZeee dddZeee dd	d
Z	dS )HNLoaderz_Load `Hacker News` data.

    It loads data from either main page results or the comments page.)returnc                 C   s*   |   }d| jv r| |S | |S dS )a  Get important HN webpage information.

        HN webpage components are:
            - title
            - content
            - source url,
            - time of post
            - author of the post
            - number of comments
            - rank of the post
        itemN)Zscrapeweb_pathload_commentsload_results)self	soup_info r   u/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/langchain_community/document_loaders/hn.pyload   s    

zHNLoader.load)r   r   c                    s.   | d}|dd fdd|D S )zLoad comments from a HN post.ztr[class='athing comtr']ztr[id='pagespace']titlec                    s&   g | ]}t |j  jd dqS ))sourcer   )page_contentmetadata)r   textstripr	   ).0commentr   r   r   r   
<listcomp>#   s
   
z*HNLoader.load_comments.<locals>.<listcomp>)select
select_oneget)r   r   commentsr   r   r   r
      s
    
zHNLoader.load_comments)soupr   c           	   	   C   s   | d}g }|D ]h}|dj}|dddidd}|dddij }| j|||d}|t||||d	 q|S )
zLoad items from an HN page.ztr[class='athing']zspan[class='rank']spanclassZ	titlelineahref)r   r   linkranking)r   r$   r%   r   )	r   r   r   findr   r   r	   appendr   )	r   r   itemsZ	documentsZlineItemr%   r$   r   r   r   r   r   r   +   s"    
zHNLoader.load_resultsN)
__name__
__module____qualname____doc__r   r   r   r   r
   r   r   r   r   r   r      s   r   N)typingr   r   Zlangchain_core.documentsr   Z-langchain_community.document_loaders.web_baser   r   r   r   r   r   <module>   s   