a
    ^g/                     @   s  d dl Z d dlZd dlZd dlZddlmZmZmZm	Z	 ddl
mZ G dd dZG dd deZe Zd	d
de dfD ]ae jtr| qq|dadd ZG dd deZdd ZG dd deZG dd deZG dd deZG dd deZG dd deZd"d d!ZdS )#    N   )
fvecs_read
ivecs_read
bvecs_mmap
fvecs_mmap)knnc                   @   s`   e Zd ZdZdd Zdd ZdddZd	d
 ZdddZdddZ	dddZ
dd Zdd ZdS )Datasetz+ Generic abstract class for a test dataset c                 C   s"   d| _ d| _d| _d| _d| _dS )z2 the constructor should set the following fields: L2Ndmetricnqnbntself r   d/var/www/html/cobodadashboardai.evdpl.com/venv/lib/python3.9/site-packages/faiss/contrib/datasets.py__init__   s
    zDataset.__init__c                 C   s
   t  dS )z' return the queries as a (nq, d) array NNotImplementedErrorr   r   r   r   get_queries   s    zDataset.get_queriesNc                 C   s
   t  dS )z' return the queries as a (nt, d) array Nr   r   maxtrainr   r   r   	get_train   s    zDataset.get_trainc                 C   s
   t  dS )z' return the queries as a (nb, d) array Nr   r   r   r   r   get_database"   s    zDataset.get_database   r   r   c           	      c   s`   |   }|\}}| j| | | j|d  |  }}t|||D ]}||t|| | V  q>dS )a7  returns an iterator on database vectors.
        bs is the number of vectors per batch
        split = (nsplit, rank) means the dataset is split in nsplit
        shards and we want shard number rank
        The default implementation just iterates over the full matrix
        returned by get_dataset.
        r   N)r   r   rangemin	r   bssplitxbZnsplitZrankZi0i1Zj0r   r   r   database_iterator&   s
    "zDataset.database_iteratorc                 C   s
   t  dS )z7 return the ground truth for k-nearest neighbor search Nr   r   kr   r   r   get_groundtruth4   s    zDataset.get_groundtruthc                 C   s
   t  dS )z* return the ground truth for range search Nr   )r   Zthreshr   r   r   get_groundtruth_range8   s    zDataset.get_groundtruth_rangec              
   C   s,   d| j  d| j d| j d| j d| j 
S )Nzdataset in dimension z, with metric z
, size: Q z B z T r   r   r   r   r   __str__<   s    zDataset.__str__c                 C   s   |   j| j| jfksJ | jdkrP| jdd}|jd| jfksPJ d|jf |  j| j| jfksjJ | jddj| jdfksJ dS )z8 runs the previous and checks the sizes of the matrices r   {   )r   zshape=%s   )r(   N)	r   shaper   r   r   r   r   r   r)   )r   xtr   r   r   check_sizes@   s    
 zDataset.check_sizes)N)r   r   )N)N)__name__
__module____qualname____doc__r   r   r   r   r&   r)   r*   r+   r0   r   r   r   r   r      s   



r   c                   @   s>   e Zd ZdZdddZdd Zdd	d
Zdd ZdddZdS )SyntheticDatasetzOA dataset that is not completely random but still challenging to
    index
    r
   :  c                 C   s   t |  ||||f\| _| _| _| _d}|| | }tj|}	|	j	||fd}
t
|
|	||}
|
|	|d d  }
t|
}
|
d}
|| _|
d | | _|
|||  | _|
|| d  | _d S )N
   )size   g?float32)r   r   r   r   r   r   nprandomZRandomStatenormaldotZrandsinZastyper   r/   r$   xq)r   r   r   r   r   r   seedZd1nrsxr   r   r   r   O   s    


zSyntheticDataset.__init__c                 C   s   | j S N)r@   r   r   r   r   r   a   s    zSyntheticDataset.get_queriesNc                 C   s    |d ur|n| j }| jd | S rE   )r   r/   r   r   r   r   r   d   s    zSyntheticDataset.get_trainc                 C   s   | j S rE   )r$   r   r   r   r   r   h   s    zSyntheticDataset.get_databased   c                 C   s(   t | j| j|| jdkrtjntjd S )Nr
   r   )r   r@   r$   r   faissZ	METRIC_L2ZMETRIC_INNER_PRODUCTr'   r   r   r   r)   k   s    
z SyntheticDataset.get_groundtruth)r
   r6   )N)rF   	r1   r2   r3   r4   r   r   r   r   r)   r   r   r   r   r5   J   s   

r5   z/datasets01/simsearch/041218/z7/mnt/vol/gfsai-flash3-east/ai-group/datasets/simsearch/z/home/z/simsearch/data/zdata/c                 C   s   | a d S rE   )dataset_basedir)pathr   r   r   set_dataset_basedir   s    rK   c                   @   s<   e Zd ZdZdd Zdd ZdddZd	d
 ZdddZdS )DatasetSIFT1M_
    The original dataset is available at: http://corpus-texmex.irisa.fr/
    (ANN_SIFT1M)
    c                 C   s,   t |  d\| _| _| _| _td | _d S )N)r   順 @B '  zsift1M/r   r   r   r   r   r   rI   basedirr   r   r   r   r      s    
zDatasetSIFT1M.__init__c                 C   s   t | jd S )Nzsift_query.fvecsr   rR   r   r   r   r   r      s    zDatasetSIFT1M.get_queriesNc                 C   s(   |d ur|n| j }t| jd d | S )Nzsift_learn.fvecsr   r   rR   r   r   r   r   r      s    zDatasetSIFT1M.get_trainc                 C   s   t | jd S )Nzsift_base.fvecsrS   r   r   r   r   r      s    zDatasetSIFT1M.get_databasec                 C   s:   t | jd }|d ur6|dks"J |d d d |f }|S )Nzsift_groundtruth.ivecsrF   r   rR   r   r(   gtr   r   r   r)      s
    zDatasetSIFT1M.get_groundtruth)N)NrH   r   r   r   r   rL      s   
rL   c                 C   s   t j| ddS )Nr:   Zdtype)r;   Zascontiguousarray)rD   r   r   r   sanitize   s    rY   c                   @   sH   e Zd ZdZdddZdd Zddd	Zdd
dZdd ZdddZ	dS )DatasetBigANNz_
    The original dataset is available at: http://corpus-texmex.irisa.fr/
    (ANN_SIFT1B)
      c                 C   sN   t |  |dv sJ || _|d }dd|df\| _| _| _| _td | _d S )N)
r         r7      2   rF      i  r[   rO   r    rP   zbigann/)	r   r   nb_Mr   r   r   r   rI   rR   )r   rb   r   r   r   r   r      s    
zDatasetBigANN.__init__c                 C   s   t t| jd d d  S )Nzbigann_query.bvecs)rY   r   rR   r   r   r   r   r      s    zDatasetBigANN.get_queriesNc                 C   s,   |d ur|n| j }tt| jd d | S )Nzbigann_learn.bvecs)r   rY   r   rR   r   r   r   r   r      s    zDatasetBigANN.get_trainc                 C   s@   t | jd| j  }|d ur<|dks(J |d d d |f }|S )Nzgnd/idx_%dM.ivecsrF   )r   rR   rb   rV   r   r   r   r)      s
    zDatasetBigANN.get_groundtruthc                 C   s.   | j dk sJ dtt| jd d | j S )NrF   dataset too large, use iteratorbigann_base.bvecs)rb   rY   r   rR   r   r   r   r   r   r      s    zDatasetBigANN.get_databaser   r   c           	      c   sj   t | jd }|\}}| j| | | j|d  |  }}t|||D ] }t||t|| | V  qDd S )Nrd   r   )r   rR   r   r   rY   r    r!   r   r   r   r&      s
    "zDatasetBigANN.database_iterator)r[   )N)N)r   r   
r1   r2   r3   r4   r   r   r   r)   r   r&   r   r   r   r   rZ      s   


rZ   c                   @   sH   e Zd ZdZdddZdd Zddd	Zdd
dZdd ZdddZ	dS )DatasetDeep1Bzv
    See
    https://github.com/facebookresearch/faiss/tree/main/benchs#getting-deep1b
    on how to get the data
     ʚ;c                 C   sf   t |  dddddd}||v s&J dd|d	f\| _| _| _| _td
 | _d| j|| j f | _d S )NZ100kZ1MZ10MZ100M1B)rN   rO   i ra   rg   `   i]rP   zdeep1b/z%sdeep%s_groundtruth.ivecs)	r   r   r   r   r   r   rI   rR   gt_fname)r   r   Z
nb_to_namer   r   r   r      s    

zDatasetDeep1B.__init__c                 C   s   t t| jd S )Nzdeep1B_queries.fvecs)rY   r   rR   r   r   r   r   r      s    zDatasetDeep1B.get_queriesNc                 C   s,   |d ur|n| j }tt| jd d | S )Nzlearn.fvecs)r   rY   r   rR   r   r   r   r   r      s    zDatasetDeep1B.get_trainc                 C   s6   t | j}|d ur2|dksJ |d d d |f }|S )NrF   )r   rj   rV   r   r   r   r)      s
    
zDatasetDeep1B.get_groundtruthc                 C   s.   | j dksJ dtt| jd d | j  S )Nra   rc   
base.fvecs)r   rY   r   rR   r   r   r   r   r      s    zDatasetDeep1B.get_databaser   r   c           	      c   sj   t | jd }|\}}| j| | | j|d  |  }}t|||D ] }t||t|| | V  qDd S )Nrk   r   )r   rR   r   r   rY   r    r!   r   r   r   r&      s
    "zDatasetDeep1B.database_iterator)rg   )N)N)r   r   re   r   r   r   r   rf      s   


rf   c                   @   s4   e Zd ZdZdddZdd Zdd	 Zdd
dZdS )DatasetGlovezD
    Data from http://ann-benchmarks.com/glove-100-angular.hdf5
    NFc                 C   sh   dd l }|rJ d|s td }||d| _d| _d\| _| _| jd jd | _| jd jd | _	d S )	Nr   znot implementedzglove/glove-100-angular.hdf5rIP)rF   r   traintest)
h5pyrI   File
glove_h5pyr   r   r   r.   r   r   )r   locdownloadrq   r   r   r   r     s    zDatasetGlove.__init__c                 C   s   t | jd }t| |S )Nrp   r;   arrayrs   rG   Znormalize_L2r   r@   r   r   r   r     s    
zDatasetGlove.get_queriesc                 C   s   t | jd }t| |S )Nro   rv   r   r$   r   r   r   r     s    
zDatasetGlove.get_databasec                 C   s6   | j d }|d ur2|dksJ |d d d |f }|S )NZ	neighborsrF   )rs   rV   r   r   r   r)     s
    
zDatasetGlove.get_groundtruth)NF)Nr1   r2   r3   r4   r   r   r   r)   r   r   r   r   rl     s
   
rl   c                   @   s2   e Zd ZdZdd Zdd Zdd Zdd	d
ZdS )DatasetMusic100zO
    get dataset from
    https://github.com/stanis-morozov/ip-nsw#dataset
    c                 C   s2   t |  d\| _| _| _| _d| _td | _d S )N)rF   r   rO   rP   rn   z
music-100/)	r   r   r   r   r   r   r   rI   rR   r   r   r   r   r   ,  s    
zDatasetMusic100.__init__c                 C   s$   t j| jd dd}|dd}|S )Nzquery_music100.binr:   rX   r	   rF   r;   fromfilerR   Zreshaperx   r   r   r   r   2  s    zDatasetMusic100.get_queriesc                 C   s$   t j| jd dd}|dd}|S )Nzdatabase_music100.binr:   rX   r	   rF   r|   ry   r   r   r   r   7  s    zDatasetMusic100.get_databaseNc                 C   s<   t | jd }|d ur8|dks$J |d d d |f }|S )Nzgt.npyrF   )r;   loadrR   rV   r   r   r   r)   <  s
    zDatasetMusic100.get_groundtruth)Nrz   r   r   r   r   r{   &  s
   r{   c                   @   s<   e Zd ZdZdd Zdd ZdddZd	d
 ZdddZdS )DatasetGIST1MrM   c                 C   s,   t |  d\| _| _| _| _td | _d S )N)i  rN   rO   rP   zgist1M/rQ   r   r   r   r   r   I  s    
zDatasetGIST1M.__init__c                 C   s   t | jd S )Nzgist_query.fvecsrS   r   r   r   r   r   N  s    zDatasetGIST1M.get_queriesNc                 C   s(   |d ur|n| j }t| jd d | S )Nzgist_learn.fvecsrT   r   r   r   r   r   Q  s    zDatasetGIST1M.get_trainc                 C   s   t | jd S )Nzgist_base.fvecsrS   r   r   r   r   r   U  s    zDatasetGIST1M.get_databasec                 C   s:   t | jd }|d ur6|dks"J |d d d |f }|S )Nzgist_groundtruth.ivecsrF   rU   rV   r   r   r   r)   X  s
    zDatasetGIST1M.get_groundtruth)N)NrH   r   r   r   r   r   C  s   
r   deep1MFc                 C   s   | dkrt  S | dkrt S | drL| dkr2dnt| dd }t|dS | d	r| d
d }|d dkrdt|dd  }n@|dkrd}n2|d dkrdt|dd  }ndsJ d| t|dS | dkrt S | dkrt|dS td|  dS )z converts a string describing a dataset to a Dataset object
    Supports sift1M, bigann1M..bigann1B, deep1M..deep1B, music-100 and glove
    Zsift1MZgist1MZbigannZbigann1Br[      r	   )rb   deepr9   NMrO   rh   rg   r(   Fzdid not recognize suffix )r   z	music-100Zglove)ru   zunknown dataset )	rL   r   
startswithintrZ   rf   r{   rl   RuntimeError)Zdatasetru   ZdbsizeZszsufr   r   r   dataset_from_name`  s,    




r   )r   F)osnumpyr;   rG   getpassZvecs_ior   r   r   r   Zexhaustive_searchr   r   r5   getuserusernamerI   rJ   existsrK   rL   rY   rZ   rf   rl   r{   r   r   r   r   r   r   <module>   s0   ;.
(0#