o
    0j@*                     @   s  d Z ddlZddlmZmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZmZ ddlmZ dd	lmZmZmZmZmZmZmZmZmZmZmZ dd
lmZ ddlm Z  e!eeZ"eeZ#ej$ddd e#D e%dZ&ee%dB ej'dd(e" dee"df Z)eddZ*e*j+dg dddddddeddddddfdee%dB ej,ddf dedededee&dB ej'd df d!ed"e)d#ee-ej'd$d%d&df d'ee-ej'd(d)df d*ee-ej'd+d,d-df d.ed/ed0dfd1d2Z.e*j+d3g d4d	5	dYd6ee%ej,d7df d!ed/ed0dfd8d9Z/e*j+d:d;d<gd			dZd6ee%ej,d=df d.ed"e)d/ed0df
d>d?Z0e*j+d@g dAd			dZd6ee%ej,d=df dBee%dB ej'dCdDdf dEee%dB ej'dFdf d/ed0df
dGdHZ1e*j+dIdJdKgd	d[dIee%ej,dLdf d/ed0dfdMdNZ2e*j+dOg dPd			d\d6ee%ej,d=df dQee-ej'dRdSdf dTee-ej'dUdVdf d/ed0df
dWdXZ3dS )]a  Contains commands to interact with datasets on the Hugging Face Hub.

Usage:
    # list datasets on the Hub
    hf datasets ls

    # list datasets with a search query
    hf datasets ls --search "code"

    # get info about a dataset
    hf datasets info HuggingFaceFW/fineweb
    N)	Annotatedget_args)execute_raw_sql_query)CLIErrorRepositoryNotFoundErrorRevisionNotFoundError)DatasetSort_TExpandDatasetProperty_T)DatasetCard   )REPO_LIST_DEFAULT_LIMIT	AuthorOpt	FilterOptLimitOptRevisionOpt	SearchOptTokenOptapi_object_to_dict
get_hf_apimake_expand_properties_parsertyper_factory)list_repo_files_cmd)outDatasetSortEnumc                 C   s   i | ]}||qS  r   ).0sr   r   ]/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/huggingface_hub/cli/datasets.py
<dictcomp>8   s    r   )typezComma-separated properties to return. When used, only the listed properties (and id) are returned. Example: '--expand=downloads,likes,tags'. Valid: z, .)helpcallbackz"Interact with datasets on the Hub.)r!   z	list | ls)zhf datasets lsz*hf datasets ls --sort downloads --limit 10zhf datasets ls --search "code"z*hf datasets ls --filter benchmark:officialz$hf datasets ls HuggingFaceFW/finewebz'hf datasets ls HuggingFaceFW/fineweb -Rz.hf datasets ls HuggingFaceFW/fineweb --tree -h)ZexamplesFrepo_idzVDataset ID (e.g. `username/repo-name`) to list files from. If omitted, lists datasets.searchauthorfiltersortzSort results.limitexpandhuman_readablez--human-readablez-hz=Show sizes in human readable format (only for listing files).as_treez--treez3List files in tree format (only for listing files).	recursivez--recursivez-Rz0List files recursively (only for listing files).revisiontokenreturnc              	   C   s  | durE|durt d|durt d|durt d|dur(t d|tkr1t d|dur:t dt| d|||	|
|d	S |rLt d
|	rSt d|rZt d|
durct dt|d}|rm|jnd}dd |j||||||dD }t| dS )zList datasets on the Hub, or files in a dataset repo.

    When called with no argument, lists datasets on the Hub.
    When called with a dataset ID, lists files in that dataset repo.
    Nz'Cannot use --search when listing files.z'Cannot use --author when listing files.z'Cannot use --filter when listing files.z%Cannot use --sort when listing files.z&Cannot use --limit when listing files.z'Cannot use --expand when listing files.Zdataset)r#   Z	repo_typer*   r+   r,   r-   r.   z(Cannot use --tree when listing datasets.z-Cannot use --recursive when listing datasets.z2Cannot use --human-readable when listing datasets.z,Cannot use --revision when listing datasets.r.   c                 S      g | ]}t |qS r   r   )r   dataset_infor   r   r   
<listcomp>   s    zdatasets_ls.<locals>.<listcomp>)r&   r%   r$   r'   r(   r)   )	typerZBadParameterr   r   r   valueZlist_datasetsr   table)r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   apisort_keyresultsr   r   r   datasets_lsG   sV   .











r;   leaderboard)z4hf datasets leaderboard SWE-bench/SWE-bench_VerifiedzLhf datasets leaderboard SWE-bench/SWE-bench_Verified --limit 5 --format jsonzIhf datasets ls --filter benchmark:official  # list available leaderboards   
dataset_idz?The benchmark dataset ID (e.g. `SWE-bench/SWE-bench_Verified`).c                 C   sv   t |d}|j| d}dd |d| D }tj|g ddddd	d
 td |r9td|d j d dS dS )zList model scores from a dataset leaderboard. This command helps find the best models for a task or compare models by benchmark scores. Use 'hf datasets ls --filter benchmark:official' to list available leaderboards.r0   )r#   c                 S   r1   r   r2   r   entryr   r   r   r4      s    z(datasets_leaderboard.<locals>.<listcomp>N)rankmodel_idr6   sourcerB   right)rA   r6   )headersid_keyZ
alignmentszPUse 'hf datasets ls --filter benchmark:official' to list available leaderboards.zUse 'hf models info r   z' to get details about a model.)r   Zget_dataset_leaderboardr   r7   hintrB   )r>   r(   r.   r8   r<   r:   r   r   r   datasets_leaderboard   s   

rH   infoz&hf datasets info HuggingFaceFW/finewebz9hf datasets info my-dataset --expand downloads,likes,tagsz+The dataset ID (e.g. `username/repo-name`).c              
   C   s   t |d}z
|j| ||d}W n, ty$ } z	td|  d|d}~w ty; } ztd| d|  d|d}~ww t| dS )	z$Get info about a dataset on the Hub.r0   )r#   r-   r)   z	Dataset 'z' not found.Nz
Revision 'z' not found on 'z'.)r   r3   r   r   r   r   dict)r>   r-   r)   r.   r8   rI   er   r   r   datasets_info   s   
rL   Zparquet)z(hf datasets parquet cfahlgren1/hub-statsz8hf datasets parquet cfahlgren1/hub-stats --subset modelsz6hf datasets parquet cfahlgren1/hub-stats --split trainz6hf datasets parquet cfahlgren1/hub-stats --format jsonsubsetz--subsetz(Filter parquet entries by subset/config.splitz Filter parquet entries by split.c                    sP   t |d}|j| |d} fdd|D }dd |D }tj|g ddd d	S )
z/List parquet file URLs available for a dataset.r0   )r#   configc                    s"   g | ]} d u s|j  kr|qS NrN   r?   rQ   r   r   r4      s   " z$datasets_parquet.<locals>.<listcomp>c                 S   s"   g | ]}|j |j|j|jd qS )rM   rN   urlsize)rO   rN   rS   rT   r?   r   r   r   r4      s    rR   rS   )rE   rF   N)r   Zlist_dataset_parquet_filesr   r7   )r>   rM   rN   r.   r8   entriesfilteredr:   r   rQ   r   datasets_parquet   s   
rW   sqlzhf datasets sql "SELECT COUNT(*) AS rows FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet')"zhf datasets sql "SELECT * FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet') LIMIT 5" --format jsonzRaw SQL query to execute.c              
   C   sF   zt | |d}W n ty } ztt||d}~ww t| dS )zAExecute a raw SQL query with DuckDB against dataset parquet URLs.)Z	sql_queryr.   N)r   ImportErrorr   strr   r7   )rX   r.   resultrK   r   r   r   datasets_sql   s   r\   card)z&hf datasets card HuggingFaceFW/finewebz1hf datasets card HuggingFaceFW/fineweb --metadataz?hf datasets card HuggingFaceFW/fineweb --metadata --format jsonz-hf datasets card HuggingFaceFW/fineweb --textmetadataz
--metadataz'Output only the metadata from the card.textz--textz(Output only the text body (no metadata).c                 C   sl   |r|rt dtj| |d}|rt|j  dS |r%t|j dS t|j t	d|  d dS )z7Get the dataset card (README) for a dataset on the Hub.z---metadata and --text are mutually exclusive.r0   zUse `hf datasets card z/ --metadata` to extract only the card metadata.N)
r   r
   loadr   rJ   datato_dictr_   contentrG   )r>   r^   r_   r.   r]   r   r   r   datasets_card  s   rd   )r=   N)NNNrP   )FFN)4__doc__enumtypingr   r   r5   Zhuggingface_hub._dataset_viewerr   Zhuggingface_hub.errorsr   r   r   Zhuggingface_hub.hf_apir   r	   Zhuggingface_hub.repocardr
   Z
_cli_utilsr   r   r   r   r   r   r   r   r   r   r   Z_file_listingr   _outputr   sortedZ_EXPAND_PROPERTIESZ_SORT_OPTIONSEnumrZ   r   OptionjoinZ	ExpandOptZdatasets_clicommandZArgumentboolr;   rH   rL   rW   r\   rd   r   r   r   r   <module>   sV  4
	

R
		