o
    *jd                     @   s  U d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZ ddlmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZmZ ddlm Z m!Z!m"Z" dd	l#m$Z$ dd
l%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- ddl.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4m5Z5m6Z6 ddl7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZBmCZC ddlDmEZE ddlFmGZG ddlHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZP ddlQmRZRmSZS ddlTmUZU ddlVmWZW ddlXmYZY zddl0mZZ[ W n e\y   ddl'm[Z[ Y nw eY Z]da^eeG e_d< deGfd d!Z`		dDd"d#Za		dDd$ebd%ebd&ee
eb  d'ee de
eb f
d(d)Zc	dEd%ebd'ee de	ebe
eb f fd*d+Zdd,ebd-ebd'ed.ebdebf
d/d0Ze	1dFd2ebd3ebd4ebd.ebd5eebebebebf d'ee d6eef de
eebebf  fd7d8Zg			dGd,ebd.ebd'ed9eeb d6eef de)fd:d;Zhde)fd<d=Zi					dHde)fd>d?Zj			dGde)fd@dAZkde)fdBdCZldS )Iac  Dataset module factory functions and data file resolution for ModelScope.

This module provides ModelScope-specific implementations of dataset module
loading (both script-based and script-free) and data file pattern resolution.
These functions are monkey-patched onto the ``datasets`` library internals
by :func:`~hf_datasets_util.load_dataset_with_ctx`.
    N)partial)Path)DictListOptionalSequenceTupleUnion)BuilderConfigDownloadConfigDownloadModeFeaturesVersionconfig
data_files)FILES_TO_IGNOREDataFilesDictEmptyDatasetError_get_data_files_patterns"_is_inside_unrequested_special_dir?_is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dirsanitize_patterns)!_prepare_path_and_storage_options	xbasenamexjoin)DataFilesNotFoundError)DatasetInfosDict)BuilderConfigsParametersDatasetModule,create_builder_configs_from_metadata_configsget_dataset_builder_classimport_main_classinfer_module_for_data_files)camelcase_to_snakecase)_MODULE_TO_EXTENSIONS_PACKAGED_DATASETS_MODULES)cached_pathis_local_pathrelative_to_absolute_path)MetadataConfigs)tracked_str)
filesystem)	_un_chain)stringify_path)DatasetCardDatasetCardData)version)HubApi)_HAS_SCRIPT_LOADING_create_importable_file_get_importable_file_path_load_importable_filefiles_to_hashget_importsinit_dynamic_modulesresolve_trust_remote_code)DEFAULT_DATASET_REVISIONREPO_TYPE_DATASET)is_relative_path)has_attr_in_class)
get_logger)_ALL_ALLOWED_EXTENSIONS)ALL_ALLOWED_EXTENSIONS_hub_apireturnc                   C   s   t d u r
tddda t S )N      )timeoutmax_retries)rA   r1    rG   rG   n/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/msdatasets/utils/_module_factories.py_get_hub_apiE   s   rI   c                 C   s   t | tttfr| stdtt| d }nt| }|pi }|r%||d< t||p*i }i }tt|D ].\}}|\}}	}
|t	|d krOt
di |
|}q4t
di |
||d< |	|d< ||d< q4|d \}}}t|fi |}|S )	Nzempty urlpath sequencer   protocol   Ztarget_optionsZtarget_protocolforG   )
isinstancelisttupleset
ValueErrorr-   r,   	enumeratereversedlendictr+   )Zurlpathstorage_optionsrJ   Zurlpath0chainZinkwargsichurlsZnested_protocolkwpaths_fsrG   rG   rH   get_fs_token_pathsQ   s*   

r_   pattern	base_pathallowed_extensionsdownload_configc                    s  t | r
t|| } nt| rtj| d tj }nd}t| |d\} }t| |d}|	dd 	dd p8|j
| 	dd 	dd ttt| h t|jtrW|jn|jd }|dkrd|d ndi }|d	krxtjtd
krxd|d< z|j| fddi|}W n ty   td|  dw fdd| D }	 dur· fdd|	D }
t|
t|	k rtt|	t|
 }td|  d|  n|	}
|
sd|  d} dur|dt  7 }t||
S )zResolve data file paths/URLs from a user-supplied pattern.

    Supports ``*``, ``**``, and fsspec-based remote patterns (e.g. ``hf://``).
    Hidden files/directories and ``__pycache__`` are excluded by default.
    r    rc   )rV   z::z://fileZhfz0.20.0FZexpand_infodetailTzUnable to find ''c                    sz   g | ]9\}}|d  dkr;t | vrttj|tjsttj|tjs|r7|n| qS )typerg   )r   r   ospathrelpathr   
startswith).0filepathinfo)files_to_ignorefs_base_path
fs_patternprotocol_prefixrG   rH   
<listcomp>   s$    z$_resolve_pattern.<locals>.<listcomp>Nc                    s8   g | ]}t  fd dt|ddd D r|qS )c                 3   s    | ]	}d |  v V  qdS ).NrG   )ro   suffixrb   rG   rH   	<genexpr>   s    z._resolve_pattern.<locals>.<listcomp>.<genexpr>rw   rK   N)anyr   split)ro   rp   ry   rG   rH   rv      s    z Some files matched the pattern 'z-' but don't have valid data file extensions: z with any supported extension )r<   r   r'   rk   rl   
splitdrivesepr   r_   r|   Zroot_markerrP   r   r   rM   rJ   strr   ZHF_HUB_VERSIONr0   parseglobFileNotFoundErrorr   itemsrT   rN   loggerrq   )r`   ra   rb   rc   rV   r^   rJ   Zglob_kwargsZtmp_file_pathsZmatched_pathsoutZinvalid_matched_files	error_msgrG   )rb   rr   rs   rt   ru   rH   _resolve_patternn   sX   

r   c                 C   s:   t t| |d}zt|W S  ty   td|  ddw )zGet data file patterns for a dataset directory.

    Tries ``SPLIT_PATTERN_SHARDED`` first, then falls back to
    ``ALL_DEFAULT_PATTERNS``.
    )ra   rc   zThe directory at z doesn't contain any data filesN)r   r   r   r   r   )ra   rc   resolverrG   rG   rH   _get_data_patterns   s   	

r   repo_idpath_in_reporevisionc              
   C   s   t  }| d\}}|j| td}|r|jdu rd| d|_z|j||||d|d}t||d}	W |	S  tyM }
 zd	}	t	|
 W Y d}
~
|	S d}
~
ww )
z<Download a single file from a ModelScope dataset repository./r   Z	repo_typeNzDownloading []F)	file_namedataset_name	namespacer   Zextension_filterendpoint)url_or_filenamerc   rd   )
rI   r|   get_endpoint_for_readr;   download_descget_dataset_file_urlr&   r   r   error)r   r   rc   r   api
_namespace_dataset_namer   r   Zrepo_file_patherG   rG   rH   _download_repo_file   s4   r   Fnamer   r   importstrust_remote_codec                 C   s  g }g }t dd |D }	|	r|std|  dt }
| }|jdu r(d|_|D ]\\}}}}|dkr<|||f q*|| krQtd|  d	| d
| d| d	|dkrc|d }|
j||||d}n|dkrj|}ntdt||d}|durtj	
||}|||f q*i }|D ] \}}zt| W q ty   ||vs||kr|||< Y qw |rt|dkrdnd}t|dkrdnd}d|v rd|d< d|v rd|d< td|  d| dd
| d | d!d"
|  d#|S )$zDownload additional modules referenced by a dataset builder script.

    Parses the import list produced by ``get_imports`` and downloads any
    internal (relative) or external modules. Library imports are validated
    but not downloaded.
    c                 s   s     | ]\}}}}|d v V  qdS ))internalexternalNrG   )ro   import_typer]   rG   rG   rH   rz      s
    

z/_download_additional_modules.<locals>.<genexpr>Loading z requires executing code from the repository. This is disabled by default for security reasons. If you trust the authors of this dataset, you can enable it with `trust_remote_code=True`.NDownloading extra moduleslibraryzError in the z script, importing relative z module but z: is the name of the script. Please change relative import zl to another name and add a '# From: URL_OR_PATH' comment pointing to the original relative import file path.r   .py)r   r   r   r   r   zWrong import_typere   rK   dependencies
dependencyZthemitZsklearnzscikit-learnZBioZ	biopythonzTo be able to use z$, you need to install the following z: z, z.
Please install z using 'pip install  z' for instance.)r{   rQ   rI   copyr   appendr   r&   rk   rl   join	importlibimport_moduleImportErrorrT   values)r   r   r   r   r   rc   r   local_importsZlibrary_importsZhas_remote_coder   r   import_nameimport_pathsub_directoryr   r   local_import_pathZneeds_to_be_installedZlibrary_import_nameZlibrary_import_pathZ_dependencies_strZ	_them_strrG   rG   rH   _download_additional_modules   s   

r   dynamic_modules_pathc              
   C   sX  |  d\}}| d}t| |||d}	|	s$td| d|  d| dt| d||d}
t|	}t| ||||||d	}g }|
rG|tj|
f |pKt }t	|	gd
d |D  }t
|d|| d}tj|st|| d}|rtd|  d t|	|||d|| |d ntd|  dt|d|| d\}}t  t }|j| d| d}t|||S )zShared implementation for loading a dataset module from a Hub .py script.

    Used by both ``get_module_with_script`` (monkey-patch for datasets<4.0) and
    ``_compat_hub_script_module`` (compat shim for datasets>=4.0).
    r   r   r   r   rc   r   zCannot find z in z at revision rw   	README.md)r   r   r   r   r   rc   r   c                 S      g | ]}|d  qS rK   rG   ro   locrG   rG   rH   rv   {      z'_load_script_module.<locals>.<listcomp>datasetsr   module_namespacesubdirectory_namer   )r   r   z3Use trust_remote_code=True. Will invoke codes from z9. Please make sure that you can trust the external codes.
local_pathr   additional_filesr   r   r   r   download_moder   z requires executing the dataset script in that repo on your local machine. Make sure you have read the code there to avoid malicious use, then set the option `trust_remote_code=True` to remove this error.)r   )ra   r   )r|   r   r   r7   r   r   r   ZREPOCARD_FILENAMEr8   r6   r4   rk   rl   existsr9   r   warningr3   rQ   r5   r   invalidate_cachesrI   get_file_base_pathr   )r   r   rc   r   r   r   r   r   Zscript_file_nameZlocal_script_pathdataset_readme_pathr   r   r   hash_valimportable_file_pathtrustmodule_pathr   builder_kwargsrG   rG   rH   _load_script_moduleG  s   






r   c                 C   s@   | j }| jjddpt}t||| j| j| jr| jnd| jdS )zXMonkey-patch target for ``HubDatasetModuleFactoryWithScript.get_module`` (datasets<4.0).r   Nr   r   rc   r   r   r   )	r   rc   rV   getr:   r   r   r   r   )selfr   r   rG   rG   rH   get_module_with_script  s    r   c                 C   s   t | |pt|p	t |||dS )zLLoad a dataset module from a Hub repo .py script (compat for datasets>=4.0).r   )r   r:   r   )rl   r   rc   r   r   r   rG   rG   rH   _compat_hub_script_module  s   	r   c              
   C   s  | }t | j}g }t|}|D ]l\}}	}
}|dkrq|dkrYtjtj||
d }tj|r9||	|f qtj	tjtj||
rX||	tjtj||
f q|dkr{t
 }d|_t|
|d}|durttj||}||	|f q|pt }t|gdd	 |D  }t|d
||d}tj|st||}|rt||g |d
|||d ntd| dt|d
||d\}}t  dtt |  ji}t|||S )zILoad a dataset module from a local .py script (compat for datasets>=4.0).r   r   r   r   r   re   Nc                 S   r   r   rG   r   rG   rG   rH   rv     r   z/_compat_local_script_module.<locals>.<listcomp>r   r   r   r   zS requires executing the dataset script. Set `trust_remote_code=True` to allow this.ra   )r   stemr7   rk   rl   r   dirnameisfiler   isdirr   r   r&   r8   r6   r4   r   r9   r3   rQ   r5   r   r   r   resolveparentr   )rl   r   r   r   r   r   r   r   r   r   r   r   rel_pathZ	dl_configr   r   r   r   r   r   rG   rG   rH   _compat_local_script_module  s   




r   c              
   C   sf  | j jddp	t}d| j d| d| jpd d}| j}| j  }t|d||d}|r6t	
t|jnt }|jd	d}t|}t|}	| jdurUt| j}
n-|r{d
tt| v r{|durl|| d
 }n
tt| d
 }t|}
nt|| j d}
tj|
|t| j d}t|| j| j d\}}t|dr|jt| d}n|t| }t| \}}|r|dv }t !t"}||||| j d}d|j#v r||d< t"di |\}}nt$|j%dd
|i|g}d}t& }|j'|t(d}|j)||d| jt*t| jj|d}| j  }|j+du rd|_+|du r%t,|	dkr%tt|	}t-||||	t.|||ddS )zLMonkey-patch target for ``HubDatasetModuleFactoryWithoutScript.get_module``.r   Nzhf://datasets/@r   rd   r   r   r   r   re   )ra   rb   rc   )r   rl   rc   filter)
extensions>   ZaudiofolderZimagefolder)r   metadata_configsra   default_builder_kwargsrc   supports_metadatar   )r   r   )ra   r   r   r   zDownloading metadatarK   )r   builder_configsdefault_config_name)dataset_infosZbuilder_configs_parametersrG   )/rc   rV   r   r:   r   data_dirrstripr   r   r.   loadr   datar/   r)   Zfrom_dataset_card_datar   r   r   nextiterr   r   r   Zfrom_patternsr@   r"   hasattrr   r$   Zfilter_extensionsr%   inspect	signaturer   
parametersr!   ZBUILDER_CONFIG_CLASSrI   r   r;   r   r#   r   rT   r   r   )r   r   ra   r   rc   r   Zdataset_card_dataZsubset_namer   r   patternsZsubset_data_filesZdata_files_dictmodule_namer   r   r]   r   Zcreate_builder_signatureZin_argsr   r   r   r   r   rG   rG   rH   get_module_without_script  s   
	







r   )NN)N)F)NNN)NNNNN)m__doc__r   r   rk   	functoolsr   pathlibr   typingr   r   r   r   r   r	   r   r
   r   r   r   r   r   r   Zdatasets.data_filesr   r   r   r   r   r   r   Z,datasets.download.streaming_download_managerr   r   r   Zdatasets.exceptionsr   Zdatasets.infor   Zdatasets.loadr   r   r   r    r!   r"   Zdatasets.namingr#   Zdatasets.packaged_modulesr$   r%   Zdatasets.utils.file_utilsr&   r'   r(   Zdatasets.utils.metadatar)   Zdatasets.utils.trackr*   Zfsspecr+   Zfsspec.corer,   Zfsspec.utilsr-   Zhuggingface_hubr.   r/   	packagingr0   Z
modelscoper1   Z#modelscope.msdatasets.utils._compatr2   r3   r4   r5   r6   r7   r8   r9   Zmodelscope.utils.constantr:   r;   Zmodelscope.utils.file_utilsr<   Zmodelscope.utils.import_utilsr=   Zmodelscope.utils.loggerr>   r?   r@   r   r   rA   __annotations__rI   r_   r   r   r   r   boolr   r   r   r   r   r   rG   rG   rG   rH   <module>   s    $$ (
 

E

*
_
b

N