o
    )j6                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlZddlZddlmZ e ZdZdZG d	d
 d
ZG dd deeZdedefddZG dd dZG dd dZdS )zUnified file-level upload tracker.

Merges hash cache and upload progress into a single .ms_upload_cache file
with per-file status tracking, eliminating batch-granularity issues.
    N)Enum)Path)DictListOptionalTupleUnion)
get_loggerz.ms_upload_progress   c                   @   s   e Zd ZdZdZdZdZdS )
FileStatusz7Single-character status codes for compact JSON storage.ucfN)__name__
__module____qualname____doc__UPLOADED	COMMITTEDFAILED r   r   ^/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/hub/upload_tracker.pyr      s
    r   c                   @   s>   e Zd ZdZdZdZdZdZdZdZ	dZ
ed	efd
dZdS )ErrorCategoryz:Classification of upload/commit errors for retry strategy.Ztransient_networkZtransient_serverZ	throttledZauth_failed	not_foundZfile_invalidunknownreturnc                 C   s   | t jt jt jfvS N)r   AUTH_FAILED	NOT_FOUNDFILE_INVALIDselfr   r   r   is_retryable.   s
   zErrorCategory.is_retryableN)r   r   r   r   TRANSIENT_NETWORKTRANSIENT_SERVER	THROTTLEDr   r   r   UNKNOWNpropertyboolr"   r   r   r   r   r   $   s    r   errorr   c                 C   sd  t |  }t| trtjS t| trtjS t| ttfr tj	S t| t
jjrSt| dd}|durP|j}|dkr;tjS |dv rBtjS |dkrItjS |dkrPtjS tjS t| tr}d|v r_tjS d|v sgd	|v rjtjS d
|v rqtjS td|rztjS tjS t| ttfrd|v sd|v rtjS d|v sd|v rtjS tj	S d|v sd|v rtj	S d|v rtj	S tjS )zClassify an exception into a retry category.

    Returns an ErrorCategory that indicates whether the error is transient
    (retryable) or permanent, and what kind of failure occurred.
    responseNi  )i  i  i  i  Z429Z401Z403Z404z#(?:http[/\s]*)?5\d{2}|server.*errorzsize changedzno such fileZ
permissionzaccess deniedtimeoutz	timed out
connection)strlower
isinstanceFileNotFoundErrorr   r   PermissionErrorConnectionErrorTimeoutErrorr#   requests
exceptions	HTTPErrorgetattrstatus_coder%   r   r   r$   r&   
ValueErrorresearchIOErrorOSError)r)   Z	error_strrespstatusr   r   r   classify_error7   sR   


r@   c                	   @   s6  e Zd ZdZdeeef defddZedede	de
d	efd
dZdede	de
d	ee fddZdede	de
defddZdede	de
d	efddZdede	de
d	ee fddZdede	de
fddZdeeee	e
f  fddZ	d*dede	de
defddZdd Zd d! Zd"d# Zd$efd%d&Zd'd( Zd)S )+UploadTrackera  Unified file-level upload tracker.

    Replaces both UploadHashCache (.ms_upload_cache) and
    UploadProgress (.ms_upload_progress) with a single file that tracks
    per-file hash and upload status.

    File format (version 3):
        {
            "version": 3,
            "repo_id": "user/repo",
            "files": {
                "path|mtime|size": {"hash": "...", "size": 123, "status": "c"},
                ...
            }
        }

    Status values:
        "c" = committed (blob uploaded AND committed to repo)
        "u" = uploaded (blob uploaded, NOT yet committed)
        "f" = failed
        (no status field) = hash cached only, upload not attempted

    Thread safety: all mutations are protected by a lock.
    Persistence: atomic write via temp file + rename.
    
cache_pathrepo_idc                 C   s2   t || _|| _i | _t | _d| _|   d S NF)	r   _path_repo_id_files	threadingLock_lock_dirty_load)r!   rB   rC   r   r   r   __init__   s   

zUploadTracker.__init__rel_pathmtimesizer   c                 C   s   |  d| d| S )zKBuild cache key from file metadata (same format as legacy UploadHashCache).|r   )rN   rO   rP   r   r   r   	_make_key   s   zUploadTracker._make_keyc                 C   sh   |  |||}| j | j|}W d   n1 sw   Y  |du s(d|vr*dS ||d |d dS )zGet cached hash info for a file.

        Returns dict compatible with legacy UploadHashCache.get():
            {'file_path_or_obj': rel_path, 'file_hash': ..., 'file_size': ...}
        or None if not cached or file has changed.
        NhashrP   )Zfile_path_or_obj	file_hash	file_sizerR   rJ   rG   getr!   rN   rO   rP   keyentryr   r   r   get_hash   s   zUploadTracker.get_hash	hash_infoc                 C   sp   |  |||}| j$ | j|i }|d |d< |d |d< || j|< d| _W d   dS 1 s1w   Y  dS )zStore computed hash info for a file.

        Args:
            hash_info: dict with 'file_hash' and 'file_size' keys.
        rT   rS   rU   rP   TN)rR   rJ   rG   rW   rK   )r!   rN   rO   rP   r\   rY   rZ   r   r   r   put_hash   s   
"zUploadTracker.put_hashc                 C   sX   |  |||}| j | j|}W d   n1 sw   Y  |duo+|dtjkS )z<Check if a file is committed (with matching mtime and size).Nr?   )rR   rJ   rG   rW   r   r   rX   r   r   r   is_committed   s   zUploadTracker.is_committedc                 C   sR   |  |||}| j | j|}W d   n1 sw   Y  |r'|dS dS )z(Get file status, or None if not tracked.Nr?   rV   rX   r   r   r   
get_status   s
   zUploadTracker.get_statusc                 C   sj   |  |||}| j! || jv r#tj| j| d< d| _W d   dS W d   dS 1 s.w   Y  dS )z1Mark a file as blob-uploaded (not yet committed).r?   TN)rR   rJ   rG   r   r   rK   )r!   rN   rO   rP   rY   r   r   r   mark_uploaded   s   
"zUploadTracker.mark_uploaded	file_keysc                 C   sj   | j ( |D ]\}}}| |||}|| jv rtj| j| d< qd| _W d   dS 1 s.w   Y  dS )zMark multiple files as committed after a successful commit.

        Args:
            file_keys: list of (rel_path, mtime, size) tuples.
        r?   TN)rJ   rR   rG   r   r   rK   )r!   ra   rN   rO   rP   rY   r   r   r   mark_committed_batch   s   
"z"UploadTracker.mark_committed_batch 
error_typec                 C   s   |  |||}| j3 || jv r"tj| j| d< |r!|| j| d< ndtji}|r-||d< || j|< d| _W d   dS 1 s@w   Y  dS )z9Mark a file as failed with optional error classification.r?   rd   TN)rR   rJ   rG   r   r   rK   )r!   rN   rO   rP   rd   rY   rZ   r   r   r   mark_failed   s   


"zUploadTracker.mark_failedc              
   C   s<  | j & | js	 W d   dS t| jdd | j D d}d| _W d   n1 s,w   Y  zQ| jjjddd t	j
t| jjdd	\}}z-tj|d
dd}tj||dd W d   n1 sdw   Y  t|t| j W W dS  ty   t|  w  ty } ztd|  W Y d}~dS d}~ww )z&Atomically save tracker state to disk.Nc                 S   s   i | ]	\}}|t |qS r   )dict).0kvr   r   r   
<dictcomp>  s    z&UploadTracker.save.<locals>.<dictcomp>)versionrC   filesFT)parentsexist_okz.tmp)dirsuffixwzutf-8)encoding)ensure_asciizFailed to save upload tracker: )rJ   rK   _TRACKER_VERSIONrF   rG   itemsrE   parentmkdirtempfilemkstempr-   osfdopenjsondumpreplaceBaseExceptionunlink	Exceptionloggerwarning)r!   datafdZtmp_pathr   er   r   r   save   s<   


zUploadTracker.savec              
   C   s   z	| j jdd W n ty# } ztd|  W Y d}~nd}~ww | j | j  d| _W d   dS 1 s;w   Y  dS )zDelete the tracker file.T)
missing_okzFailed to delete tracker file: NF)	rE   r   r=   r   r   rJ   rG   clearrK   )r!   r   r   r   r   r     s   
"zUploadTracker.clearc              
   C   s^  | j  s|   dS zt| j d}t|}W d   n1 s"w   Y  W n tjtfyF } zt	d|  W Y d}~dS d}~ww |
d}|du rW| | dS |tk rgt	d| dt d |
dd	}|r|| jkrt	d
| d| j d dS |
di | _tdd | j D }|dkrtdt| j d| d |   dS )z8Load tracker state from disk, handling format migration.Nrz/Failed to load upload tracker, starting fresh: rk   zUpload tracker version z is older than current z%. Data will be migrated on next save.rC   rc   z"Tracker repo_id mismatch (cached: z, current: z), ignoring stale tracker.rl   c                 s   s$    | ]}| d tjkrdV  qdS )r?      N)rW   r   r   )rg   r   r   r   r   	<genexpr>D  s    z&UploadTracker._load.<locals>.<genexpr>r   zUpload tracker loaded: z
 entries, z committed.)rE   exists_check_legacy_progressopenr|   loadJSONDecodeErrorr=   r   r   rW   _migrate_v1rt   rF   rG   sumvaluesinfolen)r!   r   r   r   rk   Zstored_repoZcommitted_countr   r   r   rL   "  sP   



zUploadTracker._loadr   c                 C   sp   i }|  D ]\}}t|tr d|v r |d |ddd||< q|| _d| _|r6tdt| d dS dS )	ad  Migrate from legacy hash-only format (UploadHashCache v1).

        Old format: {"rel_path|mtime|size": {"file_hash": "...", "file_size": 123}}
        New format: {"rel_path|mtime|size": {"hash": "...", "size": 123}}

        Status is NOT set during migration -- cached hashes do not imply
        the file was committed (conservative approach).
        rT   rU   r   )rS   rP   Tz	Migrated z' entries from legacy hash cache format.N)	ru   r/   rf   rW   rG   rK   r   r   r   )r!   r   ZmigratedrY   valuer   r   r   r   L  s   	

zUploadTracker._migrate_v1c                 C   s.   | j jt }| rtd| d dS dS )z/Warn if legacy .ms_upload_progress file exists.z&Legacy upload progress file detected: z8. This file is no longer used. You may delete it safely.N)rE   rv   _LEGACY_PROGRESS_FILEr   r   r   )r!   Zlegacy_pathr   r   r   r   c  s   
z$UploadTracker._check_legacy_progressNrc   )r   r   r   r   r   r-   r   rM   staticmethodfloatintrR   r   rf   r[   r]   r(   r^   r_   r`   r   r   rb   re   r   r   rL   r   r   r   r   r   r   rA   u   sH    






*rA   c                	   @   s   e Zd ZdZdedededdfddZdededed	efd
dZ	dededede
fddZdededefddZdededefddZdd Z	ddedededefddZdd Zdd ZdS )NullTrackerzNo-op tracker for when caching is disabled.

    Implements the same interface as UploadTracker but does nothing,
    eliminating 'if tracker is not None' checks throughout api.py.
    rN   rO   rP   r   Nc                 C      d S r   r   r!   rN   rO   rP   r   r   r   r[   s     zNullTracker.get_hashr\   c                 C   r   r   r   )r!   rN   rO   rP   r\   r   r   r   r]   v  s   zNullTracker.put_hashc                 C   s   dS rD   r   r   r   r   r   r^   z  r   zNullTracker.is_committedc                 C   r   r   r   r   r   r   r   r_   }  r   zNullTracker.get_statusc                 C   r   r   r   r   r   r   r   r`     r   zNullTracker.mark_uploadedc                 C   r   r   r   )r!   ra   r   r   r   rb     r   z NullTracker.mark_committed_batchrc   rd   c                 C   r   r   r   )r!   rN   rO   rP   rd   r   r   r   re     s   zNullTracker.mark_failedc                 C   r   r   r   r    r   r   r   r     r   zNullTracker.savec                 C   r   r   r   r    r   r   r   r     r   zNullTracker.clearr   )r   r   r   r   r-   r   r   r[   rf   r]   r(   r^   r_   r`   rb   re   r   r   r   r   r   r   r   l  s,    

r   )r   rz   r:   rx   rH   enumr   pathlibr   typingr   r   r   r   r   r|   r4   Zmodelscope.utils.loggerr	   r   r   rt   r   r-   r   r   r@   rA   r   r   r   r   r   <module>   s(   > x