o
    "jW                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 ddlmZmZ ddlmZmZ ed	d
G dd dZdd Zdd Zdd ZdedefddZdedefddZdd Z		 d	dddZdS )     N)	dataclass)Tuple)is_initialized)logger   )LocalTensorIndexLocalTensorMetadata)%compute_local_shape_and_global_offsetflatten_state_dictT)frozenc                   @   sB   e Zd ZU eed< eed< ee ed< ee ed< ee ed< dS )ReadItemlocal_tensor_indexrank
cur_offsetstorage_offsetlengthsN)__name__
__module____qualname__r   __annotations__intr    r   r   n/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/checkpoint/load_state_dict.pyr      s   
 r   c                    s  t | }dd |D }t|dksJ d|  dg }g }|D ]5}tt j| |}	|	j D ]#\}
}|
|vsDJ d|
 d| d|	|
j
 |
j
|v rT|	| q1q t| t dkrmtd	|  d
|  i S dd |D }g }|rtj||| n|	| g }|D ]}||7 }qt|}td  d|  | @  ksJ d|  d| d  t| t| }t|dkrtd| d i }t|D ]\}}t|dkr fdd|D }|||< qtd|  |S )Nc                 S      g | ]	}| d r|qS z	.metadataendswith.0filer   r   r   
<listcomp>*   
    
z%get_rank_to_files.<locals>.<listcomp>r   z3No metadata file found in the checkpoint directory:.zDuplicate tensor_key:z( found. Check whether the metadata_file:z# contains the same tensor metadata.z:No necessary data files found in the checkpoint directory:z". Please check the metadata_files:c                 S   r   )z.distcpr   r   r   r   r   r    D   r!   znecessary_data_files_set:z, global_data_files_set:zMThe checkpoint files are not complete. Please check the checkpoint directory:z.global_data_files_set:z, necessary_data_files_set:zMissing keys:z+, check whether the checkpoint is complete.c                    s   g | ]}| v r|qS r   r   )r   fZnecessary_data_files_setr   r   r    c   s    zmapping rank_to_files:)oslistdirlenpaddleloadpathjoinstorage_metadataitemsappend
tensor_keysetr   warningdistributedall_gather_objectdebugkeys	enumerate)r*   
state_dictprocess_groupuse_distaccessible_filesmetadata_filesZtensor_key_listZnecessary_filesmetadata_filemetadatar   	file_nameZlocal_data_filesZglobal_data_filestmpfilesZglobal_data_files_setZmissing_keysrank_to_filesr   Zlocal_filesr   r$   r   get_rank_to_files(   s|   








rB   c              
   C   sl  i }|   D ]\}}|D ]}||vrg ||< || | qqt| }dd | D }|  D ])\}}t|dkrZ|d }|| | || | t|| dkrZ|| q1td| d|  dd }d	d
 }	dd }
t|dkr||}|	||}|
||| td| d| d| d|  t|dksxt	j
 }||v r|| S td| d g S )a\  
    Load files in a load-balanced manner.
    Example:
        Case1: all ranks access the same data files
            rank_to_files = {rank0:[0_0.distcp, 1_0.distcp, 2_0.distcp, 3_0.distcp], rank1:[0_0.distcp, 1_0.distcp, 2_0.distcp, 3_0.distcp]}
            rank0 return [0_0.distcp, 1_0.distcp], rank1 return [2_0.distcp, 3_0.distcp]
        Case2: all ranks access different data files but some overlapped
            rank_to_files = {rank0:[0_0.distcp, 1_0.distcp, 2_0.distcp], rank1:[2_0.distcp, 3_0.distcp]
            rank0 return [0_0.distcp, 1_0.distcp], rank1 return [2_0.distcp, 3_0.distcp]
        Case3: all ranks access different data files and no overlapped
            rank_to_files = {rank0:[0_0.distcp, 1_0.distcp], rank1:[2_0.distcp, 3_0.distcp]
            rank0 return [0_0.distcp, 1_0.distcp], rank1 return [2_0.distcp, 3_0.distcp]
    c                 S   s   i | ]}|g qS r   r   )r   r   r   r   r   
<dictcomp>   s    z(get_local_load_files.<locals>.<dictcomp>r   r   zrank_to_read_files:z, rank_to_not_read_files:c                    s8   dd |   D  t dd d  fdd D }|S )Nc                 S   s   g | ]
\}}|t |fqS r   r'   r   r   r@   r   r   r   r       s    zLget_local_load_files.<locals>.get_least_read_files_ranks.<locals>.<listcomp>c                 S      | d S Nr   r   xr   r   r   <lambda>       zJget_local_load_files.<locals>.get_least_read_files_ranks.<locals>.<lambda>keyc                    s$   g | ]\}}| d  d kr|qS )r   r   r   )r   r   numnumsr   r   r       s   $ )r-   sorted)rank_to_read_filesranksr   rO   r   get_least_read_files_ranks   s   z8get_local_load_files.<locals>.get_least_read_files_ranksc                    sR   t | dkrdS  fdd|  D }t|dd d}|d d }|| | d fS )Nr   )NNc                    s$   g | ]\}}| v r|t |fqS r   rD   rE   rS   r   r   r       s
    
zDget_local_load_files.<locals>.get_read_rank_file.<locals>.<listcomp>c                 S   rF   rG   r   rH   r   r   r   rJ      rK   zBget_local_load_files.<locals>.get_read_rank_file.<locals>.<lambda>rL   )r'   r-   rQ   )rank_to_not_read_filesrS   rP   r   r   rU   r   get_read_rank_file   s   
z0get_local_load_files.<locals>.get_read_rank_filec           	      S   s   |\}}|d u r|d u rd S || vrg | |< | |  | i }| D ]\}}|D ]}||vr3g ||< ||  | q)q#td|  ||v rc|| D ]}|| | t|| dkrb|| qLd S d S )Nzfile_to_ranks:r   )r.   r-   r   r4   remover'   pop)	rR   rV   	rank_filer   r   file_to_ranksrr@   r#   r   r   r   update   s,   
z$get_local_load_files.<locals>.updatezupdate rank_to_read_files:z, ranks:z, rank_file:zrank:z! does not need to load checkpoint)r-   r.   copyr5   r'   rX   rY   r   r4   r(   r2   get_rankr1   )rA   r[   r   r@   r   rV   rR   rS   rT   rW   r]   rZ   Zcur_rankr   r   r   get_local_load_filesk   sH   



r`   c                 C   s   i }t | }dd |D }t|dksJ d|D ]#}tt j| |}|j D ]\}	}
|
|v r<tj	
 |
f||	< q+qg }|rKtj	||| n|| i }|D ]}| D ]\}	\}}
|	|vsfJ ||
f||	< qZqT|S )Nc                 S   r   r   r   r   r   r   r   r       r!   z"get_load_infos.<locals>.<listcomp>r   :No metadata file found in the checkpoint directory:{path}.)r%   r&   r'   r(   r)   r*   r+   r,   r-   r2   r_   r3   r.   )r*   local_load_filesr8   r9   Z	load_infor:   r;   r<   r=   r   r>   Zload_info_list
load_infosr   r   r   r   get_load_infos   s<   

rd   cur_chunk_metadatastorage_local_tensor_metadatac                 C   s   g }g }g }t | j| j|j|jD ]_\}}}}t||}	t|| || }
|	|kr6|d ||	|  n|	|krG||	|  |d ntd|	 d| d| ||
|	  |d dkspJ d|d  d|
 d|	 q|||fS )	Nr   zInvalid begin_offset:z, cur_offset:z, storage_offset:zInvalid length:z, end_offset:z, begin_offset:)ziplocal_shapeglobal_offsetmaxminr.   
ValueError)re   rf   cur_offsetsstorage_offsetsr   cur_lenr   strorage_lenr   Zbegin_offset
end_offsetr   r   r   compute_overlap   s2   


rs   c                 C   sF   t | j| j|j|jD ]\}}}}||| ks|| |kr  dS qdS )NTF)rh   ri   rj   )re   rf   rp   r   rq   r   r   r   r   not_overlap  s   rt   c                 C   s  t | }dd |D }t|dksJ di }|D ]&}tt j| |}|j D ]\}	}
|	|vr7g ||	< ||	  |
7  < q+qg }t	
d|  | D ]\}	}t|tjr| r}t|jdkrot|j|jj|jjnd\}}|d u s{|d u r|qOnt|j}t|jdkrtdgt|j nd}t||}|	|v sJ d|	 d	| d
||	 D ],}t||rqt||\}}}t|	t|j}|t|tj t|t|t| qqOtdt| g }g }|rtj||| n|| |D ]}|D ]}|| qq|S )Nc                 S   r   r   r   r   r   r   r   r      r!   z"get_read_items.<locals>.<listcomp>r   ra   zstorage_state_dict_metadata:)r   r   r   ztensor_key:z* not found in storage_state_dict_metadata:r"   z&Only support paddle.Tensor., val type:) r%   r&   r'   r(   r)   r*   r+   Zstate_dict_metadatar-   r   r4   
isinstanceTensoris_distshaper	   Z	dist_attrZprocess_meshZdims_mappingtupler   rt   rs   r   rj   r.   r   r2   r_   rm   typer3   )r*   r7   r8   r9   r:   r;   Zstorage_state_dict_metadatar<   r=   r/   Zlocal_tensor_metadata
read_itemsvalri   rj   re   rf   rn   ro   r   Zstorage_local_tensor_indexZglobal_read_itemsr?   r-   itemr   r   r   get_read_items  s   


$



r~   returnc              	   C   s  t jj  t| tsJ dt| } t| dkr/|  D ]}t|t j	s.J d| qt j
 dkr8dnd}|rH|du rHt sHt j
  |rPt j
| t|| ||}t|dkrf	 W d   dS t|}t||||}t|| ||}	i }
td|  d	| d
|	  g }|  D ]\}}|j r|| | | |< q|	D ] }|j|v sJ d| d| ||j \}}d}d}|t j
 kr||
vrt tj|||
|< |
| }|jj |v sJ ||jj  }|j!}|j"}dd t#||D }t|dkrt $|t%t&t|||}n|}|j't j
 krq|jj | v s.J d| d|  |rB| |jj  ( rB| |jj  ) n| |jj  }|j*}|j"}dd t#||D }t|dkrnt $|t%t&t|||}n|}nt j+|j"| |jj  j,d}||j'krt -|| q|t j
 krt j
j.|||d qt j
j.|||d q|  D ]\}}||v r|/ | |< qW d   dS 1 sw   Y  dS )a|  
    Load the state_dict inplace from a checkpoint path.

    Args:
        state_dict(Dict[str, paddle.Tensor]): The state_dict to load. It will be modified inplace after loading.
        path(str): The directory to load checkpoint files.
        process_group(paddle.distributed.collective.Group): ProcessGroup to be used for cross-rank synchronization. Use the default process group which contains all cards.
        coordinator_rank(int): The rank used to coordinate the checkpoint. Rank0 is used by default.

    Example:
        .. code-block:: python

            >>> # doctest: +SKIP('run in distributed mode.')
            >>> import paddle
            >>> import paddle.distributed as dist
            >>> ckpt_path = "./checkpoint"
            >>> w1 = paddle.arange(32).reshape([4, 8])
            >>> mesh = dist.ProcessMesh([0, 1])
            >>> sharded_w1 = dist.shard_tensor(w1, mesh, [dist.Shard(0)])
            >>> state_dict = {"w1": sharded_w1}
            >>> dist.save_state_dict(state_dict, ckpt_path)
            >>> w1_to_load = paddle.zeros_like(w1)
            >>> sharded_w1_to_load = dist.shard_tensor(w1, mesh, [dist.Replicate()])
            >>> state_dict_to_load = {"w1": sharded_w1_to_load}
            >>> dist.load_state_dict(state_dict_to_load, ckpt_path)
            >>> print(f"state_dict_to_load:{state_dict_to_load}")
            state_dict_to_load:{'w1': Tensor(shape=[4, 8], dtype=int64, place=Place(gpu:0), stop_gradient=True, dist_attr={process_mesh: {shape: [2], process_ids: [0,1], dim_names: [d0]}, dims_mappings: [-1,-1], batch_dim: 0, dynamic_dims: [0,0], annotated: [dims_mapping: 1,process_mesh: 1], partial: [].}, GlobalDenseTensor=
            [[0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ],
             [8 , 9 , 10, 11, 12, 13, 14, 15],
             [16, 17, 18, 19, 20, 21, 22, 23],
             [24, 25, 26, 27, 28, 29, 30, 31]])}
            >>> # doctest: -SKIP
    z&The state_dict should be a dictionary.r   z(Only support dygraph Tensor now, but is r   TFNzbefore load, state_dict:z,
 load_infos:z,
 read_items:zitem:z, load_infos:c                 S      g | ]\}}|| qS r   r   )r   r   Zstorage_lengthr   r   r   r          z#load_state_dict.<locals>.<listcomp>z, state_dict:c                 S   r   r   r   )r   r   Z
cur_lengthr   r   r   r      r   )dtype)srcgroup)0r(   baseZdygraphguardru   dictr
   r'   valuesrv   r2   Zget_world_sizer   Zinit_parallel_envZbarrierrB   r`   rd   r~   r   r4   r-   ZplaceZis_cpu_placer.   cudar   r_   r)   r%   r*   r+   r/   r   r   rh   slicelistranger   rw   Z_local_valuer   Zzerosr   Zassign	broadcastcpu)r7   r*   r8   Zcoordinator_rankr|   r9   rA   rb   rc   r{   Zstorage_file_to_state_dictZstate_dict_in_cpukvr}   Zsrc_rankr>   Zstorage_chunk_tensorZcur_chunk_tensorZstorage_state_dictZstorage_local_tensorro   Zstorage_lengthsZstorage_endsZcur_local_tensorrn   Zcur_lengthsZcur_endsr   r   r   load_state_dictm  s   '





 $r   )Nr   )r   N)r^   r%   dataclassesr   typingr   r(   Z&paddle.distributed.communication.groupr   Z'paddle.distributed.fleet.utils.log_utilr   r=   r   r   utilsr	   r
   r   rB   r`   rd   rs   rt   r~   r   r   r   r   r   <module>   s<   CZ 
 
Y