o
    *jC                     @   sx   d dl Z d dlmZ d dlmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZ G dd deZG d	d
 d
eZdS )    N)mpu)_flatten_dense_tensors_unflatten_dense_tensors)Variable)Module)DistributedDataParallelc                   @   s4   e Zd ZddedefddZdd	d
ZdddZdS )PyTorchDistributedDataParallel Tprefixrecursec                 C      | j j||dS N)r
   r   modulenamed_parametersselfr
   r    r   m/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/mglm/model/distributed.pyr         z/PyTorchDistributedDataParallel.named_parametersNFc                 C      | j |||}|S Nr   
state_dictr   Zdestinationr
   Z	keep_varssdr   r   r   r         z)PyTorchDistributedDataParallel.state_dictc                 C   r   N)strictr   load_state_dictr   r   r   r   r   r   r    !   r   z.PyTorchDistributedDataParallel.load_state_dictr	   TNr	   FT)__name__
__module____qualname__strboolr   r   r    r   r   r   r   r      s    
r   c                       sL   e Zd Z fddZdd Zddd	ZdddZddedefddZ	  Z
S )r   c                    s   t t  tjtjjkrdnd_|_t	
 _t	 }j D ]}t|r4tj||jd q$			dfdd	 g _g _tj D ]} fdd}qL _d S )	NTFgroupc                    s*   j rd _ i } j D ] \}}|jr-|jd ur-|j }||vr&g ||< || | q jr>t	j
j|v r>td d _|D ]R}|| }dd |D }t|}	|rW|	 }	|sd| sd|	tj jd }	tj|	 jd t	j
  |s~| r~|	tj jd }	t|t|	|D ]	\}
}|
| qq@d S d S )NFz}WARNING: gloo dist backend for half parameters may be extremely slow. It is recommended to use the NCCL backend in this case.c                 S   s   g | ]}|j jqS r   )graddata).0paramr   r   r   
<listcomp>F   s    zNDistributedDataParallel.__init__.<locals>.allreduce_params.<locals>.<listcomp>r*   )needs_reductionr   r   Zrequires_gradr,   r-   typeappendwarn_on_halftorchcudaZ
HalfTensorprintr   floatdistZget_world_sizedata_parallel_groupZ
all_reduceZsynchronizezipr   Zcopy_)Zreduce_afterZno_scaleZfp32_allreduceZbucketsnamer/   tpZbucketZgradsZ	coalescedbufZsynced)r   r   r   allreduce_params2   sN   


z:DistributedDataParallel.__init__.<locals>.allreduce_paramsc                     s   t j  d S r   )r   Z_execution_engineZqueue_callback)Zunused)r?   r   r   allreduce_hookZ   r   z8DistributedDataParallel.__init__.<locals>.allreduce_hook)TFF)superr   __init__r9   Z_backendZdist_backendZGLOOr4   r   r   Zget_data_parallel_groupr:   Zget_model_parallel_rank
parametersr5   Z	is_tensor	broadcastZhook_handleshookslistr?   )r   r   Zsrc_rankpr/   r@   	__class__)r?   r   r   rB   '   s$   

$
z DistributedDataParallel.__init__c                 O   s   d| _ | j|i |S )NT)r1   r   )r   Zinputskwargsr   r   r   forward_   s   zDistributedDataParallel.forwardNr	   Fc                 C   r   r   r   r   r   r   r   r   c   r   z"DistributedDataParallel.state_dictTc                 C   r   r   r   r!   r   r   r   r    g   r   z'DistributedDataParallel.load_state_dictr
   r   c                 C   r   r   r   r   r   r   r   r   j   r   z(DistributedDataParallel.named_parametersr#   r$   r"   )r%   r&   r'   rB   rK   r   r    r(   r)   r   __classcell__r   r   rH   r   r   %   s    8

r   )r5   Ztorch.distributeddistributedr9   Zmegatron_utilr   Ztorch._utilsr   r   Ztorch.autogradr   Ztorch.nn.modulesr   Ztorch.nn.parallel.distributedr   ZDDPr   r   r   r   r   <module>   s   