# Copyright (c) Alibaba, Inc. and its affiliates.
import functools
import inspect
import os
import pickle
import random
import socket
import subprocess
import tempfile
from typing import Callable, List, Optional, Tuple

import numpy as np
import torch
import torch.multiprocessing as mp
from packaging import version
from torch import distributed as dist

from modelscope.utils.logger import get_logger

logger = get_logger()


def _find_free_port() -> str:
    # Bind to port 0: the OS picks a currently free port for us.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.bind(('', 0))
    port = sock.getsockname()[1]
    sock.close()
    # NOTE: there is still a small race window in which another process
    # may grab this port before it is actually used.
    return port


def _is_free_port(port: int) -> bool:
    # Probe every address this host resolves to, plus localhost;
    # connect_ex returns 0 only when something is already listening.
    ips = socket.gethostbyname_ex(socket.gethostname())[-1]
    ips.append('localhost')
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return all(s.connect_ex((ip, port)) != 0 for ip in ips)


def compile_model(model, **compile_options):
    if hasattr(model, 'compile'):
        model = model.compile(**compile_options)
    elif version.parse(torch.__version__) >= version.parse('2.0.0.dev'):
        model = torch.compile(model, **compile_options)
    else:
        print('Compiling model needs torch version > 2.0.0, your torch '
              f'version is: {torch.__version__}, the original model will '
              'be returned.')
    return model
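

# Usage sketch (illustrative only): `compile_model` first defers to a model's
# own `compile` method if it defines one; otherwise it falls back to
# `torch.compile` on torch >= 2.0, e.g.
#
#   model = compile_model(model, mode='reduce-overhead')
#
# On older torch versions the model is returned unchanged.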


def init_dist(launcher: str, backend: str = 'nccl', **kwargs) -> None:
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
    elif launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
    else:
        raise ValueError(f'Invalid launcher type: {launcher}')
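

# Usage sketch (illustrative, assuming a script started with
# `torchrun --nproc_per_node=4 train.py`, which exports LOCAL_RANK, RANK,
# WORLD_SIZE, MASTER_ADDR and MASTER_PORT for every worker):
#
#   init_dist(launcher='pytorch', backend='nccl')
#   rank, world_size = get_dist_info()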


def _init_dist_pytorch(backend: str, **kwargs) -> None:
    # LOCAL_RANK is exported by torch.distributed.launch / torchrun.
    local_rank = int(os.environ['LOCAL_RANK'])
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend=backend, **kwargs)


def _init_dist_mpi(backend: str, **kwargs) -> None:
    local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
    torch.cuda.set_device(local_rank)
    if 'MASTER_PORT' not in os.environ:
        # 29500 is torch.distributed.launch's default port.
        os.environ['MASTER_PORT'] = '29500'
    if 'MASTER_ADDR' not in os.environ:
        raise KeyError('The environment variable MASTER_ADDR is not set')
    os.environ['WORLD_SIZE'] = os.environ['OMPI_COMM_WORLD_SIZE']
    os.environ['RANK'] = os.environ['OMPI_COMM_WORLD_RANK']
    dist.init_process_group(backend=backend, **kwargs)


def _init_dist_slurm(backend: str, port: Optional[int] = None) -> None:
    """Initialize slurm distributed training environment.

    If argument ``port`` is not specified, then the master port will be system
    environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
    environment variable, then a default port ``29500`` will be used.

    Args:
        backend (str): Backend of torch.distributed.
        port (int, optional): Master port. Defaults to None.
    """
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)
    addr = subprocess.getoutput(
        f'scontrol show hostname {node_list} | head -n1')
    # specify master port
    if port is not None:
        os.environ['MASTER_PORT'] = str(port)
    elif 'MASTER_PORT' in os.environ:
        pass  # use the MASTER_PORT in the environment variable
    else:
        # if torch.distributed default port (29500) is available,
        # use it; otherwise find a free port
        if _is_free_port(29500):
            os.environ['MASTER_PORT'] = '29500'
        else:
            os.environ['MASTER_PORT'] = str(_find_free_port())
    # use MASTER_ADDR in the environment variable if it already exists
    if 'MASTER_ADDR' not in os.environ:
        os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['LOCAL_RANK'] = str(proc_id % num_gpus)
    os.environ['RANK'] = str(proc_id)
    dist.init_process_group(backend=backend)
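

# Usage sketch (illustrative): inside a hypothetical allocation such as
# `srun -N2 --ntasks-per-node=8 python train.py`, every task calls
#
#   init_dist(launcher='slurm', backend='nccl', port=29501)
#
# and the helper above derives RANK/LOCAL_RANK/WORLD_SIZE/MASTER_ADDR from
# the SLURM_* variables before calling dist.init_process_group.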


def get_dist_info(group=None) -> Tuple[int, int]:
    """Get dist info of a specified group

    Args:
        group: The parallel group, default None, for the global group

    Returns:
        A tuple of the current rank and world_size of the group
    """
    if is_dist():
        from modelscope.utils.megatron_utils import is_megatron_initialized
        if group is None and is_megatron_initialized():
            from megatron_util import mpu
            group = mpu.get_data_parallel_group()
        rank = dist.get_rank(group)
        world_size = dist.get_world_size(group)
    else:
        rank = 0
        world_size = 1
    return rank, world_size


def get_local_rank():
    return int(os.environ.get('LOCAL_RANK', 0))


def get_rank():
    if not dist.is_available():
        return 0
    if not dist.is_initialized():
        return 0
    return dist.get_rank()


def get_world_size():
    if not dist.is_available():
        return 1
    if not dist.is_initialized():
        return 1
    return dist.get_world_size()


def synchronize():
    """
    Helper function to synchronize (barrier)
    among all processes when using distributed training
    """
    if not dist.is_available():
        return
    if not dist.is_initialized():
        return
    world_size = dist.get_world_size()
    if world_size == 1:
        return
    dist.barrier()


def is_dist():
    return dist.is_available() and dist.is_initialized()


def is_master(group=None):
    return dist.get_rank(group) == 0 if is_dist() else True


def master_only(group=None):

    def decorate(func: Callable) -> Callable:

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if is_master(group):
                return func(*args, **kwargs)

        return wrapper

    return decorate


def make_tmp_dir():
    """Make sure each rank has the same temporary directory on the distributed mode.
    """
    if not is_dist():
        return tempfile.mkdtemp()

    tmpdir = None
    if is_master():
        tmpdir = tempfile.mkdtemp()
    dist.barrier()
    tmpdir = broadcast(tmpdir, 0)
    return tmpdir
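

# Usage sketch (illustrative): `master_only` is a decorator factory, so it is
# applied with parentheses. A hypothetical checkpoint writer would then run
# on rank 0 only and silently return None on all other ranks:
#
#   @master_only()
#   def save_checkpoint(model, path):
#       torch.save(model.state_dict(), path)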
rj   c                 C   s   t  }tjdgdd}||kr&tjtt| tjdd}tj|jdd}t 	  t 
|| ||krAtj| fdtjdd}t 	  t 
|| t|   S )z
    Broadcasts the inputs to all ranks.

    Arguments:
        inputs : Any objects that can be serialized by pickle.
        src (int): Source rank.
    Returns:
        Each rank returns the same value as src.
    r   r<   deviceZdtyperl   )r>   rO   r'   tensor	bytearraypickledumpsuint8shaper[   ri   fullitemloadscpunumpytobytes)ZinputssrcrR   Zshape_tensorZinputs_tensorr   r   r   ri      s$   
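

# Usage sketch (illustrative): since `broadcast` pickles arbitrary objects,
# rank 0 can hand a resolved config dict to every rank
# (`build_config` is hypothetical):
#
#   cfg = build_config() if get_rank() == 0 else None
#   cfg = broadcast(cfg, src=0)
#
# The staging tensors live on CUDA, so this path assumes a GPU backend.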


def set_random_seed(seed):
    if seed is not None and seed >= 0:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    else:
        raise ValueError(
            f'Random seed should be positive, current seed is {seed}')


@functools.lru_cache()
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    """
    if dist.get_backend() == 'nccl':
        return dist.new_group(backend='gloo')
    else:
        return dist.group.WORLD


def _serialize_to_tensor(data, group):
    backend = dist.get_backend(group)
    assert backend in ['gloo', 'nccl']
    device = torch.device('cpu' if backend == 'gloo' else 'cuda')

    buffer = pickle.dumps(data)
    if len(buffer) > 1024**3:
        logger.warning(
            'Rank {} trying to all-gather {:.2f} GB of data on device {}'.
            format(dist.get_rank(group),
                   len(buffer) / (1024**3), device))
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to(device=device)
    return tensor
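

# Design note: `all_gather` below defaults to the cached gloo group so that
# pickled byte buffers can be exchanged on CPU even when the main process
# group runs on nccl, which only moves CUDA tensors.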


def _pad_to_largest_tensor(tensor, group):
    """
    Returns:
        list[int]: size of the tensor, on each rank
        Tensor: padded tensor that has the max size
    """
    world_size = dist.get_world_size(group=group)
    assert world_size >= 1, \
        'comm.gather/all_gather must be called from ranks within the group!'
    local_size = torch.tensor([tensor.numel()],
                              dtype=torch.int64,
                              device=tensor.device)
    size_list = [
        torch.zeros([1], dtype=torch.int64, device=tensor.device)
        for _ in range(world_size)
    ]
    dist.all_gather(size_list, local_size, group=group)
    size_list = [int(size.item()) for size in size_list]

    max_size = max(size_list)

    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    if local_size != max_size:
        padding = torch.zeros((max_size - local_size, ),
                              dtype=torch.uint8,
                              device=tensor.device)
        tensor = torch.cat((tensor, padding), dim=0)
    return size_list, tensor


def all_gather(data, group=None):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).
    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: list of data gathered from each rank
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group) == 1:
        return [data]

    tensor = _serialize_to_tensor(data, group)

    size_list, tensor = _pad_to_largest_tensor(tensor, group)
    max_size = max(size_list)

    # receiving Tensor from all ranks
    tensor_list = [
        torch.empty((max_size, ), dtype=torch.uint8, device=tensor.device)
        for _ in size_list
    ]
    dist.all_gather(tensor_list, tensor, group=group)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list
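

# Usage sketch (illustrative): gathering per-rank Python objects, e.g. metric
# dicts, without hand-written tensor plumbing (`n_seen` is hypothetical):
#
#   stats = {'rank': get_rank(), 'n_samples': n_seen}
#   all_stats = all_gather(stats)  # same world_size-length list on every rank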


def is_on_same_device(model: torch.nn.Module) -> bool:
    device_set = set(str(p.device) for p in model.parameters()) - {'cpu'}
    return len(device_set) <= 1


def apply_chunking_to_forward(forward_fn: Callable[..., torch.Tensor],
                              chunk_size: int, chunk_dim: int,
                              *input_tensors) -> torch.Tensor:
    assert len(input_tensors) > 0, \
        f'{input_tensors} has to be a tuple/list of tensors'

    num_args_in_forward_chunk_fn = len(
        inspect.signature(forward_fn).parameters)
    if num_args_in_forward_chunk_fn != len(input_tensors):
        raise ValueError(
            f'forward_chunk_fn expects {num_args_in_forward_chunk_fn} '
            f'arguments, but only {len(input_tensors)} input tensors are given'
        )

    if chunk_size > 0:
        tensor_shape = input_tensors[0].shape[chunk_dim]
        for input_tensor in input_tensors:
            if input_tensor.shape[chunk_dim] != tensor_shape:
                raise ValueError(
                    'All input tensors have to be of the same shape: '
                    f'{tensor_shape}, found shape '
                    f'{input_tensor.shape[chunk_dim]}')

        if input_tensors[0].shape[chunk_dim] % chunk_size != 0:
            raise ValueError(
                f'The dimension to be chunked {input_tensors[0].shape[chunk_dim]} '
                f'has to be a multiple of the chunk size {chunk_size}')

        num_chunks = input_tensors[0].shape[chunk_dim] // chunk_size

        # chunk input tensors into tuples
        input_tensors_chunks = tuple(
            input_tensor.chunk(num_chunks, dim=chunk_dim)
            for input_tensor in input_tensors)
        # apply forward fn to every tuple
        output_chunks = tuple(
            forward_fn(*input_tensors_chunk)
            for input_tensors_chunk in zip(*input_tensors_chunks))
        # concatenate output at same dimension
        return torch.cat(output_chunks, dim=chunk_dim)

    return forward_fn(*input_tensors)
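

# Usage sketch (illustrative, mirroring how transformer feed-forward blocks
# chunk over the sequence dimension; `self.ffn` and `hidden_states` are
# hypothetical, and the sequence length must be divisible by the chunk size):
#
#   def forward_chunk(hidden_states):
#       return self.ffn(hidden_states)
#
#   output = apply_chunking_to_forward(forward_chunk, 64, 1, hidden_states)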


def find_pruneable_heads_and_indices(
        heads: List[int], n_heads: int, head_size: int,
        already_pruned_heads: set) -> Tuple[set, torch.LongTensor]:
    mask = torch.ones(n_heads, head_size)
    heads = set(heads) - already_pruned_heads
    for head in heads:
        # Shift the index by the number of already pruned heads
        # that sit before this one.
        head = head - sum(1 if h < head else 0
                          for h in already_pruned_heads)
        mask[head] = 0
    mask = mask.view(-1).contiguous().eq(1)
    index: torch.LongTensor = torch.arange(len(mask))[mask].long()
    return heads, index


def prune_linear_layer(layer: torch.nn.Linear,
                       index: torch.LongTensor,
                       dim: int = 0) -> torch.nn.Linear:
    index = index.to(layer.weight.device)
    W = layer.weight.index_select(dim, index).clone().detach()
    if layer.bias is not None:
        if dim == 1:
            b = layer.bias.clone().detach()
        else:
            b = layer.bias[index].clone().detach()
    new_size = list(layer.weight.size())
    new_size[dim] = len(index)
    new_layer = torch.nn.Linear(
        new_size[1], new_size[0],
        bias=layer.bias is not None).to(layer.weight.device)
    new_layer.weight.requires_grad = False
    new_layer.weight.copy_(W.contiguous())
    new_layer.weight.requires_grad = True
    if layer.bias is not None:
        new_layer.bias.requires_grad = False
        new_layer.bias.copy_(b.contiguous())
        new_layer.bias.requires_grad = True
    return new_layer
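

# Usage sketch (illustrative): pruning two heads from a hypothetical
# attention block with 12 heads of size 64, whose `attn.query` is an
# nn.Linear(768, 768):
#
#   heads, index = find_pruneable_heads_and_indices(
#       [0, 5], n_heads=12, head_size=64, already_pruned_heads=set())
#   attn.query = prune_linear_layer(attn.query, index)  # dim=0 prunes rows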