o
    *j*                     @   s   d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ ddlmZ ddlmZ e ZG dd deZdS )    )DictN)mpuprint_rank_0)FP16_Module)
functional)
TorchModel)Tensor)
get_logger)init_megatron_util)pre_load   )	PlugModel)PlugNLGConfigc                       sx   e Zd ZdZ fddZdddZedded	 fd
dZ									dddZ	dde
eef fddZ  ZS )DistributedPlugaG  
    The wrapper class of PLUG Model to initialize parallel environment, load model weights, generate sentences.
    Parameters:
        model_dir (`str`, *required*):
            Path to model damo/nlp_plug_text-generation_27B.
        The model structure in model_dir should be like this:
        model_dir
            |_ config.json
            |_ configuration.json
            |_ ds_zero-offload_10B_config.json
            |_ vocab.txt
            |_ model <-- an empty directory

        Model binaries shall be downloaded separately to populate the model directory, so that
        the model directory would contain the following binaries:
            |_ model
                |_ mp_rank_00_model_states.pt
                |_ mp_rank_01_model_states.pt
                |_ mp_rank_02_model_states.pt
                |_ mp_rank_03_model_states.pt
                |_ mp_rank_04_model_states.pt
                |_ mp_rank_05_model_states.pt
                |_ mp_rank_06_model_states.pt
                |_ mp_rank_07_model_states.pt
        rank (`int`, *required*):
            Used to identify different GPUs in a tensor parallel environment. eg. The rank of GPU #0 is 0, and the
            model file `mp_rank_00_model_states.pt` will be loaded on this GPU.
        world_size (`int`, *required*, defaults to 8):
            The parallel size in total.
        model_parallel_size (`int`, *required*, defaults to 8):
            The parallel size of model(tensor parallel).
        master_ip (`str`, *required*):
            The master IP, can usually be set to `"127.0.0.1"`, used as part of
            [`~torch.distributed.init_process_group`] method parameter `init_method`.
            `init_method` = `"tcp://{master_ip}:{master_port}"`
        master_port (`str`, *required*):
            The master port, can usually be set to `"29500"`, used as part of
            [`~torch.distributed.init_process_group`] method parameter `init_method`.
            `init_method` = `"tcp://{master_ip}:{master_port}"`
        seed (`int`, *optional*, defaults to 42):
            Random seed to control sampling.
    c                    sP   t  j|fi | || _|| _t|| _t||d d| _| j	dd| _
d S )N)	model_dirrankr   model)path_load_tag)super__init__r   	model_cfgr   Zfrom_pretrainedconfigr
   	iterationinitialize_modelr   )selfr   r   kwargs	__class__ l/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/plug/distributed_plug.pyr   @   s   zDistributedPlug.__init__r   c              	   C   sf  t d t| j}t dkr#tdt t	dd |
 D  | jjr/| jjr/|  |tj  | jjrt|}| jjr^|jjjjj  |jjjjj  |jjjjj  | jjrk|jjjjj  | jjr| D ]\}}d|v r|  qstt | j|d}|jj  }|D ]}||! vrt d|  qt d	|  q|jjj"|d
d |S )zBuild the model.z3Building Plug model. It will take a few minutes ...r   z5 > number of parameters on model parallel rank {}: {}c                 S   s   g | ]}|  qS r   )Znelement).0pr   r   r   
<listcomp>T   s    z4DistributedPlug.initialize_model.<locals>.<listcomp>Z	LayerNorm)tagz
Skip key: zLoading key: F)strict)#r   r   r   r   Zget_data_parallel_rankloggerinfoformatZget_tensor_model_parallel_ranksum
parametersZ	deepspeedZfp16Zhalfcudatorchcurrent_devicer   Zfp32_embeddingmoduler   ZbertZ
embeddingsZword_embeddingsfloatZposition_embeddingsZtoken_type_embeddingsZfp32_tokentypesZfp32_layernormZnamed_modulesr   r   Z
state_dictkeysZload_state_dict)r   r   r   name_moduleZ
load_modelZ
model_dictkeyr   r   r   r   K   sJ   
z DistributedPlug.initialize_modelr           ZInfc           	      C   s   |dkr| t | |d d k }|| |< |dkr`| |  d  } t j| dd\}}t jtj|dddd}||k}|d	d df 	 |d	dd f< d|d
< || }|| |< | dd } | S )Nr   ).Nr3   r   T)Z
descendingr4   dim.).r   )
r+   Ztopkviewsize
contiguoussortZcumsumFsoftmaxclone)	logitstop_ktop_pZfilter_valueZindices_to_removeZsorted_logitsZsorted_indicesZcumulative_probsZsorted_indices_to_remover   r   r   top_k_logitsy   s(   
zDistributedPlug.top_k_logitsNFTc                 C   s   | j |||||||||	|
d
S )N)checkpoint_activationsis_infersequence_outputparallel_outputr   )r   Zinput_tokensZtoken_type_idsattention_maskZtarget_tokensposition_idsZdecode_attention_maskrB   rC   rD   rE   r   r   r   forward   s   zDistributedPlug.forward   inputc                 G   s  t j }|d jd }|d dd |}|d |}|d |}| j  t 	 ( g }	g }
d}d }| j
j}d}||k r0|d dkr|dkr|
| ||kjd	d
d }|t|
 dkr{t |d | t j|
gddd  }nt j|
|d ||t|
 < |dk}|d |}g }
d }t j|dgt|
t j|d}| j|d ||||d	|dd	\}}}|d d dd d f }|| jd  }| j|| jd | jd d}tj|dd}t j|dd}|d  }||krd}d|d< |dkrt|	ttd|d krn%|dkr|d7 }q@t j||gdd}|
| |	| |d7 }||k sEg }|	D ]}|rG|d dkrG|dkrGq4|| q4d|iW  d    S 1 s]w   Y  d S )NZ	input_idsr   r   r4   dec_input_idsrG   f   rJ   T)as_tuplei   i )ZdtypedeviceF)rC   rD   rE   Ztemperaturer?   r@   )r?   r@   r5   )Znum_samplesd   g?generate_context)r+   r*   r,   shaper7   r9   tor   evalZno_gradr   Zoriginal_vocab_sizeappendZnonzerolencatZ
LongTensorfulllongr   rA   r;   r<   Zmultinomialitemintmax)r   rK   Z
out_lengthr   rO   Z
batch_sizetokensrL   rG   Zall_generate_tokensgenerate_tokensZcounterrD   Z
vocab_sizeZsep_token_idxstartrH   _r>   Z	log_probsprevZ
prev_tokenrQ   tokenr   r   r   generate   s   










:
&zDistributedPlug.generaterF   )	NNNNNFFNT)rJ   )__name__
__module____qualname____doc__r   r   staticmethodr.   rA   rI   r   strr   rc   __classcell__r   r   r   r   r      s"    +
. 
 r   )typingr   r+   Zmegatron_utilr   r   Zmegatron_util.fp16r   Ztorch.nnr   r;   Zmodelscope.modelsr   Zmodelscope.models.baser   Zmodelscope.utils.loggerr	   Zmodelscope.utils.megatron_utilsr
   Z$modelscope.utils.nlp.load_checkpointr    r   configurationr   r%   r   r   r   r   r   <module>   s   