o
    *j                      @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZmZ d dlmZ ddlmZ d	d
 Zdd Zdd Zdd Zdd Zdd ZdS )    N)get_argsget_tokenizer)initialize_distributed)GLM130B)get_model_parallel_groupget_model_parallel_rankget_model_parallel_world_size)load_checkpoint   )quantizec                 C   s0   |  d}|jdddd |jdtddd	 | S )
zArguments for BMInfZBMInfz--bminf
store_truez,Use BMInf to support low resource evaluationactionhelpz--bminf-memory-limit   z$Max memory for model per GPU (in GB)typedefaultr   )add_argument_groupadd_argumentintparsergroup r   j/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/glm_130b/initialize.pyadd_bminf_args   s   
r   c                 C   s0   |  d}|jdtdd |jdtddd d S )	NZQuantizationz--quantization-bit-width   )r   r   z--from-quantized-checkpointTz#Loading from a quantized checkpointr   )r   r   r   boolr   r   r   r   add_quantization_args"   s   

r   c                 C   s   |  d}|jdddd d S )NZInitializationz--sequential-initializationr   zTInitialize sequentially in tensor parallel group (reduce CPU RAM for initialization)r   )r   r   r   r   r   r   add_initialization_args-   s   

r    c                 C   sL   d| _ d| _d| _d| _d| _d| _d| _d| _d	| _d
| _	d
| _
d| _| S )Nr   F   i 0  i   i L `   i   zicetk-glm-130BpostT	inference)Zmodel_parallel_sizeZ
num_layersZhidden_sizeZinner_hidden_sizeZ
vocab_sizeZnum_attention_headsmax_sequence_lengthZtokenizer_typeZlayernorm_orderZ	skip_initZfp16mode)argsr   r   r   set_up_model_args8   s   r(   c                 C   s   t jdd}t| t| t| t| | | | \}}|g d7 }t|}t	|}t j
di t|t|}d|_t| |S )NF)add_help)z--model-parallel-size4z--moder$   r   )argparseArgumentParserr   r   r    r   Zadd_model_specific_argsparse_known_argsr   r(   	NamespacevarsZdo_trainr   )Zextra_args_providerr   ZknownZ	args_listr'   r   r   r   
initializeH   s   
r0   c                 C   s  t | }tj  t }tt D ]{}t |krt| 	 }| j
r/| jd us)J t|| j}t||  | jd urB| j
sBt|| j}| jr|dd l}tj dkrYtd| j d tj| j |j|d| jd> d}W d    n1 svw   Y  n|| j}| jrtjjt d qtj  tj dkrtdt | d	d
 tj  |  |jj}d|j_t : |tjd| jtj tj dtj!| jtj tj d"ddtj#dd| j| jtj ddk ^}}W d    n1 sw   Y  ||j_tj  ||fS )Nr   z!> BMInf activated, memory limit: z GBF   )quantizationZmemory_limit)r   z> Model initialized in z.1fsTr
   )deviceZdtype)r4   g      ?)$r   torchdistributedZbarriertimeranger   r   r   ZhalfZfrom_quantized_checkpointZquantization_bit_widthr   r	   bminfZget_rankprintZbminf_memory_limitcudar4   wrappertoZsequential_initializationr   Zempty_cacheevalZtransformerZparallel_outputZno_gradZonesr%   Zcurrent_deviceZint64ZarangeviewZrandn)r'   	tokenizerstartimodelr:   Zoriginal_parallel_output_r   r   r   initialize_model_and_tokenizerY   s   







rF   )r+   r8   r6   ZSwissArmyTransformerr   r   ZSwissArmyTransformer.argumentsr   ZSwissArmyTransformer.modelr   ZSwissArmyTransformer.mpur   r   r   ZSwissArmyTransformer.trainingr	   r2   r   r   r   r    r(   r0   rF   r   r   r   r   <module>   s   