o
    *jB                     @   s<   d dl Z d dlmZ d dlmZ e ZG dd deZdS )    N)PretrainedConfig)loggingc                $       sx   e Zd ZdZdddddddddd	d
ddddddddddddddddddddgdddddf$ fdd	Zedd Z  ZS )GPTMoEConfigzgpt-moei d  i   N   i   Zgelug?i      g-q=TFg{Gz?   d   r   g?standardc%           (         s  t  jdd|i|% || _|| _|d u rd| n|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _|| _|| _|| _|| _|| _|| _|rL|rLJ || _|| _|d u rc|| dks^J || | _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _ | | _!|!| _"|"| _#|#| _$|$| _%| j d t&j'( krt&j'( | _)n| j d | _)t*t&j+,dd }&t*t&j+,dd }'|&dk p|&dko|'dk | _-d S )NZlayer_norm_eps   r   .       ).super__init__
vocab_sizehidden_sizeffn_hidden_sizenum_hidden_layersnum_attention_heads
hidden_actintermediate_sizehidden_dropout_probattention_probs_dropout_probmax_position_embeddingstype_vocab_sizelayernorm_epsilonbias_gelu_fusionfp32_residual_connectionsequence_parallelfp16bf16apply_query_key_layer_scalingattention_softmax_in_fp32kv_channelsmasked_softmax_fusionattention_dropoutbias_dropout_fusion(apply_residual_connection_post_layernormhidden_dropoutinit_method_stdeod_idtokens_to_generatetop_ktop_pnum_experts	use_tuteltop_k_linear_strategyuse_expert_residual_networkload_ds_ckpts	model_dirtorchcudaZdevice_countZmoe_expert_parallel_sizeint__version__splitZno_persist_layer_norm)(selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   kwargsZTORCH_MAJORZTORCH_MINOR	__class__r   l/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/gpt_moe/configuration.pyr      sf   (
zGPTMoEConfig.__init__c                 C   s   | j rtjS | jrtjS tjS )N)r    r5   Zhalfr!   Zbfloat16float)r:   r   r   r>   params_dtypey   s
   zGPTMoEConfig.params_dtype)__name__
__module____qualname__Z
model_typer   propertyr@   __classcell__r   r   r<   r>   r      sR    ^r   )r5   Z transformers.configuration_utilsr   Ztransformers.utilsr   Z
get_loggerloggerr   r   r   r   r>   <module>   s
   