o
    *j5                     @   s   d dl Z d dlZd dlZd dlmZ d dlm  mZ ddl	m
Z
 G dd dejZG dd dejZG dd	 d	ejZG d
d dejZG dd dejZG dd dejZdS )    N   )Rotation2xyzc                       s   e Zd Z												
				d fdd	Zdd Zdd ZdddZdd ZdddZ fddZ	 fddZ
  ZS ) MDM            皙?NgeluFrot6damass   	trans_encc                    s|  t    || _|| _|| _|| _|| _|| _|| _|| _	|| _
|| _|| _|	| _|
| _|| _|| _|| _|| _|| _|| _|dd | _| j| j | _|dd| _|dd| _|dd| _|| _| jdkrl| jnd	| _t| j| j| j | j| _t| j| j| _ || _!| jd
krt"d t#j$| j| j| j| j| jd}t#j%|| jd| _&n<| jdkrt"d t#j'| j| j| j| j|d}t#j(|| jd| _)n| jdkrt"d t#j*| j| j| jdd| _+nt,dt-| j| j | _.| jdkr%d| jv rt#/| j| j| _0t"d t"d || _1| 2|| _3d| jv r%t4| j| j| _5t"d t6| j| j| j| j| j| _7t8d|| jd| _9d S )N
action_embZnormalize_encoder_outputF	cond_modeZno_condcond_mask_prob        grur   r   zTRANS_ENC init)d_modelZnheadZdim_feedforwarddropout
activation)
num_layers	trans_deczTRANS_DEC initzGRU initT)r   Zbatch_firstz>Please choose correct architecture [trans_enc, trans_dec, gru]textz
EMBED TEXTzLoading CLIP...actionzEMBED ACTIONcpu)devicesmpl_data_pathdataset):super__init__legacy	modeltypenjointsnfeatsnum_actionsdata_repr   pose_repglobglob_rottranslation
latent_dimff_sizer   	num_headsr   ablationr   clip_dimgetr   input_featsZnormalize_outputr   r   archZgru_emb_dimInputProcessinput_processPositionalEncodingsequence_pos_encoderemb_trans_decprintnnZTransformerEncoderLayerZTransformerEncoderseqTransEncoderZTransformerDecoderLayerZTransformerDecoderseqTransDecoderZGRUr   
ValueErrorTimestepEmbedderembed_timestepLinear
embed_textclip_versionload_and_freeze_clip
clip_modelEmbedActionembed_actionOutputProcessoutput_processr   rot2xyz)selfr"   r#   r$   r%   r*   r'   r(   r)   r+   r,   r   r-   r   r   r.   r   r!   r&   r   r/   r2   r7   rA   ZkargsZseqTransEncoderLayerZseqTransDecoderLayer	__class__ s/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/cv/motion_generation/modules/mdm.pyr       s   








zMDM.__init__c                 C   s   dd |   D S )Nc                 S   s   g | ]\}}| d s|qS )zclip_model.)
startswith).0nameprL   rL   rM   
<listcomp>   s    z*MDM.parameters_wo_clip.<locals>.<listcomp>)Znamed_parameters)rI   rL   rL   rM   parameters_wo_clip   s   zMDM.parameters_wo_clipc                 C   s4   t j|ddd\}}|  | D ]}d|_q|S )Nr   F)r   Zjit)cliploadeval
parametersZrequires_grad)rI   rA   rC   Zclip_preprocessrQ   rL   rL   rM   rB      s   
zMDM.load_and_freeze_clipc                 C   s\   |j \}}|rt|S | jr,| jdkr,ttj||jd| j |d}|d|  S |S )Nr   )r   r   g      ?)	shapetorchZ
zeros_likeZtrainingr   Z	bernoulliZonesr   view)rI   Zcond
force_maskbsdmaskrL   rL   rM   	mask_cond   s   

zMDM.mask_condc                 C   s   t |  j}| jdv rdnd }|d urFd}|d }||k s J tj||dd|}tj|j	d || g|j
|jd}tj||gd	d
}n
tj|dd|}| j| S )N)ZhumanmlZkit   M      T)context_lengthtruncater   )dtyper   r   )dim)rd   )nextrW   r   r   rT   tokenizetorY   zerosrX   re   catrC   encode_textfloat)rI   Zraw_textr   Zmax_text_lenZdefault_context_lengthrc   ZtextsZzero_padrL   rL   rM   rl      s,   zMDM.encode_textc                 C   s  |j \}}}}| |}|dd}	d| jv r*| |d }
|| | j|
|	d7 }d| jv r?| |d }|| j||	d7 }| jdkrn|	||| d|}|
|dd}|ddd	}|	|| jd|}tj||fdd
}| |}| jdkrtj||fd	d
}| |}| |dd }nC| jdkr| jrtj||fd	d
}n|}| |}| jr| j||ddd }n| j||d}n| jdkr|}| |}| |\}}| |}|S )z~
        x: [batch_size, njoints, nfeats, max_frames], denoted x_t in the paper
        timesteps: [batch_size] (int)
        ZuncondFr   )r[   r   r   r   rb   r   Zaxisr   Nr   )ZtgtZmemory)rX   r>   r0   r   rl   r@   r_   rE   r2   reshaperepeatpermuter+   rY   rk   r4   r6   r:   r7   r;   r   rG   )rI   x	timestepsyr\   r#   r$   nframesZembr[   Zenc_textr   Z
x_reshapedZemb_gruZxseqoutput_rL   rL   rM   forward   s\   












zMDM.forwardc                    s   t  | | jj| d S N)r   _applyrH   
smpl_model)rI   fnrJ   rL   rM   rz      s   z
MDM._applyc                    s*   t  j|i | | jjj|i | d S ry   )r   trainrH   r{   )rI   argskwargsrJ   rL   rM   r}      s   z	MDM.train)r   r   r   r   r	   NNr
   Fr   r   r   r   FN)Fry   )__name__
__module____qualname__r    rS   rB   r_   rl   rx   rz   r}   __classcell__rL   rL   rJ   rM   r      s0    {

7r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )r5   r	     c                    s   t t|   tj|d| _t||}tjd|tj	d
d}ttd|d	 td |  }t|| |d d dd df< t|| |d d dd df< |
ddd}| d| d S )N)rQ   r   )re   r   rb   g     @pe)r   r5   r    r9   ZDropoutr   rY   rj   Zarangerm   Z	unsqueezeexpnplogsincosZ	transposeZregister_buffer)rI   r   r   max_lenr   positionZdiv_termrJ   rL   rM   r      s      zPositionalEncoding.__init__c                 C   s*   || j d |jd d d f  }| |S Nr   )r   rX   r   )rI   rr   rL   rL   rM   rx     s    
zPositionalEncoding.forward)r	   r   r   r   r   r    rx   r   rL   rL   rJ   rM   r5      s    r5   c                       $   e Zd Z fddZdd Z  ZS )r=   c                    sF   t    || _|| _| j}tt| j|t t||| _d S ry   )	r   r    r+   r6   r9   Z
Sequentialr?   ZSiLU
time_embed)rI   r+   r6   Ztime_embed_dimrJ   rL   rM   r      s   


zTimestepEmbedder.__init__c                 C   s   |  | jj| dddS )Nr   r   rb   )r   r6   r   rq   )rI   rs   rL   rL   rM   rx   #  s
   

zTimestepEmbedder.forwardr   rL   rL   rJ   rM   r=     s    r=   c                       r   )r3   c                    sR   t    || _|| _|| _t| j| j| _| jdkr't| j| j| _d S d S Nrot_vel)	r   r    r&   r1   r+   r9   r?   poseEmbeddingvelEmbedding)rI   r&   r1   r+   rJ   rL   rM   r    *  s   

zInputProcess.__init__c                 C   s   |j \}}}}|d|||| }| jdv r| |}|S | jdkrB|dg }| |}|dd  }| |}tj||fddS t)N)   r   r   rb   r   ZxyzZhml_vecr   r   r   rn   )	rX   rq   ro   r&   r   r   rY   rk   r<   )rI   rr   r\   r#   r$   ru   
first_posevelrL   rL   rM   rx   3  s   





zInputProcess.forwardr   rL   rL   rJ   rM   r3   (  s    	r3   c                       r   )rF   c                    s^   t    || _|| _|| _|| _|| _t| j| j| _	| jdkr-t| j| j| _
d S d S r   )r   r    r&   r1   r+   r#   r$   r9   r?   	poseFinalvelFinal)rI   r&   r1   r+   r#   r$   rJ   rL   rM   r    F  s   

zOutputProcess.__init__c                 C   s   |j \}}}| jdv r| |}n&| jdkr5|dg }| |}|dd  }| |}tj||fdd}nt|||| j| j	}|
dddd}|S )Nr   r   r   r   rn   rb   r   )rX   r&   r   r   rY   rk   r<   ro   r#   r$   rq   )rI   rv   ru   r\   r]   r   r   rL   rL   rM   rx   Q  s   




zOutputProcess.forwardr   rL   rL   rJ   rM   rF   D  s    rF   c                       r   )rD   c                    s"   t    tt||| _d S ry   )r   r    r9   	ParameterrY   Zrandnaction_embedding)rI   r%   r+   rJ   rL   rM   r    d  s   


zEmbedAction.__init__c                 C   s&   |d d df  tj}| j| }|S r   )ri   rY   longr   )rI   inputidxrv   rL   rL   rM   rx   i  s   
zEmbedAction.forwardr   rL   rL   rJ   rM   rD   b  s    rD   )rT   numpyr   rY   Ztorch.nnr9   Ztorch.nn.functionalZ
functionalFZrotation2xyzr   Moduler   r5   r=   r3   rF   rD   rL   rL   rL   rM   <module>   s    s