o
    *j                     @   sP   d dl Z d dl mZ d dlmZ e ZdddZ		dddZ		dd	d
ZdS )    N)nn)
get_loggerc                 C   s   | j d }tjdd}tj|tjdd| tjdd|ddtj|tjdd|tjdd| dd }|durKt|| t| | }|S t|| }|S )a  
    Calculate two logits' the KL div value symmetrically.
    :param logits1: The first logit.
    :param logits2: The second logit.
    :param attention_mask: An optional attention_mask which is used to mask some element out.
    This is usually useful in token_classification tasks.
    If the shape of logits is [N1, N2, ... Nn, D], the shape of attention_mask should be [N1, N2, ... Nn]
    :return: The mean loss.
    none)Z	reduction)dimN)shaper   Z	KLDivLosstorchsumZ
LogSoftmaxZSoftmaxmean)Zlogits1Zlogits2attention_maskZ
labels_numZKLDivloss r   k/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/nlp/structbert/adv_utils.py_symmetric_kl_div   s4   

r   h㈵>c                 K   sj  |dur|nd| }| | j |  d| }|d d|v r&|d d|vr,dn|d }	|d }
|	s8d}
d|v rA|d |di |d|i}|j}t|||
}tj	||d j }|j
dd	td
djdd	dd }tt|}|rtd |S ||d  }|||  }t|| |}t|| |}|di |d|i}|j}t|||
}|| S )a4  
    Calculate the adv loss of the model.
    :param embedding: Original sentence embedding
    :param model: The model, or the forward function(including decoder/classifier),
            accept kwargs as input, output logits
    :param ori_logits: The original logits outputted from the model function
    :param ori_loss: The original loss
    :param adv_grad_factor: This factor will be multiplied by the KL loss grad and then the result will be added to
            the original embedding.
            More details please check:https://arxiv.org/abs/1908.04577
            The range of this value always be 1e-3~1e-7
    :param adv_bound: adv_bound is used to cut the top and the bottom bound of the produced embedding.
            If not proveded, 2 * sigma will be used as the adv_bound factor
    :param sigma: The std factor used to produce a 0 mean normal distribution.
            If adv_bound not proveded, 2 * sigma will be used as the adv_bound factor
    :param kwargs: the input param used in model function
    :return: The original loss adds the adv loss
    N   r   	input_idsinputs_embedswith_attention_maskFr   Tinfr   keepdimp   r   z'Nan occurred when calculating adv loss.gư>r   datanewsizeZnormal_popZlogitsr   r   ZautogradZgradZnormfloatmaxanyisnanloggerwarningmin)	embeddingmodelZ
ori_logitsori_lossadv_grad_factor	adv_boundsigmakwargsembedding_1r   r   outputsZ	v1_logitsr   emb_grademb_grad_normis_nanembedding_2Z
adv_logitsadv_lossr   r   r   compute_adv_loss3   sN   




r5   c                 K   sP  |dur|nd| }| | j |  d| }	|d d|v r&|d |di |d|	i}
|
j\}}t||t|| }|d }tj	||	d j }|j
ddtddjd	dd
d }tt|}|rptd |S || }|	||  }t|	| |}t|	| |}|di |d|i}
|
j\}}t||t|| }|| S )a  
    Calculate the adv loss of the model. This function is used in the pair logits scenario.
    :param embedding: Original sentence embedding
    :param model: The model, or the forward function(including decoder/classifier),
            accept kwargs as input, output logits
    :param start_logits: The original start logits outputted from the model function
    :param end_logits: The original end logits outputted from the model function
    :param ori_loss: The original loss
    :param adv_grad_factor: This factor will be multiplied by the KL loss grad and then the result will be added to
            the original embedding.
            More details please check:https://arxiv.org/abs/1908.04577
            The range of this value always be 1e-3~1e-7
    :param adv_bound: adv_bound is used to cut the top and the bottom bound of the produced embedding.
            If not proveded, 2 * sigma will be used as the adv_bound factor
    :param sigma: The std factor used to produce a 0 mean normal distribution.
            If adv_bound not proveded, 2 * sigma will be used as the adv_bound factor
    :param kwargs: the input param used in model function
    :return: The original loss adds the adv loss
    Nr   r   r   r   Tr   r   r   r   z,Nan occurred when calculating pair adv loss.r   r   )r'   r(   Zstart_logitsZ
end_logitsr)   r*   r+   r,   r-   r.   r/   Zv1_logits_startZv1_logits_endr   r0   r1   r2   r3   Zadv_logits_startZadv_logits_endr4   r   r   r   compute_adv_loss_pairo   sX   





r6   )N)Nr   )r   r   Zmodelscope.utils.loggerr   r$   r   r5   r6   r   r   r   r   <module>   s   
 
B