o
    )jf8                     @   st   d dl Z d dlm  mZ ddlmZmZmZ dZ	dddZ
				
dddZdd Z									dddZdS )    N   )GaborSTRFConvMelScaleModulationDomainLossModule:0yE>psmiamc                 C   s  t | r| j| j}}n	| d | d }}t |r#|j|j}}n	|d |d }}|dkrSt |d |d  }t |d |d  }	|	|t  }
t |
ddS |dkrr|d |d  }|| ||  |t  }t |ddS |dkr|d |d  }|| ||  |t  }t |d |d  }t |d |d  }	|	|t  }
||
 }t |ddS |d	kr|d |d  }|| ||  |t  }|| ||  |t  }t || |}t || |}||fS d
S )zL
        stft: (batch, ..., 2) or complex(batch, ...)
        y = x + n
    .r   .r   iam   r   r   psmr   crmN)torchZ
is_complexrealimagsqrtEPSclamp)
mixed_spec
clean_spec	mask_typeZclipyryixrxiymagxmagr
   Zypowr   r   mrmi r   i/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/modelscope/models/audio/aec/network/loss.pycompute_mask   s>   

r!     nTc                 C   sJ   t j| d d | d d  dd}||k}t |dk||k}d||< |S )z
        energy based vad should be accurate enough
        spec: (batch, bins, frames, 2)
        returns (batch, frames)
    r   r   r	   r   dimr         ?)r   sumlogical_and)specZthdhighZthdlowZint16ZenergyZvadidxr   r   r    
energy_vad7   s
   "	r+   c                 C   sh   t dd }tdddd}|| t| }| D ]}d|_qtdd| d	 d
 d	 }||fS )Nz"./network/gabor_strf_parameters.ptZ
state_dict   <   )ZsupnZsupkZnkernFP   i>  r   r   )Zn_melsZsample_rateZn_stft)
r   loadr   Zload_state_dictr   eval
parametersZrequires_gradr   cuda)n_fftZgabor_strf_parametersZgabor_modulation_kernelsmodulation_loss_moduleparamstft2melr   r   r    modulation_loss_initG   s$   
r7   psm_lossmseF  @  c	                    sx  d urt d  tjddd+fdd	
fddfdd	d
d 	
fdd}	d,
fdd	d,
fdd	}
d,
fdd	}	 	d- 
fdd	}d, 
fdd	}d,
fdd	fdd}fdd}	
fdd }| d!krS | d"kr|S | d#kr|S | d$krS | d%kr|
S | d&kr|S | d'kr|S | d(kr|S | d)kr|	S t d* d S ).NzUse loss weight: F)Zperiodicc              	          t j|  | jd|dS )NF)windowcenterreturn_complex)r   stfttodevice)xr?   
hop_lengthr3   r=   winlenr   r    r@   g   s   
z mask_loss_function.<locals>.stftc              	      r<   )NF)r=   r>   length)r   istftrA   rB   )rC   slenrD   r   r    rH   r   s   
z!mask_loss_function.<locals>.istftc              	      s  t  ! t | }t|D ]\}}d|||dddf< qW d   n1 s(w   Y  || }| | } du r<d}n|  } dkrTdt |t | | d  }n+ dkret |t | |  }ndt |t | | d d| t | |    }|t | }|S )	z" [Batch, Time, Frequency]
        r   Nr   r9   r&   r   Zmaeg?)r   no_grad	ones_like	enumerater'   powabs)targetsmasksnframesmask_for_lossr*   numalphaloss)	loss_typeweightr   r    	mask_loss|   s(   

 z%mask_loss_function.<locals>.mask_lossc                 S   s$  t  # t | d }t|D ]\}}d|||dddf< qW d   n1 s*w   Y  |d | }|d | }| d | }| d | }	t |d d |d d  | }
t | d d | d d  | }t t || dt ||	 d }t t |
| d}|| t | }|S )z% [Batch, Time, Frequency, 2]
        r   r   Nr	   r   )r   rJ   rK   rL   r   r'   rM   )rO   r)   rQ   rR   r*   rS   r   r   r   r   r   r   loss1loss2rU   r   r   r    spectrum_loss   s(   
"
&z)mask_loss_function.<locals>.spectrum_lossc                    s    |  g dd } | g dd }t # t|d }t|D ]\}}d|||d d d f< q&W d    n1 s@w   Y  |d d |d d  d |d  }	|d d |d d  d }
|	| }	|
| }
tt|	|
 dt| }|S )	Nr   r   r         r   r   r   r	   g333333?g333333?)permuter   rJ   rK   rL   r'   rM   )mixedcleanrP   rQ   ZyspecZxspecrR   r*   rS   Zemagr   rU   )r@   r   r    sa_loss_dlen   s   
$ z(mask_loss_function.<locals>.sa_loss_dlenc                    s   | }|}t ||}|ddd} |||}|d urqt|}	t ' t|d d d d df }
t|D ]\}}d|
||d f< q:W d    n1 sQw   Y  |d d d d df |
 }|	|
 }	t||	}|| S |S )Nr   r   r   )	r!   r_   r+   r   rJ   rK   rL   FZbinary_cross_entropy)r`   ra   rP   rQ   subtaskr   r   rO   rU   Z
vadtargetsrR   r*   rS   Zloss_vad)rX   r   r@   r   r    psm_vad_loss_dlen   s$   
z-mask_loss_function.<locals>.psm_vad_loss_dlenc                    s&  | d}|d}t |}t |}t  ! t |}	t|D ]\}
}d|	|
d d |d f< q"W d    n1 s<w   Y  ||	 }||	 |g d }t t t|d ddd }t t t|d ddd }t	|| }|ddd}dt
||| }| ||||}|| }|S )NTr   r   r   r   r   r   r   g?)r   rN   rJ   rK   rL   r_   logZ	transposer6   r!   r4   )r`   ra   rP   rQ   rd   r   r   Zenhanced_magZ	clean_magrR   r*   rS   Zclean_log_melZenhanced_log_melrT   rU   rZ   )r   re   r@   r   r    modulation_loss   s4   





z+mask_loss_function.<locals>.modulation_lossc                    s   | d } |d }| }t  ! t |}t|D ]\}}d|||d d d f< qW d    n1 s4w   Y  || }	||	g dd }
 |
|jd }t||}|S )Nr^   r   rf   r]   r   )r   rJ   rK   rL   r_   	unsqueezeshapeZwav2vec_loss_module)r`   ra   rP   rQ   rd   r   rR   r*   rS   	masks_estestimate	est_cleanrU   )rH   r@   r   r    wav2vec_loss   s   


z(mask_loss_function.<locals>.wav2vec_lossTc                    s  | }t  ! t |}t|D ]\}}	d|||	d d d f< qW d    n1 s,w   Y  || }
||
g dd }||jd }t|jd |jd }|d d d |f }|d d d |f }|r~|t j|ddd }|t j|ddd }t j	|| ddd}t j	|d ddd  }|| | }|| }t j	|d ddt j	|d dd   }d	t 
|   }| }|S )
Nr   rf   r]   r   T)r%   Zkeepdimr   r$   )r   rJ   rK   rL   r_   ri   rj   minmeanr'   log10)r`   ra   rP   rQ   rd   Z	zero_meanr   rR   r*   rS   rk   rl   rm   Zflendots_clean_energyscaled_cleane_noisesisdrrU   )r   rH   r@   r   r    sisdr_loss_dlen   s8   

z+mask_loss_function.<locals>.sisdr_loss_dlenc                    s  | }|}t  ! t |}t|D ]\}}	d|||	d d d f< qW d    n1 s0w   Y  || }
||
g dd }|d |d  |d |d   }|d |d  |d |d   }t j|d|dgdd}|d d |d d    }|| |d }|| }t j|d d |d d  d	d}t j|d d |d d  d	d}t j|d	dt j|d	d   }d
t |   }|	 }|S )Nr   rf   r]   r   r	   r$   r   r   ro   )
r   rJ   rK   rL   r_   ri   catr'   rr   rq   )r`   ra   rP   rQ   rd   r   r   rR   r*   rS   rk   rl   Zdot_realZdot_imagrs   rt   ru   rv   Zscaled_clean_energyZe_noise_energyrw   rU   )r   r@   r   r    sisdr_freq_loss_dlen  sL   



z0mask_loss_function.<locals>.sisdr_freq_loss_dlenc              	      s   |  g d} | g d}|d }|d }t||dd\}}t|jd d }	t # t|d }
t|D ]\}}d|
||d d d f< q<W d    n1 sVw   Y  |dd |	f |
 }|d|	d f |
 }||
 }||
 }d u r~d	}n| }|d }|d
 }t|t	|| ||  |d  d t|t	|| ||  |d
  d  }t|t	|| d t|t	|| d  }d||  t| }|S )Nr\   r^   r   )r   r   r   r   .r   r	   r&   )
r_   r!   intrj   r   rJ   rK   rL   r'   rM   )r`   ra   rP   rQ   rd   r   r   Ztgt_mrZtgt_miDrR   r*   rS   r   r   rT   r   r   rY   rZ   rU   )r@   rW   r   r    crm_loss_dlen@  s:   
((z)mask_loss_function.<locals>.crm_loss_dlenc                    s    | d |d ||S )Nr   r   )r`   ra   rP   rQ   )r~   r   r    crm_miso_loss_dlen`  s   z.mask_loss_function.<locals>.crm_miso_loss_dlenc           	   	      s   | j d }|j d | } | d |d |dd |f |}td|D ]!} | d|f |d|f |d|| || | f |}|| }q"|| S )Nry   r   r   .r   )rj   range)	r`   ra   rP   rQ   Zchsr}   rU   chrY   )re   r   r    mimo_loss_dlenc  s   

z*mask_loss_function.<locals>.mimo_loss_dlenc                    sf   | g d}|d }|jd d }tj|dd |d f |d|d d f gdd} |||}|S )Nr\   r^   r   .ry   r$   )r_   rj   r   rz   )r`   ra   r)   rQ   r   r}   Zspec_estrU   )r[   r@   r   r    spec_loss_dlenn  s   &z*mask_loss_function.<locals>.spec_loss_dlenre   rx   r{   r~   rh   rn   r   r   rb   zerror loss func)F)N)NT)printr   Zhamming_window)Z	loss_funcrV   r   Zuse_mod_lossZuse_wav2vec_lossr3   rE   r   rW   rb   rh   rn   rx   r{   r   r   r   r   )r   r~   rE   rH   rV   rX   r   r3   re   r[   r@   rW   r=   rF   r    mask_loss_functionX   sP   

%! 
r   )r   r   )r"   r#   T)	r8   r9   r   FFr:   r;   r   N)r   Ztorch.nn.functionalnnZ
functionalrc   rh   r   r   r   r   r!   r+   r7   r   r   r   r   r    <module>   s(   
,
