import math
from typing import Callable, Iterable, Tuple

import numpy as np
import torch
from torch.distributions.bernoulli import Bernoulli
from torch.optim import Optimizer

from modelscope.utils.logger import get_logger

logger = get_logger()

__all__ = ['calculate_fisher', 'ChildTuningAdamW']


def calculate_fisher(model: torch.nn.Module,
                     data_loader,
                     forward_step,
                     reserve_p,
                     grad_clip=None):
    """Estimate Fisher information for the 'layer' parameters and build a
    gradient mask that keeps the top ``reserve_p`` fraction of entries."""
    gradient_mask = dict()
    model.train()

    for name, params in model.named_parameters():
        if 'layer' in name:
            gradient_mask[params] = params.new_zeros(params.size())

    iters = len(data_loader)
    for inputs in data_loader:
        loss = forward_step(model, inputs)
        loss.backward()
        for name, params in model.named_parameters():
            if 'layer' in name:
                if grad_clip is not None:
                    torch.nn.utils.clip_grad_norm_(params, **grad_clip)
                # Accumulate the squared gradient as the Fisher estimate.
                gradient_mask[params] += (params.grad**2) / iters
        model.zero_grad()

    logger.info('Calculate Fisher Information...')

    # Flatten all accumulated values and find the (1 - reserve_p) percentile.
    r = None
    for k, v in gradient_mask.items():
        v = v.view(-1).cpu().numpy()
        if r is None:
            r = v
        else:
            r = np.append(r, v)
    polar = np.percentile(r, (1 - reserve_p) * 100)
    for k in gradient_mask:
        gradient_mask[k] = gradient_mask[k] >= polar
    print('Polar => {}'.format(polar))

    return gradient_mask


class ChildTuningAdamW(Optimizer):

    def __init__(self,
                 params: Iterable[torch.nn.parameter.Parameter],
                 lr: float = 1e-3,
                 betas: Tuple[float, float] = (0.9, 0.999),
                 eps: float = 1e-6,
                 weight_decay: float = 0.0,
                 correct_bias: bool = True,
                 reserve_p: float = 1.0,
                 mode=None):
        if lr < 0.0:
            raise ValueError(
                'Invalid learning rate: {} - should be >= 0.0'.format(lr))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                'Invalid beta parameter: {} - should be in [0.0, 1.0['.format(
                    betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                'Invalid beta parameter: {} - should be in [0.0, 1.0['.format(
                    betas[1]))
        if not 0.0 <= eps:
            raise ValueError(
                'Invalid epsilon value: {} - should be >= 0.0'.format(eps))
        defaults = dict(
            lr=lr,
            betas=betas,
            eps=eps,
            weight_decay=weight_decay,
            correct_bias=correct_bias)
        super().__init__(params, defaults)

        self.gradient_mask = None
        self.reserve_p = reserve_p
        self.mode = mode

    def set_gradient_mask(self, gradient_mask):
        self.gradient_mask = gradient_mask

    def step(self, closure: Callable = None):
        """
        Performs a single optimization step.

        Arguments:
            closure (:obj:`Callable`, `optional`): A closure that reevaluates
                the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'Adam does not support sparse gradients, '
                        'please consider SparseAdam instead')

                # Child-tuning: mask the gradient so that only the selected
                # child network is updated.
                if self.mode is not None:
                    if self.mode == 'ChildTuning-D':
                        # Task-driven mask computed by ``calculate_fisher``.
                        if p in self.gradient_mask:
                            grad *= self.gradient_mask[p]
                    else:
                        # ChildTuning-F: task-free Bernoulli mask with keep
                        # probability ``reserve_p``.
                        grad_mask = Bernoulli(
                            grad.new_full(
                                size=grad.size(), fill_value=self.reserve_p))
                        grad *= grad_mask.sample() / self.reserve_p

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # Decay the first and second moment running averages in place.
                exp_avg.mul_(beta1).add_(grad, alpha=1.0 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1.0 - beta2)
                denom = exp_avg_sq.sqrt().add_(group['eps'])

                step_size = group['lr']
                if group['correct_bias']:
                    bias_correction1 = 1.0 - beta1**state['step']
                    bias_correction2 = 1.0 - beta2**state['step']
                    step_size = step_size * math.sqrt(
                        bias_correction2) / bias_correction1

                p.data.addcdiv_(exp_avg, denom, value=-step_size)

                # Decoupled weight decay, applied directly to the weights.
                p.data.add_(
                    p.data, alpha=-group['lr'] * group['weight_decay'])

        return loss
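

if __name__ == '__main__':
    # Minimal usage sketch, not part of the modelscope API: the toy model,
    # toy data and ``toy_forward_step`` below are assumptions purely for
    # illustration. Real use would pass a pretrained transformer and its
    # training data loader instead.
    toy_model = torch.nn.Sequential()
    # Parameter names must contain 'layer' to be considered by
    # ``calculate_fisher``.
    toy_model.add_module('layer_0', torch.nn.Linear(4, 4))
    toy_data = [{'x': torch.randn(2, 4), 'y': torch.randn(2, 4)}
                for _ in range(8)]

    def toy_forward_step(model, inputs):
        return torch.nn.functional.mse_loss(model(inputs['x']), inputs['y'])

    # ChildTuning-D: derive the task-driven mask from Fisher information,
    # then hand it to the optimizer before training.
    reserve_p = 0.3
    mask = calculate_fisher(toy_model, toy_data, toy_forward_step, reserve_p)
    optimizer = ChildTuningAdamW(
        toy_model.parameters(),
        lr=1e-3,
        reserve_p=reserve_p,
        mode='ChildTuning-D')
    optimizer.set_gradient_mask(mask)

    # ChildTuning-F would skip ``calculate_fisher``/``set_gradient_mask`` and
    # pass any other non-None mode, sampling a Bernoulli mask at every step.
    for inputs in toy_data:
        loss = toy_forward_step(toy_model, inputs)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()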