import warnings

from paddle import _C_ops

from ..base import framework
from ..base.dygraph import no_grad
from ..base.framework import in_dynamic_or_pir_mode
from .optimizer import Optimizer

__all__ = []


class SGD(Optimizer):
    r"""
    Optimizer of the stochastic gradient descent algorithm.

    .. math::

        param\_out = param - learning\_rate * grad

    Parameters:
        learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
            It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
        parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
            The default value is None in static graph mode, in which case all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It can be a float value as coeff of L2 regularization or \
            :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
            If a parameter has set regularizer using :ref:`api_paddle_ParamAttr` already, \
            the regularization setting here in optimizer will be ignored for this parameter. \
            Otherwise, the regularization setting here in optimizer will take effect. \
            Default None, meaning there is no regularization.
        grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
            some derived class of ``GradientClipBase`` . There are three clipping strategies
            ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
            :ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
        multi_precision (bool, optional): Whether to keep an extra float32 master copy of
            float16/bfloat16 parameters for the update, which helps numerical stability in
            mixed precision training. It only takes effect when the parameter dtype is
            float16 or bfloat16. The default value is False.
        name (str, optional): The default value is None. Normally there is no need for the user
            to set this property. For more information, please refer to
            :ref:`api_guide_Name` .

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32')
            >>> linear = paddle.nn.Linear(10, 10)
            >>> inp = paddle.to_tensor(inp)
            >>> out = linear(inp)
            >>> loss = paddle.mean(out)
            >>> sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
            >>> out.backward()
            >>> sgd.step()
            >>> sgd.clear_grad()
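
            >>> # A minimal additional sketch (not part of the original snippet) combining
            >>> # ``grad_clip`` and ``multi_precision``. ClipGradByGlobalNorm is the class
            >>> # referenced by the ``grad_clip`` parameter above; ``multi_precision`` only
            >>> # takes effect when parameters are float16/bfloat16.
            >>> clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
            >>> sgd = paddle.optimizer.SGD(
            ...     learning_rate=0.1,
            ...     parameters=linear.parameters(),
            ...     grad_clip=clip,
            ...     multi_precision=True,
            ... )
            >>> out = linear(inp)
            >>> loss = paddle.mean(out)
            >>> loss.backward()
            >>> sgd.step()
            >>> sgd.clear_grad()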

    """

    def __init__(
        self,
        learning_rate=0.001,
        parameters=None,
        weight_decay=None,
        grad_clip=None,
        multi_precision=False,
        name=None,
    ):
        if learning_rate is None:
            raise ValueError("learning_rate is not set")
        super().__init__(
            learning_rate=learning_rate,
            parameters=parameters,
            weight_decay=weight_decay,
            grad_clip=grad_clip,
            name=name,
        )
        self.type = "sgd"
        self._multi_precision = multi_precision
        self._master_weights = {}

    def _create_accumulators(self, block, parameters):
        assert isinstance(block, framework.Block)
        if isinstance(parameters, dict):
            parameters = self._update_param_group(parameters)

        for p in parameters:
            if p.name in self._already_create_accumulater:
                continue
            if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                # Keep a float32 master copy of low-precision parameters.
                master_p = self._create_master_weight(p)
                self._already_create_accumulater.add(p.name)
                continue
            if self._is_dtype_fp16_or_bf16(p.dtype) and not self._multi_precision:
                warnings.warn(
                    "Accumulating with FP16/BF16 in optimizer can lead to poor accuracy or slow convergence. "
                    "Consider using multi_precision=True option of the SGD optimizer."
                )
            self._already_create_accumulater.add(p.name)

    @no_grad
    def _append_optimize_op(self, block, param_and_grad):
        if isinstance(param_and_grad, dict):
            param_and_grad = self._update_param_group(param_and_grad)

        find_master = self._multi_precision and self._is_dtype_fp16_or_bf16(
            param_and_grad[0].dtype
        )
        master_weight = (
            self._master_weights[param_and_grad[0].name] if find_master else None
        )

        lr = self._create_param_lr(param_and_grad)
        if in_dynamic_or_pir_mode():
            # Dygraph / PIR mode: call the in-place C++ kernel directly.
            _C_ops.sgd_(
                param_and_grad[0],
                lr,
                param_and_grad[1],
                master_weight,
                find_master,
            )
            return None
        else:
            # Static graph mode: append an sgd op to the current block.
            assert isinstance(block, framework.Block)
            inputs = {
                "Param": param_and_grad[0],
                "Grad": param_and_grad[1],
                "LearningRate": lr,
            }
            outputs = {"ParamOut": param_and_grad[0]}
            attrs = {"multi_precision": find_master}

            if find_master:
                inputs["MasterParam"] = master_weight
                outputs["MasterParamOut"] = master_weight

            sgd_op = block.append_op(
                type=self.type,
                inputs=inputs,
                outputs=outputs,
                attrs=attrs,
                stop_gradient=True,
            )

            return sgd_op

    def _update_param_group(self, parameters):
        parameters = parameters.get("params")
        return parameters