import paddle
from paddle.base import framework, unique_name
from paddle.base.dygraph import base as imperative_base
from paddle.base.framework import Variable
from paddle.base.layer_helper import LayerHelper
from paddle.optimizer import Optimizer

__all__ = []


class LookAhead(Optimizer):
    r"""
    This implements the Lookahead optimizer of the
    paper: https://arxiv.org/abs/1907.08610.

    Lookahead keeps two sets of params: the fast_params and
    the slow_params. The inner_optimizer updates the fast_params at
    every training step, while Lookahead updates the slow_params and
    fast_params every k training steps as follows:

    .. math::

        slow\_param_t &= slow\_param_{t-1} + \alpha * (fast\_param_{t-1} - slow\_param_{t-1})

        fast\_param_t &= slow\_param_t
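
    For example, with :math:`\alpha = 0.5` and :math:`k = 5`: after five fast
    steps, if :math:`fast\_param = 1.0` and :math:`slow\_param = 0.0`, the slow
    weights become :math:`0.0 + 0.5 * (1.0 - 0.0) = 0.5`, and the fast weights
    are then reset to this value before the next five inner steps.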

    Args:
        inner_optimizer (Optimizer): The optimizer that updates the fast params step by step.
        alpha (float, optional): The learning rate of Lookahead. The default value is 0.5.
        k (int, optional): The slow params are updated every k steps. The default value is 5.
        name (str, optional): Normally there is no need for user to set this property.
            For more information, please refer to :ref:`api_guide_Name`.
            The default value is None.

    Examples:

        .. code-block:: python

            >>> import numpy as np
            >>> import paddle
            >>> import paddle.nn as nn

            >>> BATCH_SIZE = 16
            >>> BATCH_NUM = 4
            >>> EPOCH_NUM = 4

            >>> IMAGE_SIZE = 784
            >>> CLASS_NUM = 10
            >>> # define a random dataset
            >>> class RandomDataset(paddle.io.Dataset):
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
            ...         label = np.random.randint(0, CLASS_NUM - 1, (1,)).astype('int64')
            ...         return image, label
            ...     def __len__(self):
            ...         return self.num_samples

            >>> class LinearNet(nn.Layer):
            ...     def __init__(self):
            ...         super().__init__()
            ...         self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
            ...         self.bias = self._linear.bias
            ...     @paddle.jit.to_static
            ...     def forward(self, x):
            ...         return self._linear(x)

            >>> def train(layer, loader, loss_fn, opt):
            ...     for epoch_id in range(EPOCH_NUM):
            ...         for batch_id, (image, label) in enumerate(loader()):
            ...             out = layer(image)
            ...             loss = loss_fn(out, label)
            ...             loss.backward()
            ...             opt.step()
            ...             opt.clear_grad()
            ...             print("Train Epoch {} batch {}: loss = {}".format(
            ...                 epoch_id, batch_id, np.mean(loss.numpy())))
            >>> layer = LinearNet()
            >>> loss_fn = nn.CrossEntropyLoss()
            >>> optimizer = paddle.optimizer.SGD(learning_rate=0.1, parameters=layer.parameters())
            >>> lookahead = paddle.incubate.LookAhead(optimizer, alpha=0.2, k=5)

            >>> # create data loader
            >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
            >>> loader = paddle.io.DataLoader(
            ...     dataset,
            ...     batch_size=BATCH_SIZE,
            ...     shuffle=True,
            ...     drop_last=True,
            ...     num_workers=2)

            >>> # doctest: +SKIP('The run time is too long to pass the CI check.')
            >>> train(layer, loader, loss_fn, lookahead)

    Zslow      ?   Nc                    s   |d usJ dd|  krdksJ d J dt |tr#|dks'J d|| _| jjd u r9t   }n| jj}t j	||d d |d || _
|| _d| _t| jj| _d | _d | _d S )	Nzinner optimizer can not be Noneg              ?zBalpha should be larger or equal to 0.0, and less or equal than 1.0r   zk should be a positive integer)Zlearning_rate
parametersZweight_decayZ	grad_clipnameZ	lookahead)
isinstanceintinner_optimizer_parameter_listr   Zdefault_main_programZglobal_blockZall_parameterssuper__init__alphaktyper   	__class____name__helper_global_step_varZ_k_var)selfr   r   r   r   r   r    d/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/incubate/optimizer/lookahead.pyr   r   s2   
zLookAhead.__init__c                    s    t  || | j|| d S N)r   _set_auxiliary_varr   )r   keyvalr   r   r   r       s   zLookAhead._set_auxiliary_varc                 C   s`   | j   |   g }| jD ]}|jsq| dur%| }|||f q| jdd|d dS )a  
        Execute the optimizer and update parameters once.

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle
                >>> inp = paddle.rand([1,10], dtype="float32")
                >>> linear = paddle.nn.Linear(10, 1)
                >>> out = linear(inp)
                >>> loss = paddle.mean(out)
                >>> sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())
                >>> lookahead = paddle.incubate.LookAhead(sgd, alpha=0.2, k=5)
                >>> loss.backward()
                >>> lookahead.step()
                >>> lookahead.clear_grad()

        N)lossstartup_programparams_grads)r   step_increment_global_varr   Z	trainableZ
_grad_ivarappend_apply_optimize)r   r%   paramZgrad_varr   r   r   r&      s   


zLookAhead.stepc                 C   s,   t |tjsJ |D ]	}| | j| q
d S r   )r   r   ZBlockZ_add_accumulator	_slow_str)r   blockr   pr   r   r   _create_accumulators   s   zLookAhead._create_accumulatorsc                 C   sV   | j d u rtjjtddgdddd| _ | jjdd| j gid	| j gid
did d S )NZlookahead_step   r   int32Tr   shapevaluedtypeZpersistable	incrementXZOutr&   r   )r   ZinputsZoutputsattrs)r   paddlestaticcreate_global_varr   generater   Z	append_op)r   r   r   r   r'      s   



zLookAhead._increment_global_varc                 C   s&  t jdgddd}t jdgddd}t jjtddg| jddd}t | j	|}t 
| j	|}t j|d	d
}t 
||}t j|d	d
}| | j|d }	||d  d| |	  }
t |
|	 | j|d  d| j |	  }
||
 d| |d   }t ||d  ||
 d| |	  }t ||	 d S )Nr/   r0   Zlookahead_ones)r2   r4   r   Zlookahead_zerosZlookahead_kTr1   Zfloat32)r4   r   r   )r8   ZonesZzerosr9   r:   r   r;   r   	remainderr   equalcastZ_get_accumulatorr+   Zassignr   )r   r,   Zparam_and_gradZone_varZzero_varZk_varmodZcond_1Zcond_2Zslow_varZtmp_varZ	tmp_var_1r   r   r   _append_optimize_op   s0   zLookAhead._append_optimize_opc                 C   sJ   t |ts	J d| jj||||d\}}|   | j|||d}||fS )a  
        Add operations to minimize ``loss`` by updating ``parameters``.

        Args:
            loss (Tensor): A ``Tensor`` containing the value to minimize.
            startup_program (Program, optional): :ref:`api_paddle_static_Program` for
                initializing parameters in ``parameters``. The default value
                is None, in which case :ref:`api_paddle_static_default_startup_program` will be used.
            parameters (list, optional): List of ``Tensor`` or ``Tensor.name`` to update
                to minimize ``loss``. The default value is None, in which case all parameters
                will be updated.
            no_grad_set (set, optional): Set of ``Tensor`` or ``Tensor.name`` that don't need
                to be updated. The default value is None.

        Returns:
            tuple: tuple (optimize_ops, params_grads), a list of operators appended
            by minimize and a list of (param, grad) tensor pairs, where param is a
            ``Parameter`` and grad is the gradient value corresponding to the parameter.
            In static graph mode, the returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to
            indicate program pruning. If so, the program will be pruned by ``feed`` and
            ``fetch_list`` before run, see details in ``Executor``.

        Examples:

            .. code-block:: python

                >>> import paddle

                >>> inp = paddle.rand([1, 10], dtype="float32")
                >>> linear = paddle.nn.Linear(10, 1)
                >>> out = linear(inp)
                >>> loss = paddle.mean(out)
                >>> sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())
                >>> lookahead = paddle.incubate.LookAhead(sgd, alpha=0.2, k=5)
                >>> loss.backward()
                >>> lookahead.minimize(loss)
                >>> lookahead.clear_grad()

        zThe loss should be an Tensor.)r$   r   no_grad_set)r$   r%   )r   r   r   minimizer'   r)   )r   r#   r$   r   rA   Zoptimize_opsr%   _r   r   r   rB      s   +
zLookAhead.minimize)r	   r
   N)NNN)r   
__module____qualname____doc__r+   r   r    r   Zdygraph_onlyimperative_baseZno_gradr&   r.   r'   r@   rB   __classcell__r   r   r   r   r      s    V&!r   )r8   Zpaddle.baser   r   Zpaddle.base.dygraphr   rG   Zpaddle.base.frameworkr   Zpaddle.base.layer_helperr   Zpaddle.optimizerr   __all__r   r   r   r   r   <module>   s   
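

# The branchless k-step schedule in `_append_optimize_op` can be hard to read
# at first. Below is a minimal eager-mode sketch of the same masked-select
# trick with illustrative values; `step`, `k`, `alpha`, `fast`, and `slow` are
# made-up names for this demonstration, not part of the class above.
if __name__ == "__main__":
    step = paddle.to_tensor([10], dtype='int32')  # global step counter
    k = paddle.to_tensor([5], dtype='int32')  # lookahead period
    alpha = 0.5
    fast = paddle.to_tensor([1.0])  # fast (inner-optimizer) weights
    slow = paddle.to_tensor([0.0])  # slow (lookahead) weights

    # hit == 1.0 exactly when step % k == 0, else 0.0.
    hit = paddle.cast(
        paddle.equal(paddle.remainder(step, k), paddle.zeros([1], 'int32')),
        'float32',
    )
    blended = alpha * fast + (1.0 - alpha) * slow
    # On hit steps both copies move to the blended value; otherwise each
    # keeps its previous value.
    fast = hit * blended + (1 - hit) * fast
    slow = hit * blended + (1 - hit) * slow
    print(float(fast), float(slow))  # prints 0.5 0.5 for these values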