o
    "j2@                     @   sf   d dl mZ d dlmZ d dlZd dlmZ d dlmZ ddl	m
Z
 edd	dd
G dd deZdS )    )defaultdict)reduceN)	Optimizer)
deprecated   )_strong_wolfez2.5.0zpaddle.optimizer.LBFGS)ZsinceZ	update_tolevelc                       sx   e Zd ZdZ											d fdd		Zd
d Zdd Zdd Zdd Zdd Z	dd Z
dd Zdd Z  ZS )LBFGSa  
    The L-BFGS is a quasi-Newton method for solving an unconstrained optimization problem over a differentiable function.
    Closely related is the Newton method for minimization. Consider the iterate update formula:

    .. math::
        x_{k+1} = x_{k} + H_k \nabla{f_k}

    If :math:`H_k` is the inverse Hessian of :math:`f` at :math:`x_k`, then it's the Newton method.
    If :math:`H_k` is symmetric and positive definite, used as an approximation of the inverse Hessian, then
    it's a quasi-Newton. In practice, the approximated Hessians are obtained
    by only using the gradients, over either whole or part of the search
    history, the former is BFGS, the latter is L-BFGS.

    Reference:
        Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006. pp179: Algorithm 7.5 (L-BFGS).

    Args:
        learning_rate (float, optional): learning rate .The default value is 1.
        max_iter (int, optional): maximal number of iterations per optimization step.
            The default value is 20.
        max_eval (int, optional): maximal number of function evaluations per optimization
            step. The default value is max_iter * 1.25.
        tolerance_grad (float, optional): termination tolerance on first order optimality
            The default value is 1e-5.
        tolerance_change (float, optional): termination tolerance on function
            value/parameter changes. The default value is 1e-9.
        history_size (int, optional): update history size. The default value is 100.
        line_search_fn (string, optional): either 'strong_wolfe' or None. The default value is strong_wolfe.
        parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. The default value is None.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It canbe a float value as coeff of L2 regularization or \
            :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.
            If a parameter has set regularizer using :ref:`api_paddle_ParamAttr` already, \
            the regularization setting here in optimizer will be ignored for this parameter. \
            Otherwise, the regularization setting here in optimizer will take effect. \
            Default None, meaning there is no regularization.
        grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of \
            some derived class of ``GradientClipBase`` . There are three cliping strategies \
            ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` , \
            :ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
        name (str, optional): Normally there is no need for user to set this property.
            For more information, please refer to :ref:`api_guide_Name`.
            The default value is None.

    Return:
        loss (Tensor): the final loss of closure.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import numpy as np
            >>> from paddle.incubate.optimizer import LBFGS

            >>> paddle.disable_static()
            >>> np.random.seed(0)
            >>> np_w = np.random.rand(1).astype(np.float32)
            >>> np_x = np.random.rand(1).astype(np.float32)

            >>> inputs = [np.random.rand(1).astype(np.float32) for i in range(10)]
            >>> # y = 2x
            >>> targets = [2 * x for x in inputs]

            >>> class Net(paddle.nn.Layer):
            ...     def __init__(self):
            ...         super().__init__()
            ...         w = paddle.to_tensor(np_w)
            ...         self.w = paddle.create_parameter(shape=w.shape, dtype=w.dtype, default_initializer=paddle.nn.initializer.Assign(w))
            ...     def forward(self, x):
            ...         return self.w * x

            >>> net = Net()
            >>> opt = LBFGS(learning_rate=1, max_iter=1, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn='strong_wolfe', parameters=net.parameters())
            >>> def train_step(inputs, targets):
            ...     def closure():
            ...         outputs = net(inputs)
            ...         loss = paddle.nn.functional.mse_loss(outputs, targets)
            ...         print('loss: ', loss.item())
            ...         opt.clear_grad()
            ...         loss.backward()
            ...         return loss
            ...     opt.step(closure)

            >>> for input, target in zip(inputs, targets):
            ...     input = paddle.to_tensor(input)
            ...     target = paddle.to_tensor(target)
            ...     train_step(input, target)

          ?   NHz>&.>d   c                    s   |d u r
|d d }|| _ || _|| _|| _|| _|| _|| _t|tj	r-t
dt| tt| _t jd||	|
|d t| jd tsJ| j| _nt| jD ]	\}}|d | _qOd | _d S )N      z^parameters argument given to the optimizer should be an iterable of Tensors or dicts, but got r
   )learning_rate
parametersweight_decay	grad_clipnamer   params)r   max_itermax_evaltolerance_gradtolerance_changehistory_sizeline_search_fn
isinstancepaddleZTensor	TypeErrortyper   dictstatesuper__init__Z_parameter_list_params	enumerateZ_param_groups_numel_cache)selfr   r   r   r   r   r   r   r   r   r   r   idxZparam_group	__class__ `/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/incubate/optimizer/lbfgs.pyr$   w   s8   


zLBFGS.__init__c                 C   s.   i }| j  D ]\}}|||i qd|iS )zReturns the state of the optimizer as a :class:`dict`.

        Return:
            state, a dict holding current optimization state. Its content
                differs between optimizer classes.
        r"   )r"   itemsupdate)r(   Zpacked_statekvr,   r,   r-   
state_dict   s   zLBFGS.state_dictc                 C   s$   | j d u rtdd | jd| _ | j S )Nc                 S   s   | |   S N)numel)totalpr,   r,   r-   <lambda>   s    zLBFGS._numel.<locals>.<lambda>r   )r'   r   r%   r(   r,   r,   r-   _numel   s
   
zLBFGS._numelc                 C   sT   g }| j D ]}|jd u rt|dg}n|jdg}|| qtj|ddS )Nr   )Zaxis)r%   Zgradr   Z
zeros_likereshapeappendconcat)r(   Zviewsr6   viewr,   r,   r-   _gather_flat_grad   s   

zLBFGS._gather_flat_gradc              	   C   sf   d}| j D ]#}tdd |j}t|||||  |j| |}||7 }q||  ks1J d S )Nr   c                 S   s   | | S r3   r,   )xyr,   r,   r-   r7      s    z!LBFGS._add_grad.<locals>.<lambda>)r%   r   shaper   assignaddr;   r9   )r(   alpha	directionoffsetr6   r4   r,   r,   r-   	_add_grad   s   

zLBFGS._add_gradc                 C   s   dd | j D S )Nc                 S   s   g | ]}|  qS r,   )clone).0r6   r,   r,   r-   
<listcomp>   s    z&LBFGS._clone_param.<locals>.<listcomp>)r%   r8   r,   r,   r-   _clone_param   s   zLBFGS._clone_paramc                 C   s&   t | j|D ]
\}}t|| qd S r3   )zipr%   r   rC   )r(   Zparams_datar6   Zpdatar,   r,   r-   
_set_param   s   zLBFGS._set_paramc                 C   s0   |  || t| }|  }| | ||fS r3   )rH   floatr?   rN   )r(   closurer@   rE   dloss	flat_gradr,   r,   r-   _directional_evaluate   s
   

zLBFGS._directional_evaluatec           %   	      s  t  X t    j}j}j}j}j}j}j	}j
}	|	dd |	dd   }
t|
}d}|	d  d7  <  }|  |k}|rX|
W  d   S |	d}|	d}|	d}|	d	}|	d
}|	d}|	d}|	d}d}||k r5|d7 }|	d  d7  < |	d dkr| }g }g }g }t jd|
jd}n||}|t j||jd}||}|dkrt||kr|d |d |d || || |d|  ||| }t|}d|	vrdg| |	d< |	d }| }t|d ddD ] }|| |||  ||< t ||| ||   | qt || }}t|D ]}|| |||  } t ||| || |   | q?|du ri| }nt || |}|	d dkrtdd|   | }n|}||}!|!| krnd}"|dur|dkrt d! }# fdd}$t"|$|#|||||!\}}}}"#|| |  |k}n3#|| ||krt   t  }W d   n	1 sw   Y   }|  |k}d}"||"7 }|	d  |"7  < |rn&||   |krnt|| |k r%n||kr+n
||kr1n||k s||	d< ||	d< ||	d< ||	d	< ||	d
< ||	d< ||	d< ||	d< W d   |
S 1 saw   Y  |
S )z
        Performs a single optimization step.

        Args:
            closure (callable): A closure that reevaluates the model
                and returns the loss.

        Z
func_evalsr   n_iterr   NrQ   rE   old_ykold_skroH_diagprev_flat_grad	prev_lossr
   )dtypeg|=alr:   Zstrong_wolfez only 'strong_wolfe' is supportedc                    s     | ||S r3   )rT   )r@   rE   rQ   rP   r(   r,   r-   obj_funcq  s   zLBFGS.step.<locals>.obj_func)$r   Zno_gradZenable_gradr   r   r   r   r   r   r   r"   
setdefaultrO   r?   absmaxgetnegZ	to_tensorr\   subtractmultiplydotlenpopr<   rangerC   rD   rI   minsumRuntimeErrorrL   r   rH   )%r(   rP   r   r   r   r   r   r   r   r"   Z	orig_lossrR   Zcurrent_evalsrS   Zopt_condrQ   rE   rV   rW   rX   rY   rZ   r[   rU   rA   sZysZnum_oldr]   qirZbe_iZgtdZls_func_evalsZx_initr_   r,   r^   r-   step   s   

















$&








  
 
 6  6z
LBFGS.step)r
   r   Nr   r   r   NNNNN)__name__
__module____qualname____doc__r$   r2   r9   r?   rH   rL   rN   rT   rr   __classcell__r,   r,   r*   r-   r	      s,    ]1	r	   )collectionsr   	functoolsr   r   Zpaddle.optimizerr   Zpaddle.utilsr   Zline_search_dygraphr   r	   r,   r,   r,   r-   <module>   s   