import numpy as np
import paddle

from .line_search import strong_wolfe
from .utils import (
    _value_and_gradient,
    check_initial_inverse_hessian_estimate,
    check_input_type,
)


def minimize_lbfgs(
    objective_func,
    initial_position,
    history_size=100,
    max_iters=50,
    tolerance_grad=1e-8,
    tolerance_change=1e-8,
    initial_inverse_hessian_estimate=None,
    line_search_fn='strong_wolfe',
    max_line_search_iters=50,
    initial_step_length=1.0,
    dtype='float32',
    name=None,
):
    r"""
    Minimizes a differentiable function ``objective_func`` using the L-BFGS method.
    L-BFGS is a quasi-Newton method for solving an unconstrained optimization
    problem over a differentiable function. It is closely related to the Newton
    method for minimization. Consider the iterate update formula:

    .. math::
        x_{k+1} = x_{k} - H_k \nabla{f_k}

    If :math:`H_k` is the inverse Hessian of :math:`f` at :math:`x_k`, then this is the Newton method.
    If :math:`H_k` is symmetric and positive definite and used as an approximation of the inverse Hessian, then
    it is a quasi-Newton method. In practice, the approximate inverse Hessians are obtained
    from gradients alone, over either the whole search history (which gives BFGS)
    or only its most recent part (which gives L-BFGS).
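
    As an illustration, the BFGS update builds the next inverse Hessian approximation
    from gradient differences alone. With :math:`s_k = x_{k+1} - x_k`,
    :math:`y_k = \nabla f_{k+1} - \nabla f_k` and :math:`\rho_k = 1 / (y_k^T s_k)`,
    one update step reads:

    .. math::
        H_{k+1} = (I - \rho_k s_k y_k^T) H_k (I - \rho_k y_k s_k^T) + \rho_k s_k s_k^T

    L-BFGS never forms :math:`H_k` explicitly: it stores only the most recent
    ``history_size`` pairs :math:`\{s_i, y_i\}` and reconstructs the product
    :math:`H_k \nabla f_k` on the fly by the two-loop recursion (see the reference below).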

    Reference:
        Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006. pp. 179: Algorithm 7.5 (L-BFGS).

    Args:
        objective_func: the objective function to minimize. ``objective_func`` accepts a 1D Tensor and returns a scalar.
        initial_position (Tensor): the starting point of the iterates, which has the same shape as the input of ``objective_func`` .
        history_size (Scalar): the number of stored vector pairs :math:`\{s_i, y_i\}`. Default value: 100.
        max_iters (int, optional): the maximum number of minimization iterations. Default value: 50.
        tolerance_grad (float, optional): terminates if the gradient norm is smaller than this value. The gradient norm currently uses the infinity norm. Default value: 1e-8.
        tolerance_change (float, optional): terminates if the change of the function value/position/parameter between two iterations is smaller than this value. Default value: 1e-8.
        initial_inverse_hessian_estimate (Tensor, optional): the initial inverse Hessian approximation at ``initial_position``. It must be symmetric and positive definite. If not given, an identity matrix of order N, where N is the size of ``initial_position`` , will be used. Default value: None.
        line_search_fn (str, optional): indicates which line search method to use; only 'strong_wolfe' is supported right now. 'Hager Zhang' may be supported in the future. Default value: 'strong_wolfe'.
        max_line_search_iters (int, optional): the maximum number of line search iterations. Default value: 50.
        initial_step_length (float, optional): the step length used in the first iteration of line search. Different values of ``initial_step_length`` may lead to different results. For methods like Newton and quasi-Newton, the initial trial step length should always be 1.0. Default value: 1.0.
        dtype ('float32' | 'float64', optional): the data type used in the algorithm; the data type of the input parameters must be consistent with it. Default value: 'float32'.
        name (str, optional): Name for the operation. For more information, please refer to :ref:`api_guide_Name`. Default value: None.

    Returns:
        output(tuple):

            - is_converge (bool): indicates whether the minimum was found within the given tolerances.
            - num_func_calls (int): the number of times the objective function was called.
            - position (Tensor): the position of the last iteration. If the search converged, this is the argmin of the objective function for the given initial position.
            - objective_value (Tensor): objective function value at the `position`.
            - objective_gradient (Tensor): objective function gradient at the `position`.

    Examples:
        .. code-block:: python
            :name: code-example1

            >>> # Example1: 1D Grid Parameters
            >>> import paddle
            >>> # Randomly simulate a batch of input data
            >>> inputs = paddle.normal(shape=(100, 1))
            >>> labels = inputs * 2.0
            >>> # define the loss function
            >>> def loss(w):
            ...     y = w * inputs
            ...     return paddle.nn.functional.square_error_cost(y, labels).mean()
            >>> # Initialize weight parameters
            >>> w = paddle.normal(shape=(1,))
            >>> # Call the lbfgs method to solve for the weight that minimizes the loss, and update the parameters
            >>> for epoch in range(0, 10):
            ...     # Call the lbfgs method to optimize the loss; the third element of the returned tuple is the updated weight
            ...     w_update = paddle.incubate.optimizer.functional.minimize_lbfgs(loss, w)[2]
            ...     # Use paddle.assign to update parameters in place
            ...     paddle.assign(w_update, w)

        .. code-block:: python
            :name: code-example2

            >>> # Example2: Multidimensional Grid Parameters
            >>> import paddle
            >>> def flatten(x):
            ...     return x.flatten()
            >>> def unflatten(x):
            ...     return x.reshape((2,2))
            >>> # Assume the network parameters are more than one dimension
            >>> def net(x):
            ...     assert len(x.shape) > 1
            ...     return x.square().mean()
            >>> # function to be optimized
            >>> def bfgs_f(flatten_x):
            ...     return net(unflatten(flatten_x))
            >>> x = paddle.rand([2,2])
            >>> for i in range(0, 10):
            ...     # Flatten x before using minimize_lbfgs
            ...     x_update = paddle.incubate.optimizer.functional.minimize_lbfgs(bfgs_f, flatten(x))[2]
            ...     # unflatten x_update, then update parameters
            ...     paddle.assign(unflatten(x_update), x)
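
        .. code-block:: python
            :name: code-example3

            >>> # Example3: Unpack the full returned tuple (a minimal sketch;
            >>> # the quadratic objective ``f`` below is only illustrative)
            >>> import paddle
            >>> def f(x):
            ...     return paddle.dot(x, x)
            >>> x0 = paddle.rand([3])
            >>> results = paddle.incubate.optimizer.functional.minimize_lbfgs(f, x0)
            >>> is_converge, num_func_calls, position, value, gradient = results
            >>> # ``is_converge`` reports whether the tolerances were met within ``max_iters``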

    )r   Zfloat64z?The dtype must be 'float32' or 'float64', but the specified is .minimize_lbfgsinitial_positionNr   dtype initial_inverse_hessian_estimater   int64shapeZ
fill_valuer   Fboolc                    s   |  k | @ S N )kdoneis_convergenum_func_callsvaluexkg1sk_vecyk_vecrhok_vecheadtail)	max_itersr   k/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/incubate/optimizer/functional/lbfgs.pycond   s   zminimize_lbfgs.<locals>.condc                    sV  t |}t jg  d 
dd}fdd}
fdd}t jjj||||gd t |}t jg d dd} fd	d}
fd
d}t jjj||||gd | }dkrpt||	d\}}}}nt	d
t || | || }|| }t ||t jjdk	fddfdd}t  r| < | < | < nt j |t j |t j | d 
  dd t jj kfddd  || }|}| d7 } t jj|tjd}t jj|tjd}t ||k B |k B | t || t ||dkB | | |||||| gS )Nr   r   r   c                       |  kS r   r   iqai_vecr#   r   r%   r&         z*minimize_lbfgs.<locals>.body.<locals>.condc              	      sv   t  r|  t |  | || < nt j|| |  t |  | }|||  |    }| d  } | ||fS Nr   )paddlein_dynamic_modedotstaticsetitemmodr(   )history_sizer!   r   r    r   r%   body   s   
z*minimize_lbfgs.<locals>.body.<locals>.bodyr&   r6   Z	loop_varsc                    r'   r   r   )r)   r)r"   r   r%   r&      r-   c                    sF   |  t |  | }||   |  |   }| d } | |fS r.   )r/   r1   r4   )r)   r8   beta)r+   r5   r!   r   r    r   r%   r6      s   r   )fr   pkr$   initial_step_lengthr   zQCurrently only support line_search_fn = 'strong_wolfe', but the specified is '{}'g        c                      s   t jdgd dS )Nr   g     @@r   )r/   fullr   r   r   r%   <lambda>  s    z.minimize_lbfgs.<locals>.body.<locals>.<lambda>c                      s   d  S )Nr
   r   r   )rhok_invr   r%   r>         c                 S   s   t | d |  d S r.   )r/   assignr,   r   r   r%   true_fn  s   z-minimize_lbfgs.<locals>.body.<locals>.true_fnc                      s    S r   r   r   )r#   rB   r   r%   r>     r@   )p)r/   rA   r=   r4   r2   nn
while_loopmatmulr   NotImplementedErrorformatr1   r&   r0   r3   ZlinalgZnormnpinf)r   r   r   r   r   r   r   r   r    r!   r"   r#   r*   r)   r&   r6   r8   r;   alphag2Zls_func_callsskZykZrhokZgnormZpk_norm)
H0r+   r   r5   r<   line_search_fnmax_line_search_itersobjective_functolerance_changetolerance_grad)r"   r?   r!   r   r#   rB   r    r%   r6      s   
	


 zminimize_lbfgs.<locals>.bodyr7   )
ValueErrorr   r/   eyer   r   rA   detachr   r=   Zzerosr2   rD   rE   )rQ   r   r5   r$   rS   rR   r   rO   rP   r<   r   nameZop_namer   r   r   r   r   r   r   r"   r#   r   r   r    r!   r&   r6   r   )rN   r+   r   r5   r<   rO   r$   rP   rQ   rR   rS   r%   r      sd   d

 r   )
r   r   r	   r	   Nr   r   r
   r   N)
numpyrI   r/   Zline_searchr   utilsr   r   r   r   r   r   r   r%   <module>   s   
