o
    "Õjm0  ã                   @   s8   d dl Z ddlmZ dd„ Z						
		ddd„ZdS )é    Né   )Ú_value_and_gradientc           	         sœ   t jj ˆˆk‡‡fdd„‡‡fdd„¡\‰‰ˆˆ d||  ˆˆ   ‰ ˆ d ˆˆ  ‰‡ ‡‡‡‡‡‡‡fdd„}‡‡fdd	„}t jj ˆd
k||¡}|S )a/  Cubic interpolation between (x1, f1, g1) and (x2, f2, g2).
        Use two points and their gradient to determine a cubic function and get the minimun point
        between them in the cubic curve.

    Reference:
        Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006.
        pp59: formula 3.59

    Args:
        x1, f1, g1: point1's position, value and gradient.
        x2, f2, g2: point2's position, value and gradient.
    Returns:
        min_pos: the minimun point between the specified points in the cubic curve.
    c                      s   ˆ ˆfS ©N© r   ©Úx1Úx2r   úq/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/incubate/optimizer/functional/line_search.pyÚ<lambda>$   ó    z&cubic_interpolation_.<locals>.<lambda>c                      s   ˆˆ fS r   r   r   r   r   r	   r
   $   r   é   é   c                     sh   ˆ  ¡ ‰ ‡‡ ‡‡‡‡fdd„} ‡‡ ‡‡‡‡fdd„}tjˆˆd}tjj || |¡}t t |ˆ¡ˆ¡S )Nc                      s(   ˆˆˆ ˆˆ ˆ  ˆˆ dˆ     S ©Nr   r   r   ©Úd1Úd2Úg1Úg2r   r   r   r	   Útrue_fn2,   ó   (z:cubic_interpolation_.<locals>.true_func1.<locals>.true_fn2c                      s(   ˆˆˆ ˆˆ ˆ  ˆˆ dˆ     S r   r   r   r   r   r	   Ú	false_fn2/   r   z;cubic_interpolation_.<locals>.true_func1.<locals>.false_fn2)ÚxÚy)ÚsqrtÚpaddleZ
less_equalÚstaticÚnnÚcondÚminimumÚmaximum)r   r   ÚpredÚmin_pos©r   Z	d2_squarer   r   r   r   ÚxmaxÚxmin)r   r	   Ú
true_func1)   s   z(cubic_interpolation_.<locals>.true_func1c                      s   ˆˆ  d S )Ng       @r   r   )r#   r$   r   r	   Úfalse_func17   s   z)cubic_interpolation_.<locals>.false_func1ç        )r   r   r   r   )	r   Úf1r   r   Úf2r   r%   r&   r!   r   r"   r	   Úcubic_interpolation_   s   ÿr*   é   ç:Œ0âŽyE>ç      ð?ç-Cëâ6?çÍÌÌÌÌÌì?é
   Úfloat32c
                    s2  ‡‡‡fdd„‰
‡‡‡‡
‡fdd„‰t jdgˆ|	d‰t jdgd|	d}
t jdg||	d}ˆ
|
ƒ\}}‰t  |¡‰	t  ˆ¡‰t jdgddd}t jdgd	|	d‰ t  |¡‰t  |¡‰t jdgd	dd}t jdgd
dd}‡fdd„}‡ ‡‡‡‡‡‡‡	‡
‡‡fdd„}t jjj|||||
||||gd ˆ ˆˆ|fS )a4  Implements of line search algorithm that satisfies the strong Wolfe conditions using double zoom.

    Reference:
        Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006.
        pp60: Algorithm 3.5 (Line Search Algorithm).

    Args:
        f: the objective function to minimize. ``f`` accepts a multivariate input and returns a scalar.
        xk (Tensor): the starting point of the iterates.
        pk (Tensor): search direction.
        max_iters (Scalar): the maximum number of iterations.
        tolerance_grad (Scalar): terminates if the gradient norm is smaller than
            this. Currently gradient norm uses inf norm.
        tolerance_change (Scalar): terminates if the change of function value/position/parameter between
            two iterations is smaller than this value.
        initial_step_length (Scalar): step length used in first iteration.
        c1 (Scalar): parameter for sufficient decrease condition.
        c2 (Scalar): parameter for curvature condition.
        alpha_max (float): max step length.
        dtype ('float32' | 'float64'): the datatype to be used.

    Returns:
        num_func_calls (float): number of objective function called in line search process.
        a_star(Tensor): optimal step length, or 0. if the line search algorithm did not converge.
        phi_star (Tensor): phi at a_star.
        derphi_star (Tensor): derivative of phi at a_star.

    Following summarizes the essentials of the strong Wolfe line search algorithm.
    Some notations used in the description:

        - `f` denotes the objective function.
        - `phi` is a function of step size alpha, restricting `f` on a line.

            phi = f(xk + a * pk),
            where xk is the position of k'th iterate, pk is the line search direction(decent direction),
            and a is the step size.
        - a : substitute of alpha
        - a1 is a of last iteration, which is alpha_(i-1).
        - a2 is a of current iteration, which is alpha_i.
        - a_lo is a in left position when calls zoom, which is alpha_low.
        - a_hi is a in right position when calls zoom, which is alpha_high.

    Line Search Algorithm:
        repeat
            Compute phi(a2) and derphi(a2).
            1. If phi(a2) > phi(0) + c_1 * a2 * phi'(0) or [phi(a2) >= phi(a1) and i > 1],
                a_star= zoom(a1, a2) and stop;

            2. If |phi'(a2)| <= -c_2 * phi'(0),
                a_star= a2 and stop;

            3. If phi'(a2) >= 0,
                a_star= zoom(a2, a1) and stop;

            a1 = a2
            a2 = min(2 * a2, a2)
            i = i + 1
        end(repeat)

    zoom(a_lo, a_hi) Algorithm:
        repeat
            aj = cubic_interpolation(a_lo, a_hi)
            Compute phi(aj) and derphi(aj).
            1. If phi(aj) > phi(0) + c_1 * aj * phi'(0) or phi(aj) >= phi(a_lo),
                then a_hi <- aj;
            2.
                2.1. If |phi'(aj)| <= -c_2 * phi'(0), then a_star= a2 and stop;

                2.2. If phi'(aj) * (a2 - a1) >= 0, then a_hi = a_lo

                a_lo = aj;
        end(repeat)
    c                    s,   t ˆ ˆ| ˆ  ƒ\}}t |ˆ¡}|||fS )zCompute function value and derivative of phi at a.
        phi = f(xk + a * pk)
        phi'(a) = f'(xk + a * pk) * pk
        )r   r   Údot)ÚaZ	phi_valueZf_gradZphi_grad)ÚfÚpkÚxkr   r	   Úphi_and_derphi”   s   
z$strong_wolfe.<locals>.phi_and_derphic	                    st   ˆ‰t jdgddd}	t jdgddd}
‡‡fdd„}‡‡‡ ‡‡fd	d
„}t jjj|||	|
| ||||||g	d |	S )Nr   r   Úint64©ÚshapeZ
fill_valueÚdtypeFÚboolc	           
         s0   t  || ¡ˆk }	t  ||	B |¡ | ˆ k | @ S r   )r   ÚabsÚassign)
ÚjÚ	done_zoomÚa_loÚphi_loÚ	derphi_loÚderf_loÚa_hiÚphi_hiÚ	derphi_hir    )Úmax_zoom_itersÚtolerance_changer   r	   Ú	cond_zoom®   s   z-strong_wolfe.<locals>.zoom.<locals>.cond_zoomc	                    s$  t ˆˆˆˆ ˆˆƒ‰dt ˆ ˆ ¡ }	t t ˆˆ ¡t ˆˆ  ¡¡|	k }
tjj |
‡ ‡fdd„‡fdd„¡‰ˆˆƒ\‰‰‰‡ ‡‡‡‡‡fdd„}‡ ‡‡‡‡‡‡‡‡‡‡‡fdd„‰	ˆˆˆˆ ˆ  kˆˆkB }tjj ||‡‡‡	fd	d„¡ tjj ˆ‡
fd
d„‡
fdd„¡‰
ˆ
ˆˆˆˆˆˆ ˆˆg	S )Ngš™™™™™¹?c                      s   dˆˆ   S )Ng      à?r   r   )rE   rA   r   r	   r
   Ñ   s    z?strong_wolfe.<locals>.zoom.<locals>.body_zoom.<locals>.<lambda>c                      ó   ˆ S r   r   r   )Úajr   r	   r
   Ñ   ó    c                      ó(   t  ˆˆ ¡ t  ˆˆ¡ t  ˆˆ¡ d S r   ©r   r>   r   )rE   rL   rG   Úderphi_jrF   Úphi_jr   r	   Útrue_fnÖ   s   z>strong_wolfe.<locals>.zoom.<locals>.body_zoom.<locals>.true_fnc                    s’   t  ˆ¡ˆ ˆ k}t  ||¡ ‡‡ ‡‡	‡
‡fdd„}| ˆˆˆ   dk@ }t jj ||d ¡ t  ˆˆ ¡ t  ˆˆ¡ t  ˆˆ	¡ t  ˆˆ¡ d S )Nc                      rN   r   rO   r   )rE   rA   rG   rC   rF   rB   r   r	   rR   à   ó   zPstrong_wolfe.<locals>.zoom.<locals>.body_zoom.<locals>.false_fn.<locals>.true_fnr   )r   r=   r>   r   r   r   )rA   r@   Úpred3rR   Zpred4)rE   rL   Úc2Úderf_jrD   Úderphi_0rG   rP   rC   rF   rQ   rB   )rA   r	   Úfalse_fnÜ   s   z?strong_wolfe.<locals>.zoom.<locals>.body_zoom.<locals>.false_fnc                      s
   ˆˆ ˆƒS r   r   r   )rA   r@   rX   r   r	   r
   ï   s   
 c                      rK   r   r   r   ©r?   r   r	   r
   ñ   rM   c                      s   ˆ d S )Nr   r   r   rY   r   r	   r
   ñ   r   )r*   r   r=   r   r   r   r   )r?   r@   rA   rB   rC   rD   rE   rF   rG   Z
min_changer    rR   Úpred2)Úc1rU   rW   Úphi_0r7   )rE   rA   rL   rV   rD   rG   rP   rC   r@   rX   r?   rF   rQ   rB   r	   Ú	body_zoom½   s:   ÿÿÿÿ"ÿ"÷z-strong_wolfe.<locals>.zoom.<locals>.body_zoom©r   ÚbodyZ	loop_vars)r   Úfullr   r   Ú
while_loop)rA   rB   rC   rD   rE   rF   rG   r\   rW   r?   r@   rJ   r]   )r[   rU   Ú	max_itersr7   rI   )rW   rH   r\   r	   Úzoomž   s(   A÷ýzstrong_wolfe.<locals>.zoomr   r9   r'   r8   r   Fr<   c                    s   | ˆ k | @ S r   r   )ÚiÚls_func_callsÚa1Úa2Úphi_1Úderf_1Údone)rb   r   r	   r   #  s   zstrong_wolfe.<locals>.condc                    s’  ˆˆƒ\‰‰‰t  ˆd ˆ¡ t  |t  t  ˆ¡¡B |¡ ‡ ‡‡	‡‡‡‡‡‡‡‡‡‡‡fdd„}| ˆˆˆˆ ˆ  kˆˆkˆdk@ B @ }t  ||B |¡ t jj ||d ¡ ‡‡	‡‡‡‡fdd„}	| t  ˆ¡ˆ ˆ k@ }
t  ||
B |¡ t jj |
|	d ¡ ‡ ‡‡	‡‡‡‡‡‡‡‡‡‡‡fdd„}| ˆdk@ }t  ||B |¡ t jj ||d ¡ ‡ ‡‡
‡‡‡‡‡fd	d
„}t jj |d |¡ ˆˆˆ ˆˆˆ|gS )Nr   c               
      sP   ˆˆ ˆ
ˆˆˆˆˆˆ	ˆƒ	} t  ˆ ˆ¡ t  ˆ
ˆ¡ t  ˆˆ¡ t  ˆ|  ˆ¡ d S r   rO   rY   )rf   rg   Úa_starri   Ú	derf_starrW   Úderphi_1Úderphi_2re   r\   rh   Úphi_2Úphi_starrc   r   r	   Útrue_fn1+  ó   ÷z,strong_wolfe.<locals>.body.<locals>.true_fn1c                      s(   t  ˆ ˆ¡ t  ˆˆ¡ t  ˆˆ¡ d S r   rO   r   )rg   rk   Úderf_2rl   ro   rp   r   r	   r   B  rS   z,strong_wolfe.<locals>.body.<locals>.true_fn2c               
      sP   ˆˆˆˆˆˆ ˆ
ˆˆ	ˆƒ	} t  ˆˆ¡ t  ˆˆ¡ t  ˆˆ¡ t  ˆ|  ˆ¡ d S r   rO   rY   )rf   rg   rk   rs   rl   rW   rm   rn   re   r\   rh   ro   rp   rc   r   r	   Útrue_fn3K  rr   z,strong_wolfe.<locals>.body.<locals>.true_fn3r   c                      sP   t  ˆˆ ¡ t  ˆˆ¡ t  ˆˆ¡ t  t  dˆ ˆ¡ˆ¡ t  ˆd ˆ¡ d S )Nr   r   )r   r>   r   r   )rf   rg   Ú	alpha_maxri   rs   rd   rh   ro   r   r	   rX   `  s
   z,strong_wolfe.<locals>.body.<locals>.false_fn)r   r>   ÚanyÚisinfr   r   r   r=   )rd   re   rf   rg   rh   ri   rj   rq   Zpred1r   rZ   rt   rT   rX   )rk   ru   r[   rU   rl   rW   rm   r\   r7   rp   rc   )	rf   rg   ri   rs   rn   rd   re   rh   ro   r	   r_   &  s(   &"ÿ&zstrong_wolfe.<locals>.bodyr^   )r   r`   r>   r   r   ra   )r4   r6   r5   rb   rI   Zinitial_step_lengthr[   rU   ru   r;   rf   rg   rh   ri   re   rd   rj   r   r_   r   )rk   ru   r[   rU   rl   rW   rm   r4   rb   r\   r7   rp   r5   rI   r6   rc   r	   Ústrong_wolfe>   s,   V
r



 Dýrx   )r+   r,   r-   r.   r/   r0   r1   )r   Úutilsr   r*   rx   r   r   r   r	   Ú<module>   s   .ö