o
    "j*+                     @   s   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 g ZG dd dZG d	d
 d
e jdZG dd de jdZdS )    N)base)ExecutorRoleMakerBase)SGD)OptimizerWithMixedPrecisionc                   @   s   e Zd ZdZdZdZdZdS )ModezY
    There are various mode for fleet, each of them is designed for different model.
             N)__name__
__module____qualname____doc__Z
TRANSPILERZPSLIBZ
COLLECTIVE r   r   g/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/incubate/distributed/fleet/base.pyr      s
    r   c                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zd1ddZ	dd Z
dd Zd1ddZdd Zdd Zdd Zd2ddZdd Zd d! Zejd"d# Zejd2d$d%Zejd&d' Zejd(d) Zejd2d*d+Zej		,	d3d-d.Zejd2d/d0ZdS )4Fleetz
    Fleet is the base class, transpiler and pslib are implementation of Fleet.

    Args:
        mode(Mode): the implementation of Fleet's mode.

    Returns:
        None
    c                 C   s"   d| _ || _d | _d | _d | _d S )NF)_is_initialized_mode
_optimizer_role_maker	_executor)selfmoder   r   r   __init__/   s
   
zFleet.__init__c                 C   
   | j  S )z
        Check whether the node is the first instance of worker.

        Returns:
            bool: True if this is the first node of worker,
                  False if not.
        )r   is_first_workerr   r   r   r   r   6      
zFleet.is_first_workerc                 C   r   )zV
        Get current worker index.

        Returns:
            int: node id
        )r   worker_indexr   r   r   r   r   @      
zFleet.worker_indexc                 C   r   )zd
        Get current total worker number.

        Returns:
            int: worker numbers
        )r   
worker_numr   r   r   r   r!   I   r    zFleet.worker_numc                 C   r   )z
        Check whether the node is an instance of worker.

        Returns:
            bool: True if this is a node of worker,
                  False if not.
        )r   	is_workerr   r   r   r   r"   R   r   zFleet.is_workerFc                 C      |r
d | j S | j S z
        Get current server endpoints, such as ["127.0.0.1:1001", "127.0.0.1:1002"].

        Returns:
            list/string: server endpoints
        ,)joinr   Zget_trainer_endpointsr   Z	to_stringr   r   r   worker_endpoints\      
zFleet.worker_endpointsc                 C   s   t | j S )zc
        Get current total worker number.

        Returns:
            int: server number
        )lenr   get_pserver_endpointsr   r   r   r   
server_numi   s   zFleet.server_numc                 C   r   )zV
        Get current server index.

        Returns:
            int: node id
        )r   server_indexr   r   r   r   r-   r   r    zFleet.server_indexc                 C   r#   r$   )r&   r   r+   r'   r   r   r   server_endpoints{   r)   zFleet.server_endpointsc                 C   r   )z
        Check whether the node is an instance of server.

        Returns:
            bool: True if this is a node of server,
                  False if not
        )r   	is_serverr   r   r   r   r/      r   zFleet.is_serverc                 C   r   )z
        Check whether the node is an instance of server.

        Returns:
            bool: True if this is a node of server,
                  False if not.
        )r   is_xpur   r   r   r   r0      r   zFleet.is_xpuc           
      C   s   t |ts	td|  }|  }t|| }t|| }|g| }t|D ]
}||  d7  < q&g g| }d}	t|D ]}||	|	||   ||< |	|| 7 }	q<|| S )a  
        split files before distributed training,
        example 1: files is [a, b, c ,d, e]  and trainer_num = 2, then trainer
                   0 gets [a, b, c] and trainer 1 gets [d, e].
        example 2: files is [a, b], and trainer_num = 3, then trainer 0 gets
                   [a], trainer 1 gets [b],  trainer 2 gets []

        Args:
            files(list): file list need to be read.

        Returns:
            list: files belongs to this worker.
        z/files should be a list of file need to be read.r	   r   )
isinstancelist	TypeErrorr   r!   r*   range)
r   filesZ
trainer_idZtrainers	remainder	blocksizeblocksiZtrainer_filesbeginr   r   r   split_files   s   


zFleet.split_filesNc                 C   sX   t t | _|rt|tsddlm} |rt||std|| _| j	  d| _
dS )a<  
        should be called only once in user's python scripts,
        init() will initialize RoleMaker which is used for identifying
            current node's role, e.g. worker, server, etc.

        Args:
            role_maker(RoleMakerBase): subclass of RoleMakerBase.

        Returns:
            None
        r   r   z/role_maker must be an instance of RoleMakerBaseTN)r   r   ZCPUPlacer   r1   r   Z,paddle.incubate.distributed.fleet.role_makerr3   r   Zgenerate_roler   )r   Z
role_makerZRoleMakerBaseIncubater   r   r   init   s   

z
Fleet.initc                 C   s   | j || dS )z
        all reduce between workers, only support array of one dim.

        Args:
            input(list|numpy.array): array of one dim
            output(list|numpy.array): array of one dim
        N)r   all_reduce_worker)r   inputoutputr   r   r   r=      s   zFleet.all_reduce_workerc                 C   s   | j   dS )z)
        barrier between workers
        N)r   barrier_workerr   r   r   r   r@      s   zFleet.barrier_workerc                 C      d S Nr   r   r   r   r   init_worker      zFleet.init_workerc                 K   rA   rB   r   )r   Z	model_dirkwargsr   r   r   init_server   rD   zFleet.init_serverc                 C   rA   rB   r   r   r   r   r   
run_server   rD   zFleet.run_serverc                 C   rA   rB   r   r   r   r   r   stop_worker   rD   zFleet.stop_workerc                 C   rA   rB   r   r   Z	optimizerZstrategyr   r   r   distributed_optimizer   rD   zFleet.distributed_optimizerTc                 C   rA   rB   r   )r   executordirnameZfeeded_var_namesZtarget_varsmain_programZexport_for_deploymentZlegacy_formatr   r   r   save_inference_model   s   zFleet.save_inference_modelc                 C   rA   rB   r   )r   rK   rL   rM   r   r   r   save_persistables  rD   zFleet.save_persistables)FrB   )NTF)r   r   r   r   r   r   r   r!   r"   r(   r,   r-   r.   r/   r0   r;   r<   r=   r@   abcabstractmethodrC   rF   rG   rH   rJ   rN   rO   r   r   r   r   r   $   sD    

		

	
	


#



r   )	metaclassc                   @   sX   e Zd ZdZdddZej				dddZejdd Zej				dd	d
Z	dS )DistributedOptimizera<  
    DistributedOptimizer is a wrapper for paddle.base.optimizer
    A user should pass a paddle.base.optimizer to DistributedOptimizer
    minimize() function is implemented.
    DistributedOptimizer is the starting point for a user who wants to
    run distributed training. The optimized information will be stored in
    Fleet() instance who holds the global information about current distributed
    training.

    Args:
        optimizer(Optimizer): subclass of Optimizer.
        strategy(any): the user define config for Optimizer.

    Returns:
        None

    Nc                 C   s.   t |tjst |tstd|| _|| _d S )Nz*optimizer must be an instance of Optimizer)r1   r   	__bases__r   r3   r   Z	_strategyrI   r   r   r   r   $  s   
zDistributedOptimizer.__init__c                 C      dS )a  
        First part of `minimize`, do auto-diff to append backward ops for
        the current program.

        Args:
            loss (Variable): loss variable to run optimizations.
            startup_program (Program): startup_program for initializing parameters
                in `parameter_list`.
            parameter_list (list): list of Variables to update.
            no_grad_set (set|None): set of Variables should be ignored.
            callbacks (list|None): list of callables to run when appending backward
                operator for one parameter.

        Return:
            list: list of (param, grad) pair, grad is the output of backward.

        Examples:
            See examples in `apply_gradients`.
        Nr   )r   ZlossZstartup_programparameter_listno_grad_set	callbacksr   r   r   backward-  s   zDistributedOptimizer.backwardc                 C   rU   )a  
        Second part of `minimize`, appending optimization operators for
        given `params_grads` pairs.

        Args:
            params_grads (list): list of (param, grad) pair to do optimization.

        Returns:
            list: A list of operators appended to the current program.

        Examples:
            .. code-block:: python

                >>> # doctest: +SKIP('The network is not defined.')
                >>> loss = network()
                >>> optimizer = base.optimizer.SGD(learning_rate=0.1)
                >>> params_grads = optimizer.backward(loss)
                >>> # you may append operations for params_grads here
                >>> # ...
                >>> optimizer.apply_gradients(params_grads)
        Nr   )r   Zparams_gradsr   r   r   apply_gradientsK  s   z$DistributedOptimizer.apply_gradientsc                 C   rU   )a  
        Add operations to minimize `loss` by updating `parameter_list`.

        This method combines interface `backward()` and
        `apply_gradients()` into one.

        Args:
            losses (Variable|Variable List): loss variable to run optimizations.
            scopes (Scope| Scope List): scope instance.
            startup_programs (Program|Program List): startup_program for initializing parameters
                in `parameter_list`.
            parameter_list (list): list of Variables to update.
            no_grad_set (set|None): set of Variables should be ignored.

        Returns:
            tuple: (optimize_ops, params_grads) which are, list of operators appended;
            and list of (param, grad) Variables pair for optimization.
        Nr   )r   ZlossesZscopesZstartup_programsrV   rW   r   r   r   minimized  s   zDistributedOptimizer.minimizerB   )NNNN)
r   r   r   r   r   rP   rQ   rY   rZ   r[   r   r   r   r   rS     s"    
	
rS   )rP   Zpaddler   Zpaddle.base.executorr   Z(paddle.distributed.fleet.base.role_makerr   Zpaddle.optimizerr   Zpaddle.static.amp.decoratorr   __all__r   ABCMetar   rS   r   r   r   r   <module>   s   
 n