o
    "j5A                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
  m  m  mZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZmZmZ d dl m!Z! d	d
l"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5 ddl6m7Z7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlCmDZDmEZE G dd dZFdS )    N)staticutils)_to_name_str)fleet)IrGraph_current_expected_placecorein_dynamic_mode)Metric)	InputSpecOperatorVariableglobal_scope)_convert_float_to_bfloat16   )
get_logger   )CollectionNamesfetchget_collection)Strategy   )config_callbacks)Clusterget_default_cluster)	Converter)get_cost_from_engine)DistributedContextget_default_distributed_context)DistributedDataLoader"DistributedDataLoaderFromGenerator)DistributedOperator)DistributedSaver)ProgramHelper)Parallelizer)Planner)get_all_process_groupsnew_process_groupc                   @   s  e Zd ZdZ						duddZdd Zdd Zg fd	d
Zdd Zdd Z							duddZ
dvddZdd Zdd Zdd ZdwddZdd Zdd  Zdvd!d"Zdd#d#dd$dd#ddd#dddd%d&d&gfd'd(Z		#		$			%dxd)d*Z		#				%dyd+d,Z	#				-			-		#		#		dzd.d/Z	0					#	#			#	d{d1d2Z								d|d3d4Zd}d5d6Zd7d8 Z		#				-			-		#		d~d9d:Z							#	#		dd;d<Zdd=d>Zd?d@ ZdAdB ZdCdD Z dEdF Z!dGdH Z"dIdJ Z#dKdL Z$dMdN Z%dOdP Z&dQdR Z'dvdSdTZ(ddUdVZ)ddWdXZ*dYdZ Z+d[d\ Z,d]d^ Z-d_d` Z.e/dadb Z0e/dcdd Z1e/dedf Z2e/dgdh Z3e/didj Z4e/dkdl Z5e/dmdn Z6e/dodp Z7e/dqdr Z8e/dsdt Z9dS )Enginea
  
    An High-Level API for auto parallel, which could be used for distributed Training (engine.fit) and Inferenced (engine.predict).
    Static graph mode is supported natively, Dynamic graph mode is also supported under `@to_static <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/jit/to_static_cn.html#to-static>`_ .

    Args:
        model (paddle.nn.Layer, optional): The model is an instance of
            paddle.nn.Layer.
        loss (Loss|Callable|None, optional): The loss can be a `paddle.nn.Layer`
            instance or any callable function taken the predicted values and
            ground truth values as input. It can be None when there is no loss.
            Default: None.
        optimizer (Optimizer|None, optional): The optimizer need to be set in training
            and should be None in eval and predict mode. Default: None.
        metrics (Metric|list[Metric]|None, optional): If metrics is set, all
            metrics will be calculated and output in train/eval mode. Default: None.
        cluster (Cluster|None, optional): The cluster represents the topology information
            about the used physical devices. Default: None. (Unused for now)
        strategy (Strategy|None, optional): The strategy is used to configure the
        parallelization and optimization behaviors. Default: None.

    Examples:

        .. code-block:: python

            >>> import paddle
            >>> import paddle.vision.transforms as T
            >>> from paddle.distributed.fleet import auto
            >>> from paddle.vision.datasets import MNIST

            >>> transform = T.Compose([
            ...     T.Transpose(),
            ...     T.Normalize([127.5], [127.5])
            >>> ])
            >>> train_dataset = MNIST(mode='train', transform=transform)
            >>> valid_dataset = MNIST(mode='test', transform=transform)

            >>> model = paddle.vision.models.LeNet()
            >>> loss = paddle.nn.CrossEntropyLoss()
            >>> optimizer = paddle.optimizer.Adam(
            ...     learning_rate=0.001, parameters=model.parameters())
            >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

            >>> engine = auto.Engine(model, loss, optimizer, metrics)
            >>> # fit
            >>> engine.fit(train_dataset,
            ...            epochs=2,
            ...            batch_size=64)
            >>> # evaluate
            >>> engine.evaluate(valid_dataset,
            ...                 batch_size=64)
            >>> # predict
            >>> engine.predict(valid_dataset,
            ...                batch_size=64)
            >>> # save
            >>> engine.save("./my_model")
            >>> # load
            >>> engine.load("./my_model")

    Nc              
   C   sV  |rt |tjjst|std|| _|sd ndd | D | _|r5t |tjjt	fs5t|s5td|| _
|rEt |tjjsEtdt|| _|pNg }t|D ]}|rft |tsft|jj dqTt|| _|rxt |tsxtd|p|t | _|rt |tstd|pt | _ttj| _d | _|r|| _nIt d	rz"t d	}t!|d
}	t"#|	| _W d    n1 sw   Y  W n t$y }
 z| j%d d | _W Y d }
~
nd }
~
ww t| j| _t dr| j%d t&j'dd i | _(i | _)t*+| j| _,d | _-tj./ | _0tj.1 | _2t3 | _4t56 | _7t58 | _9t: | _;i | _<i | _=dddd| _>dddd| _?g | _@g | _Ag | _Bg | _Cg | _Dd | _Ed| _Fd| _Gd | _Hd| _I| jjJ| _Kd| _L| jjMjNrm| jjMjO| _Ln| jjPjNry| jjPjQ| _L| jjPjNr| jjPjRdkrt ddksJ dd | _StjTUddi tjTUddi d| _Vd S )NzI'model must be sub classes of `paddle.nn.Layer` or any callable function.c                 S   s   g | ]}|j qS  )name).0pr)   r)   o/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/distributed/auto_parallel/static/engine.py
<listcomp>   s    z#Engine.__init__.<locals>.<listcomp>zW'loss' must be sub classes of `paddle.nn.Layer` or any callable function or a Variable.z@'optimizer' must be object of class `paddle.optimizer.Optimizer`z is not sub class of MetriczP'cluster' must be the object or class `paddle.distributed.auto_parallel.Cluster`zN'strategy' must be object of class `paddle.distributed.auto_parallel.Strategy`ZPADDLE_AUTO_PARALLEL_CONFIGrzILoad json failed, please check json file, engine will run default config.ZPOD_NAMEz0Distribute training by paddle.distributed.launchT)Zis_collectiveFtrainevalpredictr   1F1BZCUDA_MODULE_LOADINGZLAZYz<EXP_CUDA_MODULE_LOADING_LAZY not supported in 1F1B pipeline.Z!FLAGS_new_executor_sequential_runZFLAGS_new_executor_static_build)W
isinstancepaddlennLayercallable	TypeError_model
parameters_parameter_listr   _loss	optimizerZ	Optimizer
auto_utilsZvalidate_opt
_optimizerto_listr
   	__class____name___metricsr   r   _clusterr   	_strategyr   loggingINFO_logger_json_configosgetenvopenjsonload	Exceptioninfor   init_fwd_dist_contexts_fwd_main_progscopydeepcopyZ_orig_optimizer	_executordistributedZget_rank	_cur_rankZget_world_size_nranksr"   _saverr   default_main_program_orig_main_progdefault_startup_program_orig_startup_progr   Z_orig_dist_context_dist_contexts	_planners_has_prepared_has_prepared_reader_inputs_spec_labels_spec_inputs_labels_losses_mode_skip_build_outside_dataloader_planned_mode_dygraph_modeZtuning_tuning
_acc_stepsZgradient_mergeenableZk_stepspipelineZaccumulate_stepsschedule_modehistory	frameworkZ	set_flagsenable_job_schedule_profiler)selfmodellossr?   metricsclusterstrategymetricpathfer)   r)   r-   __init__x   s   







zEngine.__init__c                    s  g }g }t |tjjr+|d u rtt|\}}n@tt|}|d | }||d  }n-t |tjjrN|d u r=|d \}}n|d }|d | }||d  }n
tdt	|j
t|}t|}| jjjdd   fdd}	|d urt|D ]\}
}|d usJ ddt|
 }|	|||| qz|d urt|D ]\}
}|d usJ dd	t|
 }|	|||| q| |}| |}||fS )
Nr   z=Data should be a Dataset or IterableDataset, but received {}.c                 S   s6   | dkrt |jdkr|jd |  |jd< d S d S d S )Nr   r   )lenshape)
num_shardsspecr)   r)   r-   _adjust_item_spec   s   z4Engine._prepare_data_spec.<locals>._adjust_item_specc                    s   t | tjr&t| |}|d u r | || d S ||| d S t | ttj	j
frOt| |} | |d u rE|| d S ||| d S t | tjrc|t|gt| | d S tdt| j)NzYThe sample's dtype returned of dataset should be number, np.ndarray or Tensor, but got {})r5   npZndarrayr   Z
from_numpyappendbatchr   r   eagerZTensorZfrom_tensornumbersNumbertyper:   formatrD   )itemr*   
batch_sizespecsr   r   r   r)   r-   _infer_item_spec$  s&   

z3Engine._prepare_data_spec.<locals>._infer_item_speczReceive None input.inputlabel)r5   r6   ioZIterableDatasetnextiterZDatasetr:   r   r   rD   r@   rB   rG   datasetr   	enumeratestr_validate_spec)rw   datasplitr   inputs_speclabels_specinputslabelssampler   ir   r*   r)   r   r-   _prepare_data_spec  sJ   




zEngine._prepare_data_specc           	      C   s   t  s| jr
td|rKt|tsJ dt| t|ts(J dt| t|t|ks4J dt||D ]\}}|j|jkrJ|j	
|j q9|rt|ts[J dt| t|tsiJ dt| t|t|ksuJ dt||D ]\}}|j|jkr|j	
|j qz||fS )NzOnly support static graph mode.z$inputs should be list, but received z:the number of `inputs_spec` should be equal to `inputs`'s.z$labels should be list, but received z:the number of `labels_spec` should be equal to `labels`'s.)r	   rn   
ValueErrorr5   listr   r   zipr   desc	set_shape)	rw   r   r   r   r   Z
input_specr   Z
label_specr   r)   r)   r-   _prepare_data_tensorK  sV   zEngine._prepare_data_tensorc                 C   s  | j | j }|j| j }| }g d}|jd jdkr4tt|D ]}|jd j|v r3|j	ddd q"|
  g }t|jD ]\}}	|	j|v rM|| q?g }
g }t|D ],}|j }||j| j |
| t||| d}|| t|}|| qV|D ]	}|jd| qtt|D ]}||  t|7  < qt|D ]}|j|}	|j	||d  q|
  d| j| j< | jjrDt sFd	| jjd
 v sJ | jjd
 }d }| jjjdkr|d	 d }n| jjjdkr|d	 d }|d usJ | }| }|D ]}|j|j vr|!| q	|
D ]}|j }|| t||| d}|jd| q|
  |"| d S d S d S )N)create_py_readerZcreate_double_buffer_readerreadr   r   F)sync)r   r   Ttasks	fleet_optr4   stream)#ra   rj   dist_main_programsrZ   global_blockopsr   ranger   Z
_remove_opZ_sync_with_cppr   r   reversedr   Z_prepend_opZ	copy_fromr   r!   Zadd_dist_op_for_programinsertpoprd   main_programZ_pipeline_optr@   use_new_executorrG   rr   rs   Zget_programr*   vars_clone_variableZset_program)rw   	feed_listdist_contextdist_main_progdist_main_blockZrelated_reader_opsr   Zreader_op_indicesidxopZread_ops_descZnew_reader_opsZnew_op_descZnew_opdist_opr   Zfwd_taskZfwd_progZ	fwd_blockvarZop_descr)   r)   r-   _prepare_readerl  s   









zEngine._prepare_readerc                 C   s   i }|d urKt |ttfr1t|dkr*t |d tr*|d  D ]\}}|||< q n!td| t |trD| D ]\}}|||< q:ntd| |d urkt |ts^J dt|j | D ]\}}|||< qb|S )Nr   r   zUnsupported data z'user_feeds must be a dict, but receive )	r5   r   tupler   dictitemsr   r   rD   )rw   r   Z
user_feedsmodeZfeedsr*   valuer)   r)   r-   _prepare_feed  s,   



zEngine._prepare_feedc                    s   |d urt |tsJ dt|j g g   fdd}j| }|j}|dkr2|d|d  |dkrL|d }t|D ]\}}|dt| | q>|dkrW|d|d  |pZg D ]
}	t|	}
t	|
 q[d	d
 t
tjD }|psg }|d|  fS )Nz)user_fetches must be a list, but receive c                    sP   g }|D ]} |r t|}|vr| || q | d S N)_is_local_varr   r   index)
group_namevar_listZgroup_indicesr   var_namefetch_indicesfetch_namesrw   r)   r-   _process_fetch_group  s   

z3Engine._prepare_fetch.<locals>._process_fetch_groupr3   ry   rz   Zmetrics_outputsc                 S   s   g | ]}|d  qS )r   r)   )r+   r   r)   r)   r-   r.         z)Engine._prepare_fetch.<locals>.<listcomp>fetches)r5   r   r   rD   ra   serial_fetch_varsr   r   r   r   r   r   FETCHES)rw   Zuser_fetchesr   r   r   
fetch_varsrz   r   r   Z	usr_fetchr   Zuser_fetches_collectionr)   r   r-   _prepare_fetch  s8   


zEngine._prepare_fetchc                 C   s  i }|d ur
||d< |d ur|d |d< |d ur||d< d}	|dkr||	 }
t |
dks.J |
D ]}|| |d< q0|	d7 }	| j| }|jd }|r| jD ]7}||	 }g }|D ]	}|||  qV|r|j|  | }tt	|D ]\}}|||
 | < qr|	d7 }	qLn|dkr||	 }i }|D ]
}|| |d	| < q||d
< |	d7 }	ttj}i }|D ]\}}||v r||}|| ||p|< q||d< |S )Nepochr   steplrr   r3   ry   rz   zout%dr   r   )r   ra   r   rE   r   update
accumulater   r@   rB   r*   r   r   r   r   )rw   outsr   r   r   r   r   r   logsZ	group_idxZloss_indicesr   r   metric_varsr}   Zmetrics_indicesZ
metric_outresultsr   resZoutputs_indicesZlogs_outZcollect_fetchesZ
logs_fetchr*   r   r)   r)   r-   _prepare_logger  sZ   







zEngine._prepare_loggerTc                 C   s@   |  | | | | | |   | || d| j|< d S )NT)_build_plan	_parallel
_init_comm_initializerc   )rw   r   init_parametersr)   r)   r-   _prepare_program2  s   


zEngine._prepare_programc              
   C   sZ  t  s| jret  d| _| jd t| j| j| j	| j
| j| _tj  | j| W d    n1 s7w   Y  | jj| _| jj}| jj}| jj| _| jj| _| jj}| jj| _| jj}t  n| j|d }|d urrd S g }g }g | _| j  }| j!  }| j"st#$||y tj d dd | j
D | _dd | jD | _t%&| j| j }|dkr| jrt'| jtj(j)st*| jsJ dt%&| j|| j  | _|dkr|s| jr| j	D ]}|+t%&|j,|| j   qW d    n1 sw   Y  W d    n	1 s
w   Y  n|dkr't'| jt-s J d	t%&| j| _t. }|j/sLt0t1t2| j3 d|_4d
d | jD | _dd | jD | _| j| jd}	tj5|| j|d}
|dkri|j dd}t%6| j| j| j7| t8||| j9| j|	|
| j:| j7| j;	| j|< t8||| j9| j|	|
| j:| j7| j;	| j<|< | j7j=| j| _=|  | j>|< d S )NTz'Building model with 'to_static' method.c                 S      g | ]}|  qS r)   Z_create_feed_layerr+   sr)   r)   r-   r.   j  r   z!Engine._build.<locals>.<listcomp>c                 S   r   r)   r   r   r)   r)   r-   r.   m  r   r3   zothe type of `loss` of the Engine arguments should be sub classes of `paddle.nn.Layer` or any callable function.r1   z>the type of `loss` of the Engine arguments should be Variable.c                 S      g | ]}t |qS r)   r@   Zset_data_parallelr+   r   r)   r)   r-   r.         
c                 S   r   r)   r   r   r)   r)   r-   r.     r   )r   r   )r   ry   rz   Zfor_test)?r	   rn   r6   Zdisable_staticrJ   rR   r#   r;   r>   rE   re   rf   program_helperr   Zunique_nameguardZbuild_programZconcrete_programr   startup_programZ
input_varsrg   Z
label_varsrh   Zoutput_varsZ	loss_varsri   r   Zenable_staticra   getr^   cloner`   rk   r   program_guardr@   rB   r5   r7   r8   r9   r   Zcomputer   r   Zhas_annotationr'   r   r   r[   Zdata_parallelflattenZset_recompute_segmentsrG   r   rA   rF   rK   rT   Zgradient_scalerU   )rw   r   Zserial_main_progZserial_startup_progr   rz   r   r}   Zdefault_ctx	feed_varsr   r)   r)   r-   r   ?  s   








 




zEngine._buildc                 C   s   | j jstd|dksJ | | | | | j|_| j|_ddl	m
} || j| || j| j|| jd| _| j  | j jrK| j | j| _d S d S )Nz Please set `tuning.enable=True`.r1   r   )OptimizationTuner)r   rank)ro   rq   r   r   r   _dp_world_sizesdp_world_size	_dp_ranksdp_rankZtuner.optimization_tunerr   ra   re   rf   rZ   Z_optimization_tunerZtuneZrun_after_tuningZget_best_configrG   )rw   r   r   r   r   r)   r)   r-   _optimization_tuning  s0   


	zEngine._optimization_tuningc           
      C   s   | j d u r	|| _ n| jjdkr| | t|| j| | j|< | j|   | j| jd }| j| jd }| j| j	
 }g }|| D ]}|j|jv rU||j|j  qDg | _g | _|D ]}t| j|| j| \}}	| j| | j|	 q^d S )Nsemir   r   )rm   rG   	auto_mode_init_dist_contextr%   ra   rb   Zplanserial_feed_varsserial_main_programr   r*   r   r   r  r  r@   Zget_input_split_inforZ   )
rw   r   
inputs_var
labels_varblockr   r   Zfeed_varr  r  r)   r)   r-   r     s.   

zEngine._planFc                 C   sB   t || j| j| j| }|s|| j| j d S || j d S r   )r$   rb   Z	completerra   ZparallelrZ   r=   Zparallel_all)rw   r   Z	all_ranksZparallelizerr)   r)   r-   r     s   
zEngine._parallelc              	   C   s   | j | }|j}| j}| j | }|j}|j}t|jD ]2\}}	t|	jD ](\}
}|| j|
 }|j|jksAJ d||j||j||}|	|| q$qd S )Nz2'{}' mode op '{}' is different with '{}' op '{}'. )
ra   Z_original_serial_main_programrm   blocksr   r   r   r   Zget_op_dist_attr_for_programZset_op_dist_attr_for_program)rw   r   r   Zorigin_main_progZref_modeZref_dist_contextZref_origin_main_progZ
ref_blocksZibr  Ziopr   Zref_opZref_op_dist_attrr)   r)   r-   r	    s&   

zEngine._init_dist_contextc                 C   sH   | j dkr t }| jjdkrt|| j d S |D ]}|  qd S d S )Nr   Zfull_random)r[   r&   rG   r  r@   Zinitialize_pg_in_full_moderZ   Zinstantiate)rw   Zall_process_groupsprocess_groupr)   r)   r-   r     s   


zEngine._init_commc                 C   sj  t  | _t| jtjjrtjtj j| _| j	j
r@t
| j	j
| jd   tj
| j	j
| jd   t
| j	j
| jd   | j| }|j| j }| jr| j|| j| | jrt| j dkr| j D ]Z}| |jr| |jj}t |j}t |j }|r| rqh|t j!j"j#kr|$t%|& | j qh|t j!j"j'kr|$t(|& | j qh|$|& | j qh| j)d u rtj*+| j| _)g }	|j,| j }
|
- D ]}t |j}|r|  rq|	.| q|	r|
/|	}| j)0| t1| drt1| dr| 2|| j3| j4| j5 | j	j6r3| j78d |j,| j }
| j)0|
 d S d S )Nr   _state_dict
_dist_attrz(NOTE: parameters will be re-initialized.)9_get_device_placer5   r6   ru   Z	CUDAPlacerY   ZParallelEnvZdev_idrG   seedr  r   randomra   r   rZ   rn   r   rS   r;   r   buffersr   Zhas_varr*   r   dtyper   Zfind_varZ
get_tensorZ_is_initializedr   ZVarDescZVarTypeZBF16setr   numpyZFP16Zfloat16rX   r   ZExecutordist_startup_programsZ	list_varsr   Z_prunerunhasattr_set_state_dict_strictr  r  ZreinitrJ   rR   )rw   r   r   r   dist_main_programbufferZ	dest_typeZ	scope_varZbuffer_tensorZuninitializeddist_startup_progr   Zprune_startup_progr)   r)   r-   r   ,  s~   




zEngine._initializer   
   r   c                  C   s  d| _ | j| j  s| |||\| _| _| | j  n| | j  t r?| 	|}| j
|d|||d}|du r<t|n|}n | 	|}| j|dd||||d}|j}|}| jjjr_|| j }| jd| j d\}}t|| ||||||||  | jjjr}dn| jd	}|d t|D ]}i }|| t|D ]\}}t r| |}ni g}zU|D ]P}tjjj||d
 |d d8 |d|| | jj | j!||| jj"| jj#d}t$| j%}| &||||||| j }|'d|| W d   n1 sw   Y  qW n t(j)y
   Y  nw |r||krt s|*   nq|	rK|d | d
krK| +|	|
||||||}dd |, D }|-| | d n| .  |/|| q|0d| | j1S )a  
        Trains the model for a fixed number of epochs. If `valid_data` is set,
        evaluation will be done at the end of each epoch.

        Args:
            train_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
            train_sample_split (int, optional): Each sample of the train dataset is assumed
                to be a (input, label) pair by default and has two items. If each sample has
                more than two items, train_sample_split specifies how to split these items into
                input and label. The items before it are input and the left are label. Default: None.
            batch_size (int, optional): The batch size of train_data and valid_data if provided.
                The user's data will be used directly without batching if set to None. Default: 1.
            epochs (int, optional): The number of epochs to train the model. Default: 1.
            steps_per_epoch (int, optional): The total number of steps (batches of samples)
                is executed in one epoch before stating the next one. If None, it is equal to
                the number samples in your dataset divided by the batch size. Default: None.
            valid_data (Dataset, optional): An instance of paddle paddle.io.Dataset used for
                evaluation at the end of epoch. No evaluation will be done if set to None.
                Default: None. (Unsupported for now)
            valid_freq (int, optional): Only relevant if valid_data is provided. This specifies
                how many training epochs before a new evaluation is performed. Default: 1.
            valid_sample_split (int, optional): Only relevant if valid_data is provided.
                Each sample of the valid dataset is assumed to be a (input, label) pair
                by default and has two items. If each sample has more than two items,
                valid_sample_split specifies how to split these items into input and label.
                The items before it are input and the left are label. Default: None.
            valid_steps (int, optional): Only relevant if valid_data is provided.
                It is the total number of steps (batches of samples) to draw before
                stopping validation at the end of every epoch. If None, validation will run until the
                `valid_data` dataset is exhausted. The validation will start from the
                beginning of the dataset at each epoch. Default: None.
            collate_fn(callable, optional): function to generate mini-batch data by merging
                the sample list, None for only stack each fields of sample in axis
                0. Default None.
            callbacks (Callback|None, optional): A list of `Callback` instances to apply
                during training. Default: None. (Unused for now)
            nvprof_range(list, optional): A list of integers indicating nvprof ranges in form of [start_step, end_step]. Note that if start_step >= end_step, the nvprof will not apply.

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle
                >>> import paddle.vision.transforms as T
                >>> from paddle.distributed.fleet import auto
                >>> from paddle.vision.datasets import MNIST

                >>> transform = T.Compose([
                ...     T.Transpose(),
                ...     T.Normalize([127.5], [127.5])
                >>> ])
                >>> train_dataset = MNIST(mode='train', transform=transform)

                >>> model = paddle.vision.models.LeNet()
                >>> loss = paddle.nn.CrossEntropyLoss()
                >>> optimizer = paddle.optimizer.Adam(
                ...     learning_rate=0.001, parameters=model.parameters())
                >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

                >>> engine = auto.Engine(model, loss, optimizer, metrics)
                >>> engine.fit(train_dataset,
                ...             epochs=2,
                ...             batch_size=64)
        r1   F)return_listr   epochs
collate_fnNF   )r   capacityiterabler   r&  steps_per_epochr'  r   r   )
enginer   r&  stepslog_freq	save_freqsave_dirverboserz   Zacc_stepr   )Ziter_idstartendfeed
fetch_listZuse_program_cachereturn_numpyc                 S   s   i | ]	\}}d | |qS )Zval_r)   )r+   r*   valr)   r)   r-   
<dictcomp>?  s    zEngine.fit.<locals>.<dictcomp>)2rj   rc   r   re   rf   r   _switch_moder@   r   _validate_batch_size_prepare_dataloaderr   "_prepare_dataloader_from_generator_stepsrG   rr   rq   rp   r   r   _metrics_nameon_beginr   Zon_epoch_beginr   _validate_batchr6   Zprofilerr   Z_nvprof_rangeon_batch_beginrX   r  r   	use_cacher8  Zget_lrr?   r   on_batch_endr   EOFException_resetevaluater   r   _reset_metricsZon_epoch_endon_endrt   ) rw   Z
train_dataZtrain_sample_splitr   r&  r+  r/  r1  r0  
valid_datavalid_sample_splitZ
valid_freqZvalid_stepsr'  	callbacksr2  Znvprof_rangelocal_batch_sizeZtrain_dataloadermicro_batch_sizer   r   cbksr   r   r   r   batchesmicro_batchr   r   Zval_logsr)   r)   r-   fitv  s   V
	
	



	


z
Engine.fitc	              
   C   s  d| _ | j| j  s| |||\| _| _| | j  n| | j  t r>| 	|}	| j
|d|	|d}
|du r;t|
n|}n| 	|}| j|dd|||d}
|
j}|}	| jjjr]|| j }	| jd| j d\}}t|| |	|||  d}|}|d||  d	 i }t|
D ]^\}}t r| |}ni g}z|D ]}|d|| | jj| j||| jj| jjd
}qW n tjy   Y  n%w |r||krt s|
    n| !|d|d||| j }|"d|| q|#d| | $  |S )a  
        Evaluate the loss and metrics of the model on evaluation data.

        Args:
            valid_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
            valid_sample_split (int, optional): Each sample of the eval dataset is assumed
                to be a (input, label) pair by default and has two items. If each sample has
                more than two items, valid_sample_split specifies how to split these items into
                input and label. The items before it are input and the left are label. Default: None.
            batch_size (int, optional): The batch size of valid_data. The user's data will
                be used directly without batching if set to None. Default: 1.
            steps (int, optional): It is the total number of steps (batches of samples) to draw before
                stopping evaluation. If None, evaluation will run until the `valid_data` dataset is exhausted.
                The evaluation will start from the beginning of the dataset in each run. Default: None.
            collate_fn(callable, optional): function to generate mini-batch data by merging
                the sample list, None for only stack each fields of sample in axis
                0. Default None.
            callbacks (Callback|None, optional): A list of `Callback` instances to apply
                during evaluating. Default: None. (Unused for now)

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle
                >>> import paddle.vision.transforms as T
                >>> from paddle.distributed.fleet import auto
                >>> from paddle.vision.datasets import MNIST

                >>> transform = T.Compose([
                ...     T.Transpose(),
                ...     T.Normalize([127.5], [127.5])
                >>> ])
                >>> valid_dataset = MNIST(mode='test', transform=transform)

                >>> model = paddle.vision.models.LeNet()
                >>> loss = paddle.nn.CrossEntropyLoss()
                >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

                >>> engine = auto.Engine(model, loss, metrics=metrics)
                >>> engine.evaluate(valid_dataset, batch_size=64)

        r2   Fr%  r   r'  Nr(  r   r)  r*  r   r+  r'  r,  )r-  r   r/  r2  rz   )r.  rz   r5  )%rj   rc   r   re   rf   r   r;  r@   r   r<  r=  r   r>  r?  rG   rr   rq   rp   r   r   r@  rA  r   rB  rC  rX   r  r   rD  r8  r   rF  rG  r   rE  rJ  rI  )rw   rK  rL  r   r.  r/  r'  rM  r2  rN  Zvalid_dataloaderr+  rO  r   r   rP  Z
eval_stepsr   r   r   rQ  rR  r   r)   r)   r-   rH  L  s   9



		zEngine.evaluatec              
   C   s  d| _ | j| j  s| |||\| _| _| | j  n| | j  t r>| 	|}| j
|d||d}	|du r;t|	n|}
n| 	|}| j|dd|||d}	|	j}
| jd| j d\}}g }t|| |d}|
}|dd	|i i }t|	D ]i\}}t r| |}ni g}z|D ]}|d|| | jj| j||| jj| jjd
}qW n tjy   Y  n0w |
r||
krt s|	   n| |d|d||| j }|d|| |t |d !  qt|"d| |S )a4  
        Compute the output predictions on testing data.

        Args:
            test_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
            test_sample_split (int, optional): Each sample of the test dataset is assumed
                to be a (input, label) pair by default and has two items. If each sample has
                more than two items, test_sample_split specifies how to split these items into
                input and label. The items before it are input and the left are label. Default: None.
            batch_size (int, optional): The batch size of test_data. The user's data will
                be used directly without batching if set to None. Default: 1.
            steps (int, optional): It is the total number of steps (batches of samples) to draw before
                stopping predict. If None, predict will run until the `test_data` dataset is exhausted.
                The predict will start from the beginning of the dataset in each run. Default: None.
            collate_fn(callable, optional): function to generate mini-batch data by merging
                the sample list, None for only stack each fields of sample in axis
                0. Default None.
            callbacks (Callback|None, optional): A list of `Callback` instances to apply
                during testing. Default: None. (Unused for now)

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle
                >>> import paddle.vision.transforms as T
                >>> from paddle.distributed.fleet import auto
                >>> from paddle.vision.datasets import MNIST

                >>> transform = T.Compose([
                ...     T.Transpose(),
                ...     T.Normalize([127.5], [127.5])
                >>> ])
                >>> valid_dataset = MNIST(mode='test', transform=transform)

                >>> model = paddle.vision.models.LeNet()

                >>> engine = auto.Engine(model)
                >>> engine.predict(valid_dataset, batch_size=64)
        r3   FrT  Nr(  rU  r,  )r-  r2  r.  r5  r   )#rj   rc   r   re   rf   r   r;  r@   r   r<  r=  r   r>  r?  r   r   rA  r   rB  rC  rX   r  r   rG   rD  r8  r   rF  rG  r   rE  r   r   valuesrJ  )rw   Z	test_dataZtest_sample_splitr   r.  r'  rM  r2  rN  Ztest_dataloaderr+  rO  r   r   r   rP  Z
test_stepsr   r   r   rQ  rR  r   r)   r)   r-   r3     sz   5

	zEngine.predictr   c                 C   s   |d ur	|  | | j| j s!| |||\| _| _| | j n| | j | |}| j	|d||||||||	|
|||d}|S )NF)r%  r   shuffle	drop_lastr'  num_workersuse_buffer_readeruse_shared_memorytimeoutworker_init_fnr&  r+  places)
to_moderc   rj   r   re   rf   r   r;  r<  r=  )rw   r   r   rW  rX  r'  rY  rZ  r[  r\  r]  r&  r+  sample_splitr   r^  
dataloaderr)   r)   r-   ra  P  s4   

zEngine.dataloaderr(  c                 C   s|   |d ur	|  | | j| j s!| |||\| _| _| | j n| | j | |}| j	||||d|||||	|
d}|S )NF)r   r)  use_double_bufferr*  r%  use_multiprocessrX  r   r&  r+  r'  )
r_  rc   rj   r   re   rf   r   r;  r<  r>  )rw   r   r)  rb  r*  rc  rX  r   r&  r+  r'  r`  r   rO  ra  r)   r)   r-   dataloader_from_generator  s.   

z Engine.dataloader_from_generatorc	           	      C   sF  |d ur	|  | | jstd| j| j rd S | |}| |}| |}| |}|| _|| _|s6|rXd| _| 	||||\}}| jd u rMt
 | _| jd u rWt
 | _n&|s\|rtd| _| jd u rit
 | _| jd u rst
 | _n
| jrz| js~J d||| _| _||| _| _| j| j s| | j| d S | | j d S )Nz7Please set mode to be prepared with `prepare(mode=...)`Tz;Please call the dataloader(...) before calling prepare(...))r_  rj   r   rc   r   _validate_varsr^   r`   rk   r   r   r]   r_   rl   re   rf   rg   rh   r   r;  )	rw   r   r   r   r   r   r   r   r   r)   r)   r-   prepare  sT   












zEngine.preparec           
   	   C   s   |d ur	|  | | ||| j}| || j\}}| jr'| j| j s'|   | j| j_| jj	| j
||| jj| jjd}| |d d d ||| j}	|	S )Nr5  )r_  r   rj   r   rl   rd   r   rv   rX   r  r   rG   rD  r8  r   )
rw   r   r6  r7  r   Z	feed_dictr   r   r   r   r)   r)   r-   r    s,   

z
Engine.runc           
      C   s   | j | j }|j| j }|j| j }| }|jd }|jd }g }|| D ]'}|j|jv r8|	|j|j  q&|
||j}	|	j|j  |	|	 q&|S )Nr   r   )ra   rj   r   rZ   r  r   r
  r*   r   r   r   persistabler   set_original_idoriginal_id)
rw   r   r   r"  r   r  r  r   r   copy_varr)   r)   r-   get_feed_list  s   

zEngine.get_feed_listc                 C   sH  | j | j }|j| j }|j| j }| }|jd }|jd }g }|| D ]'}|j|jv r8|	|j|j  q&|
||j}|j|j  |	| q&t||F t|fi d|d|d|d|d|d|d	|d
|d|d|	d|
d|d|d|d| jjd| jd| j}W d    |S 1 sw   Y  |S )Nr   r   r   r^  r%  r   rW  rX  r'  rY  rZ  r[  r\  r]  r&  r+  
split_datadata_parallel_world_sizedata_parallel_rank)ra   rj   r   rZ   r  r   r
  r*   r   r   r   rg  r   rh  ri  r   r   r   rG   rl  r  r  )rw   r   r%  r   rW  rX  r'  rY  rZ  r[  r\  r]  r&  r+  r^  r   r   r"  r   r  r  r   r   rj  ra  r)   r)   r-   r=    sn   

	

zEngine._prepare_dataloaderc                 C   sh  | j | j }|j| j }|j| j }| }|jd }|jd }g }|| D ]'}|j|jv r8|	|j|j  q&|
||j}|j|j  |	| q&tj }t||L tdi d|d|d|d|d|d|d	|d
|d|d|d|	d|
d|d| jjd| jd| jd| jjjsdn| j}W d    n1 sw   Y  | | |S )Nr   r   r   r   r)  rb  r*  r%  rc  rX  r^  r   r&  r+  r'  rl  rm  rn  Z	acc_stepsr   r)   )ra   rj   r   rZ   r  r   r
  r*   r   r   r   rg  r   rh  ri  r6   r   Zcuda_placesr   r    rG   rl  r  r  rr   rq   rp   r   )rw   r   r)  rb  r*  r%  rc  rX  r   r&  r+  r'  r   r   r"  r   r  r  r   r   rj  r^  ra  r)   r)   r-   r>  R  sp   


	

z)Engine._prepare_dataloader_from_generatorc                 C   s0   d| _ | |||\| _| _| | j || d S )Nr1   )rj   r   re   rf   r  )rw   Z	tune_dataZtune_sample_splitr   r)   r)   r-   _tune  s
   zEngine._tunec                 C   s   |d u rd S t  r>tt| jdksJ dtt| j|| jd  dks7J dt|t| jd || jd  S || j dksNJ d|| j|| j S )Nr   ziDistributedBatchSampler only support one data parallel group, but got [{}] different data parallel groupsr   z6batch_size [{}] is not divisible by dp_world_size [{}]z;Requires batch_size:[{}] to be divisible by acc_steps:[{}].)r@   r   r   r  r  r   r   rp   )rw   r   r)   r)   r-   r<    s(   
zEngine._validate_batch_sizec                    s   |d u rd gS | j jjs| jdkr|S g }g }|d  D ]\}}|| |tt|| jd qg }t	| jD ]  fdd|D }|t
t|| q<|S )Nr   r   c                    s   g | ]}|  qS r)   r)   )r+   Zsplit_batchr   r)   r-   r.     s    z*Engine._validate_batch.<locals>.<listcomp>)rG   rr   rq   rp   r   r   r   r   arrayr   r   r   )rw   r   Z
feed_namesZsplit_batchesZ	feed_nameZcur_feedZbachesrR  r)   rp  r-   rB    s    
zEngine._validate_batchc                 C   s   t |}|d urVt|D ]H\}}t|tstd|jd u r*td| d| d| jdkrUt	|j
}|d | j dksIJ d|j
d | j|d  | j  < ||_
q|pYg S )Nz9'spec' must be object of class `paddle.static.InputSpec`.zRequires Input[z(].name != None, but receive `None` with .r   r   z7Requires batch_size[{}] to be divisible by k_steps[{}].)r@   rB   r   r5   r   r:   r*   r   rp   r   r   r   )rw   r   r   r   r   r)   r)   r-   r     s,   




zEngine._validate_specc                 C   s>   t |}|d urt|D ]\}}t|tstdq|pg S )Nz'var' must be a `Variable`.)r@   rB   r   r5   r   r:   )rw   r   r   r   r)   r)   r-   re    s   

zEngine._validate_varsc                 C   s   t |}|| j jv S r   )r   r   r   r   )rw   r   r   r)   r)   r-   r     s   zEngine._is_local_varc                 C   s   | j D ]}|  qd S r   )rE   reset)rw   r}   r)   r)   r-   rI    s   

zEngine._reset_metricsc                 C   s4   | j rdgng }| jD ]}|t|  q|S )Nry   )r>   rE   extendr@   rB   r*   )rw   Zmetrics_namemr)   r)   r-   r@    s   
zEngine._metrics_namec                 C   s&   || j v sJ | d| | d S )Nz3 model is not ready, please call `prepare()` first.)ra   r_  rw   r   r)   r)   r-   r;    s   zEngine._switch_modec                 C   s"   |dv sJ d| d|| _ d S )Nr0   zmode z. should be one of ['train', 'eval', 'predict'])rj   rv  r)   r)   r-   r_    s   


zEngine.to_modec              	   C   s   | j | }|j| j }t||}t|||}|j|d}|  D ]3\}	}
t	
|
}|	|vr1q#|j||	 jkrV| jd|	t||	 jt|j ||	 |j||	< q#|| d S )N)strictz!cast {}'s dtype from '{}' to '{}')ra   r   rZ   r@   Zget_dist_attrr   convert
state_dictr   r   rq  r  rJ   rR   r   r   ZastypeZset_state_dict)rw   r   rw  ry  Z	dist_attrr   programZcur_dist_attr	converterr*   paramZparam_arrayr)   r)   r-   r    s(   

zEngine._set_state_dictc                 C   s&  |r%| j | jv s
J | j| j  }|j}|j| j }| jj||||d dS d| jv s,J | jd }|jd }|jd }|j| j }| j	j
jr| j	j
jrddlm} | jd | jd| j	j
   tt|jd	d
}	|t | j}
|	 D ]}|
| qy|	 }| jj|||| j|d dS )a%  
        Saves the model, parameters, optimizer state to path.
        If `training` is set to False, only inference model will be saved.

        Args:
            path (str): The file prefix to save model. The format
                is 'dirname/file_prefix' or 'file_prefix'. if empty str.
                A exception will be raised.
            training (bool, optional): Whether to save for training. If not, save
                for inference only. If `training` is set to True, the optimizer state
                will be saved. Otherwise, only the model and parameters are saved.
                This function will silently overwrite existing file at the target
                location. Default: True.

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle
                >>> import paddle.vision.transforms as T
                >>> from paddle.distributed.fleet import auto
                >>> from paddle.vision.datasets import MNIST

                >>> transform = T.Compose([
                ...     T.Transpose(),
                ...     T.Normalize([127.5], [127.5])
                >>> ])
                >>> train_dataset = MNIST(mode='train', transform=transform)

                >>> model = paddle.vision.models.LeNet()
                >>> loss = paddle.nn.CrossEntropyLoss()
                >>> optimizer = paddle.optimizer.Adam(
                ...     learning_rate=0.001, parameters=model.parameters())
                >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

                >>> engine = auto.Engine(model, loss, optimizer, metrics)
                >>> engine.fit(train_dataset,
                ...             epochs=1,
                ...             batch_size=64)
                >>> engine.save("./my_model")

        )serial_programr   r   r3   r   r   r   )QuantWeightPasszexport quantized model.zconvert config Tr   )rz  N)rj   ra   r  r   rZ   r\   saver
  r   rG   Zqatrq   Zonnx_formatZpaddle.static.quantizationr~  rJ   rR   to_dictr   r   ZGraphr   r   r  Zall_sub_graphsapplyZ
to_programZsave_inference_modelrX   )rw   r~   Ztrainingr   r}  r   r   r   r~  Z
test_graphZquant_weight_passZ	sub_graphr)   r)   r-   r    sH   .




zEngine.savec                 C   s(   || _ | j||\| _| _| j| jfS )a)  
        Load the stored model, parameters and optimizer states.

        Args:
            path (str): The prefix of files storing the model states and
                optimizer states.
            strict (bool, optional): Whether to skip the loading of mismatch
                parameter or raise an error when mismatch happens (not found
                the parameter in file storing model states of or receives a
                mismatch shape). Default: True.
            load_optimizer (bool, optional): If True, the stored optimizer
                states is restored. Otherwise, the optimizer states is initialized
                from scratch. Default: True.

        Returns:
            None

        Examples:

            .. code-block:: python

                >>> import paddle
                >>> import paddle.vision.transforms as T
                >>> from paddle.distributed.fleet import auto
                >>> from paddle.vision.datasets import MNIST

                >>> transform = T.Compose([
                ...     T.Transpose(),
                ...     T.Normalize([127.5], [127.5])
                >>> ])
                >>> train_dataset = MNIST(mode='train', transform=transform)

                >>> model = paddle.vision.models.LeNet()
                >>> loss = paddle.nn.CrossEntropyLoss()
                >>> optimizer = paddle.optimizer.Adam(
                ...     learning_rate=0.001, parameters=model.parameters())
                >>> metrics = paddle.metric.Accuracy(topk=(1, 2))

                >>> engine = auto.Engine(model, loss, optimizer, metrics)
                >>> engine.fit(train_dataset,
                ...             epochs=1,
                ...             batch_size=64)
                >>> engine.save("./my_model")
                >>> engine.load("./my_model")

        )r  r\   rP   r  r  )rw   r~   rw  Zload_optimizerr)   r)   r-   rP   h  s
   /zEngine.loadc                 C   s  | j jdkr| jd dS |dur|n| j}|dusJ d|| jvr1td|t| j	 | 
| |durV| j| sV| || _| || _| | | | n(t s\| jr`td| jd tj }| jru| js~| j| s~tdt| |\}}|j|fS )a  
        Get and Print cost, including memory of every rank,
        max memory among all ranks, and the global cost of one step based on
        communication cost(computation cost is 0 by default).
        In the future, the flops information of every rank and global cost including
        computation cost will be added.

        Args:
            inputs_spec(InputSpec): The specification of inputs. Default: None.
            labels_spec(InputSpec): The specification of labels. Default: None.
            mode (str): The engine mode must be in ["train", "predict", "eval"]. Default: None.

        Returns:
            Return the global execution time (ms) and max memory (B).

        fullzMThe cost will be calcudated in the search process when the auto mode is full.NzPlease set mode.z'The mode {} is not in accepted modes {}z\Please call `prepare()` or `fit()` or  `evaluate()` or  `predict()` before calling `cost()`.zThe program whose cost to be estimated must be static default program. Otherwise, please call `prepare()`before calling `cost()`.)rG   r  rJ   rR   rj   rc   r   r   r   keysr_  r   re   rf   r   r   r	   rn   r6   r   r]   r   r   r   time)rw   r   r   r   rz  Zglobal_costZ
max_memoryr)   r)   r-   cost  sL   




zEngine.costc                 C      | j | j| j S r   )ra   r   rZ   rv  r)   r)   r-   get_dist_main_program     zEngine.get_dist_main_programc                 C   r  r   )ra   r  rZ   rv  r)   r)   r-   get_dist_startup_program  r  zEngine.get_dist_startup_programc                 C      | j | jS r   )ra   r  rv  r)   r)   r-   get_serial_main_program     zEngine.get_serial_main_programc                 C   r  r   )ra   serial_startup_programrv  r)   r)   r-   get_serial_startup_program  r  z!Engine.get_serial_startup_programc                 C      | j | j }|j| j S r   )ra   rj   r   rZ   rw   r   r)   r)   r-   r        zEngine.main_programc                 C   r  r   )ra   rj   r  rZ   r  r)   r)   r-   r     r  zEngine.startup_programc                 C   s   | j | j S r   )ra   rj   rw   r)   r)   r-   r     s   zEngine.dist_contextc                 C      | j | j }|jS r   )ra   rj   r  r  r)   r)   r-   r       zEngine.serial_main_programc                 C   r  r   )ra   rj   r  r  r)   r)   r-   r    r  zEngine.serial_startup_programc                 C   r  r   )ra   rj   r
  r  r)   r)   r-   r      r  zEngine.feed_varsc                 C   r  r   )ra   rj   r   r  r)   r)   r-   r     r  zEngine.fetch_varsc                 C   s   | j | j }|jr|jS | jS r   )ra   rj   Z_serial_optimizerrA   r  r)   r)   r-   r?   
  s   zEngine.optimizerc                 C      | j S r   )rg   r  r)   r)   r-   r        zEngine.inputsc                 C   r  r   )rh   r  r)   r)   r-   r     r  zEngine.labels)NNNNNN)T)F)Nr   Nr#  NNr   )Nr   NNNr   )r   FTNr   TTr   Nr   Nr   NN)r(  TTFTr   r   NNr   N)NNNNNNNT)NNNN)Tr   FTNr   TTr   Nr   NN)
NTTFFTr   r   NN)Nr   )TT)NNN):rD   
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r  r   r   r	  r   r   rS  rH  r3   ra  rd  rf  r  rk  r=  r>  ro  r<  rB  r   re  r   rI  r@  r;  r_  r  r  rP   r  r  r  r  r  propertyr   r   r   r  r  r   r   r?   r   r   r)   r)   r)   r-   r(   ;   sH   >
 G!O*

: 

M
 Z
 
}
3
,

8
A

<

U
5?








r(   )GrV   rO   rH   r   rL   r  r  r   r6   Z-paddle.distributed.auto_parallel.static.utilsrY   Zauto_parallelr   r   r@   Zpaddle.base.executorr   Zpaddle.distributedr   Zpaddle.frameworkr   r   r  r   r	   Zpaddle.metricr
   Zpaddle.staticr   r   r   r   Zpaddle.static.amp.fp16_utilsr   Zutils.log_utilsr   Z	interfacer   r   r   r|   r   rM  r   r{   r   r   r{  r   Zcost.estimate_costr   r   r   r   Zdist_loaderr   r    r   r!   Z
dist_saverr"   helperr#   Zparallelizer_v2r$   Z
planner_v2r%   r  r&   r'   r(   r)   r)   r)   r-   <module>   s@   