o
    "jM                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZddlmZm	Z	m
Z
 ddlmZmZ ddlmZmZmZ ddlmZ ddlmZmZmZ d	Zdad
adadddZdd Zdd ZG dd dZG dd dZ dS )    N   )_current_expected_place_get_paddle_place_get_paddle_place_list)corein_dynamic_mode   )BatchSamplerIterableDatasetSubset)_InfiniteIterableSampler)_DataLoaderIterMultiProcess_DataLoaderIterSingleProcess_DatasetKind<   F  c                 C   s   | a |ad S N)USE_AUTOTUNETUNING_STEPS)Zuse_autotuneZtuning_steps r   Q/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/io/reader.pyset_autotune_config0   s   r   c                  G   s:   t | dkrtS t | dkrt| d tsJ | d ad S )Nr   r   )lenUSE_PINNED_MEMORY
isinstancebool)argsr   r   r   use_pinned_memory7   s   r   c                 C   sR   t | ttfs
| g} g }| D ]}t |tjs!t }|| |}|| q|S r   )r   listtupler   ZPlaceZ	set_placeappend)placesretptmpr   r   r   _convert_places@   s   
r%   c                   @   sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dS )AuToTunec                 C   s   || _ t d | _d S )Nr   )loadermultiprocessing	cpu_countmax_num_worker)selfr'   r   r   r   __init__P   s   zAuToTune.__init__c                 C   s,  t r|  s
| jjS |  }|d u r| jjS t }td tdt| jj  d}t	d}tdt| j
  d}|| j
k r|||_| |}|d |krT|}|}n| |||| j
}||krbn|}tdt| d t|  |d	7 }|| j
k sAtd
t|  tdtt |  d  |S )Nz(========= DataLoader Auto Tune =========zUser config for DataLoader: r   infz"Tuning Range for num_workers: 0 ~ g      ?znum_workers:  avg_cost: r   z'auto_tune dataLoader best_num_workers: z AutoTuning Cost for DataLoader: z seconds)r   need_autotuner'   num_workersget_autotune_loadertimeloggingdebugstrfloatr*   evaluate_reader_costis_bestinfo)r+   Zauto_tune_loaderZauto_tune_startZbest_num_workersZmin_costr0   avg_costZ
update_numr   r   r   __call__T   sj   




zAuToTune.__call__c                 C   s   t jdks
t jdkrdS dS )Ndarwinwin32FT)sysplatformr+   r   r   r   r/      s   zAuToTune.need_autotunec                 C   s*   t |t t|}t|tt|d}|S )N)indices)minr   r   r   r   range)r+   dataset
batch_sizeZnum_samplessub_datasetr   r   r   get_sub_dataset   s   zAuToTune.get_sub_datasetc                 C   s   t  | j}| jjj}t| jjtjjr:| jjj}| 	||}tjj||| jjj
| jjj| jjj| jjjd|_|S t| jjtjjr^| jjjj}| 	||}tjj||| jjjd|_|S d }|S )N)rD   rE   Znum_replicasrankshuffle	drop_last)rD   rE   rJ   )copyr'   batch_samplerrE   r   paddleioZDistributedBatchSamplerrD   rG   ZnranksZ
local_rankrI   rJ   r	   ZsamplerZdata_source)r+   r'   rE   rD   rF   r   r   r   r1      s6   

zAuToTune.get_autotune_loaderc                 C   s   g }d}t   }t|D ]\}}|t   |  t   }qt|dkr6t|dd  t|dd   }|S t|dd  t|dd   }|S )Nr   r   )r2   	enumerater    r   sum)r+   readerZcostsr:   startidatar   r   r   r7      s   
  zAuToTune.evaluate_reader_costc           	      C   s   d}|d }d}||k rF|dk rF|| j _| |}tdt| d t|  |d7 }||d | k r6|S |d7 }|d9 }||k rF|dk s|S )Nr   r      zfor back num_workers: r.   gffffff?g?)r'   r0   r7   r3   r4   r5   )	r+   rQ   Zbest_workersZ	best_timeZnum_work_boundarystepr0   boundaryr2   r   r   r   r8      s,   
zAuToTune.is_bestN)
__name__
__module____qualname__r,   r;   r/   rG   r1   r7   r8   r   r   r   r   r&   O   s    :r&   c                   @   sP   e Zd ZdZ															ddd	Zd
d Zdd Zdd ZdS )
DataLoadera   
    DataLoader prodives an iterator which iterates given dataset
    once by the batch_sampler.

    DataLoader supports single-process and multi-prcess data loading,
    multi-process workers will be used to load data asynchronously if
    :attr:`num_workers` is set as a positive number.

    DataLoader supports map-style dataset and iterable-style dataset.

    For map-style datast(can get a sample from dataset with a given
    index), please see :code:`paddle.io.Dataset`.

    For iterable-style datast(get samples from dataset iteratively,
    like a Python iterator), please see :code:`paddle.io.IterableDataset`.

    For :code:`batch_sampler` please see :code:`paddle.io.BatchSampler`

    Notes:
        GPU tensor operation is not supported in subprocess currently,
        please don't use GPU tensor operations in pipeline which will
        be performed in subprocess, such as dataset transforms, collte_fn,
        etc. Numpy array and CPU tensor operation is supported.

    **Disable automatic batching**

    In certain cases such as some NLP tasks, instead of automatic batching,
    handling batching manually in dataset is needed by users. For these
    cases, automatic batching is disabled if both :attr:`batch_size` and
    :attr:`batch_sampler` is set as None, each data got from :attr:`dataset`
    should be batched data and will be processed with function define by
    :attr:`collate_fn` or :attr:`default_collate_fn`.


    Notes:
        When automatic batching is disabled, :attr:`default_collate_fn` will
        do nothing to data from dataset.


    Args:
        dataset(Dataset): the dataset to load data from, should be an
            instance of subclass of :code:`paddle.io.Dataset` or
            :code:`paddle.io.IterableDataset`.
        feed_list (list(Tensor)|tuple(Tensor), optional): feed Tensor list.
            The Tensors should be created by :code:`paddle.static.data()`.
            :attr:`feed_list` must be set if :attr:`return_list` is
            False. Default None.
        places(list(Place)|tuple(Place)|list(str), optional): a list of Place,
            to put data onto, :attr:`places` can be None, if
            :attr:`places` is None, default place(CPUPlace or CUDAPlace(0))
            will be used. Default None. If ``places`` is list of string,
            the string in the list can be ``cpu``, ``gpu:x`` and ``gpu_pinned``,
            where ``x`` is the index of the GPUs.
        return_list (bool, optional): whether the return value on each device is
            presented as a list. If :attr:`return_list=False`, the return
            value on each device would be a dict of str -> Tensor, where
            the key of the dict is the name of each fed Tensors. If
            :attr:`return_list=True`, the return value on each device would
            be a list(Tensor). :attr:`return_list` can only be True
            in dynamic graph mode. Default True.
        batch_sampler(BatchSampler, optional): an instance of `paddle.io.BatchSampler`
            to generate batch indices to draw samples from :attr:`dataset`
            and combine a batch. Default None.
        batch_size(int|None, optional): sample number in a mini-batch, a substitution
            parameter for :attr:`batch_sampler`, if :attr:`batch_sampler`
            is not set, a default `paddle.io.BatchSampler` will be used
            and initialize by :attr:`batch_size`, :attr:`shuffle` and
            :attr:`drop_last`. Default 1.
        shuffle(bool, optional): whther to shuffle indices order before genrate
            batch indices, a substitution parameter for :attr:`batch_sampler`
            see :attr:`batch_size`. Default False.
        drop_last(bool, optional): whether drop the last incomplete batch dataset size
            is not divisible by the batch size, a substitution parameter
            for :attr:`batch_sampler`, see :attr:`batch_size`. Default False
        collate_fn(callable, optional): function to generate mini-batch data by merging
            the sample list, None for only stack each fields of sample in axis
            0(same as :attr::`np.stack(..., axis=0)`). Default None
        num_workers(int, optional): the number of subprocess to load data, 0 for no
            subprocess used and loading data in main process. Default 0
        use_buffer_reader (bool, optional): whether to use bufferred reader.
            If use_buffer_reader=True, the DataLoader would prefetch
            batch data asynchronously, so it would speed up data feeding
            and occupies a little more CPU or GPU memory, i.e., the memory
            of one batch input data. Default True.
        prefetch_factor (int, optional): Number of batch data the DataLoader would prefetch
            if use_buffer_reader=True. Default 2.
        use_shared_memory (bool, optional): whether to use shared memory to speed up
            putting data into inter-process queue, set :attr:`use_shared_memory`
            as True only when the shared memory space on your machine(e.g.
            space of '/dev/shm' on Linux operating sysytem) is large enough.
            Shared memory will only be enabled in multi-process mode(num_workers
            > 0). Default True.
        timeout(int, optional): the timeout value for getting data form output queue
            of subprocesses. Default 0.
        worker_init_fn(callable, optional): init function which will be called with
            worker id on each subproces starting if not set as None. Default
            None.

    Returns:
        DataLoader: an iterable object for data iterating, each elemnet of the generated data is a Tensor.

    Examples:

        .. code-block:: python

            >>> import numpy as np

            >>> import paddle
            >>> import paddle.nn as nn
            >>> import paddle.nn.functional as F
            >>> from paddle.io import Dataset, BatchSampler, DataLoader

            >>> BATCH_NUM = 20
            >>> BATCH_SIZE = 16
            >>> EPOCH_NUM = 4

            >>> IMAGE_SIZE = 784
            >>> CLASS_NUM = 10

            >>> # define a random dataset
            >>> class RandomDataset(Dataset):
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
            ...         label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)

            >>> class SimpleNet(nn.Layer):
            ...     def __init__(self):
            ...         super().__init__()
            ...         self.fc = nn.Linear(IMAGE_SIZE, CLASS_NUM)
            ...
            ...     def forward(self, image, label=None):
            ...         return self.fc(image)
            ...
            >>> simple_net = SimpleNet()
            >>> opt = paddle.optimizer.SGD(learning_rate=1e-3,
            ...                             parameters=simple_net.parameters())
            ...
            >>> loader = DataLoader(dataset,
            ...                     batch_size=BATCH_SIZE,
            ...                     shuffle=True,
            ...                     drop_last=True,
            ...                     num_workers=2)
            ...
            >>> for e in range(EPOCH_NUM):
            ...     for i, (image, label) in enumerate(loader()):
            ...         out = simple_net(image)
            ...         loss = F.cross_entropy(out, label)
            ...         avg_loss = paddle.mean(loss)
            ...         avg_loss.backward()
            ...         opt.minimize(avg_loss)
            ...         simple_net.clear_gradients()
            ...         print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy())))

    Notes:
        For reading iterable dataset with multiprocess Dataloader,
        please see :code:`paddle.io.IterableDataset`
    NTr   Fr   r   c                 C   s  || _ |	| _|| _|| _|| _|| _|st s|d usJ d|| _|d u r)t }t	|t
tfr5t|}nt|}t|| _|
dksFJ d|
dkr[tjdksTtjdkr[td d}
|
| _|dksfJ d|| _|rr|
dkrrd| _|dkszJ d	|| _t	|trtj| _|rtd
| |d urtdntj| _|d ur|dkr|s|rJ d|| _d | _n+|d u rd | _d | _n |dksJ d|| _t	|trt||| _n	t ||||d| _|| _!| jd u| _"d| _#t rt$ d u rdnt$ | _#|| _%d | _&t'| ( | _d S )Nz.feed_list should be set when return_list=Falser   z*num_workers should be a non-negative valuer<   r=   zDataLoader with multi-process mode is not supported on MacOs and Windows currently. Please use signle-process mode with num_workers = 0 insteadz*prefetch_factor should be a positive valueFz&timeout should be a non-negative valuez5IterableDataset not support shuffle, but got shuffle=z0IterableDataset expect unspecified batch_samplerr   zJbatch_size/shuffle/drop_last should not be set when batch_sampler is givenzMbatch_size should be None or a positive value when batch_sampler is not given)rD   rE   rI   rJ   T))return_list
collate_fnuse_buffer_readerprefetch_factorworker_init_fnrD   r   	feed_listr   r   r   r   r   r   r%   r!   r>   r?   warningswarnr0   use_shared_memorytimeoutr
   r   ITERdataset_kind
ValueErrorZMAPrL   rE   r   r	   rJ   auto_collate_batchZ
pin_memoryr   _persistent_workers	_iteratorr&   r;   )r+   rD   ra   r!   r\   rL   rE   rI   rJ   r]   r0   r^   r_   rd   re   r`   Zpersistent_workersr   r   r   r,     s   






zDataLoader.__init__c                 C   s.   | j tjkr
td| jrt| jS t| jS )Nz'length of IterableDataset not supported)rg   r   rf   rh   ri   r   rL   rD   r@   r   r   r   __len__  s
   

zDataLoader.__len__c                 C   sJ   | j dkr	t| S | jr!| jd u rt| | _| jS | j  | jS t| S )Nr   )r0   r   rj   rk   r   _resetr@   r   r   r   __iter__  s   



zDataLoader.__iter__c                 C   s   |   S r   )rn   r@   r   r   r   r;     s   zDataLoader.__call__)NNTNr   FFNr   Tr   Tr   NF)rX   rY   rZ   __doc__r,   rl   rn   r;   r   r   r   r   r[      s,     +
s	r[   )r   )!rK   r3   r(   r>   r2   rb   rM   Zbase.frameworkr   r   r   Z	frameworkr   r   Z
dataloaderr	   r
   r   Zdataloader.batch_samplerr   Zdataloader.dataloader_iterr   r   r   ZQUEUE_GET_TIMEOUTr   r   r   r   r   r%   r&   r[   r   r   r   r   <module>   s,   
	 
