o
    "j6                     @   sb   d dl Z d dlZddlmZ ddlmZmZmZ G dd deZ	G dd dZ
G d	d
 d
e	ZdS )    N   )IterableDataset)RandomSamplerSamplerSequenceSamplerc                   @   s4   e Zd ZdZ					dddZdd Zd	d
 ZdS )BatchSampleraf  
    A base implement of batch sampler used by `paddle.io.DataLoader`
    which yield mini-batch indices(a list/tuple with length as
    mini-batch size and holds sample indices) iterably.

    Batch sampler used by :code:`paddle.io.DataLoader` should be a subclass
    of :code:`paddle.io.BatchSampler`, BatchSampler subclasses should
    implement following methods:

    :code:`__iter__`: return mini-batch indices iterably.

    :code:`__len__`: get mini-batch number in an epoch.


    Args:
        dataset(Dataset, optional): this should be an instance of a subclass of :ref:`api_paddle_io_Dataset` or
                :ref:`api_paddle_io_IterableDataset` or other python object which implemented
                :code:`__len__` for BatchSampler to get indices as the
                range of :attr:`dataset` length. Default None, disabled.
        sampler (Sampler, optional): this should be a :ref:`api_paddle_io_Sample`
                instance which implemented :code:`__iter__` to generate
                sample indices. :attr:`sampler` and :attr:`dataset`
                can not be set in the same time.  If :attr:`sampler`
                is set, :attr:`dataset` should not be set. Default None, disabled.
        shuffle(bool, optional): whether to shuffle indices order before generating
                batch indices. Default False, don't shuffle indices before generating batch indices.
        batch_size(int, optional): sample indice number in a mini-batch indices. default 1, each mini-batch includes 1 sample.
        drop_last(bool, optional): whether drop the last incomplete (less than 1 mini-batch) batch dataset. Default False, keep it.
    see :ref:`api_paddle_io_DataLoader`

    Returns:
        BatchSampler: an iterable object for indices iterating

    Examples:

        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.io import RandomSampler, BatchSampler, Dataset

            >>> np.random.seed(2023)
            >>> # init with dataset
            >>> class RandomDataset(Dataset):
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> bs = BatchSampler(dataset=RandomDataset(100),
            ...                     shuffle=False,
            ...                     batch_size=16,
            ...                     drop_last=False)
            ...
            >>> for batch_indices in bs:
            ...     print(batch_indices)
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
            ...
            [96, 97, 98, 99]
            >>> # init with sampler
            >>> sampler = RandomSampler(RandomDataset(100))
            >>> bs = BatchSampler(sampler=sampler,
            ...                     batch_size=8,
            ...                     drop_last=True)
            ...
            >>> for batch_indices in bs:
            ...     print(batch_indices)
            [56, 12, 68, 0, 82, 66, 91, 44]
            ...
            [53, 17, 22, 86, 52, 3, 92, 33]
    NFr   c                 C   s   |d u r$|d usJ dt |tsJ dt| |r J d|| _n,t |tr-J d|d u s5J dt |tsCJ dt| |rKt|| _nt|| _t |trY|dks`J d| || _	t |tsqJ d	t| || _
d S )
Nz'either dataset or sampler should be setz/sampler should be a paddle.io.Sampler, but got z+shuffle should be False when sampler is setz1dataset should not be a paddle.io.IterableDatasetz'should not set both dataset and samplerz+shuffle should be a boolean value, but got r   z1batch_size should be a positive integer, but got z-drop_last should be a boolean value, but got )
isinstancer   typesamplerr   boolr   r   int
batch_size	drop_last)selfdatasetr
   shuffler   r    r   c/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddle/io/dataloader/batch_sampler.py__init__e   sJ   



zBatchSampler.__init__c                 c   sX    g }| j D ]}|| t|| jkr|V  g }q| js(t|dkr*|V  d S d S d S )Nr   )r
   appendlenr   r   )r   batch_indicesidxr   r   r   __iter__   s   


zBatchSampler.__iter__c                 C   s.   t | j}|t| j | jd  7 }|| j S Nr   )r   r
   r   r   r   r   num_samplesr   r   r   __len__   s   

zBatchSampler.__len__)NNFr   F)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r      s    O
'
r   c                   @   s   e Zd ZdddZdd ZdS )_InfiniteIterableSamplerr   c                 C   s"   t |ts	J d|| _|| _d S )Nz:dataset should be an instance of paddle.io.IterableDataset)r   r   r   r   )r   r   r   r   r   r   r      s   
z!_InfiniteIterableSampler.__init__c                 c   s    	 d g| j  V  q)N)r   r   r   r   r   r      s   z!_InfiniteIterableSampler.__iter__N)r   )r   r   r    r   r   r   r   r   r   r"      s    
r"   c                   @   s:   e Zd ZdZ				dddZdd Zdd	 Zd
d ZdS )DistributedBatchSamplera]	  Sampler that restricts data loading to a subset of the dataset.

    In such case, each process can pass a DistributedBatchSampler instance
    as a DataLoader sampler, and load a subset of the original dataset that
    is exclusive to it.

    .. note::
        Dataset is assumed to be of constant size.

    Args:
        dataset(Dataset): this could be an instance of subclass of :ref:`api_paddle_io_Dataset`
                     or other python object which implemented
                     `__len__` for BatchSampler to get indices of samples.
        batch_size(int): sample size of each mini-batch.
        num_replicas(int, optional): porcess number in distributed training.
            If :attr:`num_replicas` is None, :attr:`num_replicas` will be
            retrieved from :ref:`api_paddle_distributed_ParallelEnv` .
            Default None.
        rank(int, optional): the rank of the current process among :attr:`num_replicas`
            processes. If :attr:`rank` is None, :attr:`rank` is retrieved from
            :ref:`api_paddle_distributed_ParallelEnv`. Default None.
        shuffle(bool, optional): whther to shuffle indices order before genrating
            batch indices. Default False.
        drop_last(bool, optional): whether drop the last incomplete(less than a mini-batch) batch dataset size.
            Default False.

    Returns:
        DistributedBatchSampler, return an iterable object for indices iterating.

    Examples:
        .. code-block:: python

            >>> import numpy as np

            >>> from paddle.io import Dataset, DistributedBatchSampler

            >>> # init with dataset
            >>> class RandomDataset(Dataset):
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> dataset = RandomDataset(100)
            >>> sampler = DistributedBatchSampler(dataset, batch_size=64)

            >>> for data in sampler:
            ...     # do something
            ...     break
    NFc                 C   s  || _ t|tr|dksJ d|| _t|tsJ d|| _t|ts(J dddlm} |d urCt|tr;|dks?J d|| _n| j| _|d ur]t|trU|dksYJ d|| _	n| j	| _	|| _
d| _ttt| j d | j | _| j| j | _d S )	Nr   z'batch_size should be a positive integerz!shuffle should be a boolean valuez$drop_last should be a boolean number)ParallelEnvz)num_replicas should be a positive integerz%rank should be a non-negative integerg      ?)r   r   r   r   r   r   Zpaddle.distributedr%   nranks
local_rankr   epochmathceilr   r   
total_size)r   r   r   Znum_replicasrankr   r   r%   r   r   r   r      sF   	




 z DistributedBatchSampler.__init__c                 #   s.   t  j}t| } jt | }|t |kr#||d | 7 }n||t|t |  d | 7 }t | jks=J  jrQtj	
 j|   jd7  _ fdd} jdkr`||}t | jksiJ t|}g }|D ]}|| t | jkr|V  g }qq jst |dkr|V  d S d S d S )Nr   c                    s   g } j  j j  }| j dksJ | j }t j j t| |  j j D ]}|| || j   q+| t| | d  } ||  j|  jd |   |S )Nr   r   )r+   r   r&   ranger'   r   extend)indicesZsubsampled_indicesZlast_batch_sizeZlast_local_batch_sizeir#   r   r   _get_indices_by_batch_size!  s.   



zDDistributedBatchSampler.__iter__.<locals>._get_indices_by_batch_sizer   )r   r   npZarangetolistr+   r)   r*   r   randomZRandomStater(   r&   r   iterr   r   r   )r   r   r/   Zpadding_sizer1   Z_sample_iterr   r   r   r#   r   r     s8   



z DistributedBatchSampler.__iter__c                 C   s*   | j }|t| j | jd  7 }|| j S r   )r   r   r   r   r   r   r   r   r   G  s   
zDistributedBatchSampler.__len__c                 C   s
   || _ dS )a  
        Sets the epoch number. When :attr:`shuffle=True`, this number is used
        as seeds of random numbers. By default, users may not set this, all
        replicas (workers) use a different random ordering for each epoch.
        If set same number at each epoch, this sampler will yield the same
        ordering at all epoches.

        Arguments:
            epoch (int): Epoch number.

        Examples:
            .. code-block:: python

                >>> import numpy as np

                >>> from paddle.io import Dataset, DistributedBatchSampler

                >>> # init with dataset
                >>> class RandomDataset(Dataset):
                ...     def __init__(self, num_samples):
                ...         self.num_samples = num_samples
                ...
                ...     def __getitem__(self, idx):
                ...         image = np.random.random([784]).astype('float32')
                ...         label = np.random.randint(0, 9, (1, )).astype('int64')
                ...         return image, label
                ...
                ...     def __len__(self):
                ...         return self.num_samples
                ...
                >>> dataset = RandomDataset(100)
                >>> sampler = DistributedBatchSampler(dataset, batch_size=64)

                >>> for epoch in range(10):
                ...     sampler.set_epoch(epoch)
        N)r(   )r   r(   r   r   r   	set_epochL  s   
%z!DistributedBatchSampler.set_epoch)NNFF)r   r   r    r!   r   r   r   r6   r   r   r   r   r$      s    =
,8r$   )r)   numpyr2   r   r   r
   r   r   r   r   r"   r$   r   r   r   r   <module>   s    