o
    j0                     @   sl   d Z ddlZddlmZ ddlmZmZ ddlmZmZ G dd dZ	G d	d
 d
e	eZ
G dd de
ZdS )z9A group of instances, e.g. Blocks, Lines, Spans, Shapes.
    N   )Element)ITextTextDirection)solve_rects_intersection	graph_bfsc                   @   s   e Zd ZdZddefddZdd Zdd	 Zd
d Ze	dd Z
e	dd Zdd ZdefddZddefddZdd Zdd ZdS )BaseCollectionz1Base collection representing a list of instances.N	instancesc                 C   s   || _ g | _| |pg  dS )z)Init collection from a list of instances.N)_parent
_instancesextend)selfr	   parent r   [/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/pdf2docx/common/Collection.py__init__   s   zBaseCollection.__init__c                 C   s4   z| j | }W |S  ty   d| d}t|w )NzCollection index z out of range.)r   
IndexError)r   idxr	   msgr   r   r   __getitem__   s   zBaseCollection.__getitem__c                 C      dd | j D S )Nc                 s   s    | ]}|V  qd S Nr   .0instancer   r   r   	<genexpr>   s    z*BaseCollection.__iter__.<locals>.<genexpr>r   r   r   r   r   __iter__       zBaseCollection.__iter__c                 C   s
   t | jS r   )lenr   r   r   r   r   __len__      
 zBaseCollection.__len__c                 C   s   | j S r   )r
   r   r   r   r   r   !   s   zBaseCollection.parentc                 C   s2   t  }| jD ]}||jO }qt dd |D S )zbbox of combined collection.c                 S   s   g | ]}t |d qS )r   )round)r   xr   r   r   
<listcomp>+       z'BaseCollection.bbox.<locals>.<listcomp>)fitzRectr   bbox)r   rectr   r   r   r   r)   %   s   
zBaseCollection.bboxc                 C   s   |sd S | j | d S r   )r   append)r   r   r   r   r   r+   .   s   zBaseCollection.appendc                 C   s    |sd S |D ]}|  | qd S r   )r+   )r   r	   r   r   r   r   r   3   s   zBaseCollection.extendc                 C   s   g | _ | |pg  | S )zReset instances list.

        Args:
            instances (list, optional): reset to target instances. Defaults to None.

        Returns:
            BaseCollection: self
        )r   r   )r   r	   r   r   r   reset8   s   	zBaseCollection.resetc                 C   r   )z Store attributes in json format.c                 S   s   g | ]}|  qS r   )storer   r   r   r   r%   H   s    z(BaseCollection.store.<locals>.<listcomp>r   r   r   r   r   r-   F   s   zBaseCollection.storec                 O   s   t )z)Construct Collection from a list of dict.)NotImplementedError)r   argskwargsr   r   r   restoreK   s   zBaseCollection.restore)NNr   )__name__
__module____qualname____doc__listr   r   r   r!   propertyr   r)   r+   r   r,   r-   r1   r   r   r   r   r      s    	

r   c                   @   s   e Zd ZdZedd Zdd Zdedefdd	Zddede	de	fddZ
ddede	de	fddZdde	de	fddZdd Zdd Zdd ZdS )
CollectionzACollection of instance focusing on grouping and sorting elements.c                 C   s2   t dd | jD }t|dkrt|d S tjS )z@Get text direction. All instances must have same text direction.c                 s   s    | ]}|j V  qd S r   text_directionr   r   r   r   r   V   s    z,Collection.text_direction.<locals>.<genexpr>r   r   )setr   r    r6   r   ZMIX)r   resr   r   r   r:   S   s   zCollection.text_directionc                    s   t  j}dd t|D }t jD ]$\}}t|d |D ]}|| j| r6|| | || | qqt|} fdd|D }|S )a  Group instances according to user defined criterion.

        Args:
            fun (function): with 2 arguments representing 2 instances (Element) and return bool.

        Returns:
            list: a list of grouped ``Collection`` instances.
        
        Examples 1::

            # group instances intersected with each other
            fun = lambda a,b: a.bbox & b.bbox
        
        Examples 2::

            # group instances aligned horizontally
            fun = lambda a,b: a.horizontally_aligned_with(b)
        
        .. note::
            It's equal to a GRAPH searching problem, build adjacent list, and then search graph
            to find all connected components.
        c                 S      g | ]}t  qS r   r;   r   ir   r   r   r%   u       z$Collection.group.<locals>.<listcomp>r   c                    $   g | ]}   fd d|D qS )c                       g | ]} j | qS r   r   r?   r   r   r   r%      r&   z/Collection.group.<locals>.<listcomp>.<listcomp>	__class__r   groupr   r   r   r%         $ )r    r   range	enumerateaddr   )r   funnumindex_groupsr@   r   jgroupsr   r   r   rG   Z   s   
zCollection.groupdxdyc                    s   t  j}dd t|D }g d}}| | ||f} jD ]'}dd t|j|D }	|||	|	d f ||d |	|	d f |d7 }q|jdd d	 t|d| | t|}
 fd
d|
D }
|
S )ai  Collect connected instances into same group.

        Args:
            dx (float): x-tolerances to define connectivity
            dy (float): y-tolerances to define connectivity

        Returns:
            list: a list of grouped ``Collection`` instances.
        
        .. note::
            * It's equal to a GRAPH traversing problem, which the critical point in 
              building the adjacent list, especially a large number of vertex (paths).

            * Checking intersections between paths is actually a Rectangle-Intersection 
              problem, studied already in many literatures.
        c                 S   r=   r   r>   )r   _r   r   r   r%      rA   z4Collection.group_by_connectivity.<locals>.<listcomp>r   c                 S   s   g | ]\}}|| qS r   r   )r   abr   r   r   r%      s    r      c                 S   s   | d S )Nr   )itemr   r   r   <lambda>   s    z2Collection.group_by_connectivity.<locals>.<lambda>keyc                    rB   )c                    rC   r   r   r?   r   r   r   r%      r&   z?Collection.group_by_connectivity.<locals>.<listcomp>.<listcomp>rD   rF   r   r   r   r%      rH   )	r    r   rI   zipr)   r+   sortr   r   )r   rQ   rR   rM   rN   Zi_rect_xr@   Zd_rectr*   ZpointsrP   r   r   r   group_by_connectivity   s   



z Collection.group_by_connectivity        TFfactorsortedr:   c                    F    fdd}|  |}|r!r| jrdnd|jfddd |S )z.Group elements into columns based on the bbox.c                       | j | dS Nr`   r:   )Zvertically_align_withrT   rU   re   r   r   rY      r   z-Collection.group_by_columns.<locals>.<lambda>   r   c                    
   | j   S r   r)   rG   r   r   r   rY      r"   rZ   rG   is_vertical_textr]   r   r`   ra   r:   rL   rP   r   r`   r   r:   r   group_by_columns      
zCollection.group_by_columnsc                    rb   )z+Group elements into rows based on the bbox.c                    rc   rd   )Zhorizontally_align_withrf   re   r   r   rY      r   z*Collection.group_by_rows.<locals>.<lambda>r   r   c                    rh   r   ri   rj   rk   r   r   rY      r"   rZ   rl   rn   r   ro   r   group_by_rows   rq   zCollection.group_by_rowsc                    s@   dd }|  |}|r|r| jrdnd |j fddd |S )zGroup lines into physical rows.c                 S   s
   |  |S r   )Zin_same_rowrf   r   r   r   rY      r"   z3Collection.group_by_physical_rows.<locals>.<lambda>r   r   c                    rh   r   ri   rj   rk   r   r   rY      r"   rZ   rl   )r   ra   r:   rL   rP   r   rk   r   group_by_physical_rows   s   
z!Collection.group_by_physical_rowsc                 C   s2   | j r| jjdd d | S | jjdd d | S )zSort collection instances in reading order (considering text direction), e.g.
            for normal reading direction: from top to bottom, from left to right.
        c                 S      | j j| j j| j jfS r   )r)   y0x0x1er   r   r   rY      r&   z2Collection.sort_in_reading_order.<locals>.<lambda>rZ   c                 S   rt   r   )r)   rv   y1ru   rx   r   r   r   rY      r&   )is_horizontal_textr   r]   r   r   r   r   sort_in_reading_order   
   z Collection.sort_in_reading_orderc                 C   s2   | j s| jjdd d | S | jjdd d | S )zSort collection instances in a physical with text direction considered, e.g.
            for normal reading direction: from left to right.
        c                 S   rt   r   )r)   rv   ru   rw   rx   r   r   r   rY      r&   z/Collection.sort_in_line_order.<locals>.<lambda>rZ   c                 S   rt   r   )r)   rz   rv   ru   rx   r   r   r   rY      r&   )rm   r   r]   r   r   r   r   sort_in_line_order   r}   zCollection.sort_in_line_orderc                 C   s8   g }| j dddD ]}|  || q	| | dS )ak  Sort instances in reading order, especially for instances in same row. Taking 
        natural reading direction for example: reading order for rows, from left to right 
        for instances in row. In the following example, A comes before B::

                         +-----------+
            +---------+  |           |
            |   A     |  |     B     |
            +---------+  +-----------+
        
        Steps:

            * Sort elements in reading order, i.e. from top to bottom, from left to right.
            * Group elements in row.
            * Sort elements in row: from left to right.
        T)ra   r:   N)rs   r~   r   r,   )r   r	   rowr   r   r   sort_in_reading_order_plus   s
   z%Collection.sort_in_reading_order_plusN)r_   TF)FF)r2   r3   r4   r5   r7   r:   rG   floatr^   boolrp   rr   rs   r|   r~   r   r   r   r   r   r8   P   s    
)'r8   c                   @   sz   e Zd ZdZdefddZdefddZdedefdd	Zdefd
dZ	dde
fddZdd Zddejde
fddZdS )ElementCollectionz$Collection of ``Element`` instances.ry   c                 C   s   | j dur| j | dS dS )zUpdate parent bbox.N)r
   Z
union_bboxr   ry   r   r   r   _update_bbox  s   
zElementCollection._update_bboxc                 C   s8   |sdS | j | | | | jdur| j|_dS dS )zAppend an instance, update parent's bbox accordingly and set the parent of the added instance.

        Args:
            e (Element): instance to append.
        N)r   r+   r   r
   r   r   r   r   r   r+   
  s   
zElementCollection.appendnthc                 C   s,   |sdS | j || | | | j|_dS )zInsert a Element and update parent's bbox accordingly.

        Args:
            nth (int): the position to insert.
            e (Element): the instance to insert.
        N)r   insertr   r
   r   )r   r   ry   r   r   r   r     s   
zElementCollection.insertc                 C   s   | j |S )zDelete the ``nth`` instance.

        Args:
            nth (int): the position to remove.

        Returns:
            Collection: the removed instance.
        )r   pop)r   r   r   r   r   r   %  s   	zElementCollection.popFline_separate_thresholdc                 C   s   |s| j rdS t| dkrdS t|  dkrdS | jrdnd\}}| jddD ]&}tdt|D ]}t|| j| ||d  j|  }||krM  dS q1q(dS )z5Whether contained elements are in flow layout or not.Fr   T)r   rV   )rg   r   r9   )rm   r    rp   r{   rs   rI   absr)   )r   r   Zcell_layoutZidx0Zidx1r   r@   disr   r   r   is_flow_layout1  s   
$z ElementCollection.is_flow_layoutc                    s"   t t fdd| j}| |S )zvFilter instances contained in target bbox.

        Args:
            bbox  (fitz.Rect): target boundary box.
        c                    s     | jS r   )containsr)   rx   ri   r   r   rY   M  s    z5ElementCollection.contained_in_bbox.<locals>.<lambda>)r6   filterr   rE   )r   r)   r	   r   ri   r   contained_in_bboxF  s   
z#ElementCollection.contained_in_bboxMbP?r)   	thresholdc                 C   s|   g g }}| j D ]+}|j|@ }|jr|| qt| |j  d}||kr.|| q|| q| || |fS )aH  Split instances into two groups: one intersects with ``bbox``, the other not.

        Args:
            bbox (fitz.Rect): target rect box.
            threshold (float): It's intersected when the overlap rate exceeds this threshold. Defaults to 0.

        Returns:
            tuple: two group in original class type.
        rV   )r   r)   Zis_emptyr+   r#   Zget_arearE   )r   r)   r   ZintersectionsZno_intersectionsr   intersectionr`   r   r   r   split_with_intersectionQ  s   



z)ElementCollection.split_with_intersectionN)F)r   )r2   r3   r4   r5   r   r   r+   intr   r   r   r   r   r'   r(   r   r   r   r   r   r     s    r   )r5   r'   r   Zsharer   r   	algorithmr   r   r   r8   r   r   r   r   r   <module>   s   D 2