o
    j                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZ ddl	m
Z
 dd Zdd Zd5ddZdd Zdd Zd6ddZdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Z e!d4kre   dS dS )7    N)DictListSet   )pymupdfc                 C   s   d|  d ddS )N K   -)center)x r   Q/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/pymupdf/__main__.pymycenter   s   r   c                 C   s   |d }|d }|dkr|  |S dd }t| |}t| |}	 |j|jkr;|j|j  kr4dkr;n n|jdksRtd| d| d t| d}||S t|}||j d }}||S )	zReturn image for a given XREF.r   r   c                 S   s"   | j jdkr| S ttj| }|S )N   )
colorspacenr   PixmapcsRGB)pixZtpixr   r   r   getimage   s   zrecoverpix.<locals>.getimagezWarning: unsupported /SMask z for :N)	Zextract_imager   r   Zirectalphar   messageZ	set_alphaZsamples)docitemr   sr   Zpix1pix2r   r   r   r   
recoverpix   s"   
2

r   FTc                 C   s   t | }|js|du rtd d}|js|S |r<||}|s&td |du r:d}|dkr2d}t d|  |S td	|j d
 |S )z!Open and authenticate a document.Tz$this command supports PDF files onlyzauthentication unsuccessfuluser   ownerzauthenticated as 'z' requires a password)	r   openZis_pdfsysexitZ
needs_passZauthenticater   name)filenamepasswordshowpdfr   rcZ
auth_levelr   r   r   	open_file9   s$   



r,   c                 C   sN   t dd |  D d }|  D ]\}}|| d| }t| qdS )zPrint a Python dictionary.c                 S   s   g | ]}t |qS r   )len).0kr   r   r   
<listcomp>Q       zprint_dict.<locals>.<listcomp>r   : N)maxkeysitemsrjustr   r   )r   lr/   vmsgr   r   r   
print_dictO   s
   r:   c                 C   s   t |dd | |}t | | |rJ| }z|dd }|| }|dr/d}W n ty;   d}Y nw t d| d t d	 t d
 dS )zPrint an object given by XREF number.

    Simulate the PDF source in "pretty" format.
    For a stream also print its size.
    dz 0 objz/Lengthr   z0 Runknownz
stream
...z bytesZ	endstreamZendobjN)r   r   Zxref_objectZxref_is_streamsplitindexendswith	Exception)r   xrefZxref_strtempidxsizer   r   r   
print_xrefW   s"   




rE   pagec                 C   s~  t |d }| d|dd} | d}g }t|D ]\}}|d }| rLt|}	d|	  kr5|k r?n n|t| ntd| d|d qz|d	\}
}t|
}
t|}W n t	yr   td| d
|d Y nw d|
  kr}|k rn nd|  kr|k sn td| d
|d |
|kr||
 q|
|k r|t
t|
|d 7 }q|t
t|
|d d7 }q|S )aK  Transform a page / xref specification into a list of integers.

    Args
    ----
        rlist: (str) the specification
        limit: maximum number, i.e. number of pages, number of objects
        what: a string to be used in error messages
    Returns
    -------
        A list of integers representing the specification.
    r   Nr    ,zbad z specification at item r;   r	   z range specification at item r   )strreplacer=   	enumerate	isdecimalintappendr$   r%   r@   listrange)ZrlistlimitwhatrG   Z	rlist_arrZout_listseqr   r   ii1i2r   r   r   get_listn   s8   
.
rX   c                 C   s  t | j| jd}tj| jd }d}|dkr|d }d}t|d}|j}t	d| j|j
| d |||d |d	 f  |j}|d
kr\| }d}|dkrPd}t	d| d| d | }|d
krnt	d|dd t	  | jrt	td | }t|| t	  | jrt	td t|j t	  | jrt	td t| j| dd}	|	D ]}t|| t	  q| jrt	td t| j|j
d }
|
D ]}|d }||}t	d|dd t|| t	  q| jrt	td t	|  t	  |  d S )NTi   ZKBi  MBr   z7'%s', pages: %i, objects: %i, %g %s, %s, encryption: %sformat
encryptionr   rH      znot zdocument contains z root form fields and is signedr;    embedded fileszPDF catalogzPDF metadatazobject informationrA   )rS   zpage informationzPage r   zPDF trailer)r,   inputr(   ospathgetsizeroundmetadatar   r   
page_countZxref_lengthZis_form_pdfZget_sigflagsZembfile_countcatalogr   Zpdf_catalogrE   r:   ZxrefsrX   pagesZ	page_xreftrailerZpdf_trailerclose)argsr   rD   flagmetar   r   sign_strrA   Zxreflpagelpnor   r   r   r)      sx   








r)   c                 C   s   t | j| jdd}| j}d|}| js0|j| j| j| j	| j
| j| j| j|| j| j| jd d S t| j|jd }t }|D ]}|d }|j|||d q?|j| j| j| j	| j
| j| j| j|| j| j| jd |  |  d S )NTr*   Zkeepnonezrc4-40zrc4-128zaes-128zaes-256)
garbagedeflateprettycleanasciilinearr[   Zowner_pwZuser_pwZpermissionsr   Z	from_pageZto_page)r,   r_   r(   r[   r>   rg   saveoutputrs   compressru   sanitizerw   rx   r!   r   Z
permissionrX   re   r   r#   
insert_pdfri   )rj   r   r[   Zencryptrg   Zoutdocro   r   r   r   r   rv      sR   rv   c           
      C   s   | j }t }|D ]T}|d}t|dkr|d nd}t|d |dd}d|dd }|r@td|dd |jd }nt	d|jd }|D ]}	|j
||	d |	d d qJ|  q	|j| jd	dd
 |  dS )z&Join pages from several PDF documents.rI   r   Nr   Trp   r    ry   r   )rs   rt   )r_   r   r#   r=   r-   r,   joinrX   re   rQ   r~   ri   rz   r{   )
rj   Zdoc_listr   Zsrc_itemZsrc_listr(   srcrg   Z	page_listrU   r   r   r   doc_join  s   
 
r   c           	      C   sP  t | j| jdd}| s| jr| j| jkrtd t | j| j}| j	r*t
| j	nt
 }t
| }|r?||ks>td n|}|sHtd |t
| @ }|r\tdt|  |D ])}||}||}|j|||d |d |d	 d
 td| d|j	 d q^|  | jr| j| jkr|j| jdd n|  |  dS )z!Copy embedded files between PDFs.Trp   cannot save PDF incrementallyz%not all names are contained in sourceznothing to copyz0following names already exist in receiving PDF: r'   	ufilenamedescr'   r   r   zcopied entry 'z' from 'r"   r\   rs   N)r,   r_   r(   can_save_incrementallyr{   r$   r%   sourceZ	pwdsourcer&   setembfile_namesrJ   embfile_infoembfile_getembfile_addr   r   ri   rz   saveIncr)	rj   r   r   namesZ	src_namesZ	intersectr   infoZbuffr   r   r   embedded_copy   sF   




r   c              
   C   s   t | j| jdd}| s| jr| j| jkrtd z|| j W n" t	t
jjfyE } ztd| jd|  W Y d}~nd}~ww | jrO| j| jkrT|  n|j| jdd |  dS )	zDelete an embedded file entry.Trp   r   no such embedded file r2   Nr   r   )r,   r_   r(   r   r{   r$   r%   embfile_delr&   
ValueErrorr   mupdfFzErrorBaser   rz   ri   )rj   r   er   r   r   embedded_delH  s    
$
r   c              
   C   s:  t | j| jdd}z|| j}|| j}W n" ttjj	fy9 } zt
d| jd|  W Y d}~nd}~ww | jr@| jn|d }| jsp| jsptj|rXt
d|  tj|}|t tj spt
d|  t|d	}|| W d   n1 sw   Y  td
| j d| d |  dS )z&Retrieve contents of an embedded file.Trp   r   r2   Nr'   z6refusing to overwrite existing file with stored name: z9refusing to write stored name outside current directory: wbzsaved entry 'z' as 'r"   )r,   r_   r(   r   r&   r   r   r   r   r   r$   r%   r{   unsafer`   ra   existsabspath
startswithgetcwdsepr#   writer   ri   )rj   r   streamr;   r   r'   Zfilename_absr{   r   r   r   embedded_get[  s(   $r   c                 C   s@  t | j| jdd}| s| jdu s| j| jkrtd z|| j td| j d W n	 t	y8   Y nw t
j| jrGt
j| jsQtd| j d t| jd	}| }W d   n1 sfw   Y  | j}|}| jsv|}n| j}|j| j||||d
 | jr| j| jkr|  n|j| jdd |  dS )zInsert a new embedded file.Trp   Nr   zentry 'z' already existszno such file 'r"   rbr   r\   r   )r,   r_   r(   r   r{   r$   r%   r   r&   r@   r`   ra   r   isfiler#   readr   r   r   rz   ri   rj   r   fr   r'   r   r   r   r   r   embedded_addp  s6   


r   c                 C   sh  t | j| jdd}| s| jdu s| j| jkrtd z|| j W n t	y8   td| j d Y nw | j
durgtj
| j
rgtj
| j
rgt| j
d}| }W d   n1 saw   Y  nd}| jrp| j}nd}| jry| j}n	| jr| j}nd}| jr| j}nd}|j| j||||d | jdu s| j| jkr|  n|j| jd	d
 |  dS )z0Update contents or metadata of an embedded file.Trp   Nr   no such embedded file 'r"   r   r   r\   r   )r,   r_   r(   r   r{   r$   r%   r   r&   r@   ra   r`   r   r   r#   r   r'   r   r   Zembfile_updr   rz   ri   r   r   r   r   embedded_upd  sJ   



r   c                 C   sN  t | j| jdd}| }| jdurQ| j|vr"td| j d n/t  d}t	|dkr0d}td	t	|d
d| d t  t
|| j t  dS |s_td|j d dS t	|dkrsd|j dt	|d
d}nd|j d}t| t  |D ]}| jst| q||}t
|| t  q|  dS )zList embedded files.Trp   Nr   r"   rH   r   r   zprinting 1 of r;   z embedded filer   z' contains no embedded filesz' contains the following r^   z&' contains the following embedded file)r,   r_   r(   r   r&   r$   r%   r   r   r-   r:   r   detailri   )rj   r   r   pluralr9   r&   _r   r   r   embedded_list  s<   





r   c              
   C   s  | j s| jstd t| j| jdd}| jr!t| j|j	d }nt
d|j	d }| js4tjtj}n| j}tj|rCtj|sLtd| d t }t }|D ]}| j r||d }|D ]N}|d }	|	|vr||	 ||	\}
}}}|dks|sqbtj||
d	d
 d
|	 d| }t|d}|| W d   n1 sw   Y  d}qb| jr.||d }|D ]o}|d }	|	|vr-||	 t||}t|tu r|d }|d }tj|d|	dd| }t|d}|| W d   n	1 sw   Y  qtj|d|	dd}|jjdk r!|nt !t j"|}|#| qqT| j rBt $dt%|dd| d | jrUt $dt%|dd| d |&  dS )z)Extract images and / or fonts from a PDF.z"neither fonts nor images requestedTrp   r   zoutput directory z does not existr   zn/ar   r	   .r   Nextimagezimg-r;   z.pngr   zsaved z fonts to 'r"   z images to ')'ZfontsZimagesr$   r%   r,   r_   r(   rg   rX   re   rQ   r{   r`   ra   r   curdirr   isdirr   Zget_page_fontsaddZextract_fontr   rK   r#   r   Zget_page_imagesr   typedictr   r   r   r   r   rz   r   r-   ri   )rj   r   rg   Zout_dirZ
font_xrefsZimage_xrefsro   Zitemlistr   rA   Zfontnamer   r   bufferoutnameoutfiler   Zimgdatar   r   r   r   extract_objects  st   





r   c           	      C   sX   |rdnt dg}| jd|d}|s|s|| d S ||jddd || d S )N   
   textflagsutf8surrogatepasserrors)bytesget_textr   encode)	rF   textoutGRIDfontsize
noformfeed
skip_emptyr   eopr   r   r   r   page_simple+  s   

r   c           
      C   sz   |rdnt dg}| jd|d}|g kr|s|| d S |jdd d |D ]}	||	d jd	d
d q'|| d S )Nr   r   blocksr   c                 S   s   | d | d fS )Nr\   r   r   )br   r   r   <lambda>>  s    z page_blocksort.<locals>.<lambda>keyr   r   r   r   )r   r   r   sortr   )
rF   r   r   r   r   r   r   r   r   r   r   r   r   page_blocksort7  s   

r   c           !         s@  |rdnt dg}dtt dtdtfdd}dtt dtfd	d
}	dtt dtjf fdd}
dtdtfdddd }| jd|dd }|
|| \}}}}}|g kr]|s[|	| d S |	||}|j
dd d i }|D ]}|\}}}}|||}||g }|| |||< qnt| }|
  || }i }|D ]/}|| }t|}|dk rd||< qdd |D }|
  t|}||k r|}|d ||< q||d |d   |t|  d }|d }|	d |D ].}||k r|	d ||7 }||k s||||| || } |	| d  jd!d"d# || }q|	| d S )$Nr   r   valuesvaluereturnc                 S   s2   t | |}|r| |d  S td|dd|  )zFind the right row coordinate.

        Args:
            values: (list) y-coordinates of rows.
            value: (int) lookup for this value (y-origin of char).
        Returns:
            y-ccordinate of appropriate line for value.
        r   z	Line for gz not found in )bisectbisect_rightRuntimeError)r   r   rU   r   r   r   find_line_indexI  s   	z$page_layout.<locals>.find_line_indexrowsc                 S   sJ   t | } |   | d g}| dd  D ]}||d | kr"|| q|S )Nr   r   r   )rP   r   rO   )r   r   Znrowshr   r   r   curate_rowsX  s   

z page_layout.<locals>.curate_rowsr   rF   c              
      s  t  }|jj}|jj}|}|}d}g }| D ]}	|	d D ]}
|
d dkr$q|
d \}}}}|dk s6||jjkr7q|| }||krA|}|
d D ]}|d  krNqE|d D ]z}|d \}}}}|| }|d	 \}}tt|}|| |d
 }||kr|dkr|}||k r|}|dkr|g kr|d \}}}}||kr|tdkr|| }n|dkrtd}n|dkrtd}n|}||||f|d< qR|||||f qRqEqq|||||fS )Nr   linesdir)r   r   ZbboxspansrD   charsorigincr   r      rU     r7     )	r   rectwidthheightrN   rc   r   chrrO   )r   rF   r   Z
page_widthZpage_height	rowheightleftrightr   blocklineZx0Zy0x1y1r   spanr   r   cwidthoxoychZold_chold_oxZold_oyZ
old_cwidthligr   joinligaturer   r   process_blocksa  s`   


)z#page_layout.<locals>.process_blocksr   c                 S   st   | dkrt dS | dkrt dS | dkrt dS | dkr t dS | d	kr(t d
S | dkr0t dS | dkr8t dS | S )zReturn ligature character for a given pair / triple of characters.

        Args:
            lig: (str) 2/3 characters, e.g. "ff"
        Returns:
            Ligature, e.g. "ff" -> chr(0xFB00)
        ffr   fii  fli  ffir   fflr   fti  sti  )r   )r   r   r   r   r     s   	z!page_layout.<locals>.joinligaturec                 S   s   d}d}d}d}|t jkrtd|d|D ]`}|\}	}
}}|
|  }
|
| }||	kr4|
| |d kr4q|	dkrA||
 | dkrAq|	}|
|| k rR||	7 }|}|
}q|	dkrWqt|
| t| }|
|kro|dkro|d| 7 }||	7 }|}|
}q| S )	a  Produce the text of one output line.

        Args:
            left: (float) left most coordinate used on page
            slot: (float) avg width of one character in any font in use.
            minslot: (float) min width for the characters in this line.
            chars: (list[tuple]) characters of this line.
        Returns:
            text: (str) text string for this line
        rH   r   z#program error: minslot too small = r   g?r   g?r   )r   ZEPSILONr   rN   r-   rstrip)r   slotZminslotlcharsr   Zold_charZold_x1r   r   charr   r   r   r   deltar   r   r   make_textline  s:   
z"page_layout.<locals>.make_textlineZrawdictr   c                 S   s   | d S )Nr   r   )r   r   r   r   r     s    zpage_layout.<locals>.<lambda>r   r    r   c                 S   s   g | ]}|d  qS )r\   r   )r.   r   r   r   r   r0     r1   zpage_layout.<locals>.<listcomp>r   r   g333333?
r   r   r   )r   r   rN   r   r   r   ZPagerJ   r   r   r   getrO   rP   r4   r-   
statisticsZmedianr   )!rF   r   r   r   r   r   r   r   r   r   r   r
  r   r   r   r   r   r   r   r   r   r   yr  r4   r  Zminslotsr/   ZccountwidthsZ	this_slotZrowposr   r   r   r   page_layoutE  sb   	46






$


r  c                 C   s   t | j| jdd}t| j|jd }| j}|d u r&tj	|j
\}}|d }t|dJ}tjtjB }| jr:|tjN }| jrB|tjN }| jrJ|tjN }tttd}|D ]}	||	d  }
|| j |
|| j| j| j| j|d qRW d    d S 1 sxw   Y  d S )NFrp   r   z.txtr   simpler   layoutr   )r,   r_   r(   rX   rg   re   r{   r`   ra   splitextr&   r#   r   ZTEXT_PRESERVE_LIGATURESZTEXT_PRESERVE_WHITESPACEZconvert_whiteZnoligaturesZextra_spacesZTEXT_INHIBIT_SPACESr   r   r  modegridr   r   r   )rj   r   rn   r{   r'   r   r   r   funcro   rF   r   r   r   gettext&  s>   


"r  c                 C   s   t d t d d S )NzThis is from PyMuPDF message().zThis is from PyMuPDF log().)r   r   log)rj   r   r   r   	_internalG  s   
r  c                  C   sL  t jdtdd} | jddd}|jdtdd	}|jd
tdd |jddd |jdddd |jdddd |jdddd |jdtdd |jdtdd |jtd |jdtdd	}|jd
tdd |jdtd d |jddd |jd!d"d#d$d% |jd&td'd |jd(td)d |jd*t	d+t
d,d-d. |jd/dd0d1d2 |jd3dd0d4d2 |jd5dd0d6d2 |jd7t	d8d9d: |jd;dd0d<d2 |jd=dd0d>d2 |jdd?d |jtd |jd@tdAdBdC}|jd
dDdEdF |jdGdHdIdJ |jtd |jdKtdLd	}|jd
tdd |jdMddNd |jdOddPd |jdGdQd |jddd |jdtdRd |jtd |jdStdTd	}|jd
dd |jdUdVd |jdWddXd |jddd |jtd |jdYtdZd	}|jd
dd |jddd |jdGd[d |jdUdHd\dJ |jd]dHd^dJ |jd_d`d |jtd |jdatdbd	}|jd
dd |jddd |jdGd[d |jdUdHdcdJ |jtd |jddtdedfdC}|jd
dd |jdUdHdgdJ |jddd |jdGdhd |jd]did |jdjdkd |jdldmd |jd_dnd |jtd |jdotdpd	}	|	jd
tdd |	jdUdHdgdJ |	jddd |	jdqd0ddrds |	jdGdtd |	jtd |jdutdvd	}
|
jd
tdwd |
jddxd |
jdGdyd |
jdzdHd{dJ |
jd|d}d |
jdUdDd~dF |
jtd |jdtdd	}|jd
tdd |jddd |jdtdddd. |jdtddd |jdddd0d |jdddd0d |jdddd0d |jdddd0d |jdddd0d |jdGdd |jdtddd |jdtddd |jtd |jdtdd	}|jtd |  }t|ds|   dS || dS )zDefine command configurations.r   zBasic PyMuPDF Functions)progdescriptionZSubcommandsz/Enter 'command -h' for subcommand specific help)titlehelpr)   zdisplay PDF information)r  r_   zPDF filename)r   r  z	-passwordr(   )r  z-catalog
store_truezshow PDF catalog)actionr  z-trailerzshow PDF trailerz	-metadatazshow PDF metadataz-xrefsz&show selected objects, format: 1,5-7,Nz-pagesz'show selected pages, format: 1,5-7,50-N)r  rv   z.optimize PDF, or create sub-PDF if pages givenr{   zoutput PDF filenamez-encryptionzencryption methodrq   rr   )r  choicesdefaultz-ownerzowner passwordz-userzuser passwordz-garbagezgarbage collection level   r   )r   r  r!  r"  z	-compressFzcompress (deflate) output)r   r"  r  z-asciizASCII encode binary dataz-linearzformat for fast web displayz-permissionr   zinteger with permission levels)r   r"  r  z	-sanitizezsanitize / clean contentsz-prettyzprettify PDF structurez/output selected pages pages, format: 1,5-7,50-Nr   zjoin PDF documentsz3specify each input as 'filename[,password[,pages]]')r  epilog*zinput filenames)nargsr  z-outputTzoutput filename)requiredr  extractz extract images and fonts to diskz-imageszextract imagesz-fontszextract fontsz-folder to receive output, defaults to currentz-consider these pages only, format: 1,5-7,50-Nz
embed-infozlist embedded filesz-namezif given, report only this onez-detailzdetail informationz	embed-addzadd embedded filez-output PDF filename, incremental save if nonezname of new entryz-pathzpath to data for new entryz-desczdescription of new entryz	embed-delzdelete embedded filezname of entry to deletez	embed-updzupdate embedded filez*except '-name' all parameters are optionalzname of entryz-Output PDF filename, incremental save if nonezpath to new data for entryz	-filenameznew filename to store in entryz
-ufilenamez&new unicode filename to store in entryz!new description to store in entryzembed-extractzextract embedded file to diskz-unsafezPallow write to stored name even if an existing file or outside current directory)r"  r   r  z'output filename, default is stored namez
embed-copyz copy embedded files between PDFszPDF to receive embedded fileszpassword of inputz2output PDF, incremental save to 'input' if omittedz-sourcezcopy embedded files from herez
-pwdsourcezpassword of 'source' PDFzrestrict copy to these entriesr  z(extract text in various formatting modeszinput document filenamezpassword for input documentz-modez-mode: simple, block sort, or layout (default)r  r  z select pages, format: 1,5-7,50-Nz1-N)r   r  r"  z-noligaturesz*expand ligature characters (default False))r   r  r"  z-convert-whitez6convert whitespace characters to white (default False)z-extra-spacesz%fill gaps with spaces (default False)z-noformfeedz-write linefeeds, no formfeeds (default False)z-skip-emptyz+suppress pages with no text (default False)z3store text in this file (default inputfilename.txt)z-gridz+merge lines if closer than this (default 2)r    z	-fontsizez4only include text with a larger fontsize (default 3)r\   Zinternalzinternal testingr  N)argparseArgumentParserr   add_subparsers
add_parseradd_argumentrJ   set_defaultsr)   rN   rQ   rv   r   r   r   r   r   r   r   r   floatr  r  
parse_argshasattr
print_helpr  )parserZsubpsZps_showZps_cleanZps_joinZ
ps_extractZps_embed_addZps_embed_delZps_embed_updZps_embed_extractZps_embed_copyZ
ps_gettextZps_internalrj   r   r   r   mainK  s  
r4  __main__)FT)rF   )"r)  r   r`   r$   r  typingr   r   r   rH   r   r   r   r,   r:   rE   rX   r)   rv   r   r   r   r   r   r   r   r   r   r   r  r  r  r4  __name__r   r   r   r   <module>   sF   
$
/?/("2%B b!  7
