o
    'j                     @   s   d dl Z d dlZd dlZe jeZd dlZeje j	ed d dl
Z
d dlZd dlZd dlmZ d dlZd dlmZ d dlmZ dddZed	e j	ed
ddZeddZeddZd dlmZ d dlmZ d dlmZm Z m!Z!m"Z" d dl#m$Z$m%Z%m&Z&m'Z' d dl(m)Z)m*Z*m+Z+ d dl,m-Z-m.Z. d dl/m0Z0m1Z1m2Z2 e Z3g dZ4dgZ5dZ6ddgZ7e j8dZ9dZ:g dZ;dZ<d dgZ=d!d"id!d#id!d$id%d&d'd(d)d*d(d+d,d(d-d.d(d/d0d(d1d2d(d3d4d(d5d6d(d7d8d(d9d:d(d;d<d(d=d>d(d?d@d!dAiidBd!dCid!d#id!d$id%dDd'd(dEd*d(dFd,d(dGd.d(d/d0d(dHd2d(dId4d(dJd6d(d7d8d(dKd:d(d;d<d(dLd>d(d?d@d!dAiidBd@d!dMiid@dNd'd(id@d!dAiidBd!dOid!dPid!dQidRdSd'd(dTd*d(dUdVd(dWdXd(dYd,d(dZd.d(d[d0d(d\d2d(d]d4d(d^d6d(d_d8d(d`d:d(dad<d(dbd>d(dcddd(ded@d!dAiidBdfdgdhdidjd(iidkdjd(dldmd(dndodpd(dqdrd(dndsdtduZ>ddvdwZ?dxdy Z@dzd{ ZAd|eBfd}d~ZCdd ZDG dd dejEZFG dd de0ZGdd ZHdS )    N )Path)BytesIO)ImageFc                 C   s8   t j| |}t j|}|j| |r|tj| < |S N)	importlibutilspec_from_file_locationmodule_from_specloaderexec_modulesysmodules)module_name	file_pathmake_importablespecmodule r   T/var/www/html/Deteccion_Ine/venv/lib/python3.10/site-packages/paddleocr/paddleocr.py_import_file"   s   
r   toolsztools/__init__.pyT)r   ppocrZ	paddleocrppstructure)
get_logger)predict_system)check_and_readget_image_file_listalpha_to_colorbinarize_img)maybe_downloaddownload_with_progressbaris_linkconfirm_model_dir_url)draw_ocrstr2bool	check_gpu)	init_argsdraw_structure_result)StructureSystemsave_structure_resto_excel)	PaddleOCRPPStructurer$   r(   r*   r!   r+   ZDBz2.7.3ZCRNNZ
SVTR_LCNetz~/.paddleocr/PP-OCRv4)PP-OCRPP-OCRv2PP-OCRv3r.   PP-StructureV2PP-StructureurlzJhttps://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tarzYhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar)chenmlzJhttps://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tarz./ppocr/utils/ppocr_keys_v1.txt)r4   	dict_pathzJhttps://paddleocr.bj.bcebos.com/PP-OCRv4/english/en_PP-OCRv4_rec_infer.tarz./ppocr/utils/en_dict.txtzShttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/korean_PP-OCRv4_rec_infer.tarz"./ppocr/utils/dict/korean_dict.txtzRhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/japan_PP-OCRv4_rec_infer.tarz!./ppocr/utils/dict/japan_dict.txtzXhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tarz'./ppocr/utils/dict/chinese_cht_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/ta_PP-OCRv4_rec_infer.tarz./ppocr/utils/dict/ta_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/te_PP-OCRv4_rec_infer.tarz./ppocr/utils/dict/te_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/ka_PP-OCRv4_rec_infer.tarz./ppocr/utils/dict/ka_dict.txtzRhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tarz!./ppocr/utils/dict/latin_dict.txtzShttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/arabic_PP-OCRv4_rec_infer.tarz"./ppocr/utils/dict/arabic_dict.txtzUhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tarz$./ppocr/utils/dict/cyrillic_dict.txtzWhttps://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/devanagari_PP-OCRv4_rec_infer.tarz&./ppocr/utils/dict/devanagari_dict.txt)r5   r6   koreanjapanchinese_chttatekalatinarabiccyrillic
devanagarir5   zRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar)detrecclszJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tarzShttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/korean_PP-OCRv3_rec_infer.tarzRhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/japan_PP-OCRv3_rec_infer.tarzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tarzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/te_PP-OCRv3_rec_infer.tarzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tarzShttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tarzWhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tarzRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tarz\https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tarz[https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar)r5   r6   	structurezRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tarz]https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tarzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tarz"./ppocr/utils/dict/french_dict.txtzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tarz"./ppocr/utils/dict/german_dict.txtzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tarzYhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tarz_https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tarz_https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tarz`https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tarzbhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tarzdhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tarz[https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tarzppocr/utils/dict/table_dict.txt)r5   r6   frenchgermanr9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rF   )r.   r1   r0   r/   tabler6   zahttps://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tarz)ppocr/utils/dict/table_structure_dict.txtzehttps://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tarzehttps://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tarz,ppocr/utils/dict/table_structure_dict_ch.txt)r6   r5   zahttps://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tarz6ppocr/utils/dict/layout_dict/layout_publaynet_dict.txtzfhttps://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tarz1ppocr/utils/dict/layout_dict/layout_cdla_dict.txt)rI   layout)r3   r2   )OCR	STRUCTUREc                 C   s   dd l }t }| |_|jdtdd |jdtdd |jdtdd |jdtd	d |jd
ttddd |jdttddd |jD ]
}|j	dv rKd |_
qA| rR| S i }|jD ]}|j
||j	< qW|jdi |S )Nr   z--langr5   )typedefaultz--detTz--recz--typeocrz--ocr_versionr.   aU  OCR Model version, the current model support list is as follows: 1. PP-OCRv4/v3 Support Chinese and English detection and recognition model, and direction classifier model2. PP-OCRv2 Support Chinese detection and recognition model. 3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.)rM   choicesrN   helpz--structure_versionr2   zModel version, the current model support list is as follows: 1. PP-Structure Support en table structure model. 2. PP-StructureV2 Support ch and en table structure model.)rec_char_dict_pathtable_char_dict_pathlayout_dict_pathr   )argparser'   add_helpadd_argumentstrr%   SUPPORT_OCR_MODEL_VERSIONSUPPORT_STRUCTURE_MODEL_VERSION_actionsdestrN   
parse_args	Namespace)mMainrU   parseractionZinference_args_dictr   r   r   r]     s>   

	

r]   c                 C   s   g d}g d}g d}g d}| |v rd} n| |v rd} n| |v r%d} n| |v r+d} | t d	 t d
 v sEJ dt d	 t d
  | | dkrOd}| |fS | dkrYd}| |fS | dv rcd}| |fS d}| |fS )N)*afazbscscydadeesetfrgahrhuidisitZkuZlaltlvmimsmtnlnoocpiplptroZrs_latinskslsqsvswtltruzvirG   rH   )arfaZugur)ruZrs_cyrillicbebgukZmnZabqZadyZkbdZavaZdarZinhZcheZlbeZleztab)himrneZbhmaiangZbhoZmahZscknewZgomsaZbgcr?   r@   rA   rB   rK   rD   z!param lang must in {}, but got {}r5   rF   )r6   r?   r6   r7   )
MODEL_URLSDEFAULT_OCR_MODEL_VERSIONformatkeys)langZ
latin_langZarabic_langZcyrillic_langZdevanagari_langdet_langr   r   r   
parse_lang  s<   r   c                 C   s   | dkrt }n	| dkrt}ntt|  }||vr|}||| vr;||| v r)|}ntd|||   t	d ||| | vrc||| | v rN|}ntd||| |  | t	d || | | S )NrK   rL   z,{} models is not support, we only support {}z8lang {} is not support, we only support {} for {} models)
r   DEFAULT_STRUCTURE_MODEL_VERSIONNotImplementedErrorr   loggererrorr   r   r   exit)rM   versionZ
model_typer   ZDEFAULT_MODEL_VERSIONZ
model_urlsr   r   r   get_model_config  s4   

r   contentc                 C   s   t j| t jd}t|tjS )N)Zdtype)np
frombufferuint8cv2imdecodeZIMREAD_UNCHANGED)r   Znp_arrr   r   r   
img_decode  s   r   c                 C   sp  t | tr	t| } t | trt| rt| d d} | }t|\} }}|s|st|d}| }t|} W d    n1 s>w   Y  | d u rz>t	 }t	|}t
|}|d}	|	|d |d | }
tt|
dd}t|}t|tj}t|tj} W n   td| Y d S | d u rtd| d S t | tjrt| jd	krt| tj} | S )
Ntmp.jpgrbRGBZjpegr   zutf-8)encodingerror in loading image:{}   )
isinstancebytesr   rX   r"   r!   r   openreadr   r   convertsaveseekbase64	b64encode	b64decoder   r   r   r   r   ZIMREAD_COLORr   r   r   ndarraylenshapeZcvtColorZCOLOR_GRAY2BGR)imgZ
image_fileflag_gifflag_pdffZimg_strbufimageZimrgbZimage_bytesZdata_base64Zimage_decodeZ	img_arrayr   r   r   	check_img  sJ   








r   c                       s2   e Zd Z fddZ						dddZ  ZS )	r,   c                    s  t dd}|jjdi | |jtv sJ dt|jt|j|_|js+t	
tj |j| _t|j\}}td|jd|}t|jtjtdd||d \|_}td|jd|}t|jtjtdd||d \|_}td|jd	d
}	t|jtjtdd	|	d \|_}
|jdv rd|_nd|_|jst|j| t|j| t|j|
 |jtvrt	dt t d |j!t"vrt	dt" t d |j#du rt$t%t&j'|d  |_#t	(| t) *| |j+| _+dS )zm
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        Fr_   z"ocr_version must in {}, but get {}rK   rC   whlr4   rD   rE   r5   )r1   r.   z
3, 48, 320z
3, 32, 320zdet_algorithm must in {}r   zrec_algorithm must in {}Nr8   r   ),r]   __dict__updateocr_versionrY   r   r&   use_gpushow_logr   setLevelloggingINFOuse_angle_clsr   r   r   r#   det_model_dirospathjoinBASE_DIRrec_model_dirZcls_model_dirZrec_image_shapeZuse_onnxr    Zdet_algorithmSUPPORT_DET_MODELr   r   r   Zrec_algorithmSUPPORT_REC_MODELrR   rX   r   __file__parentdebugsuper__init__page_num)selfkwargsparamsr   r   det_model_configdet_urlrec_model_configrec_urlZcls_model_configZcls_url	__class__r   r   r   /  sh   













zPaddleOCR.__init__TF   r   r   c                    s  t |tjtttfsJ t |tr|dkrtd td |dkr,| j	dkr,t
d t|}t |trN| jt|ksA| jdkrFt|| _|d| j }n|g} fdd}	|r|rg }
t|D ]*\}}|	|}| ||\}}}|s~|s~|
d qcd	d
 t||D }|
| qc|
S |r|sg }
t|D ]#\}}|	|}| |\}}|s|
d qdd
 |D }|
| q|
S g }
g }t|D ]0\}}t |ts|	|}|g}| j	r|r| |\}}}|s|| | |\}}|
| q|s|S |
S )u>  
        OCR with PaddleOCR
        args：
            img: img for OCR, support ndarray, img_path and list or ndarray
            det: use text detection or not. If False, only rec will be exec. Default is True
            rec: use text recognition or not. If False, only det will be exec. Default is True
            cls: use angle classifier or not. Default is True. If True, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
            bin: binarize image to black and white. Default is False.
            inv: invert image colors. Default is False.
            alpha_color: set RGB color Tuple for transparent parts replacement. Default is pure white.
        Tz.When input a list of images, det must be falser   Fz]Since the angle classifier is not initialized, it will not be used during the forward processNc                    s(   t |  } rt| } rt| } | S r   )r   r   Zbitwise_notr   )Z_imagealpha_colorbininvr   r   preprocess_image  s   

z'PaddleOCR.ocr.<locals>.preprocess_imagec                 S   s   g | ]
\}}|  |gqS r   tolist).0boxresr   r   r   
<listcomp>  s    z!PaddleOCR.ocr.<locals>.<listcomp>c                 S   s   g | ]}|  qS r   r   )r   r   r   r   r   r     s    )r   r   r   listrX   r   r   r   r   r   warningr   r   r   	enumerate__call__appendzipZtext_detectorZtext_classifierZtext_recognizer)r   r   rC   rD   rE   r   r   r   Zimgsr   Zocr_residxZdt_boxesZrec_res_Ztmp_resZelapseZcls_resZcls_res_tmpr   r   r   rO   k  sl   







zPaddleOCR.ocr)TTTFFr   )__name__
__module____qualname__r   rO   __classcell__r   r   r   r   r,   .  s    >r,   c                       s*   e Zd Z fddZd fdd	Z  ZS )r-   c                    s  t dd}|jjdi | |jtv sJ dt|jt|j|_d|_|j	s.t
tj t|j\}}|dkr<d}nd}|jdkrFd|_td|jd	|}t|jtjtd
d	||d \|_}td|jd|}t|jtjtd
d||d \|_}	td|jd|}
t|jtjtd
d|
d \|_}td|jd|}t|jtjtd
d|d \|_}t|j| t|j|	 t|j| t|j| |jd u rttt j!|d  |_|j"d u rttt j!|
d  |_"|j#d u rttt j!|d  |_#t
$| t% &| d S )NFr   z(structure_version must in {}, but get {}rF   r5   r6   r3   rK   rC   r   r4   rD   rL   rI   rJ   r8   r   )'r]   r   r   Zstructure_versionrZ   r   r&   r   moder   r   r   r   r   r   r   Zmerge_no_span_structurer   r   r#   r   r   r   r   r   r   Ztable_model_dirZlayout_model_dirr    rR   rX   r   r   r   rS   rT   r   r   r   )r   r   r   r   r   Z
table_langr   r   r   r   Ztable_model_configZ	table_urlZlayout_model_configZ
layout_urlr   r   r   r     s|   













zPPStructure.__init__Fr   c                    s"   t |}t j|||d\}}|S )Nimg_idx)r   r   r   )r   r   Zreturn_ocr_result_in_tabler  r   r   r   r   r   r     s
   
zPPStructure.__call__)Fr   )r   r   r   r   r   r  r   r   r   r   r-     s    :r-   c            !      C   s  t dd} | j}t|rt|d dg}nt| j}t|dkr+td| j d S | j	dkr9t
di | j}n| j	dkrGtdi | j}nt|D ]}tj|dd }td	d
|d
 | j	dkr|j|| j| j| j| j| j| jd}|d urtt|D ]}|| }|D ]}	t|	 qqqK| j	dkrt|\}
}}|s|st|}
| jr| jr|rddl m!} tj"| j#d|}||}|$| |%  td| qK|s|
d u rtd| qK||
gg}n6g }t&|
D ]/\}}tj'tj"| j#|dd tj"| j#||d t(| d }t)|| |*||g qg }t&|D ]U\}\}}
td|d t| tj|dd }||
|d}t+|| j#|| | jr|g krddl,m-} ddl.m/} |
j0\}}}||}|||}||7 }q,| jr|g krzddl.m1} ||
|| j#| W n t2y } ztd|| W Y d }~qKd }~ww |D ]} | 3d | 3d t|  qtd| j# qKd S )NTr   r   r   zno images find in {}rO   rF   .z{}{}{}z
**********)rC   rD   rE   r   r   r   )	Converterz{}.docxzdocx save to {}r   )exist_okr   z.jpgzprocessing {}/{} page:   r  )deepcopy)sorted_layout_boxes)convert_info_docxz.error in layout recovery image:{}, err msg: {}r   r   zresult save to {}r   )4r]   	image_dirr"   r!   r   r   r   r   r   rM   r,   r   r-   r   r   r   basenamesplitinforO   rC   rD   r   ZbinarizeinvertZ
alphacolorranger   r   ZimreadZrecoveryZuse_pdf2docx_apiZpdf2docx.converterr  r   outputr   closer   makedirsrX   Zimwriter   r*   copyr	  Z$ppstructure.recovery.recovery_to_docr
  r   r  	Exceptionpop)!argsr  Zimage_file_listZengineZimg_pathZimg_nameresultr   r   liner   r   r   r  Z	docx_fileZcvZ	img_pathsindexZpdf_imgZpdf_img_pathZall_resZnew_img_pathZnew_img_namer	  r
  hwr   Z	result_cpZresult_sortedr  exitemr   r   r   main  s   












r   )F)T)Ir   r   r   r   dirnamer   __dir__Zpaddler   r   r   r   numpyr   pathlibr   r   ior   ZPILr   r   r   import_moduler   r   Zppocr.utils.loggingr   Ztools.inferr   Zppocr.utils.utilityr   r   r   r   Zppocr.utils.networkr    r!   r"   r#   Ztools.infer.utilityr$   r%   r&   Zppstructure.utilityr'   r(   Zppstructure.predict_systemr)   r*   r+   r   __all__r   VERSIONr   
expanduserr   r   rY   r   rZ   r   r]   r   r   r   r   r   Z
TextSystemr,   r-   r   r   r   r   r   <module>   s
  
	?W?VN   &    
O)&( B