package com.inet.pdfc.parser;

import com.inet.annotations.InternalApi;
import com.inet.classloader.LoaderUtils;
import com.inet.logging.LogManager;
import com.inet.logging.Logger;
import java.awt.Font;
import java.awt.Rectangle;
import java.awt.geom.Point2D;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.annotation.SuppressFBWarnings;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

@InternalApi
/* loaded from: input_file:com/inet/pdfc/parser/HocrReader.class */
public class HocrReader {
    private SAXParserFactory my = LoaderUtils.newSaxParserFactory();
    private b mz = null;
    private Logger mA = LogManager.getLogger("hOCR Parser");
    private File mB;

    /* loaded from: input_file:com/inet/pdfc/parser/HocrReader$a.class */
    private class a extends DefaultHandler {
        private f mD = null;
        private com.inet.pdfc.parser.a mE = null;
        private g mF = null;
        private c mG = null;
        private h mH = null;

        private a() {
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.EntityResolver
        public InputSource resolveEntity(String str, String str2) {
            HocrReader.this.mA.debug("resolve: " + str2);
            return new InputSource(getClass().getResourceAsStream("ressource/xhtml1-transitional.dtd"));
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startDocument() {
            HocrReader.this.mz = new b();
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String str, String str2, String str3, Attributes attributes) {
            HashMap hashMap = new HashMap();
            for (int i = 0; i < attributes.getLength(); i++) {
                hashMap.put(attributes.getQName(i), attributes.getValue(i).toLowerCase());
            }
            if ("meta".equalsIgnoreCase(str3) && attributes.getValue("name") != null && attributes.getValue("content") != null) {
                ((d) HocrReader.this.mz.getMeta()).e(attributes.getValue("name"), attributes.getValue("content"));
            }
            String str4 = (String) hashMap.get("class");
            if (str4 != null) {
                e eVar = null;
                if (str4.equals("ocr_page")) {
                    this.mD = new f();
                    eVar = this.mD;
                    HocrReader.this.mz.a(this.mD);
                    for (String str5 : ((String) hashMap.get("title")).split(";")) {
                        if (str5.contains("ppageno")) {
                            String trim = str5.replace("ppageno", "").trim();
                            if (trim.matches("\\d+")) {
                                this.mD.i(Integer.parseInt(trim));
                            } else {
                                HocrReader.this.mA.error("hOCR Parser Error, konnte ppageno nicht auswerten!\nValue = " + str5);
                            }
                        }
                    }
                } else if (str4.equals("ocr_carea")) {
                    this.mE = new com.inet.pdfc.parser.a();
                    eVar = this.mE;
                    this.mD.a(this.mE);
                } else if (str4.equals("ocr_par")) {
                    this.mF = new g();
                    eVar = this.mF;
                    this.mE.a(this.mF);
                } else if (str4.equals("ocr_line") || str4.equals("ocr_caption")) {
                    this.mG = new c();
                    eVar = this.mG;
                    this.mF.a(this.mG);
                    for (String str6 : ((String) hashMap.get("title")).split(";")) {
                        if (str6.contains("baseline")) {
                            String trim2 = str6.replace("baseline", "").trim();
                            if (trim2.contains(" ")) {
                                String[] split = trim2.split(" ");
                                if (split.length == 2) {
                                    this.mG.b(new Point2D.Double(Double.parseDouble(split[0]), Double.parseDouble(split[1])));
                                } else {
                                    HocrReader.this.mA.error("hOCR Parser Error, konnte positionen der baseline nicht auswerten!\nValue = " + str6);
                                }
                            } else {
                                HocrReader.this.mA.error("hOCR Parser Error, konnte baseline nicht auswerten!\nValue = " + str6);
                            }
                        }
                    }
                } else if (str4.equals("ocrx_word")) {
                    this.mH = new h();
                    eVar = this.mH;
                    if (this.mG == null) {
                        HocrReader.this.mA.warn("Keine Line Definiert " + ((String) hashMap.get("id")));
                    } else {
                        this.mG.a(this.mH);
                        String str7 = (String) hashMap.get("title");
                        if (str7 != null) {
                            this.mH.setFont(A(str7));
                        }
                    }
                }
                if (eVar != null) {
                    eVar.y((String) hashMap.get("id"));
                }
                String str8 = (String) hashMap.get("title");
                if (str8 == null || eVar == null) {
                    return;
                }
                eVar.a(z(str8));
            }
        }

        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void characters(char[] cArr, int i, int i2) throws SAXException {
            String trim = new String(cArr, i, i2).trim();
            if (trim.isEmpty()) {
                return;
            }
            this.mH.setText(trim);
        }

        private Rectangle z(String str) {
            for (String str2 : str.split(";")) {
                if (str2.contains("bbox")) {
                    String[] split = str2.trim().split(" ");
                    int parseInt = Integer.parseInt(split[1]);
                    int parseInt2 = Integer.parseInt(split[2]);
                    return new Rectangle(parseInt, parseInt2, Integer.parseInt(split[3]) - parseInt, Integer.parseInt(split[4]) - parseInt2);
                }
            }
            return null;
        }

        private Font A(String str) {
            String str2 = "";
            int i = 0;
            int i2 = 0;
            for (String str3 : str.split(";")) {
                if (str3.contains("x_font")) {
                    str2 = str3.trim().split(" ")[1];
                } else if (str3.contains("x_fsize")) {
                    i = Integer.parseInt(str3.trim().split(" ")[1]);
                }
            }
            if (str2.toLowerCase().contains("bold") && str2.toLowerCase().contains("italic")) {
                i2 = 3;
            } else if (str2.toLowerCase().contains("bold")) {
                i2 = 1;
            } else if (str2.toLowerCase().contains("italic")) {
                i2 = 2;
            }
            if (str2.isEmpty()) {
                str2 = "Default";
            }
            return new Font(str2, i2, i);
        }
    }

    public HocrReader(File file) throws IllegalArgumentException {
        this.mB = file;
        if (file == null || !file.exists() || file.length() == 0) {
            throw new IllegalArgumentException("File can not be empty! " + file.getAbsolutePath());
        }
    }

    public void isInnternalUsage() {
        try {
            this.my.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
        } catch (Throwable th) {
            LogManager.getConfigLogger().error(th);
        }
    }

    @SuppressFBWarnings(value = {"XXE_SAXPARSER", "URLCONNECTION_SSRF_FD"}, justification = "The option for prevent XXE Injection is deactivate, see few line above. This function is only for the inernal use with tesseract,")
    public HocrDocument parse() throws ParserConfigurationException, SAXException, IOException {
        SAXParser newSAXParser = this.my.newSAXParser();
        a aVar = new a();
        BufferedInputStream bufferedInputStream = new BufferedInputStream(this.mB.toURI().toURL().openStream());
        newSAXParser.getXMLReader().setErrorHandler(new ErrorHandler() { // from class: com.inet.pdfc.parser.HocrReader.1
            @Override // org.xml.sax.ErrorHandler
            public void warning(SAXParseException sAXParseException) throws SAXException {
                HocrReader.this.mA.warn(sAXParseException);
            }

            @Override // org.xml.sax.ErrorHandler
            public void error(SAXParseException sAXParseException) throws SAXException {
                HocrReader.this.mA.error(sAXParseException);
            }

            @Override // org.xml.sax.ErrorHandler
            public void fatalError(SAXParseException sAXParseException) throws SAXException {
                HocrReader.this.mA.fatal(sAXParseException);
            }
        });
        newSAXParser.parse(bufferedInputStream, aVar);
        return this.mz;
    }
}
