package org.apache.tika.parser.ner.corenlp;

import com.digifly.hifiman.util.HanziToPinyin;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.tika.io.IOUtils;
import org.apache.tika.parser.ner.NERecogniser;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: classes.dex */
/**
 * {@link NERecogniser} that delegates to the Stanford CoreNLP CRF classifier.
 *
 * <p>The classifier is looked up and invoked purely through reflection
 * ({@value #CLASSIFIER_CLASS_NAME}), so this class has no compile-time dependency on
 * CoreNLP. If the classifier class, the model, or any of the reflected members cannot
 * be resolved, the instance is still constructed but {@link #isAvailable()} returns
 * {@code false} and {@link #recognise(String)} yields an empty result.
 */
public class CoreNLPNERecogniser implements NERecogniser {
    private static final Logger LOG = LoggerFactory.getLogger(CoreNLPNERecogniser.class);

    /** Fully-qualified name of the classifier class resolved reflectively. */
    private static final String CLASSIFIER_CLASS_NAME = "edu.stanford.nlp.ie.crf.CRFClassifier";
    /** Fully-qualified name of the class whose {@code first/second/third} fields are read. */
    private static final String TRIPLE_CLASS_NAME = "edu.stanford.nlp.util.Triple";

    /** System property consulted by the no-arg constructor for the model path. */
    public static final String MODEL_PROP_NAME = "ner.corenlp.model";
    public static final String NER_3CLASS_MODEL = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
    public static final String NER_4CLASS_MODEL = "edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz";
    public static final String NER_7CLASS_MODEL = "edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz";
    /** Model used when {@link #MODEL_PROP_NAME} is not set; same value as {@link #NER_7CLASS_MODEL}. */
    public static final String DEFAULT_MODEL_PATH = NER_7CLASS_MODEL;

    /**
     * Entity types reported by {@link #getEntityTypes()}. Kept as a plain mutable
     * {@link HashSet} so that existing callers observe no change in behaviour.
     */
    public static final Set<String> ENTITY_TYPES = buildEntityTypes();

    /** {@code true} only when every reflective lookup in the constructor succeeded. */
    private final boolean available;
    /** Classifier object returned by the reflective {@code getClassifier} call. */
    private final Object classifierInstance;
    /** Reflective handle on {@code classifyToCharacterOffsets(String)}. */
    private final Method classifyMethod;
    /** {@code first} field of a result triple; read as the entity type. */
    private final Field firstField;
    /** {@code second} field of a result triple; read as the start offset. */
    private final Field secondField;
    /** {@code third} field of a result triple; read as the end offset. */
    private final Field thirdField;

    /**
     * Creates a recogniser using the model named by the {@link #MODEL_PROP_NAME}
     * system property, falling back to {@link #DEFAULT_MODEL_PATH}.
     */
    public CoreNLPNERecogniser() {
        this(System.getProperty(MODEL_PROP_NAME, DEFAULT_MODEL_PATH));
    }

    /**
     * Creates a recogniser backed by the given model.
     *
     * <p>Never throws on lookup/load failure: any exception is logged and the instance
     * is left in the "not available" state.
     *
     * @param str path of the model handed to the classifier's {@code getClassifier} method
     */
    public CoreNLPNERecogniser(String str) {
        Object classifier = null;
        Method classify = null;
        Field first = null;
        Field second = null;
        Field third = null;
        boolean loaded = false;
        try {
            Properties properties = new Properties();
            Class<?> classifierClass = Class.forName(CLASSIFIER_CLASS_NAME);
            Method factory = classifierClass.getMethod("getClassifier", String.class, Properties.class);
            classifier = factory.invoke(classifierClass, str, properties);
            classify = classifierClass.getMethod("classifyToCharacterOffsets", String.class);

            Class<?> tripleClass = Class.forName(TRIPLE_CLASS_NAME);
            first = tripleClass.getField("first");
            second = tripleClass.getField("second");
            third = tripleClass.getField("third");
            loaded = true;
        } catch (Exception e) {
            LOG.warn("{} while trying to load the model from {}", e.getMessage(), str);
            // Reflective failures frequently carry a null message (the real cause is
            // wrapped), so keep the full stack trace reachable at debug level.
            LOG.debug("Failed to initialise the CoreNLP classifier", e);
        }
        this.classifierInstance = classifier;
        this.classifyMethod = classify;
        this.firstField = first;
        this.secondField = second;
        this.thirdField = third;
        this.available = loaded;
        LOG.info("Available for service ? {}", this.available);
    }

    /**
     * Command-line entry point: reads the text file given as the single argument,
     * runs recognition with a default-configured recogniser and prints the result
     * as indented JSON on standard output.
     *
     * @param strArr exactly one element: the path of the text file to analyse
     * @throws IOException if the file cannot be opened, read or closed
     * @throws JSONException if the result cannot be rendered as JSON
     */
    public static void main(String[] strArr) throws IOException, JSONException {
        if (strArr.length != 1) {
            System.err.println("Error: Invalid Args");
            System.err.println("This tool finds names inside text");
            System.err.println("Usage: <path/to/text/file>");
            return;
        }
        // try-with-resources closes the stream on every path and records a failing
        // close() as a suppressed exception instead of masking the primary one.
        try (FileInputStream fileInputStream = new FileInputStream(strArr[0])) {
            String text = IOUtils.toString(fileInputStream);
            Map<String, Set<String>> names = new CoreNLPNERecogniser().recognise(text);
            System.out.println(new JSONObject(names).toString(2));
        }
    }

    /**
     * @return {@link #ENTITY_TYPES}
     */
    @Override
    public Set<String> getEntityTypes() {
        return ENTITY_TYPES;
    }

    /**
     * @return {@code true} if the classifier and all reflected members were resolved
     *         by the constructor
     */
    @Override
    public boolean isAvailable() {
        return this.available;
    }

    /**
     * Runs the classifier over the given text and groups the recognised names by
     * entity type.
     *
     * <p>Each result triple supplies the entity type ({@code first}) and the start/end
     * character offsets ({@code second}/{@code third}) of the name within {@code str}.
     * Names are trimmed and have whitespace runs, newlines and tabs collapsed; names
     * that end up empty are dropped, although their type still appears as a key.
     * Any exception raised while classifying is logged at debug level and whatever
     * was collected so far is returned.
     *
     * @param str text to analyse
     * @return map from entity type to the distinct names found; empty when the
     *         recogniser is not available
     */
    @Override
    public Map<String, Set<String>> recognise(String str) {
        Map<String, Set<String>> entities = new HashMap<>();
        if (!this.available) {
            // Nothing was loaded; skip the reflective call instead of relying on a
            // caught NullPointerException.
            return entities;
        }
        try {
            List<?> triples = (List<?>) this.classifyMethod.invoke(this.classifierInstance, str);
            for (Object triple : triples) {
                String type = (String) this.firstField.get(triple);
                Set<String> names = entities.get(type);
                if (names == null) {
                    names = new HashSet<>();
                    entities.put(type, names);
                }
                int start = (Integer) this.secondField.get(triple);
                int end = (Integer) this.thirdField.get(triple);
                // NOTE(review): the decompiled source used HanziToPinyin.Token.SEPARATOR
                // (an unrelated class) as the replacement; that appears to be a decompiler
                // constant substitution for a single space - confirm.
                String name = str.substring(start, end).trim().replaceAll("(\\s\\s+)|\n|\t", " ");
                if (!name.isEmpty()) {
                    names.add(name);
                }
            }
        } catch (Exception e) {
            LOG.debug(e.getMessage(), e);
        }
        return entities;
    }

    /** Builds the mutable set backing {@link #ENTITY_TYPES}. */
    private static Set<String> buildEntityTypes() {
        Set<String> types = new HashSet<>();
        types.add(NERecogniser.PERSON);
        types.add(NERecogniser.TIME);
        types.add(NERecogniser.LOCATION);
        types.add(NERecogniser.ORGANIZATION);
        types.add(NERecogniser.MONEY);
        types.add(NERecogniser.PERCENT);
        types.add(NERecogniser.DATE);
        return types;
    }
}
