package edu.stanford.nlp.sequences;

import edu.stanford.nlp.ling.FeatureLabel;
import edu.stanford.nlp.objectbank.XMLBeginEndIterator;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.util.StringUtils;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/sequences/TrueCasingDocumentReaderAndWriter.class */
/**
 * Document reader/writer for a true-casing sequence model.
 *
 * <p>Reading ({@link #getIterator(Reader)}) extracts the contents of every {@code TEXT} and
 * {@code TXT} element from the input, tokenizes them, splits the tokens into sentences and emits
 * one lower-cased {@link FeatureLabel} per token. Each emitted label carries:
 * <ul>
 *   <li>its answer: {@code LOWER}, {@code UPPER}, {@code INIT_UPPER} or {@code O}, derived from
 *       the casing of the original token;</li>
 *   <li>{@code "unknown"}: {@code "true"} when the lower-cased word occurred in exactly one of
 *       the extracted elements, otherwise {@code "false"};</li>
 *   <li>{@code "position"}: the zero-based index of the token within its sentence.</li>
 * </ul>
 *
 * <p>Writing ({@link #printAnswers(List, PrintWriter)}) re-cases each word according to its
 * answer label.
 *
 * <p>All case conversions use {@link Locale#ROOT} so that results do not depend on the JVM's
 * default locale (for example, Turkish dotless-i mapping).
 */
public class TrueCasingDocumentReaderAndWriter implements DocumentReaderAndWriter {
    private static final String LOWER = "LOWER";
    private static final String UPPER = "UPPER";
    private static final String INIT_UPPER = "INIT_UPPER";
    private static final String OTHER = "O";

    /** Element names whose contents are read, in the order they are processed. */
    private static final String[] DOCUMENT_TAGS = {"TEXT", "TXT"};

    /** A token that consists entirely of one SGML tag; such tokens act as sentence barriers. */
    private static final Pattern SGML = Pattern.compile("<[^>]*>");
    /** No ASCII upper-case letter and at least one ASCII lower-case letter. */
    private static final Pattern ALL_LOWER = Pattern.compile("[^A-Z]*?[a-z]+[^A-Z]*?");
    /** No ASCII lower-case letter and at least one ASCII upper-case letter. */
    private static final Pattern ALL_UPPER = Pattern.compile("[^a-z]*?[A-Z]+[^a-z]*?");
    /** Begins with an ASCII upper-case letter. */
    private static final Pattern START_UPPER = Pattern.compile("[A-Z].*");

    // Intentionally used as a raw type: only its erased process(List) signature is relied upon.
    private static final WordToSentenceProcessor SENTENCE_SPLITTER = new WordToSentenceProcessor();

    /**
     * Lower-cased words seen by the most recent {@link #getIterator(Reader)} call, or
     * {@code null} if no input has been read yet. The field is static, so it is shared by all
     * instances and replaced wholesale on every read.
     */
    public static Set<String> knownWords = null;

    /** No configuration is needed; the flags are ignored. */
    @Override // edu.stanford.nlp.sequences.DocumentReaderAndWriter
    public void init(SeqClassifierFlags seqClassifierFlags) {
    }

    /**
     * Tests whether a word (compared case-insensitively) is in {@link #knownWords}.
     *
     * @param str the word to look up; must not be {@code null}
     * @return {@code true} if the lower-cased word is in the vocabulary; {@code false} if it is
     *         not, or if no vocabulary has been collected yet
     */
    public static boolean known(String str) {
        // Read the field once so a concurrent reassignment cannot be observed half-way.
        Set<String> vocabulary = knownWords;
        return vocabulary != null && vocabulary.contains(str.toLowerCase(Locale.ROOT));
    }

    /**
     * Reads the whole input and returns an iterator over its sentences as lists of labelled,
     * lower-cased tokens (see the class description for the features set on each token).
     *
     * <p>Side effects: replaces {@link #knownWords} with the vocabulary of this input and writes
     * debugging output (each sentence, and every "unknown" token) to {@code System.err}.
     *
     * @param reader source of the SGML-style text; consumed completely
     * @return iterator over the labelled sentences, {@code TEXT} elements first, then {@code TXT}
     */
    @Override // edu.stanford.nlp.objectbank.IteratorFromReaderFactory
    public Iterator<List<FeatureLabel>> getIterator(Reader reader) {
        String text = StringUtils.slurpReader(reader);

        List<List<FeatureLabel>> sentences = new ArrayList<List<FeatureLabel>>();
        // Words seen in exactly one element so far vs. words seen in two or more.
        Set<String> seenOnce = new HashSet<String>();
        Set<String> seenRepeatedly = new HashSet<String>();
        for (String tag : DOCUMENT_TAGS) {
            collectSentences(text, tag, sentences, seenOnce, seenRepeatedly);
        }

        // Build the vocabulary completely before publishing it through the static field.
        Set<String> vocabulary = new HashSet<String>(seenRepeatedly);
        vocabulary.addAll(seenOnce);
        knownWords = vocabulary;

        List<List<FeatureLabel>> labelled = new ArrayList<List<FeatureLabel>>(sentences.size());
        for (List<FeatureLabel> sentence : sentences) {
            System.err.println(sentence);
            List<FeatureLabel> output = new ArrayList<FeatureLabel>(sentence.size());
            int position = 0;
            for (FeatureLabel token : sentence) {
                output.add(toLabelledToken(token, position, seenOnce));
                position++;
            }
            System.err.println();
            labelled.add(output);
        }
        return labelled.iterator();
    }

    /**
     * Writes each word re-cased according to its answer label, surrounded by the values stored
     * under {@code "_prevSGML"} and {@code "_afterSGML"} (treated as empty when absent).
     *
     * @param list the labelled tokens of one document
     * @param printWriter destination; neither flushed nor closed here
     */
    @Override // edu.stanford.nlp.sequences.DocumentReaderAndWriter
    public void printAnswers(List<FeatureLabel> list, PrintWriter printWriter) {
        for (FeatureLabel label : list) {
            String word = label.word();
            String answer = label.answer();
            // Constant-first comparisons tolerate a missing (null) answer.
            if (UPPER.equals(answer)) {
                word = word.toUpperCase(Locale.ROOT);
            } else if (LOWER.equals(answer)) {
                word = word.toLowerCase(Locale.ROOT);
            } else if (INIT_UPPER.equals(answer) && word.length() > 0) {
                // The length guard avoids substring(0, 1) failing on an empty word.
                word = word.substring(0, 1).toUpperCase(Locale.ROOT) + word.substring(1);
            }
            printWriter.print(textOrEmpty(label, "_prevSGML") + word + textOrEmpty(label, "_afterSGML"));
        }
    }

    /**
     * Tokenizes the contents of every {@code tag} element in {@code text}, appends the resulting
     * sentences to {@code sentences} and updates the per-element word occurrence sets.
     */
    private static void collectSentences(String text, String tag, List<List<FeatureLabel>> sentences,
            Set<String> seenOnce, Set<String> seenRepeatedly) {
        XMLBeginEndIterator elements = new XMLBeginEndIterator(new StringReader(text), tag);
        while (elements.hasNext()) {
            PTBTokenizer<FeatureLabel> tokenizer =
                    PTBTokenizer.newPTBTokenizer(new StringReader((String) elements.next()), false, true);
            List<FeatureLabel> pending = new ArrayList<FeatureLabel>();
            Set<String> elementWords = new HashSet<String>();
            while (tokenizer.hasNext()) {
                FeatureLabel token = tokenizer.next();
                String word = token.word();
                // SGML tokens are counted in the vocabulary too, exactly like ordinary words.
                elementWords.add(word.toLowerCase(Locale.ROOT));
                if (!SGML.matcher(word).matches()) {
                    pending.add(token);
                } else if (!pending.isEmpty()) {
                    // A tag ends the current run of tokens; sentences never span a tag.
                    appendSentences(pending, sentences);
                    // Start a fresh list: the splitter may have kept references to the old one.
                    pending = new ArrayList<FeatureLabel>();
                }
            }
            if (!pending.isEmpty()) {
                appendSentences(pending, sentences);
            }
            for (String word : elementWords) {
                if (seenRepeatedly.contains(word)) {
                    continue;
                }
                if (seenOnce.remove(word)) {
                    // Second element containing this word: promote it.
                    seenRepeatedly.add(word);
                } else {
                    seenOnce.add(word);
                }
            }
        }
    }

    /** Splits {@code tokens} into sentences and appends them to {@code sentences}. */
    @SuppressWarnings({"unchecked", "rawtypes"}) // the splitter is used through its raw signature
    private static void appendSentences(List<FeatureLabel> tokens, List<List<FeatureLabel>> sentences) {
        List split = SENTENCE_SPLITTER.process(tokens);
        sentences.addAll(split);
    }

    /**
     * Builds the lower-cased, labelled copy of {@code token} and logs it to {@code System.err}
     * when it is flagged as unknown.
     */
    private static FeatureLabel toLabelledToken(FeatureLabel token, int position, Set<String> seenOnce) {
        String original = token.word();
        String lowered = original.toLowerCase(Locale.ROOT);

        FeatureLabel labelled = new FeatureLabel();
        labelled.setAnswer(casingOf(original));
        labelled.setWord(lowered);
        labelled.put("unknown", seenOnce.contains(lowered) ? "true" : "false");
        labelled.put("position", position + "");
        if (labelled.get("unknown").equals("true")) {
            System.err.println(labelled.word() + " :: " + labelled.get("unknown") + " :: " + labelled.get("position"));
        }
        return labelled;
    }

    /** Maps a word's surface form to its casing label; the checks are ordered by priority. */
    private static String casingOf(String word) {
        if (ALL_LOWER.matcher(word).matches()) {
            return LOWER;
        }
        if (ALL_UPPER.matcher(word).matches()) {
            return UPPER;
        }
        if (START_UPPER.matcher(word).matches()) {
            return INIT_UPPER;
        }
        return OTHER;
    }

    /** Returns the string stored under {@code key}, or the empty string when nothing is stored. */
    private static String textOrEmpty(FeatureLabel label, String key) {
        Object value = label.get(key);
        return value == null ? "" : (String) value;
    }
}
