package edu.stanford.nlp.ie;

import edu.stanford.nlp.ie.pascal.PascalTemplate;
import edu.stanford.nlp.ling.AbstractMapLabel;
import edu.stanford.nlp.ling.FeatureLabel;
import edu.stanford.nlp.process.PTBLexer;
import edu.stanford.nlp.sequences.Clique;
import edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter;
import edu.stanford.nlp.sequences.FeatureFactory;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.PaddedList;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/ie/NERFeatureFactory.class */
public class NERFeatureFactory extends FeatureFactory {
    private static final long serialVersionUID = -2329726064739185544L;

    /** Distributional-similarity lexicon: lower-cased word mapped to its class string; filled by initLexicon(). */
    private Map<String, String> lexicon;

    /**
     * Matches either a run of (optionally hyphen-separated) English number/ordinal words, or digits
     * followed by an ordinal suffix such as "21st". Matching is case-insensitive.
     * The correct spellings "forty"/"fortieth" are accepted; the historical misspellings
     * "fourty"/"fourtieth" are kept so anything that matched before still matches.
     */
    private static final Pattern ordinalPattern = Pattern.compile("(?:(?:first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twenty|twentieth|thirty|thirtieth|forty|fortieth|fourty|fourtieth|fifty|fiftieth|sixty|sixtieth|seventy|seventieth|eighty|eightieth|ninety|ninetieth|one|two|three|four|five|six|seven|eight|nine|hundred|hundredth)-?)+|[0-9]+(?:st|nd|rd|th)", Pattern.CASE_INSENSITIVE);

    /** Matches a token made only of ASCII digits. */
    private static final Pattern numberPattern = Pattern.compile("[0-9]+");

    /** Matches a bare ordinal suffix token ("st", "nd", "rd", "th"), case-insensitively. */
    private static final Pattern ordinalEndPattern = Pattern.compile("(?:st|nd|rd|th)", Pattern.CASE_INSENSITIVE);

    /** Name lists, loaded lazily inside featuresC from the files named in the flags; compared against the upper-cased word. */
    private HashSet<String> lastNames;
    private HashSet<String> maleNames;
    private HashSet<String> femaleNames;

    /** The document most recently annotated by distSimAnnotate(), used to skip re-annotation. */
    private PaddedList<FeatureLabel> cache = null;

    /** Cache of character n-gram features per word (only populated when flags.cacheNGrams is set). */
    private Map<String, Collection<String>> wordToSubstrings = new HashMap<>();

    /** "Sloppy" gazette index: word mapped to the gazette feature strings of every entry containing it. */
    private Map<String, Collection<String>> wordToGazetteEntries = new HashMap<>();

    /** "Clean" gazette index: word mapped to the positional entry records used for exact multi-word matching. */
    private Map<String, Collection<GazetteInfo>> wordToGazetteInfos = new HashMap<>();

    /** Matches a personal title, optionally followed by a period; used for the IS_TITLE feature. */
    private Pattern titlePattern = Pattern.compile("(Mr|Ms|Mrs|Dr|Miss|Sen|Judge|Sir)\\.?");

    /**
     * Record describing one word position inside a gazette entry.
     * readGazette() creates one instance per word of each entry; featuresC() compares
     * {@link #words} against the tokens surrounding the current position, offset by {@link #loc}.
     */
    public static class GazetteInfo {
        /** Feature string added when the whole entry matches. */
        String feature;
        /** Index of the indexed word within {@link #words}. */
        int loc;
        /** All words of the gazette entry, in order. */
        String[] words;

        /** Creates an empty record: blank feature, position 0 and no words. */
        GazetteInfo() {
            this.feature = "";
            this.loc = 0;
            this.words = StringUtils.EMPTY_STRING_ARRAY;
        }
    }

    /**
     * Initializes this factory: delegates to the superclass, loads the gazette, and loads the
     * distributional-similarity lexicon only when the flags enable distsim features.
     *
     * @param seqClassifierFlags the classifier flags controlling which resources are needed
     */
    @Override
    public void init(SeqClassifierFlags seqClassifierFlags) {
        super.init(seqClassifierFlags);
        initGazette();
        if (!seqClassifierFlags.useDistSim) {
            return;
        }
        initLexicon();
    }

    /**
     * Collects the features for the given clique at position {@code i}.
     * Each feature group is interned and suffixed with a tag naming the clique it belongs to.
     * An unrecognized clique yields an empty collection.
     *
     * @param paddedList the padded token sequence
     * @param i          the position being featurized
     * @param clique     the clique whose features are requested (compared by identity)
     * @return the set of feature strings for that clique
     */
    @Override
    public Collection getCliqueFeatures(PaddedList<FeatureLabel> paddedList, int i, Clique clique) {
        final Collection<String> features = new HashSet<>();
        if (clique == cliqueC) {
            addAllInterningAndSuffixing(features, featuresC(paddedList, i), "C");
            return features;
        }
        if (clique == cliqueCpC) {
            addAllInterningAndSuffixing(features, featuresCpC(paddedList, i), "CpC");
            // The "next" features are computed from the previous position's point of view.
            addAllInterningAndSuffixing(features, featuresCnC(paddedList, i - 1), "CnC");
            return features;
        }
        if (clique == cliqueCp2C) {
            addAllInterningAndSuffixing(features, featuresCp2C(paddedList, i), "Cp2C");
            return features;
        }
        if (clique == cliqueCp3C) {
            addAllInterningAndSuffixing(features, featuresCp3C(paddedList, i), "Cp3C");
            return features;
        }
        if (clique == cliqueCp4C) {
            addAllInterningAndSuffixing(features, featuresCp4C(paddedList, i), "Cp4C");
            return features;
        }
        if (clique == cliqueCp5C) {
            addAllInterningAndSuffixing(features, featuresCp5C(paddedList, i), "Cp5C");
            return features;
        }
        if (clique == cliqueCpCp2C) {
            addAllInterningAndSuffixing(features, featuresCpCp2C(paddedList, i), "CpCp2C");
            addAllInterningAndSuffixing(features, featuresCpCnC(paddedList, i - 1), "CpCnC");
            return features;
        }
        if (clique == cliqueCpCp2Cp3C) {
            addAllInterningAndSuffixing(features, featuresCpCp2Cp3C(paddedList, i), "CpCp2Cp3C");
            return features;
        }
        if (clique == cliqueCpCp2Cp3Cp4C) {
            addAllInterningAndSuffixing(features, featuresCpCp2Cp3Cp4C(paddedList, i), "CpCp2Cp3Cp4C");
        }
        return features;
    }

    /**
     * Loads the distributional-similarity lexicon named by {@code flags.distSimLexicon}, once.
     * Does nothing when no lexicon file is configured or when the lexicon is already loaded.
     * Each line is split on whitespace; the first field (lower-cased) is the word and the second
     * is its class. Lines with fewer than two fields (blank or truncated lines) are skipped
     * rather than aborting the load with an ArrayIndexOutOfBoundsException.
     */
    private void initLexicon() {
        if (this.flags.distSimLexicon == null || this.lexicon != null) {
            return;
        }
        Timing.startDoing("Loading distsim lexicon from " + this.flags.distSimLexicon);
        this.lexicon = new HashMap<>();
        // NOTE(review): slurpFileNoExceptions presumably returns null when the file cannot be read,
        // which would still surface as a NullPointerException here - confirm and handle if so.
        for (String line : StringUtils.slurpFileNoExceptions(this.flags.distSimLexicon).split("\n")) {
            String[] fields = line.split("\\s+");
            if (fields.length < 2) {
                // Malformed entry: no word/class pair on this line.
                continue;
            }
            this.lexicon.put(fields[0].toLowerCase(), fields[1]);
        }
        Timing.endDoing();
    }

    /**
     * Stores a "distSim" value on every token of the document, looked up in the lexicon by the
     * lower-cased word; tokens missing from the lexicon get the literal string "null".
     * The document is remembered afterwards so that a repeated call for the same inner list
     * returns immediately.
     *
     * @param paddedList the document to annotate
     */
    private void distSimAnnotate(PaddedList<FeatureLabel> paddedList) {
        if (paddedList.sameInnerList(this.cache)) {
            return;
        }
        for (FeatureLabel token : paddedList) {
            String distSimClass = this.lexicon.get(token.word().toLowerCase());
            token.set("distSim", distSimClass != null ? distSimClass : "null");
        }
        this.cache = paddedList;
    }

    /**
     * Drops all cached word n-gram lists by replacing the cache with a fresh, empty map.
     */
    public void clearSubstringList() {
        this.wordToSubstrings = new HashMap<>();
    }

    /**
     * Removes internal hyphens from a delimiter-wrapped word such as {@code "<co-operate>"}.
     * Only hyphens at index 2 up to (but excluding) {@code length - 2} of the input are removed,
     * so a hyphen directly after the opening delimiter or directly before the closing one is kept.
     * The bounds are evaluated against the original string's positions; the previous
     * implementation compared shrinking indices against the original length and could therefore
     * strip the trailing hyphen once an internal one had been removed (e.g. "&lt;a-b-&gt;").
     *
     * @param str the wrapped word
     * @return {@code str} without its internal hyphens
     */
    private static String dehyphenate(String str) {
        final int length = str.length();
        final int endExclusive = length - 2;
        if (str.indexOf('-', 2) < 0) {
            // Nothing removable: hand back the same instance.
            return str;
        }
        final StringBuilder result = new StringBuilder(length);
        for (int pos = 0; pos < length; pos++) {
            final char c = str.charAt(pos);
            final boolean internalHyphen = c == '-' && pos >= 2 && pos < endExclusive;
            if (!internalHyphen) {
                result.append(c);
            }
        }
        return result.toString();
    }

    /** Lower-case Greek letter names recognized by greekify(); compiled once instead of per call. */
    private static final Pattern greekPattern = Pattern.compile("(alpha)|(beta)|(gamma)|(delta)|(epsilon)|(zeta)|(kappa)|(lambda)|(rho)|(sigma)|(tau)|(upsilon)|(omega)");

    /**
     * Replaces every occurrence of a recognized (lower-case) Greek letter name with "~".
     *
     * @param str the text to normalize
     * @return {@code str} with each listed Greek letter name collapsed to a tilde
     */
    private static String greekify(String str) {
        return greekPattern.matcher(str).replaceAll("~");
    }

    /**
     * Tests whether a string looks like a capitalized name: at least two characters, the first
     * one upper-case or title-case, and no upper-case character anywhere after it.
     *
     * @param str the string to test
     * @return true if {@code str} has that shape
     */
    private static boolean isNameCase(String str) {
        final int n = str.length();
        if (n < 2) {
            return false;
        }
        final char first = str.charAt(0);
        final boolean capitalized = Character.isUpperCase(first) || Character.isTitleCase(first);
        if (!capitalized) {
            return false;
        }
        // Advance past every non-upper-case character; reaching the end means none was upper-case.
        int pos = 1;
        while (pos < n && !Character.isUpperCase(str.charAt(pos))) {
            pos++;
        }
        return pos == n;
    }

    /**
     * Tests whether a non-empty string contains no upper-case character.
     *
     * @param str the string to test
     * @return false for the empty string or if any character is upper-case; true otherwise
     */
    private static boolean noUpperCase(String str) {
        if (str.isEmpty()) {
            return false;
        }
        for (char c : str.toCharArray()) {
            if (Character.isUpperCase(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether a string contains at least one letter.
     *
     * @param str the string to test
     * @return true if any character satisfies {@link Character#isLetter(char)}; false otherwise,
     *         including for the empty string
     */
    private static boolean hasLetter(String str) {
        for (char c : str.toCharArray()) {
            if (Character.isLetter(c)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether the token at position {@code i} is (part of) an ordinal expression.
     * A token qualifies when:
     * <ul>
     *   <li>it matches the ordinal pattern by itself;</li>
     *   <li>it is all digits and the following token is an ordinal suffix;</li>
     *   <li>it is an ordinal suffix and the preceding token is all digits; or</li>
     *   <li>it is a lone "-" with ordinal-pattern tokens on both sides.</li>
     * </ul>
     *
     * @param list the token sequence
     * @param i    the index of the token to test
     * @return true if the token belongs to an ordinal expression
     */
    private static boolean isOrdinal(List<FeatureLabel> list, int i) {
        final String current = list.get(i).word();
        final boolean hasPrevious = i > 0;
        final boolean hasNext = i + 1 < list.size();

        if (ordinalPattern.matcher(current).matches()) {
            return true;
        }
        if (numberPattern.matcher(current).matches()) {
            // A bare number counts only when the next token supplies the suffix.
            return hasNext && ordinalEndPattern.matcher(list.get(i + 1).word()).matches();
        }
        if (hasPrevious
                && ordinalEndPattern.matcher(current).matches()
                && numberPattern.matcher(list.get(i - 1).word()).matches()) {
            return true;
        }
        if (current.equals("-") && hasNext && hasPrevious) {
            return ordinalPattern.matcher(list.get(i - 1).word()).matches()
                    && ordinalPattern.matcher(list.get(i + 1).word()).matches();
        }
        return false;
    }

    /**
     * Reads gazette entries from the reader and indexes them by word.
     * Each usable line has the form {@code TYPE word1 word2 ...}; lines that do not match are ignored.
     * For every word of an entry:
     * <ul>
     *   <li>with {@code flags.sloppyGazette}, the feature {@code TYPE-GAZ<n>} (n = number of words
     *       in the entry) is recorded under that word;</li>
     *   <li>with {@code flags.cleanGazette}, a {@link GazetteInfo} holding the feature, the full
     *       word array and this word's position is recorded under that word.</li>
     * </ul>
     *
     * @param bufferedReader source of gazette lines; read until exhausted, not closed here
     * @throws IOException if reading a line fails
     */
    private void readGazette(BufferedReader bufferedReader) throws IOException {
        final Pattern entryPattern = Pattern.compile("^(\\S+)\\s+(.+)$");
        for (String line = bufferedReader.readLine(); line != null; line = bufferedReader.readLine()) {
            final Matcher entry = entryPattern.matcher(line);
            if (!entry.matches()) {
                continue;
            }
            final String type = intern(entry.group(1));
            final String[] words = entry.group(2).split(" ");
            for (int position = 0; position < words.length; position++) {
                final String word = intern(words[position]);
                if (this.flags.sloppyGazette) {
                    Collection<String> entries = this.wordToGazetteEntries.get(word);
                    if (entries == null) {
                        entries = new HashSet<>();
                        this.wordToGazetteEntries.put(word, entries);
                    }
                    entries.add(intern(type + "-GAZ" + words.length));
                }
                if (this.flags.cleanGazette) {
                    Collection<GazetteInfo> infos = this.wordToGazetteInfos.get(word);
                    if (infos == null) {
                        infos = new HashSet<>();
                        this.wordToGazetteInfos.put(word, infos);
                    }
                    final GazetteInfo info = new GazetteInfo();
                    info.loc = position;
                    info.words = words;
                    info.feature = intern(type + "-GAZ" + words.length);
                    infos.add(info);
                }
            }
        }
    }

    protected Collection<String> featuresC(PaddedList<FeatureLabel> paddedList, int i) {
        Collection<GazetteInfo> collection;
        Collection<String> collection2;
        FeatureLabel featureLabel = paddedList.get(i);
        FeatureLabel featureLabel2 = paddedList.get(i + 1);
        FeatureLabel featureLabel3 = paddedList.get(i + 2);
        FeatureLabel featureLabel4 = paddedList.get(i - 1);
        FeatureLabel featureLabel5 = paddedList.get(i - 2);
        FeatureLabel featureLabel6 = paddedList.get(i - 3);
        String word = featureLabel.word();
        ArrayList arrayList = new ArrayList();
        if (this.flags.useDistSim) {
            distSimAnnotate(paddedList);
        }
        if (this.flags.useDistSim && this.flags.useMoreTags) {
            arrayList.add(featureLabel4.get("distSim") + "-" + word + "-PDISTSIM-CWORD");
        }
        if (this.flags.useDistSim) {
            arrayList.add(featureLabel.get("distSim") + "-DISTSIM");
        }
        if (this.flags.useTitle && this.titlePattern.matcher(word).matches()) {
            arrayList.add("IS_TITLE");
        }
        if (this.flags.useInternal && this.flags.useExternal) {
            if (this.flags.useWord) {
                arrayList.add(word + "-WORD");
            }
            if (this.flags.useUnknown) {
                arrayList.add(featureLabel.get("unknown") + "-UNKNOWN");
                arrayList.add(featureLabel4.get("unknown") + "-PUNKNOWN");
                arrayList.add(featureLabel2.get("unknown") + "-NUNKNOWN");
            }
            if (this.flags.useLemmas) {
                String string = featureLabel.getString(AbstractMapLabel.LEMMA_KEY);
                if (!"".equals(string)) {
                    arrayList.add(string + "-LEM");
                }
            }
            if (this.flags.usePrevNextLemmas) {
                String string2 = featureLabel4.getString(AbstractMapLabel.LEMMA_KEY);
                String string3 = featureLabel2.getString(AbstractMapLabel.LEMMA_KEY);
                if (!"".equals(string2)) {
                    arrayList.add(string2 + "-PLEM");
                }
                if (!"".equals(string3)) {
                    arrayList.add(string3 + "-NLEM");
                }
            }
            if (this.flags.checkNameList) {
                try {
                    if (this.lastNames == null) {
                        this.lastNames = new HashSet<>();
                        for (String str : StringUtils.slurpFile(this.flags.lastNameList).split("\n")) {
                            this.lastNames.add(str.split("\\s+")[0]);
                        }
                    }
                    if (this.maleNames == null) {
                        this.maleNames = new HashSet<>();
                        for (String str2 : StringUtils.slurpFile(this.flags.maleNameList).split("\n")) {
                            this.maleNames.add(str2.split("\\s+")[0]);
                        }
                    }
                    if (this.femaleNames == null) {
                        this.femaleNames = new HashSet<>();
                        for (String str3 : StringUtils.slurpFile(this.flags.femaleNameList).split("\n")) {
                            this.femaleNames.add(str3.split("\\s+")[0]);
                        }
                    }
                    String upperCase = word.toUpperCase();
                    if (this.lastNames.contains(upperCase)) {
                        arrayList.add("LAST_NAME");
                    }
                    if (this.maleNames.contains(upperCase)) {
                        arrayList.add("MALE_NAME");
                    }
                    if (this.femaleNames.contains(upperCase)) {
                        arrayList.add("FEMALE_NAME");
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    throw new RuntimeException();
                }
            }
            if (this.flags.binnedLengths != null) {
                int length = word.length();
                String str4 = null;
                int i2 = 0;
                while (true) {
                    if (i2 > this.flags.binnedLengths.length) {
                        break;
                    }
                    if (i2 == this.flags.binnedLengths.length) {
                        str4 = "Len-" + this.flags.binnedLengths[this.flags.binnedLengths.length - 1] + "-Inf";
                    } else if (length <= this.flags.binnedLengths[i2]) {
                        str4 = "Len-" + (i2 == 0 ? 1 : this.flags.binnedLengths[i2 - 1]) + "-" + this.flags.binnedLengths[i2];
                    }
                    i2++;
                }
                arrayList.add(str4);
            }
            if (this.flags.useABGENE) {
                arrayList.add(featureLabel.get("abgene") + "-ABGENE");
                arrayList.add(featureLabel4.get("abgene") + "-PABGENE");
                arrayList.add(featureLabel2.get("abgene") + "-NABGENE");
            }
            if (this.flags.useABSTRFreqDict) {
                arrayList.add(featureLabel.get("abstr") + "-ABSTRACT" + featureLabel.get("freq") + "-FREQ" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TAG");
                arrayList.add(featureLabel.get("abstr") + "-ABSTRACT" + featureLabel.get("dict") + "-DICT" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TAG");
                arrayList.add(featureLabel.get("abstr") + "-ABSTRACT" + featureLabel.get("dict") + "-DICT" + featureLabel.get("freq") + "-FREQ" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TAG");
            }
            if (this.flags.useABSTR) {
                arrayList.add(featureLabel.get("abstr") + "-ABSTRACT");
                arrayList.add(featureLabel4.get("abstr") + "-PABSTRACT");
                arrayList.add(featureLabel2.get("abstr") + "-NABSTRACT");
            }
            if (this.flags.useGENIA) {
                arrayList.add(featureLabel.get("genia") + "-GENIA");
                arrayList.add(featureLabel4.get("genia") + "-PGENIA");
                arrayList.add(featureLabel2.get("genia") + "-NGENIA");
            }
            if (this.flags.useWEBFreqDict) {
                arrayList.add(featureLabel.get("web") + "-WEB" + featureLabel.get("freq") + "-FREQ" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TAG");
                arrayList.add(featureLabel.get("web") + "-WEB" + featureLabel.get("dict") + "-DICT" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TAG");
                arrayList.add(featureLabel.get("web") + "-WEB" + featureLabel.get("dict") + "-DICT" + featureLabel.get("freq") + "-FREQ" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TAG");
            }
            if (this.flags.useWEB) {
                arrayList.add(featureLabel.get("web") + "-WEB");
                arrayList.add(featureLabel4.get("web") + "-PWEB");
                arrayList.add(featureLabel2.get("web") + "-NWEB");
            }
            if (this.flags.useIsURL) {
                arrayList.add(featureLabel.get("isURL") + "-ISURL");
            }
            if (this.flags.useEntityRule) {
                arrayList.add(featureLabel.get("entityRule") + "-ENTITYRULE");
            }
            if (this.flags.useEntityTypes) {
                arrayList.add(featureLabel.get("entityType") + "-ENTITYTYPE");
            }
            if (this.flags.useIsDateRange) {
                arrayList.add(featureLabel.get("isDateRange") + "-ISDATERANGE");
            }
            if (this.flags.useABSTRFreq) {
                arrayList.add(featureLabel.get("abstr") + "-ABSTRACT" + featureLabel.get("freq") + "-FREQ");
            }
            if (this.flags.useFREQ) {
                arrayList.add(featureLabel.get("freq") + "-FREQ");
            }
            if (this.flags.useMoreTags) {
                arrayList.add(featureLabel4.get(AbstractMapLabel.TAG_KEY) + "-" + word + "-PTAG-CWORD");
            }
            if (this.flags.usePosition) {
                arrayList.add(featureLabel.get("position") + "-POSITION");
            }
            if (this.flags.useBeginSent) {
                if (PascalTemplate.BACKGROUND_SYMBOL.equals(featureLabel.get("position"))) {
                    arrayList.add("BEGIN-SENT");
                    arrayList.add(featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-BEGIN-SENT");
                } else {
                    arrayList.add("IN-SENT");
                    arrayList.add(featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-IN-SENT");
                }
            }
            if (this.flags.useTags) {
                arrayList.add(featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TAG");
            }
            if (this.flags.useOrdinal) {
                if (isOrdinal(paddedList, i)) {
                    arrayList.add("C_ORDINAL");
                    if (isOrdinal(paddedList, i - 1)) {
                        arrayList.add("PC_ORDINAL");
                    }
                }
                if (isOrdinal(paddedList, i - 1)) {
                    arrayList.add("P_ORDINAL");
                }
            }
            if (this.flags.usePrev) {
                arrayList.add(featureLabel4.word() + "-PW");
                if (this.flags.useTags) {
                    arrayList.add(featureLabel4.get(AbstractMapLabel.TAG_KEY) + "-PTAG");
                }
                if (this.flags.useDistSim) {
                    arrayList.add(featureLabel4.get("distSim") + "-PDISTSIM");
                }
                if (this.flags.useIsURL) {
                    arrayList.add(featureLabel4.get("isURL") + "-PISURL");
                }
                if (this.flags.useEntityTypes) {
                    arrayList.add(featureLabel4.get("entityType") + "-PENTITYTYPE");
                }
            }
            if (this.flags.useNext) {
                arrayList.add(featureLabel2.word() + "-NW");
                if (this.flags.useTags) {
                    arrayList.add(featureLabel2.get(AbstractMapLabel.TAG_KEY) + "-NTAG");
                }
                if (this.flags.useDistSim) {
                    arrayList.add(featureLabel4.get("distSim") + "-NDISTSIM");
                }
                if (this.flags.useIsURL) {
                    arrayList.add(featureLabel2.get("isURL") + "-NISURL");
                }
                if (this.flags.useEntityTypes) {
                    arrayList.add(featureLabel2.get("entityType") + "-NENTITYTYPE");
                }
            }
            if (this.flags.useEitherSideWord) {
                arrayList.add(featureLabel4.word() + "-EW");
                arrayList.add(featureLabel2.word() + "-EW");
            }
            if (this.flags.useWordPairs) {
                arrayList.add(word + "-" + featureLabel4.word() + "-W-PW");
                arrayList.add(word + "-" + featureLabel2.word() + "-W-NW");
            }
            if (this.flags.useSymTags) {
                if (this.flags.useTags) {
                    arrayList.add(featureLabel4.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel2.get(AbstractMapLabel.TAG_KEY) + "-PCNTAGS");
                    arrayList.add(featureLabel.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel2.get(AbstractMapLabel.TAG_KEY) + "-CNTAGS");
                    arrayList.add(featureLabel4.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-PCTAGS");
                }
                if (this.flags.useDistSim) {
                    arrayList.add(featureLabel4.get("distSim") + "-" + featureLabel.get("distSim") + "-" + featureLabel2.get("distSim") + "-PCNDISTSIM");
                    arrayList.add(featureLabel.get("distSim") + "-" + featureLabel2.get("distSim") + "-CNDISTSIM");
                    arrayList.add(featureLabel4.get("distSim") + "-" + featureLabel.get("distSim") + "-PCDISTSIM");
                }
            }
            if (this.flags.useSymWordPairs) {
                arrayList.add(featureLabel4.word() + "-" + featureLabel2.word() + "-SWORDS");
            }
            if (this.flags.useGazFeatures) {
                if (!featureLabel.get("gaz").equals(this.flags.dropGaz)) {
                    arrayList.add(featureLabel.get("gaz") + "-GAZ");
                }
                if (!featureLabel2.get("gaz").equals(this.flags.dropGaz)) {
                    arrayList.add(featureLabel2.get("gaz") + "-NGAZ");
                }
                if (!featureLabel4.get("gaz").equals(this.flags.dropGaz)) {
                    arrayList.add(featureLabel4.get("gaz") + "-PGAZ");
                }
            }
            if (this.flags.useMoreGazFeatures && !featureLabel.get("gaz").equals(this.flags.dropGaz)) {
                arrayList.add(featureLabel.get("gaz") + "-" + word + "-CG-CW-GAZ");
                if (!featureLabel2.get("gaz").equals(this.flags.dropGaz)) {
                    arrayList.add(featureLabel.get("gaz") + "-" + featureLabel2.get("gaz") + "-CNGAZ");
                }
                if (!featureLabel4.get("gaz").equals(this.flags.dropGaz)) {
                    arrayList.add(featureLabel4.get("gaz") + "-" + featureLabel.get("gaz") + "-PCGAZ");
                }
            }
            if (this.flags.useAbbr || this.flags.useMinimalAbbr) {
                arrayList.add(featureLabel.get("abbr") + "-ABBR");
            }
            if ((this.flags.useAbbr1 || this.flags.useMinimalAbbr1) && !featureLabel.get("abbr").equals("XX")) {
                arrayList.add(featureLabel.get("abbr") + "-ABBR");
            }
            if (this.flags.useAbbr) {
                arrayList.add(featureLabel4.get("abbr") + "-" + featureLabel.get("abbr") + "-PCABBR");
                arrayList.add(featureLabel.get("abbr") + "-" + featureLabel2.get("abbr") + "-CNABBR");
                arrayList.add(featureLabel4.get("abbr") + "-" + featureLabel.get("abbr") + "-" + featureLabel2.get("abbr") + "-PCNABBR");
            }
            if (this.flags.useAbbr1 && !featureLabel.get("abbr").equals("XX")) {
                arrayList.add(featureLabel4.get("abbr") + "-" + featureLabel.get("abbr") + "-PCABBR");
                arrayList.add(featureLabel.get("abbr") + "-" + featureLabel2.get("abbr") + "-CNABBR");
                arrayList.add(featureLabel4.get("abbr") + "-" + featureLabel.get("abbr") + "-" + featureLabel2.get("abbr") + "-PCNABBR");
            }
            if (this.flags.useChunks) {
                arrayList.add(featureLabel4.get("chunk") + "-" + featureLabel.get("chunk") + "-PCCHUNK");
                arrayList.add(featureLabel.get("chunk") + "-" + featureLabel2.get("chunk") + "-CNCHUNK");
                arrayList.add(featureLabel4.get("chunk") + "-" + featureLabel.get("chunk") + "-" + featureLabel2.get("chunk") + "-PCNCHUNK");
            }
            if (this.flags.useMinimalAbbr) {
                arrayList.add(word + "-" + featureLabel.get("abbr") + "-CWABB");
            }
            if (this.flags.useMinimalAbbr1 && !featureLabel.get("abbr").equals("XX")) {
                arrayList.add(word + "-" + featureLabel.get("abbr") + "-CWABB");
            }
            String str5 = "";
            String str6 = "";
            if (this.flags.usePrevVB) {
                int i3 = i - 1;
                while (true) {
                    FeatureLabel featureLabel7 = paddedList.get(i3);
                    if (featureLabel7 == paddedList.getPad()) {
                        str5 = "X";
                        arrayList.add("X-PVB");
                        break;
                    }
                    if (((String) featureLabel7.get(AbstractMapLabel.TAG_KEY)).startsWith("VB")) {
                        arrayList.add(featureLabel7.word() + "-PVB");
                        str5 = featureLabel7.word();
                        break;
                    }
                    i3--;
                }
            }
            if (this.flags.useNextVB) {
                int i4 = i + 1;
                while (true) {
                    FeatureLabel featureLabel8 = paddedList.get(i4);
                    if (featureLabel8 == paddedList.getPad()) {
                        arrayList.add("X-NVB");
                        str6 = "X";
                        break;
                    }
                    if (((String) featureLabel8.get(AbstractMapLabel.TAG_KEY)).startsWith("VB")) {
                        arrayList.add(featureLabel8.word() + "-NVB");
                        str6 = featureLabel8.word();
                        break;
                    }
                    i4++;
                }
            }
            if (this.flags.useVB) {
                arrayList.add(str5 + "-" + str6 + "-PNVB");
            }
            if (this.flags.useShapeConjunctions) {
                arrayList.add(featureLabel.get("position") + featureLabel.shape() + "-POS-SH");
                if (this.flags.useTags) {
                    arrayList.add(featureLabel.tag() + featureLabel.shape() + "-TAG-SH");
                }
                if (this.flags.useDistSim) {
                    arrayList.add(featureLabel.get("distSim") + featureLabel.shape() + "-DISTSIM-SH");
                }
            }
            if (this.flags.useWordTag) {
                arrayList.add(featureLabel.word() + "-" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-W-T");
                arrayList.add(featureLabel.word() + "-" + featureLabel4.get(AbstractMapLabel.TAG_KEY) + "-W-PT");
                arrayList.add(featureLabel.word() + "-" + featureLabel2.get(AbstractMapLabel.TAG_KEY) + "-W-NT");
            }
            if (this.flags.useNPHead) {
                arrayList.add(featureLabel.get("head") + "-HW");
                if (this.flags.useTags) {
                    arrayList.add(featureLabel.get("head") + "-" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-HW-T");
                }
                if (this.flags.useDistSim) {
                    arrayList.add(featureLabel.get("head") + "-" + featureLabel.get("distSim") + "-HW-DISTSIM");
                }
            }
            if (this.flags.useNPGovernor) {
                arrayList.add(featureLabel.get("governor") + "-GW");
                if (this.flags.useTags) {
                    arrayList.add(featureLabel.get("governor") + "-" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-GW-T");
                }
                if (this.flags.useDistSim) {
                    arrayList.add(featureLabel.get("governor") + "-" + featureLabel.get("distSim") + "-DISTSIM-T1");
                }
            }
            if (this.flags.useHeadGov) {
                arrayList.add(featureLabel.get("head") + "-" + featureLabel.get("governor") + "-HW_GW");
            }
            if (this.flags.useClassFeature) {
                arrayList.add("###");
            }
            if (this.flags.useFirstWord) {
                arrayList.add(paddedList.get(0).word());
            }
            if (this.flags.useNGrams) {
                Collection<String> collection3 = this.wordToSubstrings.get(word);
                if (collection3 == null) {
                    collection3 = new ArrayList();
                    String str7 = "<" + word + ">";
                    if (this.flags.lowercaseNGrams) {
                        str7 = str7.toLowerCase();
                    }
                    if (this.flags.dehyphenateNGrams) {
                        str7 = dehyphenate(str7);
                    }
                    if (this.flags.greekifyNGrams) {
                        str7 = greekify(str7);
                    }
                    for (int i5 = 0; i5 < str7.length(); i5++) {
                        for (int i6 = i5 + 2; i6 <= str7.length(); i6++) {
                            if ((!this.flags.noMidNGrams || i5 == 0 || i6 == str7.length()) && (this.flags.maxNGramLeng < 0 || i6 - i5 <= this.flags.maxNGramLeng)) {
                                collection3.add(intern("#" + str7.substring(i5, i6) + "#"));
                            }
                        }
                    }
                    if (this.flags.cacheNGrams) {
                        this.wordToSubstrings.put(word, collection3);
                    }
                }
                arrayList.addAll(collection3);
                if (this.flags.conjoinShapeNGrams) {
                    String str8 = (String) featureLabel.get(AbstractMapLabel.SHAPE_KEY);
                    Iterator<String> it = collection3.iterator();
                    while (it.hasNext()) {
                        arrayList.add(it.next() + "-" + str8 + "-CNGram-CS");
                    }
                }
            }
            if (this.flags.useGazettes) {
                if (this.flags.sloppyGazette && (collection2 = this.wordToGazetteEntries.get(word)) != null) {
                    arrayList.addAll(collection2);
                }
                if (this.flags.cleanGazette && (collection = this.wordToGazetteInfos.get(word)) != null) {
                    for (GazetteInfo gazetteInfo : collection) {
                        boolean z = true;
                        for (int i7 = 0; i7 < gazetteInfo.words.length; i7++) {
                            z &= gazetteInfo.words[i7].equals(paddedList.get((i + i7) - gazetteInfo.loc).word());
                        }
                        if (z) {
                            arrayList.add(gazetteInfo.feature);
                        }
                    }
                }
            }
            if (this.flags.wordShape > -1 || this.flags.useShapeStrings) {
                arrayList.add(featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-TYPE");
                if (this.flags.useTypeSeqs) {
                    String str9 = (String) featureLabel.get(AbstractMapLabel.SHAPE_KEY);
                    String str10 = (String) featureLabel4.get(AbstractMapLabel.SHAPE_KEY);
                    String str11 = (String) featureLabel2.get(AbstractMapLabel.SHAPE_KEY);
                    arrayList.add(str10 + "-PTYPE");
                    arrayList.add(str11 + "-NTYPE");
                    arrayList.add(featureLabel4.word() + PTBLexer.ptbellipsis + str9 + "-PW_CTYPE");
                    arrayList.add(str9 + PTBLexer.ptbellipsis + featureLabel2.word() + "-NW_CTYPE");
                    arrayList.add(str10 + PTBLexer.ptbellipsis + str9 + "-PCTYPE");
                    arrayList.add(str9 + PTBLexer.ptbellipsis + str11 + "-CNTYPE");
                    arrayList.add(str10 + PTBLexer.ptbellipsis + str9 + PTBLexer.ptbellipsis + str11 + "-PCNTYPE");
                }
            }
            if (this.flags.useLastRealWord && featureLabel4.word().length() <= 3) {
                arrayList.add(featureLabel5.word() + PTBLexer.ptbellipsis + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-PPW_CTYPE");
            }
            if (this.flags.useNextRealWord && featureLabel2.word().length() <= 3) {
                arrayList.add(featureLabel3.word() + PTBLexer.ptbellipsis + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-NNW_CTYPE");
            }
            if (this.flags.useOccurrencePatterns) {
                arrayList.addAll(occurrencePatterns(paddedList, i));
            }
            if (this.flags.useDisjunctive) {
                for (int i8 = 1; i8 <= this.flags.disjunctionWidth; i8++) {
                    FeatureLabel featureLabel9 = paddedList.get(i + i8);
                    FeatureLabel featureLabel10 = paddedList.get(i - i8);
                    arrayList.add(featureLabel9.word() + "-DISJN");
                    if (this.flags.useDisjunctiveShapeInteraction) {
                        arrayList.add(featureLabel9.word() + "-" + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-DISJN-CS");
                    }
                    arrayList.add(featureLabel10.word() + "-DISJP");
                    if (this.flags.useDisjunctiveShapeInteraction) {
                        arrayList.add(featureLabel10.word() + "-" + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-DISJP-CS");
                    }
                }
            }
            if (this.flags.useWideDisjunctive) {
                for (int i9 = 1; i9 <= this.flags.wideDisjunctionWidth; i9++) {
                    arrayList.add(paddedList.get(i + i9).word() + "-DISJWN");
                    arrayList.add(paddedList.get(i - i9).word() + "-DISJWP");
                }
            }
            if (this.flags.useEitherSideDisjunctive) {
                for (int i10 = 1; i10 <= this.flags.disjunctionWidth; i10++) {
                    arrayList.add(paddedList.get(i + i10).word() + "-DISJWE");
                    arrayList.add(paddedList.get(i - i10).word() + "-DISJWE");
                }
            }
            if (this.flags.useDisjShape) {
                for (int i11 = 1; i11 <= this.flags.disjunctionWidth; i11++) {
                    arrayList.add(paddedList.get(i + i11).get(AbstractMapLabel.SHAPE_KEY) + "-NDISJSHAPE");
                    arrayList.add(featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-" + paddedList.get(i + i11).get(AbstractMapLabel.SHAPE_KEY) + "-CNDISJSHAPE");
                }
            }
            if (this.flags.useExtraTaggySequences) {
                if (this.flags.useTags) {
                    arrayList.add(featureLabel5.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel4.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TTS");
                    arrayList.add(featureLabel6.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel5.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel4.get(AbstractMapLabel.TAG_KEY) + "-" + featureLabel.get(AbstractMapLabel.TAG_KEY) + "-TTTS");
                }
                if (this.flags.useDistSim) {
                    arrayList.add(featureLabel5.get("distSim") + "-" + featureLabel4.get("distSim") + "-" + featureLabel.get("distSim") + "-DISTSIM_TTS1");
                    arrayList.add(featureLabel6.get("distSim") + "-" + featureLabel5.get("distSim") + "-" + featureLabel4.get("distSim") + "-" + featureLabel.get("distSim") + "-DISTSIM_TTTS1");
                }
            }
            if (this.flags.useMUCFeatures) {
                arrayList.add(featureLabel.get("section") + "-SECTION");
                arrayList.add(featureLabel.get("wordPos") + "-WORD_POSITION");
                arrayList.add(featureLabel.get("sentPos") + "-SENT_POSITION");
                arrayList.add(featureLabel.get("paraPos") + "-PARA_POSITION");
                arrayList.add(featureLabel.get("wordPos") + "-" + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-WORD_POSITION_SHAPE");
            }
        } else if (this.flags.useInternal) {
            if (this.flags.useWord) {
                arrayList.add(word + "-WORD");
            }
            if (this.flags.useNGrams) {
                Collection<String> collection4 = this.wordToSubstrings.get(word);
                if (collection4 == null) {
                    collection4 = new ArrayList();
                    String str12 = "<" + word + ">";
                    if (this.flags.lowercaseNGrams) {
                        str12 = str12.toLowerCase();
                    }
                    if (this.flags.dehyphenateNGrams) {
                        str12 = dehyphenate(str12);
                    }
                    if (this.flags.greekifyNGrams) {
                        str12 = greekify(str12);
                    }
                    for (int i12 = 0; i12 < str12.length(); i12++) {
                        for (int i13 = i12 + 2; i13 <= str12.length(); i13++) {
                            if ((!this.flags.noMidNGrams || i12 == 0 || i13 == str12.length()) && (this.flags.maxNGramLeng < 0 || i13 - i12 <= this.flags.maxNGramLeng)) {
                                collection4.add(intern("#" + str12.substring(i12, i13) + "#"));
                            }
                        }
                    }
                    if (this.flags.cacheNGrams) {
                        this.wordToSubstrings.put(word, collection4);
                    }
                }
                arrayList.addAll(collection4);
                if (this.flags.conjoinShapeNGrams) {
                    String str13 = (String) featureLabel.get(AbstractMapLabel.SHAPE_KEY);
                    Iterator<String> it2 = collection4.iterator();
                    while (it2.hasNext()) {
                        arrayList.add(it2.next() + "-" + str13 + "-CNGram-CS");
                    }
                }
            }
            if (this.flags.wordShape > -1 || this.flags.useShapeStrings) {
                arrayList.add(featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-TYPE");
            }
            if (this.flags.useOccurrencePatterns) {
                arrayList.addAll(occurrencePatterns(paddedList, i));
            }
        } else if (this.flags.useExternal) {
            if (this.flags.usePrev) {
                arrayList.add(featureLabel4.word() + "-PW");
            }
            if (this.flags.useNext) {
                arrayList.add(featureLabel2.word() + "-NW");
            }
            if (this.flags.useWordPairs) {
                arrayList.add(word + "-" + featureLabel4.word() + "-W-PW");
                arrayList.add(word + "-" + featureLabel2.word() + "-W-NW");
            }
            if (this.flags.useSymWordPairs) {
                arrayList.add(featureLabel4.word() + "-" + featureLabel2.word() + "-SWORDS");
            }
            if ((this.flags.wordShape > -1 || this.flags.useShapeStrings) && this.flags.useTypeSeqs) {
                String str14 = (String) featureLabel.get(AbstractMapLabel.SHAPE_KEY);
                String str15 = (String) featureLabel4.get(AbstractMapLabel.SHAPE_KEY);
                String str16 = (String) featureLabel2.get(AbstractMapLabel.SHAPE_KEY);
                arrayList.add(str15 + "-PTYPE");
                arrayList.add(str16 + "-NTYPE");
                arrayList.add(featureLabel4.word() + PTBLexer.ptbellipsis + str14 + "-PW_CTYPE");
                arrayList.add(str14 + PTBLexer.ptbellipsis + featureLabel2.word() + "-NW_CTYPE");
                if (this.flags.maxLeft > 0) {
                    arrayList.add(str15 + PTBLexer.ptbellipsis + str14 + "-PCTYPE");
                }
                arrayList.add(str14 + PTBLexer.ptbellipsis + str16 + "-CNTYPE");
                arrayList.add(str15 + PTBLexer.ptbellipsis + str14 + PTBLexer.ptbellipsis + str16 + "-PCNTYPE");
            }
            if (this.flags.useLastRealWord && featureLabel4.word().length() <= 3) {
                arrayList.add(featureLabel5.word() + PTBLexer.ptbellipsis + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-PPW_CTYPE");
            }
            if (this.flags.useNextRealWord && featureLabel2.word().length() <= 3) {
                arrayList.add(featureLabel3.word() + PTBLexer.ptbellipsis + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-NNW_CTYPE");
            }
            if (this.flags.useDisjunctive) {
                for (int i14 = 1; i14 <= this.flags.disjunctionWidth; i14++) {
                    FeatureLabel featureLabel11 = paddedList.get(i + i14);
                    FeatureLabel featureLabel12 = paddedList.get(i - i14);
                    arrayList.add(featureLabel11.word() + "-DISJN");
                    if (this.flags.useDisjunctiveShapeInteraction) {
                        arrayList.add(featureLabel11.word() + "-" + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-DISJN-CS");
                    }
                    arrayList.add(featureLabel12.word() + "-DISJP");
                    if (this.flags.useDisjunctiveShapeInteraction) {
                        arrayList.add(featureLabel12.word() + "-" + featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-DISJP-CS");
                    }
                }
            }
            if (this.flags.useWideDisjunctive) {
                for (int i15 = 1; i15 <= this.flags.wideDisjunctionWidth; i15++) {
                    arrayList.add(paddedList.get(i + i15).word() + "-DISJWN");
                    arrayList.add(paddedList.get(i - i15).word() + "-DISJWP");
                }
            }
            if (this.flags.useDisjShape) {
                for (int i16 = 1; i16 <= this.flags.disjunctionWidth; i16++) {
                    arrayList.add(paddedList.get(i + i16).get(AbstractMapLabel.SHAPE_KEY) + "-NDISJSHAPE");
                    arrayList.add(featureLabel.get(AbstractMapLabel.SHAPE_KEY) + "-" + paddedList.get(i + i16).get(AbstractMapLabel.SHAPE_KEY) + "-CNDISJSHAPE");
                }
            }
        }
        if (this.flags.twoStage) {
            arrayList.add(featureLabel.get("bin1") + "-BIN1");
            arrayList.add(featureLabel.get("bin2") + "-BIN2");
            arrayList.add(featureLabel.get("bin3") + "-BIN3");
            arrayList.add(featureLabel.get("bin4") + "-BIN4");
            arrayList.add(featureLabel.get("bin5") + "-BIN5");
            arrayList.add(featureLabel.get("bin6") + "-BIN6");
        }
        return arrayList;
    }

    /**
     * Generates the features that pair the current position with the previous one
     * (several of them also consult the following label).
     * <p>
     * The feature set depends on {@code flags.useInternal} and {@code flags.useExternal}:
     * the largest set when both are on, a smaller set for each flag on its own, and
     * nothing when both are off.
     *
     * @param paddedList the labels to read; positions {@code i - 1}, {@code i} and {@code i + 1} are fetched
     * @param i index of the current position
     * @return the generated feature strings, in generation order
     */
    protected Collection<String> featuresCpC(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        FeatureLabel next = paddedList.get(i + 1);
        FeatureLabel prev = paddedList.get(i - 1);
        String curWord = cur.word();
        List<String> feats = new ArrayList<String>();
        boolean internal = this.flags.useInternal;
        boolean external = this.flags.useExternal;
        // Shape-sequence features need a shape source, useTypeSeqs, and at least one of the 2-/3-shape variants.
        boolean shapeSeqs = (this.flags.wordShape > -1 || this.flags.useShapeStrings)
                && this.flags.useTypeSeqs
                && (this.flags.useTypeSeqs2 || this.flags.useTypeSeqs3);

        if (internal && external) {
            if (this.flags.useOrdinal) {
                if (isOrdinal(paddedList, i)) {
                    feats.add("C_ORDINAL");
                    if (isOrdinal(paddedList, i - 1)) {
                        feats.add("PC_ORDINAL");
                    }
                }
                if (isOrdinal(paddedList, i - 1)) {
                    feats.add("P_ORDINAL");
                }
            }
            if (this.flags.useAbbr || this.flags.useMinimalAbbr) {
                feats.add(prev.get("abbr") + "-" + cur.get("abbr") + "-PABBRANS");
            }
            if (this.flags.useAbbr1 || this.flags.useMinimalAbbr1) {
                // Same feature string as above, but skipped when the current "abbr" value is "XX".
                if (!cur.get("abbr").equals("XX")) {
                    feats.add(prev.get("abbr") + "-" + cur.get("abbr") + "-PABBRANS");
                }
            }
            if (this.flags.useChunkySequences) {
                feats.add(prev.get("chunk") + "-" + cur.get("chunk") + "-" + next.get("chunk") + "-PCNCHUNK");
            }
            if (this.flags.usePrev && this.flags.useSequences && this.flags.usePrevSequences) {
                feats.add("PSEQ");
                feats.add(curWord + "-PSEQW");
            }
            if (shapeSeqs) {
                String prevShape = (String) prev.get(AbstractMapLabel.SHAPE_KEY);
                String curShape = (String) cur.get(AbstractMapLabel.SHAPE_KEY);
                if (this.flags.useTypeSeqs3) {
                    feats.add(prevShape + "-" + curShape + "-" + next.get(AbstractMapLabel.SHAPE_KEY) + "-PCNSHAPES");
                }
                if (this.flags.useTypeSeqs2) {
                    feats.add(prevShape + "-" + curShape + "-TYPES");
                }
            }
            if (this.flags.useTypeySequences) {
                feats.add(cur.get(AbstractMapLabel.SHAPE_KEY) + "-TPS2");
                feats.add(next.get(AbstractMapLabel.SHAPE_KEY) + "-TNS1");
            }
            if (this.flags.useTaggySequences) {
                if (this.flags.useTags) {
                    feats.add(prev.get(AbstractMapLabel.TAG_KEY) + "-" + cur.get(AbstractMapLabel.TAG_KEY) + "-TS");
                }
                if (this.flags.useDistSim) {
                    feats.add(prev.get("distSim") + "-" + cur.get("distSim") + "-DISTSIM_TS1");
                }
            }
            if (this.flags.useParenMatching) {
                if (this.flags.useReverse) {
                    // Reverse mode: the current word must be an opener and the previous word a closer.
                    if (curWord.equals("(") || curWord.equals("[") || curWord.equals(PTBLexer.openparen)) {
                        String prevWord = prev.word();
                        if (prevWord.equals(")") || prevWord.equals("]") || prevWord.equals(PTBLexer.closeparen)) {
                            feats.add("PAREN-MATCH");
                        }
                    }
                } else {
                    // Forward mode: the current word must be a closer and the previous word an opener.
                    if (curWord.equals(")") || curWord.equals("]") || curWord.equals(PTBLexer.closeparen)) {
                        String prevWord = prev.word();
                        if (prevWord.equals("(") || prevWord.equals("[") || prevWord.equals(PTBLexer.openparen)) {
                            feats.add("PAREN-MATCH");
                        }
                    }
                }
            }
            if (this.flags.useEntityTypeSequences) {
                feats.add(prev.get("entityType") + "-" + cur.get("entityType") + "-ETSEQ");
            }
            if (this.flags.useURLSequences) {
                feats.add(prev.get("isURL") + "-" + cur.get("isURL") + "-URLSEQ");
            }
        } else if (internal) {
            if (this.flags.useSequences && this.flags.usePrevSequences) {
                feats.add("PSEQ");
                feats.add(curWord + "-PSEQW");
            }
            if (this.flags.useTypeySequences) {
                feats.add(cur.get(AbstractMapLabel.SHAPE_KEY) + "-TPS2");
            }
        } else if (external) {
            if (shapeSeqs) {
                String prevShape = (String) prev.get(AbstractMapLabel.SHAPE_KEY);
                String curShape = (String) cur.get(AbstractMapLabel.SHAPE_KEY);
                if (this.flags.useTypeSeqs3) {
                    feats.add(prevShape + "-" + curShape + "-" + next.get(AbstractMapLabel.SHAPE_KEY) + "-PCNSHAPES");
                }
                if (this.flags.useTypeSeqs2) {
                    feats.add(prevShape + "-" + curShape + "-TYPES");
                }
            }
            if (this.flags.useTypeySequences) {
                feats.add(next.get(AbstractMapLabel.SHAPE_KEY) + "-TNS1");
                feats.add(prev.get(AbstractMapLabel.SHAPE_KEY) + "-" + cur.get(AbstractMapLabel.SHAPE_KEY) + "-TPS");
            }
        }
        return feats;
    }

    /**
     * Generates the features that relate the current position to the one two steps back.
     * <p>
     * Covers the "abbr" pair features and a bracket-matching feature that fires when the
     * label at {@code i - 2} carries the counterpart bracket and the label at {@code i - 1}
     * does not.
     *
     * @param paddedList the labels to read; positions {@code i}, {@code i - 1} and {@code i - 2} are fetched
     * @param i index of the current position
     * @return the generated feature strings, in generation order
     */
    protected Collection<String> featuresCp2C(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        FeatureLabel back1 = paddedList.get(i - 1);
        FeatureLabel back2 = paddedList.get(i - 2);
        String curWord = cur.word();
        List<String> feats = new ArrayList<String>();

        if (this.flags.useMoreAbbr) {
            feats.add(back2.get("abbr") + "-" + cur.get("abbr") + "-P2ABBRANS");
        }
        if (this.flags.useMinimalAbbr) {
            feats.add(back2.get("abbr") + "-" + cur.get("abbr") + "-P2AP2CABB");
        }
        if (this.flags.useMinimalAbbr1) {
            // Same feature string as for useMinimalAbbr, but skipped when the current "abbr" value is "XX".
            if (!cur.get("abbr").equals("XX")) {
                feats.add(back2.get("abbr") + "-" + cur.get("abbr") + "-P2AP2CABB");
            }
        }

        if (!this.flags.useParenMatching) {
            return feats;
        }
        if (this.flags.useReverse) {
            // Reverse mode: current opens, i-2 closes, and i-1 is not itself a closer.
            if (curWord.equals("(") || curWord.equals("[") || curWord.equals(PTBLexer.openparen)) {
                String farWord = back2.word();
                if (farWord.equals(")") || farWord.equals("]") || farWord.equals(PTBLexer.closeparen)) {
                    String midWord = back1.word();
                    if (!midWord.equals(")") && !midWord.equals("]") && !midWord.equals(PTBLexer.closeparen)) {
                        feats.add("PAREN-MATCH");
                    }
                }
            }
        } else {
            // Forward mode: current closes, i-2 opens, and i-1 is not itself an opener.
            if (curWord.equals(")") || curWord.equals("]") || curWord.equals(PTBLexer.closeparen)) {
                String farWord = back2.word();
                if (farWord.equals("(") || farWord.equals("[") || farWord.equals(PTBLexer.openparen)) {
                    String midWord = back1.word();
                    if (!midWord.equals("(") && !midWord.equals("[") && !midWord.equals(PTBLexer.openparen)) {
                        feats.add("PAREN-MATCH");
                    }
                }
            }
        }
        return feats;
    }

    /**
     * Generates the bracket-matching feature for the current position and the one three steps back.
     * <p>
     * "PAREN-MATCH" is emitted when {@code flags.useParenMatching} is on, {@code flags.maxLeft}
     * is at least 3, the current word and the word at {@code i - 3} form a bracket pair, and
     * neither word in between is a bracket of the same kind as the one at {@code i - 3}.
     * Only the literal characters {@code ( ) [ ]} are compared here.
     *
     * @param paddedList the labels to read; positions {@code i} down to {@code i - 3} are fetched
     * @param i index of the current position
     * @return a list that is empty or holds the single feature "PAREN-MATCH"
     */
    protected Collection<String> featuresCp3C(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        FeatureLabel back1 = paddedList.get(i - 1);
        FeatureLabel back2 = paddedList.get(i - 2);
        FeatureLabel back3 = paddedList.get(i - 3);
        String curWord = cur.word();
        List<String> feats = new ArrayList<String>();
        if (!this.flags.useParenMatching) {
            return feats;
        }
        // Reverse mode: current opens and the far word closes; forward mode is the mirror image.
        boolean reversed = this.flags.useReverse;
        String nearRound = reversed ? "(" : ")";
        String nearSquare = reversed ? "[" : "]";
        String farRound = reversed ? ")" : "(";
        String farSquare = reversed ? "]" : "[";

        if (!curWord.equals(nearRound) && !curWord.equals(nearSquare)) {
            return feats;
        }
        if (this.flags.maxLeft < 3) {
            return feats;
        }
        String farWord = back3.word();
        if (!farWord.equals(farRound) && !farWord.equals(farSquare)) {
            return feats;
        }
        // Checked from the far side towards the current position.
        FeatureLabel[] between = {back2, back1};
        for (FeatureLabel label : between) {
            String w = label.word();
            if (w.equals(farRound) || w.equals(farSquare)) {
                return feats;
            }
        }
        feats.add("PAREN-MATCH");
        return feats;
    }

    /**
     * Generates the bracket-matching feature for the current position and the one four steps back.
     * <p>
     * "PAREN-MATCH" is emitted when {@code flags.useParenMatching} is on, {@code flags.maxLeft}
     * is at least 4, the current word and the word at {@code i - 4} form a bracket pair, and
     * none of the words in between is a bracket of the same kind as the one at {@code i - 4}.
     * Only the literal characters {@code ( ) [ ]} are compared here.
     *
     * @param paddedList the labels to read; positions {@code i} down to {@code i - 4} are fetched
     * @param i index of the current position
     * @return a list that is empty or holds the single feature "PAREN-MATCH"
     */
    protected Collection<String> featuresCp4C(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        FeatureLabel back1 = paddedList.get(i - 1);
        FeatureLabel back2 = paddedList.get(i - 2);
        FeatureLabel back3 = paddedList.get(i - 3);
        FeatureLabel back4 = paddedList.get(i - 4);
        String curWord = cur.word();
        List<String> feats = new ArrayList<String>();
        if (!this.flags.useParenMatching) {
            return feats;
        }
        // Reverse mode: current opens and the far word closes; forward mode is the mirror image.
        boolean reversed = this.flags.useReverse;
        String nearRound = reversed ? "(" : ")";
        String nearSquare = reversed ? "[" : "]";
        String farRound = reversed ? ")" : "(";
        String farSquare = reversed ? "]" : "[";

        if (!curWord.equals(nearRound) && !curWord.equals(nearSquare)) {
            return feats;
        }
        if (this.flags.maxLeft < 4) {
            return feats;
        }
        String farWord = back4.word();
        if (!farWord.equals(farRound) && !farWord.equals(farSquare)) {
            return feats;
        }
        // Checked from the far side towards the current position.
        FeatureLabel[] between = {back3, back2, back1};
        for (FeatureLabel label : between) {
            String w = label.word();
            if (w.equals(farRound) || w.equals(farSquare)) {
                return feats;
            }
        }
        feats.add("PAREN-MATCH");
        return feats;
    }

    /**
     * Generates the bracket-matching feature for the current position and the one five steps back.
     * <p>
     * "PAREN-MATCH" is emitted when {@code flags.useParenMatching} is on, {@code flags.maxLeft}
     * is at least 5, the current word and the word at {@code i - 5} form a bracket pair, and
     * none of the words in between is a bracket of the same kind as the one at {@code i - 5}.
     * Only the literal characters {@code ( ) [ ]} are compared here.
     *
     * @param paddedList the labels to read; positions {@code i} down to {@code i - 5} are fetched
     * @param i index of the current position
     * @return a list that is empty or holds the single feature "PAREN-MATCH"
     */
    protected Collection<String> featuresCp5C(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        FeatureLabel back1 = paddedList.get(i - 1);
        FeatureLabel back2 = paddedList.get(i - 2);
        FeatureLabel back3 = paddedList.get(i - 3);
        FeatureLabel back4 = paddedList.get(i - 4);
        FeatureLabel back5 = paddedList.get(i - 5);
        String curWord = cur.word();
        List<String> feats = new ArrayList<String>();
        if (!this.flags.useParenMatching) {
            return feats;
        }
        // Reverse mode: current opens and the far word closes; forward mode is the mirror image.
        boolean reversed = this.flags.useReverse;
        String nearRound = reversed ? "(" : ")";
        String nearSquare = reversed ? "[" : "]";
        String farRound = reversed ? ")" : "(";
        String farSquare = reversed ? "]" : "[";

        if (!curWord.equals(nearRound) && !curWord.equals(nearSquare)) {
            return feats;
        }
        if (this.flags.maxLeft < 5) {
            return feats;
        }
        String farWord = back5.word();
        if (!farWord.equals(farRound) && !farWord.equals(farSquare)) {
            return feats;
        }
        // Checked from the far side towards the current position.
        FeatureLabel[] between = {back4, back3, back2, back1};
        for (FeatureLabel label : between) {
            String w = label.word();
            if (w.equals(farRound) || w.equals(farSquare)) {
                return feats;
            }
        }
        feats.add("PAREN-MATCH");
        return feats;
    }

    /**
     * Generates the features spanning the current position and the two positions before it.
     * <p>
     * The feature set depends on {@code flags.useInternal} and {@code flags.useExternal}:
     * the largest set when both are on, a smaller set for each flag on its own, and
     * nothing when both are off.
     *
     * @param paddedList the labels to read; positions {@code i}, {@code i - 1} and {@code i - 2} are fetched
     * @param i index of the current position
     * @return the generated feature strings, in generation order
     */
    protected Collection<String> featuresCpCp2C(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        FeatureLabel back1 = paddedList.get(i - 1);
        FeatureLabel back2 = paddedList.get(i - 2);
        List<String> feats = new ArrayList<String>();
        boolean internal = this.flags.useInternal;
        boolean external = this.flags.useExternal;
        // The shape-triple feature needs a shape source, both type-sequence flags and a left window of at least 2.
        boolean shapeTriple = (this.flags.wordShape > -1 || this.flags.useShapeStrings)
                && this.flags.useTypeSeqs
                && this.flags.useTypeSeqs2
                && this.flags.maxLeft >= 2;

        if (internal && external) {
            if (this.flags.useAbbr) {
                feats.add(back2.get("abbr") + "-" + back1.get("abbr") + "-" + cur.get("abbr") + "-2PABBRANS");
            }
            if (this.flags.useChunks) {
                feats.add(back2.get("chunk") + "-" + back1.get("chunk") + "-" + cur.get("chunk") + "-2PCHUNKS");
            }
            if (this.flags.useLongSequences) {
                feats.add("PPSEQ");
            }
            if (this.flags.useBoundarySequences) {
                if (back1.word().equals(CoNLLDocumentReaderAndWriter.BOUNDARY)) {
                    feats.add("BNDRY-SPAN-PPSEQ");
                }
            }
            if (this.flags.useTaggySequences) {
                boolean withShape = this.flags.useTaggySequencesShapeInteraction;
                if (this.flags.useTags) {
                    String tags = back2.get(AbstractMapLabel.TAG_KEY) + "-" + back1.get(AbstractMapLabel.TAG_KEY) + "-" + cur.get(AbstractMapLabel.TAG_KEY);
                    feats.add(tags + "-TTS");
                    if (withShape) {
                        feats.add(tags + "-" + cur.get(AbstractMapLabel.SHAPE_KEY) + "-TTS-CS");
                    }
                }
                if (this.flags.useDistSim) {
                    String sims = back2.get("distSim") + "-" + back1.get("distSim") + "-" + cur.get("distSim");
                    feats.add(sims + "-DISTSIM_TTS1");
                    if (withShape) {
                        feats.add(sims + "-" + cur.get(AbstractMapLabel.SHAPE_KEY) + "-DISTSIM_TTS1-CS");
                    }
                }
            }
            if (shapeTriple) {
                String shape2 = (String) back2.get(AbstractMapLabel.SHAPE_KEY);
                String shape1 = (String) back1.get(AbstractMapLabel.SHAPE_KEY);
                String shape0 = (String) cur.get(AbstractMapLabel.SHAPE_KEY);
                feats.add(shape2 + "-" + shape1 + "-" + shape0 + "-TYPETYPES");
            }
        } else if (internal) {
            if (this.flags.useLongSequences) {
                feats.add("PPSEQ");
            }
        } else if (external) {
            if (this.flags.useLongSequences) {
                feats.add("PPSEQ");
            }
            if (shapeTriple) {
                String shape2 = (String) back2.get(AbstractMapLabel.SHAPE_KEY);
                String shape1 = (String) back1.get(AbstractMapLabel.SHAPE_KEY);
                String shape0 = (String) cur.get(AbstractMapLabel.SHAPE_KEY);
                feats.add(shape2 + "-" + shape1 + "-" + shape0 + "-TYPETYPES");
            }
        }
        return feats;
    }

    /**
     * Generates the features spanning the current position and the three positions before it.
     * <p>
     * Every feature produced here requires {@code flags.maxLeft >= 3}; the tag and distSim
     * sequences additionally require {@code flags.useTaggySequences} and are suppressed by
     * {@code flags.dontExtendTaggy}.
     *
     * @param paddedList the labels to read; positions {@code i} down to {@code i - 3} are fetched
     * @param i index of the current position
     * @return the generated feature strings, in generation order
     */
    protected Collection<String> featuresCpCp2Cp3C(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        FeatureLabel back1 = paddedList.get(i - 1);
        FeatureLabel back2 = paddedList.get(i - 2);
        FeatureLabel back3 = paddedList.get(i - 3);
        List<String> feats = new ArrayList<String>();
        boolean wideEnough = this.flags.maxLeft >= 3;

        if (this.flags.useTaggySequences) {
            boolean extend = wideEnough && !this.flags.dontExtendTaggy;
            boolean withShape = this.flags.useTaggySequencesShapeInteraction;
            if (this.flags.useTags && extend) {
                String tags = back3.get(AbstractMapLabel.TAG_KEY) + "-" + back2.get(AbstractMapLabel.TAG_KEY) + "-" + back1.get(AbstractMapLabel.TAG_KEY) + "-" + cur.get(AbstractMapLabel.TAG_KEY);
                feats.add(tags + "-TTTS");
                if (withShape) {
                    feats.add(tags + "-" + cur.get(AbstractMapLabel.SHAPE_KEY) + "-TTTS-CS");
                }
            }
            if (this.flags.useDistSim && extend) {
                String sims = back3.get("distSim") + "-" + back2.get("distSim") + "-" + back1.get("distSim") + "-" + cur.get("distSim");
                feats.add(sims + "-DISTSIM_TTTS1");
                if (withShape) {
                    feats.add(sims + "-" + cur.get(AbstractMapLabel.SHAPE_KEY) + "-DISTSIM_TTTS1-CS");
                }
            }
        }

        if (!wideEnough) {
            return feats;
        }
        if (this.flags.useLongSequences) {
            feats.add("PPPSEQ");
        }
        if (this.flags.useBoundarySequences) {
            // The boundary test looks at the immediately preceding word.
            if (back1.word().equals(CoNLLDocumentReaderAndWriter.BOUNDARY)) {
                feats.add("BNDRY-SPAN-PPPSEQ");
            }
        }
        return feats;
    }

    /**
     * Generates the features for the clique reaching four positions back.
     * <p>
     * Nothing is produced unless {@code flags.maxLeft >= 4}. Only the label at
     * {@code i - 1} is actually inspected (for the boundary feature).
     *
     * @param paddedList the labels to read
     * @param i index of the current position
     * @return the generated feature strings, in generation order
     */
    protected Collection<String> featuresCpCp2Cp3Cp4C(PaddedList<FeatureLabel> paddedList, int i) {
        List<String> feats = new ArrayList<String>();
        FeatureLabel prev = paddedList.get(i - 1);
        if (this.flags.maxLeft < 4) {
            return feats;
        }
        if (this.flags.useLongSequences) {
            feats.add("PPPPSEQ");
        }
        if (this.flags.useBoundarySequences) {
            if (prev.word().equals(CoNLLDocumentReaderAndWriter.BOUNDARY)) {
                feats.add("BNDRY-SPAN-PPPPSEQ");
            }
        }
        return feats;
    }

    /**
     * Generates the next-sequence features for the current position.
     * <p>
     * Produces "NSEQ" and the current word suffixed with "-NSEQW" when
     * {@code useNext}, {@code useSequences} and {@code useNextSequences} are all on.
     *
     * @param paddedList the labels to read
     * @param i index of the current position
     * @return an empty list, or the two features described above
     */
    protected Collection<String> featuresCnC(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        List<String> feats = new ArrayList<String>();
        boolean enabled = this.flags.useNext && this.flags.useSequences && this.flags.useNextSequences;
        if (!enabled) {
            return feats;
        }
        feats.add("NSEQ");
        feats.add(cur.word() + "-NSEQW");
        return feats;
    }

    /**
     * Generates the previous-and-next sequence features for the current position.
     * <p>
     * Produces "PNSEQ" and the current word suffixed with "-PNSEQW" when
     * {@code useNext}, {@code usePrev}, {@code useSequences}, {@code usePrevSequences}
     * and {@code useNextSequences} are all on.
     *
     * @param paddedList the labels to read
     * @param i index of the current position
     * @return an empty list, or the two features described above
     */
    protected Collection<String> featuresCpCnC(PaddedList<FeatureLabel> paddedList, int i) {
        FeatureLabel cur = paddedList.get(i);
        List<String> feats = new ArrayList<String>();
        boolean bothSides = this.flags.useNext && this.flags.usePrev;
        boolean sequences = this.flags.useSequences && this.flags.usePrevSequences && this.flags.useNextSequences;
        if (!(bothSides && sequences)) {
            return feats;
        }
        feats.add("PNSEQ");
        feats.add(cur.word() + "-PNSEQW");
        return feats;
    }

    /**
     * Flips the sign of an offset when {@code flags.useReverse} is set.
     *
     * @param i the offset
     * @return {@code -i} in reverse mode, otherwise {@code i} unchanged
     */
    int reverse(int i) {
        if (this.flags.useReverse) {
            return -i;
        }
        return i;
    }

    /**
     * Generates features describing where else the current word occurs nearby.
     * <p>
     * If the current word is not name-cased, or the adjacent words fail the
     * {@code noUpperCase}/{@code hasLetter} checks, or the preceding label is the list's
     * pad, the single feature "NO-OCCURRENCE-PATTERN" is returned. Otherwise offsets
     * 3..149 are scanned in both directions for other occurrences of the word, and a
     * feature is emitted per kind of occurrence found. The "XY-" features are used when
     * the preceding word is itself name-cased and tagged "NNP"; the "X-" features otherwise.
     * <p>
     * All offsets are passed through {@link #reverse(int)}, including the backward scan of
     * the "X-" branch, which previously tested the un-reversed position while inspecting
     * the neighbours of the reversed one. Tag comparisons are null-safe, so a label
     * without a tag is simply treated as not "NNP".
     *
     * @param paddedList the labels to scan
     * @param i index of the current position
     * @return the set of occurrence features, or the singleton "NO-OCCURRENCE-PATTERN" list
     */
    private Collection<String> occurrencePatterns(PaddedList<FeatureLabel> paddedList, int i) {
        String word = paddedList.get(i).word();
        String nextWord = paddedList.get(i + reverse(1)).word();
        FeatureLabel prevLabel = paddedList.get(i - reverse(1));
        String prevWord = prevLabel.word();
        if (!isNameCase(word) || !noUpperCase(nextWord) || !hasLetter(nextWord) || !hasLetter(prevWord) || prevLabel == paddedList.getPad()) {
            return Collections.singletonList("NO-OCCURRENCE-PATTERN");
        }
        Collection<String> patterns = new HashSet<String>();
        if (isNameCasedNnp(prevLabel)) {
            for (int jump = 3; jump < 150; jump++) {
                if (paddedList.get(i + reverse(jump)).word().equals(word)) {
                    if (paddedList.get(i + reverse(jump - 1)).word().equals(prevWord)) {
                        patterns.add("XY-NEXT-OCCURRENCE-XY");
                    } else {
                        patterns.add("XY-NEXT-OCCURRENCE-Y");
                    }
                }
            }
            for (int jump = -3; jump > -150; jump--) {
                if (paddedList.get(i + reverse(jump)).word().equals(word)) {
                    if (paddedList.get(i + reverse(jump - 1)).word().equals(prevWord)) {
                        patterns.add("XY-PREV-OCCURRENCE-XY");
                    } else {
                        patterns.add("XY-PREV-OCCURRENCE-Y");
                    }
                }
            }
        } else {
            for (int jump = 3; jump < 150; jump++) {
                if (paddedList.get(i + reverse(jump)).word().equals(word)) {
                    if (isNameCasedNnp(paddedList.get(i + reverse(jump - 1)))) {
                        patterns.add("X-NEXT-OCCURRENCE-YX");
                    } else if (isNameCasedNnp(paddedList.get(i + reverse(jump + 1)))) {
                        patterns.add("X-NEXT-OCCURRENCE-XY");
                    } else {
                        patterns.add("X-NEXT-OCCURRENCE-X");
                    }
                }
            }
            for (int jump = -3; jump > -150; jump--) {
                // Fixed: the offset goes through reverse() so the matched position and the
                // neighbours inspected below lie on the same side of i.
                if (paddedList.get(i + reverse(jump)).word().equals(word)) {
                    if (isNameCasedNnp(paddedList.get(i + reverse(jump + 1)))) {
                        patterns.add("X-PREV-OCCURRENCE-YX");
                    } else if (isNameCasedNnp(paddedList.get(i + reverse(jump - 1)))) {
                        patterns.add("X-PREV-OCCURRENCE-XY");
                    } else {
                        patterns.add("X-PREV-OCCURRENCE-X");
                    }
                }
            }
        }
        return patterns;
    }

    /** Returns true when the label's word is name-cased and its tag is "NNP"; a missing tag counts as false. */
    private boolean isNameCasedNnp(FeatureLabel label) {
        return isNameCase(label.word()) && "NNP".equals(label.get(AbstractMapLabel.TAG_KEY));
    }

    /**
     * Optionally interns a string.
     *
     * @param str the string to process
     * @return {@code str.intern()} when {@code flags.intern} is set, otherwise {@code str} itself
     */
    String intern(String str) {
        if (this.flags.intern) {
            return str.intern();
        }
        return str;
    }

    /**
     * Loads every gazette file listed in {@code flags.gazettes} via {@code readGazette}.
     * <p>
     * A {@code null} gazette list is replaced by an empty one. Each file's reader is closed
     * even when reading fails. On an {@link IOException} the stack trace is printed and the
     * JVM exits with status 1.
     */
    public void initGazette() {
        try {
            if (this.flags.gazettes == null) {
                this.flags.gazettes = new ArrayList<String>();
            }
            for (String gazettePath : this.flags.gazettes) {
                BufferedReader reader = new BufferedReader(new FileReader(gazettePath));
                try {
                    readGazette(reader);
                } finally {
                    // Release the file handle even if readGazette throws.
                    reader.close();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}
