package edu.stanford.nlp.process;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/process/WordShapeClassifier.class */
/**
 * Static utilities that map a word onto a coarse "shape" string (for example
 * {@code "WT-Xx"} or {@code "CAPITALIZED"}) describing its mix of upper-case
 * letters, lower-case letters, digits and other characters.
 *
 * <p>Several shaping schemes are provided; each is identified by one of the
 * {@code WORDSHAPE*} integer constants, which can be obtained from a name via
 * {@link #lookupShaper(String)}.
 *
 * <p>The "useLC" behaviour relies on a single class-wide set of known
 * lower-case words. That set is mutable static state shared by all callers and
 * this class performs no synchronization around it.
 */
public class WordShapeClassifier {
    public static final int NOWORDSHAPE = -1;
    public static final int WORDSHAPEDAN1 = 0;
    public static final int WORDSHAPECHRIS1 = 1;
    public static final int WORDSHAPEDAN2 = 2;
    public static final int WORDSHAPEDAN2USELC = 3;
    public static final int WORDSHAPEDAN2BIO = 4;
    public static final int WORDSHAPEDAN2BIOUSELC = 5;
    public static final int WORDSHAPEJENNY1 = 6;
    public static final int WORDSHAPEJENNY1USELC = 7;
    public static final int WORDSHAPECHRIS2 = 8;
    public static final int WORDSHAPECHRIS2USELC = 9;
    public static final int WORDSHAPECHRIS3 = 10;
    public static final int WORDSHAPECHRIS3USELC = 11;
    public static final int WORDSHAPECHRIS4 = 12;

    /** Shaper names accepted by {@link #lookupShaper(String)}; the array index is the shaper id. */
    private static final String[] SHAPER_NAMES = {
        "dan1", "chris1", "dan2", "dan2useLC", "dan2bio", "dan2bioUseLC", "jenny1",
        "jenny1useLC", "chris2", "chris2useLC", "chris3", "chris3useLC", "chris4"
    };

    /** Class-wide set of known lower-case words; may be {@code null} if a caller installed null. */
    private static Set<String> knownLCWords = new HashSet<String>();

    /** Greek letter names recognised (case-sensitively) by the jenny1 and chris2/3/4 shapers. */
    private static final String[] greek = {"alpha", "beta", "gamma", "delta", "epsilon", "zeta", "theta", "iota", "kappa", "lambda", "omicron", "rho", "sigma", "tau", "upsilon", "omega"};

    /** Case-insensitive matcher for the same Greek letter names, used by the dan2bio shapers. */
    private static final Pattern biogreek = Pattern.compile("alpha|beta|gamma|delta|epsilon|zeta|theta|iota|kappa|lambda|omicron|rho|sigma|tau|upsilon|omega", Pattern.CASE_INSENSITIVE);

    /** Not instantiable: the class only exposes static methods. */
    private WordShapeClassifier() {
    }

    /**
     * Translates a shaper name into its integer id.
     *
     * @param str the shaper name, compared case-insensitively; may be {@code null}
     * @return the matching {@code WORDSHAPE*} constant, or {@link #NOWORDSHAPE}
     *         when {@code str} is {@code null} or not a recognised name
     */
    public static int lookupShaper(String str) {
        if (str == null) {
            return NOWORDSHAPE;
        }
        for (int id = 0; id < SHAPER_NAMES.length; id++) {
            if (str.equalsIgnoreCase(SHAPER_NAMES[id])) {
                return id;
            }
        }
        return NOWORDSHAPE;
    }

    /**
     * Reports whether a shaper id is one of the "useLC" variants, i.e. one that
     * always consults the known lower-case word set.
     *
     * @param i a shaper id
     * @return {@code true} for the dan2, dan2bio, jenny1, chris2 and chris3 "useLC" ids
     */
    public static boolean usesLC(int i) {
        switch (i) {
            case WORDSHAPEDAN2USELC:
            case WORDSHAPEDAN2BIOUSELC:
            case WORDSHAPEJENNY1USELC:
            case WORDSHAPECHRIS2USELC:
            case WORDSHAPECHRIS3USELC:
                return true;
            default:
                return false;
        }
    }

    /**
     * Computes the shape of a word without consulting the known lower-case
     * words (unless the chosen shaper is itself a "useLC" variant).
     *
     * @param str the word to shape
     * @param i the shaper id
     * @return the shape string
     * @throws IllegalStateException if {@code i} is not a valid shaper id
     */
    public static String wordShape(String str, int i) {
        return wordShape(str, i, false);
    }

    /**
     * Installs {@code set} as the class-wide known lower-case word set and then
     * computes the shape of a word.
     *
     * @param str the word to shape
     * @param i the shaper id
     * @param set the new known lower-case word set; it replaces the current one
     *        for all subsequent calls
     * @return the shape string
     * @throws IllegalStateException if {@code i} is not a valid shaper id
     */
    @SuppressWarnings("rawtypes")
    public static String wordShape(String str, int i, Set set) {
        return wordShape(str, i, false, set);
    }

    /**
     * Computes the shape of a word with the selected shaper.
     *
     * @param str the word to shape
     * @param i the shaper id; {@link #NOWORDSHAPE} returns {@code str} unchanged
     * @param z whether non-"useLC" shapers that support it should append the
     *        known-lower-case marker; "useLC" shapers always do, and the dan1
     *        and chris1 shapers ignore it
     * @return the shape string
     * @throws IllegalStateException if {@code i} is not a valid shaper id
     */
    public static String wordShape(String str, int i, boolean z) {
        switch (i) {
            case NOWORDSHAPE:
                return str;
            case WORDSHAPEDAN1:
                return wordShapeDan1(str);
            case WORDSHAPECHRIS1:
                return wordShapeChris1(str);
            case WORDSHAPEDAN2:
                return wordShapeDan2(str, z);
            case WORDSHAPEDAN2USELC:
                return wordShapeDan2(str, true);
            case WORDSHAPEDAN2BIO:
                return wordShapeDan2Bio(str, z);
            case WORDSHAPEDAN2BIOUSELC:
                return wordShapeDan2Bio(str, true);
            case WORDSHAPEJENNY1:
                return wordShapeJenny1(str, z);
            case WORDSHAPEJENNY1USELC:
                return wordShapeJenny1(str, true);
            case WORDSHAPECHRIS2:
                return wordShapeChris2(str, z, false);
            case WORDSHAPECHRIS2USELC:
                return wordShapeChris2(str, true, false);
            case WORDSHAPECHRIS3:
                return wordShapeChris2(str, z, true);
            case WORDSHAPECHRIS3USELC:
                return wordShapeChris2(str, true, true);
            case WORDSHAPECHRIS4:
                return wordShapeChris4(str, z, false);
            default:
                throw new IllegalStateException("Bad WordShapeClassifier");
        }
    }

    /**
     * Installs {@code set} as the class-wide known lower-case word set and then
     * computes the shape of a word.
     *
     * @param str the word to shape
     * @param i the shaper id
     * @param z see {@link #wordShape(String, int, boolean)}
     * @param set the new known lower-case word set; it replaces the current one
     *        for all subsequent calls
     * @return the shape string
     * @throws IllegalStateException if {@code i} is not a valid shaper id
     */
    @SuppressWarnings("rawtypes")
    public static String wordShape(String str, int i, boolean z, Set set) {
        setKnownLowerCaseWords(set);
        return wordShape(str, i, z);
    }

    /**
     * Classifies a word into one of five fixed labels.
     *
     * <p>The labels are tested in the order {@code "ALL-DIGITS"},
     * {@code "ALL-UPPER"}, {@code "ALL-LOWER"}, {@code "MIXED-CASE"} (upper-case
     * first character followed only by lower-case characters) and finally
     * {@code "OTHER"}. Because every test starts out true, the empty string
     * yields {@code "ALL-DIGITS"}.
     *
     * @param str the word to classify
     * @return one of the five labels above
     */
    public static String wordShapeDan1(String str) {
        boolean allDigits = true;
        boolean allUpper = true;
        boolean allLower = true;
        boolean capitalizedWord = true;
        final int length = str.length();
        for (int i = 0; i < length; i++) {
            char ch = str.charAt(i);
            boolean lower = Character.isLowerCase(ch);
            boolean upper = Character.isUpperCase(ch);
            allDigits &= Character.isDigit(ch);
            allLower &= lower;
            allUpper &= upper;
            // First character must be upper case, every later one lower case.
            capitalizedWord &= (i == 0) ? upper : lower;
        }
        if (allDigits) {
            return "ALL-DIGITS";
        }
        if (allUpper) {
            return "ALL-UPPER";
        }
        if (allLower) {
            return "ALL-LOWER";
        }
        return capitalizedWord ? "MIXED-CASE" : "OTHER";
    }

    /**
     * Builds a collapsed character-class shape prefixed with {@code "WT-"}.
     *
     * <p>Digits become {@code d}, lower-case letters and {@code '_'} become
     * {@code x}, upper-case letters become {@code X}, and any other character is
     * kept as itself; consecutive identical classes are written once. Words of
     * at most three characters get {@code ":" + length} appended.
     *
     * @param str the word to shape
     * @param z if {@code true}, append {@code "k"} when the word consists only
     *        of {@code x}/{@code X} classes and its lower-cased form is in the
     *        known lower-case word set
     * @return the shape string
     */
    public static String wordShapeDan2(String str, boolean z) {
        StringBuilder shape = new StringBuilder("WT-");
        char previous = '~';
        boolean nonLetters = false;
        final int length = str.length();
        for (int i = 0; i < length; i++) {
            char ch = str.charAt(i);
            char cls = (ch == '_') ? 'x' : basicShape(ch, false);
            if (cls != 'x' && cls != 'X') {
                nonLetters = true;
            }
            if (cls != previous) {
                shape.append(cls);
            }
            previous = cls;
        }
        if (length <= 3) {
            shape.append(":").append(length);
        }
        if (z && !nonLetters && isKnownLowerCase(str)) {
            shape.append("k");
        }
        return shape.toString();
    }

    /**
     * Like {@link #wordShapeDan2(String, boolean)}, except that {@code '_'} gets
     * no special treatment and any Greek letter name starting at the current
     * position is consumed whole and written as {@code g}.
     *
     * <p>Greek names are matched at every position of the word, so repeated
     * occurrences of the same name are all recognised.
     *
     * @param str the word to shape
     * @param z if {@code true}, append {@code "k"} when the word consists only
     *        of {@code x}/{@code X} classes and its lower-cased form is in the
     *        known lower-case word set
     * @return the shape string
     */
    public static String wordShapeJenny1(String str, boolean z) {
        StringBuilder shape = new StringBuilder("WT-");
        char previous = '~';
        boolean nonLetters = false;
        final int length = str.length();
        int pos = 0;
        while (pos < length) {
            char cls = basicShape(str.charAt(pos), false);
            int step = 1;
            int greekLength = greekNameLengthAt(str, pos);
            if (greekLength > 0) {
                cls = 'g';
                step = greekLength; // skip the rest of the Greek name
            }
            if (cls != 'x' && cls != 'X') {
                nonLetters = true;
            }
            if (cls != previous) {
                shape.append(cls);
            }
            previous = cls;
            pos += step;
        }
        if (length <= 3) {
            shape.append(":").append(length);
        }
        if (z && !nonLetters && isKnownLowerCase(str)) {
            shape.append("k");
        }
        return shape.toString();
    }

    /**
     * Builds a shape that keeps the classes of the characters at the edges of
     * the word in order and summarises the middle as a sorted set of classes.
     *
     * <p>Units starting at index 0 or 1 form the prefix, units starting within
     * the last two indices form the suffix, and everything in between is
     * reduced to the sorted set of distinct classes. Digits map to {@code d},
     * lower case to {@code x}, upper/title case to {@code X}, a Greek letter
     * name to a single {@code g}, and other characters to themselves.
     *
     * @param str the word to shape
     * @param z if {@code true}, append {@code "k"} when the word consists only
     *        of {@code x}/{@code X} classes and its lower-cased form is in the
     *        known lower-case word set
     * @param z2 if {@code true}, middle classes that also occur in the prefix or
     *        suffix are omitted
     * @return the shape string
     */
    public static String wordShapeChris2(String str, boolean z, boolean z2) {
        return chrisShape(str, z, z2, false);
    }

    /**
     * Same construction as {@link #wordShapeChris2(String, boolean, boolean)}
     * but with a finer classification of non-alphanumeric characters:
     * whitespace becomes {@code s}, and symbols/punctuation are grouped by
     * their Unicode general category (see the private helper
     * {@code chris4Shape}).
     *
     * @param str the word to shape
     * @param z if {@code true}, append {@code "k"} when the word consists only
     *        of {@code x}/{@code X} classes and its lower-cased form is in the
     *        known lower-case word set
     * @param z2 if {@code true}, middle classes that also occur in the prefix or
     *        suffix are omitted
     * @return the shape string
     */
    public static String wordShapeChris4(String str, boolean z, boolean z2) {
        return chrisShape(str, z, z2, true);
    }

    /**
     * Returns the class-wide known lower-case word set.
     *
     * @return the live set object (not a copy); {@code null} if a caller
     *         previously installed {@code null}
     */
    @SuppressWarnings("rawtypes")
    public static Set getKnownLowerCaseWords() {
        return knownLCWords;
    }

    /**
     * Replaces the class-wide known lower-case word set.
     *
     * @param set the new set, stored by reference; {@code null} is tolerated and
     *        means that no word is considered known
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static void setKnownLowerCaseWords(Set set) {
        knownLCWords = set;
    }

    /**
     * Adds words to the class-wide known lower-case word set, creating a fresh
     * set first if none is currently installed.
     *
     * @param collection the words to add
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static void addKnownLowerCaseWords(Collection collection) {
        if (knownLCWords == null) {
            knownLCWords = new HashSet<String>();
        }
        knownLCWords.addAll(collection);
    }

    /**
     * Computes the {@link #wordShapeDan2(String, boolean)} shape and appends
     * {@code "-GREEK"} when the word contains a Greek letter name anywhere
     * (matched case-insensitively).
     *
     * @param str the word to shape
     * @param z forwarded to {@link #wordShapeDan2(String, boolean)}
     * @return the shape string
     */
    public static String wordShapeDan2Bio(String str, boolean z) {
        String shape = wordShapeDan2(str, z);
        return containsGreekLetter(str) ? shape + "-GREEK" : shape;
    }

    /** Returns whether {@code str} contains any Greek letter name, ignoring case. */
    private static boolean containsGreekLetter(String str) {
        return biogreek.matcher(str).find();
    }

    /**
     * Classifies a word into a fixed label such as {@code "CARDINAL4"},
     * {@code "NUMBER"}, {@code "ACRONYM"}, {@code "CAPITALIZED-DASH"} or
     * {@code "SYMBOL"}.
     *
     * <p>The empty string yields {@code "SYMBOLS"}. A word of digits only is a
     * cardinal, bucketed by length. A word made of digits, {@code '.'},
     * {@code ','} and an optional leading sign that contains at least one digit
     * is a {@code "NUMBER"}. Everything else is labelled from its mix of
     * upper-case letters, other letters, digits, dashes and periods.
     *
     * @param str the word to classify
     * @return the label
     */
    public static String wordShapeChris1(String str) {
        final int length = str.length();
        if (length == 0) {
            return "SYMBOLS";
        }

        // Pass 1: purely numeric forms.
        boolean seenDigit = false;
        boolean seenNonDigit = false;
        boolean numberLike = true;
        for (int i = 0; i < length; i++) {
            char ch = str.charAt(i);
            boolean digit = Character.isDigit(ch);
            if (digit) {
                seenDigit = true;
            } else {
                seenNonDigit = true;
            }
            boolean leadingSign = i == 0 && (ch == '-' || ch == '+');
            if (!digit && ch != '.' && ch != ',' && !leadingSign) {
                numberLike = false;
            }
        }
        if (seenDigit && !seenNonDigit) {
            if (length < 4) {
                return "CARDINAL13";
            }
            return length == 4 ? "CARDINAL4" : "CARDINAL5PLUS";
        }
        if (seenDigit && numberLike) {
            return "NUMBER";
        }

        // Pass 2: letter-case profile plus dash/period presence.
        boolean seenLower = false;
        boolean seenUpper = false;
        boolean allCaps = true;
        boolean allLower = true;
        boolean startsWithCapital = false;
        boolean seenDash = false;
        boolean seenPeriod = false;
        for (int i = 0; i < length; i++) {
            char ch = str.charAt(i);
            boolean upper = Character.isUpperCase(ch);
            boolean title = Character.isTitleCase(ch);
            if (ch == '-') {
                seenDash = true;
            } else if (ch == '.') {
                seenPeriod = true;
            }
            if (title) {
                // A title-case character counts as both upper and lower case.
                seenUpper = true;
                allLower = false;
                seenLower = true;
                allCaps = false;
            } else if (upper) {
                seenUpper = true;
                allLower = false;
            } else if (Character.isLetter(ch)) {
                seenLower = true;
                allCaps = false;
            }
            if (i == 0 && (upper || title)) {
                startsWithCapital = true;
            }
        }

        if (length == 2 && startsWithCapital && seenPeriod) {
            return "ACRONYM1";
        }
        if (seenUpper && allCaps && !seenDigit && seenPeriod) {
            return "ACRONYM";
        }
        if (seenDigit && seenDash && !seenUpper && !seenLower) {
            return "DIGIT-DASH";
        }
        if (startsWithCapital && seenLower) {
            if (seenDigit) {
                return seenDash ? "CAPITALIZED-DIGIT-DASH" : "CAPITALIZED-DIGIT";
            }
            return seenDash ? "CAPITALIZED-DASH" : "CAPITALIZED";
        }
        if (seenUpper && allCaps) {
            if (seenDigit) {
                return seenDash ? "ALLCAPS-DIGIT-DASH" : "ALLCAPS-DIGIT";
            }
            // With or without a dash the label is the same.
            return "ALLCAPS";
        }
        if (seenLower && allLower) {
            if (seenDigit) {
                return seenDash ? "LOWERCASE-DIGIT-DASH" : "LOWERCASE-DIGIT";
            }
            return seenDash ? "LOWERCASE-DASH" : "LOWERCASE";
        }
        if (seenLower) {
            return seenDigit ? "MIXEDCASE-DIGIT" : "MIXEDCASE";
        }
        return seenDigit ? "SYMBOL-DIGIT" : "SYMBOL";
    }

    /**
     * Command-line driver: prints the shape of each argument.
     *
     * <p>Usage: {@code [-wordShape name] string+}. Without the flag the chris1
     * shaper is used. With no arguments a usage line is printed. Any other
     * leading argument that starts with {@code '-'} is reported on standard
     * error and skipped.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        int next = 0;
        int shaper = WORDSHAPECHRIS1;
        if (strArr.length == 0) {
            System.out.println("edu.stanford.nlp.process.WordShapeClassifier [-wordShape name] string+");
        } else if (strArr[0].startsWith("-")) {
            // startsWith (rather than charAt(0)) keeps an empty first argument from throwing.
            if (strArr[0].equals("-wordShape") && strArr.length >= 2) {
                shaper = lookupShaper(strArr[1]);
                next = 2;
            } else {
                System.err.println("Unknown flag: " + strArr[0]);
                next = 1;
            }
        }
        for (; next < strArr.length; next++) {
            System.out.print(strArr[next] + ": ");
            System.out.println(wordShape(strArr[next], shaper));
        }
    }

    /** Returns whether the lower-cased word is in the known-word set; false when no set is installed. */
    private static boolean isKnownLowerCase(String word) {
        Set<String> known = knownLCWords;
        return known != null && known.contains(word.toLowerCase());
    }

    /** Returns the length of the first Greek letter name (in table order) starting at {@code index}, or 0. */
    private static int greekNameLengthAt(String str, int index) {
        for (String name : greek) {
            if (str.startsWith(name, index)) {
                return name.length();
            }
        }
        return 0;
    }

    /** Maps digits to 'd', lower case to 'x', upper case (optionally title case) to 'X', else the char itself. */
    private static char basicShape(char ch, boolean titleCaseIsUpper) {
        if (Character.isDigit(ch)) {
            return 'd';
        }
        if (Character.isLowerCase(ch)) {
            return 'x';
        }
        if (Character.isUpperCase(ch) || (titleCaseIsUpper && Character.isTitleCase(ch))) {
            return 'X';
        }
        return ch;
    }

    /** Character class used by the chris4 shaper: letters/digits/whitespace, then Unicode symbol and punctuation groups. */
    private static char chris4Shape(char ch) {
        if (Character.isDigit(ch)) {
            return 'd';
        }
        if (Character.isLowerCase(ch)) {
            return 'x';
        }
        if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
            return 'X';
        }
        if (Character.isWhitespace(ch) || Character.isSpaceChar(ch)) {
            return 's';
        }
        int type = Character.getType(ch);
        if (type == Character.CURRENCY_SYMBOL) {
            return '$';
        }
        if (type == Character.MATH_SYMBOL) {
            return '+';
        }
        if (type == Character.OTHER_SYMBOL || ch == '|') {
            return '|';
        }
        if (type == Character.START_PUNCTUATION) {
            return '(';
        }
        if (type == Character.END_PUNCTUATION) {
            return ')';
        }
        if (type == Character.INITIAL_QUOTE_PUNCTUATION) {
            return '`';
        }
        if (type == Character.FINAL_QUOTE_PUNCTUATION || ch == '\'') {
            return '\'';
        }
        if (type == Character.CONNECTOR_PUNCTUATION) {
            return '_';
        }
        if (type == Character.DASH_PUNCTUATION) {
            return '-';
        }
        // Anything outside printable ASCII that was not classified above.
        if (ch < ' ' || ch > '~') {
            return 'q';
        }
        return ch;
    }

    /** Shared prefix / sorted-middle / suffix assembly behind the chris2, chris3 and chris4 shapers. */
    private static String chrisShape(String str, boolean useKnownLC, boolean omitBoundaryClasses, boolean fineGrained) {
        StringBuilder prefix = new StringBuilder();
        StringBuilder suffix = new StringBuilder();
        Set<Character> boundaryClasses = new HashSet<Character>();
        Set<Character> middleClasses = new TreeSet<Character>();
        boolean nonLetters = false;
        final int length = str.length();
        int pos = 0;
        while (pos < length) {
            char ch = str.charAt(pos);
            char cls = fineGrained ? chris4Shape(ch) : basicShape(ch, true);
            int step = 1;
            int greekLength = greekNameLengthAt(str, pos);
            if (greekLength > 0) {
                cls = 'g';
                step = greekLength; // the whole Greek name counts as one unit
            }
            if (cls != 'x' && cls != 'X') {
                nonLetters = true;
            }
            Character boxed = Character.valueOf(cls);
            if (pos < 2) {
                prefix.append(cls);
                boundaryClasses.add(boxed);
            } else if (pos < length - 2) {
                middleClasses.add(boxed);
            } else {
                boundaryClasses.add(boxed);
                suffix.append(cls);
            }
            pos += step;
        }
        for (Character cls : middleClasses) {
            if (!omitBoundaryClasses || !boundaryClasses.contains(cls)) {
                prefix.append(cls.charValue());
            }
        }
        prefix.append(suffix);
        if (useKnownLC && !nonLetters && isKnownLowerCase(str)) {
            prefix.append("k");
        }
        return prefix.toString();
    }
}
