package info.ephyra.questionanalysis;

import info.ephyra.nlp.indices.FunctionWords;
import info.ephyra.nlp.indices.WordFrequencies;
import info.ephyra.util.StringUtils;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:info/ephyra/questionanalysis/KeywordExtractor.class */
/**
 * Static helpers that split a text into tokens and reduce the tokens to a
 * list of keywords.
 *
 * <p>Keyword extraction runs the tokens through a fixed chain of filters:
 * tokens of length one or less are removed, then tokens on the ignore list,
 * then tokens that {@link FunctionWords#lookup} reports as function words,
 * and finally duplicates (compared by their {@link StringUtils#normalize}
 * form).
 */
public class KeywordExtractor {
    /**
     * Characters that always become tokens of their own: exclamation mark,
     * question mark, semicolon, double and single quote, slash, backslash and
     * round, square and curly brackets.
     */
    private static final Pattern DELIMS1 = Pattern.compile("(\\!|\\?|;|\"|'|/|\\\\|\\(|\\)|\\[|\\]|\\{|\\})");

    /**
     * A comma or colon that is neither directly preceded nor directly followed
     * by a digit. Lookarounds are used instead of capturing the neighbouring
     * characters so that closely spaced delimiters (as in "a,b,c") are all
     * matched; a consuming pattern would use up "b" for the first comma and
     * miss the second one.
     */
    private static final Pattern DELIMS2 = Pattern.compile("(?<!\\d)(,|\\:)(?!\\d)");

    /** A period at the end of the input (or just before a final line terminator). */
    private static final Pattern DELIMS3 = Pattern.compile("(\\.)$");

    /** One or more whitespace characters, collapsed to a single blank after tokenization. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s++");

    /** Regular expression describing tokens that are never keywords. */
    private static final String IGNORE = "(names?|give|tell|list)";

    /** Case-insensitive, precompiled form of {@link #IGNORE}; must match a whole token. */
    private static final Pattern IGNORE_PATTERN = Pattern.compile("(?i)" + IGNORE);

    /** Maximum number of keywords kept by {@link #dropFrequentWords(String[])}. */
    private static final int MAX_WORDS = Integer.MAX_VALUE;

    /**
     * Removes all tokens whose length is one or less (including empty tokens).
     *
     * @param strArr tokens to filter
     * @return the remaining tokens in their original order
     */
    private static String[] dropSingleChars(String[] strArr) {
        ArrayList<String> kept = new ArrayList<String>();
        for (String token : strArr) {
            if (token.length() > 1) {
                kept.add(token);
            }
        }
        return kept.toArray(new String[kept.size()]);
    }

    /**
     * Removes all tokens that match the ignore list, regardless of case.
     *
     * @param strArr tokens to filter
     * @return the remaining tokens in their original order
     */
    private static String[] dropBadKeywords(String[] strArr) {
        ArrayList<String> kept = new ArrayList<String>();
        for (String token : strArr) {
            if (!IGNORE_PATTERN.matcher(token).matches()) {
                kept.add(token);
            }
        }
        return kept.toArray(new String[kept.size()]);
    }

    /**
     * Removes all tokens for which {@link FunctionWords#lookup} returns
     * {@code true}.
     *
     * @param strArr tokens to filter
     * @return the remaining tokens in their original order
     */
    private static String[] dropFunctionWords(String[] strArr) {
        ArrayList<String> kept = new ArrayList<String>();
        for (String token : strArr) {
            if (!FunctionWords.lookup(token)) {
                kept.add(token);
            }
        }
        return kept.toArray(new String[kept.size()]);
    }

    /**
     * Removes duplicates, keeping the first occurrence of each token. Two
     * tokens count as duplicates if {@link StringUtils#normalize} maps them to
     * equal values; the tokens that are kept are returned unnormalized.
     *
     * @param strArr tokens to filter
     * @return the remaining tokens in their original order
     */
    private static String[] dropDuplicates(String[] strArr) {
        HashSet<Object> seen = new HashSet<Object>();
        ArrayList<String> kept = new ArrayList<String>();
        for (String token : strArr) {
            if (seen.add(StringUtils.normalize(token))) {
                kept.add(token);
            }
        }
        return kept.toArray(new String[kept.size()]);
    }

    /**
     * Reduces the tokens to at most {@code MAX_WORDS} entries by repeatedly
     * discarding the token with the highest {@link WordFrequencies#lookup}
     * value (the first such token on ties). The relative order of the
     * surviving tokens is preserved.
     *
     * <p>With the current value of {@code MAX_WORDS} the input is always
     * returned unchanged.
     *
     * @param strArr tokens to filter
     * @return the input array itself if it is short enough, otherwise a new
     *         array of length {@code MAX_WORDS}
     */
    private static String[] dropFrequentWords(String[] strArr) {
        if (strArr.length <= MAX_WORDS) {
            return strArr;
        }
        int[] frequencies = new int[strArr.length];
        for (int i = 0; i < strArr.length; i++) {
            frequencies[i] = WordFrequencies.lookup(strArr[i]);
        }
        // Track dropped tokens separately instead of overwriting their
        // frequency with a sentinel, so the result does not depend on the
        // sign of the looked-up frequencies.
        boolean[] dropped = new boolean[strArr.length];
        for (int round = 0; round < strArr.length - MAX_WORDS; round++) {
            int mostFrequent = -1;
            for (int i = 0; i < strArr.length; i++) {
                if (!dropped[i] && (mostFrequent < 0 || frequencies[i] > frequencies[mostFrequent])) {
                    mostFrequent = i;
                }
            }
            // More tokens remain than rounds are run, so a candidate always exists.
            dropped[mostFrequent] = true;
        }
        String[] kept = new String[MAX_WORDS];
        int next = 0;
        for (int i = 0; i < strArr.length; i++) {
            if (!dropped[i]) {
                kept[next++] = strArr[i];
            }
        }
        return kept;
    }

    /**
     * Inserts blanks around delimiters so that the tokens of the text are
     * separated by exactly one blank each.
     *
     * <ul>
     * <li>The characters matched by {@code DELIMS1} are always isolated.</li>
     * <li>Commas and colons are isolated unless a digit is directly next to
     * them (so "1,000" and "12:30" stay intact).</li>
     * <li>A period is isolated only if it ends the text.</li>
     * </ul>
     * Runs of whitespace are collapsed to a single blank and the result is
     * trimmed.
     *
     * @param str text to tokenize, must not be {@code null}
     * @return the text with its tokens separated by single blanks
     */
    public static String tokenizeWithSpaces(String str) {
        String spaced = DELIMS1.matcher(str).replaceAll(" $1 ");
        spaced = DELIMS2.matcher(spaced).replaceAll(" $1 ");

        Matcher finalPeriod = DELIMS3.matcher(spaced);
        if (finalPeriod.find()) {
            // Split at the matched position rather than at length() - 1:
            // '$' also matches before a final line terminator, in which case
            // the period is not the last character of the string.
            int at = finalPeriod.start();
            spaced = spaced.substring(0, at) + " " + spaced.substring(at);
        }
        return WHITESPACE.matcher(spaced).replaceAll(" ").trim();
    }

    /**
     * Splits a text into tokens, see {@link #tokenizeWithSpaces(String)}.
     *
     * <p>Note that a text without any tokens yields an array holding a single
     * empty string, as produced by {@link String#split(String)}.
     *
     * @param str text to tokenize, must not be {@code null}
     * @return the tokens in order of occurrence
     */
    public static String[] tokenize(String str) {
        return tokenizeWithSpaces(str).split(" ");
    }

    /**
     * Extracts the keywords from a text: the text is tokenized, then short
     * tokens, ignored words, function words and duplicates are removed, in
     * that order.
     *
     * @param str text to extract keywords from, must not be {@code null}
     * @return the keywords in order of first occurrence
     */
    public static String[] getKeywords(String str) {
        String[] tokens = tokenize(str);
        tokens = dropSingleChars(tokens);
        tokens = dropBadKeywords(tokens);
        tokens = dropFunctionWords(tokens);
        return dropDuplicates(tokens);
    }

    /**
     * Extracts the keywords from two texts joined by a blank, see
     * {@link #getKeywords(String)}. A {@code null} argument is treated as the
     * text "null" by the string concatenation.
     *
     * @param str first text
     * @param str2 second text
     * @return the keywords of the combined text in order of first occurrence
     */
    public static String[] getKeywords(String str, String str2) {
        return getKeywords(str + " " + str2);
    }

    /**
     * Extracts the keywords from a text and, if there are more than
     * {@code MAX_WORDS} of them, discards the most frequent ones.
     *
     * @param str text to extract keywords from, must not be {@code null}
     * @return the remaining keywords in order of first occurrence
     */
    public static String[] getInfrequentKeywords(String str) {
        return dropFrequentWords(getKeywords(str));
    }

    /**
     * Checks whether at least one token of the text equals one of the given
     * keywords. The comparison is exact (case-sensitive, no normalization).
     *
     * @param str text to search, must not be {@code null}
     * @param strArr keywords to look for
     * @return {@code true} iff a token of the text is among the keywords
     */
    public static boolean containsKeyword(String str, String[] strArr) {
        HashSet<String> keywords = new HashSet<String>();
        for (String keyword : strArr) {
            keywords.add(keyword);
        }
        for (String token : tokenize(str)) {
            if (keywords.contains(token)) {
                return true;
            }
        }
        return false;
    }
}
