package info.ephyra.answerselection.filters;

import info.ephyra.io.Logger;
import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.NETagger;
import info.ephyra.nlp.OpenNLP;
import info.ephyra.nlp.SnowballStemmer;
import info.ephyra.nlp.StanfordNeTagger;
import info.ephyra.nlp.indices.WordFrequencies;
import info.ephyra.querygeneration.Query;
import info.ephyra.querygeneration.generators.BagOfWordsG;
import info.ephyra.questionanalysis.AnalyzedQuestion;
import info.ephyra.questionanalysis.KeywordExtractor;
import info.ephyra.questionanalysis.QuestionNormalizer;
import info.ephyra.search.Result;
import info.ephyra.trec.TREC13To16Parser;
import info.ephyra.trec.TRECTarget;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;

/* loaded from: input_file:info/ephyra/answerselection/filters/WebTermImportanceFilter.class */
public abstract class WebTermImportanceFilter extends Filter {
    // Type labels. getTargets() compares the result of checkType() against the
    // literal values "person" and "organization"; "location" and "event" are not
    // referenced by the code visible in this class.
    protected static final String person = "person";
    protected static final String organization = "organization";
    protected static final String location = "location";
    protected static final String event = "event";
    // Normalization modes accepted by the constructor. In apply() the length
    // normalization divides a result's score by: nothing (0), the number of answer
    // tokens (1), its square root (2), 1 + ln(tokens) (3) or 1 + log10(tokens) (4).
    public static final int NO_NORMALIZATION = 0;
    public static final int LINEAR_LENGTH_NORMALIZATION = 1;
    public static final int SQUARE_ROOT_LENGTH_NORMALIZATION = 2;
    public static final int LOG_LENGTH_NORMALIZATION = 3;
    public static final int LOG_10_LENGTH_NORMALIZATION = 4;
    // Answer-length normalization mode (one of the constants above).
    private final int normalizationMode;
    // Term-frequency normalization mode (one of the constants above).
    private final int tfNormalizationMode;
    // When true, apply() keeps results with a zero score and attaches rank-based extra scores.
    private final boolean isCombined;
    // Lookup set of trailing words that cutExtension() strips; filled lazily from 'extensions'.
    private HashSet<String> extensionList = new HashSet<>();
    // Trailing words cut off by cutExtension(). The last entry is a literal placeholder string.
    private static final String[] extensions = {"University", "Corporation", "International", "Incorporated", "Inc.", "Comp.", "Corp.", "Co.", "Museum", "<to be extended>"};
    // Shared cache keyed by concrete class name; each entry holds the counters of the
    // most recently cached target for that class (see cache() / cacheLookup()).
    private static HashMap<String, CacheEntry> cache = new HashMap<>();
    // Test switch set by main(): getTargets() prints the detected target type, and apply()
    // prints the generated targets and returns its input unchanged.
    protected static boolean TEST_TARGET_GENERATION = false;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:info/ephyra/answerselection/filters/WebTermImportanceFilter$CacheEntry.class */
    /**
     * Pairs a target string with the term counters that were obtained for it.
     */
    public static class CacheEntry {
        /** Target string the counters belong to. */
        String target;
        /** Term counters stored for {@link #target}. */
        HashMap<String, TermCounter> termCounters;

        /**
         * @param target       the target string
         * @param termCounters the term counters obtained for the target
         */
        public CacheEntry(String target, HashMap<String, TermCounter> termCounters) {
            this.termCounters = termCounters;
            this.target = target;
        }
    }

    /* loaded from: input_file:info/ephyra/answerselection/filters/WebTermImportanceFilter$TargetGeneratorTest.class */
    /**
     * Minimal concrete filter used by {@code main} to exercise target generation;
     * it never produces any term counters.
     */
    private static class TargetGeneratorTest extends WebTermImportanceFilter {
        /**
         * @param mode used for both the length and the term-frequency normalization
         */
        TargetGeneratorTest(int mode) {
            super(mode, mode, false);
        }

        /** Always returns a fresh, empty map. */
        @Override // info.ephyra.answerselection.filters.WebTermImportanceFilter
        public HashMap<String, TermCounter> getTermCounters(String[] targets) {
            final HashMap<String, TermCounter> noCounters = new HashMap<>();
            return noCounters;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:info/ephyra/answerselection/filters/WebTermImportanceFilter$TermCounter.class */
    /**
     * Mutable integer counter used to accumulate the weight of a term.
     */
    public class TermCounter {
        /** Current count. */
        private int count;

        /** Creates a counter that starts at zero. */
        public TermCounter() {
            this(0);
        }

        /**
         * Creates a counter with a given start value.
         *
         * @param initial the start value
         */
        protected TermCounter(int initial) {
            this.count = initial;
        }

        /** @return the current count */
        public int getValue() {
            return this.count;
        }

        /** Adds one to the count. */
        public void increment() {
            this.count = this.count + 1;
        }

        /**
         * Adds an amount to the count.
         *
         * @param amount the amount to add
         */
        public void increment(int amount) {
            this.count = this.count + amount;
        }

        /** Subtracts one from the count. */
        public void decrement() {
            this.count = this.count - 1;
        }

        /**
         * Subtracts an amount from the count.
         *
         * @param amount the amount to subtract
         */
        public void decrement(int amount) {
            this.count = this.count - amount;
        }

        /**
         * Multiplies the count by a factor.
         *
         * @param factor the factor
         */
        public void multiplyValue(int factor) {
            this.count = this.count * factor;
        }

        /**
         * Divides the count by a divisor using integer division.
         *
         * @param divisor the divisor; zero raises an {@link ArithmeticException}
         */
        public void divideValue(int divisor) {
            this.count = this.count / divisor;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Stores the configuration of the filter.
     *
     * @param normalizationMode   answer-length normalization mode (one of the
     *                            {@code *_NORMALIZATION} constants)
     * @param tfNormalizationMode term-frequency normalization mode (one of the
     *                            {@code *_NORMALIZATION} constants)
     * @param isCombined          whether zero-score results are kept and rank-based
     *                            extra scores are attached in {@code apply}
     */
    public WebTermImportanceFilter(int normalizationMode, int tfNormalizationMode, boolean isCombined) {
        this.isCombined = isCombined;
        this.tfNormalizationMode = tfNormalizationMode;
        this.normalizationMode = normalizationMode;
    }

    /**
     * Obtains the term counters for a set of target strings. Within this class it is
     * called from apply() on a cache miss, with the strings produced by getTargets();
     * the returned map is then cached for the target.
     *
     * @param strArr the target strings to obtain counters for
     * @return map from term to its counter; apply() stems the lower-cased keys and
     *         sums the counter values per stem
     */
    public abstract HashMap<String, TermCounter> getTermCounters(String[] strArr);

    /**
     * Generates the search strings for a target: the target itself plus variants
     * obtained by dropping a leading article, splitting off a bracketed part, cutting
     * trailing extension words, extracting upper-case phrases and acronyms, and adding
     * quoted or "the"-prefixed forms depending on the detected type.
     *
     * <p>Malformed bracket usage (a "(" at the very start, or a ")" before the "(") and
     * targets that consist only of "University", "International" or "Corporation" no
     * longer raise a {@link StringIndexOutOfBoundsException}; such targets fall through
     * to the generic processing instead.
     *
     * @param str the target string
     * @return the distinct search strings, in generation order
     */
    public String[] getTargets(String str) {
        ArrayList<String> targets = new ArrayList<>();
        targets.add(str);
        boolean isPerson = false;
        boolean bracketsSplit = false;

        // variant without the leading article
        if (str.startsWith("the ")) {
            targets.add(str.substring(4));
        } else if (str.startsWith("an ")) {
            targets.add(str.substring(3));
        } else if (str.startsWith("a ")) {
            targets.add(str.substring(2));
        }

        String targetType = checkType(str);
        if (TEST_TARGET_GENERATION) {
            if (targetType == null) {
                System.out.println(" target type could not be determined");
            } else {
                System.out.println(" target type is " + targetType);
            }
        }

        // Only split on brackets when they are well-formed and something precedes them.
        int open = str.indexOf('(');
        int close = str.indexOf(')');
        String beforeBrackets = (open > 0 && close > open) ? str.substring(0, open).trim() : "";

        if ("person".equals(targetType)) {
            isPerson = true;
        } else if (!beforeBrackets.isEmpty()) {
            // "X (Y)" is replaced by the two separate targets "X" and "Y"
            String insideBrackets = str.substring(open + 1, close);
            targets.clear();
            targets.add(beforeBrackets);
            targets.add(insideBrackets);
            bracketsSplit = true;
        } else if (!cutExtension(str, targets)) {
            boolean suffixFound = cutTrailingWord(str, "University", targets)
                    || cutTrailingWord(str, "International", targets)
                    || cutTrailingWord(str, "Corporation", targets);
            if (!suffixFound) {
                extractUpperCaseParts(targets);
                // NOTE(review): every candidate re-added here is already in the list and
                // duplicates are removed at the end, so this only triggers type checks.
                for (String candidate : new LinkedHashSet<String>(targets)) {
                    if ("person".equals(checkType(candidate))) {
                        targets.add(candidate);
                    }
                }
            }
        }

        if (isPerson) {
            targets.add("\"" + str + "\"");
        } else if (!bracketsSplit) {
            extractUpperCaseParts(targets);
            cutFirstNpInNpSequence(targets);
            extractAcronyms(targets);
            postProcess(targets);
        }

        // type-dependent variants: "the X" for organizations and acronyms, quoted persons
        for (String candidate : new LinkedHashSet<String>(targets)) {
            String candidateType = checkType(candidate);
            if ("organization".equals(candidateType)) {
                targets.add("the " + candidate);
                if (!bracketsSplit) {
                    targets.add("the " + str);
                }
            } else if ("person".equals(candidateType)) {
                targets.add("\"" + candidate + "\"");
            }
            if (candidate.matches("([A-Z]){3,}") || candidate.matches("([A-Z]\\.){2,}")) {
                targets.add("the " + candidate);
            }
        }

        // quote CamelCase-style single words
        for (String candidate : new LinkedHashSet<String>(targets)) {
            if (candidate.matches("([A-Z][a-z]++)++")) {
                targets.add("\"" + candidate + "\"");
            }
        }

        // quote the whole target if it has more than one token
        if (NETagger.tokenize(str).length > 1) {
            targets.add("\"" + str + "\"");
        }

        LinkedHashSet<String> distinct = new LinkedHashSet<String>(targets);
        return distinct.toArray(new String[distinct.size()]);
    }

    /**
     * If the target ends with the given suffix, adds the target without the suffix and
     * the single character preceding it (normally a separator). Nothing is added when
     * no text would remain.
     *
     * @return true if the target ends with the suffix
     */
    private static boolean cutTrailingWord(String target, String suffix, ArrayList<String> targets) {
        if (!target.endsWith(suffix)) {
            return false;
        }
        int cut = target.length() - suffix.length() - 1;
        if (cut > 0) {
            targets.add(target.substring(0, cut));
        }
        return true;
    }

    /**
     * Determines the named-entity type of a string, initializing the Stanford NE
     * tagger first if necessary.
     *
     * @param str the string to classify
     * @return the key under which the tagger reports an entity equal to the whole
     *         string, with every "NE" removed, or {@code null} if there is none
     */
    private String checkType(String str) {
        if (!StanfordNeTagger.isInitialized()) {
            StanfordNeTagger.init();
        }
        HashMap<String, String[]> entitiesByType = StanfordNeTagger.extractNEs(str);
        for (java.util.Map.Entry<String, String[]> typeEntry : entitiesByType.entrySet()) {
            for (String entity : typeEntry.getValue()) {
                if (entity.equals(str)) {
                    return typeEntry.getKey().replace("NE", "");
                }
            }
        }
        return null;
    }

    /**
     * Cuts a known trailing extension word (e.g. "Inc.", "University") off a target
     * and adds the shortened form to the list.
     *
     * @param str       the target string
     * @param arrayList list the shortened target is appended to
     * @return true if the last word is a known extension and at least one word
     *         precedes it; false otherwise (the list is then left untouched)
     */
    private boolean cutExtension(String str, ArrayList<String> arrayList) {
        // fill the lookup set on first use
        if (this.extensionList.isEmpty()) {
            Collections.addAll(this.extensionList, extensions);
        }
        String[] words = str.split("\\s");
        // The length check must come first: split() yields an empty array for a
        // whitespace-only string, so indexing the last word would fail.
        if (words.length <= 1 || !this.extensionList.contains(words[words.length - 1])) {
            return false;
        }
        StringBuilder head = new StringBuilder(words[0]);
        for (int w = 1; w < words.length - 1; w++) {
            head.append(' ').append(words[w]);
        }
        arrayList.add(head.toString());
        return true;
    }

    /**
     * For every distinct string in the list, appends each phrase that starts at a
     * word with an upper-case first character and extends over the following words
     * until one starts with a lower-case character. Words are joined by single spaces.
     *
     * <p>Empty tokens, which result from leading or consecutive whitespace, are
     * skipped instead of causing a {@link StringIndexOutOfBoundsException}.
     *
     * @param arrayList the targets; extracted phrases are appended to it
     */
    private void extractUpperCaseParts(ArrayList<String> arrayList) {
        // iterate over a snapshot, since the list is extended while scanning
        for (String target : new LinkedHashSet<String>(arrayList)) {
            ArrayList<String> words = new ArrayList<>();
            for (String word : target.split("\\s")) {
                if (!word.isEmpty()) {
                    words.add(word);
                }
            }
            int pos = 0;
            final int total = words.size();
            while (pos < total) {
                // skip words that do not start a phrase
                while (pos < total && !Character.isUpperCase(words.get(pos).charAt(0))) {
                    pos++;
                }
                if (pos >= total) {
                    break;
                }
                StringBuilder phrase = new StringBuilder(words.get(pos++));
                // extend the phrase until a word starting with a lower-case character
                while (pos < total && !Character.isLowerCase(words.get(pos).charAt(0))) {
                    phrase.append(' ').append(words.get(pos++));
                }
                arrayList.add(phrase.toString());
            }
        }
    }

    /**
     * Appends every whitespace-separated word that looks like an acronym: either
     * three or more capital letters, or two or more capital letters each followed
     * by a period.
     *
     * @param arrayList the targets; acronyms found are appended to it
     */
    private void extractAcronyms(ArrayList<String> arrayList) {
        // work on a de-duplicated snapshot because the list grows during the scan
        LinkedHashSet<String> snapshot = new LinkedHashSet<String>(arrayList);
        for (String target : snapshot) {
            String[] words = target.split("\\s");
            for (int w = 0; w < words.length; w++) {
                String word = words[w];
                boolean isAcronym = word.matches("([A-Z]){3,}") || word.matches("([A-Z]\\.){2,}");
                if (isAcronym) {
                    arrayList.add(word);
                }
            }
        }
    }

    /**
     * For every distinct string in the list, appends the part that starts at the
     * second token tagged "B-NP" by the chunker and runs to the end of the string
     * (tokens joined by single spaces). Nothing is appended when there is no second
     * "B-NP" token.
     *
     * @param arrayList the targets; shortened forms are appended to it
     */
    private void cutFirstNpInNpSequence(ArrayList<String> arrayList) {
        for (String target : new LinkedHashSet<String>(arrayList)) {
            String[] tokens = OpenNLP.tokenize(target);
            String[] chunkTags = OpenNLP.tagChunks(tokens, OpenNLP.tagPos(tokens));

            // locate the first chunk start ...
            int pos = 0;
            while (pos < tokens.length && !"B-NP".equals(chunkTags[pos])) {
                pos++;
            }
            // ... then the next chunk start after it
            pos++;
            while (pos < tokens.length && !"B-NP".equals(chunkTags[pos])) {
                pos++;
            }

            String remainder = null;
            if (pos < tokens.length) {
                remainder = tokens[pos];
                for (int k = pos + 1; k < tokens.length; k++) {
                    remainder = remainder + " " + tokens[k];
                }
            }
            if (remainder != null) {
                arrayList.add(remainder);
            }
        }
    }

    /**
     * Cleans up the targets in place: removes duplicates, trims whitespace, strips one
     * pair of enclosing brackets, strips leading and trailing apostrophes and leading
     * single lower-case letters other than 'a', and drops entries that have unbalanced
     * outer brackets or are shorter than two characters.
     *
     * @param arrayList the targets; cleared and refilled with the cleaned entries
     */
    private void postProcess(ArrayList<String> arrayList) {
        ArrayList<String> snapshot = new ArrayList<String>(new LinkedHashSet<String>(arrayList));
        arrayList.clear();
        for (String raw : snapshot) {
            String candidate = raw.trim();

            // strip one pair of enclosing brackets
            if (candidate.startsWith("(") && candidate.endsWith(")")) {
                candidate = candidate.substring(1, candidate.length() - 1).trim();
            }
            // an opening bracket without a closing one (or vice versa) disqualifies the entry
            if (candidate.startsWith("(") != candidate.endsWith(")")) {
                continue;
            }

            while (candidate.startsWith("'")) {
                candidate = candidate.substring(1).trim();
            }
            while (candidate.endsWith("'")) {
                candidate = candidate.substring(0, candidate.length() - 1).trim();
            }
            // drop leading single letters b-z that are followed by whitespace
            while (candidate.matches("[b-z]\\s.++")) {
                candidate = candidate.substring(2);
            }

            if (candidate.length() >= 2) {
                arrayList.add(candidate);
            }
        }
    }

    @Override // info.ephyra.answerselection.filters.Filter
    /**
     * Re-ranks the results by the importance of the terms in their answers.
     *
     * <p>The term counters for the target (the original query string of the first
     * result) are taken from the cache or obtained through
     * {@link #getTermCounters(String[])} and merged per stem. The method then
     * repeatedly scores all remaining results, moves the best one to the output and
     * divides the counters of the terms in that answer (by 2 for stems of target
     * tokens, by the term count logarithm otherwise) so that later picks are rewarded
     * for different terms.
     *
     * <p>Term-frequency normalization now honors {@link #LOG_10_LENGTH_NORMALIZATION};
     * previously that branch tested {@link #LOG_LENGTH_NORMALIZATION} a second time and
     * could never be reached.
     *
     * @param resultArr the results to filter; their scores are overwritten
     * @return the selected results sorted by descending score; the input array itself
     *         if it is empty or if {@code TEST_TARGET_GENERATION} is set
     */
    @Override // info.ephyra.answerselection.filters.Filter
    public Result[] apply(Result[] resultArr) {
        if (resultArr.length == 0) {
            return resultArr;
        }
        String target = resultArr[0].getQuery().getOriginalQueryString();
        System.out.println("WebTermImportanceFilter:\n processing target '" + target + "'");
        HashMap<String, TermCounter> rawCounters = cacheLookup(target);

        // test mode: only show the generated search strings
        if (TEST_TARGET_GENERATION) {
            String[] testTargets = getTargets(target);
            System.out.println(" generated web serach Strings:");
            for (String testTarget : testTargets) {
                System.out.println(" - " + testTarget);
            }
            return resultArr;
        }

        if (rawCounters == null) {
            String[] searchTargets = getTargets(target);
            System.out.println(" web serach Strings are");
            for (String searchTarget : searchTargets) {
                System.out.println(" - " + searchTarget);
            }
            rawCounters = getTermCounters(searchTargets);
            cache(target, rawCounters);
        }

        // tokens of the target that start with a letter or digit
        HashSet<String> targetTokens = new HashSet<>();
        for (String token : OpenNLP.tokenize(target)) {
            if (Character.isLetterOrDigit(token.charAt(0))) {
                targetTokens.add(token);
            }
        }

        // merge the counters of all terms sharing a stem; remember stems of target tokens
        HashMap<String, TermCounter> stemCounters = new HashMap<>();
        HashSet<String> targetStems = new HashSet<>();
        for (String term : new ArrayList<String>(rawCounters.keySet())) {
            String stem = SnowballStemmer.stem(term.toLowerCase());
            if (!stemCounters.containsKey(stem)) {
                stemCounters.put(stem, new TermCounter());
            }
            stemCounters.get(stem).increment(rawCounters.get(term).getValue());
            if (targetTokens.contains(term)) {
                targetStems.add(stem);
            }
        }

        int countSum = getCountSum(stemCounters);
        // penalty divisor for non-target terms; never below 2
        int termCountLog = countSum > 100 ? (int) Math.log10(countSum) : 2;
        System.out.println("WebTermImportanceFilter: termCountLog is " + termCountLog);

        ArrayList<Result> ranked = new ArrayList<>();
        boolean pickedOne;
        do {
            pickedOne = false;
            ArrayList<Result> scored = new ArrayList<>();
            for (Result result : resultArr) {
                if (result.getScore() == Float.NEGATIVE_INFINITY) {
                    continue;
                }
                String[] tokens = NETagger.tokenize(result.getAnswer());
                float importance = 0.0f;
                for (String token : tokens) {
                    if (token.length() <= 1) {
                        continue;
                    }
                    TermCounter counter = stemCounters.get(SnowballStemmer.stem(token.toLowerCase()));
                    if (counter != null) {
                        importance = (float) (importance + (counter.getValue() / termFrequencyDivisor(token)));
                    }
                }
                if (this.isCombined || importance > 0.0f) {
                    switch (this.normalizationMode) {
                        case NO_NORMALIZATION:
                            result.setScore(importance);
                            break;
                        case LINEAR_LENGTH_NORMALIZATION:
                            result.setScore(importance / tokens.length);
                            break;
                        case SQUARE_ROOT_LENGTH_NORMALIZATION:
                            result.setScore(importance / ((float) Math.sqrt(tokens.length)));
                            break;
                        case LOG_LENGTH_NORMALIZATION:
                            result.setScore(importance / (1.0f + ((float) Math.log(tokens.length))));
                            break;
                        case LOG_10_LENGTH_NORMALIZATION:
                            result.setScore(importance / (1.0f + ((float) Math.log10(tokens.length))));
                            break;
                        default:
                            // unknown mode: the result keeps its current score
                            break;
                    }
                    scored.add(result);
                }
            }

            if (!scored.isEmpty()) {
                // move the best remaining result to the output
                Collections.sort(scored);
                Collections.reverse(scored);
                Result best = scored.remove(0);
                ranked.add(best);

                // devalue the terms the chosen answer already covers
                for (String token : NETagger.tokenize(best.getAnswer())) {
                    String stem = SnowballStemmer.stem(token.toLowerCase());
                    TermCounter counter = stemCounters.get(stem);
                    if (counter != null) {
                        if (targetStems.contains(stem)) {
                            counter.divideValue(2);
                        } else {
                            counter.divideValue(termCountLog);
                        }
                        if (counter.getValue() == 0) {
                            stemCounters.remove(stem);
                        }
                    }
                }
                resultArr = scored.toArray(new Result[scored.size()]);
                pickedOne = true;
            }
        } while (pickedOne);

        Collections.sort(ranked);
        Collections.reverse(ranked);

        if (this.isCombined) {
            // rank-based extra score: 100 for the best result, decaying by 10% per rank
            float extraScore = 100.0f;
            for (Result result : ranked) {
                result.addExtraScore(getClass().getName() + this.normalizationMode, extraScore);
                extraScore *= 0.9f;
            }
        }
        return ranked.toArray(new Result[ranked.size()]);
    }

    /**
     * Computes the divisor applied to a term's counter according to the
     * term-frequency normalization mode: the natural or base-10 logarithm of the
     * word frequency looked up for the lower-cased token when that frequency
     * exceeds e or 10 respectively, and 1 in all other cases.
     */
    private double termFrequencyDivisor(String token) {
        if (this.tfNormalizationMode == LOG_LENGTH_NORMALIZATION) {
            double frequency = WordFrequencies.lookup(token.toLowerCase());
            return frequency > Math.E ? Math.log(frequency) : 1.0d;
        }
        if (this.tfNormalizationMode == LOG_10_LENGTH_NORMALIZATION) {
            double frequency = WordFrequencies.lookup(token.toLowerCase());
            return frequency > 10.0d ? Math.log10(frequency) : 1.0d;
        }
        return 1.0d;
    }

    /**
     * Stores the term counters of a target in the shared cache under the name of the
     * concrete filter class, replacing any entry previously stored for that class.
     *
     * @param str     the target string
     * @param hashMap the term counters obtained for the target
     */
    private void cache(String str, HashMap<String, TermCounter> hashMap) {
        final String className = getClass().getName();
        System.out.println("WebTermImportanceFilter: caching web lookup result for target '" + str + "' from class '" + className + "'");
        CacheEntry entry = new CacheEntry(str, hashMap);
        cache.put(className, entry);
    }

    /**
     * Looks up the cached term counters for a target. The cache holds one entry per
     * concrete filter class, so only the most recently cached target can hit.
     *
     * @param str the target string
     * @return the cached term counters, or {@code null} on a cache miss
     */
    private HashMap<String, TermCounter> cacheLookup(String str) {
        final String className = getClass().getName();
        System.out.println("WebTermImportanceFilter: doing cache lookup result for target '" + str + "', class '" + className + "'");
        final CacheEntry entry = cache.get(className);
        if (entry != null && str.equals(entry.target)) {
            System.out.println("  --> cache hit");
            return entry.termCounters;
        }
        if (entry == null) {
            System.out.println("  --> cache miss, no entry for '" + className + "' so far");
        } else {
            System.out.println("  --> cache miss, last target for '" + className + "' is '" + entry.target + "'");
        }
        return null;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Adds the counts of one set of term counters to another, creating counters in
     * the receiving map for terms it does not contain yet.
     *
     * @param hashMap  the counters to add (left unchanged)
     * @param hashMap2 the counters that receive the counts
     */
    public void addTermCounters(HashMap<String, TermCounter> hashMap, HashMap<String, TermCounter> hashMap2) {
        for (java.util.Map.Entry<String, TermCounter> source : hashMap.entrySet()) {
            final String term = source.getKey();
            final int amount = source.getValue().getValue();
            if (!hashMap2.containsKey(term)) {
                hashMap2.put(term, new TermCounter());
            }
            TermCounter receiver = hashMap2.get(term);
            receiver.increment(amount);
        }
    }

    /**
     * Finds the largest count among a set of term counters.
     *
     * @param hashMap the term counters
     * @return the maximum counter value, or 0 if the map is empty or no value is positive
     */
    protected int getMaxCount(HashMap<String, TermCounter> hashMap) {
        int highest = 0;
        for (TermCounter counter : hashMap.values()) {
            int current = counter.getValue();
            if (current > highest) {
                highest = current;
            }
        }
        return highest;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Sums the counts of a set of term counters.
     *
     * @param hashMap the term counters
     * @return the sum of all counter values, 0 for an empty map
     */
    public int getCountSum(HashMap<String, TermCounter> hashMap) {
        int total = 0;
        for (TermCounter counter : hashMap.values()) {
            total = total + counter.getValue();
        }
        return total;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Sums, over all terms of the first map, the amount by which the term's count
     * exceeds its count in the second map. Terms missing from the second map count
     * as 0 there, and negative differences are ignored.
     *
     * @param hashMap  the counters to measure
     * @param hashMap2 the counters to compare against
     * @return the sum of the positive differences
     */
    public int sumDiff(HashMap<String, TermCounter> hashMap, HashMap<String, TermCounter> hashMap2) {
        int total = 0;
        for (java.util.Map.Entry<String, TermCounter> entry : hashMap.entrySet()) {
            final String term = entry.getKey();
            final int own = entry.getValue().getValue();
            int other = 0;
            if (hashMap2.containsKey(term)) {
                other = hashMap2.get(term).getValue();
            }
            int surplus = own - other;
            if (surplus > 0) {
                total += surplus;
            }
        }
        return total;
    }

    /**
     * Test driver for target generation: loads the NLP components, reads the TREC
     * targets from the file given as first argument and prints the search strings
     * generated for each target.
     *
     * <p>The argument is validated before any model is loaded; a missing argument ends
     * the program with an error message and exit status 1 instead of an
     * {@link ArrayIndexOutOfBoundsException} after all components have been created.
     *
     * @param strArr command line arguments; {@code strArr[0]} is the TREC target file
     */
    public static void main(String[] strArr) {
        MsgPrinter.enableStatusMsgs(true);
        MsgPrinter.enableErrorMsgs(true);
        if (strArr == null || strArr.length == 0) {
            MsgPrinter.printErrorMsg("Usage: java WebTermImportanceFilter <TREC target file>");
            System.exit(1);
            return;
        }
        TEST_TARGET_GENERATION = true;

        MsgPrinter.printStatusMsg("Creating tokenizer...");
        if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
            MsgPrinter.printErrorMsg("Could not create tokenizer.");
        }
        MsgPrinter.printStatusMsg("Creating stemmer...");
        SnowballStemmer.create();
        MsgPrinter.printStatusMsg("Creating POS tagger...");
        if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz", "res/nlp/postagger/opennlp/tagdict")) {
            MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
        }
        MsgPrinter.printStatusMsg("Creating chunker...");
        if (!OpenNLP.createChunker("res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz")) {
            MsgPrinter.printErrorMsg("Could not create chunker.");
        }
        MsgPrinter.printStatusMsg("Creating NE taggers...");
        NETagger.loadListTaggers("res/nlp/netagger/lists/");
        NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
        MsgPrinter.printStatusMsg("  ...loading models");
        if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init()) {
            MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
        }
        MsgPrinter.printStatusMsg("  ...done");

        TargetGeneratorTest targetGeneratorTest = new TargetGeneratorTest(0);
        for (TRECTarget tRECTarget : TREC13To16Parser.loadTargets(strArr[0])) {
            String targetDesc = tRECTarget.getTargetDesc();
            MsgPrinter.printGeneratingQueries();
            String normalized = QuestionNormalizer.normalize(targetDesc);
            MsgPrinter.printNormalization(normalized);
            Logger.logNormalization(normalized);
            String[] keywords = KeywordExtractor.getKeywords(normalized);
            AnalyzedQuestion analyzedQuestion = new AnalyzedQuestion(targetDesc);
            analyzedQuestion.setKeywords(keywords);
            analyzedQuestion.setFactoid(false);
            Query[] queries = new BagOfWordsG().generateQueries(analyzedQuestion);
            // NOTE(review): guards against an empty query array; whether the generator
            // can actually return one is not visible from this file.
            if (queries.length == 0) {
                MsgPrinter.printErrorMsg("No query generated for target '" + targetDesc + "', skipping.");
                continue;
            }
            for (Query query : queries) {
                query.setOriginalQueryString(targetDesc);
            }
            // a dummy result carries the query (and thereby the target) into the filter
            targetGeneratorTest.apply(new Result[]{new Result("This would be the answer", queries[0])});
        }
    }
}
