package info.ephyra.answerselection.filters;

import info.ephyra.io.Logger;
import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.NETagger;
import info.ephyra.nlp.OpenNLP;
import info.ephyra.nlp.SnowballStemmer;
import info.ephyra.nlp.indices.WordFrequencies;
import info.ephyra.querygeneration.Query;
import info.ephyra.querygeneration.generators.BagOfWordsG;
import info.ephyra.questionanalysis.AnalyzedQuestion;
import info.ephyra.questionanalysis.KeywordExtractor;
import info.ephyra.questionanalysis.QuestionNormalizer;
import info.ephyra.search.Result;
import info.ephyra.trec.TREC13To16Parser;
import info.ephyra.trec.TRECTarget;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;

/* loaded from: input_file:info/ephyra/answerselection/filters/WikipediaTermImportanceFilterOld.class */
public class WikipediaTermImportanceFilterOld extends Filter {
    // Category labels; none of these four constants is referenced elsewhere in this class.
    protected static final String person = "person";
    protected static final String organization = "organization";
    protected static final String location = "location";
    protected static final String event = "event";

    // Normalization modes accepted by the constructor and interpreted in apply().
    // The divisor used by modes 1-4 appears in the decompiled code as `r0.length`;
    // presumably this is the number of tokens in the answer - TODO confirm against the original source.

    /** The summed term importance is used as the score unchanged. */
    public static final int NO_NORMALIZATION = 0;
    /** The summed term importance is divided by the length. */
    public static final int LINEAR_LENGTH_NORMALIZATION = 1;
    /** The summed term importance is divided by the square root of the length. */
    public static final int SQUARE_ROOT_LENGTH_NORMALIZATION = 2;
    /** The summed term importance is divided by (1 + natural log of the length). */
    public static final int LOG_LENGTH_NORMALIZATION = 3;
    /** The summed term importance is divided by (1 + base-10 log of the length). */
    public static final int LOG_10_LENGTH_NORMALIZATION = 4;

    /** Normalization mode of this filter instance, fixed at construction time. */
    private final int normalizationMode;

    // Single-entry cache shared by all instances: the target of the most recent lookup and the
    // term counters obtained for it. Written by cache(), read by cacheLookup().
    private static String lastTarget = null;
    private static HashMap<String, TermCounter> lastTargetTermCounters = null;

    // Set to true by main(); not read anywhere else in this class.
    // NOTE(review): the name looks like a misspelling of "DOWNLOAD"; it is protected, so renaming could break subclasses.
    protected static boolean TEST_TERM_DOWMLOD = false;

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:info/ephyra/answerselection/filters/WikipediaTermImportanceFilterOld$TermCounter.class */
    /**
     * Mutable integer counter used as the value type of the term-frequency maps.
     * All arithmetic is plain {@code int} arithmetic, so division truncates and
     * dividing by zero throws {@link ArithmeticException}.
     */
    public class TermCounter {
        /** Current count. */
        private int value;

        /** Creates a counter that starts at zero. */
        protected TermCounter() {
            this(0);
        }

        /**
         * Creates a counter with an initial count.
         *
         * @param i the starting value
         */
        protected TermCounter(int i) {
            value = i;
        }

        /** @return the current count */
        public int getValue() {
            return value;
        }

        /** Adds one to the count. */
        public void increment() {
            value = value + 1;
        }

        /**
         * Adds an amount to the count.
         *
         * @param i the amount to add
         */
        public void increment(int i) {
            value = value + i;
        }

        /** Subtracts one from the count. */
        public void decrement() {
            value = value - 1;
        }

        /**
         * Subtracts an amount from the count.
         *
         * @param i the amount to subtract
         */
        public void decrement(int i) {
            value = value - i;
        }

        /**
         * Multiplies the count by a factor.
         *
         * @param i the factor
         */
        public void multiplyValue(int i) {
            value = value * i;
        }

        /**
         * Divides the count by a divisor using integer division.
         *
         * @param i the divisor
         */
        public void divideValue(int i) {
            value = value / i;
        }
    }

    /**
     * Creates a filter with a fixed normalization mode.
     *
     * @param i the normalization mode; {@code apply} recognizes the values of the
     *          {@code *_NORMALIZATION} constants (0 to 4)
     */
    public WikipediaTermImportanceFilterOld(int i) {
        super();
        normalizationMode = i;
    }

    /**
     * Downloads the English Wikipedia page for a target and counts the stemmed terms in its text.
     *
     * <p>The page is scanned character by character. Everything between {@code '<'} and
     * {@code '>'} is skipped as markup. Outside markup, characters with a code of 33 or above
     * are collected into the current term; any lower code (space, tab, line break, ...) or the
     * start of a tag ends the term. Each finished term is lower-cased, stemmed, printed to
     * standard output and counted.
     *
     * @param str the target; whitespace characters are replaced by underscores to form the page name
     * @return a map from stemmed term to its counter, or {@code null} if the page could not be
     *         opened; if reading fails part-way, the terms counted so far are returned
     */
    public HashMap<String, TermCounter> getTermCounters(String str) {
        HashMap<String, TermCounter> termCounters = null;
        try {
            // HTTPS is requested directly: an http:// request is answered with a redirect to https,
            // and HttpURLConnection does not follow redirects that switch protocol.
            URLConnection connection = new URL("https://en.wikipedia.org/wiki/" + str.replaceAll("\\s", "_")).openConnection();
            connection.setDoInput(true);
            // No request body is written, so output is not enabled on the connection.
            connection.setUseCaches(false);
            connection.setRequestProperty("User-Agent", "Ephyra");
            connection.connect();

            // try-with-resources closes the reader (and the underlying stream) on every exit path.
            // The charset is named explicitly instead of relying on the platform default.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                termCounters = new HashMap<>();
                boolean insideTag = false;
                StringBuilder term = new StringBuilder();
                int ch;
                while ((ch = reader.read()) != -1) {
                    if (ch == '<') {
                        // a tag starts: whatever was collected so far is a complete term
                        insideTag = true;
                        countBufferedTerm(term, termCounters);
                    } else if (ch == '>') {
                        insideTag = false;
                    } else if (!insideTag) {
                        if (ch >= 33) {
                            term.append((char) ch);
                        } else {
                            // whitespace or control character ends the current term
                            countBufferedTerm(term, termCounters);
                        }
                    }
                }
                // count a term that runs up to the end of the stream
                countBufferedTerm(term, termCounters);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return termCounters;
    }

    /**
     * Counts the term held in the buffer and empties the buffer; does nothing if the buffer is empty.
     *
     * @param term         the characters collected for the current term
     * @param termCounters the map whose counter for the stemmed term is incremented
     */
    private void countBufferedTerm(StringBuilder term, HashMap<String, TermCounter> termCounters) {
        if (term.length() == 0) {
            return;
        }
        String stem = SnowballStemmer.stem(term.toString().toLowerCase());
        System.out.println(stem);
        TermCounter counter = termCounters.get(stem);
        if (counter == null) {
            counter = new TermCounter();
            termCounters.put(stem, counter);
        }
        counter.increment(1);
        term.setLength(0);
    }

    /**
     * Re-ranks results by how many important terms of the target's Wikipedia page they contain.
     *
     * <p>The target is the original query string of the first result. Its term counters are taken
     * from the single-entry cache or downloaded via {@code getTermCounters}. Results are then
     * selected greedily: in every round each remaining result is scored by summing, over its
     * tokens longer than one character, the term count divided by the word frequency (at least 1);
     * the score is normalized according to the normalization mode; the best result is moved to the
     * output; and the counters of its terms are halved (terms that also occur in the target are
     * left unchanged) so that later rounds favour new information. Results with a score of
     * negative infinity or without any counted term are dropped. Finally the selected results are
     * sorted in descending order and given an extra score of 100, 90, 81, ...
     *
     * @param resultArr the results to filter
     * @return the selected results in descending order; the unchanged input if it is empty or if
     *         no term counters could be obtained for the target
     */
    @Override // info.ephyra.answerselection.filters.Filter
    public Result[] apply(Result[] resultArr) {
        if (resultArr.length == 0) {
            return resultArr;
        }
        String target = resultArr[0].getQuery().getOriginalQueryString();
        System.out.println("WikipediaTermImportanceFilter:\n processing target '" + target + "'");

        HashMap<String, TermCounter> rawCounters = cacheLookup(target);
        if (rawCounters == null) {
            rawCounters = getTermCounters(target);
            cache(target, rawCounters);
        }
        if (rawCounters == null) {
            return resultArr;
        }

        // tokens of the target that start with a letter or digit
        HashSet<String> targetTokens = new HashSet<>();
        for (String token : OpenNLP.tokenize(target)) {
            if (!token.isEmpty() && Character.isLetterOrDigit(token.charAt(0))) {
                targetTokens.add(token);
            }
        }

        // Working copy of the counters keyed by stem. The copy is what gets penalized below,
        // so the cached counters are never modified.
        HashMap<String, TermCounter> termCounters = new HashMap<>();
        HashSet<String> targetStems = new HashSet<>();
        for (String term : rawCounters.keySet()) {
            String stem = SnowballStemmer.stem(term.toLowerCase());
            TermCounter counter = termCounters.get(stem);
            if (counter == null) {
                counter = new TermCounter();
                termCounters.put(stem, counter);
            }
            counter.increment(rawCounters.get(term).getValue());
            // NOTE(review): targetTokens holds unmodified tokens while the keys produced by
            // getTermCounters are lower-cased stems, so only tokens identical to such a key
            // match here - confirm that this is intended.
            if (targetTokens.contains(term)) {
                targetStems.add(stem);
            }
        }

        ArrayList<Result> selected = new ArrayList<>();
        Result[] remaining = resultArr;
        while (true) {
            // score every remaining result against the current counters
            ArrayList<Result> scored = new ArrayList<>();
            for (Result result : remaining) {
                if (result.getScore() == Float.NEGATIVE_INFINITY) {
                    continue;
                }
                String[] tokens = NETagger.tokenize(result.getAnswer());
                float importance = 0.0f;
                for (String token : tokens) {
                    if (token.length() > 1) {
                        String stem = SnowballStemmer.stem(token.toLowerCase());
                        TermCounter counter = termCounters.get(stem);
                        if (counter != null) {
                            // NOTE(review): the decompiled source lost the names of the counter, the
                            // lookup argument and the token array; they are restored from the values
                            // inlined at their definition sites. The division is kept exactly as
                            // decompiled (it truncates if WordFrequencies.lookup returns an int) - confirm.
                            importance += counter.getValue() / Math.max(WordFrequencies.lookup(stem), 1);
                        }
                    }
                }
                if (importance > 0.0f) {
                    setNormalizedScore(result, importance, tokens.length);
                    scored.add(result);
                }
            }
            if (scored.isEmpty()) {
                break;
            }

            // ascending sort followed by reversal is kept as is, so ties are broken as before
            Collections.sort(scored);
            Collections.reverse(scored);
            Result best = scored.remove(0);
            selected.add(best);

            // penalize the terms of the selected result so later rounds reward new terms
            for (String token : NETagger.tokenize(best.getAnswer())) {
                String stem = SnowballStemmer.stem(token.toLowerCase());
                TermCounter counter = termCounters.get(stem);
                if (counter != null) {
                    counter.divideValue(targetStems.contains(stem) ? 1 : 2);
                    if (counter.getValue() == 0) {
                        termCounters.remove(stem);
                    }
                }
            }
            remaining = scored.toArray(new Result[scored.size()]);
        }

        Collections.sort(selected);
        Collections.reverse(selected);
        String scoreName = getClass().getName() + this.normalizationMode;
        float rankScore = 100.0f;
        for (Result result : selected) {
            result.addExtraScore(scoreName, rankScore);
            rankScore *= 0.9f;
        }
        return selected.toArray(new Result[selected.size()]);
    }

    /**
     * Sets the score of a result to its importance, normalized according to the normalization mode.
     * For a mode other than the five known constants the score is left untouched.
     *
     * @param result     the result to score
     * @param importance the summed term importance of the result
     * @param tokenCount the number of tokens in the result's answer
     */
    private void setNormalizedScore(Result result, float importance, int tokenCount) {
        switch (this.normalizationMode) {
            case NO_NORMALIZATION:
                result.setScore(importance);
                break;
            case LINEAR_LENGTH_NORMALIZATION:
                result.setScore(importance / tokenCount);
                break;
            case SQUARE_ROOT_LENGTH_NORMALIZATION:
                result.setScore(importance / ((float) Math.sqrt(tokenCount)));
                break;
            case LOG_LENGTH_NORMALIZATION:
                result.setScore(importance / (1.0f + ((float) Math.log(tokenCount))));
                break;
            case LOG_10_LENGTH_NORMALIZATION:
                result.setScore(importance / (1.0f + ((float) Math.log10(tokenCount))));
                break;
            default:
                break;
        }
    }

    /**
     * Stores the term counters of a target in the single-entry cache, replacing the previous entry.
     *
     * @param str     the target the counters belong to
     * @param hashMap the term counters to remember; stored as given, including {@code null}
     */
    private void cache(String str, HashMap<String, TermCounter> hashMap) {
        String message = "WikipediaTermImportanceFilter: caching web lookup result for target '" + str + "'";
        System.out.println(message);
        lastTargetTermCounters = hashMap;
        lastTarget = str;
    }

    /**
     * Looks up the term counters of a target in the single-entry cache.
     *
     * @param str the target to look up
     * @return the counters stored for the target if it equals the most recently cached target,
     *         otherwise {@code null}
     */
    private HashMap<String, TermCounter> cacheLookup(String str) {
        System.out.println("WikipediaTermImportanceFilter: doing cache lookup result for target '" + str + "'");
        boolean hit = str.equals(lastTarget);
        if (!hit) {
            System.out.println("  --> cache miss, last target is '" + lastTarget + "'");
            return null;
        }
        System.out.println("  --> cache hit");
        return lastTargetTermCounters;
    }

    /**
     * Test driver: loads the targets named by the first argument and runs a filter on a dummy
     * result for each of them.
     *
     * <p>For every target the description is normalized, keywords are extracted, bag-of-words
     * queries are generated, and the first query is attached to a placeholder answer that is
     * passed to the filter.
     *
     * @param strArr command-line arguments; the first one is handed to
     *               {@code TREC13To16Parser.loadTargets} (presumably the path of a TREC target file)
     */
    public static void main(String[] strArr) {
        TEST_TERM_DOWMLOD = true;
        MsgPrinter.enableStatusMsgs(true);
        MsgPrinter.enableErrorMsgs(true);

        // without an argument there is nothing to load; report it instead of failing on strArr[0]
        if (strArr == null || strArr.length == 0) {
            MsgPrinter.printErrorMsg("Usage: WikipediaTermImportanceFilterOld <target file>");
            return;
        }

        MsgPrinter.printStatusMsg("Creating tokenizer...");
        if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
            MsgPrinter.printErrorMsg("Could not create tokenizer.");
        }
        MsgPrinter.printStatusMsg("Creating stemmer...");
        SnowballStemmer.create();
        MsgPrinter.printStatusMsg("Creating NE taggers...");
        NETagger.loadListTaggers("res/nlp/netagger/lists/");
        NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
        MsgPrinter.printStatusMsg("  ...loading models");
        MsgPrinter.printStatusMsg("  ...done");

        // NOTE(review): this instantiates WikipediaTermImportanceFilter, not this class - confirm that is intended.
        WikipediaTermImportanceFilter wikipediaTermImportanceFilter = new WikipediaTermImportanceFilter(0, 0, false);
        for (TRECTarget tRECTarget : TREC13To16Parser.loadTargets(strArr[0])) {
            String targetDesc = tRECTarget.getTargetDesc();
            MsgPrinter.printGeneratingQueries();
            String normalize = QuestionNormalizer.normalize(targetDesc);
            MsgPrinter.printNormalization(normalize);
            Logger.logNormalization(normalize);
            String[] keywords = KeywordExtractor.getKeywords(normalize);
            AnalyzedQuestion analyzedQuestion = new AnalyzedQuestion(targetDesc);
            analyzedQuestion.setKeywords(keywords);
            analyzedQuestion.setFactoid(false);
            Query[] generateQueries = new BagOfWordsG().generateQueries(analyzedQuestion);
            // the dummy result needs a query; skip targets for which none was generated
            if (generateQueries.length == 0) {
                MsgPrinter.printErrorMsg("No query generated for target '" + targetDesc + "'.");
                continue;
            }
            for (Query query : generateQueries) {
                query.setOriginalQueryString(targetDesc);
            }
            wikipediaTermImportanceFilter.apply(new Result[]{new Result("This would be the answer", generateQueries[0])});
        }
    }
}
