package info.ephyra.answerselection.filters;

import info.ephyra.answerselection.filters.WebTermImportanceFilter;
import info.ephyra.io.Logger;
import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.NETagger;
import info.ephyra.nlp.OpenNLP;
import info.ephyra.nlp.SnowballStemmer;
import info.ephyra.querygeneration.Query;
import info.ephyra.querygeneration.generators.BagOfWordsG;
import info.ephyra.questionanalysis.AnalyzedQuestion;
import info.ephyra.questionanalysis.KeywordExtractor;
import info.ephyra.questionanalysis.QuestionNormalizer;
import info.ephyra.search.Result;
import info.ephyra.trec.TREC13To16Parser;
import info.ephyra.trec.TRECTarget;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;

/* loaded from: input_file:info/ephyra/answerselection/filters/WikipediaTermImportanceFilter.class */
/**
 * A {@link WebTermImportanceFilter} that obtains its term counts from the
 * English Wikipedia page whose title is derived from the target string.
 *
 * <p>The page is fetched over HTTP(S), everything between {@code '<'} and
 * {@code '>'} is skipped, and the remaining text is split into terms at every
 * character with a code below 33 (whitespace and control characters). Each
 * term is lower-cased, stemmed with {@link SnowballStemmer} and counted.
 * No other HTML processing (entity decoding, script/style removal) is done.
 */
public class WikipediaTermImportanceFilter extends WebTermImportanceFilter {
    /**
     * Debug switch: when {@code true}, every stemmed term read from the
     * downloaded page is echoed to standard output. Enabled by
     * {@link #main(String[])}. The misspelled name is kept for compatibility
     * with code that may reference this field.
     */
    protected static boolean TEST_TERM_DOWMLOD = false;

    /**
     * Prefix of the article URL. HTTPS is used because a plain-HTTP request is
     * answered with a redirect to HTTPS, and {@code HttpURLConnection} does not
     * follow redirects that switch protocols.
     */
    private static final String WIKIPEDIA_ARTICLE_URL = "https://en.wikipedia.org/wiki/";

    /**
     * Creates the filter. All three arguments are passed unchanged to the
     * {@link WebTermImportanceFilter} constructor, which defines their meaning.
     */
    public WikipediaTermImportanceFilter(int i, int i2, boolean z) {
        super(i, i2, z);
    }

    /**
     * Returns the given target as the only lookup target.
     *
     * @param str the target string
     * @return a one-element array containing {@code str}
     */
    @Override // info.ephyra.answerselection.filters.WebTermImportanceFilter
    public String[] getTargets(String str) {
        return new String[]{str};
    }

    /**
     * Returns the term counters for the first target in the array; any further
     * elements are ignored.
     *
     * @param strArr the targets
     * @return an empty map if {@code strArr} is {@code null} or empty, otherwise
     *         the result of {@link #getTermCounters(String)} for {@code strArr[0]}
     *         (which may be {@code null}, see there)
     */
    @Override // info.ephyra.answerselection.filters.WebTermImportanceFilter
    public HashMap<String, WebTermImportanceFilter.TermCounter> getTermCounters(String[] strArr) {
        if (strArr == null || strArr.length == 0) {
            return new HashMap<>();
        }
        return getTermCounters(strArr[0]);
    }

    /**
     * Downloads the Wikipedia page for the target and counts the stemmed terms
     * found in its text outside of markup tags.
     *
     * @param str the target; whitespace characters are replaced by underscores
     *            to form the page title
     * @return a map from stemmed, lower-cased term to its counter; {@code null}
     *         if the page could not be opened; a partially filled map if an
     *         I/O error occurs while the page is being read
     */
    public HashMap<String, WebTermImportanceFilter.TermCounter> getTermCounters(String str) {
        HashMap<String, WebTermImportanceFilter.TermCounter> termCounters = null;
        try {
            URLConnection connection =
                    new URL(WIKIPEDIA_ARTICLE_URL + str.replaceAll("\\s", "_")).openConnection();
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestProperty("User-Agent", "Ephyra");
            connection.connect();

            // Decode explicitly as UTF-8 instead of relying on the platform
            // default charset; try-with-resources closes the stream on every path.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                // Assigned only after the stream is open, so a failed open yields null.
                termCounters = new HashMap<>();
                boolean insideTag = false;
                StringBuilder term = new StringBuilder();
                int c;
                while ((c = reader.read()) != -1) {
                    if (c == '<') {
                        // A tag starts: whatever text was collected so far is a complete term.
                        insideTag = true;
                        countTerm(term, termCounters);
                    } else if (c == '>') {
                        insideTag = false;
                    } else if (!insideTag) {
                        if (c >= 33) {
                            term.append((char) c);
                        } else {
                            // Whitespace or control character terminates the current term.
                            countTerm(term, termCounters);
                        }
                    }
                }
                // Do not lose a term that runs up to the end of the stream.
                countTerm(term, termCounters);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return termCounters;
    }

    /**
     * Stems the buffered term, increments its counter in the map and clears the
     * buffer. Does nothing if the buffer is empty.
     *
     * @param term         buffer holding the raw term; emptied by this call
     * @param termCounters map from stemmed term to counter, updated in place
     */
    private void countTerm(StringBuilder term,
                           HashMap<String, WebTermImportanceFilter.TermCounter> termCounters) {
        if (term.length() == 0) {
            return;
        }
        // Locale.ROOT keeps case folding independent of the JVM's default locale.
        String stemmed = SnowballStemmer.stem(term.toString().toLowerCase(Locale.ROOT));
        term.setLength(0);
        if (TEST_TERM_DOWMLOD) {
            System.out.println(stemmed);
        }
        WebTermImportanceFilter.TermCounter counter = termCounters.get(stemmed);
        if (counter == null) {
            counter = new WebTermImportanceFilter.TermCounter();
            termCounters.put(stemmed, counter);
        }
        counter.increment(1);
    }

    /**
     * Test driver: initializes the NLP components, loads the TREC targets named
     * by the first argument and applies the filter once per target to a dummy
     * result.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is handed to
     *               {@link TREC13To16Parser#loadTargets}
     */
    public static void main(String[] strArr) {
        TEST_TERM_DOWMLOD = true;
        MsgPrinter.enableStatusMsgs(true);
        MsgPrinter.enableErrorMsgs(true);
        if (strArr == null || strArr.length == 0) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            MsgPrinter.printErrorMsg("Missing argument: the TREC targets input to load.");
            return;
        }
        MsgPrinter.printStatusMsg("Creating tokenizer...");
        if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
            MsgPrinter.printErrorMsg("Could not create tokenizer.");
        }
        MsgPrinter.printStatusMsg("Creating stemmer...");
        SnowballStemmer.create();
        MsgPrinter.printStatusMsg("Creating NE taggers...");
        NETagger.loadListTaggers("res/nlp/netagger/lists/");
        NETagger.loadRegExTaggers("res/nlp/netagger/patterns.lst");
        MsgPrinter.printStatusMsg("  ...loading models");
        MsgPrinter.printStatusMsg("  ...done");
        WikipediaTermImportanceFilter filter = new WikipediaTermImportanceFilter(0, 0, false);
        for (TRECTarget target : TREC13To16Parser.loadTargets(strArr[0])) {
            String targetDesc = target.getTargetDesc();
            MsgPrinter.printGeneratingQueries();
            String normalized = QuestionNormalizer.normalize(targetDesc);
            MsgPrinter.printNormalization(normalized);
            Logger.logNormalization(normalized);
            String[] keywords = KeywordExtractor.getKeywords(normalized);
            AnalyzedQuestion question = new AnalyzedQuestion(targetDesc);
            question.setKeywords(keywords);
            question.setFactoid(false);
            Query[] queries = new BagOfWordsG().generateQueries(question);
            if (queries.length == 0) {
                // Nothing to attach the dummy result to for this target.
                continue;
            }
            for (Query query : queries) {
                query.setOriginalQueryString(targetDesc);
            }
            filter.apply(new Result[]{new Result("This would be the answer", queries[0])});
        }
    }
}
