package info.ephyra.answerselection.filters;

import com.google.soap.search.GoogleSearch;
import com.google.soap.search.GoogleSearchFault;
import com.google.soap.search.GoogleSearchResult;
import com.google.soap.search.GoogleSearchResultElement;
import info.ephyra.answerselection.filters.WebTermImportanceFilter;
import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.NETagger;
import info.ephyra.nlp.OpenNLP;
import info.ephyra.nlp.SnowballStemmer;
import info.ephyra.nlp.StanfordNeTagger;
import info.ephyra.search.searchers.GoogleKM;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;

/* loaded from: input_file:info/ephyra/answerselection/filters/WikipediaGoogleTermImportanceFilter.class */
/**
 * Web term importance filter that first tries to obtain term counts for a
 * target from its English Wikipedia page and falls back to Google search
 * snippets when the fetched page does not look like a real article.
 *
 * <p>Whether a fetched page counts as a "hit" is decided by comparing its term
 * counts with a reference set of term counts loaded once from
 * {@code ./res/definitional/webreinforcement/wikipediaMissTerms}.
 */
public class WikipediaGoogleTermImportanceFilter extends WebTermImportanceFilter {
    // NOTE(review): this API key is hard-coded in source and is also echoed in
    // the debug output below; consider moving it to external configuration.
    private static final String GOOGLE_KEY = "psf+tjo2xCCULVZCwYYG20pFKPC863E3";

    /** Upper bound (exclusive) for the start offsets requested from Google. */
    private static final int MAX_RESULTS_TOTAL = 250;

    /** Number of results requested per Google query; also the offset step. */
    private static final int MAX_RESULTS_PERQUERY = 10;

    /** Number of failed searches tolerated (with a pause in between) before giving up. */
    private static final int RETRIES = 50;

    /** File with one term per line; repeated lines raise that term's count. */
    private static final String MISS_TERMS_FILE = "./res/definitional/webreinforcement/wikipediaMissTerms";

    /** Prefix of the Wikipedia article URL; the target is appended with whitespace replaced by '_'. */
    private static final String WIKIPEDIA_URL_PREFIX = "http://en.wikipedia.org/wiki/";

    /** Reference term counts, shared by all instances and loaded at most once while non-empty. */
    private static final HashMap<String, WebTermImportanceFilter.TermCounter> missPageTermCounters = new HashMap<>();

    /**
     * Loads the reference term counts from {@link #MISS_TERMS_FILE} unless they
     * have already been loaded. Every line of the file is counted as one
     * occurrence of the term it contains. If the file cannot be read, an error
     * message is printed and whatever was read so far is kept.
     */
    private void initMissTerms() {
        if (!missPageTermCounters.isEmpty()) {
            return;
        }
        // try-with-resources closes the reader even if readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(MISS_TERMS_FILE))) {
            String line;
            while ((line = reader.readLine()) != null) {
                countTerm(missPageTermCounters, line);
            }
        } catch (IOException e) {
            MsgPrinter.printErrorMsg("Could not read Wikipedia miss terms from " + MISS_TERMS_FILE + ": " + e.getMessage());
        }
    }

    /**
     * Creates the filter and makes sure the reference term counts are loaded.
     *
     * @param i forwarded unchanged to the {@code WebTermImportanceFilter} constructor
     * @param i2 forwarded unchanged to the {@code WebTermImportanceFilter} constructor
     * @param z forwarded unchanged to the {@code WebTermImportanceFilter} constructor
     */
    public WikipediaGoogleTermImportanceFilter(int i, int i2, boolean z) {
        super(i, i2, z);
        initMissTerms();
    }

    /**
     * Collects term counts for the given targets.
     *
     * <p>For each target that does not start with a double quote, the Wikipedia
     * page is fetched and its terms counted. The page is accepted when
     * {@code sumDiff(page, missTerms) * 10 > getCountSum(page)}; its counts are
     * then merged into the result, and if the accepted target is the first one
     * the method returns immediately. Otherwise (including quoted targets and
     * failed downloads) the counts are taken from Google search snippets.
     *
     * @param strArr the targets to look up
     * @return the merged term counters of all processed targets
     */
    @Override // info.ephyra.answerselection.filters.WebTermImportanceFilter
    public HashMap<String, WebTermImportanceFilter.TermCounter> getTermCounters(String[] strArr) {
        HashMap<String, WebTermImportanceFilter.TermCounter> allCounters = new HashMap<>();
        for (int i = 0; i < strArr.length; i++) {
            String target = strArr[i];
            HashMap<String, WebTermImportanceFilter.TermCounter> pageCounters;
            if (target.startsWith("\"")) {
                // quoted targets are never looked up in Wikipedia
                pageCounters = new HashMap<>();
            } else {
                pageCounters = getWikipediaTermCounters(target);
            }
            int countSum = getCountSum(pageCounters);
            // presumably a measure of how far the page is from the reference
            // "miss" page; sumDiff is inherited and not visible here
            int sumDiff = sumDiff(pageCounters, missPageTermCounters);
            System.out.println("WikipediaGoogleWebTermImportanceFilter: sum is " + countSum + ", diff is " + sumDiff);
            if (sumDiff * 10 > countSum) {
                addTermCounters(pageCounters, allCounters);
                System.out.println("WikipediaGoogleWebTermImportanceFilter: found target '" + target + "' in wikipedia");
                if (i == 0) {
                    System.out.println("  ==> No further lookups needed");
                    return allCounters;
                }
            } else {
                System.out.println("WikipediaGoogleWebTermImportanceFilter: target '" + target + "' not found in wikipedia, doing Google lookup");
                addTermCounters(getGoogleTermCounters(target), allCounters);
            }
        }
        return allCounters;
    }

    /**
     * Downloads the Wikipedia page for a target and counts the stemmed,
     * lower-cased tokens found outside of markup tags. Tokens are maximal runs
     * of characters with code &gt;= 33 that are not inside {@code <...>}.
     *
     * @param target the target whose page is fetched
     * @return the term counters of the page; on an I/O error the stack trace is
     *         printed and the counts gathered up to that point are returned
     */
    private HashMap<String, WebTermImportanceFilter.TermCounter> getWikipediaTermCounters(String target) {
        HashMap<String, WebTermImportanceFilter.TermCounter> counters = new HashMap<>();
        try {
            URLConnection connection = new URL(WIKIPEDIA_URL_PREFIX + target.replaceAll("\\s", "_")).openConnection();
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestProperty("User-Agent", "Ephyra");
            connection.connect();
            // Decode explicitly as UTF-8 instead of the platform default, and
            // close the reader (and the underlying stream) when done.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                boolean inTag = false;
                StringBuilder token = new StringBuilder();
                int c;
                while ((c = reader.read()) != -1) {
                    if (c == '<') {
                        // a tag starts: whatever was collected is a complete token
                        inTag = true;
                        flushToken(token, counters);
                    } else if (c == '>') {
                        inTag = false;
                    } else if (!inTag) {
                        if (c >= 33) {
                            token.append((char) c);
                        } else {
                            // whitespace / control character ends the token
                            flushToken(token, counters);
                        }
                    }
                }
                // do not lose a token that runs up to the end of the stream
                flushToken(token, counters);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return counters;
    }

    /**
     * Counts the buffered token (stemmed and lower-cased) and clears the
     * buffer. Does nothing if the buffer is empty.
     */
    private void flushToken(StringBuilder token, HashMap<String, WebTermImportanceFilter.TermCounter> counters) {
        if (token.length() == 0) {
            return;
        }
        countTerm(counters, SnowballStemmer.stem(token.toString().toLowerCase()));
        token.setLength(0);
    }

    /** Increments the counter of the given term by one, creating it if necessary. */
    private void countTerm(HashMap<String, WebTermImportanceFilter.TermCounter> counters, String term) {
        WebTermImportanceFilter.TermCounter counter = counters.get(term);
        if (counter == null) {
            counter = new WebTermImportanceFilter.TermCounter();
            counters.put(term, counter);
        }
        counter.increment(1);
    }

    /**
     * Queries Google for the given string in pages of
     * {@link #MAX_RESULTS_PERQUERY} results, for start offsets below
     * {@link #MAX_RESULTS_TOTAL}, and counts the stemmed, lower-cased tokens of
     * the result snippets. Stems of length one or less are ignored.
     *
     * <p>A failing search is retried after a one second pause. After
     * {@link #RETRIES} retries, or if the thread is interrupted while pausing,
     * the counts gathered so far are returned.
     *
     * @param str the query string
     * @return the term counters built from the snippets
     */
    private HashMap<String, WebTermImportanceFilter.TermCounter> getGoogleTermCounters(String str) {
        HashMap<String, WebTermImportanceFilter.TermCounter> counters = new HashMap<>();
        for (int start = 0; start < MAX_RESULTS_TOTAL; start += MAX_RESULTS_PERQUERY) {
            GoogleSearch search = new GoogleSearch();
            if (TEST_TARGET_GENERATION) {
                System.out.println("Got search ...");
            }
            search.setKey(GOOGLE_KEY);
            if (TEST_TARGET_GENERATION) {
                System.out.println(" - key is " + GOOGLE_KEY);
            }
            search.setQueryString(str);
            if (TEST_TARGET_GENERATION) {
                System.out.println(" - target is " + str);
            }
            search.setLanguageRestricts("English");
            if (TEST_TARGET_GENERATION) {
                System.out.println(" - language set");
            }
            search.setStartResult(start);
            if (TEST_TARGET_GENERATION) {
                System.out.println(" - start result set to " + start);
            }
            search.setMaxResults(MAX_RESULTS_PERQUERY);
            if (TEST_TARGET_GENERATION) {
                System.out.println(" - max results set");
            }

            GoogleSearchResult result = null;
            int failures = 0;
            while (result == null) {
                try {
                    result = search.doSearch();
                } catch (GoogleSearchFault e) {
                    MsgPrinter.printSearchError(e);
                    if (failures == RETRIES) {
                        MsgPrinter.printErrorMsg("\nSearch failed.");
                        return counters;
                    }
                    failures++;
                    try {
                        GoogleKM.sleep(1000L);
                    } catch (InterruptedException interrupted) {
                        // keep the interrupt visible to callers and stop
                        // retrying instead of spinning through the retries
                        Thread.currentThread().interrupt();
                        return counters;
                    }
                }
            }

            GoogleSearchResultElement[] resultElements = result.getResultElements();
            if (TEST_TARGET_GENERATION) {
                System.out.println(" - got results: " + resultElements.length);
            }
            for (GoogleSearchResultElement element : resultElements) {
                // replace markup by blanks and decode the apostrophe entity
                String plain = element.getSnippet().replaceAll("\\<[^\\>]++\\>", " ").replaceAll("\\&\\#39\\;", "'");
                if (TEST_TARGET_GENERATION) {
                    System.out.println(" - plain: " + plain);
                }
                for (String token : NETagger.tokenize(plain)) {
                    String stem = SnowballStemmer.stem(token.toLowerCase());
                    if (stem.length() > 1) {
                        if (!counters.containsKey(stem)) {
                            counters.put(stem, new WebTermImportanceFilter.TermCounter());
                        }
                        counters.get(stem).increment();
                    }
                }
            }
        }
        return counters;
    }

    /**
     * Manual test driver: initializes the NLP components, gathers term counts
     * for the target "Warren Moon" and prints them ordered by descending count
     * (ties broken alphabetically), followed by the number of terms that
     * occurred at least five times.
     *
     * @param strArr command line arguments (unused)
     */
    public static void main(String[] strArr) {
        TEST_TARGET_GENERATION = true;
        MsgPrinter.enableStatusMsgs(true);
        MsgPrinter.enableErrorMsgs(true);
        MsgPrinter.printStatusMsg("Creating tokenizer...");
        if (!OpenNLP.createTokenizer("res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
            MsgPrinter.printErrorMsg("Could not create tokenizer.");
        }
        MsgPrinter.printStatusMsg("Creating stemmer...");
        SnowballStemmer.create();
        MsgPrinter.printStatusMsg("Creating POS tagger...");
        if (!OpenNLP.createPosTagger("res/nlp/postagger/opennlp/tag.bin.gz", "res/nlp/postagger/opennlp/tagdict")) {
            MsgPrinter.printErrorMsg("Could not create POS tagger.");
        }
        MsgPrinter.printStatusMsg("Creating chunker...");
        if (!OpenNLP.createChunker("res/phrasechunker/opennlp/EnglishChunk.bin.gz")) {
            MsgPrinter.printErrorMsg("Could not create chunker.");
        }
        MsgPrinter.printStatusMsg("Creating Stanford NE tagger...");
        if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init()) {
            MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
        }
        // NOTE(review): this driver exercises GoogleTermImportanceFilter, not
        // WikipediaGoogleTermImportanceFilter - looks like a copy-paste
        // leftover; confirm which filter is meant to be tested here.
        GoogleTermImportanceFilter filter = new GoogleTermImportanceFilter(0, 0, false);
        final HashMap<String, WebTermImportanceFilter.TermCounter> termCounters = filter.getTermCounters(filter.getTargets("Warren Moon"));
        ArrayList<String> terms = new ArrayList<>(termCounters.keySet());
        Collections.sort(terms, new Comparator<String>() {
            @Override // java.util.Comparator
            public int compare(String str, String str2) {
                int value = termCounters.get(str).getValue();
                int value2 = termCounters.get(str2).getValue();
                // Integer.compare avoids the overflow of "value2 - value"
                return value == value2 ? str.compareTo(str2) : Integer.compare(value2, value);
            }
        });
        int frequentTerms = 0;
        for (String term : terms) {
            int value = termCounters.get(term).getValue();
            System.out.println(term + ": " + value);
            if (value > 4) {
                frequentTerms++;
            }
        }
        System.out.println("At least 5 times: " + frequentTerms);
    }
}
