package info.ephyra.answerselection.ag.resource.web;

import com.google.soap.search.GoogleSearch;
import com.google.soap.search.GoogleSearchFault;
import com.google.soap.search.GoogleSearchResult;
import com.google.soap.search.GoogleSearchResultElement;
import edu.cmu.lti.japaneseNLP.util.NLPTool;
import edu.cmu.lti.javelin.qa.Term;
import edu.cmu.lti.javelin.util.ChineseModule;
import edu.cmu.lti.javelin.util.Language;
import info.ephyra.answerselection.ag.resource.Stopwords;
import info.ephyra.answerselection.ag.utility.IOManager;
import info.ephyra.answerselection.ag.utility.Utility;
import info.ephyra.answerselection.ag.utility.WildChar;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URLEncoder;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;

/* loaded from: input_file:info/ephyra/answerselection/ag/resource/web/GoogleManager.class */
/**
 * Runs web searches through the Google SOAP search client for an answer candidate plus a
 * list of question terms, and keeps the returned snippets in a file cache below a base
 * directory ({@code <baseDir>/<language>/<answertype>/[<subtype>/]<fileName>}).
 *
 * <p>Snippets of non-English results are post-processed with the Japanese or Chinese
 * segmentation tool handed to the constructor before they are cached.
 *
 * <p>The instance keeps the state of the most recent search in mutable fields
 * ({@link #result}, {@link #element}, {@link #language}, ...); nothing here synchronizes
 * access to them.
 */
public class GoogleManager {
    /** Upper bound on the number of snippets that are post-processed per result set. */
    private static final int MAX_SNIPPETS = 100;
    /** Answer candidates longer than this are not searched at all. */
    private static final int MAX_ANSWER_LENGTH = 70;
    /** Queries longer than this are rebuilt with a reduced number of aliases. */
    private static final int MAX_QUERY_LENGTH = 2048;
    /** Alias limit used when the full query exceeds {@link #MAX_QUERY_LENGTH}. */
    private static final int REDUCED_ALIAS_LIMIT = 2;
    /** Hard cap on the number of aliases used per term, whatever the caller asks for. */
    private static final int MAX_ALIASES_PER_TERM = 5;
    /** Cache file names are cut to this length for Japanese. */
    private static final int MAX_JAPANESE_FILE_NAME_LENGTH = 70;

    // Regular expressions are compiled once instead of on every call.
    private static final Pattern SLASH = Pattern.compile("/");
    private static final Pattern WHITESPACE = Pattern.compile("\\s");
    private static final Pattern DOUBLE_WHITESPACE = Pattern.compile("\\s\\s");
    private static final Pattern QUESTION_MARK = Pattern.compile("\\?");
    private static final Pattern UPPERCASE = Pattern.compile("[A-Z](.*)");
    private static final Pattern BOLD_SPAN = Pattern.compile("<b>([^<]*)</b>");
    private static final Pattern SPACED_BOLD_SPAN = Pattern.compile("< b >([^<]*)< b >");

    /** Line separator of the cache file; switched per search depending on the language. */
    private String NEWLINE = "\r\n";
    boolean DEBUG = false;
    boolean SEGMENTATION = true;
    // NOTE(review): the API key is embedded in the source; consider moving it to configuration.
    static final String clientKey = "psf+tjo2xCCULVZCwYYG20pFKPC863E3";
    GoogleSearch google;
    GoogleSearchResult result;
    GoogleSearchResultElement[] element;
    boolean isCLQA;
    String answertype;
    String subtype;
    String language;
    String baseDir;
    String cacheDir;
    NLPTool jpNLPTool;
    ChineseModule chNLPTool;
    private static final Logger log = Logger.getLogger(GoogleManager.class);
    private static final String QUOTE = "\"";
    private static final String OR = " OR ";

    /**
     * Creates a manager whose cache lives below the given base directory.
     *
     * @param str           cache base directory; a trailing "/" is appended when missing
     * @param nLPTool       Japanese segmentation tool, may be {@code null}
     * @param chineseModule Chinese segmentation tool, may be {@code null}
     */
    public GoogleManager(String str, NLPTool nLPTool, ChineseModule chineseModule) {
        this.baseDir = str.endsWith("/") ? str : str + "/";
        this.jpNLPTool = nLPTool;
        this.chNLPTool = chineseModule;
        File file = new File(this.baseDir);
        // mkdirs() also creates missing parent directories, which mkdir() does not.
        if (!file.exists() && !file.mkdirs()) {
            log.warn("could not create cache base directory: " + this.baseDir);
        }
        this.google = new GoogleSearch();
        this.google.setKey(clientKey);
    }

    /**
     * Switches debug logging of the segmentation step on or off.
     *
     * @param z {@code true} to log snippets before and after segmentation
     */
    public void setDebug(boolean z) {
        this.DEBUG = z;
    }

    /**
     * Returns the snippet of the i-th result element of the most recent search.
     *
     * @param i zero-based index into the current result elements
     * @return the snippet, or {@code null} when no results are loaded, the index is out of
     *         range or the element is missing
     */
    public String getResultElements(int i) {
        // this.element stays null until a search or cache read has populated it.
        if (this.element != null && i >= 0 && i < this.element.length && this.element[i] != null) {
            return this.element[i].getSnippet();
        }
        System.out.println("********************** eleement is null");
        return null;
    }

    /**
     * Searches for an answer candidate together with the question terms, using the file
     * cache when an entry exists and querying Google otherwise.
     *
     * @param str      answer candidate; becomes the first quoted phrase of the query
     * @param list     question terms
     * @param str2     answer type, used as a cache sub-directory
     * @param str3     answer subtype, used (with "/" removed) as a further sub-directory;
     *                 {@code null} is treated as empty
     * @param language language of the search
     * @param z        {@code true} to build a cross-lingual query from term aliases
     * @return the number of result snippets; {@code 0} when the query contains "**", "---"
     *         or "::"; {@code -1} when the answer is longer than 70 characters or the search
     *         failed
     * @throws Exception declared for callers; failures of the Google call itself are caught
     *                   and reported as {@code -1}
     */
    public int search(String str, List<Term> list, String str2, String str3, Language language, boolean z) throws Exception {
        this.answertype = str2;
        this.subtype = str3 == null ? "" : SLASH.matcher(str3).replaceAll("");
        this.isCLQA = z;
        if (Utility.isJapanese(language)) {
            this.language = "jp";
        } else if (Utility.isChinese(language)) {
            this.language = "ch";
        } else {
            this.language = "en";
        }
        this.NEWLINE = Utility.isEnglish(language) ? "\n" : "\r\n";

        String fileName = getFileName(str, list);
        String query = getQuery(str, list, -1);
        if (str.length() > MAX_ANSWER_LENGTH) {
            log.info("** answer is too long:" + str.length());
            return -1;
        }
        if (query.length() > MAX_QUERY_LENGTH) {
            log.info("********** query is too long: " + query);
            query = getQuery(str, list, REDUCED_ALIAS_LIMIT);
        }
        if (query.indexOf("**") >= 0 || query.indexOf("---") >= 0 || query.indexOf("::") >= 0) {
            return 0;
        }
        int cached = readCache(fileName);
        if (cached >= 0) {
            return cached;
        }
        System.out.println("Google query: " + query);
        try {
            this.google.setQueryString(query);
            this.google.setSafeSearch(true);
            this.google.setFilter(true);
            this.google.setLanguageRestricts(getLanguageFormat());
            this.result = this.google.doSearch();
            this.element = this.result.getResultElements();
            int count = this.element.length;
            segmentation(str);
            storeToCache(fileName);
            return count;
        } catch (GoogleSearchFault e) {
            // The specific fault has to be caught before the general Exception handler.
            System.out.println("***** failure in google search for :" + query);
            log.warn("google search fault for query: " + query, e);
            return -1;
        } catch (Exception e) {
            System.out.println("***** (2) failure in google search for :" + query);
            log.warn("google search failed for query: " + query, e);
            return -1;
        }
    }

    /**
     * Builds the query string: the quoted answer candidate followed by one clause per
     * usable term. Terms without text and stopwords are skipped.
     *
     * <p>In cross-lingual mode a clause is the OR-combination of the term's aliases
     * (preceded by the term itself when its text contains an upper-case ASCII letter);
     * otherwise it is the quoted term text.
     *
     * @param str  answer candidate
     * @param list question terms
     * @param i    maximum number of aliases per term, {@code -1} for all of them; at most
     *             five aliases are ever used
     * @return the trimmed query string
     */
    public String getQuery(String str, List<Term> list, int i) {
        StringBuilder query = new StringBuilder();
        query.append(QUOTE).append(str).append(QUOTE);
        for (int t = 0; t < list.size(); t++) {
            Term term = list.get(t);
            String text = term.getText();
            if (text == null || text.length() == 0 || Stopwords.getInstance().isStopword(text)) {
                continue;
            }
            if (!this.isCLQA) {
                query.append(" ").append(QUOTE).append(text).append(QUOTE);
                continue;
            }
            String clause = buildAliasClause(term, i);
            if (clause.length() == 0) {
                continue;
            }
            if (clause.indexOf(OR) < 0) {
                query.append(" ").append(clause);
            } else {
                query.append(" (").append(clause).append(") ");
            }
        }
        return query.toString().trim();
    }

    /** Joins the usable alternatives of a term with OR, never leaving a dangling operator. */
    private String buildAliasClause(Term term, int maxAliases) {
        int available = term.getAliases().size();
        int limit = maxAliases == -1 ? available : Math.min(available, maxAliases);
        limit = Math.min(limit, MAX_ALIASES_PER_TERM);

        StringBuilder clause = new StringBuilder();
        if (UPPERCASE.matcher(term.getText()).find()) {
            appendAlternative(clause, term.getText());
        }
        for (int k = 0; k < limit; k++) {
            Term alias = (Term) term.getAliases().get(k);
            String aliasText = alias.getText();
            if (aliasText != null && aliasText.length() != 0) {
                appendAlternative(clause, aliasText);
            }
        }
        return clause.toString();
    }

    /** Appends a quoted phrase to the clause, separated by OR from what is already there. */
    private static void appendAlternative(StringBuilder clause, String text) {
        if (clause.length() > 0) {
            clause.append(OR);
        }
        clause.append(QUOTE).append(text).append(QUOTE);
    }

    /**
     * Derives the cache file name for a search and makes sure the cache directory for the
     * current language, answer type and subtype exists. Sets {@link #cacheDir} as a side
     * effect.
     *
     * @param str  answer candidate
     * @param list question terms
     * @return the cache file name (without directory)
     */
    private String getFileName(String str, List<Term> list) {
        String answerPart = WHITESPACE.matcher(str).replaceAll("-");
        if (Utility.isChinese(this.language) || Utility.isJapanese(this.language)) {
            try {
                answerPart = URLEncoder.encode(answerPart, "UTF-8");
            } catch (java.io.UnsupportedEncodingException e) {
                log.warn("could not URL-encode cache file name: " + answerPart, e);
            }
        }
        StringBuilder name = new StringBuilder(answerPart);
        for (int t = 0; t < list.size(); t++) {
            String text = list.get(t).getText();
            if (text != null && text.trim().length() != 0) {
                name.append("_").append(WHITESPACE.matcher(text).replaceAll("-"));
            }
        }
        String lowered = name.toString().toLowerCase();
        String fileName = SLASH.matcher(QUESTION_MARK.matcher(lowered).replaceAll("")).replaceAll("");

        String languageDir = this.baseDir + this.language;
        if (Utility.isChinese(this.language) && !this.SEGMENTATION) {
            languageDir = this.baseDir + "ch_nosegmentation";
        }
        String typeDir = languageDir + "/" + this.answertype + "/";
        this.cacheDir = this.subtype.length() > 0 ? typeDir + this.subtype + "/" : typeDir;
        File dir = new File(this.cacheDir);
        // One mkdirs() call creates the language, answer type and subtype levels together.
        if (!dir.exists() && !dir.mkdirs()) {
            log.warn("could not create cache directory: " + this.cacheDir);
        }

        if (Utility.isJapanese(this.language) && fileName.length() > MAX_JAPANESE_FILE_NAME_LENGTH) {
            fileName = fileName.substring(0, MAX_JAPANESE_FILE_NAME_LENGTH);
        }
        return fileName;
    }

    /** Maps the current language code to the language restriction string passed to Google. */
    private String getLanguageFormat() {
        if (Utility.isJapanese(this.language)) {
            return "lang_ja";
        }
        return Utility.isChinese(this.language) ? "lang_zh-CN" : "lang_en";
    }

    /**
     * Loads the snippets of a cached search into {@link #element} and {@link #result}.
     *
     * <p>The cache file holds the snippet count on its first line followed by one snippet
     * per line. For English the count is taken from the number of lines instead of the
     * first line.
     *
     * @param str cache file name inside {@link #cacheDir}
     * @return the number of snippets loaded, or {@code -1} when there is no usable cache
     *         entry (missing, empty or inconsistent file)
     */
    private int readCache(String str) {
        String content = new IOManager().readFile(this.cacheDir + str);
        if (content == null || content.trim().length() == 0) {
            return -1;
        }
        String[] lines = content.split(this.NEWLINE);
        if (lines.length < 1) {
            return -1;
        }
        int count;
        if (Utility.isEnglish(this.language)) {
            count = lines.length - 1;
        } else {
            try {
                count = Integer.parseInt(lines[0].trim());
            } catch (NumberFormatException e) {
                log.warn("ignoring cache file with unreadable count: " + this.cacheDir + str, e);
                return -1;
            }
        }
        // Snippet k is read from lines[k + 1], so the count must be below lines.length.
        if (count < 0 || count >= lines.length) {
            return -1;
        }
        this.element = new GoogleSearchResultElement[count];
        for (int k = 0; k < count; k++) {
            String snippet = lines[k + 1];
            if (Utility.isJapanese(this.language)) {
                snippet = removeSpace(snippet);
            }
            this.element[k] = new GoogleSearchResultElement();
            this.element[k].setSnippet(snippet);
        }
        this.result = new GoogleSearchResult();
        this.result.setResultElements(this.element);
        this.result.setEstimatedTotalResultsCount(count);
        return count;
    }

    /**
     * Writes the current result snippets to the cache as UTF-8: the count on the first
     * line, then one snippet per line. I/O failures are logged and otherwise ignored.
     *
     * @param str cache file name inside {@link #cacheDir}
     */
    private void storeToCache(String str) {
        String path = this.cacheDir + str;
        int count = this.element.length;
        StringBuilder content = new StringBuilder();
        content.append(count).append(this.NEWLINE);
        for (int k = 0; k < count; k++) {
            content.append(this.element[k].getSnippet()).append(this.NEWLINE);
        }

        FileOutputStream stream = null;
        try {
            stream = new FileOutputStream(path);
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream, "UTF-8"));
            writer.write(content.toString());
            writer.flush();
        } catch (IOException e) {
            log.error("could not write cache file: " + path, e);
        } finally {
            // Close the underlying stream even when writing failed half way.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException e) {
                    log.warn("could not close cache file: " + path, e);
                }
            }
        }
    }

    /**
     * Strips markup and punctuation from the first 100 snippets: removes {@code <br>},
     * parentheses and commas, decodes {@code &#39;} and {@code &quot;}, and collapses each
     * pair of whitespace characters into a single space.
     */
    private void cleanText() {
        int limit = Math.min(this.element.length, MAX_SNIPPETS);
        for (int k = 0; k < limit; k++) {
            if (this.element[k] == null || this.element[k].getSnippet() == null) {
                continue;
            }
            String cleaned = this.element[k].getSnippet()
                    .replace("<br>", "")
                    .replace("&#39;", "'")
                    .replace("&quot;", "\"")
                    .replace("(", "")
                    .replace(")", "")
                    .replace(",", "");
            // The replacement is a real space; "\\s" as a replacement would insert the letter s.
            cleaned = DOUBLE_WHITESPACE.matcher(cleaned).replaceAll(" ").trim();
            this.element[k].setSnippet(cleaned);
        }
    }

    /**
     * Segments the first 100 snippets of a Japanese or Chinese result in place; does
     * nothing for English. Snippets are lower-cased and have {@code &gt;} and {@code <br>}
     * replaced by spaces before they are passed to the segmentation tool.
     *
     * <p>For Chinese the method returns without touching any further snippet when no
     * Chinese tool is available.
     *
     * @param str answer candidate (not used by the segmentation itself)
     * @throws Exception when the Chinese segmentation tool fails
     */
    private void segmentation(String str) throws Exception {
        if (Utility.isEnglish(this.language)) {
            return;
        }
        int limit = Math.min(this.element.length, MAX_SNIPPETS);
        for (int k = 0; k < limit; k++) {
            String raw = this.element[k].getSnippet().toLowerCase().replaceAll("&gt;", " ").replaceAll("<br>", " ");
            String segmented = raw;
            if (this.DEBUG) {
                log.debug("segmentation: " + raw);
            }
            if (Utility.isJapanese(this.language) && this.jpNLPTool != null) {
                try {
                    segmented = removeSpace(this.jpNLPTool.segment(raw).replaceAll(WildChar.getInstance().OPEN_B_TAG, "<b>").replaceAll(WildChar.getInstance().CLOSE_B_TAG, "</b>").replaceAll(WildChar.getInstance().B_TAG, "<b>...</b>"));
                    this.element[k].setSnippet(segmented);
                } catch (Exception e) {
                    // Best effort: the snippet keeps its unsegmented text.
                    log.warn("japanese segmentation failed for snippet: " + raw, e);
                }
            } else if (Utility.isChinese(this.language) && this.SEGMENTATION) {
                if (this.chNLPTool == null) {
                    return;
                }
                segmented = segmentChinese(raw);
                this.element[k].setSnippet(segmented);
            }
            if (this.DEBUG) {
                log.debug("=> segmented:" + segmented);
            }
        }
    }

    /** Runs the Chinese segmenter on one snippet and strips its tags and punctuation. */
    private String segmentChinese(String raw) throws Exception {
        String input = raw.replaceAll("/", "");
        String output = this.chNLPTool.getMSRSegOutputNoCache(input);
        if (input.length() > 1 && output.length() == 0) {
            System.out.println("**** failure in MSRSeg");
        }
        String text = output.replaceAll("/", " ").replaceAll("<b>...<b>", "<b>...</b>");

        // Turn every "< b >text< b >" pair into "<b>text</b>" with the inner spaces removed.
        // Each pass consumes two "< b >" markers, so the loop terminates.
        while (text.indexOf("< b >") >= 0) {
            Matcher matcher = SPACED_BOLD_SPAN.matcher(text);
            if (!matcher.find()) {
                break;
            }
            String bold = "<b>" + matcher.group(1).replaceAll(" ", "") + "</b>";
            // quoteReplacement keeps '$' and '\' in the snippet from being read as group references.
            text = matcher.replaceFirst(Matcher.quoteReplacement(bold));
        }

        text = text.replaceAll(";", "").replaceAll(":", "").replaceAll("\"", "").replaceAll(",", "");
        return text.replaceAll("\\[P", "").replaceAll("\\[L", "").replaceAll("\\[O", "").replaceAll("\\[int", "").replaceAll("\\[dat", "").replaceAll("\\[dur", "").replaceAll("\\[mea", "").replaceAll("\\[mon", "").replaceAll("\\[tim", "").replaceAll("\\[per", "").replaceAll("\\[ema", "").replaceAll("\\[pho", "").replaceAll("\\[www", "").replaceAll("\\[qut", "").replaceAll("\\[buk", "").replaceAll("]", "").replaceAll("  ", " ");
    }

    /**
     * Removes the spaces inside every {@code <b>...</b>} span of the text; text outside the
     * spans is left untouched.
     *
     * @param str text that may contain bold spans, may be {@code null}
     * @return the text with space-free bold spans, or {@code null} for {@code null} input
     */
    String removeSpace(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = BOLD_SPAN.matcher(str);
        // Matcher.appendReplacement only accepts a StringBuffer before Java 9.
        StringBuffer rewritten = new StringBuffer();
        while (matcher.find()) {
            String bold = "<b>" + matcher.group(1).replace(" ", "") + "</b>";
            matcher.appendReplacement(rewritten, Matcher.quoteReplacement(bold));
        }
        matcher.appendTail(rewritten);
        return rewritten.toString();
    }
}
