package info.ephyra.questionanalysis.atype.extractor;

import edu.cmu.lti.chineseNLP.util.Tree;
import edu.cmu.lti.chineseNLP.util.TreeHelper;
import edu.cmu.lti.javelin.qa.Term;
import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.Instance;
import edu.cmu.minorthird.classify.MutableInstance;
import info.ephyra.nlp.StanfordParser;
import info.ephyra.questionanalysis.atype.FocusFinder;
import info.ephyra.questionanalysis.atype.WordNetAnswerTypeMapping;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.POS;
import net.didion.jwnl.dictionary.Dictionary;
import org.apache.log4j.Logger;

/* loaded from: input_file:info/ephyra/questionanalysis/atype/extractor/EnglishFeatureExtractor.class */
/**
 * Feature extractor for English questions. Builds a classifier instance from a
 * tokenized question and its syntactic parse by adding word-level features
 * (unigrams, bigrams, wh-word, "type/kind/genre of" head), syntactic features
 * (main verb, wh-determiner, focus adjective) and a semantic feature (answer
 * type of the focus term).
 */
public class EnglishFeatureExtractor extends FeatureExtractor {
    private static final Logger log = Logger.getLogger(EnglishFeatureExtractor.class);

    // Wh-word patterns; capture group 1 is the wh-word that ends up in the feature name.
    private static final String HOW_MUCH_PTRN = "([hH]ow much)";
    private static final String HOW_MUCH_OF_PTRN = "([hH]ow much of)";
    private static final String HOW_MANY_PTRN = "([hH]ow many)";
    private static final String WHOSE_PTRN = "([wW]hose)";
    private static final String WHO_PTRN = "([wW]ho)";
    private static final String WHOM_PTRN = "([wW]hom)";
    private static final String WHAT_PTRN = "([wW]hat)";
    private static final String WHEN_PTRN = "([wW]hen)";
    private static final String WHERE_PTRN = "([wW]here)";
    // Fixed: this pattern previously read "([wW]y)", so "Why ..." never matched.
    private static final String WHY_PTRN = "([wW]hy)";
    private static final String HOW_PTRN = "([hH]ow)";
    private static final String WHICH_PTRN = "([wW]hich)";
    private static final String WHICH_ANYWHERE_PTRN = ".*(which)";
    private static final String WHAT_ANYWHERE_PTRN = ".*(what)";

    /** Suffix appended to every wh-pattern: a word boundary followed by the rest of the sentence. */
    private static final String REST_PTRN = "\\b.*";
    private static final String SPACE_PTRN = "\\s+";

    /** Head nouns checked in "&lt;head&gt;s? of &lt;focus&gt;" constructions. */
    private static final String[] OF_HEAD_WORDS = {"type", "kind", "genre"};

    /** Wh-patterns in the order they are tried; the first one that matches wins. */
    private static final List<String> whPtrns = new ArrayList<String>();

    /** Each wh-pattern anchored at the start of the sentence, compiled once. */
    private static final List<Pattern> WH_START_PATTERNS = new ArrayList<Pattern>();

    /** Each wh-pattern without a start anchor (used with find()), compiled once. */
    private static final List<Pattern> WH_ANYWHERE_PATTERNS = new ArrayList<Pattern>();

    static {
        whPtrns.add(HOW_MANY_PTRN);
        // NOTE(review): HOW_MUCH_PTRN precedes HOW_MUCH_OF_PTRN, and "how much" followed by
        // REST_PTRN already matches any sentence starting with "how much of", so the
        // "how much of" entry can never be the first match. The order is kept as-is because
        // swapping it would change the WH_WORD feature emitted for such questions, which
        // may matter to previously trained models - confirm before reordering.
        whPtrns.add(HOW_MUCH_PTRN);
        whPtrns.add(HOW_MUCH_OF_PTRN);
        whPtrns.add(HOW_PTRN);
        whPtrns.add(WHO_PTRN);
        whPtrns.add(WHOM_PTRN);
        whPtrns.add(WHOSE_PTRN);
        whPtrns.add(WHAT_PTRN);
        whPtrns.add(WHEN_PTRN);
        whPtrns.add(WHERE_PTRN);
        whPtrns.add(WHY_PTRN);
        whPtrns.add(WHICH_PTRN);
        whPtrns.add(WHICH_ANYWHERE_PTRN);
        whPtrns.add(WHAT_ANYWHERE_PTRN);

        // The wh-patterns are constant, so compile them once instead of on every call.
        for (String whPtrn : whPtrns) {
            WH_START_PATTERNS.add(Pattern.compile("^" + whPtrn + REST_PTRN));
            WH_ANYWHERE_PATTERNS.add(Pattern.compile(whPtrn + REST_PTRN));
        }
    }

    /**
     * Initializes this extractor and the components it calls (FocusFinder,
     * WordNetAnswerTypeMapping, StanfordParser). Does nothing if the extractor
     * already reports itself as initialized.
     *
     * @throws Exception if any of the initialization calls fails
     */
    @Override // info.ephyra.questionanalysis.atype.extractor.FeatureExtractor
    public void initialize() throws Exception {
        if (isInitialized()) {
            return;
        }
        super.initialize();
        FocusFinder.initialize();
        WordNetAnswerTypeMapping.initialize();
        StanfordParser.initialize();
        setInitialized(true);
    }

    /**
     * Joins the texts of the given terms with single spaces and trims the result.
     * A term whose text is null contributes the string "null", matching plain
     * string concatenation.
     */
    private static String joinTermTexts(List<Term> terms) {
        StringBuilder sentence = new StringBuilder();
        for (Term t : terms) {
            sentence.append(t.getText()).append(' ');
        }
        return sentence.toString().trim();
    }

    /** Lower-cases a matched wh-word and replaces whitespace runs with underscores. */
    private static String normalizeWhWord(String whWord) {
        return whWord.toLowerCase().replaceAll("\\s+", "_");
    }

    /**
     * Finds the wh-word of a sentence: first tries every pattern anchored at the
     * start of the sentence, then falls back to searching anywhere in it.
     *
     * @return the normalized wh-word, or null if no pattern matches
     */
    private static String findWhWord(String sentence) {
        for (Pattern startPattern : WH_START_PATTERNS) {
            Matcher matcher = startPattern.matcher(sentence);
            if (matcher.matches()) {
                return normalizeWhWord(matcher.group(1));
            }
        }
        for (Pattern anywherePattern : WH_ANYWHERE_PATTERNS) {
            Matcher matcher = anywherePattern.matcher(sentence);
            if (matcher.find()) {
                return normalizeWhWord(matcher.group(1));
            }
        }
        return null;
    }

    /**
     * Adds word-level features to the instance: one UNIGRAM feature per term, one
     * BIGRAM feature per adjacent term pair, a WH_WORD feature if a wh-pattern
     * matches the sentence, and an OF_HEAD feature if the sentence contains
     * "type(s)/kind(s)/genre(s) of" followed by the focus text.
     *
     * @param mutableInstance instance the features are added to
     * @param list            terms of the question
     * @param term            focus term; may be null, in which case no OF_HEAD feature is added
     */
    protected static void addWordLevelFeatures(MutableInstance mutableInstance, List<Term> list, Term term) {
        // Token strings used in n-gram feature names: whitespace becomes '_', missing text becomes '-'.
        String[] tokens = new String[list.size()];
        for (int i = 0; i < tokens.length; i++) {
            String text = list.get(i).getText();
            tokens[i] = (text != null) ? text.replaceAll("\\s+", "_") : "-";
        }
        for (String token : tokens) {
            mutableInstance.addBinary(new Feature("UNIGRAM." + token));
        }
        for (int i = 0; i < tokens.length - 1; i++) {
            mutableInstance.addBinary(new Feature("BIGRAM." + tokens[i] + "-" + tokens[i + 1]));
        }

        String sentence = joinTermTexts(list);
        String whWord = findWhWord(sentence);
        if (whWord != null) {
            mutableInstance.addBinary(new Feature("WH_WORD." + whWord));
        }

        // Without a focus text there is nothing to look for after "type of" etc.
        // (a null text would otherwise be searched for as the literal "null").
        if (term == null || term.getText() == null) {
            return;
        }
        // Quote the focus text so regex metacharacters in it are matched literally.
        String quotedFocus = Pattern.quote(term.getText());
        for (String headWord : OF_HEAD_WORDS) {
            if (Pattern.compile(headWord + "s? of " + quotedFocus).matcher(sentence).find()) {
                mutableInstance.addBinary(new Feature("OF_HEAD." + headWord));
                return;
            }
        }
    }

    /**
     * Adds syntactic features to the instance: a MAIN_VERB feature built from the
     * head word of the parse tree (replaced by its WordNet verb lemma when one is
     * found), a WH_DET feature if a wh-word is directly followed by the focus
     * text, and a FOCUS_ADJ feature if the tree helper finds an RB/JJ preterminal
     * with a preceding WRB preterminal.
     *
     * @param mutableInstance instance the features are added to
     * @param list            terms of the question
     * @param str             syntactic parse of the question; if null an error is logged and nothing is added
     * @param term            focus term; may be null
     */
    protected static void addSyntacticFeatures(MutableInstance mutableInstance, List<Term> list, String str, Term term) {
        if (str == null) {
            log.error("Syntactic parse of the question is null.");
            return;
        }
        Tree tree = TreeHelper.buildTree(str, 1);
        TreeHelper.markHeadNode(tree);

        String mainVerb = tree.getHeadWord();
        try {
            IndexWord indexWord = Dictionary.getInstance().lookupIndexWord(POS.VERB, mainVerb);
            if (indexWord != null && indexWord.getLemma() != null) {
                mainVerb = indexWord.getLemma();
            }
        } catch (Exception e) {
            // Best effort: keep the unlemmatized head word if the lookup fails.
            log.warn("Failed to get lemma for verb '" + mainVerb + "'", e);
        }
        if (mainVerb == null) {
            mainVerb = "-";
        }
        mutableInstance.addBinary(new Feature("MAIN_VERB." + mainVerb));

        if (term != null && term.getText() != null) {
            // Quote the focus text so regex metacharacters in it are matched literally.
            String quotedFocus = Pattern.quote(term.getText());
            String sentence = joinTermTexts(list);
            for (String whPtrn : whPtrns) {
                Pattern whDet = Pattern.compile(whPtrn + SPACE_PTRN + quotedFocus + REST_PTRN);
                if (whDet.matcher(sentence).matches()) {
                    mutableInstance.addBinary(new Feature("WH_DET.+"));
                    break;
                }
            }
        }

        Tree focusAdj = TreeHelper.findFirstPreterminalWithPrecedingPreterminal(tree, "RB|JJ", "WRB");
        if (focusAdj != null) {
            mutableInstance.addBinary(new Feature("FOCUS_ADJ." + focusAdj.getHeadWord()));
        }
    }

    /**
     * Adds a FOCUS_TYPE feature carrying the answer type that
     * WordNetAnswerTypeMapping returns for the focus term, or "-" if it returns null.
     *
     * @param mutableInstance instance the feature is added to
     * @param term            focus term, passed through to the mapping unchanged
     */
    protected static void addSemanticFeatures(MutableInstance mutableInstance, Term term) {
        String answerType = WordNetAnswerTypeMapping.getAnswerType(term);
        mutableInstance.addBinary(new Feature("FOCUS_TYPE." + (answerType != null ? answerType : "-")));
    }

    /**
     * Creates an instance from the question terms and the syntactic parse, adding
     * word-level, syntactic and semantic features.
     *
     * @param list terms of the question
     * @param str  syntactic parse of the question
     * @return the instance holding all extracted features
     */
    @Override // info.ephyra.questionanalysis.atype.extractor.FeatureExtractor
    public Instance createInstance(List<Term> list, String str) {
        // The instance source is built from each term's string form (not getText()).
        StringBuilder source = new StringBuilder();
        for (Term t : list) {
            source.append(t).append(' ');
        }
        MutableInstance instance = new MutableInstance(source.toString().trim());

        log.debug("Parse: " + str);
        Term focus = FocusFinder.findFocusTerm(TreeHelper.buildTree(str, 1));
        if (focus != null) {
            log.debug("Focus: " + focus.getText());
        }
        addWordLevelFeatures(instance, list, focus);
        addSyntacticFeatures(instance, list, str, focus);
        addSemanticFeatures(instance, focus);
        return instance;
    }

    /**
     * Creates an instance from a raw question string. The question is parsed with
     * StanfordParser; if parsing or feature extraction throws, the error is logged
     * and an instance with only word-level features (whitespace-split tokens, no
     * focus term) is returned instead.
     *
     * @param str the question
     * @return the instance holding the extracted features
     */
    @Override // info.ephyra.questionanalysis.atype.extractor.FeatureExtractor
    public Instance createInstance(String str) {
        String[] tokens = str.split("\\s+");
        try {
            return createInstance(str, StanfordParser.parse(str));
        } catch (Exception e) {
            log.error("Failed to parse question, using only word-level features.", e);
            List<Term> terms = new ArrayList<Term>(tokens.length);
            for (String token : tokens) {
                terms.add(new Term(0, 0, token));
            }
            MutableInstance instance = new MutableInstance(str);
            addWordLevelFeatures(instance, terms, null);
            return instance;
        }
    }
}
