package info.ephyra.nlp.indices;

import info.ephyra.io.MsgPrinter;
import info.ephyra.nlp.NETagger;
import info.ephyra.questionanalysis.TermExpander;
import info.ephyra.util.FileUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Hashtable;

/* loaded from: input_file:info/ephyra/nlp/indices/WordFrequencies.class */
/**
 * A static, in-memory index of word frequencies.
 *
 * <p>The index maps lower-cased tokens to the number of times they were seen. It can be
 * built from a single file or from all files returned by {@code FileUtils.getFiles} for a
 * directory, pruned of rare words, written to disk and read back.
 *
 * <p>On-disk format (as written by {@link #saveIndex(String)}): the first line holds the
 * total token count, the second line the number of distinct words, followed by one
 * word line and one count line per entry.
 *
 * <p>All state is held in static fields; the class performs no synchronization of its own
 * beyond what {@link Hashtable} provides for individual map operations.
 */
public class WordFrequencies {
    // NOTE(review): MAX_WORDS, LOWER_CASE and SORT_BY_FREQUENCY are not referenced by the
    // visible code (the compiler appears to have inlined them); they are kept unchanged.
    private static final int MAX_WORDS = 0;
    private static final boolean LOWER_CASE = true;
    /** Words occurring fewer times than this are removed by {@link #dropRareWords()}. */
    private static final int MIN_FREQUENCY = 2;
    private static final boolean SORT_BY_FREQUENCY = true;

    /** Initial capacity used when a fresh index is created. */
    private static final int INITIAL_CAPACITY = 10000;
    /** Upper bound for pre-sizing in {@link #loadIndex(String)}; the table still grows on demand. */
    private static final int MAX_PRESIZED_CAPACITY = 1 << 20;

    /** Number of tokens counted so far (including repetitions). */
    private static int total;
    /** Number of distinct words in the index. */
    private static int distinct;
    /** Maps each lower-cased word to its frequency; {@code null} until created or loaded. */
    private static Hashtable<String, Integer> index;

    /** Discards any existing index and resets both counters to zero. */
    private static void resetIndex() {
        total = 0;
        distinct = 0;
        index = new Hashtable<>(INITIAL_CAPACITY);
    }

    /** Creates an empty index if none exists yet, leaving the counters untouched. */
    private static void ensureIndex() {
        if (index == null) {
            index = new Hashtable<>(INITIAL_CAPACITY);
        }
    }

    /**
     * Replaces the current index with one built from a single file.
     *
     * @param str path of the file to read
     * @return {@code true} if the file was read completely, {@code false} on an I/O error
     */
    public static boolean createIndexFromFile(String str) {
        resetIndex();
        return updateIndexFromFile(str);
    }

    /**
     * Adds the tokens of a file to the current index, creating an empty index first if
     * none exists.
     *
     * <p>Each line is tokenized with {@code NETagger.tokenize} and every token is
     * lower-cased before being counted. If an I/O error occurs, tokens counted before the
     * error remain in the index.
     *
     * @param str path of the file to read
     * @return {@code true} if the file was read completely, {@code false} on an I/O error
     */
    public static boolean updateIndexFromFile(String str) {
        MsgPrinter.printStatusMsg(str);
        ensureIndex();
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(str)))) {
            String line;
            // readLine() returning null is the reliable end-of-file signal; ready() only
            // reports whether a read would block.
            while ((line = reader.readLine()) != null) {
                for (String token : NETagger.tokenize(line)) {
                    String word = token.toLowerCase();
                    Integer count = index.get(word);
                    if (count == null) {
                        distinct++;
                        index.put(word, Integer.valueOf(1));
                    } else {
                        index.put(word, Integer.valueOf(count.intValue() + 1));
                    }
                    total++;
                }
            }
            return true;
        } catch (IOException e) {
            // Failure is reported to the caller through the return value.
            return false;
        }
    }

    /**
     * Replaces the current index with one built from the files of a directory.
     *
     * @param str path passed to {@code FileUtils.getFiles}
     * @return {@code true} if every file was read completely, {@code false} otherwise
     */
    public static boolean createIndexFromDir(String str) {
        resetIndex();
        return updateIndexFromDir(str);
    }

    /**
     * Adds the tokens of every file returned by {@code FileUtils.getFiles} to the current
     * index. Processing stops at the first file that cannot be read.
     *
     * @param str path passed to {@code FileUtils.getFiles}
     * @return {@code true} if every file was read completely, {@code false} otherwise
     */
    public static boolean updateIndexFromDir(String str) {
        for (File file : FileUtils.getFiles(str)) {
            if (!updateIndexFromFile(file.getPath())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Removes all words whose frequency is below {@code MIN_FREQUENCY} and decrements the
     * distinct-word counter for each removed word. The total token count is not changed.
     * Does nothing if no index exists.
     */
    public static void dropRareWords() {
        if (index == null) {
            return;
        }
        Hashtable<String, Integer> kept = new Hashtable<>();
        for (java.util.Map.Entry<String, Integer> entry : index.entrySet()) {
            if (entry.getValue().intValue() < MIN_FREQUENCY) {
                distinct--;
            } else {
                kept.put(entry.getKey(), entry.getValue());
            }
        }
        index = kept;
    }

    /**
     * Returns the indexed words ordered by descending frequency.
     *
     * @return a new array of all words in the index, most frequent first; an empty array
     *         if no index exists
     */
    public static String[] getSortedWords() {
        if (index == null) {
            return new String[0];
        }
        String[] words = index.keySet().toArray(new String[0]);
        Arrays.sort(words, new Comparator<String>() {
            @Override
            public int compare(String first, String second) {
                // Arguments are swapped to obtain descending order.
                return Integer.compare(WordFrequencies.lookup(second), WordFrequencies.lookup(first));
            }
        });
        return words;
    }

    /**
     * Writes the index to a file: total count, distinct count, then a word line and a
     * count line for every word in descending order of frequency.
     *
     * @param str path of the output file
     * @return {@code true} if the index was written without error; {@code false} if no
     *         index exists, the file could not be opened, or a write failed
     */
    public static boolean saveIndex(String str) {
        if (index == null) {
            return false;
        }
        try (PrintWriter writer = new PrintWriter(new FileOutputStream(new File(str)))) {
            writer.println(total);
            writer.println(distinct);
            for (String word : getSortedWords()) {
                writer.println(word);
                writer.println(lookup(word));
            }
            // PrintWriter swallows IOExceptions; checkError() flushes and reports them.
            return !writer.checkError();
        } catch (IOException e) {
            // Thrown when the output file cannot be opened.
            return false;
        }
    }

    /**
     * Replaces the current index with one read from a file in the format written by
     * {@link #saveIndex(String)}.
     *
     * <p>The current index and counters are only replaced if the whole file was read
     * successfully; on failure they are left untouched.
     *
     * @param str path of the index file
     * @return {@code true} if the index was loaded; {@code false} on an I/O error or if
     *         the file is truncated or contains malformed numbers
     */
    public static boolean loadIndex(String str) {
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(str)))) {
            String totalLine = reader.readLine();
            String distinctLine = reader.readLine();
            if (totalLine == null || distinctLine == null) {
                return false;
            }
            int loadedTotal = Integer.parseInt(totalLine.trim());
            int loadedDistinct = Integer.parseInt(distinctLine.trim());
            if (loadedDistinct < 0) {
                return false;
            }
            // Pre-size from the header, but bound it so that a corrupt header cannot
            // trigger a huge allocation or an int overflow.
            int capacity = (int) Math.min(2L * loadedDistinct, (long) MAX_PRESIZED_CAPACITY);
            Hashtable<String, Integer> loaded = new Hashtable<>(capacity);
            for (int i = 0; i < loadedDistinct; i++) {
                String word = reader.readLine();
                String countLine = reader.readLine();
                if (word == null || countLine == null) {
                    // Fewer entries than announced in the header.
                    return false;
                }
                loaded.put(word, Integer.valueOf(Integer.parseInt(countLine.trim())));
            }
            total = loadedTotal;
            distinct = loadedDistinct;
            index = loaded;
            return true;
        } catch (IOException | NumberFormatException e) {
            // Failure is reported to the caller through the return value.
            return false;
        }
    }

    /**
     * @return the number of tokens counted so far
     */
    public static int getTotal() {
        return total;
    }

    /**
     * @return the number of distinct words in the index
     */
    public static int getDistinct() {
        return distinct;
    }

    /**
     * Looks up the absolute frequency of a word. The word is lower-cased before lookup.
     *
     * @param str the word to look up
     * @return the frequency of the word, or 0 if the word is {@code null}, unknown, or no
     *         index exists
     */
    public static int lookup(String str) {
        if (index == null || str == null) {
            return 0;
        }
        Integer count = index.get(str.toLowerCase());
        return count == null ? 0 : count.intValue();
    }

    /**
     * Looks up the frequency of a word relative to the total number of tokens.
     *
     * @param str the word to look up
     * @return {@code lookup(str) / total} as a floating-point ratio, or
     *         {@code TermExpander.MIN_EXPANSION_WEIGHT} if no tokens have been counted
     */
    public static double lookupRel(String str) {
        // The cast forces floating-point division; integer division would truncate the
        // ratio to 0 for every word that is rarer than the whole corpus.
        return total > 0 ? (double) lookup(str) / total : TermExpander.MIN_EXPANSION_WEIGHT;
    }

    /**
     * Builds an index from a corpus folder, drops rare words and saves the result.
     *
     * @param strArr {@code strArr[0]} is the corpus folder, {@code strArr[1]} the output file
     */
    public static void main(String[] strArr) {
        if (strArr.length < 2) {
            MsgPrinter.printUsage("java WordFrequencies corpus_folder output_file");
            System.exit(1);
        }
        MsgPrinter.enableStatusMsgs(true);
        MsgPrinter.printStatusMsg("Building index of word frequencies...");
        if (!createIndexFromDir(strArr[0])) {
            // Keep going so that whatever was indexed before the failure is still saved.
            System.err.println("Warning: the corpus could not be read completely: " + strArr[0]);
        }
        dropRareWords();
        if (!saveIndex(strArr[1])) {
            System.err.println("Error: the index could not be written to: " + strArr[1]);
            System.exit(1);
        }
        MsgPrinter.printStatusMsg("...completed.");
    }
}
