package info.ephyra.answerselection.ag.resource.gazetteer;

import info.ephyra.answerselection.ag.utility.IOManager;
import java.io.File;
import java.util.Hashtable;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;

/* loaded from: input_file:info/ephyra/answerselection/ag/resource/gazetteer/USstates.class */
/**
 * Populates a table of US state facts from a directory of per-state HTML pages.
 *
 * <p>Each regular entry of the directory (everything except an entry named
 * {@code CVS} and entries whose name starts with a dot) is read through
 * {@link IOManager#readHTML}, and individual facts are cut out of the returned
 * text by looking for a pair of labels around the value. Every state whose name
 * could be extracted is stored in the caller-supplied table under its
 * lower-cased name. After the directory has been processed, an otherwise empty
 * entry named {@code "district of columbia"} is added.
 */
public class USstates {
    private static final Logger log = Logger.getLogger(USstates.class);

    /** Literal text removed from every page before any field is extracted. */
    private static final String PAGE_BANNER = "50states List";

    /** HTML numeric character reference for a non-breaking space; used as a value separator in the pages. */
    private static final String NBSP_ENTITY = "&#160;";

    /** When true, the processed file names and every parsed state are additionally logged/printed. */
    boolean DEBUG = false;

    /** Destination table, owned by the caller; keys are the lower-cased state names. */
    Hashtable<String, State> states;

    /** Path of the directory that holds the per-state pages. */
    String dir;

    /**
     * Reads all state pages found in {@code str} into {@code hashtable}.
     *
     * @param str       path of the directory containing the state pages
     * @param hashtable table that receives one {@link State} per successfully parsed page
     * @throws Exception if reading one of the pages fails
     */
    public USstates(String str, Hashtable<String, State> hashtable) throws Exception {
        this.states = hashtable;
        this.dir = str;
        read();
    }

    /**
     * Walks the input directory and fills {@link #states}.
     *
     * <p>If the configured path is not a directory, or its content cannot be
     * listed, the problem is logged and nothing is added to the table.
     */
    private void read() throws Exception {
        File root = new File(this.dir);
        if (!root.isDirectory()) {
            log.debug("-- Input is not directory: " + this.dir);
            return;
        }
        // listFiles() may still return null when an I/O error occurs; treat it like the
        // "not a directory" case instead of failing with a bare NullPointerException.
        File[] entries = root.listFiles();
        if (entries == null) {
            log.warn("-- Could not list directory: " + this.dir);
            return;
        }

        IOManager ioManager = new IOManager();
        for (File entry : entries) {
            String entryName = entry.getName();
            if (entryName.equals("CVS") || entryName.startsWith(".")) {
                continue; // version-control metadata and hidden files are not state pages
            }
            String path = entry.getAbsolutePath();
            if (this.DEBUG) {
                log.debug("-- filename: " + path);
            }

            // The banner contains no regex metacharacters, so a literal replace is
            // equivalent to the regex replaceAll and avoids recompiling a pattern per file.
            String text = ioManager.readHTML(path, false).replace(PAGE_BANNER, "");
            State state = parseState(text);
            if (this.DEBUG) {
                state.print();
            }
            if (state.name != null) {
                this.states.put(state.name, state);
            }
        }

        // Always present, but carries a name only; no page is parsed for it.
        State districtOfColumbia = new State();
        districtOfColumbia.name = "district of columbia";
        this.states.put(districtOfColumbia.name, districtOfColumbia);
    }

    /** Builds a {@link State} from the text of one page; fields whose labels are not found stay unset or empty. */
    private State parseState(String text) {
        State state = new State();
        state.name = extract(text, "Zip Codes:", "Zip Codes");
        state.capital = extract(text, "Capital City:", "City Guides: (.*) City Guides:");
        state.bird = extract(text, "Bird:", "Border States:");
        if (state.bird == null) {
            // Some pages have a different label following the bird entry.
            state.bird = extract(text, "Bird:", "Cam World:");
        }
        state.motto = extract(text, "Mottoe?s?:", "Museums:");

        String populationText = extract(text, "Population: Quick facts", "(.*)Public Records:");
        if (populationText != null) {
            // split() drops trailing empty strings, so a value made only of ';' yields an
            // empty array; guard the index instead of throwing.
            String[] populationParts = populationText.split(";");
            if (populationParts.length > 0) {
                state.population = populationParts[0];
            }
        }

        state.university = extract2(text, "Colleges, Universities:", "Community and Technical Colleges:");
        state.largeCities = extract2(text, "Largest Cities:", "Library:");
        state.tree = extract2(text, "Tree:", "Unclaimed Funds:");
        state.flower = extract2(text, "Flower:", "Genealogical Resources:");

        String nicknameText = extract(text, "Nicknames?:", "Nursing Schools:");
        if (nicknameText != null) {
            state.nickname = nicknameText.split("/");
        }
        return state;
    }

    /**
     * Returns the trimmed text found between two labels, or {@code null} if the
     * labels do not occur in that order.
     *
     * <p>Both labels are inserted into the regular expression unquoted, so they
     * may themselves contain regex syntax (for example {@code "Mottoe?s?:"}).
     * At least one whitespace character is required on each side of the value.
     */
    private static String findBetween(String text, String startLabel, String endLabel) {
        Matcher matcher = Pattern.compile(startLabel + "(\\s+)(.*)(\\s+)" + endLabel).matcher(text);
        return matcher.find() ? matcher.group(2).trim() : null;
    }

    /**
     * Extracts a single-valued field.
     *
     * @param str  page text to search
     * @param str2 regex fragment for the label preceding the value
     * @param str3 regex fragment for the label following the value
     * @return the value with all {@code &#160;} entities removed and lower-cased,
     *         or {@code null} if the labels were not found
     */
    private String extract(String str, String str2, String str3) {
        String value = findBetween(str, str2, str3);
        if (value == null) {
            return null;
        }
        // Locale.ROOT keeps the result (used as a map key) independent of the JVM's
        // default locale, e.g. avoids the dotless 'i' produced under a Turkish locale.
        return value.replace(NBSP_ENTITY, "").toLowerCase(Locale.ROOT);
    }

    /**
     * Extracts a multi-valued field whose items are separated by {@code &#160;}.
     *
     * @param str  page text to search
     * @param str2 regex fragment for the label preceding the values
     * @param str3 regex fragment for the label following the values
     * @return the individual items, case preserved; if the labels were not found
     *         or the value is exactly {@code "none"}, a one-element array holding
     *         the empty string (the result of splitting {@code ""})
     */
    private String[] extract2(String str, String str2, String str3) {
        String value = findBetween(str, str2, str3);
        if (value == null || value.equals("none")) {
            value = "";
        }
        return value.split(NBSP_ENTITY);
    }
}
