package info.ephyra.indexing;

import info.ephyra.io.MsgPrinter;
import info.ephyra.util.FileUtils;
import info.ephyra.util.HTMLConverter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:info/ephyra/indexing/Blog06Preprocessor.class */
/**
 * Command-line tool that rewrites the files found under a Blog06 directory.
 *
 * <p>Each input file is read as an alternating sequence of header sections
 * (terminated by a {@code </DOCHDR>} line) and body sections (terminated by a
 * {@code </DOC>} line). Every body is converted with
 * {@link HTMLConverter#html2text(String)}, each resulting line is wrapped in
 * {@code <P>...</P>}, and the header, the wrapped text and a closing
 * {@code </DOC>} line are written to a sibling file named
 * {@code <input path>.parsed}.
 *
 * <p>Any read, parse or write failure is reported through {@link MsgPrinter}
 * and terminates the JVM with exit status 1.
 */
public class Blog06Preprocessor {
    /** Encoding used for both the input files and the generated output. */
    private static final String ENCODING = "UTF-8";

    /** Matches the line that ends a header section. */
    private static final Pattern HEADER_END = Pattern.compile("\\s*?</DOCHDR>\\s*+");

    /** Matches the line that ends a body section. */
    private static final Pattern BODY_END = Pattern.compile("\\s*?</DOC>\\s*+");

    /** Matches the names of the files that are converted by {@link #main}. */
    private static final Pattern INPUT_FILE_NAME = Pattern.compile(".*?-\\d++");

    /**
     * Reads lines up to and including the first one matching {@code endTag},
     * or up to the end of the stream if no such line exists.
     *
     * @param reader source of lines
     * @param endTag pattern that marks the last line of the section
     * @return the lines read, each followed by {@code '\n'}, or {@code null}
     *         if the stream was already at its end
     * @throws IOException if reading fails
     */
    private static String readSection(BufferedReader reader, Pattern endTag) throws IOException {
        StringBuilder section = new StringBuilder();
        String line;
        // readLine() == null is the only reliable end-of-stream signal;
        // ready() merely says whether a read would block.
        while ((line = reader.readLine()) != null) {
            section.append(line).append('\n');
            if (endTag.matcher(line).matches()) {
                break;
            }
        }
        // Every line contributes at least '\n', so an empty buffer means EOF.
        return section.length() == 0 ? null : section.toString();
    }

    /**
     * Wraps every line of {@code text} in {@code <P>...</P>}, one per output line.
     *
     * @param text text whose lines are separated by {@code '\n'}
     * @return the wrapped lines, each followed by {@code '\n'}
     */
    private static String wrapParagraphs(String text) {
        StringBuilder wrapped = new StringBuilder();
        for (String line : text.split("\\n")) {
            wrapped.append("<P>").append(line).append("</P>\n");
        }
        return wrapped.toString();
    }

    /**
     * Converts a single file and writes the result to {@code <path>.parsed}.
     * Exits the JVM with status 1 if the file cannot be read, is malformatted,
     * cannot be parsed, or the output cannot be written.
     *
     * @param file input file to convert
     */
    private static void convertFile(File file) {
        MsgPrinter.printStatusMsg("Parsing " + file.getName() + "...");
        List<String> headers = new ArrayList<String>();
        List<String> bodies = new ArrayList<String>();
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), ENCODING));
            try {
                // Sections alternate: header, body, header, body, ...
                boolean expectBody = false;
                String section;
                while ((section = readSection(reader, expectBody ? BODY_END : HEADER_END)) != null) {
                    if (expectBody) {
                        bodies.add(section);
                    } else {
                        headers.add(section);
                    }
                    expectBody = !expectBody;
                }
            } finally {
                // Close the reader even if parsing fails half-way through.
                reader.close();
            }
        } catch (IOException e) {
            MsgPrinter.printErrorMsg("Could not read from " + file.getName() + ".");
            System.exit(1);
        }
        // Every header needs exactly one body, and there must be at least one pair.
        if (headers.isEmpty() || bodies.isEmpty() || headers.size() != bodies.size()) {
            MsgPrinter.printErrorMsg(file.getName() + " is malformatted.");
            System.exit(1);
        }

        for (int i = 0; i < bodies.size(); i++) {
            String text = HTMLConverter.html2text(bodies.get(i));
            if (text == null) {
                MsgPrinter.printErrorMsg(file.getName() + " could not be parsed.");
                System.exit(1);
            }
            bodies.set(i, wrapParagraphs(text));
        }

        File outFile = new File(file.getPath() + ".parsed");
        boolean writeFailed;
        try {
            PrintWriter writer = new PrintWriter(
                    new OutputStreamWriter(new FileOutputStream(outFile), ENCODING));
            try {
                for (int i = 0; i < headers.size(); i++) {
                    writer.print(headers.get(i));
                    writer.print(bodies.get(i));
                    writer.print("</DOC>\n");
                }
            } finally {
                writer.close();
            }
            // PrintWriter swallows IOExceptions from print/flush/close and only
            // records them, so the error flag has to be queried explicitly.
            writeFailed = writer.checkError();
        } catch (IOException e) {
            writeFailed = true;
        }
        if (writeFailed) {
            MsgPrinter.printErrorMsg("Could not write to " + outFile.getName() + ".");
            System.exit(1);
        }
    }

    /**
     * Entry point. Recursively collects the files below the given directory
     * and converts every file whose name does not start with {@code '.'} and
     * ends in a hyphen followed by digits.
     *
     * @param args {@code args[0]} is the directory to process; if it is
     *             missing, the usage is printed and the JVM exits with status 1
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            MsgPrinter.printUsage("java Blog06Preprocessor Blog06_directory");
            System.exit(1);
        }
        String directory = args[0];
        MsgPrinter.enableStatusMsgs(true);
        MsgPrinter.enableErrorMsgs(true);
        for (File file : FileUtils.getFilesRec(directory)) {
            String name = file.getName();
            if (!name.startsWith(".") && INPUT_FILE_NAME.matcher(name).matches()) {
                convertFile(file);
            }
        }
        MsgPrinter.printStatusMsg("...done.");
    }
}
