package info.ephyra.util;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;

/* loaded from: input_file:info/ephyra/util/HTMLConverter.class */
/**
 * Static helpers that turn HTML (a short snippet, a whole document, a local resource or a web
 * page) into plain text.
 *
 * <p>Whole documents are delegated to the HTML Parser library's {@code StringBean}; short
 * snippets are handled with a few regular expressions and a small table of character entities.
 */
public class HTMLConverter {
    /** Connect and read timeout applied by {@link #url2text(String)}, in milliseconds. */
    private static final int TIMEOUT = 120000;

    /** Anything shaped like a character entity: {@code &}, optional {@code #}, word characters, {@code ;}. */
    private static final Pattern ENTITY = Pattern.compile("&#?+\\w*+;");

    /** A single markup tag, from {@code <} up to the next {@code >}. */
    private static final Pattern TAG = Pattern.compile("<[^>]*+>");

    /** A run of one or more whitespace characters. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s++");

    /** Lower-cased entity text (including {@code &} and {@code ;}) mapped to its replacement. */
    private static final Map<String, String> ENTITIES = buildEntityTable();

    /**
     * Builds the lookup table of recognized entities.
     *
     * <p>Numeric keys are stored in exactly the spelling that is recognized, so {@code &#09;} is
     * known while {@code &#9;} is not.
     */
    private static Map<String, String> buildEntityTable() {
        Map<String, String> table = new HashMap<String, String>();

        // Tab and line feed are turned into plain spaces.
        table.put("&#09;", " ");
        table.put("&#10;", " ");

        // Codes 32..126 (space through tilde) decode to the character with that code.
        for (int code = 32; code <= 126; code++) {
            table.put("&#" + code + ";", String.valueOf((char) code));
        }

        // 150 and 151 are mapped to the en dash and em dash.
        table.put("&#150;", "\u2013");
        table.put("&#151;", "\u2014");

        // 160 (non-breaking space) becomes a space; 161..175 decode to the character with that
        // code, except 173 (soft hyphen), which is removed from the text.
        table.put("&#160;", " ");
        for (int code = 161; code <= 175; code++) {
            table.put("&#" + code + ";", String.valueOf((char) code));
        }
        table.put("&#173;", "");

        // Named spellings share the replacement of their numeric form.
        alias(table, 34, "quot");
        alias(table, 38, "amp");
        alias(table, 47, "frasl");
        alias(table, 60, "lt");
        alias(table, 62, "gt");
        alias(table, 150, "ndash");
        alias(table, 151, "mdash");
        alias(table, 160, "nbsp");
        alias(table, 161, "iexcl");
        alias(table, 162, "cent");
        alias(table, 163, "pound");
        alias(table, 164, "curren");
        alias(table, 165, "yen");
        alias(table, 166, "brvbar", "brkbar");
        alias(table, 167, "sect");
        alias(table, 168, "uml", "die");
        alias(table, 169, "copy");
        alias(table, 170, "ordf");
        alias(table, 171, "laquo");
        alias(table, 172, "not");
        alias(table, 173, "shy");
        alias(table, 174, "reg");
        alias(table, 175, "macr", "hibar");
        return table;
    }

    /** Registers each named entity as an alias of the already registered numeric entity. */
    private static void alias(Map<String, String> table, int code, String... names) {
        String text = table.get("&#" + code + ";");
        for (String name : names) {
            table.put("&" + name + ";", text);
        }
    }

    /** Creates a {@code StringBean} configured the same way for every document conversion. */
    private static StringBean newTextExtractor() {
        StringBean extractor = new StringBean();
        extractor.setLinks(false);
        extractor.setReplaceNonBreakingSpaces(true);
        extractor.setCollapse(true);
        return extractor;
    }

    /**
     * Checks whether a string can be parsed as a URL.
     *
     * @param str the candidate URL
     * @return {@code true} if {@link URL#URL(String)} accepts the string, {@code false} if it
     *     throws {@link MalformedURLException}
     */
    public static boolean isUrl(String str) {
        try {
            new URL(str);
        } catch (MalformedURLException e) {
            return false;
        }
        return true;
    }

    /**
     * Replaces character entities by the characters they stand for and removes every other
     * entity-shaped token.
     *
     * <p>Recognized are the numeric forms {@code &#09;}, {@code &#10;}, {@code &#32;} to
     * {@code &#126;}, {@code &#150;}, {@code &#151;} and {@code &#160;} to {@code &#175;}, plus a
     * set of named forms such as {@code &amp;} or {@code &copy;} (matched case-insensitively).
     * Any other token matching {@code &#?\w*;} is deleted.
     *
     * <p>The input is decoded in a single pass, so decoded output is never interpreted again
     * ({@code &amp;lt;} yields {@code &lt;}, not {@code <}), and replacements such as {@code $}
     * and a backslash are inserted literally.
     *
     * @param str text that may contain character entities
     * @return the text with entities replaced or removed
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static synchronized String replaceSpecialCharacters(String str) {
        Matcher entity = ENTITY.matcher(str);
        if (!entity.find()) {
            return str;
        }

        StringBuilder decoded = new StringBuilder(str.length());
        int copiedUpTo = 0;
        do {
            decoded.append(str, copiedUpTo, entity.start());
            // The pattern only admits ASCII word characters, so ROOT lower-casing is exact.
            String replacement = ENTITIES.get(entity.group().toLowerCase(Locale.ROOT));
            if (replacement != null) {
                decoded.append(replacement);
            }
            // Unknown entities contribute nothing, i.e. they are dropped.
            copiedUpTo = entity.end();
        } while (entity.find());
        decoded.append(str, copiedUpTo, str.length());
        return decoded.toString();
    }

    /**
     * Converts a short HTML snippet to plain text: tags are stripped, entities are decoded via
     * {@link #replaceSpecialCharacters(String)}, whitespace runs are collapsed to single spaces
     * and the result is trimmed.
     *
     * @param str the HTML snippet
     * @return the plain text
     */
    public static synchronized String htmlsnippet2text(String str) {
        // Tags are removed before decoding, so an escaped "&lt;b&gt;" survives as literal text.
        String withoutTags = TAG.matcher(str).replaceAll("");
        String decoded = replaceSpecialCharacters(withoutTags);
        return WHITESPACE.matcher(decoded).replaceAll(" ").trim();
    }

    /**
     * Extracts the text of an HTML document given as a string.
     *
     * @param str the HTML document
     * @return the extracted text (an empty string if the extractor yields {@code null}), or
     *     {@code null} if the parser throws a {@code ParserException}
     */
    public static synchronized String html2text(String str) {
        StringBean extractor = newTextExtractor();
        Parser parser = new Parser();
        try {
            parser.setInputHTML(str);
            parser.visitAllNodesWith(extractor);
            String text = extractor.getStrings();
            return text == null ? "" : text;
        } catch (ParserException e) {
            // Best effort: an unparsable document is reported as null.
            return null;
        }
    }

    /**
     * Extracts the text of an HTML document identified by a resource string, which is handed to
     * {@code Parser.setResource}.
     *
     * @param str the resource to read
     * @return the extracted text as returned by the extractor, or {@code null} if the parser
     *     throws a {@code ParserException}
     */
    public static synchronized String file2text(String str) {
        StringBean extractor = newTextExtractor();
        Parser parser = new Parser();
        try {
            parser.setResource(str);
            parser.visitAllNodesWith(extractor);
            return extractor.getStrings();
        } catch (ParserException e) {
            // Best effort: an unreadable or unparsable resource is reported as null.
            return null;
        }
    }

    /**
     * Fetches a web page and extracts its text.
     *
     * <p>Only connections that are instances of {@link HttpURLConnection} are accepted. The
     * request carries the user agent {@code Mozilla/4.0} and uses {@link #TIMEOUT} for both the
     * connect and the read timeout.
     *
     * @param str the URL of the page
     * @return the extracted text, or {@code null} if the URL does not open an HTTP connection or
     *     another I/O error occurs
     * @throws SocketTimeoutException if a timeout is raised while opening the connection
     */
    public static synchronized String url2text(String str) throws SocketTimeoutException {
        try {
            URLConnection connection = new URL(str).openConnection();
            if (!(connection instanceof HttpURLConnection)) {
                return null;
            }
            connection.setRequestProperty("User-agent", "Mozilla/4.0");
            connection.setConnectTimeout(TIMEOUT);
            connection.setReadTimeout(TIMEOUT);

            StringBean extractor = newTextExtractor();
            extractor.setConnection(connection);
            return extractor.getStrings();
        } catch (SocketTimeoutException e) {
            // Propagate as declared instead of folding it into the generic null result below.
            throw e;
        } catch (IOException e) {
            return null;
        }
    }
}
