package info.ephyra.nlp;

import info.ephyra.util.HTMLConverter;
import java.util.regex.Pattern;

/* loaded from: input_file:info/ephyra/nlp/SentenceExtractor.class */
/**
 * Splits the text content of an HTML document into sentence-like fragments
 * using a fixed sequence of regular-expression rewrites.
 *
 * <p>Inline formatting tags are dropped, every other tag becomes a fragment
 * boundary, and a {@code .}, {@code !} or {@code ?} followed by a blank ends a
 * fragment as well.
 */
public class SentenceExtractor {
    /** Marker inserted at every fragment boundary; the text is finally split on it. */
    private static final String DELIM = "<delim>";

    /**
     * Case-insensitive regex for inline ("non-structuring") tags that are removed
     * without introducing a fragment boundary.
     */
    private static final String NON_STRUC_TAGS = "(?i)<b( .*?)?>|</b>|<i( .*?)?>|</i>|<u( .*?)?>|</u>|<sup( .*?)?>|</sup>|<sub( .*?)?>|</sub>|<tt( .*?)?>|</tt>|<font( .*?)?>|</font>|<small( .*?)?>|</small>|<big( .*?)?>|</big>|<a( .*?)?>|</a>|<br>|<nobr>";

    // Patterns are compiled once instead of on every call.

    /** One or more whitespace characters (blanks, tabs, line breaks, ...). */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Compiled form of {@link #NON_STRUC_TAGS}. */
    private static final Pattern NON_STRUC_TAG = Pattern.compile(NON_STRUC_TAGS);

    /** Any remaining tag-like span from an opening to the next closing angle bracket. */
    private static final Pattern ANY_TAG = Pattern.compile("<.*?>");

    /** Sentence-final punctuation followed by a blank; group 1 is the punctuation mark. */
    private static final Pattern SENTENCE_END = Pattern.compile("([.!?]) ");

    /** A run of delimiters and blanks that contains at least one delimiter. */
    private static final Pattern DELIM_RUN = Pattern.compile(" ?<delim>( |<delim>)*");

    /** A single blank or delimiter at the very start or very end of the text. */
    private static final Pattern EDGE = Pattern.compile("\\A( |<delim>)|( |<delim>)\\z");

    /** The delimiter itself, used for the final split. */
    private static final Pattern DELIM_SPLIT = Pattern.compile(DELIM);

    /**
     * Extracts sentence-like fragments from an HTML string.
     *
     * @param str the HTML text; it is handed to
     *            {@code HTMLConverter.replaceSpecialCharacters} unchecked, so
     *            {@code null} handling depends on that helper
     * @return the fragments in document order, without tags and with every run
     *         of whitespace reduced to a single blank. When nothing remains
     *         after cleaning, the result is a one-element array holding the
     *         empty string (not an empty array).
     */
    public static String[] getSentencesFromHtml(String str) {
        // NOTE(review): presumably decodes HTML entities; confirm against HTMLConverter.
        String text = HTMLConverter.replaceSpecialCharacters(str);

        // Normalise whitespace BEFORE looking at tags and punctuation. The rules
        // below only recognise a literal blank, and '.' in the tag patterns does
        // not match line terminators, so without this step a tag wrapped over two
        // lines survives into the output and a sentence ending at a line break is
        // not split.
        text = WHITESPACE.matcher(text).replaceAll(" ");

        // Inline formatting tags vanish without separating the surrounding text.
        text = NON_STRUC_TAG.matcher(text).replaceAll("");

        // Every other tag is treated as a boundary between fragments.
        text = ANY_TAG.matcher(text).replaceAll(DELIM);

        // End a fragment after '.', '!' or '?' when a blank follows.
        text = SENTENCE_END.matcher(text).replaceAll("$1" + DELIM);

        // Removing inline tags can leave adjacent blanks; collapse them again.
        text = WHITESPACE.matcher(text).replaceAll(" ");

        // Merge neighbouring delimiters and blanks so no empty fragments arise.
        text = DELIM_RUN.matcher(text).replaceAll(DELIM);

        // After merging, at most one blank or delimiter is left at either end.
        text = EDGE.matcher(text).replaceAll("");

        return DELIM_SPLIT.split(text);
    }
}
