HtmlCleaner XPath 解析工具
<dependency>
<groupId>net.sourceforge.htmlcleaner</groupId>
<artifactId>htmlcleaner</artifactId>
<version>2.24</version>
</dependency>
Example usage:
/**
 * Shared cleaner instance used by {@code toTagNode} to parse HTML text.
 * Declared {@code final} because it is created once and never reassigned.
 * NOTE(review): the instance is shared by every caller — confirm that
 * HtmlCleaner is safe for concurrent use if these helpers run on several threads.
 */
private static final HtmlCleaner htmlCleaner = new HtmlCleaner();
/**
 * Converts HTML text into a node tree that the XPath helpers can query.
 *
 * @param html the HTML text to parse
 * @return the {@link TagNode} produced by the shared cleaner's {@code clean} call
 */
public static TagNode toTagNode(String html) {
    return htmlCleaner.clean(html);
}
/**
 * Evaluates an XPath expression against a node and returns the first match as text.
 *
 * @param tn    the node to query
 * @param xpath the XPath expression to evaluate
 * @return the first match's {@code toString()} value passed through
 *         {@code StringUtils.trimToEmpty}; {@code StrPool.EMPTY} when the
 *         evaluation yields no result array or an empty one
 */
public static String getFirstTextByXPath(TagNode tn, String xpath) {
    Object[] matches = getByXPath(tn, xpath).orElse(null);
    // Guard clause: nothing evaluated, or the expression matched nothing.
    if (matches == null || matches.length == 0) {
        return StrPool.EMPTY;
    }
    Object first = matches[0];
    // A null element is treated as empty text rather than dereferenced.
    String text = (first == null) ? "" : first.toString();
    return StringUtils.trimToEmpty(text);
}
/**
 * Evaluates an XPath expression against a node on a best-effort basis.
 *
 * <p>Failures are reported as an empty {@link Optional} rather than an exception,
 * so callers can treat "no node", "no expression", "evaluation failed" and
 * "null result" uniformly.
 *
 * @param tn    the node to query; {@code null} yields an empty result
 * @param xpath the XPath expression to evaluate; {@code null} yields an empty result
 * @return the raw result array of {@code tn.evaluateXPath(xpath)} wrapped in an
 *         {@link Optional}, or {@link Optional#empty()} when an argument is
 *         {@code null}, the result is {@code null}, or evaluation throws
 *         {@link XPatherException}
 */
public static Optional<Object[]> getByXPath(TagNode tn, String xpath) {
    // Without this guard a null node would escape as a NullPointerException,
    // which contradicts the Optional-based "absent result" contract.
    if (tn == null || xpath == null) {
        return Optional.empty();
    }
    try {
        return Optional.ofNullable(tn.evaluateXPath(xpath));
    } catch (XPatherException ignored) {
        // Intentionally ignored: an expression that cannot be evaluated is
        // reported to the caller as "no result" instead of propagating.
        return Optional.empty();
    }
}