htmlcleaner-xpath解析工具

  <dependency>
    <groupId>net.sourceforge.htmlcleaner</groupId>
    <artifactId>htmlcleaner</artifactId>
    <version>2.24</version>
  </dependency>

example

  1. private static HtmlCleaner htmlCleaner = new HtmlCleaner();
  2. /**
  3. * 把html转化为待处理对象
  4. *
  5. * @param html html文本
  6. * @return
  7. */
  8. public static TagNode toTagNode(String html) {
  9. TagNode tn = htmlCleaner.clean(html);
  10. return tn;
  11. }
  12. public static String getFirstTextByXPath(TagNode tn, String xpath) {
  13. Optional<Object[]> objects1 = getByXPath(tn, xpath);
  14. if (objects1.isPresent() && objects1.get().length > 0) {
  15. Object val = objects1.get()[0];
  16. return StringUtils.trimToEmpty(null == val ? "" : val.toString());
  17. }
  18. return StrPool.EMPTY;
  19. }
  20. public static Optional<Object[]> getByXPath(TagNode tn, String xpath) {
  21. try {
  22. return Optional.ofNullable(tn.evaluateXPath(xpath));
  23. } catch (XPatherException e) {
  24. }
  25. return Optional.empty();
  26. }