获取所有的 group 内容

将帖子/博文中的 #xxx 内容都提取出来。

  1. package cn.mrcode;
  2. import java.util.ArrayList;
  3. import java.util.List;
  4. import java.util.concurrent.CountDownLatch;
  5. import java.util.regex.Matcher;
  6. import java.util.regex.Pattern;
  7. import lombok.extern.slf4j.Slf4j;
  8. /**
  9. * 帖子 tag 处理
  10. *
  11. * @author mrcode
  12. * @date 2022/6/23 17:05
  13. */
  14. @Service
  15. @Slf4j
  16. public class HashtagTask {
  17. final static Pattern pattern = Pattern.compile("(#.+?)(?=#| )");
  18. /**
  19. * 提取 tag
  20. * @param body
  21. * @return
  22. */
  23. public static List<String> extractHashTags(String body) {
  24. final Matcher matcher = pattern.matcher(body);
  25. List<String> array = new ArrayList<>();
  26. while (matcher.find()) {
  27. array.add(matcher.group());
  28. }
  29. return array;
  30. }
  31. public static void main(String[] args) {
  32. String body = "#KalAdvisory#KalAdvisory #Kal #Shark #vacuum #cleaning #PowerFins #housecleaning #antihairwrap #duoclean #Petcare #TruePet https://t.co/T3ffrOG6JZ\n" +
  33. "i got a robot shark vacuum\n" +
  34. "ive named him bruce\n" +
  35. "and i've taped a shark fin into him\n" +
  36. "not to be dramatic but i'd die for him? lol\n" +
  37. "Our first DuoClean vacuum! It got all of the dirt that our vacuum missed.";
  38. final List<String> tags = extractHashTags(body);
  39. // 输出值 [#KalAdvisory , #Kal , #Shark , #vacuum , #cleaning , #PowerFins , #housecleaning , #antihairwrap , #duoclean , #Petcare , #TruePet ]
  40. System.out.println(tags);
  41. }
  42. }