/*
 * Get the content of every matched group:
 * extract all "#xxx" hashtags from the body of a post / blog article.
 */
package cn.mrcode;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
/**
* Hashtag handling for posts: extracts "#tag" tokens from post text.
*
* @author mrcode
* @date 2022/6/23 17:05
*/
@Service
@Slf4j
public class HashtagTask {

    /**
     * Matches a single hashtag: a {@code #} followed by one or more characters
     * that are neither {@code #} nor whitespace.
     *
     * <p>A negated character class is used instead of a lookahead for
     * {@code #} or a space so that a tag is also recognised when it is the
     * last token of the text or is followed by a newline or tab.
     */
    static final Pattern pattern = Pattern.compile("#[^#\\s]+");

    /**
     * Extracts every hashtag from the given text, in order of appearance.
     *
     * <p>Each returned element includes its leading {@code #} and contains no
     * whitespace. Adjacent tags such as {@code "#a#b"} are returned as two
     * separate elements, and duplicates are preserved.
     *
     * @param body the text to scan; may be {@code null} or empty
     * @return a new mutable list of the hashtags found; empty (never
     *         {@code null}) when {@code body} is {@code null} or has no tags
     */
    public static List<String> extractHashTags(String body) {
        List<String> tags = new ArrayList<>();
        if (body == null || body.isEmpty()) {
            return tags;
        }
        Matcher matcher = pattern.matcher(body);
        while (matcher.find()) {
            tags.add(matcher.group());
        }
        return tags;
    }

    /**
     * Small manual demo: extracts the hashtags from a sample post and prints them.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String body = "#KalAdvisory#KalAdvisory #Kal #Shark #vacuum #cleaning #PowerFins #housecleaning #antihairwrap #duoclean #Petcare #TruePet https://t.co/T3ffrOG6JZ\n" +
                "i got a robot shark vacuum\n" +
                "ive named him bruce\n" +
                "and i've taped a shark fin into him\n" +
                "not to be dramatic but i'd die for him? lol\n" +
                "Our first DuoClean vacuum! It got all of the dirt that our vacuum missed.";
        List<String> tags = extractHashTags(body);
        // Prints: [#KalAdvisory, #KalAdvisory, #Kal, #Shark, #vacuum, #cleaning, #PowerFins,
        //          #housecleaning, #antihairwrap, #duoclean, #Petcare, #TruePet]
        System.out.println(tags);
    }
}