1. package com.nowcoder.community.util;
    import org.apache.commons.lang3.CharUtils;
    import org.apache.commons.lang3.StringUtils;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.springframework.stereotype.Component;
    import javax.annotation.PostConstruct;
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;
    import java.util.HashMap;
    import java.util.Map;
    14. @Component
    15. public class SensitiveFilter {
    16. private static final Logger logger = LoggerFactory.getLogger(SensitiveFilter.class);
    17. // 替换符
    18. private static final String REPLACEMENT = "***";
    19. // 根节点
    20. private TrieNode rootNode = new TrieNode();
    21. @PostConstruct
    22. public void init() {
    23. try (
    24. InputStream is = this.getClass().getClassLoader().getResourceAsStream("sensitive-words.txt");
    25. BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    26. ) {
    27. String keyword;
    28. while ((keyword = reader.readLine()) != null) {
    29. // 添加到前缀树
    30. this.addKeyword(keyword);
    31. }
    32. } catch (IOException e) {
    33. logger.error("加载敏感词文件失败: " + e.getMessage());
    34. }
    35. }
    36. // 将一个敏感词添加到前缀树中
    37. private void addKeyword(String keyword) {
    38. TrieNode tempNode = rootNode;
    39. for (int i = 0; i < keyword.length(); i++) {
    40. char c = keyword.charAt(i);
    41. TrieNode subNode = tempNode.getSubNode(c);
    42. if (subNode == null) {
    43. // 初始化子节点
    44. subNode = new TrieNode();
    45. tempNode.addSubNode(c, subNode);
    46. }
    47. // 指向子节点,进入下一轮循环
    48. tempNode = subNode;
    49. // 设置结束标识
    50. if (i == keyword.length() - 1) {
    51. tempNode.setKeywordEnd(true);
    52. }
    53. }
    54. }
    55. /**
    56. * 过滤敏感词
    57. *
    58. * @param text 待过滤的文本
    59. * @return 过滤后的文本
    60. */
    61. public String filter(String text) {
    62. if (StringUtils.isBlank(text)) {
    63. return null;
    64. }
    65. // 指针1
    66. TrieNode tempNode = rootNode;
    67. // 指针2
    68. int begin = 0;
    69. // 指针3
    70. int position = 0;
    71. // 结果
    72. StringBuilder sb = new StringBuilder();
    73. while (position < text.length()) {
    74. char c = text.charAt(position);
    75. // 跳过符号
    76. if (isSymbol(c)) {
    77. // 若指针1处于根节点,将此符号计入结果,让指针2向下走一步
    78. if (tempNode == rootNode) {
    79. sb.append(c);
    80. begin++;
    81. }
    82. // 无论符号在开头或中间,指针3都向下走一步
    83. position++;
    84. continue;
    85. }
    86. // 检查下级节点
    87. tempNode = tempNode.getSubNode(c);
    88. if (tempNode == null) {
    89. // 以begin开头的字符串不是敏感词
    90. sb.append(text.charAt(begin));
    91. // 进入下一个位置
    92. position = ++begin;
    93. // 重新指向根节点
    94. tempNode = rootNode;
    95. } else if (tempNode.isKeywordEnd()) {
    96. // 发现敏感词,将begin~position字符串替换掉
    97. sb.append(REPLACEMENT);
    98. // 进入下一个位置
    99. begin = ++position;
    100. // 重新指向根节点
    101. tempNode = rootNode;
    102. } else {
    103. // 检查下一个字符
    104. position++;
    105. }
    106. }
    107. // 将最后一批字符计入结果
    108. sb.append(text.substring(begin));
    109. return sb.toString();
    110. }
    111. // 判断是否为符号
    112. private boolean isSymbol(Character c) {
    113. // 0x2E80~0x9FFF 是东亚文字范围
    114. return !CharUtils.isAsciiAlphanumeric(c) && (c < 0x2E80 || c > 0x9FFF);
    115. }
    116. // 前缀树
    117. private class TrieNode {
    118. // 关键词结束标识
    119. private boolean isKeywordEnd = false;
    120. // 子节点(key是下级字符,value是下级节点)
    121. private Map<Character, TrieNode> subNodes = new HashMap<>();
    122. public boolean isKeywordEnd() {
    123. return isKeywordEnd;
    124. }
    125. public void setKeywordEnd(boolean keywordEnd) {
    126. isKeywordEnd = keywordEnd;
    127. }
    128. // 添加子节点
    129. public void addSubNode(Character c, TrieNode node) {
    130. subNodes.put(c, node);
    131. }
    132. // 获取子节点
    133. public TrieNode getSubNode(Character c) {
    134. return subNodes.get(c);
    135. }
    136. }
    137. }