代码:

  1. import java.util.regex.Matcher;
  2. import java.util.regex.Pattern;
  /**
   * Extracts the visible text of an embedded HTML attendance sheet and prints it,
   * one output line per HTML paragraph.
   *
   * <p>The page is scanned once by a tokenizer regex. Markup (the whole head element,
   * every tag together with its attributes, and character entities) is consumed as a
   * unit and discarded, so CSS values inside {@code style="..."} attributes can never
   * leak into the result. Only CJK/full-width characters, digit runs and a small set
   * of ASCII punctuation marks are kept from the text between tags.
   */
  public class Main {

      /**
       * Tokenizer used by {@link #extractText(String)}. Alternatives are tried in order:
       * <ol>
       *   <li>the complete head element (holds the page title, which is not visible text);</li>
       *   <li>a closing paragraph tag, captured as group {@code para};</li>
       *   <li>any other tag, including attributes that span several lines;</li>
       *   <li>a character entity such as the non-breaking-space entity;</li>
       *   <li>a piece of text to keep, captured as group {@code text}: one character in
       *       the range U+0391..U+FFE5 (covers CJK ideographs, the ideographic full stop,
       *       em dashes and full-width punctuation), a whole run of ASCII digits, or one
       *       of the ASCII marks {@code , : ( )}.</li>
       * </ol>
       * Whole digit runs are kept so that values such as an hour ("4") or a two-digit
       * month/day are not truncated. Anything that matches no alternative (whitespace,
       * Latin letters, other punctuation) is skipped by the scan.
       */
      private static final Pattern TOKEN = Pattern.compile(
              "<head\\b[^>]*>.*?</head\\s*>"
                      + "|(?<para></p\\s*>)"
                      + "|<[^>]*>"
                      + "|&#?\\w+;"
                      + "|(?<text>[\\u0391-\\uffe5]|\\d+|[,:()])",
              Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

      /** The HTML page whose text is extracted. */
      private static final String HTML = "<!DOCTYPE html>\n" +
              "<html lang=\"en\">\n" +
              "\n" +
              "<head>\n" +
              " <meta charset=\"UTF-8\">\n" +
              " <title>第五次实验文件</title>\n" +
              "</head>\n" +
              "\n" +
              "<body>\n" +
              " <p style=\"text-align: center;\">\n" +
              " <span style=\"font-size: 20px; color: rgb(255, 0, 0);\">\n" +
              " <span\n" +
              " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">六年级二班</span>\n" +
              " <span\n" +
              " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">学生</span>\n" +
              " <span\n" +
              " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">点</span>\n" +
              " <span\n" +
              " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">名册</span>\n" +
              " <span\n" +
              " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">&nbsp;</span>\n" +
              " </span>\n" +
              " </p>\n" +
              " <p>\n" +
              " <span style=\"font-size: 14px;\">\n" +
              " <strong>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">时间:2019年\n" +
              " 3月 1日——3月 8日&nbsp;</span>\n" +
              " </strong>\n" +
              " </span>\n" +
              " </p>\n" +
              " <p>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周一\n" +
              " 缺到&nbsp;\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">张玲珑(家长电话:13534321432),\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">王鑫(\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13889765432),\n" +
              " </span>\n" +
              " </span>\n" +
              " </span>\n" +
              " <span\n" +
              " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">左晓睿(\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13645322221),\n" +
              " </span>\n" +
              " <span\n" +
              " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">刘平(\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13889902188)\n" +
              " </span>\n" +
              " </p>\n" +
              " <p>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周二&nbsp;\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到&nbsp;</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">张玲珑(家长电话:13534321432),</span>\n" +
              " </span>\n" +
              " <span\n" +
              " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">王艳丽\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13444897652)</span>\n" +
              " </span>\n" +
              " </p>\n" +
              " <p>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周三\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">&nbsp;</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到&nbsp;</span>\n" +
              " </span>\n" +
              " </span>\n" +
              " <span\n" +
              " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">刘浩\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(电话:13352058788)</span>,王鑫\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13889765432),</span>侯康\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">母亲电话:</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13472005828)</span>,翟晓雯\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13800331200)</span>\n" +
              " </span>\n" +
              " </p>\n" +
              " <p>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周四\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">&nbsp;</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到&nbsp;</span>\n" +
              " </span>\n" +
              " <span\n" +
              " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">张玲珑(\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13534321432</span>),刘瑞(\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">父亲电话:13671745850</span>),李杰\n" +
              " (<span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13334321400</span>)\n" +
              " </span>\n" +
              " </p>\n" +
              " <p>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周五\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">&nbsp;</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到&nbsp;</span>\n" +
              " </span>\n" +
              " <span\n" +
              " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">吴婷婷\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13500563200</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"></span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">),</span>张晴\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:15834302100</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">),</span>翟超楠\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">爷爷电话:13763202130</span>\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">)</span>\n" +
              " </span>\n" +
              " </p>\n" +
              " <p>\n" +
              " <span\n" +
              " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">\n" +
              " <span\n" +
              " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">备注:每周五下午4点前汇总交到教导处刘老师(联系电话:13995403310)。</span>\n" +
              " </span>\n" +
              " </p>\n" +
              " <p>\n" +
              " <br />\n" +
              " </p>\n" +
              " <p>\n" +
              " <br />\n" +
              " </p>\n" +
              " <p>\n" +
              " <br />\n" +
              " </p>\n" +
              "</body>\n" +
              "\n" +
              "</html>";

      /**
       * Prints the text extracted from the embedded page to standard output.
       *
       * @param args ignored
       */
      public static void main(String[] args) {
          System.out.println(extractText(HTML));
      }

      /**
       * Returns the visible text of {@code html} with all markup removed.
       *
       * <p>Each closing paragraph tag contributes one {@code '\n'}, so every paragraph
       * (including an empty one) ends its own output line. Whitespace and line breaks
       * of the HTML source itself are not copied.
       *
       * @param html the HTML source; {@code null} is treated as empty input
       * @return the kept text, or an empty string when there is nothing to keep
       */
      private static String extractText(String html) {
          if (html == null || html.isEmpty()) {
              return "";
          }
          StringBuilder text = new StringBuilder();
          Matcher tokens = TOKEN.matcher(html);
          while (tokens.find()) {
              if (tokens.group("para") != null) {
                  text.append('\n');
              } else if (tokens.group("text") != null) {
                  text.append(tokens.group("text"));
              }
              // Any other match is markup (head element, tag, entity): drop it.
          }
          return text.toString();
      }
  }

html页面:

image.png

抠取结果:

image.png