代码:
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts the readable roll-call text (names, dates, phone numbers and punctuation) from an
 * embedded HTML page and prints it, one output line per HTML paragraph.
 *
 * <p>The extraction is purely regex based: every match of {@link #TOKEN} is kept, everything
 * else (tags, attributes, whitespace, ASCII letters) is dropped.
 */
public class Main {

    /** Closing paragraph tag; each occurrence becomes a line break in the output. */
    private static final String PARAGRAPH_END = "</p>";

    /** Text of the page's title element, which is removed from the extracted result. */
    private static final String PAGE_TITLE = "第五次实验文件";

    /**
     * Tokens that are kept from the page: one character of the CJK/full-width range, an
     * 11-digit number (phone), a 4-digit number (year), a closing paragraph tag, ASCII
     * parentheses, and a single digit directly followed by the month or day character.
     *
     * <p>NOTE: a digit that is not part of one of these alternatives is dropped, e.g. the
     * "4" in front of the hour character in the remark paragraph.
     */
    private static final Pattern TOKEN =
            Pattern.compile("[\u0391-\uffe5]|\\d{11}|\\d{4}|。|,|:|</p>|\\(|\\)|\\d月|\\d日");

    /** The HTML page the text is extracted from. */
    private static final String ROLL_CALL_HTML = "<!DOCTYPE html>\n"
            + "<html lang=\"en\">\n"
            + "\n"
            + "<head>\n"
            + " <meta charset=\"UTF-8\">\n"
            + " <title>第五次实验文件</title>\n"
            + "</head>\n"
            + "\n"
            + "<body>\n"
            + " <p style=\"text-align: center;\">\n"
            + " <span style=\"font-size: 20px; color: rgb(255, 0, 0);\">\n"
            + " <span\n"
            + " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">六年级二班</span>\n"
            + " <span\n"
            + " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">学生</span>\n"
            + " <span\n"
            + " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">点</span>\n"
            + " <span\n"
            + " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">名册</span>\n"
            + " <span\n"
            + " style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\"> </span>\n"
            + " </span>\n"
            + " </p>\n"
            + " <p>\n"
            + " <span style=\"font-size: 14px;\">\n"
            + " <strong>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">时间:2019年\n"
            + " 3月 1日——3月 8日 </span>\n"
            + " </strong>\n"
            + " </span>\n"
            + " </p>\n"
            + " <p>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周一\n"
            + " 缺到 \n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">张玲珑(家长电话:13534321432),\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">王鑫(\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13889765432),\n"
            + " </span>\n"
            + " </span>\n"
            + " </span>\n"
            + " <span\n"
            + " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">左晓睿(\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13645322221),\n"
            + " </span>\n"
            + " <span\n"
            + " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">刘平(\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13889902188)\n"
            + " </span>\n"
            + " </p>\n"
            + " <p>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周二 \n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到 </span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">张玲珑(家长电话:13534321432),</span>\n"
            + " </span>\n"
            + " <span\n"
            + " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">王艳丽\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13444897652)</span>\n"
            + " </span>\n"
            + " </p>\n"
            + " <p>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周三\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"> </span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到 </span>\n"
            + " </span>\n"
            + " </span>\n"
            + " <span\n"
            + " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">刘浩\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(电话:13352058788)</span>,王鑫\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13889765432),</span>侯康\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">母亲电话:</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13472005828)</span>,翟晓雯\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13800331200)</span>\n"
            + " </span>\n"
            + " </p>\n"
            + " <p>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周四\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"> </span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到 </span>\n"
            + " </span>\n"
            + " <span\n"
            + " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">张玲珑(\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13534321432</span>),刘瑞(\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">父亲电话:13671745850</span>),李杰\n"
            + " (<span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13334321400</span>)\n"
            + " </span>\n"
            + " </p>\n"
            + " <p>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周五\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"> </span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到 </span>\n"
            + " </span>\n"
            + " <span\n"
            + " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">吴婷婷\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13500563200</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"></span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">),</span>张晴\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:15834302100</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">),</span>翟超楠\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">爷爷电话:13763202130</span>\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">)</span>\n"
            + " </span>\n"
            + " </p>\n"
            + " <p>\n"
            + " <span\n"
            + " style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">\n"
            + " <span\n"
            + " style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">备注:每周五下午4点前汇总交到教导处刘老师(联系电话:13995403310)。</span>\n"
            + " </span>\n"
            + " </p>\n"
            + " <p>\n"
            + " <br />\n"
            + " </p>\n"
            + " <p>\n"
            + " <br />\n"
            + " </p>\n"
            + " <p>\n"
            + " <br />\n"
            + " </p>\n"
            + "</body>\n"
            + "\n"
            + "</html>";

    /**
     * Prints the text extracted from the embedded HTML page to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        System.out.println(extractText(ROLL_CALL_HTML));
    }

    /**
     * Collects every {@link #TOKEN} match of {@code html} in document order, turning each
     * closing paragraph tag into a line break, then removes leftover noise.
     *
     * @param html the markup to scan
     * @return the extracted text, one line per closed paragraph
     */
    private static String extractText(String html) {
        StringBuilder extracted = new StringBuilder();
        Matcher matcher = TOKEN.matcher(html);
        while (matcher.find()) {
            String token = matcher.group();
            extracted.append(PARAGRAPH_END.equals(token) ? "\n" : token);
        }
        // The ASCII parentheses of every CSS "rgb(...)" value are matched as well; since the
        // numbers between them are not kept they end up as adjacent "()" pairs, which are
        // stripped here together with the page title.
        return extracted.toString()
                .replace("()", "")
                .replace(PAGE_TITLE, "");
    }
}
html页面:
提取结果:
