代码:
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Main {
public static void main(String[] args) {
String a = "<!DOCTYPE html>\n" +
"<html lang=\"en\">\n" +
"\n" +
"<head>\n" +
" <meta charset=\"UTF-8\">\n" +
" <title>第五次实验文件</title>\n" +
"</head>\n" +
"\n" +
"<body>\n" +
" <p style=\"text-align: center;\">\n" +
" <span style=\"font-size: 20px; color: rgb(255, 0, 0);\">\n" +
" <span\n" +
" style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">六年级二班</span>\n" +
" <span\n" +
" style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">学生</span>\n" +
" <span\n" +
" style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">点</span>\n" +
" <span\n" +
" style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">名册</span>\n" +
" <span\n" +
" style=\"font-size: 20px; font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\"> </span>\n" +
" </span>\n" +
" </p>\n" +
" <p>\n" +
" <span style=\"font-size: 14px;\">\n" +
" <strong>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; background-color: rgb(255, 255, 255);\">时间:2019年\n" +
" 3月 1日——3月 8日 </span>\n" +
" </strong>\n" +
" </span>\n" +
" </p>\n" +
" <p>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周一\n" +
" 缺到 \n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">张玲珑(家长电话:13534321432),\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">王鑫(\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13889765432),\n" +
" </span>\n" +
" </span>\n" +
" </span>\n" +
" <span\n" +
" style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">左晓睿(\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13645322221),\n" +
" </span>\n" +
" <span\n" +
" style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">刘平(\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>13889902188)\n" +
" </span>\n" +
" </p>\n" +
" <p>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周二 \n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到 </span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">张玲珑(家长电话:13534321432),</span>\n" +
" </span>\n" +
" <span\n" +
" style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">王艳丽\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13444897652)</span>\n" +
" </span>\n" +
" </p>\n" +
" <p>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周三\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"> </span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到 </span>\n" +
" </span>\n" +
" </span>\n" +
" <span\n" +
" style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">刘浩\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(电话:13352058788)</span>,王鑫\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13889765432),</span>侯康\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">母亲电话:</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13472005828)</span>,翟晓雯\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">13800331200)</span>\n" +
" </span>\n" +
" </p>\n" +
" <p>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周四\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"> </span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到 </span>\n" +
" </span>\n" +
" <span\n" +
" style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">张玲珑(\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13534321432</span>),刘瑞(\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">父亲电话:13671745850</span>),李杰\n" +
" (<span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13334321400</span>)\n" +
" </span>\n" +
" </p>\n" +
" <p>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">周五\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"> </span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">缺到 </span>\n" +
" </span>\n" +
" <span\n" +
" style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">吴婷婷\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:13500563200</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\"></span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">),</span>张晴\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">家长电话:15834302100</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">),</span>翟超楠\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">(</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">爷爷电话:13763202130</span>\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">)</span>\n" +
" </span>\n" +
" </p>\n" +
" <p>\n" +
" <span\n" +
" style=\"background-color: rgb(255, 255, 255); color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px;\">\n" +
" <span\n" +
" style=\"color: rgb(34, 34, 34); font-family: Arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255);\">备注:每周五下午4点前汇总交到教导处刘老师(联系电话:13995403310)。</span>\n" +
" </span>\n" +
" </p>\n" +
" <p>\n" +
" <br />\n" +
" </p>\n" +
" <p>\n" +
" <br />\n" +
" </p>\n" +
" <p>\n" +
" <br />\n" +
" </p>\n" +
"</body>\n" +
"\n" +
"</html>";
// String str = "[\u0391-\uffe5]";//匹配汉字内容
String ans = "";
String str = "[\u0391-\uffe5]|\\d{11}|\\d{4}|。|,|:|</p>|\\(|\\)|\\d月|\\d日";
Pattern pattern = Pattern.compile(str);
Matcher matcher = pattern.matcher(a);
while(matcher.find()){
if(matcher.group(0).equals("</p>")){
ans += "\n";
}
else{
ans += matcher.group(0);
}
}
//字符串替换操作
ans = ans.replaceAll("\\(\\)","");
ans = ans.replaceAll("第五次实验文件","");
System.out.println(ans);
}
}
html页面:
抠取结果: