概述

主要是使用MutationObserver来实现页面组件变化的监控,收到变化后,将变化内容输出到Console中。
然后在Java端通过Playwright的page.onConsoleMessage监听Console的输出,从而实现消息的抓取。

换成大白话就是,在js中监控页面组件的变化,然后输出到console。

参考

https://github.com/microsoft/playwright/issues/4051

实现

  1. import com.microsoft.playwright.*;
  2. import lombok.extern.slf4j.Slf4j;
  3. /**
  4. * 抖音弹幕抓取demo
  5. */
  6. @Slf4j
  7. public class DouxxDanMuDemo {
  8. public void capture(String captureUrl) {
  9. log.debug("start to test playwright demo");
  10. try (Playwright playwright = Playwright.create()) {
  11. Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false).setSlowMo(50));
  12. Page page = browser.newPage();
  13. page.navigate(captureUrl);
  14. //等待页面加载完成
  15. page.waitForLoadState();
  16. page.onConsoleMessage(msg->{
  17. // log.debug("收到控制台消息[{}], 类型[{}], 参数[{}], 位置[{}]", msg.text(), msg.type(), msg.args(), msg.location());
  18. log.debug("收到控制台消息[{}]", msg.text());
  19. });
  20. String jsCode = "const targetNode = document.getElementsByClassName(\"webcast-chatroom___items\")[0].getElementsByTagName(\"div\")[0];\n" +
  21. "\n" +
  22. "const callback = function(mutationsList, observer) {\n" +
  23. " for(let mutation of mutationsList) {\n" +
  24. " if (mutation.type === 'childList') {\n" +
  25. " let addNodes = mutation.addedNodes;\n" +
  26. " for (let i = 0; i < addNodes.length; ++i) {\n" +
  27. " let item = addNodes[i];\n" +
  28. " let allSpan = item.querySelectorAll(\"div > span\");\n" +
  29. " let totalContent = \"\";\n" +
  30. " for(let j=0;j<allSpan.length;j++){\n" +
  31. " totalContent +=allSpan[j].innerText;\n" +
  32. " }\n" +
  33. " console.log('A child node has been added or removed.value is', totalContent);\n" +
  34. " }\n" +
  35. " }\n" +
  36. " else if (mutation.type === 'attributes') {\n" +
  37. " //console.log('The ' + mutation.attributeName + ' attribute was modified.');\n" +
  38. " }\n" +
  39. " }\n" +
  40. "};\n" +
  41. "\n" +
  42. "new MutationObserver(callback).observe(targetNode, { attributes: true, childList: true, subtree: true });";
  43. page.evaluate(jsCode);
  44. page.waitForTimeout(3000 * 1000);
  45. log.debug("over");
  46. }
  47. }
  48. public static void main(String[] args) {
  49. DouyinDanMuDemo demo = new DouyinDanMuDemo();
  50. demo.capture("https://live.douxx.com/19171003");
  51. }
  52. }

其中,js代码还可以再次优化,同时监控弹幕和用户进入直播的提示信息,如下:

  1. let danmuObserver, userComeObserver;
  2. const callback = function(mutationsList, observer) {
  3. for(let mutation of mutationsList) {
  4. if (mutation.type === 'childList') {
  5. let addNodes = mutation.addedNodes;
  6. for (let i = 0; i < addNodes.length; ++i) {
  7. let item = addNodes[i];
  8. let allSpan = item.querySelectorAll("div > span");
  9. let totalContent = "";
  10. for(let j=0;j<allSpan.length;j++){
  11. totalContent +=allSpan[j].innerText;
  12. }
  13. if(observer === danmuObserver){
  14. console.log('danmu>>', totalContent);
  15. }else if(observer === userComeObserver){
  16. console.log('userCome>>', totalContent);
  17. }
  18. }
  19. }
  20. }
  21. };
  22. const danmuNode = document.getElementsByClassName("webcast-chatroom___items")[0].getElementsByTagName("div")[0];
  23. danmuObserver = new MutationObserver(callback);
  24. danmuObserver.observe(danmuNode, { attributes: false, childList: true, subtree: true });
  25. const userComeNode = document.getElementsByClassName("webcast-chatroom___bottom-message")[0];
  26. userComeObserver = new MutationObserver(callback);
  27. userComeObserver.observe(userComeNode, { attributes: false, childList: true, subtree: true });