概述
主要是使用MutationObserver来实现页面组件变化的监控,收到变化后,将变化内容输出到Console中。
然后在Java端通过Playwright的page.onConsoleMessage监听浏览器Console的输出,从而实现消息的抓取。
换成大白话就是,在js中监控页面组件的变化,然后输出到console。
参考
https://github.com/microsoft/playwright/issues/4051
实现
import com.microsoft.playwright.*;import lombok.extern.slf4j.Slf4j;/*** 抖音弹幕抓取demo*/@Slf4jpublic class DouxxDanMuDemo {public void capture(String captureUrl) {log.debug("start to test playwright demo");try (Playwright playwright = Playwright.create()) {Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false).setSlowMo(50));Page page = browser.newPage();page.navigate(captureUrl);//等待页面加载完成page.waitForLoadState();page.onConsoleMessage(msg->{// log.debug("收到控制台消息[{}], 类型[{}], 参数[{}], 位置[{}]", msg.text(), msg.type(), msg.args(), msg.location());log.debug("收到控制台消息[{}]", msg.text());});String jsCode = "const targetNode = document.getElementsByClassName(\"webcast-chatroom___items\")[0].getElementsByTagName(\"div\")[0];\n" +"\n" +"const callback = function(mutationsList, observer) {\n" +" for(let mutation of mutationsList) {\n" +" if (mutation.type === 'childList') {\n" +" let addNodes = mutation.addedNodes;\n" +" for (let i = 0; i < addNodes.length; ++i) {\n" +" let item = addNodes[i];\n" +" let allSpan = item.querySelectorAll(\"div > span\");\n" +" let totalContent = \"\";\n" +" for(let j=0;j<allSpan.length;j++){\n" +" totalContent +=allSpan[j].innerText;\n" +" }\n" +" console.log('A child node has been added or removed.value is', totalContent);\n" +" }\n" +" }\n" +" else if (mutation.type === 'attributes') {\n" +" //console.log('The ' + mutation.attributeName + ' attribute was modified.');\n" +" }\n" +" }\n" +"};\n" +"\n" +"new MutationObserver(callback).observe(targetNode, { attributes: true, childList: true, subtree: true });";page.evaluate(jsCode);page.waitForTimeout(3000 * 1000);log.debug("over");}}public static void main(String[] args) {DouyinDanMuDemo demo = new DouyinDanMuDemo();demo.capture("https://live.douxx.com/19171003");}}
其中,js代码还可以再次优化,同时监控弹幕和用户进入直播的提示信息,如下:
// Both observers share one callback; they are kept in script-level bindings so the
// callback can tell which observer fired and label its output accordingly.
let danmuObserver;
let userComeObserver;

/**
 * Shared MutationObserver callback.
 *
 * For every element added under an observed node, concatenates the innerText of all
 * `div > span` descendants and logs it, prefixed with `danmu>>` or `userCome>>`
 * depending on which observer delivered the mutation.
 *
 * @param {MutationRecord[]} mutationsList - Batch of mutations delivered by the browser.
 * @param {MutationObserver} observer - The observer that produced this batch.
 */
const callback = function (mutationsList, observer) {
  for (const mutation of mutationsList) {
    if (mutation.type !== 'childList') {
      continue;
    }
    for (const item of mutation.addedNodes) {
      // addedNodes may contain text or comment nodes, which have no querySelectorAll;
      // calling it would throw and drop the rest of this batch.
      if (item.nodeType !== Node.ELEMENT_NODE) {
        continue;
      }
      let totalContent = '';
      for (const span of item.querySelectorAll('div > span')) {
        totalContent += span.innerText;
      }
      if (observer === danmuObserver) {
        console.log('danmu>>', totalContent);
      } else if (observer === userComeObserver) {
        console.log('userCome>>', totalContent);
      }
    }
  }
};

// Chat messages: first <div> inside the first "webcast-chatroom___items" element.
const danmuNode = document
  .getElementsByClassName('webcast-chatroom___items')[0]
  .getElementsByTagName('div')[0];
danmuObserver = new MutationObserver(callback);
danmuObserver.observe(danmuNode, { attributes: false, childList: true, subtree: true });

// "User entered the room" notices: first "webcast-chatroom___bottom-message" element.
const userComeNode = document.getElementsByClassName('webcast-chatroom___bottom-message')[0];
userComeObserver = new MutationObserver(callback);
userComeObserver.observe(userComeNode, { attributes: false, childList: true, subtree: true });
