概述
主要是使用MutationObserver来实现页面组件变化的监控,收到变化后,将变化内容输出到Console中。
然后再监控Console,从而实现消息的抓取。
换成大白话就是:在JS中监控页面组件的变化,并把变化内容输出到console;再由Java端通过Playwright监听console消息,拿到这些内容。
参考
https://github.com/microsoft/playwright/issues/4051
实现
import com.microsoft.playwright.*;
import lombok.extern.slf4j.Slf4j;
/**
* 抖音弹幕抓取demo
*/
@Slf4j
public class DouxxDanMuDemo {
public void capture(String captureUrl) {
log.debug("start to test playwright demo");
try (Playwright playwright = Playwright.create()) {
Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false).setSlowMo(50));
Page page = browser.newPage();
page.navigate(captureUrl);
//等待页面加载完成
page.waitForLoadState();
page.onConsoleMessage(msg->{
// log.debug("收到控制台消息[{}], 类型[{}], 参数[{}], 位置[{}]", msg.text(), msg.type(), msg.args(), msg.location());
log.debug("收到控制台消息[{}]", msg.text());
});
String jsCode = "const targetNode = document.getElementsByClassName(\"webcast-chatroom___items\")[0].getElementsByTagName(\"div\")[0];\n" +
"\n" +
"const callback = function(mutationsList, observer) {\n" +
" for(let mutation of mutationsList) {\n" +
" if (mutation.type === 'childList') {\n" +
" let addNodes = mutation.addedNodes;\n" +
" for (let i = 0; i < addNodes.length; ++i) {\n" +
" let item = addNodes[i];\n" +
" let allSpan = item.querySelectorAll(\"div > span\");\n" +
" let totalContent = \"\";\n" +
" for(let j=0;j<allSpan.length;j++){\n" +
" totalContent +=allSpan[j].innerText;\n" +
" }\n" +
" console.log('A child node has been added or removed.value is', totalContent);\n" +
" }\n" +
" }\n" +
" else if (mutation.type === 'attributes') {\n" +
" //console.log('The ' + mutation.attributeName + ' attribute was modified.');\n" +
" }\n" +
" }\n" +
"};\n" +
"\n" +
"new MutationObserver(callback).observe(targetNode, { attributes: true, childList: true, subtree: true });";
page.evaluate(jsCode);
page.waitForTimeout(3000 * 1000);
log.debug("over");
}
}
public static void main(String[] args) {
DouyinDanMuDemo demo = new DouyinDanMuDemo();
demo.capture("https://live.douxx.com/19171003");
}
}
其中,js代码还可以再次优化,同时监控弹幕和用户进入直播的提示信息,如下:
// Observers for chat messages and for "user entered the room" notices.
// Declared up front so the shared callback can tell which observer fired.
let danmuObserver, userComeObserver;

/**
 * Shared MutationObserver callback.
 *
 * For every element added under an observed node, concatenates the text of its
 * `div > span` descendants and logs it with a prefix identifying the source
 * observer ('danmu>>' or 'userCome>>').
 *
 * @param {MutationRecord[]} mutationsList records delivered by the observer
 * @param {MutationObserver} observer the observer that delivered the records
 */
const callback = function(mutationsList, observer) {
    for (const mutation of mutationsList) {
        if (mutation.type !== 'childList') {
            continue;
        }
        for (const item of mutation.addedNodes) {
            // With subtree observation, added nodes may be Text/Comment nodes,
            // which have no querySelectorAll; skip anything that is not an element.
            if (item.nodeType !== Node.ELEMENT_NODE) {
                continue;
            }
            let totalContent = "";
            for (const span of item.querySelectorAll("div > span")) {
                totalContent += span.innerText;
            }
            if (observer === danmuObserver) {
                console.log('danmu>>', totalContent);
            } else if (observer === userComeObserver) {
                console.log('userCome>>', totalContent);
            }
        }
    }
};

// Chat messages: first div inside the chat list container.
const danmuNode = document.getElementsByClassName("webcast-chatroom___items")[0].getElementsByTagName("div")[0];
danmuObserver = new MutationObserver(callback);
danmuObserver.observe(danmuNode, { attributes: false, childList: true, subtree: true });

// "User entered" notices: the bottom-message container.
const userComeNode = document.getElementsByClassName("webcast-chatroom___bottom-message")[0];
userComeObserver = new MutationObserver(callback);
userComeObserver.observe(userComeNode, { attributes: false, childList: true, subtree: true });