此文仅为抛砖引玉之用

在VBA中添加引用Microsoft HTML Object Library之后,可以使用html对象,作为无头浏览器,让它加载页面,让它执行javascript脚本,用它输出数据等

复制了网页HTML代码,可以粘贴到excel作为表格

用power query的advanced editor即m 语言,也可以从网页加载,亦即Web.Content及Web.Page函数,也可以指定HTML及javascript脚本文本。

VBA中的操作

方法一,XML库下载

引用:
Microsoft HTML Object Library
Microsoft XML, v6.0

  1. '=================== GET js data from nowgoal =======================
  2. Dim http As New XMLHTTP60, res As Variant
  3. Dim s As String
  4. With http
  5. .Open "GET", "http://info.nowgoal.com/jsData/matchResult/" & season & "/s" & leagueID & "_en.js", False
  6. .send
  7. res = .responseText
  8. End With
  9. '=============== generate javascript, to render json =================
  10. Dim HTML As IHTMLDocument2
  11. Dim w As HTMLWindow2
  12. Set HTML = New HTMLDocument
  13. Set w = HTML.parentWindow
  14. tt = "s="""";var teamd={};" & _
  15. "for (i_team=0;i_team<arrTeam.length;i_team++){teamd[arrTeam[i_team][0]]=arrTeam[i_team][3];}" & _
  16. "for (round in jh){" & _
  17. "for (matches in jh[round]){" & _
  18. "s+=""<tr>"";" & _
  19. "s+=""<td>""+round.substring(2)+""</td>"";" & _
  20. "for (i_match=0;i_match<jh[round][matches].length;i_match++) {" & _
  21. "s1=jh[round][matches][i_match];" & _
  22. "if (i_match==4 || i_match==5){" & _
  23. "s+=""<td>""+teamd[s1]+""</td>"";" & _
  24. "}else{s+=""<td>""+s1+""</td>"";}}" & _
  25. "s+=""</tr>"";}}"
  26. While InStr(res, ",,") > 0
  27. res = Replace(res, ",,", ",'',")
  28. Wend
  29. res = Replace(res, "'", """")
  30. '====================== execute javascript ===========================
  31. w.execScript "var jh={};"
  32. w.execScript res
  33. w.execScript (tt)
  34. '=================== render json to HTML Table =======================
  35. HTML.body.innerHTML = "<html><head></head><body><table id=""jht"">" & w.s & "</table></body></html>"
  36. '========================= copy and paste ============================
  37. w.execScript ("var ctrlRange = document.body.createControlRange();ctrlRange.add(document.getElementById('jht'));ctrlRange.execCommand(""Copy"");")

方法二,使用Internet Controls下载

引用:
Microsoft HTML Object Library
Microsoft Internet Controls

  1. Dim elslds As Object
  2. Dim html As New HTMLDocument
  3. Set Soccerwayhtml = CreateObject("Msxml2.ServerXMLHTTP")
  4. Url="https://int.soccerway.com/matches/2022/07/04/united-states/mls/nashville-mls/portland-timbers-mls/3709463/"
  5. With Soccerwayhtml
  6. .Open "GET", Url, False
  7. .setRequestHeader "Accept", "text/html"
  8. .setRequestHeader "Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7"
  9. .setRequestHeader "Accept-Encoding", "identity"
  10. .setRequestHeader "Cookie", "___ws_d_st={}; sw_l10m=us; sw_l10org=US"
  11. .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
  12. .send
  13. resdata = .responseText
  14. End With
  15. Do Until Soccerwayhtml.readyState = 4
  16. DoEvents
  17. Loop
  18. '===处理数据===
  19. html.body.innerHTML = resdata
  20. Set elslds = html.getElementsByClassName("sidelined-content")