爬取数据

在电脑端浏览器中打开 QQ 空间相册,打开开发者工具的 Network(网络)面板,在请求列表中过滤出 cgi_list_photo 请求。
image.png

复制这个请求

  1. https://h5.qzone.qq.com/proxy/domain/photo.qzone.qq.com/fcgi-bin/cgi_list_photo?g_tk=1987381833&callback=shine4_Callback&t=433437617&mode=0&idcNum=4&hostUin=1632534087&topicId=V10Mv1if1L1cw0&noTopic=0&uin=1632534087&pageStart=0&pageNum=1000000&skipCmtCount=0&singleurl=1&batchId=&notice=0&appid=4&inCharset=utf-8&outCharset=utf-8&source=qzone&plat=qzone&outstyle=json&format=jsonp&json_esc=1&callbackFun=shine4&_=1643626219591

请求中有两个分页参数:pageStart(起始位置)和 pageNum(每页数量),即上面链接里的 pageStart=0&pageNum=1000000。
pageNum 最大为 500,照片较多时可以修改 pageStart 分页多次爬取;我的相册只有 1677 张,所以就手动分几次爬了。
返回的数据结构类似下面这样,可以从中取自己想要的字段。

  1. {
  2. "code": 0,
  3. "subcode": 0,
  4. "message": "",
  5. "default": 0,
  6. "data": {
  7. "limit": 0,
  8. "photoList": [
  9. {
  10. "batchId": "884755853",
  11. "browser": 0,
  12. "cameratype": "Apple iPhone XR",
  13. "cp_flag": false,
  14. "cp_x": 0,
  15. "cp_y": 0,
  16. "desc": "",
  17. "exif": {
  18. "exposureCompensation": "0",
  19. "exposureMode": "0",
  20. "exposureProgram": "2",
  21. "exposureTime": "",
  22. "flash": "16",
  23. "fnumber": "2.2",
  24. "focalLength": "2.87",
  25. "iso": " 640",
  26. "lensModel": "",
  27. "make": "Apple",
  28. "meteringMode": "5",
  29. "model": "iPhone XR",
  30. "originalTime": "2021:11:01 22:54:34"
  31. },
  32. "forum": 0,
  33. "frameno": 0,
  34. "height": 1440,
  35. "id": 4294967295,
  36. "is_video": false,
  37. "is_weixin_mode": 0,
  38. "ismultiup": 0,
  39. "lloc": "NR8AVjZiQ2dBeE5qTXlOVE0wTURnM3ZQOSpZWXZLbUIwIQcAcGhvdG9neg!!",
  40. "modifytime": 1635778492,
  41. "name": "2021-11-01",
  42. "origin": 0,
  43. "origin_upload": 0,
  44. "origin_url": "",
  45. "owner": "1632534087",
  46. "ownername": "1632534087",
  47. "photocubage": 66267,
  48. "phototype": 1,
  49. "picmark_flag": 0,
  50. "picrefer": 22,
  51. "platformId": 52,
  52. "platformSubId": 2,
  53. "poiName": "",
  54. "pre": "http://photogz.photo.store.qq.com/psc?/V10Mv1if1L1cw0/ruAMsa53pVQWN7FLK88i5v0y1iaoHUkJ6L5KHCqdg642wKdLYEWGWgIWjjSIJhohOkvPoCnGgHdy0KYj373W3yJfzUF6ZjrPhrJ47ScObg0!/m&bo=OASgBQAAAAABB7k!",
  55. "raw": "",
  56. "raw_upload": 0,
  57. "rawshoottime": "2021-11-01 22:54:34",
  58. "shoottime": "2021-11-01 ",
  59. "shorturl": "",
  60. "sloc": "NR8AVjZiQ2dBeE5qTXlOVE0wTURnM3ZQOSpZWXZLbUIwIQcAcGhvdG9neg!!",
  61. "tag": "",
  62. "uploadtime": "2021-11-01 22:54:52",
  63. "url": "http://photogz.photo.store.qq.com/psc?/V10Mv1if1L1cw0/ruAMsa53pVQWN7FLK88i5v0y1iaoHUkJ6L5KHCqdg642wKdLYEWGWgIWjjSIJhohOkvPoCnGgHdy0KYj373W3yJfzUF6ZjrPhrJ47ScObg0!/b&bo=OASgBQAAAAABB7k!",
  64. "width": 1080,
  65. "yurl": 0
  66. }
  67. // ...
  68. ],
  69. "t": "433437617",
  70. "topic": {
  71. "bitmap": "10000010",
  72. "browser": 0,
  73. "classid": 106,
  74. "comment": 1,
  75. "cover_id": "NR8AVjZiQ2dBeE5qTXlOVE0wTURnM001VDJZWGFmQWkwIQcAcGhvdG90ag!!",
  76. "createtime": 1487917751,
  77. "desc": "每天一张",
  78. "handset": 0,
  79. "id": "V10Mv1if1L1cw0",
  80. "is_share_album": 0,
  81. "lastuploadtime": 1643549755,
  82. "modifytime": 1643549770,
  83. "name": "每天一张20170224",
  84. "ownerName": "1632534087",
  85. "ownerUin": "1632534087",
  86. "pre": "http://m.qpic.cn/psc?/V10Mv1if1L1cw0/ruAMsa53pVQWN7FLK88i5nUfSKMv7jMt2.ivhScEjYT..7p.H2RYcSlc*4IGoY6AeKZBOWfhGvTvd66Mc62ExbXLKs8qEo7epDP9ROIma30!/a",
  87. "priv": 5,
  88. "pypriv": 3,
  89. "share_album_owner": 0,
  90. "total": 1677,
  91. "url": "http://m.qpic.cn/psc?/V10Mv1if1L1cw0/ruAMsa53pVQWN7FLK88i5nUfSKMv7jMt2.ivhScEjYT..7p.H2RYcSlc*4IGoY6AeKZBOWfhGvTvd66Mc62ExbXLKs8qEo7epDP9ROIma30!/b",
  92. "viewtype": 2
  93. },
  94. "totalInAlbum": 1677,
  95. "totalInPage": 10
  96. }
  97. }

用 Node.js 下载到本地(需要先安装依赖:npm install request async)

  const request = require("request");
  const fs = require("fs");
  const async = require("async");

  /**
   * Stream a remote picture into a local file.
   *
   * @param {string} src - URL of the picture to fetch.
   * @param {string} dest - Path of the file to write.
   * @param {function(Error=): void} [done] - Optional callback, invoked exactly
   *   once: with no argument after the file has been closed, or with an Error
   *   when the request fails, the server does not answer 200, or the file
   *   cannot be written. Omitting it keeps the old fire-and-forget behavior.
   *
   * @example
   * downloadPic(
   *   "http://m.qpic.cn/psc?/V10Mv1if1L1cw0/ruAMsa53pVQWN7FLK88i5nUfSKMv7jMt2.ivhScEjYT..7p.H2RYcSlc*4IGoY6AeKZBOWfhGvTvd66Mc62ExbXLKs8qEo7epDP9ROIma30!/b&bo=OASgBQAAAAABB7k!",
   *   "./pictures/1.jpg"
   * );
   */
  const downloadPic = function (src, dest, done = function () {}) {
    let settled = false;
    // One failure can surface as several events (e.g. "error" followed by
    // "close"), so only the first outcome is reported to the caller.
    const finish = function (err) {
      if (settled) {
        return;
      }
      settled = true;
      done(err);
    };

    const file = fs.createWriteStream(dest);
    const req = request(src);

    // Report the error, then stop the transfer and release the file handle.
    // A partially written file may be left at `dest`.
    const fail = function (err) {
      finish(err);
      req.abort();
      file.destroy();
    };

    req.on("error", fail);
    req.on("response", function (res) {
      // Without this check an error page would be saved under a .jpg name.
      if (res.statusCode !== 200) {
        fail(new Error(`GET ${src} responded with HTTP ${res.statusCode}`));
      }
    });

    req
      .pipe(file)
      .on("error", fail)
      .on("close", function () {
        finish();
      });
  };

  // Pictures to download: `url` is the picture address and `shoottime` becomes
  // the file name. Fill this list from `data.photoList` of the API response.
  const picList = [
    {
      url: "http://m.qpic.cn/psc?/V10Mv1if1L1cw0/ruAMsa53pVQWN7FLK88i5nUfSKMv7jMt2.ivhScEjYT..7p.H2RYcSlc*4IGoY6AeKZBOWfhGvTvd66Mc62ExbXLKs8qEo7epDP9ROIma30!/b&bo=OASgBQAAAAABB7k!",
      shoottime: "2022-01-30",
    },
  ];

  // createWriteStream does not create missing directories.
  fs.mkdirSync("./pictures", { recursive: true });

  const failed = [];

  // Download one picture at a time, pausing 400 ms before each request.
  async.mapSeries(
    picList,
    function (item, callback) {
      setTimeout(function () {
        // `shoottime` values returned by the API can carry trailing whitespace
        // (e.g. "2021-11-01 "), which must not end up in the file name.
        // NOTE(review): pictures sharing a shoottime overwrite each other.
        const dest = `./pictures/${item.shoottime.trim()}.jpg`;
        downloadPic(item.url, dest, function (err) {
          if (err) {
            // Best effort: record the failure and keep going with the rest.
            failed.push(item);
            console.error(`Failed to download ${item.url}: ${err.message}`);
          }
          callback(null, item);
        });
      }, 400);
    },
    function (err, results) {
      if (err) {
        console.error(err);
        return;
      }
      console.log(
        `Finished: ${results.length - failed.length} downloaded, ${failed.length} failed.`
      );
    }
  );

github地址

https://github.com/withwz/qq-zoom-album-download