b站开发文档:http://api.kaaass.net/biliapi
    相关博客:http://docs.kaaass.net/

    1. # !/usr/bin/env python3
    2. # -*- coding: utf-8 -*-
    3. import requests
    4. import random
    5. import json
    6. import time
    7. import pandas as pd
    8. # ---------------------------------------------------------------------------------------
    9. # 计算时间差,格式: 时分秒
    def gettimediff(start, end):
        """Return the elapsed time between two datetimes as ``HH:MM:SS``.

        Args:
            start: The earlier ``datetime``.
            end: The later ``datetime``; ``end - start`` must be a ``timedelta``.

        Returns:
            str: Zero-padded ``"HH:MM:SS"``. Hours are not capped at 24, so a
            gap of one day and one hour yields ``"25:00:00"``. When ``end``
            precedes ``start`` the result carries a leading ``-``.
        """
        # total_seconds() includes whole days; timedelta.seconds does not,
        # which made any gap of a day or more wrap around.
        total = int((end - start).total_seconds())
        sign = "-" if total < 0 else ""
        minutes, seconds = divmod(abs(total), 60)
        hours, minutes = divmod(minutes, 60)
        return "%s%02d:%02d:%02d" % (sign, hours, minutes, seconds)
    16. # ----------------------------------------------------------------------------------------------------------------------
    17. # 返回一个随机的请求头 headers
    def getheaders():
        """Return request headers carrying a randomly chosen User-Agent.

        Returns:
            dict: ``{'User-Agent': <one entry picked at random from the list>}``.
        """
        # Every entry needs its own trailing comma: without one, adjacent string
        # literals are silently concatenated into a single bogus User-Agent.
        user_agent_list = [
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
            "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
            "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        ]
        return {'User-Agent': random.choice(user_agent_list)}
    42. # -----------------------------------------------------检查ip是否可用---------------------
    def checkip(targeturl, ip):
        """Check whether a proxy can be used to fetch ``targeturl``.

        Args:
            targeturl: URL to request through the proxy.
            ip: Proxy address, either with a scheme (``"http://1.2.3.4:8080"``)
                or bare (``"1.2.3.4:8080"``, treated as an HTTP proxy).

        Returns:
            bool: True when the request answers with HTTP 200 (5-second
            timeout); False on any other status or on a requests-level error.
        """
        headers = getheaders()
        if '://' in ip:
            # requests selects a proxy by the scheme used as the mapping key.
            proxies = {ip.split(':')[0]: ip}
        else:
            # A bare host:port would produce a key requests never matches, so
            # the request would bypass the proxy and the check would be
            # meaningless. Proxy URLs must carry a scheme.
            proxy_url = 'http://' + ip
            proxies = {'http': proxy_url, 'https': proxy_url}
        try:
            response = requests.get(url=targeturl, proxies=proxies, headers=headers, timeout=5)
        except requests.RequestException:
            # Only network/protocol failures mean "proxy unusable"; a bare
            # except would also swallow KeyboardInterrupt and SystemExit.
            return False
        return response.status_code == 200
    54. # ------------------------------------------------------- Fetch Bilibili online statistics ----------------------
    def findip():
        """Fetch the current online statistics from the Bilibili web API.

        Takes no arguments. Issues one GET request (10-second timeout) with a
        random User-Agent and parses the JSON body.

        Returns:
            tuple: ``(web_online, play_online, all_count)`` as found under the
            ``data`` key of the response.
        """
        response = requests.get(
            url='https://api.bilibili.com/x/web-interface/online',
            headers=getheaders(),
            timeout=10,
        )
        stats = json.loads(response.text)['data']
        return stats['web_online'], stats['play_online'], stats['all_count']
    if __name__ == "__main__":
        # Sample the online statistics several times, append the averages to
        # online_num.csv, then export the whole CSV history to online_num.xlsx.
        sample_count = 10  # number of samples averaged per run
        num_sum = 0
        play_sum = 0
        count_sum = 0
        # Timestamp of the run, taken before sampling starts.
        time_now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        for _ in range(sample_count):
            # One request per sample: calling findip() once per field tripled
            # the traffic and mixed values from different snapshots.
            web_online, play_online_now, all_count = findip()
            num_sum = num_sum + web_online
            play_sum = play_sum + play_online_now
            count_sum = count_sum + all_count
            time.sleep(1)
        num = num_sum / sample_count
        play_online = play_sum / sample_count
        count = count_sum / sample_count
        # Append mode keeps earlier runs; the with-block closes the file.
        with open('online_num.csv', 'a') as f:
            f.write('%s,%s,%s,%s\n' % (time_now, num, play_online, count))
        df = pd.read_csv(r"online_num.csv", engine="python", header=None, encoding='gbk')
        df.index = range(len(df))
        df.columns = ["现在时间","网页在线数","在线播放人数","投稿数"]
        df.to_excel("online_num.xlsx")