# Bilibili API documentation: http://api.kaaass.net/biliapi
# Related blog: http://docs.kaaass.net/
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Sample Bilibili's online-user statistics and log the averages.

When run as a script, the online endpoint is polled several times, the
three counters are averaged, one row is appended to ``online_num.csv`` and
the whole CSV is re-exported to ``online_num.xlsx``.
"""
import json
import random
import time

import pandas as pd
import requests

# Pool of browser User-Agent strings; one is picked at random per request.
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
)

# Number of samples averaged per run of the script.
_SAMPLE_COUNT = 10


def gettimediff(start, end):
    """Return the elapsed time between two datetimes as ``HH:MM:SS``.

    Args:
        start: Start ``datetime``.
        end: End ``datetime``.

    Returns:
        The difference ``end - start`` formatted as zero-padded
        hours:minutes:seconds. Hours are not wrapped at 24, so a span of
        one day and one hour is reported as ``25:00:00``.
    """
    # total_seconds() includes whole days, which the ``.seconds`` attribute
    # alone would silently drop.
    seconds = int((end - start).total_seconds())
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    return "%02d:%02d:%02d" % (h, m, s)


def getheaders():
    """Return request headers carrying a randomly chosen User-Agent.

    Returns:
        A dict with a single ``'User-Agent'`` key.
    """
    return {'User-Agent': random.choice(_USER_AGENTS)}


def checkip(targeturl, ip):
    """Check whether a proxy can fetch ``targeturl`` successfully.

    Args:
        targeturl: URL to request through the proxy.
        ip: Proxy address string. The text before the first ``':'`` is used
            as the key of the ``proxies`` mapping -- presumably a full proxy
            URL such as ``scheme://host:port``; verify against callers.

    Returns:
        ``True`` if the request completes with HTTP status 200, ``False``
        on any other status or on a request failure.
    """
    headers = getheaders()
    proxies = {ip.split(':')[0]: ip}
    try:
        status = requests.get(
            url=targeturl, proxies=proxies, headers=headers, timeout=5
        ).status_code
    except requests.RequestException:
        # Connection errors, timeouts, bad proxies: the proxy is unusable.
        return False
    return status == 200


def findip():
    """Fetch the current online statistics from the Bilibili web API.

    Despite its name, this function takes no arguments and does not deal
    with proxies; it requests the online endpoint and extracts three
    counters from the ``data`` object of the JSON response.

    Returns:
        A tuple ``(web_online, play_online, all_count)``.
    """
    url = 'https://api.bilibili.com/x/web-interface/online'
    headers = getheaders()
    callback = requests.get(url=url, headers=headers, timeout=10).text
    data = json.loads(callback)['data']
    return data['web_online'], data['play_online'], data['all_count']


def main():
    """Average several samples, append them to the CSV and export to Excel."""
    num_sum = 0
    play_sum = 0
    count_sum = 0
    # Timestamp of the run in local time, format %Y-%m-%d %H:%M:%S.
    time_now = time.strftime('%Y-%m-%d %H:%M:%S')

    for _ in range(_SAMPLE_COUNT):
        # One request per sample, so the three values come from the same
        # response.
        web_online, play_online, all_count = findip()
        num_sum += web_online
        play_sum += play_online
        count_sum += all_count
        time.sleep(1)

    num = num_sum / _SAMPLE_COUNT
    play_avg = play_sum / _SAMPLE_COUNT
    count = count_sum / _SAMPLE_COUNT

    # Append mode keeps the rows written by earlier runs.
    with open('online_num.csv', 'a') as f:
        f.write('%s,%s,%s,%s\n' % (time_now, num, play_avg, count))

    df = pd.read_csv(r"online_num.csv", engine="python", header=None, encoding='gbk')
    df.index = range(len(df))
    df.columns = ["现在时间", "网页在线数", "在线播放人数", "投稿数"]
    df.to_excel("online_num.xlsx")


if __name__ == "__main__":
    main()
