def get_trade_behavior(uid):
    """Download the rebalancing history of the Xueqiu cube ``SP<uid>``.

    The first page of the history endpoint is requested to learn
    ``maxPage``; every page is then walked and one row is produced per
    entry of the page's ``"list"``.  Only the first element of each entry's
    ``"rebalancing_histories"`` is recorded.

    Args:
        uid: Cube id as a string; it is concatenated after ``SP`` in the URL.

    Returns:
        On success, a list of rows of the form
        ``[uid, trade_time, stock_name, stock_symbol, prev_weight_adjusted,
        target_weight, price]`` where ``trade_time`` is ``updated_at``
        (milliseconds) formatted as local time ``%Y-%m-%d %H:%M:%S``.
        On any failure (network error, unexpected JSON shape, ...) the
        two-element marker ``[uid, "异常"]``.
    """
    import random
    import time

    import requests

    accept_html = 'text/html;q=0.9,*/*;q=0.8'
    accept_json = 'application/json, text/plain, */*'
    browser_profiles = [
        ("Mozilla/5.0 (X11; CrOS x86_64 10066.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", accept_html),
        ("Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/69.0.3497.100 Mobile/13B143 Safari/601.1.46", accept_html),
        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A", accept_html),
        ("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0", accept_json),
        ("Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", accept_json),
    ]
    # One header set is picked at random per request.  Every set carries
    # "Connection: close", so no connection is kept alive between requests.
    headers = [
        {
            'User-Agent': user_agent,
            'Accept': accept,
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Connection': 'close',
        }
        for user_agent, accept in browser_profiles
    ]

    result = []
    # The context manager guarantees the session is closed on every path.
    with requests.Session() as s:
        try:
            def fetch(page_no):
                """Return the decoded JSON of one history page, then pause briefly."""
                url = ("https://xueqiu.com/service/tc/snowx/PAMID/cubes/rebalancing/history?cube_symbol=SP"
                       + uid + "&count=20&page=" + str(page_no))
                # The timeout turns a stalled connection into a failure
                # result instead of blocking the whole crawl forever.
                payload = s.get(url, headers=random.choice(headers), stream=True,
                                allow_redirects=False, timeout=20).json()
                time.sleep(random.random() * 3)
                return payload

            page = fetch(1)
            maxpage = page["maxPage"]
            for k in range(1, maxpage + 1):
                print("正在检索{%s}-第%d页-总共%d页" % (uid, k, maxpage))
                if k > 1:
                    # Page 1 was already downloaded to read maxPage; reuse it.
                    page = fetch(k)
                for entry in page["list"]:
                    # updated_at is in milliseconds; strftime needs seconds.
                    seconds = int(round(entry["updated_at"]) / 1000)
                    trade_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))
                    first_change = entry["rebalancing_histories"][0]
                    result.append([
                        uid,
                        trade_time,
                        first_change["stock_name"],
                        first_change["stock_symbol"],
                        first_change["prev_weight_adjusted"],
                        first_change["target_weight"],
                        first_change["price"],
                    ])
            print("{%s} 检索完毕!" % uid)
            return result
        except Exception:
            # Deliberate best-effort: any failure marks this uid as failed so
            # the batch continues.  Ctrl-C / SystemExit are no longer swallowed.
            print("{%s} 异常!" % uid)
            return [uid, "异常"]


def read_csv(name, folder="C:\\Users\\viemax\\Desktop\\"):
    """Read ``<folder><name>.csv`` and return its rows as a list of lists.

    Args:
        name: File name without the ``.csv`` extension.
        folder: Directory prefix (including the trailing separator) that is
            concatenated in front of ``name``; defaults to the original
            hard-coded desktop path.

    Returns:
        A list with one list of strings per CSV row.
    """
    import csv

    # newline="" lets the csv module do its own line-ending handling; the
    # with-block closes the file once all rows have been read.
    with open(folder + name + ".csv", "r", newline="") as handle:
        return list(csv.reader(handle))


# Load the ids to crawl: the first two rows are skipped and the second column
# of every remaining row is taken.
no_data_id = read_csv("no_data_id")
obj = [row[1] for row in no_data_id[2:]]

# Crawl every other id, starting with the first one.
res = []
for i in obj[0::2]:
    r = get_trade_behavior(i)
    res.append(r)
def xueqiu(num):
    """Fetch the Xueqiu cube page ``https://xueqiu.com/P/SP<num>`` and classify it.

    Args:
        num: Cube id as a string; it is concatenated after ``SP`` in the URL.

    Returns:
        One of the following (each returned list is also printed):

        * ``[num, "timeout", "timeout"]`` when the request times out.
        * ``[num, href_without_first_char, status]`` when an element with
          class ``"creator fn-clear"`` exists.  ``status`` is the text of
          the first ``"cube-closed"`` element, or ``"未关停!"`` if there is
          none.
        * ``[num, "NaN", "404_雪球"]`` when there is no such element and the
          page title is ``"404_雪球"``.
        * ``[num, "AttributeError", "page_error"]`` when there is no such
          element and the page has no ``<title>``.
        * ``None`` when there is no such element and the title is anything
          else (kept for backward compatibility).
    """
    import random
    import time

    import requests
    from bs4 import BeautifulSoup

    url = u"https://xueqiu.com/P/SP" + num

    accept_html = 'text/html;q=0.9,*/*;q=0.8'
    accept_json = 'application/json, text/plain, */*'
    browser_profiles = [
        ("Mozilla/5.0 (X11; CrOS x86_64 10066.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", accept_html),
        ("Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/69.0.3497.100 Mobile/13B143 Safari/601.1.46", accept_html),
        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A", accept_html),
        ("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0", accept_json),
        ("Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", accept_json),
    ]
    headers = [
        {
            'User-Agent': user_agent,
            'Accept': accept,
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Connection': 'close',
        }
        for user_agent, accept in browser_profiles
    ]

    # NOTE(review): these are hard-coded logged-in session cookies; they
    # should live outside the source file.
    # NOTE(review): each dict holds ONE cookie (named "cookies_are" in the
    # first entry, "cookie_are" in the others) whose value is the entire
    # cookie string.  Left unchanged on purpose -- confirm whether separate
    # cookies were intended.
    cookie = [
        {"cookies_are": "device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; _gat_gtag_UA_16079156_4=1; xq_a_token=18b7f7dec4f54032863219716eaf839ee940199d; xqat=18b7f7dec4f54032863219716eaf839ee940199d; xq_r_token=f27bcc9f6c7b6446279ee9448db195b118b8f17c; xq_token_expire=Sat%20Nov%2024%202018%2001%3A55%3A26%20GMT%2B0800%20(CST); xq_is_login=1; u=7147604028; __utmb=1.52.10.1540828390; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835763"},
        {"cookie_are": "device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; xq_token_expire=Sat%20Nov%2024%202018%2001%3A55%3A26%20GMT%2B0800%20(CST); __utmb=1.52.10.1540828390; _gat_gtag_UA_16079156_4=1; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=b2f21e25cd1817bf15c1c89cc72b25ad537495de; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat=b2f21e25cd1817bf15c1c89cc72b25ad537495de; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token=bb8e27cca180872ab70314097a5077578ff119c8; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=1559188240; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835848"},
        {"cookie_are": "device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; xq_a_token=b70e7188d32f804237b6a42c052b5bcf74ebeea2; xqat=b70e7188d32f804237b6a42c052b5bcf74ebeea2; xq_r_token=b004ebba4649dfef7bba54f6ae7b703e5bca6a61; xq_token_expire=Sat%20Nov%2024%202018%2001%3A58%3A30%20GMT%2B0800%20(CST); xq_is_login=1; u=1497969916; __utmb=1.56.10.1540828390; _gat_gtag_UA_16079156_4=1; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835925"},
        {"cookie_are": "device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; __utma=1.312459015.1529772425.1540825606.1540828390.19; xq_token_expire=Sat%20Nov%2024%202018%2001%3A58%3A30%20GMT%2B0800%20(CST); aliyungf_tc=AQAAAAVyoiWa1w4AKvJZ2ozyzTPwnciM; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540829378,1540829450,1540836740,1540866196; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=4458f8df93a013c35835d0320917b19dcaab0a24; xq_a_token.sig=FfAS5LGC_XBO11rmXuA6Nb3o4VI; xqat=4458f8df93a013c35835d0320917b19dcaab0a24; xqat.sig=t2g7eE2UG80Frcg03R-7nudVIBA; xq_r_token=4812b56991883e9913998e8816706912bff911e8; xq_r_token.sig=R6AgMpKf0fhe6GkWdS_etJ0Y3Dw; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=6146826778; u.sig=h5P6Xki5cmObHzNcRMVufpWUnZc; _gat_gtag_UA_16079156_4=1; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540866325"},
    ]

    # The with-block closes the session on every exit path, including the
    # timeout return and the unrecognised-page fall-through.
    with requests.Session() as s:
        try:
            cookies = random.choice(cookie)
            obj = s.get(url, headers=random.choice(headers), cookies=cookies,
                        stream=True, allow_redirects=False, timeout=20)
            # Long randomized pause between consecutive page fetches.
            time.sleep(8 + random.random() * 3.2)
            # stream=True defers the download, so the body must be read while
            # the session is still open.
            bs = BeautifulSoup(obj.content, 'lxml')
        except requests.exceptions.Timeout:
            outcome = [num, "timeout", "timeout"]
            print(outcome)
            return outcome

    closed_marks = bs.find_all(attrs={"class": "cube-closed"})
    res_current = closed_marks[0].get_text() if closed_marks else "未关停!"

    creator_links = bs.find_all(attrs={"class": "creator fn-clear"})
    if creator_links:
        res_id = creator_links[0].attrs["href"]
        # Drop the first character of the href (presumably a leading "/").
        outcome = [num, res_id[1:], res_current]
        print(outcome)
        return outcome

    # No creator element: decide between a 404 page and a broken page.
    title_tag = bs.find("title")
    if title_tag is None:
        outcome = [num, "AttributeError", "page_error"]
        print(outcome)
        return outcome

    res_404 = title_tag.get_text()
    if res_404 == "404_雪球":
        outcome = [num, "NaN", res_404]
        print(outcome)
        return outcome

    # Any other title: nothing is reported, matching the previous behaviour.
    return None
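# no_data_id.csv -- the data file loaded by read_csv("no_data_id"); kept as a comment because a bare file name is not a meaningful statement.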
# Merge the two crawl batches (`res` and `res_0` are produced elsewhere) and
# drop entries that came back empty.
res_final = []
res_final.extend(res)
res_final.extend(res_0)
result = [i for i in res_final if i != []]

# A failed crawl is the marker [uid, "异常"].  The length guard keeps a
# successful result containing a single row from raising IndexError on i[1].
final = [i for i in result if not (len(i) > 1 and i[1] == "异常")]
except_id = [i for i in result if len(i) > 1 and i[1] == "异常"]

# Flatten the successful per-id results into one list of rows.
need = []
for i in final:
    need.extend(i)