1. def get_trade_behavior(uid):
    2. import requests
    3. import random
    4. import time
    5. import json
    6. result = []
    7. res = []
    8. headers = [{
    9. 'User-Agent': "Mozilla/5.0 (X11; CrOS x86_64 10066.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    10. 'Accept': 'text/html;q=0.9,*/*;q=0.8',
    11. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    12. 'Connection': 'close'},
    13. {
    14. 'User-Agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/69.0.3497.100 Mobile/13B143 Safari/601.1.46",
    15. 'Accept': 'text/html;q=0.9,*/*;q=0.8',
    16. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    17. 'Connection': 'close'},
    18. {
    19. 'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A",
    20. 'Accept': 'text/html;q=0.9,*/*;q=0.8',
    21. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    22. 'Connection': 'close'},
    23. {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
    24. 'Accept': 'application/json, text/plain, */*',
    25. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    26. 'Connection': 'close'},
    27. {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    28. 'Accept': 'application/json, text/plain, */*',
    29. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    30. 'Connection': 'close'}]
    31. s = requests.Session()
    32. s.keep_alive = False
    33. # t = 1
    34. try:
    35. # while True:
    36. url = "https://xueqiu.com/service/tc/snowx/PAMID/cubes/rebalancing/history?cube_symbol=SP" + uid + "&count=20&page=1"
    37. obj = s.get(url, headers=random.choice(headers), stream=True, allow_redirects=False).json()
    38. time.sleep(random.random() * 3)
    39. maxpage = obj["maxPage"]
    40. # if obj["list"] != []:
    41. for k in range(1, maxpage + 1):
    42. url = "https://xueqiu.com/service/tc/snowx/PAMID/cubes/rebalancing/history?cube_symbol=SP" + uid + "&count=20&page=" + str(k)
    43. print("正在检索{%s}-第%d页-总共%d页" % (uid, k, maxpage))
    44. obj = s.get(url, headers=random.choice(headers), stream=True, allow_redirects=False).json()
    45. time.sleep(random.random() * 3)
    46. for i in obj["list"]:
    47. res.append(uid)
    48. time_stamp = i["updated_at"]
    49. time_stamp_10 = int(round(time_stamp) / 1000)
    50. time_local = time.localtime(time_stamp_10)
    51. trade_time = time.strftime("%Y-%m-%d %H:%M:%S", time_local)
    52. trade_history_stock_name = i["rebalancing_histories"][0]["stock_name"]
    53. trade_history_stock_symbol = i["rebalancing_histories"][0]["stock_symbol"]
    54. trade_history_stock_prev_weight = i["rebalancing_histories"][0]["prev_weight_adjusted"]
    55. trade_history_stock_target_weight = i["rebalancing_histories"][0]["target_weight"]
    56. trade_history_stock_exec_price = i["rebalancing_histories"][0]["price"]
    57. res.append(trade_time)
    58. res.append(trade_history_stock_name)
    59. res.append(trade_history_stock_symbol)
    60. res.append(trade_history_stock_prev_weight)
    61. res.append(trade_history_stock_target_weight)
    62. res.append(trade_history_stock_exec_price)
    63. res_copy = res.copy()
    64. result.append(res_copy)
    65. res.clear()
    66. print("{%s} 检索完毕!" % uid)
    67. return result
    68. except:
    69. print("{%s} 异常!" % uid)
    70. return [uid, "异常"]
    71. def read_csv(name):
    72. import csv
    73. '''读取CSV文件数据'''
    74. csv_file = csv.reader(open("C:\\Users\\viemax\\Desktop\\" + name + ".csv", "r"))
    75. object_website = []
    76. for i in csv_file:
    77. object_website.append(i)
    78. # print(i)
    79. return object_website
    80. no_data_id = read_csv("no_data_id")
    81. obj = []
    82. for i in no_data_id[2:]:
    83. obj.append(i[1])
    84. res = []
    85. for i in obj[0::2]:
    86. r = get_trade_behavior(i)
    87. res.append(r)
    1. def xueqiu(num):
    2. import requests
    3. from bs4 import BeautifulSoup
    4. import random
    5. import time
    6. url = u"https://xueqiu.com/P/SP" + num
    7. headers = [{'User-Agent': "Mozilla/5.0 (X11; CrOS x86_64 10066.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    8. 'Accept': 'text/html;q=0.9,*/*;q=0.8',
    9. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    10. 'Connection': 'close'},
    11. {'User-Agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/69.0.3497.100 Mobile/13B143 Safari/601.1.46",
    12. 'Accept': 'text/html;q=0.9,*/*;q=0.8',
    13. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    14. 'Connection': 'close'},
    15. {'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A",
    16. 'Accept': 'text/html;q=0.9,*/*;q=0.8',
    17. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    18. 'Connection': 'close'},
    19. {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
    20. 'Accept': 'application/json, text/plain, */*',
    21. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    22. 'Connection': 'close'},
    23. {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    24. 'Accept': 'application/json, text/plain, */*',
    25. 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    26. 'Connection': 'close'}]
    27. cookie = [dict(cookies_are="device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; _gat_gtag_UA_16079156_4=1; xq_a_token=18b7f7dec4f54032863219716eaf839ee940199d; xqat=18b7f7dec4f54032863219716eaf839ee940199d; xq_r_token=f27bcc9f6c7b6446279ee9448db195b118b8f17c; xq_token_expire=Sat%20Nov%2024%202018%2001%3A55%3A26%20GMT%2B0800%20(CST); xq_is_login=1; u=7147604028; __utmb=1.52.10.1540828390; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835763"),
    28. dict(cookie_are="device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; xq_token_expire=Sat%20Nov%2024%202018%2001%3A55%3A26%20GMT%2B0800%20(CST); __utmb=1.52.10.1540828390; _gat_gtag_UA_16079156_4=1; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=b2f21e25cd1817bf15c1c89cc72b25ad537495de; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat=b2f21e25cd1817bf15c1c89cc72b25ad537495de; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token=bb8e27cca180872ab70314097a5077578ff119c8; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=1559188240; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835848"),
    29. dict(cookie_are="device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; aliyungf_tc=AQAAAIe8YFC/zwwAKvJZ2tC9k8DvMt34; __utmc=1; __utma=1.312459015.1529772425.1540825606.1540828390.19; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540825899,1540828382,1540829378,1540829450; snbim_minify=true; __utmt=1; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token.sig=p4pCAuWXphKrks3IjEzTbJFCcb4; xqat.sig=uWTQIYsOCqtgymFewPvkgLk8CyM; xq_r_token.sig=Q9P70D5S5ZuHuFEXVJ6umTRqL1o; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u.sig=Ra3Ht4oGmAXu5VtkPBpRXum-Ntc; xq_a_token=b70e7188d32f804237b6a42c052b5bcf74ebeea2; xqat=b70e7188d32f804237b6a42c052b5bcf74ebeea2; xq_r_token=b004ebba4649dfef7bba54f6ae7b703e5bca6a61; xq_token_expire=Sat%20Nov%2024%202018%2001%3A58%3A30%20GMT%2B0800%20(CST); xq_is_login=1; u=1497969916; __utmb=1.56.10.1540828390; _gat_gtag_UA_16079156_4=1; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540835925"),
    30. dict(cookie_are="device_id=33a80200aacb73cf594a45942b285a12; _ga=GA1.2.312459015.1529772425; s=ey177hmx06; bid=ae1522508305909e11f0ccaefc21ae37_jn93s7rs; __utmz=1.1539536073.4.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; Hm_lvt_fe218c11eab60b6ab1b6f84fb38bcc4a=1539591917; _gid=GA1.2.758749044.1540657586; __utma=1.312459015.1529772425.1540825606.1540828390.19; xq_token_expire=Sat%20Nov%2024%202018%2001%3A58%3A30%20GMT%2B0800%20(CST); aliyungf_tc=AQAAAAVyoiWa1w4AKvJZ2ozyzTPwnciM; Hm_lvt_1db88642e346389874251b5a1eded6e3=1540829378,1540829450,1540836740,1540866196; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=4458f8df93a013c35835d0320917b19dcaab0a24; xq_a_token.sig=FfAS5LGC_XBO11rmXuA6Nb3o4VI; xqat=4458f8df93a013c35835d0320917b19dcaab0a24; xqat.sig=t2g7eE2UG80Frcg03R-7nudVIBA; xq_r_token=4812b56991883e9913998e8816706912bff911e8; xq_r_token.sig=R6AgMpKf0fhe6GkWdS_etJ0Y3Dw; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=6146826778; u.sig=h5P6Xki5cmObHzNcRMVufpWUnZc; _gat_gtag_UA_16079156_4=1; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1540866325")]
    31. s = requests.Session()
    32. # s.keep_alive = False
    33. try:
    34. cookies = random.choice(cookie)
    35. obj = s.get(url, headers=random.choice(headers), cookies=cookies, stream=True, allow_redirects=False, timeout=20)
    36. time.sleep(8 + random.random() * 3.2)
    37. bs = BeautifulSoup(obj.content, 'lxml')
    38. except requests.exceptions.Timeout:
    39. print([num, "timeout", "timeout"])
    40. return [num, "timeout", "timeout"]
    41. try:
    42. try:
    43. res_current = bs.find_all(attrs={"class": "cube-closed"})[0].get_text()
    44. except IndexError:
    45. res_current = "未关停!"
    46. res_id = bs.find_all(attrs={"class": "creator fn-clear"})[0].attrs["href"]
    47. s.close()
    48. print([num, res_id[1:], res_current])
    49. return [num, res_id[1:], res_current]
    50. except IndexError:
    51. try:
    52. res_404 = bs.find("title").get_text()
    53. if res_404 == "404_雪球":
    54. s.close()
    55. print([num, "NaN", res_404])
    56. return [num, "NaN", res_404]
    57. except AttributeError:
    58. s.close()
    59. print([num, "AttributeError", "page_error"])
    60. return [num, "AttributeError", "page_error"]

    no_data_id.csv

    1. result = []
    2. res_final = []
    3. res_final.extend(res)
    4. res_final.extend(res_0)
    5. for i in res_final:
    6. if i != []:
    7. result.append(i)
    8. final = []
    9. for i in result:
    10. if i[1] != "异常":
    11. final.append(i)
    12. except_id = []
    13. for i in result:
    14. if i[1] == "异常":
    15. except_id.append(i)
    16. need = []
    17. for i in final:
    18. need.extend(i)