def data_arrange(data):
    """Collect the results of a batch of pending jobs, skipping failed ones.

    Args:
        data: Iterable of handles exposing a zero-argument ``get()`` method
            (presumably ``multiprocessing`` async results -- confirm against
            the caller).

    Returns:
        list: The value returned by every ``get()`` call that succeeded, in
        input order. Results whose ``get()`` raised a JSON decode error or a
        ``requests`` SSL error are left out.
    """
    import json
    import logging

    try:
        import requests
    except ImportError:
        # Without requests installed nothing can raise its SSLError, so the
        # JSON decode error is the only failure left to tolerate.
        skippable = (json.decoder.JSONDecodeError,)
    else:
        skippable = (json.decoder.JSONDecodeError, requests.exceptions.SSLError)

    logger = logging.getLogger(__name__)
    data_set = []
    for position, ps in enumerate(data):
        try:
            data_set.append(ps.get())
        except skippable as exc:
            # Best-effort collection: one bad result must not abort the batch,
            # but it should leave a trace instead of vanishing silently.
            logger.warning("Skipping result %d: %r", position, exc)
    return data_set
    def data_tran(text):
        """Flatten one nested result mapping into a list of flat rows.

        The input is read as ``{location: [{keyword: [fields, ...]}, ...]}``.
        Only the first key of the outer mapping and the first key of each inner
        mapping are used. Every ``fields`` iterable becomes one row of the form
        ``[location, keyword, *fields]``.

        Args:
            text: Mapping shaped as described above.

        Returns:
            list[list]: One independent list per ``fields`` entry. Empty when
            ``text`` is empty or holds no entries.
        """
        import logging

        if not text:
            # An empty mapping has no first key; there is nothing to flatten.
            return []

        location = next(iter(text))
        rows = []
        for entry in text[location]:
            if not entry:
                continue
            keyword = next(iter(entry))
            rows.extend([location, keyword, *fields] for fields in entry[keyword])

        # Debug-level log instead of printing every row to stdout.
        logging.getLogger(__name__).debug(
            "data_tran produced %d rows for %r", len(rows), location
        )
        return rows
    def data_output(data):
        """Collect all job results and flatten them into one list of rows.

        Args:
            data: Iterable of result handles, passed straight to
                ``data_arrange``.

        Returns:
            list[list]: The rows produced by ``data_tran`` for every collected
            result, concatenated in order. For each row longer than four items,
            the first ``"NaN"`` entry (if any) is removed in place.
        """
        rows = []
        for result in data_arrange(data):
            rows.extend(data_tran(result))

        for row in rows:
            # Presumably "NaN" is a placeholder that pushes a row past the four
            # expected columns -- confirm against the crawler. The membership
            # test keeps a long row without the placeholder from raising
            # ValueError in list.remove().
            if len(row) > 4 and "NaN" in row:
                row.remove("NaN")
        return rows
    def output_data(all_data, output_file_name, output_dir="/Users/mengxinwei/Downloads/"):
        """Write the final rows to a CSV file and return them as a DataFrame.

        Args:
            all_data: Row data accepted by ``pandas.DataFrame``; each row must
                match the four columns ``location``, ``keyword``, ``date`` and
                ``text``.
            output_file_name: File name without the ``.csv`` extension.
            output_dir: Directory the file is written to. Defaults to the
                location that used to be hard-coded, so existing callers are
                unaffected.

        Returns:
            pandas.DataFrame: The table that was written.
        """
        import os

        import pandas as pd

        columns = ["location", "keyword", "date", "text"]
        table = pd.DataFrame(columns=columns, data=all_data)
        # os.path.join(dir, "") only guarantees a trailing separator, so the
        # default directory yields exactly the path the old concatenation did.
        target = os.path.join(output_dir, "") + output_file_name + ".csv"
        table.to_csv(target)
        return table
    ###############################
    # Script section: flatten the collected results, then export a table to CSV.
    # NOTE(review): data_1 and final_2000 are not defined in the visible part of
    # this file -- confirm they are created before this point. Also confirm that
    # exporting final_2000 rather than data_final is intended.
    data_final = data_output(data_1)
    for g in data_final:
        # data_output() has already removed one "NaN" from each over-long row.
        # The membership test keeps this second pass from raising ValueError
        # when no placeholder is left.
        if len(g) > 4 and "NaN" in g:
            g.remove("NaN")
    s = output_data(final_2000, "Weibo_RawText_2000")
    def get_sentiment(text):
        """Classify *text* with the aip.baidubce.com ``sentiment_classify`` API.

        Every call performs two HTTPS requests: one to obtain an OAuth access
        token and one to submit the text for classification.

        Args:
            text: The text to classify. It is sent as the ``text`` field of a
                JSON body.

        Returns:
            str: The ``sentiment`` value of the first item in the response, as
            a string. If the response cannot be parsed that way, the
            fourth-from-last character of the raw response is returned, which
            is what this function historically returned in every case.
        """
        import logging

        # NOTE(review): this turns off TLS certificate verification for every
        # urllib HTTPS request in the process, not just this call. Kept because
        # callers may rely on it; prefer fixing the local CA bundle.
        ssl._create_default_https_context = ssl._create_unverified_context
        urllib3.disable_warnings()

        # Step 1: obtain an access token.
        # NOTE(review): the client id and secret are hard-coded in the URL; move
        # them to configuration and rotate them. The token is also re-fetched on
        # every call and could be cached.
        host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=pd152AVsnYc6Nbfk5Yuh9XTh&client_secret=ZTn7ASCKAGgPFEFmMxszEoQmAlVb2LRM'
        token_request = urllib.request.Request(host)
        token_request.add_header('Content-Type', 'application/json; charset=UTF-8')
        with urllib.request.urlopen(token_request) as token_response:
            content = token_response.read()
        # Parse with json.loads instead of eval(): the body comes from the
        # network and must never be executed as Python.
        access_token = json.loads(content.decode("utf-8"))['access_token']

        # Step 2: submit the text for classification.
        http = urllib3.PoolManager()
        url = "https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify?access_token=" + access_token
        encode_data = json.dumps({"text": text}).encode('GBK')
        # The pre-encoded JSON goes in the body with a matching Content-Type.
        response = http.request(
            'POST',
            url,
            body=encode_data,
            headers={'Content-Type': 'application/json'},
        )
        raw = response.data.decode("GBK")
        try:
            # Read the field by name rather than by its position in the raw
            # text, which silently breaks if the key order changes.
            return str(json.loads(raw)["items"][0]["sentiment"])
        except (ValueError, LookupError, TypeError):
            # No usable "items" (for example an API error payload): keep the
            # legacy positional behaviour so callers still receive a string,
            # but leave a trace that the value is not a real label.
            logging.getLogger(__name__).warning(
                "Unexpected sentiment_classify response: %s", raw
            )
            return raw[-4]