def data_arrange(data):
    """Collect the results of a batch of pending tasks.

    Calls ``get()`` on every element of *data* and gathers the returned
    values in order. Elements whose ``get()`` raises a JSON decoding error
    or a ``requests`` SSL error are skipped (best effort), but the failure
    is logged instead of being dropped without a trace.

    Args:
        data: Iterable of objects exposing a ``get()`` method (presumably
            async task results -- confirm against the caller).

    Returns:
        A list with the value of every ``get()`` call that succeeded.
    """
    import json
    import logging

    import requests

    logger = logging.getLogger(__name__)
    skippable = (json.decoder.JSONDecodeError, requests.exceptions.SSLError)

    data_set = []
    for ps in data:
        try:
            data_set.append(ps.get())
        except skippable as err:
            # Deliberate best effort: one bad task must not abort the batch.
            logger.warning(
                "Skipping task result (%s): %s", type(err).__name__, err
            )
    return data_set
def data_tran(text):
    """Flatten one nested result mapping into a list of flat rows.

    Only the first key of *text* is used as the location; its value is
    iterated as a sequence of mappings. Only the first key of each of those
    mappings is used as the keyword; its value is iterated as a sequence of
    records. Every record ``t`` becomes one row
    ``[location, keyword, *t]``.

    Args:
        text: Mapping shaped like ``{location: [{keyword: [record, ...]}, ...]}``.
            An empty (or ``None``) mapping yields no rows, and empty inner
            mappings are skipped, instead of raising ``IndexError``.

    Returns:
        A list of rows, each a new list.
    """
    contain = []
    if text:
        loca = next(iter(text))
        for it in text[loca]:
            if not it:
                # Nothing to read a keyword from; skip rather than crash.
                continue
            keys = next(iter(it))
            for t in it[keys]:
                contain.append([loca, keys, *t])
    print(contain)
    return contain
def data_output(data):
    """Turn a batch of pending tasks into flat rows of at most four fields.

    Gathers the task results with ``data_arrange``, flattens each one with
    ``data_tran``, then strips ``"NaN"`` placeholders from any row that is
    longer than four items.

    Args:
        data: Iterable passed straight through to ``data_arrange``.

    Returns:
        A list of rows (lists). Rows longer than four items have had
        ``"NaN"`` entries removed until they are four items long or contain
        no more ``"NaN"``.
    """
    res = []
    for it in data_arrange(data):
        res.extend(data_tran(it))

    for g in res:
        # Guarded loop: ``list.remove`` raises ValueError when the value is
        # absent, and a row may carry more than one placeholder.
        while len(g) > 4 and "NaN" in g:
            g.remove("NaN")
    return res
def output_data(all_data, output_file_name,
                output_dir="/Users/mengxinwei/Downloads/"):
    """Write the final data to a CSV file and return it as a DataFrame.

    Args:
        all_data: Row data accepted by ``pandas.DataFrame``; each row must
            provide the four columns location, keyword, date and text.
        output_file_name: File name without the ``.csv`` extension.
        output_dir: Directory the CSV file is written to. Defaults to the
            location that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The ``pandas.DataFrame`` that was written to disk.
    """
    import os

    import pandas as pd

    name = ["location", "keyword", "date", "text"]
    table = pd.DataFrame(columns=name, data=all_data)
    table.to_csv(os.path.join(output_dir, output_file_name + ".csv"))
    return table
###############################
# Build the flattened rows from the raw task results.
data_final = data_output(data_1)
for g in data_final:
    # Strip "NaN" placeholders from over-long rows. The membership check
    # keeps ``list.remove`` from raising ValueError when none is left.
    while len(g) > 4 and "NaN" in g:
        g.remove("NaN")
# NOTE(review): ``final_2000`` is not defined in the visible code, and the
# ``data_final`` computed above is not what gets written -- confirm this is
# intended.
s = output_data(final_2000, "Weibo_RawText_2000")
def get_sentiment(text):
    """Query the Baidu AIP sentiment-classification endpoint for *text*.

    Makes two HTTP requests: one to the OAuth endpoint to obtain an
    ``access_token``, then a POST of ``{"text": text}`` to the
    ``sentiment_classify`` endpoint.

    Args:
        text: The text to classify.

    Returns:
        The fourth character from the end of the GBK-decoded response body,
        as a one-character string.
        NOTE(review): presumably the digit of the ``sentiment`` field; this
        depends on the exact layout of the response -- confirm, and consider
        parsing the JSON instead.

    Raises:
        KeyError: If the token response carries no ``access_token``.
    """
    # NOTE(security): replaces the ssl module's default HTTPS context factory
    # with the unverified one, for the whole process.
    ssl._create_default_https_context = ssl._create_unverified_context
    urllib3.disable_warnings()

    # Step 1: obtain the access_token.
    # NOTE(security): the client id and secret are hard-coded in this URL;
    # they should be loaded from configuration and rotated.
    host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=pd152AVsnYc6Nbfk5Yuh9XTh&client_secret=ZTn7ASCKAGgPFEFmMxszEoQmAlVb2LRM'
    request = urllib.request.Request(host)
    request.add_header('Content-Type', 'application/json; charset=UTF-8')
    # Context manager so the HTTP response is always closed.
    with urllib.request.urlopen(request) as response:
        content = response.read()
    content_str = str(content, encoding="utf-8")
    # Parsed with json.loads rather than eval: eval would execute whatever
    # the remote end sends and fails on JSON literals (true/false/null).
    content_dir = json.loads(content_str)
    access_token = content_dir['access_token']

    # Step 2: request the sentiment classification.
    http = urllib3.PoolManager()
    url = "https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify?access_token=" + access_token
    data = {"text": text}
    encode_data = json.dumps(data).encode('GBK')
    # JSON: an already-serialized JSON payload is sent by passing it as
    # ``body`` together with a matching Content-Type header.
    request = http.request(
        'POST',
        url,
        body=encode_data,
        headers={'Content-Type': 'application/json'}
    )
    result = request.data.decode("GBK")[-4]
    return result