对一句话进行分析,给出其是正向还是负面。
`from sklearn.naive_bayes import MultinomialNB
from snownlp import SnowNLP
import pandas as pd
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
def a1():
a = “环境优美” # 一条正向句子
_b = “垃圾遍地” # 一条负面句子
_m = SnowNLP(a)
n = SnowNLP(b)
print(“正面:”,m.sentiments)
print(“负面”,n.sentiments)
def a2():
# 读取文件
_data = pd.read_csv(“酒店评论.csv”)
remarks = data[“review”].values.tolist() # 评论
labels = data[“label”].values.tolist() # 标签
content = []
for line in remarks:
line = str(line)
line = line.strip(“\r\t\n”)
temp = jieba.lcut(line)
content.append(“ “.join(temp))
# print(content)
_tfidf = TfidfVectorizer()<br /> tfidf.fit(content)<br /> train = tfidf.transform(content)
_# 朴素贝叶斯<br /> _model = MultinomialNB()<br /> _# 训练模型<br /> _model.fit(train,labels)
_# 使用模型进行测试<br /> _test = "垃圾"<br /> test = jieba.lcut(test)<br /> test = tfidf.transform(test)<br /> _# 识别情感<br /> _rs = model.predict(test)<br /> em = "正面"<br /> if rs == 0:<br /> em = "负面"<br /> print("测试语句:",test,"的情感预测为:",em)
_# 38 - 46行代码只能针对test是单个词进行分析<br /> # 现在对其优化,使其能够真正对一句话进行分析<br /> _testNew = "真他妈垃圾"<br /> testNew = jieba.lcut(testNew)<br /> print("testNew = ",testNew)<br /> testNew = tfidf.transform(testNew)<br /> rs = model.predict(testNew)<br /> print("rs = ",rs)<br /> emnum = sum(rs)<br /> print("emnum = ",emnum)<br /> em = "正面"<br /> if emnum <= len(rs)/2:<br /> em = "负面"<br /> print("测试语句:", testNew, "的情感预测为:", em)`<br />
训练数据:酒店评论.csv