数据库设计如下


代码如下
# -*- coding: utf-8 -*-
import requests  # third-party HTTP client used for the Weibo AJAX endpoints
import json
import datetime
import time
import pymysql

# Seconds to wait for Weibo to answer before a request is abandoned.
REQUEST_TIMEOUT = 10
# Layout of the "created_at" field in the timeline JSON.  The "+0800" offset
# is matched literally, so a value with any other offset raises ValueError.
WEIBO_TIME_FORMAT = '%a %b %d %H:%M:%S +0800 %Y'
# Layout of the publish time stored with each post row.
DB_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# Posts published before this moment are not collected.
EARLIEST_POST_TIME = datetime.datetime(2022, 1, 1, 0, 0, 0)
# Upper bound on the number of timeline pages requested per account.
MAX_PAGES = 99

# Open the database connection shared by everything below.
# NOTE(review): the credentials are hard-coded; consider loading them from
# configuration or the environment instead.
db = pymysql.connect(
    host="localhost",
    port=3306,
    user="root",
    password="1234",
    database="database1")

# Rows of the `users` table whose second column is "微博".
userdata = []


def get_account_mes():
    """Load every Weibo account row from the `users` table into `userdata`.

    Only rows whose second column equals "微博" are kept.  The cursor is
    closed even when the query raises.
    """
    with db.cursor() as cursor:
        cursor.execute("""select * from users""")
        userdata.extend(row for row in cursor.fetchall() if row[1] == "微博")


get_account_mes()


def Submit_data_to_database(userdata, fbnr_data) -> str:
    """Store one account row and its post rows in a single transaction.

    :param userdata: 12-item sequence matching the `weibo_account` columns.
    :param fbnr_data: sequence of 14-item sequences matching the
        `weibo_works` columns; may be empty.
    :return: "提交成功" on success, otherwise the text of the error.  The
        function never raises; on failure the transaction is rolled back so
        a half-written account cannot be committed by a later call.
    """
    user_sql = """INSERT INTO `weibo_account`(
        `Subordinate_units`, `name`, `phonenumber`, `platform`,
        `Account_name`, `weibohao`, `Number_of_vermicelli`,
        `Attention_number`, `Total_Weibo_number`, `authentication`,
        `brief_introduction`, `Statistical_time`)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    fbnr_data_sql = """INSERT INTO `weibo_works`(
        `Subordinate_units`, `name`, `phonenumber`, `platform`,
        `Account_name`, `weibohao`, `Weibo_content`, `Weibo_number`,
        `Forward_number`, `Like_number`, `Number_of_comments`, `original`,
        `Release_time`, `Statistical_time`)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    try:
        # The context manager closes the cursor on success and on error.
        with db.cursor() as cursor:
            cursor.executemany(user_sql, [userdata])
            cursor.executemany(fbnr_data_sql, fbnr_data)
        db.commit()
        return "提交成功"
    except Exception as err:
        # Discard any partial insert; otherwise it would stay in the open
        # transaction and be committed together with the next account.
        try:
            db.rollback()
        except Exception:
            # The connection itself is unusable; the original error is still
            # the one worth reporting to the caller.
            pass
        return str(err)


def trans_format(time_string, from_format, to_format='%Y.%m.%d %H:%M:%S'):
    """Convert a time string from one strftime layout to another.

    :param time_string: text to convert.
    :param from_format: layout `time_string` is written in.
    :param to_format: layout of the returned text.
    :return: `time_string` re-rendered in `to_format`.
    :raises ValueError: if `time_string` does not match `from_format`.
    """
    return time.strftime(to_format, time.strptime(time_string, from_format))


# Request headers sent with every Weibo call.
# NOTE(review): the cookie is a hard-coded session value and will expire.
headers = {
    "cookie": "SUB=_2AkMVdLNgf8NxqwFRmP0cxW7jaY5-wwvEieKjKEK7JRMxHRl-yT_nqlIAtRB6PvSdgcJJunaym373dN91W1MIJArq7AzH; "
              "SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WhXawrTDmUF1NkVijRui0Nq; XSRF-TOKEN=etaNQZcst4rs9As_AFABQCIw; _"
              "s_tentry=weibo.com; Apache=5741302240361.778.1646804064448; SINAGLOBAL=5741302240361.778.1646804064448; "
              "ULV=1646804064507:1:1:1:5741302240361.778.1646804064448:; WBPSESS=lGn6cRy34B6AsqM-wzgd2I3xJQoNdKT6SN7Fn"
              "X0cs85ue7ykV44_MHoXYZ8pflevb5zjicdSLORoi8NsE9e4TLIieWK6K88wqnQQWv1OCjPWh7PkbuL85XSFk_elSupLSXOoaSpYmka7"
              "56SI5n5pJLbwdq81wNl0EaauOZM7zfE=",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/92.0.4515.107 Safari/537.36 HBPC/12.0.0.300",
}


def _fetch_json(url):
    """GET *url* with the shared headers and return the parsed JSON body."""
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Decode explicitly instead of relying on a guessed charset.
    response.encoding = 'utf-8'
    return json.loads(response.text)


def _page_count(statuses_count):
    """Return how many timeline pages to request for *statuses_count* posts.

    The first page is counted as 9 posts and every later page as 10.  The
    result is at least 1 and never exceeds MAX_PAGES.
    """
    extra_pages = -(-(statuses_count - 9) // 10)  # ceil((n - 9) / 10)
    return min(max(1, 1 + extra_pages), MAX_PAGES)


def _account_row(usermes, user):
    """Build the `weibo_account` row for one account."""
    description = user["description"]
    return [
        usermes[2],                                   # owning unit
        usermes[3],                                   # contact name
        usermes[4],                                   # contact phone
        usermes[1],                                   # platform
        user["screen_name"],                          # account name
        user["idstr"],                                # Weibo id
        str(user["followers_count"]),                 # followers
        str(user["friends_count"]),                   # following
        str(user["statuses_count"]),                  # total posts
        user.get("verified_reason", "暂无认证"),       # verification
        "暂无简介" if description == "" else description,  # bio
        str(datetime.date.today()),                   # collection date
    ]


def _post_row(usermes, user, post, release_time):
    """Build the `weibo_works` row for one post."""
    text = post["text_raw"]
    # Content is capped at 100 characters, then stripped of zero-width
    # spaces and newlines; truncated text gets a trailing "...".
    if len(text) > 100:
        content = text[:100].replace("\u200b", "").replace("\n", "") + "..."
    else:
        content = text.replace("\u200b", "").replace("\n", "")
    if 'page_info' in post or 'retweeted_status' in post:
        origin = "转发"
    else:
        origin = "原创"
    return (
        usermes[2],                      # owning unit
        usermes[3],                      # contact name
        usermes[4],                      # contact phone
        usermes[1],                      # platform
        user["screen_name"],             # account name
        user["idstr"],                   # Weibo id
        content,                         # post content
        post["idstr"],                   # post id
        str(post['reposts_count']),      # reposts
        str(post['attitudes_count']),    # likes
        str(post['comments_count']),     # comments
        origin,                          # original / repost marker
        release_time,                    # publish time
        str(datetime.date.today()),      # collection date
    )


def _collect_posts(usermes, user):
    """Page through one account's timeline and return its post rows.

    Paging stops at the first empty page, or at the first post published
    before EARLIEST_POST_TIME.
    """
    rows = []
    for page in range(1, _page_count(int(user["statuses_count"])) + 1):
        time.sleep(3)  # pause between page requests
        page_url = ("https://weibo.com/ajax/statuses/mymblog?uid=" + usermes[6]
                    + "&page=" + str(page) + "&feature=0")
        page_json = _fetch_json(page_url)
        time.sleep(2)
        posts = page_json["data"]["list"]
        # An empty page means the server returned nothing usable.
        if not posts:
            break
        reached_cutoff = False
        for post in posts:
            # Convert once; the same text is compared and stored.
            release_time = trans_format(
                post["created_at"], WEIBO_TIME_FORMAT, DB_TIME_FORMAT)
            published = datetime.datetime.strptime(release_time, DB_TIME_FORMAT)
            if published < EARLIEST_POST_TIME:
                reached_cutoff = True
                break
            print(user["screen_name"] + "第" + str(len(rows)) + "篇微博")
            rows.append(_post_row(usermes, user, post, release_time))
        if reached_cutoff:
            print("后续微博日期不符合要求,即将提交数据并查询下一位用户")
            break
    return rows


try:
    for usermes in userdata:
        try:
            if usermes is None:
                continue
            # NOTE(review): only this single account is processed; every
            # other row is skipped.  Confirm the filter is intentional.
            if usermes[5] != "肖战工作室":
                continue
            time.sleep(1)
            url = 'https://weibo.com/ajax/profile/info?custom=' + usermes[6]
            mes = _fetch_json(url)
            user = mes["data"]["user"]
            print("正在查询" + user["screen_name"])
            thisWbUserData = _account_row(usermes, user)
            wbUserAtricData = _collect_posts(usermes, user)
            s = Submit_data_to_database(
                tuple(thisWbUserData), tuple(wbUserAtricData))
            print(s)
        except Exception as e:
            # Top-level boundary: report the failure and continue with the
            # next account instead of aborting the whole run.
            print(e)
finally:
    # Close the connection even if the run is interrupted.
    db.close()