1. import re
    2. import time
    3. import base64
    4. import rsa
    5. import json
    6. import requests
    7. from binascii import b2a_hex
    8. class WeiboLogin(object):
    9. def __init__(self, username, pwd):
    10. self.username = username
    11. self.pwd = pwd
    12. self.url = "https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.19)"
    13. self.headers = {
    14. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    15. 'Accept-Encoding': 'gzip, deflate, br',
    16. 'Accept-Language': 'zh-CN,zh;q=0.9',
    17. 'Cache-Control': 'no-cache',
    18. 'Connection': 'keep-alive',
    19. 'Content-Length': '651',
    20. 'Content-Type': 'application/x-www-form-urlencoded',
    21. 'Host': 'login.sina.com.cn',
    22. 'Origin': 'https://weibo.com',
    23. 'Pragma': 'no-cache',
    24. 'Referer': 'https://weibo.com/',
    25. 'Sec-Fetch-Dest': 'iframe',
    26. 'Sec-Fetch-Mode': 'navigate',
    27. 'Sec-Fetch-Site': 'cross-site',
    28. 'Sec-Fetch-User': '?1',
    29. 'Upgrade-Insecure-Requests': '1',
    30. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
    31. }
    32. self.session = requests.Session()
    33. def get_prelogin(self, su):
    34. """获取登入前请求所得的响应,该响应中包含登入时所需要的参数"""
    35. url = 'https://login.sina.com.cn/sso/prelogin.php'
    36. # 获取当前毫秒时间戳
    37. ts = str(int(time.time()*1000))
    38. params = {
    39. 'entry': 'weibo',
    40. 'callback': 'sinaSSOController.preloginCallBack',
    41. 'su': su,
    42. 'rsakt': 'mod',
    43. 'client': 'ssologin.js(v1.4.19)',
    44. '_': ts,
    45. }
    46. res = requests.get(url, params=params)
    47. # 返回的数据是普通文本类型,而非json类型,通过正则取出,并将其由str类型转为dict类型
    48. pattern = r'{.*?}'
    49. data = re.search(pattern, res.text).group()
    50. data_dict = json.loads(data)
    51. servertime = data_dict.get("servertime")
    52. nonce = data_dict.get("nonce")
    53. rsakv = data_dict.get("rsakv")
    54. pubkey = data_dict.get("pubkey")
    55. exectime = data_dict.get("exectime")
    56. # prelt是指预登入时所用时间,单位是毫秒
    57. prelt = int(time.time() * 1000) - int(ts) - exectime
    58. pcid = data_dict.get('pcid')
    59. return servertime, nonce, rsakv, pubkey, prelt, pcid
    60. def get_su(self, username):
    61. """生成通过base64编码后的用户名"""
    62. su = base64.b64encode(username.encode()).decode()
    63. return su
    64. def get_sp(self, servertime, nonce, pubkey):
    65. """生成通过rsa加密后的密码"""
    66. # 按照规则生成公钥
    67. rsa_pubkey = rsa.PublicKey(int(pubkey, 16), int('10001', 16))
    68. # rsa加密的是在pwd前拼接了servertime和nonce两个参数的字符,而不仅是pwd
    69. message = str(servertime) + '\t' + nonce + '\n' + self.pwd
    70. # 加密,得到的是字节类型数据
    71. res = rsa.encrypt(message.encode(), rsa_pubkey)
    72. # 返回其16进制的表现形式
    73. sp = b2a_hex(res).decode()
    74. return sp
    75. def get_post_data(self):
    76. """构造登入时需要提交的表单参数"""
    77. su = self.get_su(self.username)
    78. servertime, nonce, rsakv, pubkey, prelt, pcid = self.get_prelogin(su)
    79. sp = self.get_sp(servertime, nonce, pubkey)
    80. form_data = {
    81. 'entry': 'weibo',
    82. 'gateway': '1',
    83. 'from': '',
    84. 'savestate': '0',
    85. 'qrcode_flag': 'false',
    86. 'useticket': '1',
    87. 'pagerefer': '',
    88. 'vsnf': '1',
    89. 'su': su,
    90. 'service': 'miniblog',
    91. 'servertime': servertime,
    92. 'nonce': nonce,
    93. 'pwencode': 'rsa2',
    94. 'rsakv': rsakv,
    95. 'sp': sp,
    96. 'sr': '1536*864',
    97. 'encoding': 'UTF-8',
    98. 'prelt': prelt,
    99. 'url': 'https://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
    100. 'returntype': 'META',
    101. }
    102. return form_data
    103. def get_redirect_url(self, res):
    104. """使用re获取去响应中要跳转的url, url为空表示登入出现问题"""
    105. pattern = r"location.replace\([\"\'](.*?)[\"\']\)"
    106. url = re.search(pattern, res.text)
    107. if url:
    108. return url.group(1)
    109. return url
    110. def get_errno(self, url1, res):
    111. """获取去登入失败的状态码,以判断登入失败的原因"""
    112. errno_dict = {
    113. 4038: "登录次数过于频繁",
    114. 4049: "请填写验证码",
    115. 4010: "帐号尚未激活",
    116. 4090: "此帐号未激活,请登录原帐号",
    117. 5024: "请填写正确的微盾动态码",
    118. 5025: "动态码有误,请重新输入",
    119. 5: "尚未注册微博",
    120. 101: "用户名或密码错误",
    121. 4098: "您的帐号还没有设置密码,为方便登录请",
    122. 9999: "当前网络超时,请稍后再试",
    123. 2071: "您已开启登录保护,请扫码登录"
    124. }
    125. pattern = r"{.*?}"
    126. data = re.search(pattern, res.text)
    127. if data:
    128. data = data.group()
    129. errno = json.loads(data).get('errno')
    130. errno_msg = errno_dict.get(int(errno))
    131. print(errno, errno_msg)
    132. return errno
    133. else:
    134. print('重定向时请求头headers1中的cookie已过期, 请在浏览器中访问下面的url,复制请求头中的cookie添加到headers1')
    135. print(url1)
    136. def get_login_res(self, form_data):
    137. """一共跳转3次到最后的登入后的微博首页"""
    138. res = self.session.post(self.url, headers=self.headers, data=form_data)
    139. url1 = self.get_redirect_url(res)
    140. res1 = self.session.get(url1)
    141. url2 = self.get_redirect_url(res1)
    142. if not url2:
    143. # url2为空,则表示登入时出现问题
    144. print('跳转失败')
    145. headers1 = {
    146. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    147. 'Accept-Encoding': 'gzip, deflate, br',
    148. 'Accept-Language': 'zh-CN,zh;q=0.9',
    149. 'Cache-Control': 'no-cache',
    150. 'Connection': 'keep-alive',
    151. 'Host': 'weibo.com',
    152. 'Pragma': 'no-cache',
    153. 'Sec-Fetch-Dest': 'document',
    154. 'Sec-Fetch-Mode': 'navigate',
    155. 'Sec-Fetch-Site': 'none',
    156. 'Sec-Fetch-User': '?1',
    157. 'Upgrade-Insecure-Requests': '1',
    158. 'Cookie': '',
    159. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
    160. }
    161. res1 = self.session.get(url1, headers=headers1)
    162. errno = self.get_errno(url1, res1)
    163. else:
    164. # url2不为空则继续跳转
    165. print('跳转成功')
    166. res2 = self.session.get(url2)
    167. url3 = 'https://weibo.com'
    168. res3 = self.session.get(url3)
    169. if '我的首页 微博-随时随地发现新鲜事' in res3.text:
    170. print('登入成功')
    171. else:
    172. print('登入失败')
    173. with open('weibo.html', 'w', encoding='utf-8') as f:
    174. f.write(res3.text)
    175. def run(self):
    176. # 构造postdata表单数据
    177. form_data = self.get_post_data()
    178. # 发送登入请求,登入成功后请求登入才能访问的页面并返回
    179. self.get_login_res(form_data)
    180. if __name__ == '__main__':
    181. username = ''
    182. pwd = ''
    183. weiboLogin = WeiboLogin(username, pwd)
    184. weiboLogin.run()