import requests
# Log in to Renren with a cookie-carrying session, fetch a personal page that
# requires authentication, and save it to a local HTML file.
#
# Gotcha: this login endpoint is not the login address you see directly in
# the browser; that is part of Renren's anti-scraping mechanism.
url = "http://www.renren.com/PLogin.do"
# url = "http://www.renren.com/SysHome.do"
# Enter the account and password here. Note that "email" and "password" are
# the key names found in the web page's login form.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or from a config file kept out of version control.
data = {"email": "18904041689", "password": "123686"}
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}

# The context manager closes the session's pooled connections on exit;
# `session` and `response` remain bound at module level afterwards.
with requests.Session() as session:
    # The login POST stores the authentication cookies on the session.
    # An explicit timeout keeps an unresponsive server from hanging the
    # script forever (requests applies no timeout by default).
    login_response = session.post(url, data=data, headers=headers, timeout=10)
    login_response.raise_for_status()

    # Send the same browser User-Agent on the follow-up request as well, and
    # fail loudly on an HTTP error instead of saving an error page to disk.
    response = session.get(
        "http://www.renren.com/personal/413066191",
        headers=headers,
        timeout=10,
    )
    response.raise_for_status()

# Save the fetched page locally as UTF-8 text.
with open('renrenzhuye.html', 'w', encoding="utf-8") as fp:
    fp.write(response.text)
# coding=utf-8
import requests
# Log in to Renren through a session, then fetch a profile page that is only
# reachable when logged in and save it to a local HTML file.

# URL the login form is submitted to.
post_url = "http://www.renren.com/PLogin.do"
post_data = {"email": "your_email", "password": "your_password"}
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
}

# The context manager closes the session's pooled connections on exit;
# `session` and `r` remain bound at module level afterwards.
with requests.Session() as session:
    # Send the login POST through the session; the cookies are kept in it.
    # An explicit timeout keeps an unresponsive server from hanging the
    # script forever (requests applies no timeout by default).
    login_response = session.post(
        post_url, data=post_data, headers=headers, timeout=10
    )
    login_response.raise_for_status()

    # An address that can only be accessed after logging in via the session.
    # This is the personal home page URL.
    r = session.get(
        "http://www.renren.com/327550088/profile",
        headers=headers,
        timeout=10,
    )
    # Fail loudly on an HTTP error instead of saving an error page to disk.
    r.raise_for_status()

# Save the page locally.
with open("renren1.html", "w", encoding="utf-8") as f:
    f.write(r.content.decode('utf-8'))