Keywords: Python, web crawler
GET example
import requests
import os
import datetime

log_in_url = "http://www.uimaker.com/member/ubi.php?action=qd&_=1526202830017"

# headers
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "Referer": "http://www.uimaker.com/member/index.php"
}

# the query string could also be passed via the params argument
# read the saved cookie string ("name=value; name=value; ...") into a dict
cookies = {}
print(os.path.join(os.path.abspath("."), "cookies"))
with open(os.path.join(os.path.abspath("."), "cookies"), 'r', encoding='utf-8') as f:
    for item in f.read().split(';'):
        name, value = item.split('=', 1)
        cookies[name.strip()] = value

# use a session so cookies are kept across requests
session = requests.session()
response = session.get(log_in_url, headers=headers, cookies=cookies)
print(response.json())

# append a timestamped status line to the log
with open('./log.txt', 'a+', encoding='utf-8') as f:
    f.write(str(datetime.datetime.now()) + "\t")
    f.write(str(response.status_code) + os.linesep)
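The comment above mentions passing the query string through the params argument rather than hard-coding it into the URL. A minimal sketch of the same check-in GET written that way; splitting the action and _ values out of the URL like this is an assumption, not part of the original script (the _ value is just the timestamp cache-buster from the URL above):

import requests

session = requests.session()
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "Referer": "http://www.uimaker.com/member/index.php"
}
# requests URL-encodes the dict and appends it as ?action=qd&_=...
params = {"action": "qd", "_": "1526202830017"}
response = session.get("http://www.uimaker.com/member/ubi.php",
                       params=params, headers=headers)
print(response.url)  # the final URL with the encoded query string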
POST example
jq22 check-in example. In practice the cookie string can simply be put into the headers directly; a sketch of that variant follows the code below.
import requests
from bs4 import BeautifulSoup as bs
import os
import datetime

session = requests.session()
log_in_url = "http://www.jq22.com/signIn.aspx"

# headers
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "Referer": "http://www.jq22.com/signIn.aspx"
}

# form fields posted by the ASP.NET sign-in page
data = {
    "Button1": "签 到",
    "__VIEWSTATE": "OQz0gcxiE2uemW6coqrtvy4sKAViSr6o8Xxn4QjA2WoOiQFGhlAYIOEl9am94jQqkvf4+19RlxfIFvNagglpiyBGz4smPulqXEeQ7q4kU0VsdhfifzVlfGrj+DTYHKTR",
    "__VIEWSTATEGENERATOR": "ECDA716A",  # does not change
    "__EVENTVALIDATION": "/Up6CRbE1Igwin/Bz2lx2qJo9+keEisPxD70frbfphQPhghu+ViAae9BGgf/NgO1peWGTpB1g0dHGJUyuCP9R53m201mCW3WyGvL6fjdFH58Ds7ZxC7HP3GlDan4Jl1C"
}

# read the saved cookie string ("name=value; name=value; ...") into a dict
cookies = {}
print(os.path.join(os.path.abspath("."), "cookies"))
with open(os.path.join(os.path.abspath("."), "cookies"), 'r', encoding='utf-8') as f:
    for item in f.read().split(';'):
        name, value = item.split('=', 1)
        cookies[name.strip()] = value

response = session.post(log_in_url, headers=headers, data=data, cookies=cookies)
print(response.status_code)

# append a timestamped status line to the log
with open('./log.txt', 'a+', encoding='utf-8') as f:
    f.write(str(datetime.datetime.now()) + "\t")
    f.write(str(response.status_code) + os.linesep)
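As noted above, the cookie string does not have to be parsed into a dict at all; it can be sent verbatim as a Cookie header. A minimal sketch of that variant, assuming the cookies file holds a single "name=value; name=value" line as in the scripts above (the GET of the sign-in page is only for illustration):

import os
import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "Referer": "http://www.jq22.com/signIn.aspx",
}
# send the raw cookie string exactly as it was copied from the browser
with open(os.path.join(os.path.abspath("."), "cookies"), 'r', encoding='utf-8') as f:
    headers["Cookie"] = f.read().strip()

response = requests.get("http://www.jq22.com/signIn.aspx", headers=headers)
print(response.status_code)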
JSON request example
import requests
import json

headers = {
    "Content-Type": "application/json; charset=UTF-8",
    "Referer": "http://jinbao.pinduoduo.com/index?page=5",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
}
url = "http://jinbao.pinduoduo.com/network/api/common/goodsList"
payload = {"keyword": "", "sortType": 0, "withCoupon": 0, "categoryId": 16,
           "pageNumber": 1, "pageSize": 60}

# the API expects a JSON body, so serialize the dict before posting
response = requests.post(url, data=json.dumps(payload), headers=headers).text
print(response)
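requests can also handle the JSON serialization itself: passing the dict through the json= keyword serializes it and sets the application/json Content-Type automatically, so the manual json.dumps and the explicit Content-Type header become optional. A minimal sketch of the same request in that style:

import requests

headers = {
    "Referer": "http://jinbao.pinduoduo.com/index?page=5",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
}
url = "http://jinbao.pinduoduo.com/network/api/common/goodsList"
payload = {"keyword": "", "sortType": 0, "withCoupon": 0, "categoryId": 16,
           "pageNumber": 1, "pageSize": 60}

# json= serializes the dict and sets Content-Type: application/json automatically
response = requests.post(url, json=payload, headers=headers)
print(response.text)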
Fetching proxies
import requests
import re

"""
Probably because the 歧梦谷 site's system is imperfect, visiting the promotion
link adds no more than 10 dream points per day, so here the link is visited
through a series of different proxy IPs.
"""

proxies_list = []

# scrape a list of free proxies
url = "http://www.xicidaili.com/wn"
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"}
response = requests.get(url, headers=headers)
re_list = re.findall(r'<td>\d+\.\d+\.\d+\.\d+</td>[\s\S]+?<td>\d+</td>', response.text, re.S)
for x in re_list:
    ip = re.search(r'<td>([\s\S]+)</td>[\s\S]+?<td>', x).group(1)
    port = re.search(r'<td>[\s\S]+</td>[\s\S]+?<td>([\s\S]+)</td>', x).group(1)
    proxies_list.append("http://" + ip + ":" + port)

# hit the promotion link through each proxy
url = "http://www.68m.com/?fromuid=566043"
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"}
i = 0
for x in proxies_list:
    # route both http and https through the proxy; with only an "https" entry
    # the plain-http target URL would bypass the proxy entirely
    proxies = {"http": x, "https": x}
    response = requests.get(url, proxies=proxies, headers=headers)
    print(response)
    i += 1
    if i > 10:
        break
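Free proxies scraped like this are often dead or very slow, so each request through them can hang or raise an exception. A minimal sketch of probing the list first, under the assumption that a short timeout and a throwaway test URL (httpbin.org/ip here, not part of the original script) are acceptable:

import requests

def filter_alive(proxies_list, test_url="http://httpbin.org/ip", timeout=5):
    """Return only the proxies that answer a simple GET within the timeout."""
    alive = []
    for addr in proxies_list:
        try:
            r = requests.get(test_url,
                             proxies={"http": addr, "https": addr},
                             timeout=timeout)
            if r.status_code == 200:
                alive.append(addr)
        except requests.RequestException:
            # unreachable or broken proxy, skip it
            continue
    return alive

Only the addresses returned by filter_alive would then be fed into the visiting loop above.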
