import requests
from bs4 import BeautifulSoup
import time

# One-off probe request: POST the search form for application date 20090101
# through the proxy tunnel and keep the raw response body (bytes) in `html`.

url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
    'Connection': 'keep-alive',
}

# NOTE(review): session cookies and proxy credentials are hard-coded below;
# consider loading them from the environment or an untracked config file.
#
# requests sends one cookie per dict entry, so the browser-style
# "name=value; name=value" header is split into individual cookies. Storing
# the whole header under a single made-up name would transmit it as the value
# of that one cookie and corrupt the first real cookie.
cookie = dict(
    pair.split("=", 1)
    for pair in (
        "WEE_SID=7ppLQ6_O6fNNT1kUEhhdjNme5Dd6XpTxHdD5AqOHXnJ_R3VkjJK2!-1371704578!792269866!1543155986382; IS_LOGIN=true; wee_username=d2VpbWVuZ3hpbjIwMTI%3D; wee_password=YWR3YWhhZHM2MTM2ODc5; JSESSIONID=7ppLQ6_O6fNNT1kUEhhdjNme5Dd6XpTxHdD5AqOHXnJ_R3VkjJK2!-1371704578!792269866"
    ).split("; ")
)

# Form fields of the search request: first page (offset 0), 12 records.
info = {
    "searchCondition.searchExp": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
    "searchCondition.executableSearchExp": "VDB:((APD='20090101' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))",
    "resultPagination.start": 0,
    "resultPagination.limit": 12,
    "searchCondition.literatureSF": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
}

# Proxy server
proxyHost = "http-dyn.abuyun.com"
proxyPort = "9020"

# Proxy tunnel authentication
proxyUser = "HX7YIG4D7IR9907D"
proxyPass = "D9A0153CCBC8081F"

proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    "host": proxyHost,
    "port": proxyPort,
    "user": proxyUser,
    "pass": proxyPass,
}

# The same tunnel URL is used for both plain and TLS traffic.
proxies = {
    "http": proxyMeta,
    "https": proxyMeta,
}

s = requests.session()
html = s.post(url, headers=head, proxies=proxies, data=info, cookies=cookie).content
def patent_requests(date):
    """Collect every search-result row for one application date.

    Posts the search form page by page (12 records per page) through the
    proxy tunnel until a page comes back without records. The total count
    reported by the server is printed for every page fetched.

    Args:
        date: Application date inserted into the search expressions,
            e.g. "20090101". It is wrapped in single quotes inside the
            expressions, matching the other query builders in this file.

    Returns:
        A list with one 19-item list per record: ``lawStatus``,
        ``patentType``, then the ``fieldMap`` values TIVIEW, FNUM, CPNUM,
        PNUM, VID, APSN, APD, PN_BAK, PD, IC, CPC, PAVIEW, INVIEW, AA, AGT,
        AGY and AGY again.

    Raises:
        KeyError: If a response or record lacks one of the keys read here.
    """
    import requests

    url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
        'Connection': 'keep-alive',
    }

    # NOTE(review): session cookies and proxy credentials are hard-coded;
    # consider loading them from the environment or an untracked config file.
    #
    # requests sends one cookie per dict entry, so the browser-style
    # "name=value; name=value" header is split into individual cookies rather
    # than being sent as the value of a single made-up cookie.
    raw_cookie = "WEE_SID=O1xKu2UpRWGCOPDH_QJnZv-Qcx-EBjY-Z64OGBBgv3x1s4UoRg5Y!792269866!-627030802!1543147054377; IS_LOGIN=true; JSESSIONID=O1xKu2UpRWGCOPDH_QJnZv-Qcx-EBjY-Z64OGBBgv3x1s4UoRg5Y!792269866!-627030802"
    cookie = dict(pair.split("=", 1) for pair in raw_cookie.split("; "))

    # The proxy settings do not depend on the page, so build them once.
    # Proxy server
    proxyHost = "http-dyn.abuyun.com"
    proxyPort = "9020"
    # Proxy tunnel authentication
    proxyUser = "HX7YIG4D7IR9907D"
    proxyPass = "D9A0153CCBC8081F"
    proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
        "host": proxyHost,
        "port": proxyPort,
        "user": proxyUser,
        "pass": proxyPass,
    }
    proxies = {
        "http": proxyMeta,
        "https": proxyMeta,
    }

    # fieldMap keys copied into each row, in output order.
    # Meanings noted in the original code: PD = publication date, IC = IPC
    # class, CPC = CPC class, PAVIEW = applicant, INVIEW = inventor,
    # AA = address, AGT = agent, AGY = agency.
    # NOTE(review): the last column repeats AGY, exactly as the original
    # OWNER_STATUS assignment did; it may have been meant to read a
    # different key -- confirm against a real response.
    field_keys = (
        "TIVIEW", "FNUM", "CPNUM", "PNUM", "VID", "APSN", "APD", "PN_BAK",
        "PD", "IC", "CPC", "PAVIEW", "INVIEW", "AA", "AGT", "AGY", "AGY",
    )

    result = []
    pages = 1
    while True:
        info = {
            "searchCondition.searchExp": "(申请日='%s') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date,
            "searchCondition.executableSearchExp": "VDB:((APD='%s' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))" % date,
            "resultPagination.start": (pages - 1) * 12,
            "resultPagination.limit": 12,
            "searchCondition.literatureSF": "(申请日='%s') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date,
        }
        html = requests.post(url, proxies=proxies, data=info, cookies=cookie, headers=head).json()
        search_result = html["searchResultDTO"]
        page = search_result["pagination"]["totalCount"]
        print("共%d条记录" % page)

        records = search_result["searchResultRecord"]
        if not records:
            # An empty (or null) record list marks the end of the results.
            print("检索完毕!")
            return result

        for record in records:
            field_map = record["fieldMap"]
            row = [record["lawStatus"], record["patentType"]]
            row.extend(field_map[key] for key in field_keys)
            result.append(row)
        pages += 1


test = patent_requests("20090101")
# The bare string below is an expression statement with no runtime effect;
# it looks like a cookie header kept for reference.
"IS_LOGIN=true; WEE_SID=oi5Kv5x04H_asaak8hnaCfGhSkgSyurNL47sRSKG8RCihySrFPjj!792269866!-627030802!1543147330676; JSESSIONID=oi5Kv5x04H_asaak8hnaCfGhSkgSyurNL47sRSKG8RCihySrFPjj!792269866!-627030802"
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import selenium.webdriver.support.ui as ui
from bs4 import BeautifulSoup
import time
import json
import random
import requests


def get_cookies():
    """Log in through a Firefox window and return the browser's cookies.

    Opens the portal page, fills in the account name and password, asks on
    the console for the captcha, submits the login form and reads the
    cookies from the browser. The browser is closed even when a step fails.

    Returns:
        A dict mapping each cookie name to its value, suitable for the
        ``cookies=`` argument of requests. Every cookie the browser holds
        is included, not only the first three.
    """
    options = Options()
    # Headless mode is left disabled -- presumably because the captcha has
    # to be read from the visible browser window.
    # options.add_argument('-headless')
    driver = webdriver.Firefox(executable_path=r'C:\Users\viemax\Desktop\geckodriver.exe', options=options)
    try:
        url = "http://www.pss-system.gov.cn/sipopublicsearch/portal/uiIndex.shtml"
        driver.get(url)
        time.sleep(1)
        # NOTE(review): the account name and password are hard-coded;
        # consider reading them from the environment instead.
        driver.find_element(By.ID, "j_username").send_keys("weimengxin2012")
        driver.find_element(By.ID, "j_password_show").send_keys("adwahads6136879")
        valid = input("请手动输入验证码:")
        driver.find_element(By.ID, "j_validation_code").send_keys(valid)
        driver.find_element(By.LINK_TEXT, "登录").click()
        time.sleep(1)
        # requests sends one cookie per dict entry, so each browser cookie
        # gets its own key instead of being joined into a single string.
        return {entry["name"]: entry["value"] for entry in driver.get_cookies()}
    finally:
        driver.quit()


cookies_headless = get_cookies()


# fieldMap keys copied into each result row, in output order.
# Meanings noted in the original code: PD = publication date, IC = IPC class,
# CPC = CPC class, PAVIEW = applicant, INVIEW = inventor, AA = address,
# AGT = agent, AGY = agency.
# NOTE(review): the last column repeats AGY, exactly as the original
# OWNER_STATUS assignment did; it may have been meant to read a different
# key -- confirm against a real response.
_FIELD_KEYS = (
    "TIVIEW", "FNUM", "CPNUM", "PNUM", "VID", "APSN", "APD", "PN_BAK",
    "PD", "IC", "CPC", "PAVIEW", "INVIEW", "AA", "AGT", "AGY", "AGY",
)


def _record_to_row(record):
    """Flatten one search-result record into a 19-item list."""
    field_map = record["fieldMap"]
    row = [record["lawStatus"], record["patentType"]]
    row.extend(field_map[key] for key in _FIELD_KEYS)
    return row


def requests_login_pss_system(date):
    """Collect every search-result row for one application date.

    Posts the search form page by page (12 records per page) using the
    module-level ``cookies_headless`` cookies, pausing between requests,
    until a page comes back without records. Progress and every collected
    row are printed.

    Args:
        date: Application date inserted (single-quoted) into the search
            expressions, e.g. "20160101".

    Returns:
        A list with one 19-item list per record: ``lawStatus``,
        ``patentType``, then the ``fieldMap`` values listed in
        ``_FIELD_KEYS``.

    Raises:
        KeyError: If a response or record lacks one of the keys read here.
    """
    result = []
    pages = 1
    url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
        'Connection': 'keep-alive',
        'Origin': 'http://www.pss-system.gov.cn',
    }
    while True:
        info = {
            "searchCondition.searchExp": "(申请日='%s') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date,
            "searchCondition.executableSearchExp": "VDB:((APD='%s' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))" % date,
            "resultPagination.start": (pages - 1) * 12,
            "resultPagination.limit": 12,
            "searchCondition.literatureSF": "(申请日='%s') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date,
        }
        # A fresh session per request, closed on every path (including
        # errors and the final return) by the context manager.
        with requests.session() as s:
            html = s.post(url, headers=head, data=info, cookies=cookies_headless).json()
        # Randomised pause of 3 to 5.5 seconds after every request.
        time.sleep(3 + random.random() * 2.5)

        search_result = html["searchResultDTO"]
        page = search_result["pagination"]["totalCount"]
        if page == -1:
            # NOTE(review): a total of -1 moves on to the next page instead
            # of retrying this one, and if every response reports -1 the
            # loop never ends -- confirm this is intended.
            print("page=-1,第%d页" % pages)
            pages += 1
            continue

        print("共%d条记录" % page)
        records = search_result["searchResultRecord"]
        if not records:
            # An empty (or null) record list marks the end of the results.
            print("检索完毕!")
            return result

        for record in records:
            row = _record_to_row(record)
            print(row)
            result.append(row)
        pages += 1
        time.sleep(2)


test = requests_login_pss_system("20160101")
import requests
import random

# Single-page probe: fetch page 4 of the 20090101 search with extra form
# fields (database id, search type, sort order, table mode, sum limit).
date = '20090101'
res = []
result = []
pages = 4
url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
    'Connection': 'keep-alive',
}
# while True:
# Browser-style cookie header split into one dict entry per cookie.
# NOTE(review): cookies_js is built but the request below sends
# cookies_headless -- confirm which set of cookies is meant to be used.
cookies_js = dict(
    pair.split("=", 1)
    for pair in (
        "IS_LOGIN=true; WEE_SID=iNxQozE_ihlq-6IwM2XQb6F3SKSTY30wNrb7e8YS9I7QMfu6QRax!1250085850!1820912863!1543246131519; avoid_declare=declare_pass; JSESSIONID=iNxQozE_ihlq-6IwM2XQb6F3SKSTY30wNrb7e8YS9I7QMfu6QRax!1250085850!1820912863"
    ).split("; ")
)
info = {
    "searchCondition.searchExp": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
    "searchCondition.executableSearchExp": "VDB:((APD='20090101' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))",
    "searchCondition.dbId": "VDB",
    "searchCondition.searchType": "Sino_foreign",
    "searchCondition.sortFields": "-APD,+PD",
    "searchCondition.extendInfo['MODE']": "MODE_TABLE",
    "resultPagination.start": (pages - 1) * 12,
    "resultPagination.limit": 12,
    "resultPagination.sumLimit": 10,
    "searchCondition.literatureSF": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
}
# s = requests.session()
# s.keep_alive = False
html = requests.post(url, headers=head, data=info, cookies=cookies_headless).json()