import requests
from bs4 import BeautifulSoup
import time

# Ad-hoc probe: send one search request through the proxy tunnel and keep the
# raw response body in `html`.
# NOTE(review): the session cookie (it includes encoded account fields) and the
# proxy credentials below are hard-coded; they belong in configuration or the
# environment, not in source control.
url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
    'Connection': 'keep-alive',
}
# requests takes one mapping entry per cookie (name -> value).  Passing
# dict(cookies_are="<whole Cookie header>") sends a single cookie literally
# named "cookies_are", so the raw header is split into real name/value pairs.
cookie = dict(
    pair.split("=", 1)
    for pair in "WEE_SID=7ppLQ6_O6fNNT1kUEhhdjNme5Dd6XpTxHdD5AqOHXnJ_R3VkjJK2!-1371704578!792269866!1543155986382; IS_LOGIN=true; wee_username=d2VpbWVuZ3hpbjIwMTI%3D; wee_password=YWR3YWhhZHM2MTM2ODc5; JSESSIONID=7ppLQ6_O6fNNT1kUEhhdjNme5Dd6XpTxHdD5AqOHXnJ_R3VkjJK2!-1371704578!792269866".split("; ")
)
# Form fields of the search request: first page (offset 0), 12 records.
info = {
    "searchCondition.searchExp": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
    "searchCondition.executableSearchExp": "VDB:((APD='20090101' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))",
    "resultPagination.start": 0,
    "resultPagination.limit": 12,
    "searchCondition.literatureSF": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
}
# Proxy server
proxyHost = "http-dyn.abuyun.com"
proxyPort = "9020"
# Proxy tunnel credentials
proxyUser = "HX7YIG4D7IR9907D"
proxyPass = "D9A0153CCBC8081F"
proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    "host": proxyHost,
    "port": proxyPort,
    "user": proxyUser,
    "pass": proxyPass,
}
# The same tunnel is used for both schemes.
proxies = {
    "http": proxyMeta,
    "https": proxyMeta,
}
s = requests.session()
html = s.post(url, headers=head, proxies=proxies, data=info, cookies=cookie).content
def patent_requests(date):
    """Collect every search-result record for one application date.

    Pages through the pss-system search endpoint, 12 records at a time, via
    the HTTP proxy tunnel, and stops at the first page that carries no
    records.

    Args:
        date: Application date; interpolated verbatim (unquoted) into the
            search expressions. The visible caller passes "YYYYMMDD",
            e.g. "20090101".

    Returns:
        A list with one 19-element list per record, in this column order:
        lawStatus, patentType, TIVIEW, FNUM, CPNUM, PNUM, VID, APSN, APD,
        PN_BAK, PD, IC, CPC, PAVIEW, INVIEW, AA, AGT, AGY, AGY.

    Raises:
        KeyError: if the response JSON lacks one of the keys read below.
    """
    import requests

    page_size = 12
    url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
        'Connection': 'keep-alive',
    }
    # NOTE(review): session cookie and proxy credentials are hard-coded; they
    # should come from configuration or the environment.
    raw_cookie = "WEE_SID=O1xKu2UpRWGCOPDH_QJnZv-Qcx-EBjY-Z64OGBBgv3x1s4UoRg5Y!792269866!-627030802!1543147054377; IS_LOGIN=true; JSESSIONID=O1xKu2UpRWGCOPDH_QJnZv-Qcx-EBjY-Z64OGBBgv3x1s4UoRg5Y!792269866!-627030802"
    # requests wants one entry per cookie (name -> value); a single
    # "cookies_are" entry would be sent as a cookie of that literal name.
    cookie = dict(pair.split("=", 1) for pair in raw_cookie.split("; "))

    # Proxy server
    proxyHost = "http-dyn.abuyun.com"
    proxyPort = "9020"
    # Proxy tunnel credentials
    proxyUser = "HX7YIG4D7IR9907D"
    proxyPass = "D9A0153CCBC8081F"
    proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
        "host": proxyHost,
        "port": proxyPort,
        "user": proxyUser,
        "pass": proxyPass,
    }
    proxies = {
        "http": proxyMeta,
        "https": proxyMeta,
    }

    # The expressions depend only on `date`, so build them once.
    # NOTE(review): the date is not quoted here, unlike the quoted
    # ('YYYYMMDD') form used elsewhere in this file - confirm the endpoint
    # accepts both.
    search_exp = "(申请日=%s) AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date
    executable_exp = "VDB:((APD=%s AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))" % date

    # fieldMap keys in output column order.
    field_keys = (
        "TIVIEW", "FNUM", "CPNUM", "PNUM", "VID", "APSN", "APD", "PN_BAK",
        "PD",      # publication date
        "IC",      # IPC classification
        "CPC",     # CPC classification
        "PAVIEW",  # applicant
        "INVIEW",  # inventor
        "AA",      # address
        "AGT",     # agent
        "AGY",     # agency
        # Last column (named OWNER_STATUS in the original code) reads AGY a
        # second time; kept so row width and order stay the same.
        # NOTE(review): presumably meant a different fieldMap key - confirm
        # against a real response before changing it.
        "AGY",
    )

    result = []
    pages = 1
    while True:
        info = {
            "searchCondition.searchExp": search_exp,
            "searchCondition.executableSearchExp": executable_exp,
            "resultPagination.start": (pages - 1) * page_size,
            "resultPagination.limit": page_size,
            "searchCondition.literatureSF": search_exp,
        }
        html = requests.post(url, proxies=proxies, data=info, cookies=cookie, headers=head).json()
        page = html["searchResultDTO"]["pagination"]["totalCount"]
        print("共%d条记录" % page)

        records = html["searchResultDTO"]["searchResultRecord"]
        if not records:
            print("检索完毕!")
            return result

        for record in records:
            fields = record["fieldMap"]
            result.append(
                [record["lawStatus"], record["patentType"]]
                + [fields[key] for key in field_keys]
            )
        # Advance exactly one page per response.  Bumping the counter once per
        # record would move the offset 12 pages ahead and skip results.
        pages += 1
# Smoke run of the proxy-based scraper for one application date.
test = patent_requests("20090101")

# Imports for the Selenium-assisted login flow defined below.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import selenium.webdriver.support.ui as ui
from bs4 import BeautifulSoup
import time
import json
import random
import requests
def get_cookies():
    """Log in through a Firefox window and return the session cookies.

    Opens the portal page, fills in the account fields, asks the operator on
    stdin for the captcha text, submits the form and reads the cookies the
    browser holds afterwards.

    Returns:
        dict mapping each cookie name to its value, i.e. the shape requests
        expects for its ``cookies=`` argument.
    """
    options = Options()
    # Headless mode stays off: the captcha is typed in by hand, so presumably
    # the operator has to see the page.
    # NOTE(review): the driver path and the account credentials are
    # hard-coded; move them to configuration or the environment.
    driver = webdriver.Firefox(executable_path=r'C:\Users\viemax\Desktop\geckodriver.exe', options=options)
    try:
        url = "http://www.pss-system.gov.cn/sipopublicsearch/portal/uiIndex.shtml"
        driver.get(url)
        time.sleep(1)
        # find_element(By...) is used instead of the find_element_by_*
        # shortcuts, which newer Selenium releases no longer provide.
        driver.find_element(By.ID, "j_username").send_keys("weimengxin2012")
        driver.find_element(By.ID, "j_password_show").send_keys("adwahads6136879")
        valid = input("请手动输入验证码:")
        driver.find_element(By.ID, "j_validation_code").send_keys(valid)
        driver.find_element(By.LINK_TEXT, "登录").click()
        time.sleep(1)
        # One entry per browser cookie.  Indexing the first three cookies
        # breaks when fewer are set, and a single joined string under one
        # made-up key is sent by requests as a cookie of that literal name.
        return {entry["name"]: entry["value"] for entry in driver.get_cookies()}
    finally:
        # Always close the browser, including when a step above raises.
        driver.quit()


# Interactive login performed once at import time; the functions below read
# this module-level mapping.
cookies_headless = get_cookies()
def requests_login_pss_system(date):
    """Collect every search-result record for one application date.

    Pages through the pss-system search endpoint, 12 records at a time, using
    the module-level ``cookies_headless`` login cookies, and stops at the
    first page that carries no records.  Requests are spaced out with sleeps.

    Args:
        date: Application date, inserted between single quotes into the
            search expressions. The visible caller passes "YYYYMMDD",
            e.g. "20160101".

    Returns:
        A list with one 19-element list per record, in this column order:
        lawStatus, patentType, TIVIEW, FNUM, CPNUM, PNUM, VID, APSN, APD,
        PN_BAK, PD, IC, CPC, PAVIEW, INVIEW, AA, AGT, AGY, AGY.

    Raises:
        KeyError: if the response JSON lacks one of the keys read below.
    """
    page_size = 12
    url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
        'Connection': 'keep-alive',
        'Origin': 'http://www.pss-system.gov.cn',
    }

    # The expressions depend only on `date`, so build them once.
    search_exp = "(申请日='%s') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date
    executable_exp = "VDB:((APD='%s' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))" % date

    # fieldMap keys in output column order.
    field_keys = (
        "TIVIEW", "FNUM", "CPNUM", "PNUM", "VID", "APSN", "APD", "PN_BAK",
        "PD",      # publication date
        "IC",      # IPC classification
        "CPC",     # CPC classification
        "PAVIEW",  # applicant
        "INVIEW",  # inventor
        "AA",      # address
        "AGT",     # agent
        "AGY",     # agency
        # Last column (named OWNER_STATUS in the original code) reads AGY a
        # second time; kept so row width and order stay the same.
        # NOTE(review): presumably meant a different fieldMap key - confirm
        # against a real response before changing it.
        "AGY",
    )

    result = []
    pages = 1
    while True:
        info = {
            "searchCondition.searchExp": search_exp,
            "searchCondition.executableSearchExp": executable_exp,
            "resultPagination.start": (pages - 1) * page_size,
            "resultPagination.limit": page_size,
            "searchCondition.literatureSF": search_exp,
        }
        # A fresh session per request, released as soon as the body is parsed
        # (also on the final, empty page).
        with requests.session() as s:
            # NOTE(review): kept from the original; requests does not appear
            # to document a Session.keep_alive attribute, so this may be a
            # no-op - confirm.
            s.keep_alive = False
            html = s.post(url, headers=head, data=info, cookies=cookies_headless).json()
        # Randomised pause between requests.
        time.sleep(3 + random.random() * 2.5)

        page = html["searchResultDTO"]["pagination"]["totalCount"]
        if page == -1:
            # A total of -1 is treated as "nothing usable on this page": the
            # page is skipped, not retried.
            # NOTE(review): if the server keeps answering -1 this loop never
            # ends; consider a retry/skip cap.
            print("page=-1,第%d页" % pages)
            pages += 1
            continue

        print("共%d条记录" % page)
        records = html["searchResultDTO"]["searchResultRecord"]
        if not records:
            print("检索完毕!")
            return result

        for record in records:
            fields = record["fieldMap"]
            row = [record["lawStatus"], record["patentType"]]
            row.extend(fields[key] for key in field_keys)
            print(row)
            result.append(row)
        # Per-page housekeeping: advance exactly one page and pause once.
        # Doing this per record would move the offset 12 pages ahead (skipping
        # results) and sleep 2 s for every single record.
        pages += 1
        time.sleep(2)
test = requests_login_pss_system("20160101")
import requests
import random

# One-off request for a single result page of the 2009-01-01 query, sent with
# the cookies obtained from the Selenium login.
date = '20090101'
res, result = [], []
pages = 4  # 1-based page number; the offset below is (pages - 1) * 12
url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
head = dict([
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0'),
    ('Connection', 'keep-alive'),
])
# NOTE(review): cookies_js is built here but the request below sends
# cookies_headless instead - confirm which one is intended.
cookies_js = {"cookies_are": "IS_LOGIN=true; WEE_SID=iNxQozE_ihlq-6IwM2XQb6F3SKSTY30wNrb7e8YS9I7QMfu6QRax!1250085850!1820912863!1543246131519; avoid_declare=declare_pass; JSESSIONID=iNxQozE_ihlq-6IwM2XQb6F3SKSTY30wNrb7e8YS9I7QMfu6QRax!1250085850!1820912863"}
# Form fields of the search request, in the order they are submitted.
info = {
    "searchCondition.searchExp":
        "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
    "searchCondition.executableSearchExp":
        "VDB:((APD='20090101' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))",
    "searchCondition.dbId": "VDB",
    "searchCondition.searchType": "Sino_foreign",
    "searchCondition.sortFields": "-APD,+PD",
    "searchCondition.extendInfo['MODE']": "MODE_TABLE",
    "resultPagination.start": 12 * (pages - 1),
    "resultPagination.limit": 12,
    "resultPagination.sumLimit": 10,
    "searchCondition.literatureSF":
        "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
}
# Plain module-level call (no Session object); the decoded JSON body is kept
# in `html`.
html = requests.post(
    url,
    headers=head,
    data=info,
    cookies=cookies_headless,
).json()