1. import requests
    2. from bs4 import BeautifulSoup
    3. import time
    4. url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
    5. head = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
    6. 'Connection': 'keep-alive'}
    7. cookie = dict(cookies_are="WEE_SID=7ppLQ6_O6fNNT1kUEhhdjNme5Dd6XpTxHdD5AqOHXnJ_R3VkjJK2!-1371704578!792269866!1543155986382; IS_LOGIN=true; wee_username=d2VpbWVuZ3hpbjIwMTI%3D; wee_password=YWR3YWhhZHM2MTM2ODc5; JSESSIONID=7ppLQ6_O6fNNT1kUEhhdjNme5Dd6XpTxHdD5AqOHXnJ_R3VkjJK2!-1371704578!792269866")
    8. info = {"searchCondition.searchExp": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))",
    9. "searchCondition.executableSearchExp": "VDB:((APD='20090101' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))",
    10. "resultPagination.start": 0,
    11. "resultPagination.limit": 12,
    12. "searchCondition.literatureSF": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))"}
    13. # 代理服务器
    14. proxyHost = "http-dyn.abuyun.com"
    15. proxyPort = "9020"
    16. # 代理隧道验证信息
    17. proxyUser = "HX7YIG4D7IR9907D"
    18. proxyPass = "D9A0153CCBC8081F"
    19. proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    20. "host": proxyHost,
    21. "port": proxyPort,
    22. "user": proxyUser,
    23. "pass": proxyPass,
    24. }
    25. proxies = {
    26. "http": proxyMeta,
    27. "https": proxyMeta,
    28. }
    29. s = requests.session()
    30. html = s.post(url, headers=head, proxies=proxies, data=info, cookies=cookie).content
    1. def patent_requests(date):
    2. import requests
    3. res = []
    4. result = []
    5. url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
    6. head = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
    7. 'Connection': 'keep-alive'}
    8. cookie = dict(cookies_are="WEE_SID=O1xKu2UpRWGCOPDH_QJnZv-Qcx-EBjY-Z64OGBBgv3x1s4UoRg5Y!792269866!-627030802!1543147054377; IS_LOGIN=true; JSESSIONID=O1xKu2UpRWGCOPDH_QJnZv-Qcx-EBjY-Z64OGBBgv3x1s4UoRg5Y!792269866!-627030802")
    9. pages = 1
    10. while True:
    11. info = {"searchCondition.searchExp": "(申请日=%s) AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date,
    12. "searchCondition.executableSearchExp": "VDB:((APD=%s AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))" % date,
    13. "resultPagination.start": (pages - 1) * 12,
    14. "resultPagination.limit": 12,
    15. "searchCondition.literatureSF": "(申请日=%s) AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date}
    16. # 代理服务器
    17. proxyHost = "http-dyn.abuyun.com"
    18. proxyPort = "9020"
    19. # 代理隧道验证信息
    20. proxyUser = "HX7YIG4D7IR9907D"
    21. proxyPass = "D9A0153CCBC8081F"
    22. proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    23. "host": proxyHost,
    24. "port": proxyPort,
    25. "user": proxyUser,
    26. "pass": proxyPass,
    27. }
    28. proxies = {
    29. "http": proxyMeta,
    30. "https": proxyMeta,
    31. }
    32. html = requests.post(url, proxies=proxies, data=info, cookies=cookie, headers=head).json()
    33. page = html["searchResultDTO"]["pagination"]["totalCount"]
    34. print("共%d条记录" % page)
    35. if html["searchResultDTO"]["searchResultRecord"] != []:
    36. for i in html["searchResultDTO"]["searchResultRecord"]:
    37. lawStatus = i["lawStatus"]
    38. patentType = i["patentType"]
    39. name = i["fieldMap"]["TIVIEW"]
    40. fnum = i["fieldMap"]["FNUM"]
    41. cpnum = i["fieldMap"]["CPNUM"]
    42. pnum = i["fieldMap"]["PNUM"]
    43. VID = i["fieldMap"]["VID"]
    44. APSN = i["fieldMap"]["APSN"]
    45. APD = i["fieldMap"]["APD"]
    46. PN_BAK = i["fieldMap"]["PN_BAK"]
    47. PD = i["fieldMap"]["PD"] # 公开日
    48. IC = i["fieldMap"]["IC"] # IPC分类号
    49. CPC = i["fieldMap"]["CPC"] # CPC分类号
    50. PAVIEW = i["fieldMap"]["PAVIEW"] # 申请人
    51. INVIEW = i["fieldMap"]["INVIEW"] # 发明人
    52. AA = i["fieldMap"]["AA"] # 地址
    53. AGT = i["fieldMap"]["AGT"] # 代理人
    54. AGY = i["fieldMap"]["AGY"] # 代理机构
    55. OWNER_STATUS = i["fieldMap"]["AGY"] # 代理机构
    56. res.extend([lawStatus, patentType, name, fnum, cpnum, pnum, VID, APSN, APD, PN_BAK, PD, IC, CPC, PAVIEW, INVIEW, AA, AGT, AGY, OWNER_STATUS])
    57. middle = res.copy()
    58. result.append(middle)
    59. res.clear()
    60. pages += 1
    61. else:
    62. print("检索完毕!")
    63. return result
    64. test = patent_requests("20090101")
    65. "IS_LOGIN=true; WEE_SID=oi5Kv5x04H_asaak8hnaCfGhSkgSyurNL47sRSKG8RCihySrFPjj!792269866!-627030802!1543147330676; JSESSIONID=oi5Kv5x04H_asaak8hnaCfGhSkgSyurNL47sRSKG8RCihySrFPjj!792269866!-627030802"
    1. from selenium import webdriver
    2. from selenium.webdriver.firefox.options import Options
    3. from selenium.webdriver.support.wait import WebDriverWait
    4. from selenium.webdriver.common.by import By
    5. from selenium.webdriver.support import expected_conditions as EC
    6. import selenium.webdriver.support.ui as ui
    7. from bs4 import BeautifulSoup
    8. import time
    9. import json
    10. import random
    11. import requests
    12. def get_cookies():
    13. options = Options()
    14. # options.add_argument('-headless')
    15. driver = webdriver.Firefox(executable_path=r'C:\Users\viemax\Desktop\geckodriver.exe', options=options)
    16. url = "http://www.pss-system.gov.cn/sipopublicsearch/portal/uiIndex.shtml"
    17. driver.get(url)
    18. time.sleep(1)
    19. driver.find_element_by_id("j_username").send_keys("weimengxin2012")
    20. driver.find_element_by_id("j_password_show").send_keys("adwahads6136879")
    21. # html_login = driver.page_source
    22. valid = input("请手动输入验证码:")
    23. driver.find_element_by_id("j_validation_code").send_keys(valid)
    24. driver.find_element_by_link_text("登录").click()
    25. time.sleep(1)
    26. cookie = driver.get_cookies()
    27. cookie_1 = cookie[0]["name"] + "=" + cookie[0]["value"]
    28. cookie_2 = cookie[1]["name"] + "=" + cookie[1]["value"]
    29. cookie_3 = cookie[2]["name"] + "=" + cookie[2]["value"]
    30. cookie_all = cookie_3 + "; " + cookie_1 + "; " + cookie_2
    31. cookies_head = dict()
    32. cookies_head["cookies_are"] = cookie_all
    33. driver.quit()
    34. return cookies_head
    35. cookies_headless = get_cookies()
    36. def requests_login_pss_system(date):
    37. res = []
    38. result = []
    39. pages = 1
    40. url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
    41. head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
    42. 'Connection': 'keep-alive',
    43. 'Origin': 'http://www.pss-system.gov.cn'}
    44. while True:
    45. info = {"searchCondition.searchExp": "(申请日='%s') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date,
    46. "searchCondition.executableSearchExp": "VDB:((APD='%s' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))" % date,
    47. "resultPagination.start": (pages - 1) * 12,
    48. "resultPagination.limit": 12,
    49. "searchCondition.literatureSF": "(申请日='%s') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" % date}
    50. s = requests.session()
    51. s.keep_alive = False
    52. html = s.post(url, headers=head, data=info, cookies=cookies_headless).json()
    53. time.sleep(3 + random.random() * 2.5)
    54. page = html["searchResultDTO"]["pagination"]["totalCount"]
    55. if page == -1:
    56. print("page=-1,第%d页" % pages)
    57. pages += 1
    58. s.close()
    59. else:
    60. print("共%d条记录" % page)
    61. if html["searchResultDTO"]["searchResultRecord"] != []:
    62. for i in html["searchResultDTO"]["searchResultRecord"]:
    63. lawStatus = i["lawStatus"]
    64. patentType = i["patentType"]
    65. name = i["fieldMap"]["TIVIEW"]
    66. fnum = i["fieldMap"]["FNUM"]
    67. cpnum = i["fieldMap"]["CPNUM"]
    68. pnum = i["fieldMap"]["PNUM"]
    69. VID = i["fieldMap"]["VID"]
    70. APSN = i["fieldMap"]["APSN"]
    71. APD = i["fieldMap"]["APD"]
    72. PN_BAK = i["fieldMap"]["PN_BAK"]
    73. PD = i["fieldMap"]["PD"] # 公开日
    74. IC = i["fieldMap"]["IC"] # IPC分类号
    75. CPC = i["fieldMap"]["CPC"] # CPC分类号
    76. PAVIEW = i["fieldMap"]["PAVIEW"] # 申请人
    77. INVIEW = i["fieldMap"]["INVIEW"] # 发明人
    78. AA = i["fieldMap"]["AA"] # 地址
    79. AGT = i["fieldMap"]["AGT"] # 代理人
    80. AGY = i["fieldMap"]["AGY"] # 代理机构
    81. OWNER_STATUS = i["fieldMap"]["AGY"] # 代理机构
    82. res.extend([lawStatus, patentType, name, fnum, cpnum, pnum, VID, APSN, APD, PN_BAK, PD, IC, CPC, PAVIEW, INVIEW, AA, AGT, AGY, OWNER_STATUS])
    83. print(res)
    84. middle = res.copy()
    85. result.append(middle)
    86. res.clear()
    87. pages += 1
    88. time.sleep(2)
    89. s.close()
    90. else:
    91. print("检索完毕!")
    92. return result
    93. test = requests_login_pss_system("20160101")
    94. import requests
    95. import random
    96. date = '20090101'
    97. res = []
    98. result = []
    99. pages = 4
    100. url = "http://www.pss-system.gov.cn/sipopublicsearch/patentsearch/showSearchResult-startWa.shtml"
    101. head = {
    102. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
    103. 'Connection': 'keep-alive'}
    104. # while True:
    105. cookies_js = dict(cookies_are="IS_LOGIN=true; WEE_SID=iNxQozE_ihlq-6IwM2XQb6F3SKSTY30wNrb7e8YS9I7QMfu6QRax!1250085850!1820912863!1543246131519; avoid_declare=declare_pass; JSESSIONID=iNxQozE_ihlq-6IwM2XQb6F3SKSTY30wNrb7e8YS9I7QMfu6QRax!1250085850!1820912863")
    106. info = {
    107. "searchCondition.searchExp": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))" ,
    108. "searchCondition.executableSearchExp": "VDB:((APD='20090101' AND (CC='HK' OR CC='MO' OR CC='TW' OR ((DOC_TYPE='I' OR DOC_TYPE='U' OR DOC_TYPE='D') AND CC='CN'))))",
    109. "searchCondition.dbId": "VDB",
    110. "searchCondition.searchType": "Sino_foreign",
    111. "searchCondition.sortFields": "-APD,+PD",
    112. "searchCondition.extendInfo['MODE']": "MODE_TABLE",
    113. "resultPagination.start": (pages - 1) * 12,
    114. "resultPagination.limit": 12,
    115. "resultPagination.sumLimit": 10,
    116. "searchCondition.literatureSF": "(申请日='20090101') AND 公开国家/地区/组织=(HK OR MO OR TW OR (发明类型=('I' OR 'U' OR 'D') AND 公开国家/地区/组织=(CN)))"}
    117. # s = requests.session()
    118. # s.keep_alive = False
    119. html = requests.post(url, headers=head, data=info, cookies=cookies_headless).json()