1. from selenium import webdriver
    2. from selenium.webdriver.common.by import By
    3. from selenium.webdriver.support.ui import WebDriverWait
    4. from selenium.webdriver.common.keys import Keys
    5. from selenium.webdriver.support import expected_conditions as EC
    6. from selenium.webdriver.chrome.options import Options
    7. import time
    8. import csv
    class BOSSjob():
        """Scrape job search results from zhipin.com (Chengdu) into a CSV file.

        Constructing an instance starts Chrome, opens the search page and submits
        the search keyword. ``main()`` then walks the result pages, collects one
        dict per posting and writes everything to disk.

        Results are accumulated on the *class* (``BOSSjob.position_list`` and
        ``BOSSjob.num``), so they are shared by every instance in the process.
        """

        # Scraped postings, one dict per result item (keys match the CSV header).
        position_list = []
        # Number of result items skipped because a field could not be read.
        num = 0

        # Common ancestor of the title, address and salary nodes inside one item.
        _PRIMARY_BOX = (
            './/div[@class="job-primary"]/div[@class="info-primary"]'
            '/div[@class="primary-wrapper"]/div[@class="primary-box"]'
        )
        # (CSV column, XPath relative to the result <li>) for plain-text fields.
        _FIELD_XPATHS = (
            ('position', _PRIMARY_BOX + '/div[@class="job-title"]/span/a'),
            ('address', _PRIMARY_BOX + '/div[@class="job-title"]/span[2]/span'),
            ('salary', _PRIMARY_BOX + '/div[@class="job-limit clearfix"]/span'),
            ('company',
             './/div[@class="job-primary"]/div[@class="info-primary"]'
             '/div[@class="info-company"]/div[@class="company-text"]'
             '/h3[@class="name"]/a'),
        )
        # Tag container; its whitespace-separated text is joined with '/'.
        _SKILLS_XPATH = (
            './/div[@class="job-primary"]/div[@class="info-append clearfix"]'
            '/div[@class="tags"]'
        )
        _CSV_FIELDS = ('position', 'address', 'salary', 'company', 'skills')

        def __init__(self, keyword='java开发工程师', headless=True):
            """Start Chrome, open the search page and submit ``keyword``.

            Args:
                keyword: Text typed into the site's search box.
                headless: When true, Chrome is started with ``--headless``.

            Raises:
                Any Selenium error raised while starting the browser or locating
                the search form. The browser is closed before re-raising.
            """
            self.option = Options()
            if headless:
                self.option.add_argument('--headless')
            # The options object must be handed to the driver; otherwise the
            # arguments configured above are silently ignored.
            self.driver = webdriver.Chrome(options=self.option)
            self.url = 'https://www.zhipin.com/chengdu/?sid=sem_pz_bdpc_dasou_title'
            try:
                self.driver.get(self.url)
                self.driver.find_element(
                    By.XPATH,
                    '//div[@class="search-form-con"]/p[@class="ipt-wrap"]/input'
                ).send_keys(keyword)
                self.driver.find_element(
                    By.XPATH, '//div[@class="search-form "]/form/button[1]'
                ).click()
                # Give the result page time to load before the first parse.
                time.sleep(2)
            except Exception:
                # Do not leave an orphaned browser process behind on failure.
                self.driver.quit()
                raise

        def _parse_item(self, li):
            """Return the field dict for one result ``<li>`` element.

            Raises whatever Selenium raises when one of the fields is missing,
            so a partially filled dict is never returned.
            """
            posting = {}
            for field, xpath in self._FIELD_XPATHS:
                posting[field] = li.find_element(By.XPATH, xpath).text
            tags_text = li.find_element(By.XPATH, self._SKILLS_XPATH).text
            posting['skills'] = '/'.join(tags_text.split())
            return posting

        def Parsedata(self):
            """Scrape every posting on the current result page.

            Each successfully read item is appended to ``BOSSjob.position_list``.
            An item that triggers a Selenium error is skipped, the error is
            printed and ``BOSSjob.num`` is incremented.
            """
            # Imported here so the file-level import block stays untouched.
            from selenium.common.exceptions import WebDriverException

            # Scroll to the bottom of the page before reading the list.
            self.driver.execute_script(
                'window.scrollTo(0, document.body.scrollHeight)'
            )
            items = self.driver.find_elements(By.XPATH, '//div[@class="job-list"]/ul/li')
            for li in items:
                try:
                    posting = self._parse_item(li)
                except WebDriverException as e:
                    # Best effort: one unreadable item must not abort the page.
                    BOSSjob.num += 1
                    print(e)
                else:
                    BOSSjob.position_list.append(posting)

        def Savedata(self, path='BOSS.csv'):
            """Write ``BOSSjob.position_list`` to ``path`` as CSV with a header row.

            The file is overwritten and encoded as UTF-8 with a BOM
            (``utf-8-sig``).
            """
            with open(path, 'w', encoding='utf-8-sig', newline='') as f:
                writer = csv.DictWriter(f, fieldnames=self._CSV_FIELDS)
                writer.writeheader()
                writer.writerows(BOSSjob.position_list)

        def main(self, pages=5):
            """Scrape up to ``pages`` result pages, close the browser and save.

            The browser is always closed and the rows collected so far are always
            written, even when scraping fails part-way; the error then propagates
            to the caller. On success the number of skipped items is printed.
            """
            try:
                # Let element lookups retry briefly while a page is rendering.
                self.driver.implicitly_wait(2)
                for page in range(pages):
                    self.Parsedata()
                    if page == pages - 1:
                        # Last requested page: no need to navigate any further.
                        break
                    next_links = self.driver.find_elements(
                        By.XPATH, '//div[@class="page"]/a[@class="next"]'
                    )
                    if not next_links:
                        # No clickable "next" link; presumably the final page.
                        break
                    # JS click, as in the original (not a native WebElement.click()).
                    self.driver.execute_script('arguments[0].click()', next_links[0])
                    # Same pause the constructor uses after submitting the search.
                    time.sleep(2)
            finally:
                try:
                    self.driver.quit()
                finally:
                    self.Savedata()
            print(BOSSjob.num)
    if __name__ == '__main__':
        # Script entry point: build the scraper (this opens the browser and
        # submits the search) and run the full scrape-and-save cycle.
        BOSSjob().main()