import csv
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options


class BOSSjob():
    """Scrape job listings from the zhipin.com search results into a CSV file.

    Constructing an instance starts Chrome, opens the start URL and submits a
    search. ``main`` then walks the result pages, collecting one dict per
    listing, and finally writes everything to a CSV file.
    """

    # NOTE: both attributes live on the class, so they are shared by every
    # instance and keep accumulating across instances in the same process.
    position_list = []  # one dict per successfully parsed listing
    num = 0             # number of listings that failed to parse

    # XPath of the container that holds title, address and salary of a listing
    # (relative to the listing's <li> element).
    _PRIMARY_BOX = (
        './/div[@class="job-primary"]/div[@class="info-primary"]'
        '/div[@class="primary-wrapper"]/div[@class="primary-box"]'
    )
    # CSV column name -> XPath of the element whose text fills that column.
    _FIELD_XPATHS = {
        # position title
        'position': _PRIMARY_BOX + '/div[@class="job-title"]/span/a',
        # address
        'address': _PRIMARY_BOX + '/div[@class="job-title"]/span[2]/span',
        # salary
        'salary': _PRIMARY_BOX + '/div[@class="job-limit clearfix"]/span',
        # company name
        'company': (
            './/div[@class="job-primary"]/div[@class="info-primary"]'
            '/div[@class="info-company"]/div[@class="company-text"]'
            '/h3[@class="name"]/a'
        ),
    }
    # Element whose whitespace-separated text holds the skill tags.
    _SKILLS_XPATH = (
        './/div[@class="job-primary"]/div[@class="info-append clearfix"]'
        '/div[@class="tags"]'
    )

    def __init__(self, keyword='java开发工程师', headless=False):
        """Start Chrome, open the start page and submit the search form.

        Args:
            keyword: Text typed into the search box.
            headless: When true, Chrome is started with ``--headless``.
                Defaults to false, which matches the previous behaviour
                (the options object used to be built but never handed to
                the driver, so the browser was always visible).

        Raises:
            Exception: Any error raised while loading the page or locating
                the search form is re-raised after the browser is closed.
        """
        self.option = Options()
        if headless:
            self.option.add_argument('--headless')
        self.driver = webdriver.Chrome(options=self.option)
        self.url = 'https://www.zhipin.com/chengdu/?sid=sem_pz_bdpc_dasou_title'
        try:
            self.driver.get(self.url)
            self.driver.find_element(
                By.XPATH,
                '//div[@class="search-form-con"]/p[@class="ipt-wrap"]/input'
            ).send_keys(keyword)
            self.driver.find_element(
                By.XPATH, '//div[@class="search-form "]/form/button[1]'
            ).click()
            # Fixed pause after submitting the search; presumably gives the
            # result page time to load before parsing starts.
            time.sleep(2)
        except Exception:
            # Do not leave an orphaned browser process behind when setup fails.
            self.driver.quit()
            raise

    def Parsedata(self):
        """Parse every listing on the current page into ``BOSSjob.position_list``.

        A listing whose fields cannot all be read is skipped: the error is
        printed and ``BOSSjob.num`` is incremented.
        """
        self.driver.execute_script(
            'window.scrollTo(0, document.body.scrollHeight)'
        )
        listings = self.driver.find_elements(
            By.XPATH, '//div[@class="job-list"]/ul/li'
        )
        for li in listings:
            try:
                posi_dict = {
                    field: li.find_element(By.XPATH, xpath).text
                    for field, xpath in self._FIELD_XPATHS.items()
                }
                tags_text = li.find_element(By.XPATH, self._SKILLS_XPATH).text
                posi_dict['skills'] = '/'.join(tags_text.split())
            except Exception as e:
                # Best effort per row: one malformed listing must not abort
                # the whole page.
                BOSSjob.num += 1
                print(e)
            else:
                BOSSjob.position_list.append(posi_dict)

    def Savedata(self, filename='BOSS.csv'):
        """Write all collected listings to ``filename`` as CSV with a header row.

        Args:
            filename: Path of the CSV file to (over)write.
        """
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            writer = csv.DictWriter(
                f,
                fieldnames=('position', 'address', 'salary', 'company', 'skills')
            )
            writer.writeheader()
            writer.writerows(BOSSjob.position_list)

    def main(self, pages=5):
        """Scrape up to ``pages`` result pages, close the browser and save.

        Args:
            pages: Maximum number of result pages to parse.

        The loop stops early when no "next" link can be found. The browser is
        closed even if scraping raises; the CSV is written and the failure
        count printed only when scraping finishes without an exception.
        """
        try:
            for _ in range(pages):
                self.Parsedata()
                self.driver.implicitly_wait(2)
                try:
                    tag = self.driver.find_element(
                        By.XPATH, '//div[@class="page"]/a[@class="next"]'
                    )
                except NoSuchElementException:
                    # No matching "next" link: treat as the end of the results
                    # instead of crashing and losing what was collected.
                    break
                # Click through JavaScript rather than WebElement.click().
                self.driver.execute_script('arguments[0].click()', tag)
        finally:
            self.driver.quit()
        self.Savedata()
        print(BOSSjob.num)


if __name__ == '__main__':
    job = BOSSjob()
    job.main()