import pymongo
    from selenium.webdriver import Chrome
    import time
    from selenium.webdriver.chrome.options import Options
    # Connect to MongoDB and select the database/collection used for storage.
    # Client for the MongoDB server on localhost:27017.
    # (The original used typographic quotes here, which is a syntax error.)
    client = pymongo.MongoClient(host='localhost', port=27017)

    # Database "jingdong" and its collection "jingdongheisi"; save_data()
    # writes every scraped record into this collection.
    db = client.jingdong
    collection = db.jingdongheisi

    # Helper that stores one scraped record (a dict) in the MongoDB collection.
    def save_data(dic):
        """Insert one record into the module-level ``collection``.

        Args:
            dic: The document to store; passed unchanged to
                ``collection.insert_one``.
        """
        collection.insert_one(dic)

    # Labels removed from the ends of a scraped title.
    _TITLE_BADGES = ('京东超市', '爱心东东')


    def _clean_title(raw):
        """Return *raw* without the badge labels and surrounding whitespace.

        Each label is removed as a whole string. ``str.strip(label)`` would
        treat the label as a set of characters and could also eat real title
        characters that happen to be in that set.
        """
        title = raw.strip()
        for badge in _TITLE_BADGES:
            if title.startswith(badge):
                title = title[len(badge):]
            if title.endswith(badge):
                title = title[:-len(badge)]
            title = title.strip()
        return title


    def _drop_down(driver):
        """Scroll the window from 1000px to 10000px in 1000px steps.

        The first step is followed by a 3 second pause and every later step
        by a 1 second pause. Presumably this lets lazily loaded results
        render before they are read -- confirm against the target page.
        """
        for offset in range(1000, 10001, 1000):
            driver.execute_script('window.scrollTo(0,%d)' % offset)
            time.sleep(3 if offset == 1000 else 1)


    def dy(url, pages=5):
        """Scrape search-result pages starting at *url* and store every item.

        Opens *url* in headless Chrome. For each page it scrolls down, reads
        every ``<li>`` under ``#J_goodsList``, and passes one dict per item
        to ``save_data``. It then clicks the "next page" control to move on.

        Args:
            url: Address of the first result page to load.
            pages: Number of consecutive result pages to scrape (default 5).

        Raises:
            selenium.common.exceptions.NoSuchElementException: If an expected
                element is missing from an item or from the pager.
        """
        # Imported here so the function does not rely on a file-level import.
        # find_element(By.XPATH, ...) replaces the find_element_by_xpath
        # helpers, which were removed in Selenium 4.3.
        from selenium.webdriver.common.by import By

        opt = Options()
        opt.add_argument('--headless')
        opt.add_argument('--disable-gpu')

        driver = Chrome(options=opt)
        try:
            driver.get(url)
            time.sleep(3)

            for page_index in range(pages):
                _drop_down(driver)
                # Pause before collecting the items so the element handles
                # are taken after the page has had time to settle.
                time.sleep(3)
                li_list = driver.find_elements(
                    By.XPATH, '//*[@id="J_goodsList"]/ul/li')

                for li in li_list:
                    dianputitle = li.find_element(
                        By.XPATH, './div/div[7]/span/a').text
                    title = _clean_title(
                        li.find_element(By.XPATH, './div/div[4]/a/em').text)
                    jiage = li.find_element(
                        By.XPATH, './div/div[3]/strong/i').text
                    dianpu_dizhi = li.find_element(
                        By.XPATH, './div/div[1]/a').get_attribute('href')
                    img_dizhi = li.find_element(
                        By.XPATH, './div/div[1]/a/img').get_attribute('src')
                    dic = {
                        '店铺名称': dianputitle,
                        '名称': title,
                        '价格': jiage,
                        '店铺地址': dianpu_dizhi,
                        '图片名称': img_dizhi,
                    }
                    save_data(dic)

                # No need to navigate further once the last page is stored.
                if page_index < pages - 1:
                    driver.find_element(
                        By.XPATH,
                        '//*[@id="J_bottomPage"]/span[1]/a[9]/em').click()
                    time.sleep(2)
        finally:
            # Always shut the browser down, even if scraping fails part-way,
            # so no Chrome process is left running.
            driver.quit()


    if __name__ == '__main__':
        url = 'https://search.jd.com/Search?keyword=%E9%BB%91%E4%B8%9D&enc=utf-8&wq=%E9%BB%91%E4%B8%9D&pvid=657b0638977e4f899bd4e339e3834dc7'
        dy(url)