import time

import pymongo
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Set up the MongoDB connection
# Scraped records are written to the "jingdongheisi" collection of the
# "jingdong" database on the local MongoDB server (default port 27017).
client = pymongo.MongoClient(host='localhost', port=27017)
db = client.jingdong
collection = db.jingdongheisi
# Helper that stores one scraped record (a dict) in MongoDB
def save_data(dic):
    """Insert one record into the module-level MongoDB ``collection``.

    Args:
        dic: The document to store, passed unchanged to ``insert_one``.
    """
    collection.insert_one(dic)
# Vertical scroll offsets (in pixels) visited on every result page.
_SCROLL_OFFSETS = range(1000, 10001, 1000)

# Labels removed from the start of product titles (presumably badge text that
# the page renders inside the title element -- confirm against the live page).
_TITLE_BADGES = ('京东超市', '爱心东东')


def _scroll_down(driver):
    """Scroll the page down to 10000px in 1000px steps, pausing after each step."""
    for step, offset in enumerate(_SCROLL_OFFSETS):
        driver.execute_script('window.scrollTo(0,%d)' % offset)
        # The first step pauses for 3s, every later step for 1s.
        time.sleep(3 if step == 0 else 1)


def _clean_title(raw):
    """Return ``raw`` without surrounding whitespace or leading badge labels.

    ``str.strip('京东超市')`` would remove any of those *characters* from both
    ends of the title, so the labels are removed as whole prefixes instead.
    """
    title = raw.strip()
    removed = True
    while removed:
        removed = False
        for badge in _TITLE_BADGES:
            if title.startswith(badge):
                title = title[len(badge):].lstrip()
                removed = True
    return title


def _extract_item(li):
    """Build the record dict for one result ``li`` element."""
    shop_name = li.find_element(By.XPATH, './div/div[7]/span/a').text
    title = _clean_title(li.find_element(By.XPATH, './div/div[4]/a/em').text)
    price = li.find_element(By.XPATH, './div/div[3]/strong/i').text
    link = li.find_element(By.XPATH, './div/div[1]/a').get_attribute('href')
    image = li.find_element(By.XPATH, './div/div[1]/a/img').get_attribute('src')
    return {'店铺名称': shop_name, '名称': title, '价格': price, '店铺地址': link, '图片名称': image}


def dy(url, pages=5):
    """Scrape search-result pages starting at ``url`` and save every item.

    Opens ``url`` in headless Chrome. For each page it scrolls down, reads
    every ``li`` under ``#J_goodsList``, hands one dict per item to
    ``save_data`` and then clicks the pagination control to move on.

    Args:
        url: Search-results URL to open first.
        pages: Number of result pages to scrape (default 5).
    """
    opt = Options()
    opt.add_argument('--headless')
    opt.add_argument('--disable-gpu')
    driver = Chrome(options=opt)
    try:
        time.sleep(3)
        driver.get(url)
        time.sleep(3)
        for page in range(pages):
            _scroll_down(driver)
            li_list = driver.find_elements(By.XPATH, '//*[@id="J_goodsList"]/ul/li')
            time.sleep(3)
            for li in li_list:
                save_data(_extract_item(li))
            # No need to load another page once the last one has been scraped.
            if page < pages - 1:
                driver.find_element(By.XPATH, '//*[@id="J_bottomPage"]/span[1]/a[9]/em').click()
                time.sleep(2)
    finally:
        # Always shut the browser down, even if a lookup or the save fails.
        driver.quit()


if __name__ == '__main__':
    url = 'https://search.jd.com/Search?keyword=%E9%BB%91%E4%B8%9D&enc=utf-8&wq=%E9%BB%91%E4%B8%9D&pvid=657b0638977e4f899bd4e339e3834dc7'
    dy(url)