注意:
- xpath的提取不够熟练;
- 有时候会报获取不到元素的错误，这时要考虑元素是否位于 iframe 中，以及是否已经切换进该 iframe；也可能是上一个点击事件之后页面还在加载，元素尚未加载出来；
- 此案例需手动拖动验证码;

```python
from selenium import webdriver
from configparser import ConfigParser
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv

class QQ:
    """Log in to QQ Mail with Selenium and export the inbox listing to CSV.

    The flow is ``login`` -> ``parse_mail`` -> ``save_mail``; ``run`` chains
    the three and always closes the browser afterwards.  Per the notes that
    accompany this script, the slider captcha shown during login has to be
    dragged manually by the user.

    Args:
        username: Account name typed into the login form.
        password: Password typed into the login form.
    """

    # Seconds an explicit wait polls for an element/frame before it raises.
    WAIT_TIMEOUT = 10

    def __init__(self, username, password):
        self.driver = webdriver.Chrome()
        self.url = "https://mail.qq.com/"
        self.username = username
        self.password = password
        # The first row is the CSV header; parsed mails are appended after it.
        self.mail_list = [["序号", "发送人", "主题", "发送内容", "发送时间"]]
        # Running index of the mails parsed so far.
        self.num = 0

    def login(self):
        """Open the login page and submit the account credentials."""
        self.driver.get(self.url)
        # The login form lives inside an iframe; wait for it instead of
        # assuming it is already attached when the page load returns.
        WebDriverWait(self.driver, self.WAIT_TIMEOUT).until(
            EC.frame_to_be_available_and_switch_to_it((By.ID, "login_frame"))
        )
        # Switch from the default QR-code view to the password form.
        self.driver.find_element(By.ID, "switcher_plogin").click()
        self.driver.find_element(By.ID, "u").send_keys(self.username)
        self.driver.find_element(By.ID, "p").send_keys(self.password)
        self.driver.find_element(By.ID, "login_button").click()

    @staticmethod
    def _split_subject(text):
        """Split a ``"title - summary"`` cell into ``(title, summary)``.

        Only the first ``-`` separates the two parts, so a summary that
        itself contains dashes is kept whole.  When there is no dash the
        summary is an empty string instead of raising ``IndexError``.
        """
        title, _, content = text.strip().partition("-")
        return title.strip(), content.strip()

    def parse_mail(self):
        """Open the inbox and collect one record per mail row.

        Each record ``[index, sender, title, content, time]`` is appended to
        ``self.mail_list`` and echoed to stdout.

        Raises:
            selenium.common.exceptions.TimeoutException: If the inbox link or
                the mail frame does not appear within ``WAIT_TIMEOUT`` seconds.
        """
        wait = WebDriverWait(self.driver, self.WAIT_TIMEOUT)
        # ``login`` leaves the driver focused on the login iframe; return to
        # the top-level document before looking for the inbox link.
        self.driver.switch_to.default_content()
        inbox_link = wait.until(
            EC.presence_of_element_located((By.ID, "readmailbtn_link"))
        )
        inbox_link.click()
        # Give the click a moment to start loading the inbox before entering
        # the frame, so the rows are not read from the previous frame content.
        time.sleep(1)
        wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID, "mainFrame")))

        rows = self.driver.find_elements(
            By.XPATH,
            '//div[@id="div_showbefore"]/table/tbody/tr/td[@class="l"]//tr',
        )
        for row in rows:
            cells = row.find_elements(By.XPATH, "td")
            # Skip rows that do not have the sender/subject/time columns.
            if len(cells) < 4:
                continue
            self.num += 1
            sender = cells[0].text
            title, content = self._split_subject(cells[2].text)
            sender_time = cells[3].text
            self.mail_list.append([self.num, sender, title, content, sender_time])
            print("{} sender{} title{} content{} sender_time{}".format(
                self.num, sender, title, content, sender_time))

    def save_mail(self):
        """Write ``self.mail_list`` to ``qq邮件.csv`` in the working directory."""
        # An explicit encoding keeps the output independent of the platform
        # default; the BOM lets spreadsheet tools detect UTF-8 for the
        # Chinese header and mail text.
        with open("qq邮件.csv", "w", newline="", encoding="utf-8-sig") as f:
            csv.writer(f).writerows(self.mail_list)

    def run(self):
        """Log in, scrape the inbox, save the CSV, and close the browser."""
        try:
            # 1. Log in
            self.login()
            # 2. Fetch the mails
            self.parse_mail()
            # 3. Save the mails
            self.save_mail()
        finally:
            # 4. Quit the browser even when an earlier step failed
            self.driver.quit()


def get_userdata():
    """Read the login credentials from ``userdata.ini``.

    The file is looked up relative to the current working directory and must
    contain a ``[username]`` section with a ``username`` option and a
    ``[password]`` section with a ``password`` option.

    Returns:
        A ``(username, password)`` tuple of strings.

    Raises:
        configparser.NoSectionError: If a section is missing (this is also
            what happens when the file itself does not exist, because
            ``ConfigParser.read`` silently skips unreadable files).
        configparser.NoOptionError: If an option is missing from its section.
    """
    cfg = ConfigParser()
    cfg.read("userdata.ini")
    username = cfg.get("username", "username")
    password = cfg.get("password", "password")
    return username, password


if name == “main“: username, password = get_userdata() qq = QQ(username, password) qq.run() ```