爬虫模板

  # Crawler template: log in through a requests session, then reuse its cookie.
# pip install requests
# pip install bs4
# pip install pycryptodome
# pip install lxml
import os
import re
import time

import requests
from bs4 import BeautifulSoup
from Crypto.Cipher import AES
from lxml import etree

from utils import func

url = "https://passport.17k.com/ck/user/login"
# NOTE(review): credentials are hard-coded in this template; load them from the
# environment or a config file before pointing it at a real account.
# A dict (not a pre-joined string) makes requests form-encode the body and set
# the application/x-www-form-urlencoded Content-Type header.
data = {"loginName": "13271473920", "password": "qqqqqqq1"}
session = requests.session()
head = {
    "User-Agent": func.random_ua(),
}
# Sample proxy mapping; pass `proxies=proxies` below to actually route through it.
proxies = {
    "https": "https://218.45.56.8:3125",
}

# Step 1: log in so the session stores the identity cookie.
with session.post(url, data=data, headers=head, proxies=None, timeout=10) as res:
    # Step 2: the same session sends that cookie along with the next request.
    url = "https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919"
    with session.get(url, headers=head, timeout=10) as res2:
        print(res2.json())  # the content we actually wanted
        print(res.cookies)  # cookies set by the login response

# The sections below are reference snippets, kept commented out on purpose.
# --------------------------BeautifulSoup: pull values out of HTML
# soup = BeautifulSoup(res.text, "html.parser")  # name the parser explicitly to avoid the warning
# table = soup.find("div", class_="tbl-body")  # find(tag, attribute=value)
# table = table.find_all("th")
# for i in range(0, len(table) - 1):
#     print(table[i].text)
# --------------------------re: pull values out with a regular expression
# (?P<guang>...) names the group; re.S lets "." match newlines as well
# obj = re.compile(r'<span style="font-size:\d+px;">(?P<guang>.*?)</span>', re.S)
# matches = obj.finditer(res.text)  # finditer needs a str, so use .text rather than .json()
# for i in matches:
#     print(i.group('guang'))
# --------------------------XPath: pull values out of a parsed document
# tree = etree.HTML(res.text)  # etree.parse() expects a file or path, not page source
# result = tree.xpath("/html/body/ul/li[1]/a/text()")  # text of <a> under the li at index 1
# result = tree.xpath('/html/body/ul/ol/li/a/@href')  # href attribute of the matched <a> tags
# result = tree.xpath('/html/body/ul/ol/li/a[@href="feiji"]/text()')  # text of <a> whose href is "feiji"
# --------------------------AES decryption
# def dec_ts(name, key):
#     aes = AES.new(key=key, IV=b"0000000000000000", mode=AES.MODE_CBC)
#     with open(f"noval/{name}", mode="rb") as f1,\
#          open(f"noval/temp_{name}", mode="wb") as f2:
#         bs = f1.read()  # read the encrypted source file
#         f2.write(aes.decrypt(bs))  # write the decrypted bytes to the temp file
#     print(f"{name}处理完毕")
# --------------------------os: merge the segment files
# def merge_ts():
#     # mac: cat 1.ts 2.ts 3.ts > xxx.mp4
#     # windows: copy /b 1.ts+2.ts+3.ts xxx.mp4
#     lsts = []
#     with open("noval/越狱第一季第一集_second_m3u8.txt", mode="r", encoding="utf-8") as f:
#         for line in f:
#             if line.startswith("#"):
#                 continue
#             line = line.strip()
#             lsts.append(f"noval/temp_{line}")
#
#     s = "+".join(lsts)  # 1.ts+2.ts+3.ts
#     os.system(f"copy /b {s} movie.mp4")
#     print("搞定!")

单线程异步协程模板

  # Single-threaded async template: download several images concurrently.
import asyncio
import time

import aiofiles  # pip install aiofiles
import aiohttp  # pip install aiohttp

urls = [
    "http://kr.shanghai-jiuxin.com/file/2022/0511/f92746daee951aea129ca7b8bafbdb97.jpg",
    "http://kr.shanghai-jiuxin.com/file/2022/0511/d93db47964abd557794ccd68d8a8ab16.jpg",
    "http://kr.shanghai-jiuxin.com/file/2022/0511/8d647c98d9f59ea4ae38a0ba1e568bc3.jpg",
    "http://kr.shanghai-jiuxin.com/file/2022/0511/0d69db8b2f5ad317c39bfd4ab1033064.jpg",
]


async def download(url, session=None):
    """Fetch *url* and save it in the current directory under its last path segment.

    :param url: address of the file to download
    :param session: optional shared ``aiohttp.ClientSession``; when omitted a
        temporary session is opened for this one download
    """
    if session is None:
        # Stand-alone call: own a session just for this download.
        async with aiohttp.ClientSession() as own_session:
            return await download(url, own_session)

    name = url.rsplit("/", 1)[1]  # split once from the right, keep the file name
    print("开始准备下载")
    async with session.get(url) as resp:
        payload = await resp.content.read()  # reading the body is async, so await it
    async with aiofiles.open(name, mode="wb") as f:
        await f.write(payload)
    print("下载完成")


async def main():
    """Download every entry of ``urls`` concurrently over one shared session."""
    async with aiohttp.ClientSession() as session:
        # gather() wraps each coroutine in a task itself; asyncio.wait() no
        # longer accepts bare coroutine objects on Python 3.11+.
        await asyncio.gather(*(download(url, session) for url in urls))


if __name__ == '__main__':
    t1 = time.time()
    asyncio.run(main())  # creates the event loop, runs main() and closes the loop
    t2 = time.time()
    print(t2 - t1)

线程池

  # Thread-pool template.
# 1. Work out how to extract the data of a single page.
# 2. Hand the pages to a thread pool so many are fetched at the same time.
import csv
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from lxml import etree

# newline="" is what the csv module asks for, so the writer controls line
# endings itself and no extra "\r" is added on platforms that write "\r\n".
f = open("data.csv", mode="w", encoding="utf-8", newline="")
csvwriter = csv.writer(f)
# All worker threads share one writer, so rows are written under a lock.
_write_lock = threading.Lock()


def download_one_page(url):
    """Fetch one listing page and append its table rows to ``data.csv``.

    :param url: address of the page to scrape
    :raises IndexError: if the expected table is not found in the page
    """
    # Fetch the page source.
    resp = requests.get(url, timeout=10)
    html = etree.HTML(resp.text)
    table = html.xpath("/html/body/div[2]/div[4]/div[1]/table")[0]
    # Skip the header row; equivalent to table.xpath("./tr")[1:].
    trs = table.xpath("./tr[position()>1]")
    # Light clean-up of every cell: drop backslashes and slashes.
    rows = [
        [item.replace("\\", "").replace("/", "") for item in tr.xpath("./td/text()")]
        for tr in trs
    ]
    # Write the whole page in one locked step so rows from different pages
    # cannot interleave inside the file.
    with _write_lock:
        csvwriter.writerows(rows)
    print(url, "提取完毕!")


if __name__ == '__main__':
    try:
        # Create the pool and submit one task per page.
        with ThreadPoolExecutor(50) as t:
            futures = {}
            for i in range(1, 200):  # 199 * 20 = 3980
                page_url = f"http://www.xinfadi.com.cn/marketanalysis/0/list/{i}.shtml"
                futures[t.submit(download_one_page, page_url)] = page_url
            # A failed task keeps its exception inside the future; report it
            # instead of letting it vanish silently.
            for done in as_completed(futures):
                error = done.exception()
                if error is not None:
                    print(futures[done], "提取失败:", error)
    finally:
        f.close()  # flush buffered rows and release the file handle
    print("全部下载完毕!")

selenium模拟

  # Selenium template: 12306 login with a click-captcha solved by Chaojiying.
# pip install selenium
import time

from selenium.webdriver import Chrome  # drives the Chrome browser
from selenium.webdriver.chrome.options import Options  # browser start-up options
from selenium.webdriver.common.action_chains import ActionChains  # chained mouse/keyboard actions
from selenium.webdriver.common.by import By  # locator strategies for find_element
from selenium.webdriver.common.keys import Keys  # keyboard keys
from selenium.webdriver.support.select import Select  # wrapper for <select> drop-downs

from chaojiying import Chaojiying_Client

# Initialise the Chaojiying captcha-solving client.
# NOTE(review): account credentials are hard-coded; move them out of the source.
chaojiying = Chaojiying_Client('13271473920', 'qqqqqqq1', '933391')

# -------------------------------------If the site detects automation, hide navigator.webdriver
# -------------------------------------1. Chrome older than 88: inject JS when the browser starts
# web = Chrome()
# web.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
#     "source": """
#         navigator.webdriver = undefined
#         Object.defineProperty(navigator, 'webdriver', {
#             get: () => undefined
#         })
#     """
# })
# web.get("https://www.baidu.com")
# -------------------------------------2. Chrome 88 or newer
option = Options()
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_argument('--disable-blink-features=AutomationControlled')
# -------------------------------------Headless (background) mode
# option.add_argument("--headless")
# option.add_argument("--disable-gpu")

web = Chrome(options=option)
web.get("https://kyfw.12306.cn/otn/resources/login.html")
time.sleep(2)
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
# Selenium 4 releases no longer provide.
web.find_element(By.XPATH, '/html/body/div[2]/div[2]/ul/li[2]/a').click()
time.sleep(3)

# -------------------------------------Grab the captcha image from the page
verify_img_element = web.find_element(By.XPATH, '//*[@id="J-loginImg"]')
verify_img = verify_img_element.screenshot_as_png

# -------------------------------------Let Chaojiying recognise the captcha
dic = chaojiying.PostPic(verify_img, 9004)
result = dic['pic_str']  # x1,y1|x2,y2|x3,y3
rs_list = result.split("|")
for rs in rs_list:  # x1,y1
    p_temp = rs.split(",")
    x = int(p_temp[0])
    y = int(p_temp[1])
    # Move the mouse to the point and click it. Nothing happens until
    # perform() is called: the chain only queues the steps before that.
    # NOTE(review): newer Selenium releases measure this offset from the
    # element's centre rather than its top-left corner - confirm against the
    # installed version.
    ActionChains(web).move_to_element_with_offset(verify_img_element, x, y).click().perform()
time.sleep(1)

# -------------------------------------Type the user name and password
web.find_element(By.XPATH, '//*[@id="J-userName"]').send_keys("123456789")
web.find_element(By.XPATH, '//*[@id="J-password"]').send_keys("12346789")
# web.find_element(By.XPATH, '//*[@id="search_input"]').send_keys("python", Keys.ENTER)

# -------------------------------------Click the login button
web.find_element(By.XPATH, '//*[@id="J-login"]').click()
time.sleep(5)

# -------------------------------------Drag the slider
btn = web.find_element(By.XPATH, '//*[@id="nc_1_n1z"]')
ActionChains(web).drag_and_drop_by_offset(btn, 300, 0).perform()

# -------------------------------------Switching windows
# web.switch_to.window(web.window_handles[-1])
# # Extract content from the new window
# job_detail = web.find_element(By.XPATH, '//*[@id="job_detail"]/dd[2]/div').text
# print(job_detail)
# # Close the child window
# web.close()
# # Point selenium back at the original window
# web.switch_to.window(web.window_handles[0])
# -------------------------------------Pages that contain an iframe
# web.get("https://www.91kanju.com/vod-play/541-2-1.html")
# # Locate the iframe first, switch into it, and only then read its content
# iframe = web.find_element(By.XPATH, '//*[@id="player_iframe"]')
# web.switch_to.frame(iframe)  # switch into the iframe
# # web.switch_to.default_content()  # switch back to the main page
# tx = web.find_element(By.XPATH, '//*[@id="main"]/h3[1]').text
# print(tx)
# -------------------------------------Changing the selected item of a <select>
# sel_el = web.find_element(By.XPATH, '//*[@id="OptionDate"]')
# # Wrap the element as a drop-down menu
# sel = Select(sel_el)
# sel.select_by_index(i)  # select by index

猜年龄 循环判断

  # Guess-the-age game: three tries per round, then choose to continue or quit.
import random

MAX_TRIES = 3


def _ask_continue(read, show):
    """Keep asking until the player answers Y (True) or Q (False)."""
    while True:
        res = read("请输入Y继续或Q退出:")
        if res == "Y":
            return True
        if res == "Q":
            show("我要退出了")
            return False
        # Anything else is rejected and the question is asked again, so the
        # second answer is honoured instead of being read and thrown away.
        show("请输入正确的命令")


def guess_age(age, read=input, show=print):
    """Run the guessing loop until the age is guessed or the player quits.

    :param age: the number the player has to guess
    :param read: callable taking a prompt and returning the typed text
    :param show: callable used to display a message
    :return: True if the age was guessed, False if the player quit
    """
    tries = 0
    while True:
        try:
            age_i = int(read("请输入要猜的年龄:"))
        except ValueError:
            # Non-numeric input does not count as a try.
            show("请输入一个整数")
            continue
        if age_i == age:
            show("恭喜你猜对了")
            return True
        tries += 1
        if tries < MAX_TRIES:
            show("猜错了,请重新猜一猜")
            continue
        show("猜3次上限了")
        if not _ask_continue(read, show):
            return False
        tries = 0  # a fresh round of three tries


if __name__ == "__main__":
    age = random.randint(10, 20)
    print("随机年龄是:%d" % age)
    guess_age(age)

BMI判断 加减乘除

  # BMI formula: weight divided by the square of the height.
# The thresholds below assume kilograms and metres (the standard BMI
# definition); the prompts themselves do not state units.


def bmi_category(bmi):
    """Return the weight category label for a BMI value.

    Each band includes its lower bound, so values of exactly 18.5, 25, 28
    and 32 always receive a label.

    :param bmi: body-mass index
    :return: one of the five category labels
    """
    if bmi < 18.5:
        return "体重过轻"
    if bmi < 25:
        return "体重正常"
    if bmi < 28:
        return "体重过重"
    if bmi < 32:
        return "体重肥胖"
    return "严重肥胖"


if __name__ == "__main__":
    h = float(input("请输入你的身高:"))
    t = float(input("请输入你的体重:"))
    if h <= 0:
        # Dividing by a zero height would raise ZeroDivisionError.
        print("身高必须大于0")
    else:
        s = t / (h ** 2)
        print(bmi_category(s))

求出一个集合中不重复的值 循环

  # Report the values that occur exactly once in a list.
def l(list1=None):
    """Print and return the values that appear exactly once in *list1*.

    :param list1: items to inspect; defaults to the built-in sample list
    :return: the non-repeated values, in order of first appearance
    """
    from collections import Counter

    if list1 is None:
        list1 = [1, 1, 2, 2, 3, 4, 4, 5, 5, 5, 3, 3, 4, 4, 7, 8, 9, 9, 0]
    # One counting pass replaces re-scanning the whole list for every value.
    counts = Counter(list1)
    uniques = [value for value, seen in counts.items() if seen == 1]
    for value in uniques:
        print("出现不重复的值:{}".format(value))
    return uniques


l()

双人对战 类和对象的魔术方法调用

  # Two-player duel: classes, objects and the __str__ magic method.
import random
import time


class Person:
    """A duel participant with a name and a blood (hit point) total."""

    def __init__(self, name, blood):
        """Create a fighter for the duel.

        :param name: the fighter's name
        :param blood: starting blood
        """
        self.name = name  # instance attributes shared by every method
        self.blood = blood

    def tong(self, dr):
        """Stab *dr*: it loses 10 blood."""
        dr.blood -= 10
        print("%s捅了%s一刀,%s掉10点血,%s剩余%s点血" % (self.name, dr.name, dr.name, dr.name, dr.blood))

    def kan(self, dr):
        """Slash *dr*: it loses 20 blood."""
        dr.blood -= 20
        print("%s砍了%s一刀,%s掉20点血,%s剩余%s点血" % (self.name, dr.name, dr.name, dr.name, dr.blood))

    def chi(self, dr=None):
        """Give a pill worth 10 blood to *dr*, or to this fighter when omitted.

        The pill is taken by the fighter passed in, not by the caller, which
        is why ``xm.chi(ygc)`` heals ``ygc``.
        """
        if dr is None:
            dr = self
        dr.blood += 10
        print("%s吃了一颗药,加10点血,%s剩余%s点血" % (dr.name, dr.name, dr.blood))

    def __str__(self):
        return "%s还剩下%s血" % (self.name, self.blood)


xm = Person('西门', 100)
ygc = Person('叶孤城', 100)


def main():
    """Play random rounds until one fighter's blood reaches zero or below."""
    # Die roll -> (action, fighter the action is applied to).
    moves = {
        1: (xm.tong, ygc),
        2: (xm.kan, ygc),
        3: (xm.chi, ygc),
        4: (ygc.tong, xm),
        5: (ygc.kan, xm),
        6: (ygc.chi, xm),
    }
    while xm.blood > 0 and ygc.blood > 0:
        action, target = moves[random.randint(1, 6)]
        action(target)
        print(ygc, xm)
        print("*" * 40)
        time.sleep(1)
    print("游戏结束", ygc, xm)


if __name__ == "__main__":
    main()