# -*- coding: utf-8 -*-
    2. # @Time : 2021/4/14 10:15
    3. # @Author : PHC
# @File : lastGoodGoal.py
    5. # @Software : PyCharm
    6. from bs4 import BeautifulSoup
    7. import xlwt
    8. import re
    9. import urllib.request,urllib.error
    10. import sqlite3
    11. def main():
    12. link="https://v.qq.com/channel/nba?channel=nba&feature=4&iplayer=1&listpage=1"
    13. askURl(link)
    14. # savepath()
    15. datalist=getDate(link)
    16. savepath="科比个人锦集.xls"
    17. # saveData(datalist,savepath)
    18. dbpath="KoBeShow.db"
    19. saveDB(datalist,dbpath)
    20. # 定制规则
    21. #1、获得影片的链接
    22. getVideoLink=re.compile(r'<a.*href="(.*?)".*>') # .*
    23. #2、获得影片的封面
    24. getImgSrc=re.compile(r'<img.*src="(.*)".')
    25. #3、视频时长
    26. getTime=re.compile(r'<div class="figure_caption">(.*?)</div>')
    27. #4、获取标题名字
    28. getName=re.compile(r'<a.*target="_blank" title=.*>(.*?)</a>')
    29. #1、访问网页,得到网页内容
    30. def askURl(url):
    31. head={
    32. "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36 Edg/89.0.774.76"
    33. }
    34. request=urllib.request.Request(url,headers=head) #发送请求
    35. html=""
    36. try:
    37. response=urllib.request.urlopen(request) #响应请求
    38. html=response.read().decode("utf-8") #根据请求获取网页内容
    39. # print(html)
    40. except urllib.error.URLError as e:
    41. if hasattr(e,"code"):
    42. print(e.code)
    43. if hasattr(e,"reason"):
    44. print(e.reason)
    45. return html
    46. #2、定制规则、获取网页所需要的数据
    47. def getDate(link):
    48. datalist=[]
    49. html=askURl(link)
    50. #逐一解析数据
    51. soup=BeautifulSoup(html,"html.parser")
    52. # for item in soup.find_all('div',class_="mod_figure mod_figure_h_default mod_figure_list_box"):
    53. for item in soup.find_all('div', class_="list_item"):
    54. data=[] #保存一部视频的信息
    55. # # print(type(item))
    56. item=str(item)
    57. # print(item)
    58. videoLink=re.findall(getVideoLink,item)[0]
    59. # videoLink=videoLink.replace("[","")
    60. # videoLink=re.sub("]","",videoLink)
    61. data.append(videoLink)
    62. # print(videoLink)
    63. img=re.findall(getImgSrc,item)[0]
    64. # img=img.replace("[","")
    65. # img=re.sub("]","",img)
    66. data.append(img)
    67. # print(img)
    68. # #
    69. time=re.findall(getTime,item)[0]
    70. data.append(time)
    71. # print(time)
    72. name=re.findall(getName,item)[0]
    73. data.append(name)
    74. # print(name)
    75. datalist.append(data)
    76. # print(datalist)
    77. return datalist
    78. #3、保存数据
    79. # def saveData(datalist,savepath):
    80. # # print("数据已保存")
    81. # table=xlwt.Workbook(encoding="utf-8",style_compression=0)
    82. # sheet=table.add_sheet("科比绝杀锦集",cell_overwrite_ok=True)
    83. # colum=("视频链接","视频封面","视频时长","视频名称")
    84. # for i in range(0,4):
    85. # sheet.write(0,i,colum[i])
    86. # for i in range(len(datalist)):
    87. # print("第%s条"%(i+1))
    88. # data=datalist[i]
    89. # for j in range(0,4):
    90. # sheet.write(i+1,j,data[j])
    91. # table.save(savepath)
    92. #保存数据到数据库
    93. def saveDB(datalist,dbpath):
    94. create_db(dbpath)
    95. conn=sqlite3.connect(dbpath)
    96. cursor=conn.cursor()
    97. for data in datalist:
    98. for index in range(len(data)):
    99. # if index==2:
    100. # continue
    101. data[index]='"'+data[index]+'"'
    102. sql='''
    103. insert into kobeshow
    104. (videolink,img,time,name)
    105. values(%s)'''%",".join(data)
    106. print(sql,";")
    107. cursor.execute(sql)
    108. conn.commit()
    109. cursor.close()
    110. conn.close()
    111. print("已保存到数据库")
    112. #创建数据库
    113. def create_db(dbpath):
    114. sql='''
    115. create table if not exists kobeshow (
    116. Id integer primary key autoincrement,
    117. videolink text,
    118. img text,
    119. time varchar (10),
    120. name text
    121. )
    122. '''
    123. conn=sqlite3.connect(dbpath)
    124. cur=conn.cursor()
    125. cur.execute(sql)
    126. conn.commit()
    127. conn.close()
    128. print("数据库已创建")
    129. if __name__=="__main__":
    130. main()
    131. # create_db("test1.db")