原文链接

写在前面:

自己已经创建公众号啦~

AI算法交流+开源数据汇总+私房数据及标注数据共享+自己实践项目开源

欢迎大家关注:DeepAI 视界

【项目实战】数据爬虫 + 数据清洗   数据可视化 开源代码啦 - 图1
展示一下:
【项目实战】数据爬虫 + 数据清洗   数据可视化 开源代码啦 - 图2
【项目实战】数据爬虫 + 数据清洗   数据可视化 开源代码啦 - 图3
【项目实战】数据爬虫 + 数据清洗   数据可视化 开源代码啦 - 图4
【项目实战】数据爬虫 + 数据清洗   数据可视化 开源代码啦 - 图5
【项目实战】数据爬虫 + 数据清洗   数据可视化 开源代码啦 - 图6
【项目实战】数据爬虫 + 数据清洗   数据可视化 开源代码啦 - 图7
【项目实战】数据爬虫 + 数据清洗   数据可视化 开源代码啦 - 图8

爬虫:链家网二手房(以贵阳市为例)

  1. """
  2. 爬虫
  3. """
  4. import requests
  5. from lxml import etree
  6. import xlwt
  7. import xlrd
  8. import csv
  9. import pandas as pd
  10. import time
  11. import re
  12. class LanjiaSpider:
  13. def __init__(self):
  14. self.url = 'https://wh.lianjia.com/ershoufang/ronganxian/pg{}/'
  15. self.headers = {
  16. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}
  17. def get_response_spider(self, url_str): # 发送请求
  18. get_response = requests.get(url_str, headers=self.headers)
  19. time.sleep(2)
  20. response = get_response.content.decode()
  21. html = etree.HTML(response)
  22. return html
  23. def get_content_html(self, html): # 使xpath获取数据
  24. self.houseInfo = html.xpath('//div[@class="houseInfo"]/text()')
  25. self.title = html.xpath('//div[@class="title"]/a/text()')
  26. self.positionInfo = html.xpath('//div[@class="positionInfo"]/a/text()')
  27. self.totalPrice = html.xpath('//div[@class="totalPrice"]/span/text()')
  28. self.unitPrice = html.xpath('//div[@class="unitPrice"]/span/text()')
  29. self.followInfo = html.xpath('//div[@class="followInfo"]/text()')
  30. self.tag = html.xpath('//div[@class="tag"]/span/text()')
  31. # print(title)
  32. # return houseInfo,title,positionInfo,totalPrice,unitPrice,followInfo,tag
  33. def xpath_houseInfo(self):
  34. #print(self.houseInfo)
  35. #print(type(self.houseInfo))
  36. # df = pd.DataFrame({"houseInfo": self.houseInfo,"tite":self.title,"positionInfo":self.positionInfo,"totaPrice":self.totalPrice,"unitPrice":self.unitPrice,"followInfo":self.followInfo,"tag":self.tag})
  37. # df=pd.DataFrame({"houseInfo": self.houseInfo,"tite":self.title})
  38. # df.to_excel(r'C:\Users\wy\Desktop\sublime\链家\pand3.xlsx')
  39. # a=len(self.houseInfo)
  40. for i in range(len(self.houseInfo)):
  41. # print(i)
  42. # yield i
  43. # print(type(self.houseInfo))
  44. yield self.houseInfo[i]
  45. def qingxi_data_houseInfo(self): # 清洗数据
  46. self.xpath_houseInfo()
  47. self.xpath_title()
  48. self.xpath_positionInfo()
  49. self.xpath_totalPrice()
  50. self.xpath_unitPrice()
  51. self.xpath_followInfo()
  52. self.xpath_tag()
  53. get_houseInfo = self.xpath_houseInfo()
  54. get_title = self.xpath_title()
  55. get_positionInfo=self.xpath_positionInfo()
  56. get_totalPrice = self.xpath_totalPrice()
  57. get_unitPrice = self.xpath_unitPrice()
  58. get_followInfo=self.xpath_followInfo()
  59. get_tag=self.xpath_tag()
  60. i = 1
  61. while True:
  62. data_houseInfo= next(get_houseInfo)
  63. data_title=next(get_title)
  64. data_positionInfo=next(get_positionInfo)
  65. data_totalPrice=next(get_totalPrice)
  66. data_unitPrice=next(get_unitPrice)
  67. data_followInfo=next(get_followInfo)
  68. data_tag=next(get_tag)
  69. with open("a.csv", "a", newline="", encoding="utf-8-sig") as f:
  70. # fieldnames = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
  71. # writer = csv.DictWriter(f, fieldnames=fieldnames) # 写入表头
  72. # writer.writeheader()
  73. writer = csv.DictWriter(f, fieldnames=fieldnames) # 写入表头
  74. list_1 = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
  75. list_2 = [data_houseInfo,data_title,data_positionInfo,data_totalPrice,data_unitPrice,data_followInfo,data_tag]
  76. list_3 = dict(zip(list_1, list_2))
  77. writer.writerow(list_3)
  78. print("写入第"+str(i)+"行数据")
  79. i += 1
  80. if i > len(self.houseInfo):
  81. break
  82. def xpath_title(self):
  83. for i in range(len(self.title)):
  84. yield self.title[i]
  85. def xpath_positionInfo(self):
  86. for i in range(len(self.positionInfo)):
  87. yield self.positionInfo[i]
  88. def xpath_totalPrice(self):
  89. for i in range(len(self.totalPrice)):
  90. yield self.totalPrice[i]
  91. def xpath_unitPrice(self):
  92. for i in range(len(self.unitPrice)):
  93. yield self.unitPrice[i]
  94. def xpath_followInfo(self):
  95. for i in range(len(self.followInfo)):
  96. yield self.followInfo[i]
  97. def xpath_tag(self):
  98. for i in range(len(self.tag)):
  99. yield self.tag[i]
  100. def run(self):
  101. i = 1
  102. while True:
  103. url_str = self.url.format(i) # 构造请求url
  104. html = self.get_response_spider(url_str)
  105. self.get_content_html(html)
  106. self.qingxi_data_houseInfo()
  107. i += 1
  108. if i == 1: # 不包括57页
  109. break
  110. if __name__ == "__main__":
  111. with open("a.csv", "a", newline="", encoding="utf-8-sig") as f:
  112. fieldnames = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
  113. writer = csv.DictWriter(f, fieldnames=fieldnames) # 写入表头
  114. writer.writeheader()
  115. lanjia = LanjiaSpider()
  116. lanjia.run()

对应的数据可视化:

  1. """
  2. 数据分析及可视化
  3. """
  4. import pandas as pd
  5. from pyecharts.charts import Line, Bar
  6. import numpy as np
  7. from pyecharts.globals import ThemeType
  8. from pyecharts.charts import Pie
  9. from pyecharts import options as opts
  10. places = ['lianjia_BaiYunQu', 'lianjia_GuanShanHuQu', 'lianjia_HuaXiQu', 'lianjia_NanMingQu', 'lianjia_WuDangQu', 'lianjia_YunYanQu']
  11. place = ['白云区', '观山湖区', '花溪区', '南明区', '乌当区', '云岩区']
  12. avgs = [] # 房价均值
  13. median = [] # 房价中位数
  14. favourate_avg = [] # 房价收藏人数均值
  15. favourate_median = [] # 房价收藏人数中位数
  16. houseidfo = ['2室1厅', '3室1厅', '2室2厅', '3室2厅', '其他'] # 房型定义
  17. houseidfos = ['2.1', '3.1', '2.2', '3.2']
  18. sum_house = [0, 0, 0, 0, 0] # 各房型数量
  19. price = [] # 房价
  20. fav = [] # 收藏人数
  21. type = []
  22. area = [] # 房间面积
  23. def avg(name):
  24. df = pd.read_csv(str(name)+'.csv', encoding='utf-8')
  25. pattern = '\d+'
  26. df['totalPrice/万元'] = df['totalPrice/万元'].str.findall(pattern)
  27. df['followInfo'] = df['followInfo'].str.findall(pattern)
  28. df['houseInfo'] = df['houseInfo'].str.findall(pattern)
  29. sum_houses = [0, 0, 0, 0, 0]
  30. # print(sum_house)
  31. avg_work_year = []
  32. medians = []
  33. favourates = []
  34. k = 0
  35. k1 = 0
  36. k3 = 0
  37. k4 = 0
  38. for i in range(len(df)):
  39. if (i + 1) % 2 == 0:
  40. continue
  41. else:
  42. if len(df['totalPrice/万元'][i]) == 2:
  43. avg_work_year.append(','.join(df['totalPrice/万元'][i]).replace(',', '.'))
  44. medians.append(float(','.join(df['totalPrice/万元'][i]).replace(',', '.')))
  45. price.append(','.join(df['totalPrice/万元'][i]).replace(',', '.'))
  46. if len(df['followInfo'][i]) ==2:
  47. favourates.append(int(','.join(df['followInfo'][i][:1])))
  48. fav.append(int(','.join(df['followInfo'][i][:1])))
  49. if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 2.1:
  50. k +=1
  51. sum_houses[0] =k
  52. type.append(2.1)
  53. if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 3.1:
  54. k1 +=1
  55. sum_houses[1] =k1
  56. type.append(3.1)
  57. if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 2.2:
  58. k3 +=1
  59. sum_houses[2] =k3
  60. type.append(2.2)
  61. if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 3.2:
  62. k4 +=1
  63. sum_houses[3] =k4
  64. type.append(3.2)
  65. else:
  66. k4 +=1
  67. sum_houses[4] = k4
  68. type.append('other')
  69. area.append(float(','.join(df['houseInfo'][i][2:4]).replace(',', '.')))
  70. sum_house[0] =sum_houses[0]
  71. sum_house[1] = sum_houses[1]
  72. sum_house[2] = sum_houses[2]
  73. sum_house[3] = sum_houses[3]
  74. sum_house[4] = sum_houses[4]
  75. favourates.sort()
  76. favourate_median.append(int(np.median(favourates)))
  77. medians.sort()
  78. median.append(np.median(medians))
  79. # price = avg_work_year
  80. b = len(avg_work_year)
  81. b1= len(favourates)
  82. sum = 0
  83. sum1 = 0
  84. for i in avg_work_year:
  85. sum = sum+float(i)
  86. avgs.append(round(sum/b, 2))
  87. for i in favourates:
  88. sum1 = sum1+float(i)
  89. favourate_avg.append(round(int(sum1/b1), 2))
  90. for i in places:
  91. avg(i)
  92. print("各区平均房价", avgs)
  93. print('各房型的出售总数:', sum_house)
  94. print("房间面积", area)
  95. """
  96. [280, 56, 504, 1676, 1680]
  97. [392, 112, 448, 1679, 1680]
  98. [224, 0, 616, 3359, 3360]
  99. [448, 112, 280, 1679, 1680]
  100. [504, 0, 336, 1680, 1679]
  101. [224, 56, 168, 1680, 1670]
  102. [66.17, 65.6, 76.04, 78.94, 62.06, 74.37]
  103. [68.8, 67.8, 79.8, 70.8, 57.6, 78.8]
  104. [6, 6, 9, 4, 4, 4] [5, 4, 3, 2, 3, 2]
  105. """
  106. # print(median)
  107. # print(favourate_avg,favourate_median)
  108. line = Line()
  109. line.add_xaxis(place)
  110. line.add_yaxis("贵阳各地房价平均值(万元)", avgs)
  111. line.add_yaxis("贵阳各地房价中位数值(万元)", median)
  112. # line.render("predict_line.html")
  113. def bar() -> Bar:
  114. c = (
  115. Bar({"theme": ThemeType.MACARONS})
  116. .add_xaxis(place)
  117. .add_yaxis("平均值", avgs)
  118. .add_yaxis("中位数", median)
  119. .set_global_opts(
  120. title_opts={"text": "贵阳各地房价(万元)"}
  121. )
  122. )
  123. return c
  124. bar().render("predict_bar.html")
  125. # print(sum_house)
  126. def bar() -> Bar:
  127. c = (
  128. Bar({"theme": ThemeType.MACARONS})
  129. .add_xaxis(houseidfo)
  130. .add_yaxis(place[0], [280, 56, 504, 1676, 1680])
  131. .add_yaxis(place[1], [392, 112, 448, 1679, 1680])
  132. .add_yaxis(place[2], [224, 0, 616, 3359, 3360])
  133. .add_yaxis(place[3], [448, 112, 280, 1679, 1680])
  134. .add_yaxis(place[4], [504, 0, 336, 1680, 1679])
  135. .add_yaxis(place[-1], sum_house)
  136. # .add_yaxis("中位数", favourate_median)
  137. .set_global_opts(
  138. title_opts={"text": "贵阳各地房型\n数量"}
  139. )
  140. )
  141. return c
  142. # bar().render("house_bar.html")
  143. line = Line()
  144. line.add_xaxis(place)
  145. line.add_yaxis("贵阳各地房子平均面积\n(平米)", area)
  146. line.render("Area_line.html")
  147. list_num = favourate_avg
  148. attr = place
  149. # print(zip(attr, list_num))
  150. s = [list(z) for z in zip(attr, list_num)]
  151. c = (Pie().add("", s).set_global_opts(title_opts=opts.TitleOpts(title="贵阳市各区楼房\n平均收藏人数"))
  152. .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
  153. )
  154. c.render("pie_avg.html")
  155. list_num = favourate_median
  156. attr = place
  157. # print(zip(attr, list_num))
  158. s = [list(z) for z in zip(attr, list_num)]
  159. c = (Pie().add("", s).set_global_opts(title_opts=opts.TitleOpts(title="贵阳市各区楼房\n收藏人数中位数"))
  160. .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
  161. )
  162. c.render("pie_median.html")
  163. from pyecharts import options as opts
  164. from pyecharts.charts import Scatter3D
  165. from pyecharts.faker import Faker
  166. price=[float(i)/1 for i in price]
  167. # print(price)
  168. # types=list(map(mapfunc,df.house_type.values))
  169. # type = [224, 56, 168, 1680, 1670]
  170. data = []
  171. # print(fav,type)
  172. # for j in range(len(type)):
  173. # for k in range(len(fav)):
  174. for j in range(100):
  175. for k in range(100):
  176. for i in range(500):
  177. try:
  178. data.append([type[j], favourate_avg[k],price[i]])
  179. except:
  180. continue
  181. # print(data)
  182. scatter = (
  183. Scatter3D(init_opts=opts.InitOpts(width='900px', height='600px')) # 初始化
  184. .add("", data,
  185. grid3d_opts=opts.Grid3DOpts(
  186. width=300, depth=300, rotate_speed=300, is_rotate=True,
  187. ),)
  188. # 设置全局配置项
  189. .set_global_opts(
  190. title_opts=opts.TitleOpts(title="房型——关注度——价格\n三维关系图"), # 添加标题
  191. visualmap_opts=opts.VisualMapOpts(
  192. max_=100, # 最大值
  193. pos_top=200, # visualMap 组件离容器上侧的距离
  194. range_color=Faker.visual_color # 颜色映射
  195. )
  196. )
  197. # .render("3D散点图.html")
  198. )
  199. print('数据分析和可视化结束,左边点开~')

同时赠送给大家另一个版本的:

爬虫:链家网:柳州市

  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. import requests
  4. from lxml import etree
  5. import xlwt
  6. import xlrd
  7. import csv
  8. import pandas as pd
  9. import time
  10. class LanjiaSpider:
  11. def __init__(self):
  12. self.url = 'https://liuzhou.lianjia.com/ershoufang/yufengqu/pg{}/'
  13. self.headers = {
  14. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}
  15. def get_response_spider(self, url_str): # 发送请求
  16. get_response = requests.get(self.url, headers=self.headers)
  17. time.sleep(2)
  18. response = get_response.content.decode()
  19. html = etree.HTML(response)
  20. return html
  21. def get_content_html(self, html): # 使xpath获取数据
  22. self.houseInfo = html.xpath('//div[@class="houseInfo"]/text()')
  23. self.title = html.xpath('//div[@class="title"]/a/text()')
  24. self.positionInfo = html.xpath('//div[@class="positionInfo"]/a/text()')
  25. self.totalPrice = html.xpath('//div[@class="totalPrice"]/span/text()')
  26. self.unitPrice = html.xpath('//div[@class="unitPrice"]/span/text()')
  27. self.followInfo = html.xpath('//div[@class="followInfo"]/text()')
  28. self.tag = html.xpath('//div[@class="tag"]/span/text()')
  29. # print(title)
  30. # return houseInfo,title,positionInfo,totalPrice,unitPrice,followInfo,tag
  31. def xpath_houseInfo(self):
  32. #print(self.houseInfo)
  33. #print(type(self.houseInfo))
  34. # df = pd.DataFrame({"houseInfo": self.houseInfo,"tite":self.title,"positionInfo":self.positionInfo,"totaPrice":self.totalPrice,"unitPrice":self.unitPrice,"followInfo":self.followInfo,"tag":self.tag})
  35. # df=pd.DataFrame({"houseInfo": self.houseInfo,"tite":self.title})
  36. # df.to_excel(r'C:\Users\wy\Desktop\sublime\链家\pand3.xlsx')
  37. # a=len(self.houseInfo)
  38. for i in range(len(self.houseInfo)):
  39. # print(i)
  40. # yield i
  41. # print(type(self.houseInfo))
  42. yield self.houseInfo[i]
  43. def qingxi_data_houseInfo(self): # 清洗数据
  44. self.xpath_houseInfo()
  45. self.xpath_title()
  46. self.xpath_positionInfo()
  47. self.xpath_totalPrice()
  48. self.xpath_unitPrice()
  49. self.xpath_followInfo()
  50. self.xpath_tag()
  51. get_houseInfo = self.xpath_houseInfo()
  52. get_title = self.xpath_title()
  53. get_positionInfo=self.xpath_positionInfo()
  54. get_totalPrice = self.xpath_totalPrice()
  55. get_unitPrice = self.xpath_unitPrice()
  56. get_followInfo=self.xpath_followInfo()
  57. get_tag=self.xpath_tag()
  58. i = 1
  59. while True:
  60. data_houseInfo= next(get_houseInfo)
  61. data_title=next(get_title)
  62. data_positionInfo=next(get_positionInfo)
  63. data_totalPrice=next(get_totalPrice)
  64. data_unitPrice=next(get_unitPrice)
  65. data_followInfo=next(get_followInfo)
  66. data_tag=next(get_tag)
  67. with open("yufengqu.csv", "a", newline="", encoding="utf-8-sig") as f:
  68. fieldnames = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
  69. writer = csv.DictWriter(f, fieldnames=fieldnames) # 写入表头
  70. writer.writeheader()
  71. list_1 = ['houseInfo', 'title', 'positionInfo', 'totalPrice/万元', 'unitPrice', 'followInfo', 'tag']
  72. list_2 = [data_houseInfo,data_title,data_positionInfo,data_totalPrice,data_unitPrice,data_followInfo,data_tag]
  73. list_3 = dict(zip(list_1, list_2))
  74. writer.writerow(list_3)
  75. print("写入第"+str(i)+"行数据")
  76. i += 1
  77. if i > len(self.houseInfo):
  78. break
  79. def xpath_title(self):
  80. for i in range(len(self.title)):
  81. yield self.title[i]
  82. def xpath_positionInfo(self):
  83. for i in range(len(self.positionInfo)):
  84. yield self.positionInfo[i]
  85. def xpath_totalPrice(self):
  86. for i in range(len(self.totalPrice)):
  87. yield self.totalPrice[i]
  88. def xpath_unitPrice(self):
  89. for i in range(len(self.unitPrice)):
  90. yield self.unitPrice[i]
  91. def xpath_followInfo(self):
  92. for i in range(len(self.followInfo)):
  93. yield self.followInfo[i]
  94. def xpath_tag(self):
  95. for i in range(len(self.tag)):
  96. yield self.tag[i]
  97. def run(self):
  98. i = 1
  99. while True:
  100. url_str = self.url.format(i) # 构造请求url
  101. html = self.get_response_spider(url_str)
  102. self.get_content_html(html)
  103. self.qingxi_data_houseInfo()
  104. i += 1
  105. if i == 100: # 不包括100页
  106. break
  107. # if __name__ == "__main__":
  108. # lanjia = LanjiaSpider()
  109. # lanjia.run()
  110. class MyspiderSpider(scrapy.Spider):
  111. name = 'myspider'
  112. allowed_domains = ['https://wh.lianjia.com/ershoufang/jianghan/']
  113. start_urls = ['https://wh.lianjia.com/ershoufang/jianghan//']
  114. def parse(self, response):
  115. print('爬取ing....')
  116. lanjia = LanjiaSpider()
  117. lanjia.run()

数据可视化:(优化版)

  1. """
  2. 数据分析及可视化
  3. author: 周小夏
  4. """
  5. import pandas as pd
  6. from pyecharts.charts import Line, Bar
  7. import numpy as np
  8. from pyecharts.globals import ThemeType
  9. from pyecharts.charts import Pie
  10. from pyecharts import options as opts
  11. places = ['chengzhongqu', 'liubeiqu', 'liuchengxian', 'liujiangqu', 'liunanqu', 'yufengqu']
  12. place = ['城中区', '柳北区', '柳城县', '柳江区', '柳南区', '鱼峰区']
  13. avgs = [] # 房价均值
  14. median = [] # 房价中位数
  15. favourate_avg = [] # 房价收藏人数均值
  16. favourate_median = [] # 房价收藏人数中位数
  17. houseidfo = ['2室1厅', '3室1厅', '2室2厅', '3室2厅', '其他'] # 房型定义
  18. houseidfos = ['2.1', '3.1', '2.2', '3.2']
  19. sum_house = [0, 0, 0, 0, 0] # 各房型数量
  20. sum_houses = []
  21. price = [] # 房价均值
  22. unitprice = [] # 单价
  23. fav = [] # 收藏人数
  24. type = []
  25. area = [] # 房间
  26. def avg(name):
  27. df = pd.read_csv('./spiders/' + str(name)+'.csv', encoding='utf-8')
  28. pattern = '\d+'
  29. df['totalPrice/万元'] = df['totalPrice/万元'].str.findall(pattern)
  30. df['followInfo'] = df['followInfo'].str.findall(pattern)
  31. df['houseInfo'] = df['houseInfo'].str.findall(pattern)
  32. df['unitPrice'] = df['unitPrice'].str.findall(pattern)
  33. sum_houses = [0, 0, 0, 0, 0]
  34. # print(sum_house)
  35. avg_work_year = []
  36. areas = []
  37. unit_avg = []
  38. medians = []
  39. favourates = []
  40. k = 0
  41. k1 = 0
  42. k3 = 0
  43. k4 = 0
  44. for i in range(len(df)):
  45. if (i + 1) % 2 == 0:
  46. continue
  47. else:
  48. if len(df['unitPrice'][i]) >= 0:
  49. unit_avg.append(','.join(df['unitPrice'][i]).replace(',', '.'))
  50. if len(df['totalPrice/万元'][i]) >= 0:
  51. avg_work_year.append(','.join(df['totalPrice/万元'][i]).replace(',', '.'))
  52. medians.append(float(','.join(df['totalPrice/万元'][i]).replace(',', '.'))*100)
  53. price.append(','.join(df['totalPrice/万元'][i]).replace(',', '.'))
  54. if len(df['followInfo'][i]) ==2:
  55. favourates.append(int(','.join(df['followInfo'][i][:1])))
  56. fav.append(int(','.join(df['followInfo'][i][:1])))
  57. if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 2.1:
  58. k +=1
  59. sum_houses[0] =k
  60. type.append(2.1)
  61. if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 3.1:
  62. k1 +=1
  63. sum_houses[1] =k1
  64. type.append(3.1)
  65. if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 2.2:
  66. k3 +=1
  67. sum_houses[2] =k3
  68. type.append(2.2)
  69. if float(','.join(df['houseInfo'][i][:2]).replace(',', '.')) == 3.2:
  70. k4 +=1
  71. sum_houses[3] =k4
  72. type.append(3.2)
  73. else:
  74. k4 +=1
  75. sum_houses[4] = k4
  76. type.append('other')
  77. areas.append(float(','.join(df['houseInfo'][i][2:4]).replace(',', '.')))
  78. sum_house[0] =sum_houses[0]
  79. sum_house[1] = sum_houses[1]
  80. sum_house[2] = sum_houses[2]
  81. sum_house[3] = sum_houses[3]
  82. sum_house[4] = sum_houses[4]
  83. sum_house.append(sum_house[0])
  84. sum_house.append(sum_house[1])
  85. sum_house.append(sum_house[2])
  86. sum_house.append(sum_house[3])
  87. sum_house.append(sum_house[4])
  88. # print(sum_houses)
  89. favourates.sort()
  90. favourate_median.append(int(np.median(favourates)))
  91. medians.sort()
  92. median.append(np.median(medians))
  93. # price = avg_work_year
  94. b = len(avg_work_year)*100
  95. b1= len(favourates)
  96. b2 = len(unit_avg)
  97. b4 = len(areas)*100
  98. sum = 0
  99. sum1 = 0
  100. for i in unit_avg:
  101. sum = sum+float(i)
  102. unitprice.append(round(sum/b2, 2))
  103. for i in areas:
  104. sum = sum+float(i)
  105. area.append(round(sum/b4, 2))
  106. for i in avg_work_year:
  107. sum = sum+float(i)
  108. avgs.append(round(sum/b, 2))
  109. for i in favourates:
  110. sum1 = sum1+float(i)
  111. favourate_avg.append(round(int(sum1/b1), 2))
  112. for i in places:
  113. avg(i)
  114. print("各区平均房价", avgs)
  115. print('各房型的出售总数:', sum_house)
  116. print("房间面积", area)
  117. print("房价单价", unitprice)
  118. a = []
  119. for i in median:
  120. a.append(i/100)
  121. # print(median)
  122. # print(favourate_avg,favourate_median)
  123. line = Line()
  124. line.add_xaxis(place)
  125. line.add_yaxis("柳州市各地房价平均值(万元)", avgs)
  126. line.add_yaxis("柳州市各地房价中位数值(万元)", a)
  127. line.render("predict_line.html")
  128. def bar() -> Bar:
  129. c = (
  130. Bar({"theme": ThemeType.MACARONS})
  131. .add_xaxis(place)
  132. .add_yaxis("平均值", unitprice)
  133. .set_global_opts(
  134. title_opts={"text": "柳州市各地房价单价(元)"}
  135. )
  136. )
  137. return c
  138. bar().render("unit_prices.html")
  139. def bar() -> Bar:
  140. c = (
  141. Bar({"theme": ThemeType.MACARONS})
  142. .add_xaxis(place)
  143. .add_yaxis("平均值", avgs)
  144. .add_yaxis("中位数", a)
  145. .set_global_opts(
  146. title_opts={"text": "柳州市各地房价(万元)"}
  147. )
  148. )
  149. return c
  150. bar().render("predict_bar.html")
  151. # print(sum_house)
  152. def bar() -> Bar:
  153. c = (
  154. Bar({"theme": ThemeType.MACARONS})
  155. .add_xaxis(houseidfo)
  156. .add_yaxis(place[0], sum_house[0:5])
  157. .add_yaxis(place[1], sum_house[5:10])
  158. .add_yaxis(place[2], sum_house[10:15])
  159. .add_yaxis(place[3], sum_house[15:20])
  160. .add_yaxis(place[4], sum_house[20:25])
  161. .add_yaxis(place[-1], sum_house[25:30])
  162. # .add_yaxis("中位数", favourate_median)
  163. .set_global_opts(
  164. title_opts={"text": "柳州市各地房型\n数量"}
  165. )
  166. )
  167. return c
  168. bar().render("house_bar.html")
  169. line = Line()
  170. line.add_xaxis(place)
  171. line.add_yaxis("柳州市各地房子平均面积\n(平米)", area)
  172. line.render("Area_line.html")
  173. list_num = favourate_avg
  174. attr = place
  175. # print(zip(attr, list_num))
  176. s = [list(z) for z in zip(attr, list_num)]
  177. c = (Pie().add("", s).set_global_opts(title_opts=opts.TitleOpts(title="柳州市各区楼房\n平均收藏人数"))
  178. .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
  179. )
  180. c.render("pie_avg.html")
  181. list_num = favourate_median
  182. attr = place
  183. # print(zip(attr, list_num))
  184. s = [list(z) for z in zip(attr, list_num)]
  185. c = (Pie().add("", s).set_global_opts(title_opts=opts.TitleOpts(title="柳州市各区楼房\n收藏人数中位数"))
  186. .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
  187. )
  188. c.render("pie_median.html")
  189. from pyecharts import options as opts
  190. from pyecharts.charts import Scatter3D
  191. from pyecharts.faker import Faker
  192. line = Line()
  193. line.add_xaxis(place)
  194. line.add_yaxis("房间面积\n(平米)", area)
  195. line.add_yaxis("房价\n(/万元)", avgs)
  196. line.render("price1_line.html")
  197. price=[float(i)/1 for i in price]
  198. # print(price)
  199. # types=list(map(mapfunc,df.house_type.values))
  200. # type = [224, 56, 168, 1680, 1670]
  201. data = []
  202. # print(fav,type)
  203. # for j in range(len(type)):
  204. # for k in range(len(fav)):
  205. for j in range(100):
  206. for k in range(100):
  207. for i in range(500):
  208. try:
  209. data.append([type[j], favourate_avg[k],price[i]])
  210. except:
  211. continue
  212. # print(data)
  213. scatter = (
  214. Scatter3D(init_opts=opts.InitOpts(width='900px', height='600px')) # 初始化
  215. .add("", data,
  216. grid3d_opts=opts.Grid3DOpts(
  217. width=300, depth=300, rotate_speed=300, is_rotate=True,
  218. ),)
  219. # 设置全局配置项
  220. .set_global_opts(
  221. title_opts=opts.TitleOpts(title="房型——关注度——价格\n三维关系图"), # 添加标题
  222. visualmap_opts=opts.VisualMapOpts(
  223. max_=300, # 最大值
  224. pos_top=200, # visualMap 组件离容器上侧的距离
  225. range_color=Faker.visual_color # 颜色映射
  226. )
  227. )
  228. .render("3D散点图.html")
  229. )
  230. print('数据分析和可视化结束,左边点开~')

最后,别忘记了关注公众号~

分享最新算法!

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-
CiQYvfTG-1594979733395)(D:\CSDN\pic\WeChat Image_20200716151357.jpg)]