1. import numpy as np
    2. import pandas as pd
    3. import random
    4. import re
    1. 读取 链家.csv 文件里的数据

      # Load the listing data into a DataFrame; the path is relative to the
      # working directory.
      df = pd.read_csv("链家.csv")
      # Bare expression: displays the frame when run as a notebook cell.
      df
    2. 观察结构,调整列索引顺序

    ("Region","Garden","Layout","Floor","Year","Size","Elevator","Direction","Renovation","Price")

    # Reorder the columns by selecting them in the desired order; any column
    # not named here is dropped from df.
    column_order = [
        "Region", "Garden", "Layout", "Floor", "Year",
        "Size", "Elevator", "Direction", "Renovation", "Price",
    ]
    df = df[column_order]
    # Bare expression: displays the frame when run as a notebook cell.
    df
    1. 增加一列表示目前状况(state),是否已售的状态随机设定

      # Draw one random sold/unsold label per row. Sizing by len(df) keeps the
      # new column in step with the data instead of relying on a fixed row count.
      list1 = [random.choice(("已售", "未售")) for _ in range(len(df))]
      print(list1)
      # Reuse df's index so that concat(axis=1), which aligns on index labels,
      # pairs every label with exactly one existing row.
      state1 = pd.DataFrame(list1, columns=["state"], index=df.index)
      state1
      df = pd.concat([df, state1], axis=1)
      # Bare expression: displays the frame when run as a notebook cell.
      df
    2. 查找楼层低的房子(这里提取低楼层)

      # Keep the rows whose Floor text contains the low-floor marker.
      # na=False treats a missing Floor value as "no match"; without it the
      # mask would contain NaN and boolean indexing would raise.
      df[df["Floor"].str.contains('低楼层', na=False)]
    3. 电梯这列存在缺失值,想办法处理下缺失值

      # Check which Floor entries contain a digit at all (shown when run as a
      # notebook cell).
      df["Floor"].str.contains(r"\d")
      # Extract the first run of digits from each Floor string; entries without
      # digits become NaN.
      # NOTE(review): presumably this number is the building's floor count --
      # confirm against the data.
      df_Floor = df["Floor"].str.extract(r'(\d+)', expand=False)
      df_Floor
      # Convert to numbers. errors="coerce" keeps NaN for rows that had no
      # digits instead of raising the way astype("int") does on missing values.
      df_Floor = pd.to_numeric(df_Floor, errors="coerce")
      # Rows with a missing Elevator value whose extracted floor number exceeds 8.
      # NaN floor numbers compare as False and are therefore excluded.
      df[df["Elevator"].isnull() & (df_Floor > 8)]