import numpy as np
import pandas as pd
import random
import re
读取链家.csv文件里的数据
# Load the listings table from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="链家.csv")
# Bare expression: shows the table when run as a notebook cell.
df
观察结构,调整列索引顺序
("Region","Garden","Layout","Floor","Year","Size","Elevator","Direction","Renovation","Price")
# Keep exactly these columns, in this order (label-based selection of all rows).
df = df.loc[
    :,
    [
        "Region",
        "Garden",
        "Layout",
        "Floor",
        "Year",
        "Size",
        "Elevator",
        "Direction",
        "Renovation",
        "Price",
    ],
]
# Bare expression: shows the reordered table when run as a notebook cell.
df
增加一列表示房屋目前的状况(state),是否已售出的状态随机设定
# Add a "state" column holding a randomly chosen label for every listing:
# "已售" (sold) or "未售" (unsold).
#
# One label is drawn per row of df rather than a hard-coded 1710, so the new
# column always has the same length as the table.
list1 = [random.choice(("已售", "未售")) for _ in range(len(df))]
print(list1)

# Reuse df's own index so the column-wise concat below lines up row-for-row
# even when df's index is not the default 0..n-1 range.
state1 = pd.DataFrame(list1, columns=["state"], index=df.index)
state1

# Append the new column on the right-hand side of the table.
df = pd.concat([df, state1], axis=1)
df
查找楼层低的房子(这里提取低楼层)
# Show only the rows whose Floor text contains the label "低楼层" (low floor).
df.loc[df["Floor"].str.contains("低楼层")]
电梯这列存在缺失值,想办法处理下缺失值
# Check which Floor entries contain at least one digit.
df["Floor"].str.contains(r"\d")

# Pull the first run of digits out of each Floor string as a Series.
# NOTE(review): presumably this number is the building's storey count — confirm
# against the data.
df_Floor = df["Floor"].str.extract(r"(\d+)", expand=False)
df_Floor

# Convert the extracted digit strings to integers so they can be compared.
df_Floor = df_Floor.astype("int")

# Rows where Elevator is missing and the extracted floor number is above 8.
df.loc[df["Elevator"].isna() & df_Floor.gt(8)]