1. 行标签与列标签获取
# Print the whole frame, then its row labels (.index) and its column
# labels (.columns).
# NOTE(review): `cities` is created outside this excerpt.
print(cities)
print(cities.index)
print(cities.columns)
area bb
a 12 12
b 25 25
c 56 56
d 67 67
e 42 42
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
Index(['area', 'bb'], dtype='object')
2. 使用列名取值
# Build a DataFrame from a plain dict and a Series. Both are keyed by
# 'a'..'e'; those keys become the row labels of the frame.
population = {'a': 12, 'b': 25, 'c': 56, 'd': 67, 'e': 42}
area = pd.Series({'a': 11, 'b': 22, 'c': 55, 'd': 66, 'e': 44})
data = pd.DataFrame({'population': population, 'area': area})
# Alternative with non-string column labels:
# data = pd.DataFrame({1.1: population, 2.2: area})
print(data)
print('用area的类名取值')
# Dictionary-style access by column name yields that column as a Series.
print(data['area'])
population area
a 12 11
b 25 22
c 56 55
d 67 66
e 42 44
用area的类名取值
a 11
b 22
c 55
d 66
e 44
Name: area, dtype: int64
3. 使用属性形式取值
print('使用属性形式取值')
# Attribute-style access to the 'area' column; prints the same Series as
# data['area'].
print(data.area)
使用属性形式取值
a 11
b 22
c 55
d 66
e 44
Name: area, dtype: int64
4. 增加一列
# Assigning to a column name that does not exist yet adds a new column.
# The division is element-wise, so each row gets population / area.
data['density'] = data['population'] / data['area']
print(data)
population area density
a 12 11 1.090909
b 25 22 1.136364
c 56 55 1.018182
d 67 66 1.015152
e 42 44 0.954545
5. 使用values属性按行查看数据
# .values exposes the data as a 2-D array with one inner row per DataFrame
# row. The integer columns are printed as floats (12., 11.) because they
# share the array with the float 'density' column.
print(data.values)
[[12. 11. 1.09090909]
[25. 22. 1.13636364]
[56. 55. 1.01818182]
[67. 66. 1.01515152]
[42. 44. 0.95454545]]
6. 数据转置
转置(data.T)会返回转置后的结果,并不会改变原来的 data。
# .T swaps rows and columns in the returned frame ...
print(data.T)
# ... while printing data again shows that the original is left untouched.
print(data)
a b c d e
population 12.000000 25.000000 56.000000 67.000000 42.000000
area 11.000000 22.000000 55.000000 66.000000 44.000000
density 1.090909 1.136364 1.018182 1.015152 0.954545
population area density
a 12 11 1.090909
b 25 22 1.136364
c 56 55 1.018182
d 67 66 1.015152
e 42 44 0.954545
7. 切片时的隐式索引与显式索引
print('进行切片,选择一定范围内,显式索引')
# Explicit (label-based) slicing with .loc: both end labels are included,
# so this selects rows 'a'..'b' and columns 'population'..'area'.
print(data.loc[:'b', :'area'])
# Chained lookup: data.loc['b'] first returns row 'b' as a Series that also
# holds the float 'density' value, so the area is printed as 22.0.
# data.loc['b', 'area'] reads the cell directly from the integer column.
print(data.loc['b']['area'])
print('隐式索引')
# Implicit (position-based) slicing with .iloc: the end position is
# excluded, so this selects the first 3 rows and the first 2 columns.
print(data.iloc[:3, :2])
进行切片,选择一定范围内,显式索引
population area
a 12 11
b 25 22
22.0
隐式索引
population area
a 12 11
b 25 22
c 56 55
# A label slice inside plain [] selects rows (not columns); the end label
# 'd' is included.
print(data['a': 'd'])
print('上式相当与用了')
# The explicit .loc form of the same row selection.
print(data.loc['a': 'd'])
population area density
a 12 11 1.090909
b 25 22 1.136364
c 56 55 1.018182
d 67 66 1.015152
上式相当与用了
population area density
a 12 11 1.090909
b 25 22 1.136364
c 56 55 1.018182
d 67 66 1.015152
# Label-based: rows 'a' through 'd' (inclusive), all columns.
print(data.loc['a': 'd', :])
print('上式相当与用了')
# Position-based: the first four rows, which are the same rows here.
print(data[:4]) # implicit (positional) index
population area density
a 12 11 1.090909
b 25 22 1.136364
c 56 55 1.018182
d 67 66 1.015152
上式相当与用了
population area density
a 12 11 1.090909
b 25 22 1.136364
c 56 55 1.018182
d 67 66 1.015152
8. 使用类似NumPy的方式取值(布尔掩码与位置索引)
# Boolean mask on the rows (density > 1) combined with a list of column
# labels: keeps only the matching rows and the two named columns.
print(data.loc[data.density > 1, ['population', 'density']])
# Scalar lookup by position: row 0, column 2 (the density of row 'a').
print(data.iloc[0, 2])
# Same value in two positional steps. The row returned by data.iloc[0] is a
# Series indexed by column labels, so it needs .iloc as well: a plain [2]
# relies on the deprecated positional fallback of Series.__getitem__.
print(data.iloc[0].iloc[2])
population density
a 12 1.090909
b 25 1.136364
c 56 1.018182
d 67 1.015152
1.0909090909090908
1.0909090909090908
9. 单个或多个标签取值
print('单个标签取值')
# A single column label returns that column as a Series.
print(data['area'])
print('多个标签取值')
# A label slice over the columns returns a DataFrame; both end labels
# ('population' and 'area') are included.
print(data.loc[:, 'population':'area'])
单个标签取值
a 11
b 22
c 55
d 66
e 44
Name: area, dtype: int64
多个标签取值
population area
a 12 11
b 25 22
c 56 55
d 67 66
e 42 44
10. 特定列与某个值比大小
判断data中每一行density是否大于1
# Element-wise comparison: yields a boolean Series with one True/False
# entry per row label.
print(data.density > 1)
a True
b True
c True
d True
e False
Name: density, dtype: bool