1. 行标签与列标签获取
```python
print(cities)
print(cities.index)
print(cities.columns)
```
输出:
```
   area  bb
a    12  12
b    25  25
c    56  56
d    67  67
e    42  42
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
Index(['area', 'bb'], dtype='object')
```
2. 使用列名取值
```python
population = {'a': 12, 'b': 25, 'c': 56, 'd': 67, 'e': 42}
area = pd.Series({'a': 11, 'b': 22, 'c': 55, 'd': 66, 'e': 44})
data = pd.DataFrame({'population': population, 'area': area})
# data = pd.DataFrame({1.1: population_dict, 2.2: area})
print(data)
print('用area的列名取值')
print(data['area'])
```
输出:
```
   population  area
a          12    11
b          25    22
c          56    55
d          67    66
e          42    44
用area的列名取值
a    11
b    22
c    55
d    66
e    44
Name: area, dtype: int64
```
3. 使用属性形式取值
```python
print('使用属性形式取值')
print(data.area)
```
输出:
```
使用属性形式取值
a    11
b    22
c    55
d    66
e    44
Name: area, dtype: int64
```
4. 增加一列
```python
data['density'] = data['population'] / data['area']
print(data)
```
输出:
```
   population  area   density
a          12    11  1.090909
b          25    22  1.136364
c          56    55  1.018182
d          67    66  1.015152
e          42    44  0.954545
```
5. 使用values属性按行查看数据
```python
print(data.values)
```
输出:
```
[[12.         11.          1.09090909]
 [25.         22.          1.13636364]
 [56.         55.          1.01818182]
 [67.         66.          1.01515152]
 [42.         44.          0.95454545]]
```
6. 数据转置
转置时并不会改变原值。
```python
print(data.T)
print(data)
```
输出:
```
                    a          b          c          d          e
population  12.000000  25.000000  56.000000  67.000000  42.000000
area        11.000000  22.000000  55.000000  66.000000  44.000000
density      1.090909   1.136364   1.018182   1.015152   0.954545
   population  area   density
a          12    11  1.090909
b          25    22  1.136364
c          56    55  1.018182
d          67    66  1.015152
e          42    44  0.954545
```
7. 切片时的显式索引与隐式索引
```python
print('进行切片,选择一定范围内,显式索引')
print(data.loc[:'b', :'area'])
print(data.loc['b']['area'])
print('隐式索引')
print(data.iloc[:3, :2])
```
输出:
```
进行切片,选择一定范围内,显式索引
   population  area
a          12    11
b          25    22
22.0
隐式索引
   population  area
a          12    11
b          25    22
c          56    55
```
```python
print(data['a': 'd'])
print('上式相当于用了')
print(data.loc['a': 'd'])
```
输出:
```
   population  area   density
a          12    11  1.090909
b          25    22  1.136364
c          56    55  1.018182
d          67    66  1.015152
上式相当于用了
   population  area   density
a          12    11  1.090909
b          25    22  1.136364
c          56    55  1.018182
d          67    66  1.015152
```
```python
print(data.loc['a': 'd', :])
print('上式相当于用了')
print(data[:4])  # 隐式索引
```
输出:
```
   population  area   density
a          12    11  1.090909
b          25    22  1.136364
c          56    55  1.018182
d          67    66  1.015152
上式相当于用了
   population  area   density
a          12    11  1.090909
b          25    22  1.136364
c          56    55  1.018182
d          67    66  1.015152
```
8. 使用numpy的方法取值
```python
print(data.loc[data.density > 1, ['population', 'density']])
print(data.iloc[0, 2])
print(data.iloc[0][2])
```
输出:
```
   population   density
a          12  1.090909
b          25  1.136364
c          56  1.018182
d          67  1.015152
1.0909090909090908
1.0909090909090908
```
9. 单个或多个标签取值
```python
print('单个标签取值')
print(data['area'])
print('多个标签取值')
print(data.loc[:, 'population':'area'])
```
输出:
```
单个标签取值
a    11
b    22
c    55
d    66
e    44
Name: area, dtype: int64
多个标签取值
   population  area
a          12    11
b          25    22
c          56    55
d          67    66
e          42    44
```
10. 特定列与某个值比大小
判断data中每一行density是否大于1
```python
print(data.density > 1)
```
输出:
```
a     True
b     True
c     True
d     True
e    False
Name: density, dtype: bool
```
