基础 - DataFrame---行列访问添加删除 - 《数据分析》

import numpy as np
import pandas as pd

列的访问

# Build a DataFrame from a dict of Series whose indexes only partly overlap:
# the rows are the union of all labels and missing cells become NaN.
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'three': pd.Series([1, 3, 4], index=list('acd')),
}
df = pd.DataFrame(d)
df

	one	two	three
a	1.0	1	1.0
b	2.0	2	NaN
c	3.0	3	3.0
d	NaN	4	4.0

# First way to access columns: index the DataFrame directly by column name
df['two']   # a single name returns a Series
df[['one','two']]  # a list of names returns a DataFrame

	one	two
a	1.0	1
b	2.0	2
c	3.0	3
d	NaN	4

# Select columns by slicing the columns attribute (here the first two columns)
df[df.columns[0:2]]

	one	two
a	1.0	1
b	2.0	2
c	3.0	3
d	NaN	4

# Select columns by positional indexing into the columns attribute (positions 0 and 2)
df[df.columns[[0,2]]]

	one	three
a	1.0	1.0
b	2.0	NaN
c	3.0	3.0
d	NaN	4.0

列的添加

# Adding a column
df['four']=pd.Series([90, 80, 70, 60], index=['a', 'b', 'c', 'd'])
df
# Assigning to a new column label creates that column.
# When the inserted Series has an index that differs from the DataFrame's,
# it is aligned to the DataFrame's index.
# A raw ndarray can also be inserted, but its length must match the length
# of the DataFrame's index.
# By default the new column is appended at the end.

	one	two	three	four
a	1.0	1	1.0	90
b	2.0	2	NaN	80
c	3.0	3	3.0	70
d	NaN	4	4.0	60

# insert places a new column at a specific position among the columns
df.insert(2, 'five', df['one'])  # insert column 'five' at position 2, filled with the values of column 'one'
df

	one	two	five	three	four
a	1.0	1	1.0	1.0	90
b	2.0	2	2.0	NaN	80
c	3.0	3	3.0	3.0	70
d	NaN	4	NaN	4.0	60

列删除

# Columns can be removed with the `del` statement or with the pop / drop
# methods; rebuild a five-column frame to demonstrate each of them.
import pandas as pd
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'three': pd.Series([10, 20, 30], index=list('abc')),
    'four': pd.Series([40, 50, 60], index=list('abc')),
    'five': pd.Series([70, 80, 90], index=list('abc')),
}
df = pd.DataFrame(d)
print(df)

   one  two  three  four  five
a  1.0    1   10.0  40.0  70.0
b  2.0    2   20.0  50.0  80.0
c  3.0    3   30.0  60.0  90.0
d  NaN    4    NaN   NaN   NaN

# Remove a column in place with the del statement (del is a statement, not a function)
del df['one']
print(df)

   two  three  four  five
a    1   10.0  40.0  70.0
b    2   20.0  50.0  80.0
c    3   30.0  60.0  90.0
d    4    NaN   NaN   NaN

# Remove a column with pop: df is modified in place and the removed column
# is returned (the return value is discarded here)
df.pop('two')
print(df)

   three  four  five
a   10.0  40.0  70.0
b   20.0  50.0  80.0
c   30.0  60.0  90.0
d    NaN   NaN   NaN

# Remove a column with drop (axis=1 targets columns). drop returns a new
# DataFrame; the result is not assigned here, so df itself is left unchanged.
df.drop(['three'], axis=1)

	four	five
a	40.0	70.0
b	50.0	80.0
c	60.0	90.0
d	NaN	NaN

行的访问

# Rebuild the five-column frame used by the row-access examples; only
# column 'two' has a value for label 'd', so the rest of that row is NaN.
import pandas as pd
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'three': pd.Series([10, 20, 30], index=list('abc')),
    'four': pd.Series([40, 50, 60], index=list('abc')),
    'five': pd.Series([70, 80, 90], index=list('abc')),
}
df = pd.DataFrame(d)
print(df)

   one  two  three  four  five
a  1.0    1   10.0  40.0  70.0
b  2.0    2   20.0  50.0  80.0
c  3.0    3   30.0  60.0  90.0
d  NaN    4    NaN   NaN   NaN

df.loc['b']  # returns a Series; loc selects by the user-defined index label

one       2.0
two       2.0
three    20.0
four     50.0
five     80.0
Name: b, dtype: float64

df.loc[['b','c']]   # a list of labels returns a DataFrame (not a Series)

	one	two	three	four	five
b	2.0	2	20.0	50.0	80.0
c	3.0	3	30.0	60.0	90.0

df.iloc[0]      # iloc selects by integer position (the implicit 0-based index)

one       1.0
two       1.0
three    10.0
four     40.0
five     70.0
Name: a, dtype: float64

# Row access with an integer (positional) slice
df[0:3]

	one	two	three	four	five
a	1.0	1	10.0	40.0	70.0
b	2.0	2	20.0	50.0	80.0
c	3.0	3	30.0	60.0	90.0

# Row access by slicing on the user-defined index labels;
# unlike a positional slice, both endpoints ('a' and 'c') are included
df['a':'c']

	one	two	three	four	five
a	1.0	1	10.0	40.0	70.0
b	2.0	2	20.0	50.0	80.0
c	3.0	3	30.0	60.0	90.0

# Label-based slicing on rows and columns at once; both endpoints are inclusive on each axis
df.loc['a':'c','one':'three']

	one	two	three
a	1.0	1	10.0
b	2.0	2	20.0
c	3.0	3	30.0

df.iloc[0:2,0:3]  # positional slices are half-open: row 2 and column 3 are excluded

	one	two	three
a	1.0	1	10.0
b	2.0	2	20.0

行添加

# Start from a two-row frame with index labels 0 and 1.
import pandas as pd
df = pd.DataFrame({'Name': ['zs', 'ls'], 'Age': [12, 4]}, index=[0, 1])
df

	Name	Age
0	zs	12
1	ls	4

# Second frame with the same columns, labelled 2 and 3, to be stacked under the first one.
df2 = pd.DataFrame({'Name': ['ww', 'zl'], 'Age': [16, 8]}, index=[2, 3])
df2

	Name	Age
2	ww	16
3	zl	8

# Stack the rows of df2 under df. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0; pd.concat is the supported replacement and,
# like append, keeps each frame's own index labels.
# NOTE: every execution appends df2 again, so re-running this statement
# produces duplicate index labels (2 and 3).
df = pd.concat([df, df2])
df

	Name	Age
0	zs	12
1	ls	4
2	ww	16
3	zl	8
2	ww	16
3	zl	8
2	ww	16
3	zl	8

行删除

# Remove rows or columns from a DataFrame by label. If a label is duplicated, every matching row is dropped.
df.drop(2)  # drop defaults to axis=0 (rows); deleting a column requires axis=1

	Name	Age
0	zs	12
1	ls	4
3	zl	8
3	zl	8
3	zl	8

DataFrame---行列访问 添加删除

DataFrame---行列访问添加删除