1. import numpy as np
    2. import pandas as pd

    列的访问

    1. d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    2. 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
    3. 'three' : pd.Series([1, 3, 4], index=['a', 'c', 'd'])}
    4. df = pd.DataFrame(d)
    5. df
    one two three
    a 1.0 1 1.0
    b 2.0 2 NaN
    c 3.0 3 3.0
    d NaN 4 4.0
    1. #列访问的第一种写法 直接用列名可以取出来
    2. df['two'] #类型是Series
    3. df[['one','two']] #类型是DataFrame
    one two
    a 1.0 1
    b 2.0 2
    c 3.0 3
    d NaN 4
    1. #通过columns属性切片来取
    2. df[df.columns[0:2]]
    one two
    a 1.0 1
    b 2.0 2
    c 3.0 3
    d NaN 4
    1. #通过columns属性索引来取
    2. df[df.columns[[0,2]]]
    one three
    a 1.0 1.0
    b 2.0 NaN
    c 3.0 3.0
    d NaN 4.0

    列的添加

    1. #列的添加
    2. df['four']=pd.Series([90, 80, 70, 60], index=['a', 'b', 'c', 'd'])
    3. df
    4. # 新建一个列索引,并赋值
    5. # 当插入与DataFrame索引不同的Series时,它将按DataFrame的索引对齐(DataFrame中有而Series中没有的标签填充NaN)
    6. # 可以插入原始ndarray,但它们的长度必须与DataFrame索引的长度匹配,默认情况下,
    7. #列会插入到末尾。
    one two three four
    a 1.0 1 1.0 90
    b 2.0 2 NaN 80
    c 3.0 3 3.0 70
    d NaN 4 4.0 60
    1. # insert 功能可插入到列中的特定位置
    2. df.insert(2, 'five', df['one']) # 在2位置插入five列用one列的值
    3. df
    one two five three four
    a 1.0 1 1.0 1.0 90
    b 2.0 2 2.0 NaN 80
    c 3.0 3 3.0 3.0 70
    d NaN 4 NaN 4.0 60

    列删除

    1. #删除某列数据可以使用Python的del语句,或pandas提供的pop、drop方法,用法如下:
    2. import pandas as pd
    3. d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    4. 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
    5. 'three' : pd.Series([10, 20, 30], index=['a', 'b', 'c']),
    6. 'four' : pd.Series([40, 50, 60], index=['a', 'b', 'c']),
    7. 'five' : pd.Series([70, 80, 90], index=['a', 'b', 'c'])}
    8. df = pd.DataFrame(d)
    9. print(df)
    1. one two three four five
    2. a 1.0 1 10.0 40.0 70.0
    3. b 2.0 2 20.0 50.0 80.0
    4. c 3.0 3 30.0 60.0 90.0
    5. d NaN 4 NaN NaN NaN
    1. # del方法删除
    2. del(df['one'])
    3. print(df)
    1. two three four five
    2. a 1 10.0 40.0 70.0
    3. b 2 20.0 50.0 80.0
    4. c 3 30.0 60.0 90.0
    5. d 4 NaN NaN NaN
    1. # pop方法删除(注意:此处是在重新创建df之后执行的,所以下面的输出中仍然包含one列)
    2. df.pop('two')
    3. print(df)
    1. one three four five
    2. a 1.0 10.0 40.0 70.0
    3. b 2.0 20.0 50.0 80.0
    4. c 3.0 30.0 60.0 90.0
    5. d NaN NaN NaN NaN
    1. # drop方法删除(默认返回一个新的DataFrame,原df不会被修改)
    2. df.drop(['three'], axis=1)
    one four five
    a 1.0 40.0 70.0
    b 2.0 50.0 80.0
    c 3.0 60.0 90.0
    d NaN NaN NaN

    行的访问

    1. import pandas as pd
    2. d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    3. 'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
    4. 'three' : pd.Series([10, 20, 30], index=['a', 'b', 'c']),
    5. 'four' : pd.Series([40, 50, 60], index=['a', 'b', 'c']),
    6. 'five' : pd.Series([70, 80, 90], index=['a', 'b', 'c'])}
    7. df = pd.DataFrame(d)
    8. print(df)
    1. one two three four five
    2. a 1.0 1 10.0 40.0 70.0
    3. b 2.0 2 20.0 50.0 80.0
    4. c 3.0 3 30.0 60.0 90.0
    5. d NaN 4 NaN NaN NaN
    1. df.loc['b'] #类型是Series 用的是人为命名的index
    1. one 2.0
    2. two 2.0
    3. three 20.0
    4. four 50.0
    5. five 80.0
    6. Name: b, dtype: float64
    1. df.loc[['b','c']] #类型是DataFrame(传入标签列表时返回DataFrame)
    one two three four five
    b 2.0 2 20.0 50.0 80.0
    c 3.0 3 30.0 60.0 90.0
    1. df.iloc[0] #用的是系统给的index
    1. one 1.0
    2. two 1.0
    3. three 10.0
    4. four 40.0
    5. five 70.0
    6. Name: a, dtype: float64
    1. #行访问,切片(系统自动给的 整数切片)
    2. df[0:3]
    one two three four five
    a 1.0 1 10.0 40.0 70.0
    b 2.0 2 20.0 50.0 80.0
    c 3.0 3 30.0 60.0 90.0
    1. #行访问切片(使用自己命名的index标签;标签切片是闭区间,包含终点'c')
    2. df['a':'c']
    one two three four five
    a 1.0 1 10.0 40.0 70.0
    b 2.0 2 20.0 50.0 80.0
    c 3.0 3 30.0 60.0 90.0
    1. df.loc['a':'c','one':'three']
    one two three
    a 1.0 1 10.0
    b 2.0 2 20.0
    c 3.0 3 30.0
    1. df.iloc[0:2,0:3] #iloc按位置切片,遵循左闭右开:行位置2和列位置3都不包含在内
    one two three
    a 1.0 1 10.0
    b 2.0 2 20.0

    行添加

    1. import pandas as pd
    2. df = pd.DataFrame([['zs', 12], ['ls', 4]], columns = ['Name','Age'], index=[0, 1])
    3. df
    Name Age
    0 zs 12
    1 ls 4
    1. df2 = pd.DataFrame([['ww', 16], ['zl', 8]], columns = ['Name','Age'], index=[2, 3])
    2. df2
    Name Age
    2 ww 16
    3 zl 8
    1. df = df.append(df2) # 注意:该语句被重复执行了3次,所以下面的输出中索引2、3各出现3次;append已在pandas 2.0中移除,新版本请改用 df = pd.concat([df, df2])
    2. df
    Name Age
    0 zs 12
    1 ls 4
    2 ww 16
    3 zl 8
    2 ww 16
    3 zl 8
    2 ww 16
    3 zl 8

    行删除

    1. #使用索引标签从DataFrame中删除列或删除行。 如果标签重复,则会删除多行。
    2. df.drop(2) #drop 默认的axis=0(按行删除),所以列删除需要指定axis=1
    Name Age
    0 zs 12
    1 ls 4
    3 zl 8
    3 zl 8
    3 zl 8