行操作
代码1 : append ,drop ,loc 等应用
import pandas as pd
page_001 = pd.read_excel('./Students.xlsx', sheet_name='Page_001')
page_002 = pd.read_excel('./Students.xlsx', sheet_name='Page_002')
"""
append 可以追加一个DateFrame
reset_index(drop=True),可以重新生成一个index,但是原有的index依然在,加上drop=True后
"""
students = page_001.append(page_002).reset_index(drop=True)
# 增加一行 ,如果不使用ignore_index=True,加入的Series会找不到index,报错
stu = pd.Series({'ID': 41, 'Name': 'Abel', "Score": 99})
students = students.append(stu, ignore_index=True)
# 单元格修改数值
students.at[39, 'Name'] = 'Bailey'
students.at[39, 'Score'] = '900'
# 整行替换
stu2 = pd.Series({'ID': 31, 'Name': 'nzl5423', 'Score': 88})
students.iloc[30] = stu2
# 插入一行
stu3 = pd.Series({'ID': 101, 'Name': 'wyy', 'Score': 88})
part1 = students[:20]
part2 = students[20:]
"""
插入一行的操作是先将DateFrame 切片,通过append方法 把part1、stu3、part2 连接起来就可以了
新插进来的Series没有index ,所以要使用ignore_index=True,否则报错
将后面的DateFrame接上来后,因为这部分已经有index了,只需要reset_index(drop=True),但是原有的index依然在,加上drop=True后
"""
students = part1.append(stu3, ignore_index=True).append(part2).reset_index(drop=True)
# 删除数据行
# students.drop(index=[0, 1, 2], inplace=True)
# students.drop(index=range(0,10),inplace=True)
# students.drop(students[0:10].index, inplace=True)
# 按条件删除行
for i in range(5, 15):
students['Name'].at[i] = ''
missing = students.loc[students['Name'] == '']
students.drop(index=missing.index, inplace=True)
students.reset_index(drop=True) # 为新的DataFrame 生成新的 index
print(students)
"""
结果显示
ID Name Score
0 1 Student_001 90
1 2 Student_002 90
2 3 Student_003 90
3 4 Student_004 90
4 5 Student_005 90
15 16 Student_016 90
16 17 Student_017 90
17 18 Student_018 90
18 19 Student_019 90
19 20 Student_020 90
20 101 wyy 88
21 21 Student_021 80
22 22 Student_022 80
23 23 Student_023 80
24 24 Student_024 80
25 25 Student_025 80
26 26 Student_026 80
27 27 Student_027 80
28 28 Student_028 80
29 29 Student_029 80
30 30 Student_030 80
31 31 nzl5423 88
32 32 Student_032 80
33 33 Student_033 80
34 34 Student_034 80
35 35 Student_035 80
36 36 Student_036 80
37 37 Student_037 80
38 38 Student_038 80
39 39 Student_039 80
40 40 Bailey 900
41 41 Abel 99
Process finished with exit code 0
"""
代码2 concat 应用
import pandas as pd
page_001 = pd.read_excel('./Students.xlsx', sheet_name='Page_001')
page_002 = pd.read_excel('./Students.xlsx', sheet_name='Page_002')
"""
concat 可以追加一个DateFrame, axis=0为添加行,axis=1为纵向添加,
concat后面添加 .reset_index(drop=True)可以重设index但是原有的index依然在,加上drop=True后
"""
# students = pd.concat([page_001, page_002],axis=1).reset_index(drop=True)
students = pd.concat([page_001, page_002], axis=0).reset_index(drop=True)
# students.reset_index(drop=True) # 为新的DataFrame 生成新的 index
print(students)
列操作
import pandas as pd
import numpy as np
page_001 = pd.read_excel('./Students.xlsx', sheet_name='Page_001')
page_002 = pd.read_excel('./Students.xlsx', sheet_name='Page_002')
"""
concat 可以追加一个DateFrame, axis=0为添加行,axis=1为纵向添加,
concat后面添加 .reset_index(drop=True)可以重设index但是原有的index依然在,加上drop=True后
"""
# students = pd.concat([page_001, page_002],axis=1).reset_index(drop=True)
students = pd.concat([page_001, page_002], axis=0).reset_index(drop=True)
# 增加列
# students['Age'] = 18
# 所有数字都等于18
# students['Age'] = np.repeat(20, len(students))
# numpy方法 所有都等于20
students['Age'] = np.arange(0, len(students))
# numpy方法生成序列 0—len(students)
# 删除列
students.drop(columns=['Score', 'Age'], inplace=True)
# 插入列 插入位置 插入的列名 插入的值 ,插入列默认 inplace=True,不用写
students.insert(1, column='Foo', value=np.repeat('foo', len(students)))
# 修改列名
students.rename(columns={'Foo': 'FOO', 'Nmae': 'NAME'}, inplace=True)
# 删除空值
# 只有float才可以有空值 not number,即 NaN,所以先转化为float类型
students['ID'] = students['ID'].astype(float)
for i in range(5, 14):
# np.nan 即为 not number, NaN
students['ID'].at[i] = np.nan
# .dropna 方法从上到下扫描每一行,一旦行中有 NaN,即删除整行
students.dropna(inplace=True)
# students.reset_index(drop=True) # 为新的DataFrame 生成新的 index
print(students)
"""
结果显示
ID FOO Name
0 1.0 foo Student_001
1 2.0 foo Student_002
2 3.0 foo Student_003
3 4.0 foo Student_004
4 5.0 foo Student_005
14 15.0 foo Student_015
15 16.0 foo Student_016
16 17.0 foo Student_017
17 18.0 foo Student_018
18 19.0 foo Student_019
19 20.0 foo Student_020
20 21.0 foo Student_021
21 22.0 foo Student_022
22 23.0 foo Student_023
23 24.0 foo Student_024
24 25.0 foo Student_025
25 26.0 foo Student_026
26 27.0 foo Student_027
27 28.0 foo Student_028
28 29.0 foo Student_029
29 30.0 foo Student_030
30 31.0 foo Student_031
31 32.0 foo Student_032
32 33.0 foo Student_033
33 34.0 foo Student_034
34 35.0 foo Student_035
35 36.0 foo Student_036
36 37.0 foo Student_037
37 38.0 foo Student_038
38 39.0 foo Student_039
39 40.0 foo Student_040
Process finished with exit code 0
"""