1.表连接
Students16.xlsx
1.表连接
- students = pd.concat([page_001, page_002], axis=1).reset_index(drop=True)
# Join the two worksheets of the Students workbook side by side (column-wise).
import pandas as pd
import numpy as np

# Raw string: '\S' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on recent Python versions. The path value is unchanged.
workbook = r'tmp1\Students16.xlsx'
page_001 = pd.read_excel(workbook, sheet_name='Page_001')
page_002 = pd.read_excel(workbook, sheet_name='Page_002')
# axis=1 puts page_002's columns to the right of page_001's, matching rows by index.
students = pd.concat([page_001, page_002], axis=1).reset_index(drop=True)
print(students)
"""
ID Name Score ID Name Score
0 1 Student_001 90 21 Student_021 80
1 2 Student_002 90 22 Student_022 80
2 3 Student_003 90 23 Student_023 80
3 4 Student_004 90 24 Student_024 80
4 5 Student_005 90 25 Student_025 80
5 6 Student_006 90 26 Student_026 80
6 7 Student_007 90 27 Student_027 80
7 8 Student_008 90 28 Student_028 80
8 9 Student_009 90 29 Student_029 80
9 10 Student_010 90 30 Student_030 80
10 11 Student_011 90 31 Student_031 80
11 12 Student_012 90 32 Student_032 80
12 13 Student_013 90 33 Student_033 80
13 14 Student_014 90 34 Student_034 80
14 15 Student_015 90 35 Student_035 80
15 16 Student_016 90 36 Student_036 80
16 17 Student_017 90 37 Student_037 80
17 18 Student_018 90 38 Student_038 80
18 19 Student_019 90 39 Student_039 80
19 20 Student_020 90 40 Student_040 80
"""
2.追加列
students['Age'] = 25
students['Age'] = np.repeat(25, len(students))
students['Age'] = np.arange(0, len(students)) # 创建连续数组
# Stack the two worksheets row-wise, then append a new 'Age' column.
import pandas as pd
import numpy as np

# Raw string: '\S' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on recent Python versions. The path value is unchanged.
workbook = r'tmp1\Students16.xlsx'
page_001 = pd.read_excel(workbook, sheet_name='Page_001')
page_002 = pd.read_excel(workbook, sheet_name='Page_002')
# Default axis=0 appends page_002's rows below page_001's; reset_index(drop=True)
# renumbers the rows so the combined frame has a single 0..n-1 index.
students = pd.concat([page_001, page_002]).reset_index(drop=True)
# Assigning a scalar broadcasts the same value to every row.
students['Age'] = 25
# Equivalent alternative: build the constant column explicitly.
# students['Age'] = np.repeat(25, len(students))
# Alternative: fill the column with consecutive integers starting at 0.
# students['Age'] = np.arange(0, len(students))
print(students)
3.删除列
- students.drop(columns=['Age', 'Score'], inplace=True) —— inplace=True 表示直接修改原 DataFrame,而不是返回一个新的 DataFrame
# Stack the two worksheets row-wise, add an 'Age' column, then drop columns.
import pandas as pd

# Raw string: '\S' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on recent Python versions. The path value is unchanged.
workbook = r'tmp1\Students16.xlsx'
page_001 = pd.read_excel(workbook, sheet_name='Page_001')
page_002 = pd.read_excel(workbook, sheet_name='Page_002')
students = pd.concat([page_001, page_002]).reset_index(drop=True)
students['Age'] = 25
# inplace=True mutates `students` directly instead of returning a new DataFrame.
students.drop(columns=['Age', 'Score'], inplace=True)
print(students)
4.插入列
- students.insert(1, column='Foo', value=np.repeat('foo', len(students)))
# Stack the two worksheets row-wise, then insert a new column at position 1.
import pandas as pd
import numpy as np  # required by np.repeat below; the snippet fails with NameError without it

# Raw string: '\S' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on recent Python versions. The path value is unchanged.
workbook = r'tmp1\Students16.xlsx'
page_001 = pd.read_excel(workbook, sheet_name='Page_001')
page_002 = pd.read_excel(workbook, sheet_name='Page_002')
students = pd.concat([page_001, page_002]).reset_index(drop=True)
# insert() modifies `students` in place; loc=1 makes 'Foo' the second column.
students.insert(1, column='Foo', value=np.repeat('foo', len(students)))
print(students)
5.修改列名
- students.rename(columns={'Foo': 'FOO', 'Name': 'NAME'}, inplace=True)
# Stack the two worksheets row-wise, insert a 'Foo' column, then rename columns.
import pandas as pd
import numpy as np  # required by np.repeat below; the snippet fails with NameError without it

# Raw string: '\S' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on recent Python versions. The path value is unchanged.
workbook = r'tmp1\Students16.xlsx'
page_001 = pd.read_excel(workbook, sheet_name='Page_001')
page_002 = pd.read_excel(workbook, sheet_name='Page_002')
students = pd.concat([page_001, page_002]).reset_index(drop=True)
students.insert(1, column='Foo', value=np.repeat('foo', len(students)))
# The mapping is {old_name: new_name}; inplace=True renames on `students` itself.
students.rename(columns={'Foo': 'FOO', 'Name': 'NAME'}, inplace=True)
print(students)
6.如何把一列变成两列
# Split the 'Full Name' column into separate 'First Name' and 'Last Name' columns.
import pandas as pd

# Raw string: '\E' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on recent Python versions. The path value is unchanged.
employees = pd.read_excel(r'tmp1\Employees.xlsx', index_col='ID')
# With no separator given, str.split splits on whitespace. expand=True returns
# the pieces as separate DataFrame columns (0, 1, ...); with expand=False the
# pieces are not spread into columns.
df = employees['Full Name'].str.split(expand=True)
# NOTE: only the first two pieces are used; any further pieces of a longer
# name are discarded here.
employees['First Name'] = df[0]
employees['Last Name'] = df[1]
# Remove the original 'Full Name' column now that it has been split.
employees.drop('Full Name', axis=1, inplace=True)
print(employees)