1.表连接
Students16.xlsx

1.表连接

  • students = pd.concat([page_001, page_002], axis=1).reset_index(drop=True)
  1. import pandas as pd
  2. import numpy as np
  3. page_001 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_001')
  4. page_002 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_002')
  5. students = pd.concat([page_001, page_002], axis=1).reset_index(drop=True)
  6. print(students)
  7. """
  8. ID Name Score ID Name Score
  9. 0 1 Student_001 90 21 Student_021 80
  10. 1 2 Student_002 90 22 Student_022 80
  11. 2 3 Student_003 90 23 Student_023 80
  12. 3 4 Student_004 90 24 Student_024 80
  13. 4 5 Student_005 90 25 Student_025 80
  14. 5 6 Student_006 90 26 Student_026 80
  15. 6 7 Student_007 90 27 Student_027 80
  16. 7 8 Student_008 90 28 Student_028 80
  17. 8 9 Student_009 90 29 Student_029 80
  18. 9 10 Student_010 90 30 Student_030 80
  19. 10 11 Student_011 90 31 Student_031 80
  20. 11 12 Student_012 90 32 Student_032 80
  21. 12 13 Student_013 90 33 Student_033 80
  22. 13 14 Student_014 90 34 Student_034 80
  23. 14 15 Student_015 90 35 Student_035 80
  24. 15 16 Student_016 90 36 Student_036 80
  25. 16 17 Student_017 90 37 Student_037 80
  26. 17 18 Student_018 90 38 Student_038 80
  27. 18 19 Student_019 90 39 Student_039 80
  28. 19 20 Student_020 90 40 Student_040 80
  29. """

2.追加列

  • students['Age'] = 25

  • students['Age'] = np.repeat(25, len(students))

  • students['Age'] = np.arange(0, len(students)) # 创建连续数组

  1. import pandas as pd
  2. import numpy as np
  3. page_001 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_001')
  4. page_002 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_002')
  5. students = pd.concat([page_001, page_002]).reset_index(drop=True)
  6. students['Age'] = 25
  7. # students['Age'] = np.repeat(25, len(students))
  8. # students['Age'] = np.arange(0, len(students))
  9. print(students)

3.删除列

  • students.drop(columns=['Age', 'Score'], inplace=True)  # inplace=True 表示直接在原 DataFrame 上修改,而不是返回一个新的 DataFrame
  1. import pandas as pd
  2. page_001 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_001')
  3. page_002 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_002')
  4. students = pd.concat([page_001, page_002]).reset_index(drop=True)
  5. students['Age'] = 25
  6. students.drop(columns=['Age', 'Score'], inplace=True)
  7. print(students)

4.插入列

  • students.insert(1, column='Foo', value=np.repeat('foo', len(students)))
  1. import pandas as pd
  2. import numpy as np
  3. page_001 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_001')
  4. page_002 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_002')
  5. students = pd.concat([page_001, page_002]).reset_index(drop=True)
  6. students.insert(1, column='Foo', value=np.repeat('foo', len(students)))
  7. print(students)

5.修改列名

  • students.rename(columns={'Foo': 'FOO', 'Name': 'NAME'}, inplace=True)
  1. import pandas as pd
  2. import numpy as np
  3. page_001 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_001')
  4. page_002 = pd.read_excel(r'tmp1\Students16.xlsx', sheet_name='Page_002')
  5. students = pd.concat([page_001, page_002]).reset_index(drop=True)
  6. students.insert(1, column='Foo', value=np.repeat('foo', len(students)))
  7. students.rename(columns={'Foo': 'FOO', 'Name': 'NAME'}, inplace=True)
  8. print(students)

6.如何把一列变成两列

Employees.xlsx

  1. import pandas as pd
  2. employees = pd.read_excel(r'tmp1\Employees.xlsx', index_col='ID')
  3. df = employees['Full Name'].str.split(expand=True)
  4. # 参数expand=True时,切割出来的每一段各自成为一列,返回一个DataFrame。
  5. # expand=False时不分列,返回一个Series,其中每个元素是切割结果组成的列表。
  6. employees['First Name'] = df[0]
  7. employees['Last Name'] = df[1]
  8. employees.drop('Full Name', axis=1, inplace=True)
  9. # 删除Full Name这一列
  10. print(employees)