Students.xlsx
image.png
image.png

行操作

代码1 : append ,drop ,loc 等应用

  1. import pandas as pd
  2. page_001 = pd.read_excel('./Students.xlsx', sheet_name='Page_001')
  3. page_002 = pd.read_excel('./Students.xlsx', sheet_name='Page_002')
  4. """
  5. append 可以追加一个DateFrame
  6. reset_index(drop=True),可以重新生成一个index,但是原有的index依然在,加上drop=True后
  7. """
  8. students = page_001.append(page_002).reset_index(drop=True)
  9. # 增加一行 ,如果不使用ignore_index=True,加入的Series会找不到index,报错
  10. stu = pd.Series({'ID': 41, 'Name': 'Abel', "Score": 99})
  11. students = students.append(stu, ignore_index=True)
  12. # 单元格修改数值
  13. students.at[39, 'Name'] = 'Bailey'
  14. students.at[39, 'Score'] = '900'
  15. # 整行替换
  16. stu2 = pd.Series({'ID': 31, 'Name': 'nzl5423', 'Score': 88})
  17. students.iloc[30] = stu2
  18. # 插入一行
  19. stu3 = pd.Series({'ID': 101, 'Name': 'wyy', 'Score': 88})
  20. part1 = students[:20]
  21. part2 = students[20:]
  22. """
  23. 插入一行的操作是先将DateFrame 切片,通过append方法 把part1、stu3、part2 连接起来就可以了
  24. 新插进来的Series没有index ,所以要使用ignore_index=True,否则报错
  25. 将后面的DateFrame接上来后,因为这部分已经有index了,只需要reset_index(drop=True),但是原有的index依然在,加上drop=True后
  26. """
  27. students = part1.append(stu3, ignore_index=True).append(part2).reset_index(drop=True)
  28. # 删除数据行
  29. # students.drop(index=[0, 1, 2], inplace=True)
  30. # students.drop(index=range(0,10),inplace=True)
  31. # students.drop(students[0:10].index, inplace=True)
  32. # 按条件删除行
  33. for i in range(5, 15):
  34. students['Name'].at[i] = ''
  35. missing = students.loc[students['Name'] == '']
  36. students.drop(index=missing.index, inplace=True)
  37. students.reset_index(drop=True) # 为新的DataFrame 生成新的 index
  38. print(students)
  39. """
  40. 结果显示
  41. ID Name Score
  42. 0 1 Student_001 90
  43. 1 2 Student_002 90
  44. 2 3 Student_003 90
  45. 3 4 Student_004 90
  46. 4 5 Student_005 90
  47. 15 16 Student_016 90
  48. 16 17 Student_017 90
  49. 17 18 Student_018 90
  50. 18 19 Student_019 90
  51. 19 20 Student_020 90
  52. 20 101 wyy 88
  53. 21 21 Student_021 80
  54. 22 22 Student_022 80
  55. 23 23 Student_023 80
  56. 24 24 Student_024 80
  57. 25 25 Student_025 80
  58. 26 26 Student_026 80
  59. 27 27 Student_027 80
  60. 28 28 Student_028 80
  61. 29 29 Student_029 80
  62. 30 30 Student_030 80
  63. 31 31 nzl5423 88
  64. 32 32 Student_032 80
  65. 33 33 Student_033 80
  66. 34 34 Student_034 80
  67. 35 35 Student_035 80
  68. 36 36 Student_036 80
  69. 37 37 Student_037 80
  70. 38 38 Student_038 80
  71. 39 39 Student_039 80
  72. 40 40 Bailey 900
  73. 41 41 Abel 99
  74. Process finished with exit code 0
  75. """

代码2 concat 应用

  1. import pandas as pd
  2. page_001 = pd.read_excel('./Students.xlsx', sheet_name='Page_001')
  3. page_002 = pd.read_excel('./Students.xlsx', sheet_name='Page_002')
  4. """
  5. concat 可以追加一个DateFrame, axis=0为添加行,axis=1为纵向添加,
  6. concat后面添加 .reset_index(drop=True)可以重设index但是原有的index依然在,加上drop=True后
  7. """
  8. # students = pd.concat([page_001, page_002],axis=1).reset_index(drop=True)
  9. students = pd.concat([page_001, page_002], axis=0).reset_index(drop=True)
  10. # students.reset_index(drop=True) # 为新的DataFrame 生成新的 index
  11. print(students)

列操作

  1. import pandas as pd
  2. import numpy as np
  3. page_001 = pd.read_excel('./Students.xlsx', sheet_name='Page_001')
  4. page_002 = pd.read_excel('./Students.xlsx', sheet_name='Page_002')
  5. """
  6. concat 可以追加一个DateFrame, axis=0为添加行,axis=1为纵向添加,
  7. concat后面添加 .reset_index(drop=True)可以重设index但是原有的index依然在,加上drop=True后
  8. """
  9. # students = pd.concat([page_001, page_002],axis=1).reset_index(drop=True)
  10. students = pd.concat([page_001, page_002], axis=0).reset_index(drop=True)
  11. # 增加列
  12. # students['Age'] = 18
  13. # 所有数字都等于18
  14. # students['Age'] = np.repeat(20, len(students))
  15. # numpy方法 所有都等于20
  16. students['Age'] = np.arange(0, len(students))
  17. # numpy方法生成序列 0—len(students)
  18. # 删除列
  19. students.drop(columns=['Score', 'Age'], inplace=True)
  20. # 插入列 插入位置 插入的列名 插入的值 ,插入列默认 inplace=True,不用写
  21. students.insert(1, column='Foo', value=np.repeat('foo', len(students)))
  22. # 修改列名
  23. students.rename(columns={'Foo': 'FOO', 'Nmae': 'NAME'}, inplace=True)
  24. # 删除空值
  25. # 只有float才可以有空值 not number,即 NaN,所以先转化为float类型
  26. students['ID'] = students['ID'].astype(float)
  27. for i in range(5, 14):
  28. # np.nan 即为 not number, NaN
  29. students['ID'].at[i] = np.nan
  30. # .dropna 方法从上到下扫描每一行,一旦行中有 NaN,即删除整行
  31. students.dropna(inplace=True)
  32. # students.reset_index(drop=True) # 为新的DataFrame 生成新的 index
  33. print(students)
  34. """
  35. 结果显示
  36. ID FOO Name
  37. 0 1.0 foo Student_001
  38. 1 2.0 foo Student_002
  39. 2 3.0 foo Student_003
  40. 3 4.0 foo Student_004
  41. 4 5.0 foo Student_005
  42. 14 15.0 foo Student_015
  43. 15 16.0 foo Student_016
  44. 16 17.0 foo Student_017
  45. 17 18.0 foo Student_018
  46. 18 19.0 foo Student_019
  47. 19 20.0 foo Student_020
  48. 20 21.0 foo Student_021
  49. 21 22.0 foo Student_022
  50. 22 23.0 foo Student_023
  51. 23 24.0 foo Student_024
  52. 24 25.0 foo Student_025
  53. 25 26.0 foo Student_026
  54. 26 27.0 foo Student_027
  55. 27 28.0 foo Student_028
  56. 28 29.0 foo Student_029
  57. 29 30.0 foo Student_030
  58. 30 31.0 foo Student_031
  59. 31 32.0 foo Student_032
  60. 32 33.0 foo Student_033
  61. 33 34.0 foo Student_034
  62. 34 35.0 foo Student_035
  63. 35 36.0 foo Student_036
  64. 36 37.0 foo Student_037
  65. 37 38.0 foo Student_038
  66. 38 39.0 foo Student_039
  67. 39 40.0 foo Student_040
  68. Process finished with exit code 0
  69. """