1. import numpy as np
    2. import pandas as pd

    apply

    1. s = pd.Series([20, 21, 12],
    2. index=['London', 'New York', 'Helsinki'])
    3. s
    1. London 20
    2. New York 21
    3. Helsinki 12
    4. dtype: int64
    1. #定义函数
    2. def square(x):
    3. return x**2
    4. #调用函数
    5. s1 = s.apply(square,convert_dtype = True)
    6. s1
    7. #apply 会把Series中的每个元素依次取出,传入func运行,再用返回值组成一个新的Series(原Series不变)
    1. London 400
    2. New York 441
    3. Helsinki 144
    4. dtype: int64
    1. s2 = s.apply(square,convert_dtype = False) #convert_dtype 默认为True;设为False时结果保持object类型(该参数自pandas 2.1起已弃用)
    2. s2
    1. London 400
    2. New York 441
    3. Helsinki 144
    4. dtype: object
    1. s = pd.Series([1,2,3,4])
    2. s
    1. 0 1
    2. 1 2
    3. 2 3
    4. 3 4
    5. dtype: int64
    1. s.agg('min')
    1. 1
    1. s.agg(['min','max','sum','mean','count'])
    1. min 1.0
    2. max 4.0
    3. sum 10.0
    4. mean 2.5
    5. count 4.0
    6. dtype: float64
    1. s.aggregate('max') #aggregate 与上面的 agg 等价(agg 是 aggregate 的别名)
    1. 4

    map

    1. s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
    2. s
    1. 0 cat
    2. 1 dog
    3. 2 NaN
    4. 3 rabbit
    5. dtype: object
    1. dict01 = {'cat':'kitten','dog':'puppy'}
    2. s.map(dict01)
    1. 0 kitten
    2. 1 puppy
    3. 2 NaN
    4. 3 NaN
    5. dtype: object

    groupby

    1. ser = pd.Series([390., 350., 30., 20.],
    2. index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
    3. ser
    1. Falcon 390.0
    2. Falcon 350.0
    3. Parrot 30.0
    4. Parrot 20.0
    5. Name: Max Speed, dtype: float64
    1. ser.groupby(["a", "b", "a", "b"]).mean() #根据给定的分组标签列表分组(标签与元素按位置一一对应)
    1. a 210.0
    2. b 185.0
    3. Name: Max Speed, dtype: float64
    1. ser.groupby(level=0).mean() #根据index标签(level=0)分组
    1. Falcon 370.0
    2. Parrot 25.0
    3. Name: Max Speed, dtype: float64
    1. ser.groupby(ser > 100).sum() #根据比较运算/逻辑运算得到的布尔Series分组后聚合
    1. Max Speed
    2. False 50.0
    3. True 740.0
    4. Name: Max Speed, dtype: float64

    描述统计/见官网

    1. s = pd.Series([1,2,3,4,5,6,7,8,9,9,10,10,11,12,12,19,20])
    2. s
    1. 0 1
    2. 1 2
    3. 2 3
    4. 3 4
    5. 4 5
    6. 5 6
    7. 6 7
    8. 7 8
    9. 8 9
    10. 9 9
    11. 10 10
    12. 11 10
    13. 12 11
    14. 13 12
    15. 14 12
    16. 15 19
    17. 16 20
    18. dtype: int64
    1. s.describe() #描述性统计函数
    1. count 17.000000
    2. mean 8.705882
    3. std 5.288723
    4. min 1.000000
    5. 25% 5.000000
    6. 50% 9.000000
    7. 75% 11.000000
    8. max 20.000000
    9. dtype: float64
    1. s.value_counts() #值计数
    1. 12 2
    2. 10 2
    3. 9 2
    4. 20 1
    5. 19 1
    6. 11 1
    7. 8 1
    8. 7 1
    9. 6 1
    10. 5 1
    11. 4 1
    12. 3 1
    13. 2 1
    14. 1 1
    15. dtype: int64