基础 - Series分组聚合函数 - 《数据分析》

import numpy as np
import pandas as pd

apply

# Build an integer Series labelled by city name; it is the input to the
# apply examples that follow.
s = pd.Series([20, 21, 12],
              index=['London', 'New York', 'Helsinki'])
# Bare expression: in a notebook/REPL this echoes the Series.
s

London      20
New York    21
Helsinki    12
dtype: int64

# Define the function
def square(x):
    """Return ``x`` raised to the second power."""
    return pow(x, 2)
# Call the function
# NOTE(review): convert_dtype is reportedly deprecated in pandas >= 2.1 --
# confirm against the installed pandas version before reusing this snippet.
s1 = s.apply(square,convert_dtype = True)
s1 
# apply takes each element of the Series, runs it through func, and puts the
# returned values into the resulting Series

London      400
New York    441
Helsinki    144
dtype: int64

# Same call with convert_dtype=False: the values match s1, but the echoed
# result reports dtype object instead of int64.
s2 = s.apply(square,convert_dtype = False) # convert_dtype defaults to True
s2

London      400
New York    441
Helsinki    144
dtype: object

# Rebind s to a small integer Series (default 0..3 index) used by the
# agg/aggregate examples.
s = pd.Series([1,2,3,4])
s

0    1
1    2
2    3
3    4
dtype: int64

# Aggregate the Series with a single function given by name.
s.agg('min')

# A list of function names yields one result per name, returned as a Series
# indexed by those names (the echoed result is float64).
s.agg(['min','max','sum','mean','count'])

min       1.0
max       4.0
sum      10.0
mean      2.5
count     4.0
dtype: float64

s.aggregate('max') # aggregate is the same as agg above

map

# Rebind s to a string Series containing one missing value (np.nan) for the
# map example.
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

# Lookup table used by map: each Series value is replaced by dict01[value].
# NOTE(review): 'ketten' looks like a typo for 'kitten' -- if corrected, the
# pasted output below must be updated as well.
dict01 = {'cat':'ketten','dog':'puppy'}
# Values without a key in dict01 ('rabbit') and the missing value come back
# as NaN, as the echoed result shows.
s.map(dict01)

0    ketten
1     puppy
2       NaN
3       NaN
dtype: object

groupby

# Float Series named "Max Speed" whose index contains repeated labels
# ('Falcon' twice, 'Parrot' twice); used by the groupby examples.
ser = pd.Series([390., 350., 30., 20.],
                index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
ser

Falcon    390.0
Falcon    350.0
Parrot     30.0
Parrot     20.0
Name: Max Speed, dtype: float64

# The list supplies one group key per row, so rows 0 and 2 form group "a"
# and rows 1 and 3 form group "b".
ser.groupby(["a", "b", "a", "b"]).mean() # group according to the given keys

a    210.0
b    185.0
Name: Max Speed, dtype: float64

ser.groupby(level=0).mean()  # group by the index labels

Falcon    370.0
Parrot     25.0
Name: Max Speed, dtype: float64

# The boolean Series (ser > 100) is the grouping key, giving a False group
# and a True group.
ser.groupby(ser > 100).sum() # group and aggregate by a comparison / logical expression

Max Speed
False     50.0
True     740.0
Name: Max Speed, dtype: float64

描述统计（详见 pandas 官方文档）

# Rebind s to a 17-value integer Series with some repeated values (9, 10, 12)
# for the descriptive-statistics examples.
s = pd.Series([1,2,3,4,5,6,7,8,9,9,10,10,11,12,12,19,20])
s

0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9      9
10    10
11    10
12    11
13    12
14    12
15    19
16    20
dtype: int64

# The echoed summary lists count, mean, std, min, the 25%/50%/75% quantiles
# and max.
s.describe()  # descriptive statistics

count    17.000000
mean      8.705882
std       5.288723
min       1.000000
25%       5.000000
50%       9.000000
75%      11.000000
max      20.000000
dtype: float64

# Counts how often each distinct value occurs; the echoed result is ordered
# by count, highest first.
s.value_counts() # value counts

12    2
10    2
9     2
20    1
19    1
11    1
8     1
7     1
6     1
5     1
4     1
3     1
2     1
1     1
dtype: int64