import numpy as np
import pandas as pd
apply
# Build a Series of integers labelled by city name.
s = pd.Series([20, 21, 12], index=['London', 'New York', 'Helsinki'])
s  # bare expression: shows the Series when run as a notebook cell
London      20
New York    21
Helsinki    12
dtype: int64
# Define the function.
def square(x):
    """Return ``x`` raised to the power of two."""
    return x ** 2


# Call the function: apply() takes each element out of the Series, runs it
# through the function, and puts the returned values into the resulting Series.
# NOTE(review): convert_dtype is deprecated as of pandas 2.1 -- confirm which
# pandas version this tutorial targets before keeping the argument.
s1 = s.apply(square, convert_dtype=True)
s1  # bare expression: shows the result when run as a notebook cell
London      400
New York    441
Helsinki    144
dtype: int64
# convert_dtype defaults to True; passing False leaves the result as dtype
# object instead of converting it to a better-fitting dtype.
# NOTE(review): convert_dtype is deprecated as of pandas 2.1 -- confirm which
# pandas version this tutorial targets before keeping the argument.
s2 = s.apply(square, convert_dtype=False)
s2  # bare expression: shows the result when run as a notebook cell
London      400
New York    441
Helsinki    144
dtype: object
# Small integer Series used for the aggregation examples.
s = pd.Series([1, 2, 3, 4])
s  # bare expression: shows the Series when run as a notebook cell
0    1
1    2
2    3
3    4
dtype: int64
# Reduce the Series with a single named aggregation; the result is a scalar.
s.agg(func='min')
1
# A list of aggregation names yields one result per name, returned as a Series
# indexed by those names.
s.agg(
    func=[
        'min',
        'max',
        'sum',
        'mean',
        'count',
    ]
)
min       1.0
max       4.0
sum      10.0
mean      2.5
count     4.0
dtype: float64
# Same as agg above: aggregate is the full-length spelling of the method.
s.aggregate(func='max')
4
map
# Series of strings with one missing value, used for the map() example.
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
s  # bare expression: shows the Series when run as a notebook cell
0       cat
1       dog
2       NaN
3    rabbit
dtype: object
# Translate values through a dict; values with no matching key (and NaN)
# come back as NaN.
dict01 = {'cat': 'ketten', 'dog': 'puppy'}
s.map(dict01)
0    ketten
1     puppy
2       NaN
3       NaN
dtype: object
groupby
# Named Series with repeated index labels, used for the groupby examples.
ser = pd.Series(
    [390., 350., 30., 20.],
    index=['Falcon', 'Falcon', 'Parrot', 'Parrot'],
    name="Max Speed",
)
ser  # bare expression: shows the Series when run as a notebook cell
Falcon    390.0
Falcon    350.0
Parrot     30.0
Parrot     20.0
Name: Max Speed, dtype: float64
# Group by an explicit list of keys (one per row), then average each group.
(
    ser
    .groupby(by=["a", "b", "a", "b"])
    .mean()
)
a    210.0
b    185.0
Name: Max Speed, dtype: float64
# Group by the index labels (level 0), then average each group.
(
    ser
    .groupby(level=0)
    .mean()
)
Falcon    370.0
Parrot     25.0
Name: Max Speed, dtype: float64
# Group by the outcome of a comparison (a boolean mask), then sum each group.
(
    ser
    .groupby(by=ser.gt(100))
    .sum()
)
Max Speed
False     50.0
True     740.0
Name: Max Speed, dtype: float64
描述统计/见官网
# Integer Series with some repeated values, used for the descriptive
# statistics examples.
s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 10, 11, 12, 12, 19, 20])
s  # bare expression: shows the Series when run as a notebook cell
0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9      9
10    10
11    10
12    11
13    12
14    12
15    19
16    20
dtype: int64
# Descriptive statistics: count, mean, std, min, quartiles and max.
s.describe()
count    17.000000
mean      8.705882
std       5.288723
min       1.000000
25%       5.000000
50%       9.000000
75%      11.000000
max      20.000000
dtype: float64
# Value counts: how many times each distinct value occurs.
s.value_counts()
12    2
10    2
9     2
20    1
19    1
11    1
8     1
7     1
6     1
5     1
4     1
3     1
2     1
1     1
dtype: int64
