import pandas as pd
def csv_to_df(file) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Args:
        file: Anything ``pandas.read_csv`` accepts as its first argument
            (for example a path string or an open file-like object).

    Returns:
        The DataFrame produced by ``pandas.read_csv(file)``.
    """
    return pd.read_csv(file)
def group_stock(stock_df: pd.DataFrame) -> None:
    """Print descriptive statistics for every stock.

    Rows are grouped by the ``Stock_code`` column and the result of
    ``describe()`` on the grouped data is written to standard output.

    Args:
        stock_df: DataFrame that contains a ``Stock_code`` column.
    """
    per_stock = stock_df.groupby('Stock_code')
    print(per_stock.describe())
if __name__ == '__main__':
    # Script entry point: load the stock CSV and print the grouped report.
    # The relative path is resolved against the current working directory.
    filename = '../data/stock_all.csv'
    stock_df = csv_to_df(filename)
    group_stock(stock_df)
百分位数:统计学术语。将一组数据从小到大排序并计算相应的累计百分位,某一百分位所对应的数据值就称为该百分位的百分位数。例如,将一组 n 个观测值按数值大小排列后,处于 p% 位置的值称为第 p 百分位数。
High ... Adj_Close
count mean std ... 50% 75% max
Stock_code ...
600000 2633.0 9.558350 2.623207 ... 9.101338 10.760014 12.523858
600004 2632.0 9.440567 4.473545 ... 7.628153 12.611245 23.260876
600006 2632.0 5.204897 2.021496 ... 4.530794 5.727762 14.432583
600007 2633.0 13.849244 3.575867 ... 12.769957 15.113232 21.810545
600008 2630.0 3.976465 1.335394 ... 3.221414 3.978866 8.907692
... ... ... ... ... ... ... ...
600395 2632.0 10.996351 5.393427 ... 6.807619 10.709707 17.755426
600396 2626.0 4.119981 1.786175 ... 3.616284 4.865564 13.316794
600397 2600.0 5.243262 2.258282 ... 4.890000 5.730211 14.460000
600398 2039.0 9.287401 3.447771 ... 8.984260 10.543828 19.097530
600400 2632.0 3.246414 1.737070 ... 3.225956 4.291482 7.573524
[315 rows x 48 columns]
统计各股票数据条数
def group_stock(stock_df: pd.DataFrame) -> None:
    """Print the number of rows recorded for each stock.

    Rows are grouped by ``Stock_code`` and the non-null count of that same
    column is printed per group.

    Args:
        stock_df: DataFrame that contains a ``Stock_code`` column.
    """
    per_stock = stock_df.groupby('Stock_code')
    # Bracket selection avoids any clash between a column name and a
    # GroupBy attribute or method of the same name.
    print(per_stock['Stock_code'].count())
Stock_code
600000 2633
600004 2632
600006 2632
600007 2633
600008 2630
...
600395 2632
600396 2626
600397 2600
600398 2039
600400 2632
Name: Stock_code, Length: 315, dtype: int64
统计各股票收盘价的最高价
def group_stock(stock_df: pd.DataFrame) -> None:
    """Print the highest value of the ``Close`` column for each stock.

    Args:
        stock_df: DataFrame that contains ``Stock_code`` and ``Close``
            columns.
    """
    per_stock = stock_df.groupby('Stock_code')
    # Bracket selection avoids any clash between a column name and a
    # GroupBy attribute or method of the same name.
    print(per_stock['Close'].max())
Stock_code
600000 14.055900
600004 23.500000
600006 15.190000
600007 23.750000
600008 9.785000
...
600395 26.200001
600396 13.630000
600397 14.460000
600398 19.959999
600400 8.246750
Name: Close, Length: 315, dtype: float64
每支股票每天最高价的历史最高价
def group_stock(stock_df: pd.DataFrame) -> None:
    """Print the highest value of the ``High`` column for each stock.

    Args:
        stock_df: DataFrame that contains ``Stock_code`` and ``High``
            columns.
    """
    per_stock = stock_df.groupby('Stock_code')
    # Bracket selection avoids any clash between a column name and a
    # GroupBy attribute or method of the same name.
    print(per_stock['High'].max())
Stock_code
600000 14.069900
600004 23.690001
600006 16.350000
600007 23.959999
600008 9.840000
...
600395 26.733299
600396 14.370000
600397 15.000000
600398 21.059999
600400 8.246750
Name: High, Length: 315, dtype: float64
每支股票每天最高价的历史最高价和最低价
def group_stock(stock_df: pd.DataFrame) -> None:
    """Print the highest and lowest value of ``High`` for each stock.

    The printed table has one row per ``Stock_code`` and two columns,
    ``max`` and ``min``.

    Args:
        stock_df: DataFrame that contains ``Stock_code`` and ``High``
            columns.
    """
    per_stock = stock_df.groupby('Stock_code')
    # Name the aggregations as strings: handing the Python builtins
    # ``max``/``min`` to ``agg`` is deprecated in recent pandas releases and
    # emits a FutureWarning. The resulting column labels are unchanged.
    print(per_stock['High'].agg(['max', 'min']))
max min
Stock_code
600000 14.069900 5.02098
600004 23.690001 4.15862
600006 16.350000 2.65000
600007 23.959999 8.81000
600008 9.840000 1.99500
... ... ...
600395 26.733299 4.89000
600396 14.370000 1.71000
600397 15.000000 1.73000
600398 21.059999 3.00000
600400 8.246750 1.05519
[315 rows x 2 columns]