import pandas as pd


def csv_to_df(file):
    """Load a CSV file into a DataFrame.

    Args:
        file: Path or file-like object, passed straight to
            ``pandas.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file)


def group_stock(stock_df):
    """Print per-stock summary statistics.

    Rows are grouped by the ``Stock_code`` column and the result of
    ``describe()`` on the groups (count, mean, std, min, percentiles, max)
    is printed.

    Args:
        stock_df: DataFrame that contains a ``Stock_code`` column.
    """
    stock_group = stock_df.groupby('Stock_code')
    print(stock_group.describe())


if __name__ == '__main__':
    filename = '../data/stock_all.csv'
    stock_df = csv_to_df(filename)
    group_stock(stock_df)

    百分位数:统计学术语。如果将一组数据从小到大排序,并计算相应的累计百分位,则某一百分位所对应的数据值就称为这一百分位的百分位数。也可以表述为:将一组 n 个观测值按数值大小排列,处于 p% 位置的值称为第 p 百分位数。例如,describe() 输出中的 50% 列即第 50 百分位数(中位数)。

    1. High ... Adj_Close
    2. count mean std ... 50% 75% max
    3. Stock_code ...
    4. 600000 2633.0 9.558350 2.623207 ... 9.101338 10.760014 12.523858
    5. 600004 2632.0 9.440567 4.473545 ... 7.628153 12.611245 23.260876
    6. 600006 2632.0 5.204897 2.021496 ... 4.530794 5.727762 14.432583
    7. 600007 2633.0 13.849244 3.575867 ... 12.769957 15.113232 21.810545
    8. 600008 2630.0 3.976465 1.335394 ... 3.221414 3.978866 8.907692
    9. ... ... ... ... ... ... ... ...
    10. 600395 2632.0 10.996351 5.393427 ... 6.807619 10.709707 17.755426
    11. 600396 2626.0 4.119981 1.786175 ... 3.616284 4.865564 13.316794
    12. 600397 2600.0 5.243262 2.258282 ... 4.890000 5.730211 14.460000
    13. 600398 2039.0 9.287401 3.447771 ... 8.984260 10.543828 19.097530
    14. 600400 2632.0 3.246414 1.737070 ... 3.225956 4.291482 7.573524
    15. [315 rows x 48 columns]

    统计各股票数据条数

    def group_stock(stock_df):
        """Print the number of rows recorded for each stock.

        Rows are grouped by ``Stock_code`` and the non-null ``Stock_code``
        entries of every group are counted.

        Args:
            stock_df: DataFrame that contains a ``Stock_code`` column.
        """
        stock_group = stock_df.groupby('Stock_code')
        # Bracket selection keeps working even if a column name ever
        # collides with a GroupBy attribute or method.
        print(stock_group['Stock_code'].count())
    1. Stock_code
    2. 600000 2633
    3. 600004 2632
    4. 600006 2632
    5. 600007 2633
    6. 600008 2630
    7. ...
    8. 600395 2632
    9. 600396 2626
    10. 600397 2600
    11. 600398 2039
    12. 600400 2632
    13. Name: Stock_code, Length: 315, dtype: int64

    统计各股票收盘价的最高价

    def group_stock(stock_df):
        """Print the highest closing price of each stock.

        Rows are grouped by ``Stock_code`` and the maximum of the ``Close``
        column is taken per group.

        Args:
            stock_df: DataFrame with ``Stock_code`` and ``Close`` columns.
        """
        stock_group = stock_df.groupby('Stock_code')
        # Bracket selection keeps working even if a column name ever
        # collides with a GroupBy attribute or method.
        print(stock_group['Close'].max())
    1. Stock_code
    2. 600000 14.055900
    3. 600004 23.500000
    4. 600006 15.190000
    5. 600007 23.750000
    6. 600008 9.785000
    7. ...
    8. 600395 26.200001
    9. 600396 13.630000
    10. 600397 14.460000
    11. 600398 19.959999
    12. 600400 8.246750
    13. Name: Close, Length: 315, dtype: float64

    每支股票每天最高价的历史最高价

    def group_stock(stock_df):
        """Print the all-time highest daily high of each stock.

        Rows are grouped by ``Stock_code`` and the maximum of the ``High``
        column is taken per group.

        Args:
            stock_df: DataFrame with ``Stock_code`` and ``High`` columns.
        """
        stock_group = stock_df.groupby('Stock_code')
        # Bracket selection keeps working even if a column name ever
        # collides with a GroupBy attribute or method.
        print(stock_group['High'].max())
    1. Stock_code
    2. 600000 14.069900
    3. 600004 23.690001
    4. 600006 16.350000
    5. 600007 23.959999
    6. 600008 9.840000
    7. ...
    8. 600395 26.733299
    9. 600396 14.370000
    10. 600397 15.000000
    11. 600398 21.059999
    12. 600400 8.246750
    13. Name: High, Length: 315, dtype: float64

    每支股票每天最高价的历史最高价和最低价

    def group_stock(stock_df):
        """Print the all-time highest and lowest daily high of each stock.

        Rows are grouped by ``Stock_code``; the ``High`` column of every
        group is reduced to a ``max`` and a ``min`` column.

        Args:
            stock_df: DataFrame with ``Stock_code`` and ``High`` columns.
        """
        stock_group = stock_df.groupby('Stock_code')
        # Use the string names rather than the Python builtins: passing
        # builtins to agg() is deprecated in recent pandas releases and
        # emits a FutureWarning.
        print(stock_group['High'].agg(['max', 'min']))
    1. max min
    2. Stock_code
    3. 600000 14.069900 5.02098
    4. 600004 23.690001 4.15862
    5. 600006 16.350000 2.65000
    6. 600007 23.959999 8.81000
    7. 600008 9.840000 1.99500
    8. ... ... ...
    9. 600395 26.733299 4.89000
    10. 600396 14.370000 1.71000
    11. 600397 15.000000 1.73000
    12. 600398 21.059999 3.00000
    13. 600400 8.246750 1.05519
    14. [315 rows x 2 columns]