pandas教程 - 10 股票数据分析 - 《Python程序设计数字教程》

import pandas as pd
def csv_to_df(file):
    """Load a CSV source into a pandas DataFrame.

    ``file`` is handed straight to ``pd.read_csv`` with default options,
    and the resulting DataFrame is returned unchanged.
    """
    return pd.read_csv(file)
def group_stock(stock_df):
    """Print descriptive statistics for every stock.

    The rows of ``stock_df`` are grouped by the ``Stock_code`` column and
    the ``describe()`` summary of the groups is printed to standard output.
    Nothing is returned.
    """
    per_stock_summary = stock_df.groupby('Stock_code').describe()
    print(per_stock_summary)
if __name__ == '__main__':
    # Script entry point: read '../data/stock_all.csv' and print the
    # per-stock statistics produced by group_stock.
    group_stock(csv_to_df('../data/stock_all.csv'))

百分位数：统计学术语。将一组 n 个观测值按数值从小到大排序，并计算相应的累计百分位，则某一百分位所对应的数据值就称为该百分位的百分位数。例如，处于 p% 位置的值称为第 p 百分位数；下面 describe() 输出中的 50%、75% 等列就分别是第 50、第 75 百分位数。

              High                       ...  Adj_Close                      
             count       mean       std  ...        50%        75%        max
Stock_code                               ...                                 
600000      2633.0   9.558350  2.623207  ...   9.101338  10.760014  12.523858
600004      2632.0   9.440567  4.473545  ...   7.628153  12.611245  23.260876
600006      2632.0   5.204897  2.021496  ...   4.530794   5.727762  14.432583
600007      2633.0  13.849244  3.575867  ...  12.769957  15.113232  21.810545
600008      2630.0   3.976465  1.335394  ...   3.221414   3.978866   8.907692
...            ...        ...       ...  ...        ...        ...        ...
600395      2632.0  10.996351  5.393427  ...   6.807619  10.709707  17.755426
600396      2626.0   4.119981  1.786175  ...   3.616284   4.865564  13.316794
600397      2600.0   5.243262  2.258282  ...   4.890000   5.730211  14.460000
600398      2039.0   9.287401  3.447771  ...   8.984260  10.543828  19.097530
600400      2632.0   3.246414  1.737070  ...   3.225956   4.291482   7.573524
[315 rows x 48 columns]

统计各股票数据条数

def group_stock(stock_df):
    """Print how many records each stock has.

    The rows of ``stock_df`` are grouped by ``Stock_code`` and the
    per-group ``count()`` of the ``Stock_code`` column is printed to
    standard output. Nothing is returned.
    """
    by_code = stock_df.groupby('Stock_code')
    records_per_stock = by_code['Stock_code'].count()
    print(records_per_stock)

Stock_code
600000    2633
600004    2632
600006    2632
600007    2633
600008    2630
          ... 
600395    2632
600396    2626
600397    2600
600398    2039
600400    2632
Name: Stock_code, Length: 315, dtype: int64

统计各股票收盘价的最高价

def group_stock(stock_df):
    """Print the highest ``Close`` value recorded for each stock.

    The rows of ``stock_df`` are grouped by ``Stock_code`` and the
    per-group maximum of the ``Close`` column is printed to standard
    output. Nothing is returned.
    """
    highest_close = stock_df.groupby('Stock_code')['Close'].max()
    print(highest_close)

Stock_code
600000    14.055900
600004    23.500000
600006    15.190000
600007    23.750000
600008     9.785000
            ...    
600395    26.200001
600396    13.630000
600397    14.460000
600398    19.959999
600400     8.246750
Name: Close, Length: 315, dtype: float64

每支股票每天最高价的历史最高价

def group_stock(stock_df):
    """Print the largest ``High`` value recorded for each stock.

    The rows of ``stock_df`` are grouped by ``Stock_code`` and the
    per-group maximum of the ``High`` column is printed to standard
    output. Nothing is returned.
    """
    grouped_high = stock_df.groupby('Stock_code')['High']
    peak_high = grouped_high.max()
    print(peak_high)

Stock_code
600000    14.069900
600004    23.690001
600006    16.350000
600007    23.959999
600008     9.840000
            ...    
600395    26.733299
600396    14.370000
600397    15.000000
600398    21.059999
600400     8.246750
Name: High, Length: 315, dtype: float64

每支股票每日最高价（High）的历史最大值和最小值

def group_stock(stock_df: pd.DataFrame) -> None:
    """Print the maximum and minimum ``High`` value for each stock.

    The rows of ``stock_df`` are grouped by ``Stock_code``; the ``High``
    column of each group is aggregated into a two-column table labelled
    ``max`` and ``min``, which is printed to standard output.

    Args:
        stock_df: DataFrame containing at least the ``Stock_code`` and
            ``High`` columns.
    """
    stock_group = stock_df.groupby('Stock_code')
    # Use the string names rather than the builtins max/min: passing the
    # builtin callables to agg() is deprecated in recent pandas releases
    # (FutureWarning), while the strings keep the same result and labels.
    print(stock_group['High'].agg(['max', 'min']))

                  max      min
Stock_code                    
600000      14.069900  5.02098
600004      23.690001  4.15862
600006      16.350000  2.65000
600007      23.959999  8.81000
600008       9.840000  1.99500
...               ...      ...
600395      26.733299  4.89000
600396      14.370000  1.71000
600397      15.000000  1.73000
600398      21.059999  3.00000
600400       8.246750  1.05519
[315 rows x 2 columns]