import pandas as pd


def csv_to_df(file):
    """Load a CSV file into a DataFrame.

    Args:
        file: Path of the CSV file; passed unchanged to ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    return pd.read_csv(file)


def group_stock(stock_df):
    """Print summary statistics for every stock.

    Rows are grouped by the ``Stock_code`` column and the result of
    ``describe()`` on the groups is printed to stdout.

    Args:
        stock_df: DataFrame that contains a ``Stock_code`` column.

    Returns:
        None. The statistics are only printed.
    """
    stock_group = stock_df.groupby('Stock_code')
    print(stock_group.describe())


if __name__ == '__main__':
    filename = '../data/stock_all.csv'
    stock_df = csv_to_df(filename)
    group_stock(stock_df)
百分位数:统计学术语。将一组数据从小到大排序,并计算相应的累计百分位,则某一百分位所对应的数据值就称为这一百分位的百分位数。也可以表述为:将一组 n 个观测值按数值大小排列,处于 p% 位置的值称为第 p 百分位数。例如,第 50 百分位数即中位数,对应 describe() 输出中的 50% 列。
              High                       ...  Adj_Close
             count       mean       std  ...        50%        75%        max
Stock_code                               ...
600000      2633.0   9.558350  2.623207  ...   9.101338  10.760014  12.523858
600004      2632.0   9.440567  4.473545  ...   7.628153  12.611245  23.260876
600006      2632.0   5.204897  2.021496  ...   4.530794   5.727762  14.432583
600007      2633.0  13.849244  3.575867  ...  12.769957  15.113232  21.810545
600008      2630.0   3.976465  1.335394  ...   3.221414   3.978866   8.907692
...            ...        ...       ...  ...        ...        ...        ...
600395      2632.0  10.996351  5.393427  ...   6.807619  10.709707  17.755426
600396      2626.0   4.119981  1.786175  ...   3.616284   4.865564  13.316794
600397      2600.0   5.243262  2.258282  ...   4.890000   5.730211  14.460000
600398      2039.0   9.287401  3.447771  ...   8.984260  10.543828  19.097530
600400      2632.0   3.246414  1.737070  ...   3.225956   4.291482   7.573524

[315 rows x 48 columns]
统计各股票数据条数
def group_stock(stock_df):
    """Print the number of rows recorded for each stock.

    Rows are grouped by ``Stock_code`` and the non-null count of that
    same column is printed per group.

    Args:
        stock_df: DataFrame that contains a ``Stock_code`` column.

    Returns:
        None. The counts are only printed.
    """
    stock_group = stock_df.groupby('Stock_code')
    # Bracket selection avoids any clash between a column name and a
    # GroupBy attribute or method.
    print(stock_group['Stock_code'].count())
Stock_code
600000    2633
600004    2632
600006    2632
600007    2633
600008    2630
          ...
600395    2632
600396    2626
600397    2600
600398    2039
600400    2632
Name: Stock_code, Length: 315, dtype: int64
统计各股票收盘价的最高价
def group_stock(stock_df):
    """Print the highest closing price of each stock.

    Rows are grouped by ``Stock_code`` and the maximum of the ``Close``
    column is printed per group.

    Args:
        stock_df: DataFrame with ``Stock_code`` and ``Close`` columns.

    Returns:
        None. The maxima are only printed.
    """
    stock_group = stock_df.groupby('Stock_code')
    # Bracket selection avoids any clash between a column name and a
    # GroupBy attribute or method.
    print(stock_group['Close'].max())
Stock_code
600000    14.055900
600004    23.500000
600006    15.190000
600007    23.750000
600008     9.785000
            ...
600395    26.200001
600396    13.630000
600397    14.460000
600398    19.959999
600400     8.246750
Name: Close, Length: 315, dtype: float64
每支股票每天最高价的历史最高价
def group_stock(stock_df):
    """Print the all-time maximum of the daily high price of each stock.

    Rows are grouped by ``Stock_code`` and the maximum of the ``High``
    column is printed per group.

    Args:
        stock_df: DataFrame with ``Stock_code`` and ``High`` columns.

    Returns:
        None. The maxima are only printed.
    """
    stock_group = stock_df.groupby('Stock_code')
    # Bracket selection avoids any clash between a column name and a
    # GroupBy attribute or method.
    print(stock_group['High'].max())
Stock_code
600000    14.069900
600004    23.690001
600006    16.350000
600007    23.959999
600008     9.840000
            ...
600395    26.733299
600396    14.370000
600397    15.000000
600398    21.059999
600400     8.246750
Name: High, Length: 315, dtype: float64
每支股票每天最高价的历史最高价和最低价
def group_stock(stock_df):
    """Print the all-time maximum and minimum daily high price of each stock.

    Rows are grouped by ``Stock_code``; the ``High`` column is aggregated
    with both ``max`` and ``min`` and the resulting two-column table is
    printed.

    Args:
        stock_df: DataFrame with ``Stock_code`` and ``High`` columns.

    Returns:
        None. The table is only printed.
    """
    stock_group = stock_df.groupby('Stock_code')
    # Use string names rather than the builtins max/min: newer pandas
    # releases deprecate passing the builtins and emit a FutureWarning.
    print(stock_group['High'].agg(['max', 'min']))
                  max      min
Stock_code
600000      14.069900  5.02098
600004      23.690001  4.15862
600006      16.350000  2.65000
600007      23.959999  8.81000
600008       9.840000  1.99500
...               ...      ...
600395      26.733299  4.89000
600396      14.370000  1.71000
600397      15.000000  1.73000
600398      21.059999  3.00000
600400       8.246750  1.05519

[315 rows x 2 columns]
