PUBG吃鸡排名预测案例
因数据集过大不予演示
大致流程
- 数据清洗
- 模型构建
- 模型评估
案例分析:
- 获取数据
- 数据处理
  去除na,去除开挂人群(无武器击杀,击杀过多,爆头率过高)
- 确认x:数据集的特征值,y:数据集的目标值(标签)
- 实例化估计器,进行训练
- 进行predict预测,计算score,mae
# PUBG finish-placement prediction.
#
# Pipeline: load the training CSV, drop incomplete rows, remove rows that look
# like cheaters, select numeric features, standardize them, fit a Ridge
# regressor, and report R^2 and mean absolute error on a held-out split.

import numpy as np
import pandas as pd

# Location of the training data, relative to the working directory.
FILE_PATH = "./data/train_V2.csv"

# Columns used as model features (x).
FEATURE_COLUMNS = [
    "assists",
    "boosts",
    "DBNOs",
    "heals",
    "kills",
    "matchDuration",
    "rankPoints",
    "revives",
    "rideDistance",
    "teamKills",
    "vehicleDestroys",
    "walkDistance",
    "winPoints",
]

# Column used as the regression target (y).
TARGET_COLUMN = "winPlacePerc"


def remove_cheaters(df, *, max_kills=30, min_kills_for_headshot_check=9):
    """Return a copy of *df* without rows that look like cheating players.

    A row is removed when any of the following holds:

    * the player has at least one kill but never acquired a weapon;
    * the player has more than ``max_kills`` kills;
    * every kill was a headshot and the player has at least
      ``min_kills_for_headshot_check`` kills.

    Args:
        df: Frame with ``kills``, ``weaponsAcquired`` and ``headshotKills``
            columns.
        max_kills: Kill counts strictly above this value are treated as
            cheating.
        min_kills_for_headshot_check: Minimum kill count at which a 100%
            headshot rate is treated as cheating.

    Returns:
        A new DataFrame containing only the retained rows; *df* itself is
        not modified.
    """
    kills = df["kills"]

    killed_without_weapon = (kills > 0) & (df["weaponsAcquired"] == 0)
    too_many_kills = kills > max_kills

    # 0 kills / 0 headshots yields NaN; treat that as a 0% headshot rate.
    head_rate = (df["headshotKills"] / kills).fillna(0)
    only_headshots = (head_rate == 1) & (kills >= min_kills_for_headshot_check)

    suspicious = killed_without_weapon | too_many_kills | only_headshots
    return df.loc[~suspicious].copy()


def prepare_dataset(df) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Clean *df* and split it into features and target.

    Rows containing any missing value are dropped first, then suspected
    cheaters are removed via :func:`remove_cheaters`.

    Args:
        df: Raw frame containing ``FEATURE_COLUMNS``, ``TARGET_COLUMN``,
            ``weaponsAcquired`` and ``headshotKills``.

    Returns:
        ``(x, y)`` where ``x`` holds ``FEATURE_COLUMNS`` and ``y`` is a
        single-column frame holding ``TARGET_COLUMN``.
    """
    complete_rows = df.dropna(axis=0, how="any")
    cleaned = remove_cheaters(complete_rows)
    return cleaned[FEATURE_COLUMNS], cleaned[[TARGET_COLUMN]]


def train_and_evaluate(x, y, *, test_size=0.2, random_state=22):
    """Fit a Ridge regressor on a train split and evaluate on the test split.

    Args:
        x: Feature values.
        y: Target values.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed for the train/test split.

    Returns:
        ``(score, mae)``: the estimator's ``score`` on the test split (the
        coefficient of determination R^2 for a regressor, not an accuracy)
        and the mean absolute error between true and predicted targets.
    """
    # Imported here so the pandas-only helpers above remain usable when
    # scikit-learn is not installed.
    from sklearn.linear_model import Ridge
    from sklearn.metrics import mean_absolute_error
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, random_state=random_state, test_size=test_size
    )

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split, so no test-set information leaks into preprocessing.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # `normalize` was removed from Ridge in scikit-learn 1.2; scaling is
    # already handled by StandardScaler above.
    estimator = Ridge(
        alpha=1.0,
        fit_intercept=True,
        copy_X=True,
        max_iter=None,
        tol=1e-3,
        solver="auto",
        random_state=None,
    )
    estimator.fit(x_train, y_train)

    y_pre = estimator.predict(x_test)
    score = estimator.score(x_test, y_test)
    mae = mean_absolute_error(y_test, y_pre)
    return score, mae


def main():
    """Run the full pipeline on ``FILE_PATH`` and print the metrics."""
    df = pd.read_csv(FILE_PATH)
    print(df.head())

    x, y = prepare_dataset(df)
    score, mae = train_and_evaluate(x, y)
    print(f"R^2 score: {score}")
    print(f"MAE: {mae}")


if __name__ == "__main__":
    main()
