Baseline学习1
# %%time  — IPython cell magic in the original notebook (times the whole cell);
# NOTE(review): in SOURCE it was fused with the comment below, which IPython rejects — confirm intent.
# Cross-validation of the classifier
ONLY_FIRST_FOLD = False

# Keep every column except the row id and the prediction target.
features = [f for f in train.columns if f not in ('customer_ID', 'target')]


def my_booster(random_state=1, n_estimators=1200):
    """Build an untrained LGBMClassifier.

    n_estimators: number of boosting trees.
    """
    return LGBMClassifier(n_estimators=n_estimators,
                          learning_rate=0.03,
                          reg_lambda=50,
                          min_child_samples=2400,
                          num_leaves=95,
                          colsample_bytree=0.19,
                          max_bins=511,  # LightGBM alias of max_bin
                          random_state=random_state)


print(f"{len(features)} features")

score_list = []
y_pred_list = []
kf = StratifiedKFold(n_splits=5)  # stratified 5-fold cross-validation
for fold, (idx_tr, idx_va) in enumerate(kf.split(train, target)):
    # Reset per-fold objects so the previous fold's memory can be reclaimed.
    X_tr, X_va, y_tr, y_va, model = None, None, None, None, None
    start_time = datetime.datetime.now()

    X_tr = train.iloc[idx_tr][features]  # training features
    X_va = train.iloc[idx_va][features]  # validation features
    y_tr = target[idx_tr]                # training labels
    y_va = target[idx_va]                # validation labels

    model = my_booster()
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=UserWarning)
        model.fit(X_tr, y_tr,
                  eval_set=[(X_va, y_va)],
                  eval_metric=[lgb_amex_metric],
                  callbacks=[log_evaluation(100)])

    # Drop the training slices before scoring to free memory.
    X_tr, y_tr = None, None

    # Raw-score predictions on the held-out fold, scored with the AMEX metric.
    y_va_pred = model.predict_proba(X_va, raw_score=True)
    score = amex_metric(y_va, y_va_pred)

    # Best early-stopped iteration; fall back to the full tree count when
    # early stopping never triggered.
    n_trees = model.best_iteration_ if model.best_iteration_ is not None else model.n_estimators

    print(f"{Fore.GREEN}{Style.BRIGHT}Fold {fold} | {str(datetime.datetime.now() - start_time)[-12:-7]} |"
          f" {n_trees:5} trees |"
          f" Score = {score:.5f}{Style.RESET_ALL}")
    score_list.append(score)

    if INFERENCE:
        y_pred_list.append(model.predict_proba(test[features], raw_score=True))

    if ONLY_FIRST_FOLD:
        break  # we only want the first fold

print(f"{Fore.GREEN}{Style.BRIGHT}OOF Score: {np.mean(score_list):.5f}{Style.RESET_ALL}")