Baseline学习1

  1. %%time
  2. # Cross-validation of the classifier
  3. ONLY_FIRST_FOLD = False
  4. features = [f for f in train.columns if f != 'customer_ID' and f != 'target'] # 筛选非id和预测目标以外的列
  5. def my_booster(random_state=1, n_estimators=1200):
  6. ''' n_estimators 树的个数 '''
  7. return LGBMClassifier(n_estimators=n_estimators,
  8. learning_rate=0.03, reg_lambda=50,
  9. min_child_samples=2400,
  10. num_leaves=95,
  11. colsample_bytree=0.19,
  12. max_bins=511, random_state=random_state)
  13. print(f"{len(features)} features")
  14. score_list = []
  15. y_pred_list = []
  16. kf = StratifiedKFold(n_splits=5) # 调用StratifiedKFold进行K折交叉
  17. for fold, (idx_tr, idx_va) in enumerate(kf.split(train, target)):
  18. X_tr, X_va, y_tr, y_va, model = None, None, None, None, None
  19. start_time = datetime.datetime.now()
  20. X_tr = train.iloc[idx_tr][features] # 训练集X
  21. X_va = train.iloc[idx_va][features] # 验证集X
  22. y_tr = target[idx_tr] # 训练集y
  23. y_va = target[idx_va] # 验证集y
  24. model = my_booster() # 实例化模型
  25. with warnings.catch_warnings():
  26. warnings.filterwarnings('ignore', category=UserWarning)
  27. model.fit(X_tr, y_tr,
  28. eval_set = [(X_va, y_va)],
  29. eval_metric=[lgb_amex_metric],
  30. callbacks=[log_evaluation(100)]) # 拟合这一折下的数据
  31. X_tr, y_tr = None, None # 清空临时变量训练集X和训练集y
  32. y_va_pred = model.predict_proba(X_va, raw_score=True) # 输出模型在验证集上的预测结构
  33. score = amex_metric(y_va, y_va_pred) # 输出验证集下的评分
  34. n_trees = model.best_iteration_ # 获取最佳迭代(最佳树的数量?)
  35. if n_trees is None: n_trees = model.n_estimators # 如果不存在则取最大树
  36. print(f"{Fore.GREEN}{Style.BRIGHT}Fold {fold} | {str(datetime.datetime.now() - start_time)[-12:-7]} |"
  37. f" {n_trees:5} trees |"
  38. f" Score = {score:.5f}{Style.RESET_ALL}")
  39. score_list.append(score)
  40. if INFERENCE:
  41. y_pred_list.append(model.predict_proba(test[features], raw_score=True))
  42. if ONLY_FIRST_FOLD: break # we only want the first fold
  43. print(f"{Fore.GREEN}{Style.BRIGHT}OOF Score: {np.mean(score_list):.5f}{Style.RESET_ALL}")