开始使用 XGBoost

这是一个快速入门教程, 其中的代码片段可以让你在示例数据集上快速试用 XGBoost 完成二元分类任务.

Links to Helpful Other Resources

Python

  1. import xgboost as xgb
  2. # 读取数据
  3. dtrain = xgb.DMatrix('demo/data/agaricus.txt.train')
  4. dtest = xgb.DMatrix('demo/data/agaricus.txt.test')
  5. # 通过 map 指定参数
  6. param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
  7. num_round = 2
  8. bst = xgb.train(param, dtrain, num_round)
  9. # 预测
  10. preds = bst.predict(dtest)

R

  1. # Attach the package so that xgboost() is in scope
  2. library(xgboost)
  3. # Load the agaricus train/test datasets from the xgboost package
  4. data(agaricus.train, package='xgboost')
  5. data(agaricus.test, package='xgboost')
  6. train <- agaricus.train
  7. test <- agaricus.test
  8. # Fit the model; `nrounds` is spelled out in full rather than relying on
  9. # R's partial argument matching of `nround`
  10. bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nrounds = 2,
  11. nthread = 2, objective = "binary:logistic")
  12. # Predict on the test data
  13. pred <- predict(bst, test$data)

Julia

  1. using XGBoost
  2. # Read the training and test data
  3. train_X, train_Y = readlibsvm("demo/data/agaricus.txt.train", (6513, 126))
  4. test_X, test_Y = readlibsvm("demo/data/agaricus.txt.test", (1611, 126))
  5. # Fit the model; the objective is set explicitly so the booster is trained
  6. # for binary classification, as in the Python, R and Scala examples
  7. num_round = 2
  8. bst = xgboost(train_X, num_round, label=train_Y, eta=1, max_depth=2, objective="binary:logistic")
  9. # Predict on the test data
  10. pred = predict(bst, test_X)

Scala

  1. import ml.dmlc.xgboost4j.scala.DMatrix
  2. import ml.dmlc.xgboost4j.scala.XGBoost
  3. /** Minimal XGBoost4J example: trains a booster, predicts with it, and saves the model. */
  4. object XGBoostScalaExample {
  5. /** Entry point; `args` is not used. */
  6. def main(args: Array[String]): Unit = {
  7. // Load the training data available in the xgboost/demo/data directory
  8. val trainData = new DMatrix("/path/to/agaricus.txt.train")
  9. // Booster parameters; the values have mixed types, hence the explicit Map[String, Any]
  10. val paramMap: Map[String, Any] = Map(
  11. "eta" -> 0.1,
  12. "max_depth" -> 2,
  13. "objective" -> "binary:logistic")
  14. // Number of boosting rounds
  15. val round = 2
  16. // Train the model
  17. val model = XGBoost.train(trainData, paramMap, round)
  18. // Predict on the training data
  19. val predTrain = model.predict(trainData)
  20. // Save the model to a file
  21. model.saveModel("/local/path/to/model")
  22. }
  23. }