linear model

lm

regression

glm

classification + regression

glmnet

classification + regression

lda

classification

  1. # Fit a glmnet model to the Pima Indians diabetes data with caret,
  2. # evaluated by 5-fold cross-validation.
  3. # load packages
  4. library(caret)
  5. library(mlbench)
  6. library(glmnet)
  7. # load the dataset
  8. data(PimaIndiansDiabetes)
  9. # train
  10. set.seed(7)
  11. # named train_control so the object does not mask caret's trainControl()
  12. train_control <- trainControl(method = "cv", number = 5)
  13. # preProcess is spelled out in full instead of relying on partial matching
  14. fit.glmnet <- train(
  15.   diabetes ~ .,
  16.   data = PimaIndiansDiabetes,
  17.   method = "glmnet",
  18.   metric = "Accuracy",
  19.   preProcess = c("center", "scale"),
  20.   trControl = train_control
  21. )
  22. # summarize fit
  23. print(fit.glmnet)

non-linear models

knn

classification + regression

Naive Bayes

classification

SVM (svmRadial) radial kernel

classification + regression

TREE-based

CART (rpart)

classification + regression

comparing model performance

  1. # Train five classifiers on the same data with the same resampling scheme,
  2. # then gather their resampling results into `results` for comparison.
  3. # load packages
  4. library(mlbench)
  5. library(caret)
  6. # load the dataset
  7. data(PimaIndiansDiabetes)
  8. # prepare training scheme: 10-fold cross-validation repeated 3 times
  9. # (named train_control so the object does not mask caret's trainControl())
  10. train_control <- trainControl(
  11.   method = "repeatedcv",
  12.   number = 10,
  13.   repeats = 3
  14. )
  15. # The seed is reset before every train() call so each model starts from the
  16. # same random-number state, and therefore the same resampling splits.
  17. # CART
  18. set.seed(7)
  19. fit.cart <- train(diabetes ~ ., data = PimaIndiansDiabetes, method = "rpart",
  20.   trControl = train_control)
  21. # LDA
  22. set.seed(7)
  23. fit.lda <- train(diabetes ~ ., data = PimaIndiansDiabetes, method = "lda",
  24.   trControl = train_control)
  25. # SVM
  26. set.seed(7)
  27. fit.svm <- train(diabetes ~ ., data = PimaIndiansDiabetes, method = "svmRadial",
  28.   trControl = train_control)
  29. # KNN
  30. set.seed(7)
  31. fit.knn <- train(diabetes ~ ., data = PimaIndiansDiabetes, method = "knn",
  32.   trControl = train_control)
  33. # Random Forest
  34. set.seed(7)
  35. fit.rf <- train(diabetes ~ ., data = PimaIndiansDiabetes, method = "rf",
  36.   trControl = train_control)
  37. # collect resamples
  38. results <- resamples(list(
  39.   CART = fit.cart, LDA = fit.lda, SVM = fit.svm, KNN = fit.knn, RF = fit.rf
  40. ))

Visualization

  1. # Compare the models with box-and-whisker plots of the collected resamples;
  2. # the x and y axes are both given a "free" relation.
  3. scales <- list(
  4.   x = list(relation = "free"),
  5.   y = list(relation = "free")
  6. )
  7. bwplot(results, scales = scales)

image.png

  1. splom(results)  # scatter plot matrix of the collected resampling results

image.png

  1. # pair-wise differences in resampled performance between the models
  2. diffs <- diff(results)
  3. # summarize the differences, including p-values for pair-wise comparisons
  4. summary(diffs)