r - Custom classification threshold for GBM -


i'm trying create custom gbm model tunes classification threshold binary classification problem. there nice example provided on caret website here, when try apply similar gbm receive following error:

error in { : task 1 failed - "argument 1 not vector" 

unfortunately, have no idea error , error isn't helpful.

here's example, code i've used defining custom gbm

library(caret) library(gbm) library(proc) #### define custom gbm model probability threshold tuning #### ## model code original gbm method caret customgbm <- getmodelinfo("gbm", regex = false)[[1]] customgbm$type <- c("classification") ## add threshold (i.e. class cutoff) tuning parameter customgbm$parameters <- data.frame(parameter = c("n.trees", "interaction.depth", "shrinkage",                                                  "n.minobsinnode", "threshold"),                                    class = rep("numeric", 5),                                    label = c("# boosting iterations", "max tree depth", "shrinkage",                                              "min. terminal node size", "probability cutoff")) ## customise tuning grid: ## paramters fixed. give tuning grid of 2,500 values if len = 100 customgbm$grid <- function(x, y, len = null, search = "grid") {   if (search == "grid") {     grid <- expand.grid(n.trees = seq(50, 250, 50),                         interaction.depth = 2, ### fix interaction depth @ 2                         shrinkage = 0.0001, ### fix learning rate @ 0.0001                         n.minobsinnode = seq(2, 10, 2),                         threshold = seq(.01, .99, length = len))     } else {     grid <- expand.grid(n.trees = floor(runif(len, min = 1, max = 5000)),                         interaction.depth = sample(1:10, replace = true, size = len),                         shrinkage = runif(len, min = .001, max = .6),                         n.minobsinnode = sample(5:25, replace = true, size = len),                         threshold = runif(1, 0, size = len))     grid <- grid[!duplicated(grid),] ### remove duplicated rows in training grid   }   grid }  ## here fit single gbm model , loop on threshold values predictions ## same gbm model. customgbm$loop = function(grid) {   library(plyr)   loop <- ddply(grid, c("n.trees", "shrinkage", "interaction.depth", "n.minobsinnode"),                 function(x) c(threshold = max(x$threshold)))   submodels <- vector(mode = "list", length = nrow(loop))   (i in seq(along = loop$threshold)) {     index <- which(grid$n.trees == loop$n.trees[i] &                      grid$interaction.depth == loop$interaction.depth[i] &                      grid$shrinkage == loop$shrinkage[i] &                      grid$n.minobsinnode == loop$n.minobsinnode[i])     cuts <- grid[index, "threshold"]     submodels[[i]] <- data.frame(threshold = cuts[cuts != loop$threshold[i]])   }   list(loop = loop, submodels = submodels) }  ## fit model independent of threshold parameter customgbm$fit = function(x, y, wts, param, lev, last, classprobs, ...) {   thedots <- list(...)   if (any(names(thedots) == "distribution")) {     moddist <- thedots$distribution     thedots$distribution <- null   } else {     if (is.numeric(y)) {       stop("this works 2-class classification problems")       } else moddist <- if (length(lev) == 2)  "bernoulli" else         stop("this works 2-class classification problems")   }   # if (length(levels(y)) != 2)   #   stop("this works 2-class problems")   ## check see if weights passed in (and availible)   if (!is.null(wts)) thedots$w <- wts   if (is.factor(y) && length(lev) == 2) y <- ifelse(y == lev[1], 1, 0)   modargs <- list(x = x,                   y = y,                   interaction.depth = param$interaction.depth,                   n.trees = param$n.trees,                   shrinkage = param$shrinkage,                   n.minobsinnode = param$n.minobsinnode,                   distribution = moddist)   do.call("gbm.fit", modargs) }   ## probability prediction , use different thresholds ## predicted class customgbm$predict = function(modelfit, newdata, submodels = null) {   out <- predict(modelfit, newdata, n.trees = modelfit$tunevalue$n.trees,                  type = "response")#[, modelfit$obslevels[1]]   out[is.nan(out)] <- na   class1prob <- ifelse(out >= modelfit$tunevalue$threshold,                                 modelfit$obslevels[1],                                 modelfit$obslevels[2])    ## raise threshold class #1 , higher level of   ## evidence needed call class 1 should   ## decrease sensitivity , increase specificity   out <- ifelse(class1prob >= modelfit$tunevalue$threshold,                 modelfit$obslevels[1],                 modelfit$obslevels[2])   if (!is.null(submodels)) {     tmp2 <- out     out <- vector(mode = "list", length = length(submodels$threshold))     out[[1]] <- tmp2     (i in seq(along = submodels$threshold)) {       out[[i + 1]] <- ifelse(class1prob >= submodels$threshold[[i]],                              modelfit$obslevels[1],                              modelfit$obslevels[2])     }   }   out }  ## probabilities same have create ## mulitple versions of probs evaluate data across ## thresholds customgbm$prob = function(modelfit, newdata, submodels = null) {   out <- predict(modelfit, newdata, type = "response",                  n.trees = modelfit$tunevalue$n.trees)   out[is.nan(out)] <- na   out <- cbind(out, 1 - out)   colnames(out) <- modelfit$obslevels   if (!is.null(submodels)) {     tmp <- predict(modelfit, newdata, type = "response", n.trees = submodels$n.trees)     tmp <- as.list(as.data.frame(tmp))     lapply(tmp, function(x, lvl) {       x <- cbind(x, 1 - x)       colnames(x) <- lvl       x}, lvl = modelfit$obslevels)     out <- c(list(out), tmp)   }   out }  fourstats <- function (data, lev = levels(data$obs), model = null) {   ## code use area under roc curve ,   ## sensitivity , specificity values using current candidate   ## value of probability threshold.   out <- c(twoclasssummary(data, lev = levels(data$obs), model = null))    ## best possible model has sensitivity of 1 , specificity of 1.    ## how far value?   coords <- matrix(c(1, 1, out["spec"], out["sens"]),                     ncol = 2,                     byrow = true)   colnames(coords) <- c("spec", "sens")   rownames(coords) <- c("best", "current")   c(out, dist = dist(coords)[1]) } 

and code showing how use custom model

set.seed(949) trainingset <- twoclasssim(500, -9) mod1 <- train(class ~ ., data = trainingset,               method = customgbm, metric = "dist",               maximize = false, tunelength = 10,               trcontrol = traincontrol(method = "cv", number = 5,                                        classprobs = true,                                        summaryfunction = fourstats)) 

the model appears run, finishes error above. if please me customising gbm model tune gbm parameters, , probability threshold classes great.


Comments

Popular posts from this blog

php - Vagrant up error - Uncaught Reflection Exception: Class DOMDocument does not exist -

vue.js - Create hooks for automated testing -

Add new key value to json node in java -