I have tried running the following code on a Unix machine with 20 CPU, using R foreach, parallel, doParallel, and party packages (my objective is to have the party / varimp function working on several CPUs in parallel):
parallel_compute_varimp <- function (object, mincriterion = 0, conditional = FALSE, threshold = 0.2, nperm = 1, OOB = TRUE, pre1.0_0 = conditional) { response <- object@responses input <- object@data@get("input") xnames <- colnames(input) inp <- initVariableFrame(input, trafo = NULL) y <- object@responses@variables[[1]] error <- function(x, oob) mean((levels(y)[sapply(x, which.max)] != y)[oob]) w <- object@initweights perror <- matrix(0, nrow = nperm * length(object@ensemble), ncol = length(xnames)) colnames(perror) <- xnames data = foreach(b = 1:length(object@ensemble), .packages = c("party","stats"), .combine = rbind) %dopar% { try({ tree <- object@ensemble[[b]] oob <- object@weights[[b]] == 0 p <- .Call("R_predict", tree, inp, mincriterion, -1L, PACKAGE = "party") eoob <- error(p, oob) for (j in unique(varIDs(tree))) { for (per in 1:nperm) { if (conditional || pre1.0_0) { tmp <- inp ccl <- create_cond_list(conditional, threshold, xnames[j], input) if (is.null(ccl)) { perm <- sample(which(oob)) } else { perm <- conditional_perm(ccl, xnames, input, tree, oob) } tmp@variables[[j]][which(oob)] <- tmp@variables[[j]][perm] p <- .Call("R_predict", tree, tmp, mincriterion, -1L, PACKAGE = "party") } else { p <- .Call("R_predict", tree, inp, mincriterion, as.integer(j), PACKAGE = "party") } perror[b, j] <- (error(p, oob) - eoob) } } ######## # return data to the %dopar% loop data variable perror[b, ] ######## }) # END OF TRY } # END OF LOOP WITH PARALLEL COMPUTING perror = data perror <- as.data.frame(perror) return(MeanDecreaseAccuracy = colMeans(perror)) } environment(parallel_compute_varimp) <- asNamespace('party') cl <- makeCluster(detectCores()) registerDoParallel(cl, cores = detectCores()) <...> system.time(data.cforest.varimp <- parallel_compute_varimp(data.cforest, conditional = TRUE)) but I am getting an error:
> system.time(data.cforest.varimp <- parallel_compute_varimp(data.cforest, conditional = TRUE)) Error in unserialize(socklist[[n]]) : error reading from connection Timing stopped at: 58.302 13.197 709.307 The code was working with a smaller dataset on 4 CPUs.
I am running out of ideas. Can someone suggest a way to reach my objective of running party package varimp function on parallel CPUs?