Performing machine learning

Question

I've written the code below to do some work on machine learning in R. I'm not overly happy with some bits of it, and I suspect I could improve it quite a bit. Bits I'm specifically interested in looking at are how to deal with appending to vectors in the loop, and whether I can combine all of the functions kf.knn, kf.svm etc into one function with various arguments.

library(class)
library(nnet)
library(epibasix)
library(CVThresh)
library(e1071)

df <- read.table("semeion.data.data")

# Collapse the ten indicator columns V257..V266 into a single `digit` column
# (V257 -> 0, ..., V266 -> 9). Rows with no indicator set keep the sentinel 42.
# Digits are assigned from 9 down to 0 so that, should a row have several
# indicators set, the lowest digit wins.
df$digit <- rep(42, nrow(df))
for (d in 9:0) {
  flagged <- as.logical(df[[paste0("V", 257 + d)]])
  df$digit[which(flagged)] <- d
}

# Keep the 256 feature columns followed by the label as the last column.
data <- df[, c(1:256, 267)]

# Cross-tabulate two label vectors over the union of their levels, so the
# result is always square even when one side is missing a class.
#
# x, y: label vectors (anything accepted by factor()).
# Returns a table with rows from x and columns from y.
squareTable <- function(x, y) {
  x <- factor(x)
  y <- factor(y)
  common_levels <- sort(unique(c(levels(x), levels(y))))
  table(
    x = factor(x, levels = common_levels),
    y = factor(y, levels = common_levels)
  )
}

# Shared k-fold driver behind kf.knn / kf.nnet / kf.svm.
#
# data:        data frame whose last column is the label and whose other
#              columns are the features.
# k:           number of folds.
# fit_predict: function(train, train_labels, test) returning the predicted
#              labels for the rows of `test`.
# verbose:     print each fold's confusion table when TRUE.
#
# Returns the mean of epiKappa()$kappa over all k folds.
kf.run <- function(data, k, fit_predict, verbose = FALSE) {
  n_rows <- nrow(data)
  label_col <- ncol(data)
  feature_cols <- seq_len(label_col - 1)

  # Shuffle the rows, then take fold membership from cvtype(); row i of
  # cv.index is used as the test rows of fold i, the other rows as training.
  data <- data[sample(n_rows), ]
  folds <- cvtype(n_rows, cv.bsize = 1, cv.kfold = k, FALSE)$cv.index

  kappas <- numeric(k)
  # Every fold 1..k is evaluated, matching the divisor k used for the mean.
  for (i in seq_len(k)) {
    test_rows <- as.vector(folds[i, ])
    train_rows <- as.vector(folds[-i, ])

    train <- data[train_rows, feature_cols]
    train_labels <- data[train_rows, label_col]
    test <- data[test_rows, feature_cols]
    test_labels <- data[test_rows, label_col]

    predicted <- fit_predict(train, train_labels, test)

    # A square table is needed even if some class is never predicted.
    confusion <- squareTable(test_labels, predicted)
    if (verbose) {
      print(confusion)
    }
    kappas[i] <- epiKappa(confusion)$kappa
  }

  mean(kappas)
}

# Mean k-fold kappa for k-nearest-neighbour classification.
#
# data:    data frame, label in the last column.
# k:       number of folds.
# nearest: number of neighbours passed to knn().
kf.knn <- function(data, k, nearest) {
  kf.run(data, k, function(train, train_labels, test) {
    knn(train, test, train_labels, k = nearest)
  })
}

# Mean k-fold kappa for a single-hidden-layer neural network.
#
# data:   data frame, label in the last column.
# k:      number of folds.
# hidden: number of hidden units (nnet's `size`).
kf.nnet <- function(data, k, hidden) {
  fit_predict <- function(train, train_labels, test) {
    train$label <- as.factor(train_labels)
    model <- nnet(
      label ~ .,
      data = train, size = hidden,
      MaxNWts = 10000, maxit = 200, linout = TRUE
    )
    predict(model, test, type = "class")
  }
  kf.run(data, k, fit_predict, verbose = TRUE)
}

# Mean k-fold kappa for a support vector machine with fixed cost and gamma.
#
# data: data frame, label in the last column.
# k:    number of folds.
kf.svm <- function(data, k) {
  fit_predict <- function(train, train_labels, test) {
    train$label <- as.factor(train_labels)
    model <- svm(label ~ ., data = train, cost = 8, gamma = 0.001, cross = 2)
    predict(model, test)
  }
  kf.run(data, k, fit_predict, verbose = TRUE)
}

# Evaluate hidden-layer sizes 2..10 with 5-fold cross-validation.
# Both vectors start with one placeholder element that is dropped after the
# loop.
result <- vector("numeric", 1)
n <- vector("numeric", 1)
for (i in 2:10) {
  avg <- kf.nnet(data, 5, i)
  result <- append(result, avg)
  n <- append(n, i)
  cat("!!!!!!!", i, avg, "\n")
}
result <- result[2:length(result)]
n <- n[2:length(n)]

df <- data.frame(kappa = result, n = n)
# The kappa values live in the `kappa` column.
plot(df$n, df$kappa)
print(df)

Brian Diggs · Accepted Answer · 2013-12-09 17:00:27Z

You can avoid appending to vectors (which can cause re-allocation of space and can considerably slow things down in principle; though in your case of only a length 10 vector that shouldn't be noticeable) if you allocate them to the needed size initially and then assign within them.

# Allocate both vectors at their final size once, then assign by index
# instead of appending. The loop starts at 2, so element 1 is never written
# and keeps its initial value of 0.
result <- vector("numeric", 10) # or even: result <- numeric(10)
n <- vector("numeric", 10)
for (i in 2:10) {
  avg <- kf.nnet(data, 5, i)
  result[i] <- avg
  n[i] <- i
  cat("!!!!!!!", i, avg, "\n")
}

I'm not clear what you are asking about combining the functions. If you mean having a single function, you can add an additional argument, a character string naming the algorithm, and have the code check that parameter with a series of if/else if branches to decide which algorithm to use.

Stack Exchange Network

Performing machine learning

1 Answer 1

You must log in to answer this question.

Hot Network Questions

Performing machine learning

1 Answer 1

You must log in to answer this question.

Related

Hot Network Questions