5
\$\begingroup\$

I've written the code below to do some work on machine learning in R. I'm not overly happy with some bits of it, and I suspect I could improve it quite a bit. Bits I'm specifically interested in looking at are how to deal with appending to vectors in the loop, and whether I can combine all of the functions kf.knn, kf.svm etc into one function with various arguments.

library(class) library(nnet) library(epibasix) library(CVThresh) library(e1071) df <- read.table("semeion.data.data") df$digit <- replicate(nrow(df), 42) df$digit[which(as.logical(df$V266), arr.ind=TRUE)] = 9 df$digit[which(as.logical(df$V265), arr.ind=TRUE)] = 8 df$digit[which(as.logical(df$V264), arr.ind=TRUE)] = 7 df$digit[which(as.logical(df$V263), arr.ind=TRUE)] = 6 df$digit[which(as.logical(df$V262), arr.ind=TRUE)] = 5 df$digit[which(as.logical(df$V261), arr.ind=TRUE)] = 4 df$digit[which(as.logical(df$V260), arr.ind=TRUE)] = 3 df$digit[which(as.logical(df$V259), arr.ind=TRUE)] = 2 df$digit[which(as.logical(df$V258), arr.ind=TRUE)] = 1 df$digit[which(as.logical(df$V257), arr.ind=TRUE)] = 0 data <- df[,c(0:256,267)] squareTable <- function(x,y) { x <- factor(x) y <- factor(y) commonLevels <- sort(unique(c(levels(x), levels(y)))) x <- factor(x, levels = commonLevels) y <- factor(y, levels = commonLevels) table(x,y) } kf.knn <- function(data, k, nearest) { N <- nrow(data) data <- data[sample(1:N),] folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index total = 0 for (i in 1:k) { test <- data[folds.index[i,], 0:256] test.labels <- data[as.array(folds.index[i,]), 257] rest <- as.array(folds.index[-i,]) train <- data[rest, 0:256] train.labels <- data[rest, 257] knnpredict <- knn(train, test, train.labels, nearest) t <- table(as.factor(test.labels), as.factor(knnpredict)) kap <- epiKappa(t) total <- total + kap$kappa } return(total / k) } kf.nnet <- function(data, k, hidden) { N <- nrow(data) data <- data[sample(1:N),] folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index total = 0 for (i in 2:k) { test <- data[folds.index[i,], 0:256] test.labels <- data[as.array(folds.index[i,]), 257] rest <- as.array(folds.index[-i,]) train <- data[rest, 0:256] train.labels <- data[rest, 257] train$label <- as.factor(train.labels) nnetwork <- nnet(label ~ ., data=train, size=hidden, MaxNWts = 10000, maxit=200, linout=TRUE) nnetpredict <- predict(nnetwork, test, "class") table <- squareTable(as.factor(test.labels), as.factor(nnetpredict)) print(table) kap <- epiKappa(table) total <- total + kap$kappa } return(total / k) } kf.svm <- function(data, k) { N <- nrow(data) data <- data[sample(1:N),] folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index total = 0 for (i in 2:k) { test <- data[folds.index[i,], 0:256] test.labels <- data[as.array(folds.index[i,]), 257] rest <- as.array(folds.index[-i,]) train <- data[rest, 0:256] train.labels <- data[rest, 257] train$label <- as.factor(train.labels) svmmodel <- svm(label ~ ., data=train, cost=8, gamma=0.001, cross=2) svmpredict <- predict(svmmodel, test) t <- table(as.factor(test.labels), as.factor(svmpredict)) print(t) kap <- epiKappa(t) total <- total + kap$kappa } return(total / k) } result <- vector("numeric", 1) n <- vector("numeric", 1) for (i in 2:10) { avg <- kf.nnet(data, 5, i) result <- append(result, avg) n <- append(n, i) cat("!!!!!!!", i, avg, "\n") } result <- result[2:length(result)] n <- n[2:length(n)] df <- data.frame(kappa = result, n=n) plot(df$n, df$res) print(df) 
\$\endgroup\$

1 Answer 1

5
\$\begingroup\$

You can avoid appending to vectors (which can cause re-allocation of space and can considerably slow things down in principle; though in your case of only a length 10 vector that shouldn't be noticeable) if you allocate them to the needed size initially and then assign within them.

result <- vector("numeric", 10) # or even: result <- numeric(10) n <- vector("numeric", 10) for (i in 2:10) { avg <- kf.nnet(data, 5, i) result[i] <- avg n[i] <- i cat("!!!!!!!", i, avg, "\n") } 

I'm not clear what you are asking about combing function; you can add an additional argument which is a character vector which then the code just has a series of if/elseif's checking that parameter to see which algorithm to use, if that is what you mean.

\$\endgroup\$

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.