Skip to main content
added 33 characters in body
Source Link
amoeba
  • 109k
  • 37
  • 325
  • 350
library(lme4) library(ggplot2) library(tidyr) m0 <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy, control = lmerControl(optimizer = "nloptwrap")) # set the number of factor levels ngroups <- 3:18 # generate all possible combinations combos <- lapply(X = ngroups, FUN = function(x) combn(unique(sleepstudy$Subject), x)) # allocate output (sorry, this code is entirely un-optimized) out <- list(matrix(NA, ncol(combos[[1]]), 1), matrix(NA, ncol(combos[[2]]), 1), matrix(NA, ncol(combos[[3]]), 1), matrix(NA, ncol(combos[[4]]), 1), matrix(NA, ncol(combos[[5]]), 1), matrix(NA, ncol(combos[[6]]), 1), matrix(NA, ncol(combos[[7]]), 1), matrix(NA, ncol(combos[[8]]), 1), matrix(NA, ncol(combos[[9]]), 1), matrix(NA, ncol(combos[[10]]), 1), matrix(NA, ncol(combos[[11]]), 1), matrix(NA, ncol(combos[[12]]), 1), matrix(NA, ncol(combos[[13]]), 1), matrix(NA, ncol(combos[[14]]), 1), matrix(NA, ncol(combos[[15]]), 1), matrix(NA, ncol(combos[[16]]), 1)) # took ~ 2.5 hrs on my laptop, commented out for safety #system.time(for(ii in 1:length(combos)) { # for(jj in 1:ncol(combos[[ii]])) { # sls <- sleepstudy[sleepstudy$Subject %in% combos[[ii]][,jj],] # out[[ii]][jj] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') # } # }) # pad with zeros, not all were equal # from http://stackoverflow.com/questions/11148429/r-convert-asymmetric-list-to-matrix-number-of-elements-in-each-sub-list-diffe max.len <- max(sapply(out, length)) corrected.list <- lapply(out, function(x) {c(x, rep(NA, max.len - length(x)))}) mat <- do.call(rbind, corrected.list) mat <- data.frame(t(mat)) names(mat) <- paste0('s',3:18) mat <- gather(mat, run, value) ggplot(mat, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 
library(lme4) library(ggplot2) library(tidyr) m0 <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy, control = lmerControl(optimizer = "nloptwrap")) # set the number of factor levels ngroups <- 3:18 # generate all possible combinations combos <- lapply(X = ngroups, FUN = function(x) combn(unique(sleepstudy$Subject), x)) # allocate output (sorry, this code is entirely un-optimized) out <- list(matrix(NA, ncol(combos[[1]]), 1), matrix(NA, ncol(combos[[2]]), 1), matrix(NA, ncol(combos[[3]]), 1), matrix(NA, ncol(combos[[4]]), 1), matrix(NA, ncol(combos[[5]]), 1), matrix(NA, ncol(combos[[6]]), 1), matrix(NA, ncol(combos[[7]]), 1), matrix(NA, ncol(combos[[8]]), 1), matrix(NA, ncol(combos[[9]]), 1), matrix(NA, ncol(combos[[10]]), 1), matrix(NA, ncol(combos[[11]]), 1), matrix(NA, ncol(combos[[12]]), 1), matrix(NA, ncol(combos[[13]]), 1), matrix(NA, ncol(combos[[14]]), 1), matrix(NA, ncol(combos[[15]]), 1), matrix(NA, ncol(combos[[16]]), 1)) # took ~ 2.5 hrs on my laptop, commented out for safety #system.time(for(ii in 1:length(combos)) { # for(jj in 1:ncol(combos[[ii]])) { # sls <- sleepstudy[sleepstudy$Subject %in% combos[[ii]][,jj],] # out[[ii]][jj] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') # } # }) # pad with zeros, not all were equal # from http://stackoverflow.com/questions/11148429/r-convert-asymmetric-list-to-matrix-number-of-elements-in-each-sub-list-diffe max.len <- max(sapply(out, length)) corrected.list <- lapply(out, function(x) {c(x, rep(NA, max.len - length(x)))}) mat <- do.call(rbind, corrected.list) mat <- data.frame(t(mat)) names(mat) <- paste0('s',3:18) mat <- gather(mat, run, value) ggplot(mat, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 
ngroups <- 3:18 reps <- 500 out2<- matrix(NA, length(ngroups), reps) for (ii in 1:length(ngroups)) { for(j in 1:reps) { sls <- sleepstudy[sleepstudy$Subject %in% sample(unique(sleepstudy$Subject), ngroups[i], replace = FALSE),] out2[i,j] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') } } out2 <- data.frame(t(out2)) names(out2) <- paste0('s',3:18) out2 <- gather(out2, run, value) ggplot(out2, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 
ngroups <- 3:18 reps <- 500 out2<- matrix(NA, length(ngroups), reps) for (ii in 1:length(ngroups)) { for(j in 1:reps) { sls <- sleepstudy[sleepstudy$Subject %in% sample(unique(sleepstudy$Subject), ngroups[i], replace = FALSE),] out2[i,j] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') } } out2 <- data.frame(t(out2)) names(out2) <- paste0('s',3:18) out2 <- gather(out2, run, value) ggplot(out2, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 
library(lme4) library(ggplot2) library(tidyr) m0 <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy, control = lmerControl(optimizer = "nloptwrap")) # set the number of factor levels ngroups <- 3:18 # generate all possible combinations combos <- lapply(X = ngroups, FUN = function(x) combn(unique(sleepstudy$Subject), x)) # allocate output (sorry, this code is entirely un-optimized) out <- list(matrix(NA, ncol(combos[[1]]), 1), matrix(NA, ncol(combos[[2]]), 1), matrix(NA, ncol(combos[[3]]), 1), matrix(NA, ncol(combos[[4]]), 1), matrix(NA, ncol(combos[[5]]), 1), matrix(NA, ncol(combos[[6]]), 1), matrix(NA, ncol(combos[[7]]), 1), matrix(NA, ncol(combos[[8]]), 1), matrix(NA, ncol(combos[[9]]), 1), matrix(NA, ncol(combos[[10]]), 1), matrix(NA, ncol(combos[[11]]), 1), matrix(NA, ncol(combos[[12]]), 1), matrix(NA, ncol(combos[[13]]), 1), matrix(NA, ncol(combos[[14]]), 1), matrix(NA, ncol(combos[[15]]), 1), matrix(NA, ncol(combos[[16]]), 1)) # took ~ 2.5 hrs on my laptop, commented out for safety #system.time(for(ii in 1:length(combos)) { # for(jj in 1:ncol(combos[[ii]])) { # sls <- sleepstudy[sleepstudy$Subject %in% combos[[ii]][,jj],] # out[[ii]][jj] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') # } # }) # pad with zeros, not all were equal # from http://stackoverflow.com/questions/11148429/r-convert-asymmetric-list-to-matrix-number-of-elements-in-each-sub-list-diffe max.len <- max(sapply(out, length)) corrected.list <- lapply(out, function(x) {c(x, rep(NA, max.len - length(x)))}) mat <- do.call(rbind, corrected.list) mat <- data.frame(t(mat)) names(mat) <- paste0('s',3:18) mat <- gather(mat, run, value) ggplot(mat, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 
ngroups <- 3:18 reps <- 500 out2<- matrix(NA, length(ngroups), reps) for (ii in 1:length(ngroups)) { for(j in 1:reps) { sls <- sleepstudy[sleepstudy$Subject %in% sample(unique(sleepstudy$Subject), ngroups[i], replace = FALSE),] out2[i,j] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') } } out2 <- data.frame(t(out2)) names(out2) <- paste0('s',3:18) out2 <- gather(out2, run, value) ggplot(out2, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 
library(lme4) library(ggplot2) library(tidyr) m0 <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy, control = lmerControl(optimizer = "nloptwrap")) # set the number of factor levels ngroups <- 3:18 # generate all possible combinations combos <- lapply(X = ngroups, FUN = function(x) combn(unique(sleepstudy$Subject), x)) # allocate output (sorry, this code is entirely un-optimized) out <- list(matrix(NA, ncol(combos[[1]]), 1), matrix(NA, ncol(combos[[2]]), 1), matrix(NA, ncol(combos[[3]]), 1), matrix(NA, ncol(combos[[4]]), 1), matrix(NA, ncol(combos[[5]]), 1), matrix(NA, ncol(combos[[6]]), 1), matrix(NA, ncol(combos[[7]]), 1), matrix(NA, ncol(combos[[8]]), 1), matrix(NA, ncol(combos[[9]]), 1), matrix(NA, ncol(combos[[10]]), 1), matrix(NA, ncol(combos[[11]]), 1), matrix(NA, ncol(combos[[12]]), 1), matrix(NA, ncol(combos[[13]]), 1), matrix(NA, ncol(combos[[14]]), 1), matrix(NA, ncol(combos[[15]]), 1), matrix(NA, ncol(combos[[16]]), 1)) # took ~ 2.5 hrs on my laptop, commented out for safety #system.time(for(ii in 1:length(combos)) { # for(jj in 1:ncol(combos[[ii]])) { # sls <- sleepstudy[sleepstudy$Subject %in% combos[[ii]][,jj],] # out[[ii]][jj] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') # } # }) # pad with zeros, not all were equal # from http://stackoverflow.com/questions/11148429/r-convert-asymmetric-list-to-matrix-number-of-elements-in-each-sub-list-diffe max.len <- max(sapply(out, length)) corrected.list <- lapply(out, function(x) {c(x, rep(NA, max.len - length(x)))}) mat <- do.call(rbind, corrected.list) mat <- data.frame(t(mat)) names(mat) <- paste0('s',3:18) mat <- gather(mat, run, value) ggplot(mat, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 
ngroups <- 3:18 reps <- 500 out2<- matrix(NA, length(ngroups), reps) for (ii in 1:length(ngroups)) { for(j in 1:reps) { sls <- sleepstudy[sleepstudy$Subject %in% sample(unique(sleepstudy$Subject), ngroups[i], replace = FALSE),] out2[i,j] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') } } out2 <- data.frame(t(out2)) names(out2) <- paste0('s',3:18) out2 <- gather(out2, run, value) ggplot(out2, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 
Source Link
alexforrence
  • 833
  • 1
  • 11
  • 14

For what it's worth, I did a bit of a simulation study to look at the stability of the variance estimate for a relatively simple LMM (using the sleepstudy dataset available through lme4). The first way generates all possible subject combinations for ngroups number of subjects, and refits the model for each possible combination. The second takes several random subsets of subjects.

library(lme4) library(ggplot2) library(tidyr) m0 <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy, control = lmerControl(optimizer = "nloptwrap")) # set the number of factor levels ngroups <- 3:18 # generate all possible combinations combos <- lapply(X = ngroups, FUN = function(x) combn(unique(sleepstudy$Subject), x)) # allocate output (sorry, this code is entirely un-optimized) out <- list(matrix(NA, ncol(combos[[1]]), 1), matrix(NA, ncol(combos[[2]]), 1), matrix(NA, ncol(combos[[3]]), 1), matrix(NA, ncol(combos[[4]]), 1), matrix(NA, ncol(combos[[5]]), 1), matrix(NA, ncol(combos[[6]]), 1), matrix(NA, ncol(combos[[7]]), 1), matrix(NA, ncol(combos[[8]]), 1), matrix(NA, ncol(combos[[9]]), 1), matrix(NA, ncol(combos[[10]]), 1), matrix(NA, ncol(combos[[11]]), 1), matrix(NA, ncol(combos[[12]]), 1), matrix(NA, ncol(combos[[13]]), 1), matrix(NA, ncol(combos[[14]]), 1), matrix(NA, ncol(combos[[15]]), 1), matrix(NA, ncol(combos[[16]]), 1)) # took ~ 2.5 hrs on my laptop, commented out for safety #system.time(for(ii in 1:length(combos)) { # for(jj in 1:ncol(combos[[ii]])) { # sls <- sleepstudy[sleepstudy$Subject %in% combos[[ii]][,jj],] # out[[ii]][jj] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') # } # }) # pad with zeros, not all were equal # from http://stackoverflow.com/questions/11148429/r-convert-asymmetric-list-to-matrix-number-of-elements-in-each-sub-list-diffe max.len <- max(sapply(out, length)) corrected.list <- lapply(out, function(x) {c(x, rep(NA, max.len - length(x)))}) mat <- do.call(rbind, corrected.list) mat <- data.frame(t(mat)) names(mat) <- paste0('s',3:18) mat <- gather(mat, run, value) ggplot(mat, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 

The dotted black line is the original point estimate of the variance, and the facets represent different numbers of subjects (s3 being groups of three subjects, s4 being four, etc.). enter image description here

And the alternative way:

ngroups <- 3:18 reps <- 500 out2<- matrix(NA, length(ngroups), reps) for (ii in 1:length(ngroups)) { for(j in 1:reps) { sls <- sleepstudy[sleepstudy$Subject %in% sample(unique(sleepstudy$Subject), ngroups[i], replace = FALSE),] out2[i,j] <- attr(VarCorr(update(m0, data = sls))$Subject, 'stddev') } } out2 <- data.frame(t(out2)) names(out2) <- paste0('s',3:18) out2 <- gather(out2, run, value) ggplot(out2, aes(x = value, fill = run)) + geom_histogram(bins = 60) + geom_vline(xintercept = 37.12, linetype = 'longdash', aes(colour = 'original')) + facet_wrap(~run, scales = 'free_y') + scale_x_continuous(breaks = seq(0, 100, by = 20)) + theme_bw() + guides(fill = FALSE) 

enter image description here

It appears (for this example, anyway) that the variance doesn't really stabilize until there are at least 14 subjects, if not later.