Function `boot`, from the recommended package boot that ships with R, has an argument `strata`. Split the data by group and apply `boot` to each piece, stratifying by, for instance, location.
library(boot)

# Statistic passed to boot(): refit the linear model on the rows selected by
# the resampling indices `i` and return its coefficient vector.
#   data: data frame with columns RR and predictor
#   i:    row indices supplied by boot() for one bootstrap replicate
bootfun <- function(data, i) {
  d <- data[i, ]
  fit <- lm(RR ~ predictor, data = d)
  coef(fit)
}

set.seed(2022)

# Simulated data. Every column is built at full length so that nothing relies
# on data.frame() silently recycling shorter vectors.
n_obs <- 200
df <- data.frame(
  group = rep(c("region1", "region2"), length.out = n_obs),
  subgroup = rep(
    c("location1", "location2", "location2", "location1"),
    length.out = n_obs
  ),
  predictor = rep(c("soil1", "soil2", "soil3", "soil4"), length.out = n_obs),
  RR = rnorm(n_obs),
  stringsAsFactors = TRUE
)

R <- 1000

# One bootstrap per region; within a region the rows are resampled inside
# each location because `strata` is set to the subgroup factor.
b_list <- by(df, df$group, \(X) {
  boot(X, bootfun, R, strata = X$subgroup)
})

b_list$region1
#>
#> STRATIFIED BOOTSTRAP
#>
#>
#> Call:
#> boot(data = X, statistic = bootfun, R = R, strata = X$subgroup)
#>
#>
#> Bootstrap Statistics :
#>       original        bias    std. error
#> t1* -0.2608885  0.000469295    0.1541482
#> t2*  0.3502007 -0.004239248    0.2083503

b_list$region2
#>
#> STRATIFIED BOOTSTRAP
#>
#>
#> Call:
#> boot(data = X, statistic = bootfun, R = R, strata = X$subgroup)
#>
#>
#> Bootstrap Statistics :
#>        original         bias    std. error
#> t1* -0.03727332 -0.0001557172    0.1422502
#> t2*  0.11987005  0.0016393125    0.1952310

# boot.ci() defaults to type = "all" and index = 1:2, and with two indices it
# takes the second column of `t` as the variance of the first statistic. Here
# that column is a regression coefficient, not a variance, so a studentized
# interval would be meaningless (and sqrt() of negative values warns about
# NaNs). Request only the interval types that need no variance estimate, for
# the first coefficient.
ci_types <- c("norm", "basic", "perc", "bca")
ci_region <- lapply(b_list, \(b) boot.ci(b, type = ci_types, index = 1))
ci_region
#> $region1
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = b, type = ci_types, index = 1)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.5635, 0.0408 )   (-0.5611, 0.0545 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.5762, 0.0393 )   (-0.5733, 0.0446 )
#> Calculations and Intervals on Original Scale
#>
#> $region2
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = b, type = ci_types, index = 1)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.3159, 0.2417 )   (-0.3260, 0.2460 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.3206, 0.2514 )   (-0.3321, 0.2352 )
#> Calculations and Intervals on Original Scale
Created on 2022-10-25 with reprex v2.0.2
Edit
To get the bootstrapped confidence intervals of each coefficient, the code below uses two nested loops. The outer loop is over the regions, following the original data partition. The inner loop is over index, that is, over the columns of the matrix t returned by boot; see help("boot"), section Value. The indices are the column numbers of either of
# Matrices of bootstrap replicates, one column per model coefficient.
b_list[["region1"]][["t"]]
b_list[["region2"]][["t"]]
each of them with 3 columns.
library(boot)

# Labels attached, by position, to the per-coefficient intervals.
# NOTE(review): the labels are positional; confirm that their order matches
# the coefficient order of the model used to build `b_list`.
coef_labels <- c("Intercept", "soil", "temperature")

# Kept for reference: number of coefficients in the first region.
npars <- ncol(b_list$region1$t)

# Outer loop: one element per region. Inner loop: one boot.ci() call per
# column of that region's replicate matrix `t`.
ci_list <- lapply(b_list, \(region) {
  n_coef <- ncol(region$t)
  # Fail loudly on a label/column mismatch instead of letting setNames()
  # pad missing names with NA.
  stopifnot(length(coef_labels) == n_coef)
  # seq_len() yields an empty sequence for zero columns, unlike seq.int(0).
  ci <- lapply(seq_len(n_coef), \(index) {
    boot.ci(region, index = index, type = c("norm", "basic", "perc", "bca"))
  })
  setNames(ci, coef_labels)
})

ci_list$region1$Intercept
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = region, type = c("norm", "basic", "perc",
#>     "bca"), index = index)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.2517, 0.6059 )   (-0.2423, 0.6043 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.2410, 0.6056 )   (-0.2414, 0.6048 )
#> Calculations and Intervals on Original Scale

ci_list$region2$temperature
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = region, type = c("norm", "basic", "perc",
#>     "bca"), index = index)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.2317, 0.0420 )   (-0.2416, 0.0404 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.2278, 0.0542 )   (-0.2265, 0.0570 )
#> Calculations and Intervals on Original Scale
Created on 2022-10-25 with reprex v2.0.2
Edit 2
As I say in a comment below, in the new data the soil type uniquely identifies each pair of region and location, as unique(df[1:3]) shows. It therefore becomes useless to split by group and stratify within groups.
# Bootstrap statistic: coefficients of the temperature + soil type model,
# refitted on the rows picked by the resampling indices `i`.
bootfun2 <- function(data, i) {
  coef(lm(RR ~ temperature + soil_type, data = data[i, ]))
}

unique(df[1:3]) # soil type uniquely identifies region/location
#>     group  subgroup soil_type
#> 1 region1 location1     soil1
#> 2 region2 location2     soil2
#> 3 region1 location2     soil3
#> 4 region2 location1     soil4

# Reference fit on the full data; its coefficient names label the intervals.
fit <- lm(RR ~ temperature + soil_type, data = df)
coef(fit)
#>    (Intercept)    temperature soil_typesoil2 soil_typesoil3 soil_typesoil4
#>     0.25928498    -0.06352205    -0.17739104    -0.05243836    -0.20408527

set.seed(2022)
R <- 1000

# Ordinary (unstratified) bootstrap over the whole data set.
b_3 <- boot(df, bootfun2, R)
b_3
#>
#> ORDINARY NONPARAMETRIC BOOTSTRAP
#>
#>
#> Call:
#> boot(data = df, statistic = bootfun2, R = R)
#>
#>
#> Bootstrap Statistics :
#>        original        bias    std. error
#> t1*  0.25928498  0.005724634   0.18033509
#> t2* -0.06352205 -0.002910677   0.05161868
#> t3* -0.17739104  0.004932486   0.18665594
#> t4* -0.05243836  0.005796168   0.19602658
#> t5* -0.20408527  0.004914674   0.20355549

# One set of intervals per column of the replicate matrix, named after the
# corresponding model coefficient.
btype <- c("norm", "basic", "perc", "bca")
ci_list3 <- setNames(
  lapply(
    seq_len(ncol(b_3$t)),
    \(index) boot.ci(b_3, type = btype, index = index)
  ),
  names(coef(fit))
)
ci_list3
#> $`(Intercept)`
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = b_3, type = btype, index = index)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.0999, 0.6070 )   (-0.0868, 0.6172 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.0986, 0.6054 )   (-0.0992, 0.6034 )
#> Calculations and Intervals on Original Scale
#>
#> $temperature
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = b_3, type = btype, index = index)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.1618, 0.0406 )   (-0.1631, 0.0401 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.1672, 0.0360 )   (-0.1552, 0.0503 )
#> Calculations and Intervals on Original Scale
#>
#> $soil_typesoil2
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = b_3, type = btype, index = index)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.5482, 0.1835 )   (-0.5541, 0.1955 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.5503, 0.1994 )   (-0.5542, 0.1927 )
#> Calculations and Intervals on Original Scale
#>
#> $soil_typesoil3
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = b_3, type = btype, index = index)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.4424, 0.3260 )   (-0.4399, 0.3068 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.4117, 0.3350 )   (-0.4116, 0.3350 )
#> Calculations and Intervals on Original Scale
#>
#> $soil_typesoil4
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = b_3, type = btype, index = index)
#>
#> Intervals :
#> Level      Normal              Basic
#> 95%   (-0.6080, 0.1900 )   (-0.6116, 0.2127 )
#>
#> Level     Percentile            BCa
#> 95%   (-0.6208, 0.2035 )   (-0.6284, 0.1801 )
#> Calculations and Intervals on Original Scale
Created on 2022-10-25 with reprex v2.0.2