EDIT: Reduced the size of the dataset
Sample data:
# Synthetic example data: one row per location x year x day.
# The dimensions are named once inside local() so that only `df` is created
# in the calling environment.
df <- local({
  n_loc <- 10
  yrs <- 1980:2015
  n_day <- 80
  n_yr <- length(yrs)
  n_obs <- n_loc * n_yr * n_day

  data.frame(
    loc.id = rep(seq_len(n_loc), each = n_day * n_yr),
    year = rep(rep(yrs, each = n_day), times = n_loc),
    day = rep(rep(seq_len(n_day), times = n_yr), times = n_loc),
    rain = runif(n_obs, min = 0, max = 5),
    swc = runif(n_obs, min = 0, max = 50),
    # SW.max is drawn once per location and repeated over all of its rows.
    SW.max = rep(runif(n_loc, min = 100, max = 200), each = n_day * n_yr),
    SW.ini = runif(n_obs),
    PETc = runif(n_obs, min = 0, max = 1.3),
    # Result columns, filled in by the water-balance calculation.
    SW = NA,
    PAW = NA,
    aetc = NA
  )
})
# df contains daily data (80 days) for 1980-2015 for 10 locations.
# For each location X year combination, I want to do the following calculation:
library(data.table) # rbindlist() below comes from data.table

# Daily soil-water balance, computed separately for every location x year
# block of df. Within a block each day needs the previous day's SW, so the
# day loop is sequential; the blocks do not depend on one another.

# Computed once, outside the loops.
locations <- unique(df$loc.id)
years <- unique(df$year)

# One slot per location x year combination, allocated up front.
list.result <- vector("list", length(locations) * length(years))

ptm <- proc.time()
n <- 0
for (i in seq_along(locations)) {
  location <- locations[i]
  print(location)
  for (j in seq_along(years)) {
    yr <- years[j]
    print(yr)

    # Rows for location i and year j.
    df_year <- df[df$loc.id == location & df$year == yr, ]
    n_day <- nrow(df_year)
    if (n_day == 0L) {
      next # no rows for this combination; leaves no entry in list.result
    }

    SW.max <- df_year$SW.max[1]

    # Work on plain vectors: assigning single elements into a data frame
    # inside the day loop is far slower than assigning into a vector.
    rain <- df_year$rain
    swc <- df_year$swc
    petc <- df_year$PETc
    paw <- numeric(n_day)
    aetc <- numeric(n_day)
    sw <- numeric(n_day)

    # Day 1 starts from SW.ini; every later day starts from the previous
    # day's SW. seq_len() also copes with a block that has a single row.
    sw_prev <- df_year$SW.ini[1]
    for (day in seq_len(n_day)) {
      paw_d <- sw_prev + rain[day]

      # ifelse() is kept (rather than if/else) so that NA inputs propagate
      # as NA instead of stopping the loop with an error.
      # aetc is PETc when PAW >= swc, otherwise PETc scaled by PAW / swc ...
      aet_d <- ifelse(paw_d >= swc[day], petc[day],
                      (paw_d / swc[day]) * petc[day])
      # ... and is never larger than PAW.
      aet_d <- ifelse(aet_d > paw_d, paw_d, aet_d)

      # SW = previous SW + rain - aetc, limited to the range [0, SW.max].
      sw_d <- paw_d - aet_d
      sw_d <- ifelse(sw_d > SW.max, SW.max, ifelse(sw_d < 0, 0, sw_d))

      paw[day] <- paw_d
      aetc[day] <- aet_d
      sw[day] <- sw_d
      sw_prev <- sw_d
    }

    df_year$PAW <- paw
    df_year$aetc <- aetc
    df_year$SW <- sw

    n <- n + 1
    list.result[[n]] <- df_year
  }
}
proc.time() - ptm
# Timing reported for the earlier version of this loop, which assigned into
# df_year element by element:
#    user  system elapsed
#    8.64    0.00    8.75

final.dat <- rbindlist(list.result) # unused (NULL) slots are skipped

# This loop is sequential and I thought it is a good candidate for foreach in R.
# I have not really worked with foreach so doing some online research brought
# me to this:
library(doParallel)
library(data.table) # rbindlist() is also needed here to combine the results

cl <- makeCluster(4) # start 4 worker processes
registerDoParallel(cl)

locations <- unique(df$loc.id)
years <- unique(df$year) # taken from the data instead of a fixed 1980:2015

# One task per location. Each task runs the sequential year/day loops for its
# location, writes "<i>dat.csv" and returns the same table, so that foreach
# hands back a list with one element per location.
# df, locations and years are referenced in the body and are sent to the
# workers by foreach; .packages loads data.table on each worker so that
# rbindlist() and fwrite() are available there.
results <- tryCatch(
  foreach(i = seq_along(locations), .packages = "data.table") %dopar% {
    df_loc <- df[df$loc.id == locations[i], ]
    list.result <- vector("list", length(years))

    for (j in seq_along(years)) {
      # Rows for location i and year j.
      df_year <- df_loc[df_loc$year == years[j], ]
      n_day <- nrow(df_year)
      if (n_day == 0L) {
        next # no rows for this combination
      }

      SW.max <- df_year$SW.max[1]

      # Plain vectors are much cheaper to update day by day than columns of
      # a data frame.
      rain <- df_year$rain
      swc <- df_year$swc
      petc <- df_year$PETc
      paw <- numeric(n_day)
      aetc <- numeric(n_day)
      sw <- numeric(n_day)

      # Day 1 starts from SW.ini; later days start from the previous SW.
      sw_prev <- df_year$SW.ini[1]
      for (day in seq_len(n_day)) {
        paw_d <- sw_prev + rain[day]

        # ifelse() keeps NA inputs as NA results instead of raising an error.
        aet_d <- ifelse(paw_d >= swc[day], petc[day],
                        (paw_d / swc[day]) * petc[day])
        aet_d <- ifelse(aet_d > paw_d, paw_d, aet_d)

        # SW = previous SW + rain - aetc, limited to the range [0, SW.max].
        sw_d <- paw_d - aet_d
        sw_d <- ifelse(sw_d > SW.max, SW.max, ifelse(sw_d < 0, 0, sw_d))

        paw[day] <- paw_d
        aetc[day] <- aet_d
        sw[day] <- sw_d
        sw_prev <- sw_d
      }

      df_year$PAW <- paw
      df_year$aetc <- aetc
      df_year$SW <- sw
      list.result[[j]] <- df_year
    }

    dat <- rbindlist(list.result)
    fwrite(dat, paste0(i, "dat.csv"))
    dat # value of the task; collected by foreach
  },
  # Shut the workers down whether or not a task failed.
  finally = stopCluster(cl)
)

final.dat <- rbindlist(results)

# My questions are:
1) Is the above data a good candidate for foreach?
2) There is a for-loop within the foreach. Does that make sense?
3) How do I make the above foreach run and return all the results?
`lapply` or `purrr::map` to loop through all 3000 locations. That would get rid of 1 loop. `Reduce`. See these links for examples: stackoverflow.com/questions/40412516/… | stackoverflow.com/questions/34624110/…