I have a table cluster (with more than one column):
head(cluster[,c('cuil_direccion')]) [1] "PJE INDEA 98 5 " [2] "PJE INDE 98 5 " [3] "B 34 VIV RECRE 57 00 " [4] "S CASA DE GO 600 " [5] "RCCA 958 00 o " [6] "JUAN B 1900 "
I need to run a function that, for each row, extracts the numbers and pastes them into a list. I'm using `str_extract_all`. Since the table is huge, I'd like to split the data and use a different core for each split. I tried:
library(foreach)
library(doParallel)
# Register a parallel backend for %dopar%, sized by detectCores().
registerDoParallel(cores=detectCores(all.tests=TRUE))
# First attempt: for every row of x, extract the digit runs found in
# cuil_direccion and collect them next to the original address.
crea_tabla <- function(x){
  # split() with one distinct group id per row yields a list of single-row
  # data frames.
  xlst <- split(x, 1:nrow(x))
  # NOTE(review): this assignment is the last expression of the function, so
  # its value is returned invisibly -- calling crea_tabla(cluster) at the
  # console prints nothing.
  pred <- foreach(i = xlst, .combine = rbind) %dopar% {
    library(stringr)
    # Placeholder row; d and j are re-created on every iteration, so nothing
    # accumulates across iterations.
    d<-data.frame(dir='a', E_numdir=1)
    j=1
    DIR<-i$cuil_direccion[j]
    # Pattern: one or more digits, optionally wrapped in parentheses.
    E_NUMDIR <- str_extract_all(DIR,"\\(?[0-9]+\\)?")[[1]]
    d<-rbind(d, data.frame( dir=DIR , E_numdir=toString(E_NUMDIR)))
    # NOTE(review): the value of an iteration is its last expression, which
    # here is the assignment to j (i.e. 2), not the data frame d -- so the
    # rbind-combined result does not contain the extracted rows.
    j=1+j
  }
}
then I ran
crea_tabla(cluster)
and I get an empty result.
I'm not sure how doParallel handles data. E.g., this part:
library(stringr) d<-data.frame(dir='a', E_numdir=1) j=1
Should I write it before or after `%dopar%`?
EDIT
# detectCores() may return NA when the core count cannot be determined;
# fall back to a single worker in that case.
num_cores <- detectCores(all.tests = TRUE)
if (is.na(num_cores)) {
  num_cores <- 1L
}
registerDoParallel(cores = num_cores)

#' Extract the numbers contained in each address, in parallel
#'
#' Splits the `cuil_direccion` column of `x` into at most `num_cores`
#' contiguous chunks, runs the (already vectorised) `str_extract_all()` on
#' each chunk in a `%dopar%` worker and row-binds the per-chunk results in
#' the original row order.
#'
#' Unlike the previous version, no placeholder row (`dir = "a"`) is prepended
#' to the result, and an input with zero rows yields an empty data frame.
#'
#' @param x A data frame with a `cuil_direccion` column.
#' @param num_cores Number of chunks to create (normally the number of
#'   registered workers). Invalid values are treated as 1.
#' @return A data frame with one row per row of `x` and two character
#'   columns: `dir` (the address) and `E_numdir` (the extracted numbers,
#'   comma-separated; `""` when the address contains none).
crea_tabla <- function(x, num_cores) {
  stopifnot(is.data.frame(x), "cuil_direccion" %in% names(x))

  direcciones <- as.character(x$cuil_direccion)
  n <- length(direcciones)
  if (n == 0L) {
    return(data.frame(
      dir = character(0),
      E_numdir = character(0),
      stringsAsFactors = FALSE
    ))
  }

  n_bloques <- 1L
  if (is.numeric(num_cores) && length(num_cores) == 1L &&
        !is.na(num_cores) && num_cores >= 1) {
    n_bloques <- as.integer(num_cores)
  }
  n_bloques <- min(n_bloques, n)

  # One task per chunk instead of one per row: dispatching a task costs far
  # more than extracting the digits of a single address. Dividing before
  # multiplying keeps the arithmetic in doubles (no integer overflow).
  grupo <- ceiling(seq_len(n) / n * n_bloques)
  bloques <- split(direcciones, grupo)

  # .packages loads stringr once per worker; foreach combines results in
  # input order by default, so rows keep the order of x.
  foreach(
    bloque = bloques,
    .combine = rbind,
    .packages = "stringr"
  ) %dopar% {
    numeros <- str_extract_all(bloque, "\\(?[0-9]+\\)?")
    data.frame(
      dir = bloque,
      E_numdir = vapply(numeros, toString, character(1)),
      stringsAsFactors = FALSE
    )
  }
}

a <- crea_tabla(cluster, num_cores)