I am always interested in the quickest ways to iterate over rows so I ran a few benchmarks of the other two answers and two more approaches. One is exactly the same as the answer by margusl except it uses purrr::pmap() rather than dplyr::rowwise(), which can be a faster alternative.
I also added an approach which converts to matrix, transposes and iterates over columns:
x <- t(as.matrix(df[, c("A", "B", "C", "D")])) df$Z <- sapply(seq_len(nrow(df)), \(i){ overlaps <- intersect( x[, i], strsplit(df$X[[i]], ",")[[1]] ) if (length(overlaps) == 0) NA_character_ else overlaps })
This approach turns out to be much faster than the others. I also tried iterating over columns using data.table::transpose() and in this case it is almost as fast as t(), but not faster. So the message is, if you can, iterate over columns instead of rows.
Full benchmark results
I generated random strings in the same format as your data frame for between 10 and 10^5 rows and compared the approaches. purrr::pmap() is comparable in speed to the base R approach. rowwise() is particularly slow with a lot more memory usage (500mb with 1e5 rows, compared with 800kb for Friede's base R approach). With 1e5 rows, the base R approach takes 13 seconds, purrr::pmap() 15 seconds, dplyr::rowwise() 2m40 seconds and iterating over columns takes 4 seconds. The full results are:
# A tibble: 25 × 10 expression n min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time <bch:expr> <dbl> <dbl> <dbl> <dbl> <bch:byt> <dbl> <int> <dbl> <dbl> 1 base 10 1.05 1.12 860. 0B 8.69 99 1 115. 2 rowwise 10 15.3 16.8 57.6 53.25KB 0 29 0 503. 3 purrr 10 1.29 1.44 599. 10.57KB 0 100 0 167. 4 data.table 10 2.50 2.77 345. 187.04KB 3.48 99 1 287. 5 base_columns 10 0.319 0.347 2655. 736B 0 100 0 37.7 6 base 100 11.4 15.6 63.6 848B 2.19 29 1 456. 7 rowwise 100 198. 203. 4.92 510.5KB 2.46 2 1 406. 8 purrr 100 11.5 12.9 70.8 105.04KB 0 36 0 509. 9 data.table 100 5.48 6.14 151. 207.79KB 5.60 27 1 179. 10 base_columns 100 2.34 2.50 392. 9.09KB 0 100 0 255. 11 base 1000 110. 117. 8.38 7.86KB 1.68 5 1 596. 12 rowwise 1000 1437. 1447. 0.683 4.93MB 1.14 3 5 4393. 13 purrr 1000 124. 152. 6.80 1.02MB 1.70 4 1 588. 14 data.table 1000 34.5 37.0 26.6 381.34KB 0 14 0 527. 15 base_columns 1000 21.8 25.3 37.8 86.41KB 1.99 19 1 503. 16 base 10000 1322. 1479. 0.633 83.5KB 1.69 3 8 4738. 17 rowwise 10000 15603. 16366. 0.0597 49.18MB 1.19 3 60 50284. 18 purrr 10000 1282. 1306. 0.770 10.22MB 1.28 3 5 3894. 19 data.table 10000 392. 416. 2.37 2.12MB 0.790 3 1 1266. 20 base_columns 10000 276. 281. 3.55 909.53KB 1.18 3 1 844. 21 base 100000 11919. 11959. 0.0829 781.78KB 0.608 3 22 36184. 22 rowwise 100000 145657. 158549. 0.00644 491.72MB 0.403 3 188 465947. 23 purrr 100000 14887. 16189. 0.0615 102.23MB 0.390 3 19 48778. 24 data.table 100000 4521. 4526. 0.217 19.3MB 0.434 3 6 13832. 25 base_columns 100000 3061. 4401. 0.233 8.63MB 0.311 3 4 12870.

Benchmark code
library(dplyr) library(stringr) library(data.table) results <- bench::press( n = 10^(1:5), { rand_strings <- CJ(letter = LETTERS[1:4], number = 0:9)[ , strings := paste0(letter, number) ][] df <- data.frame( ID = seq(n), A = sample(rand_strings[letter == "A", strings], n, replace = TRUE), B = sample(rand_strings[letter == "B", strings], n, replace = TRUE), C = sample(rand_strings[letter == "C", strings], n, replace = TRUE), D = sample(rand_strings[letter == "D", strings], n, replace = TRUE), X = sapply( seq(n), \(i) paste(sample(rand_strings$strings, sample(0:10, 1), replace = TRUE), collapse = ",") ) ) dt <- as.data.table(df) bench::mark( max_iterations = 100, min_iterations = 3, time_unit = "ms", check = FALSE, base = { # Friede approach lapply(seq_len(nrow(df)), \(i) { x <- df[i, LETTERS[1L:4L]] x <- x[x %in% unlist(strsplit(df$X[i], ","))] if (ncol(x)) paste0(x, collapse = ",") else NA }) }, rowwise = { # margusl approach df |> rowwise() |> mutate(Z = intersect( c(across(A:D)), str_split(X, ",", simplify = TRUE) ) |> str_c(collapse = ",") |> na_if("")) |> ungroup() }, purrr = { df |> purrr::pmap(\(ID, A, B, C, D, X) { intersect( c(A, B, C, D), str_split(X, ",", simplify = TRUE) ) |> str_c(collapse = ",") |> na_if("") }) |> unlist() }, data.table = { x <- transpose(dt[, .(A, B, C, D)]) dt[, Z := sapply(seq_len(nrow(df)), \(i) intersect( x[[i]], strsplit(df$X[[i]], ",")[[1]] ) |> paste(collapse = ","))][Z == "", Z := NA_character_] } ) } ) # Plotting the results library(ggplot2) autoplot(results) + facet_wrap( vars(n), scales = "free_x", labeller = labeller(n = \(x) sprintf("Num rows: %s", x)) ) + theme_bw() + theme(legend.position = "bottom")