###################################################################
# Benchmark script: import the same file many times with different
# readers (readr, vroom, readxl) and different parallel back-ends
# (furrr, doSNOW, doFuture) and time each variant with tictoc.
#
# Every benchmark is wrapped in `if (FALSE) { ... }`, so sourcing the
# script only defines the paths and the progress settings below; run
# the body of a section by hand to execute it. The "sec elapsed"
# figures in the section headers are the timings recorded by the
# author and will differ between machines.
###################################################################

folder_path <- "diamond csv storage"
csv_file_path <- file.path(folder_path, "diamonds.csv")
xlsx_file_path <- file.path(folder_path, "diamonds.xlsx")

###################################################################
# running for the first time, this section creates the csv + excel files
if (FALSE) {
  library(tidyverse)

  if (!dir.exists(folder_path)) {
    dir.create(folder_path)
  }

  # `diamonds` is not defined in this script; presumably it is the
  # example data set made available by library(tidyverse).
  write_csv(diamonds, csv_file_path)
  openxlsx::write.xlsx(diamonds, xlsx_file_path, asTable = TRUE)
}

###################################################################
# from:
# https://www.jottr.org/2023/01/10/progressr-0.13.0/
# Register progressr handlers globally and hand cli progress bars over
# to progressr (see the linked post for the rationale).
progressr::handlers(global = TRUE)
options(cli.progress_handlers = "progressr")

###################################################################
# readr::read_csv, sequential
# for 300 files 13.64 sec elapsed
if (FALSE) {
  library(tidyverse)

  how_many_files <- 300

  tictoc::tic()
  import_of_files <- tibble(
    row_id = seq_len(how_many_files),
    file_path = csv_file_path
  ) %>%
    mutate(
      imported_data = map(
        file_path,
        # per-file progress is silenced; only the outer map reports
        ~ read_csv(.x, show_col_types = FALSE, progress = FALSE),
        .progress = TRUE
      )
    )
  tictoc::toc()
}

###################################################################
# vroom::vroom, sequential
# for 300 files 4.96 sec elapsed
if (FALSE) {
  library(tidyverse)

  how_many_files <- 300

  tictoc::tic()
  import_of_files <- tibble(
    row_id = seq_len(how_many_files),
    file_path = csv_file_path
  ) %>%
    mutate(
      imported_data = map(
        file_path,
        ~ vroom::vroom(.x, show_col_types = FALSE, progress = FALSE),
        .progress = TRUE
      )
    )
  tictoc::toc()
}

###################################################################
# readxl::read_excel, sequential
# for 100 files 61.35 sec elapsed
# (timing was recorded before `progress = FALSE` was added to the
# read_excel() call; it is set here so that this variant matches the
# parallel variants below)
if (FALSE) {
  library(tidyverse)

  how_many_files <- 100

  tictoc::tic()
  import_of_files <- tibble(
    row_id = seq_len(how_many_files),
    file_path = xlsx_file_path
  ) %>%
    mutate(
      imported_data = map(
        file_path,
        ~ readxl::read_excel(.x, progress = FALSE),
        .progress = TRUE
      )
    )
  tictoc::toc()
}

###################################################################
# with future map
# for 100 files 19.87 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(furrr) # this also loads the future package

  all_cores <- parallelly::availableCores()
  # Alternative: all_cores <- parallelly::availableCores(logical = FALSE)
  # NOTE(review): `logical = FALSE` should count physical cores instead
  # of logical (hyper-threaded) ones - confirm against the parallelly
  # documentation. The `all_cores` line is probably optional, because
  # multisession is documented to default to availableCores() workers.

  # plan() returns the previously active plan; keep it so the worker
  # sessions started here can be shut down again afterwards.
  previous_plan <- future::plan(future::multisession, workers = all_cores)

  how_many_files <- 100

  tryCatch(
    {
      tictoc::tic()
      import_of_files <- tibble(
        row_id = seq_len(how_many_files),
        file_path = xlsx_file_path
      ) %>%
        mutate(
          imported_data = future_map(
            file_path,
            ~ readxl::read_excel(.x, progress = FALSE),
            .progress = TRUE
          )
        )
      tictoc::toc()
    },
    # restore the earlier plan even if an import fails
    finally = future::plan(previous_plan)
  )
}

###################################################################
# foreach + doSNOW on a PSOCK cluster
# for 100 files 20.33 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(doSNOW)
  library(doParallel)

  # all_cores <- parallel::detectCores(logical = FALSE)
  all_cores <- parallel::detectCores()
  cl <- parallel::makePSOCKcluster(all_cores)
  doSNOW::registerDoSNOW(cl)

  how_many_files <- 100

  # text progress bar advanced through doSNOW's `progress` option
  pb <- txtProgressBar(max = how_many_files, style = 3)
  progress <- function(n) setTxtProgressBar(pb, n)
  opts <- list(progress = progress)

  tryCatch(
    {
      tictoc::tic()
      result_of_loop <- foreach(
        file_id = seq_len(how_many_files),
        .options.snow = opts,
        # NOTE(review): the loop body only calls readxl via `::`, so
        # attaching tidyverse on every worker may be avoidable overhead;
        # kept because the recorded timing includes it.
        .packages = c("tidyverse")
      ) %dopar% {
        readxl::read_excel(xlsx_file_path, progress = FALSE)
      }
      import_of_files <- result_of_loop %>% list_rbind()
      tictoc::toc()
    },
    # always release the workers and the progress bar, also on error
    finally = {
      parallel::stopCluster(cl)
      close(pb)
    }
  )
}

###################################################################
# foreach + doFuture
# for 100 files 18.78 sec elapsed
if (FALSE) {
  library(tidyverse)
  library(doFuture)

  doFuture::registerDoFuture()
  # keep the previously active plan so it can be restored afterwards
  previous_plan <- future::plan(future::multisession)

  how_many_files <- 100

  tryCatch(
    {
      tictoc::tic()
      result_of_loop <- foreach(file_id = seq_len(how_many_files)) %dopar% {
        readxl::read_excel(xlsx_file_path, progress = FALSE)
      }
      import_of_files <- result_of_loop %>% list_rbind()
      tictoc::toc()
    },
    # restore the earlier plan even if an import fails
    finally = future::plan(previous_plan)
  )
}