Setup

We’ll use a bootstrap calculation function that simulates long-running computations:

library(mirai)
#> 
#> Attaching package: 'mirai'
#> The following object is masked from 'package:bakerrr':
#> 
#>     status
library(bakerrr)

# Bootstrap the mean of `x`, optionally pausing after every resample so the
# call behaves like a long-running computation.
#
# Arguments:
#   x          numeric vector with at least two values
#   n_boot     number of bootstrap resamples to draw
#   sleep_time seconds to pause after each resample (no pause unless > 0)
#
# Returns a list of class "long_stat_calc" with elements `boot_mean`,
# `boot_sd` and `elapsed` (a difftime in seconds).
long_stat_calc <- function(x, n_boot, sleep_time) {
  # Reject unusable input before any timing starts
  if (!is.numeric(x)) {
    stop("Input x must be numeric.")
  }
  if (length(x) < 2) {
    stop("Input x must have at least 2 values.")
  }

  t0 <- Sys.time()

  # One slot per resample, filled in order
  resampled_means <- numeric(n_boot)
  for (b in seq_len(n_boot)) {
    resampled_means[b] <- mean(sample(x, replace = TRUE))
    if (sleep_time > 0) {
      Sys.sleep(sleep_time)
    }
  }

  t1 <- Sys.time()

  structure(
    list(
      boot_mean = mean(resampled_means),
      boot_sd   = sd(resampled_means),
      elapsed   = difftime(t1, t0, units = "secs")
    ),
    class = "long_stat_calc"
  )
}

# Print method for easy reporting.
#
# Writes the bootstrap mean, bootstrap SD and elapsed time of a
# "long_stat_calc" object to the console, one per line.
#
# Arguments:
#   x    object of class "long_stat_calc"
#   ...  accepted for compatibility with the print() generic; unused
#
# Returns `x` invisibly, following the print-method convention, so the
# object can still be assigned or piped after printing.
print.long_stat_calc <- function(x, ...) {
  cat("Bootstrap Mean:", x$boot_mean, "\n")
  cat("Bootstrap SD:  ", x$boot_sd, "\n")
  cat("Elapsed Time:  ", x$elapsed, "seconds\n")
  invisible(x)
}

Data Preparation

# Arguments for 10 parallel jobs: each entry is one argument set for
# long_stat_calc() with its own freshly drawn sample of 100 normal values.
# simplify = FALSE keeps the result as a plain list of argument lists.
args_list <- replicate(
  10,
  list(x = rnorm(100), n_boot = 3000, sleep_time = 0.002),
  simplify = FALSE
)

mirai Implementation

mirai provides a lightweight, async-focused approach:

# Clean slate
mirai::daemons(0)
# Seeds the rnorm() inputs generated in this session.
# NOTE(review): the resampling inside the daemons presumably draws from the
# daemons' own RNG streams and is not governed by this seed -- confirm.
set.seed(10)

mirai_timing <- system.time({
  mirai::daemons(6)  # Start 6 daemon processes

  res <- mirai::mirai_map(
    .x = list(
      rnorm(100), rnorm(100), rnorm(100), rnorm(100),
      rnorm(100), rnorm(100), rnorm(100), rnorm(100),
      rnorm(100), rnorm(100)
    ),
    .f = long_stat_calc,
    .args = list(n_boot = 3000, sleep_time = 0.002)
  )

  # Wait for completion (showing progress) and collect in a single step.
  # Collecting with .flat would splice every job's fields into one flat
  # list, so element [[1]] would be a bare boot_mean; collecting without it
  # keeps one "long_stat_calc" object per job.
  mirai_results <- res[.progress]
})
#> ■■■■                              10% | ETA:  1m
#> ■■■■■■■■■■■■■■■■■■■■■■            70% | ETA:  6s
#> ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■  100% | ETA:  0s

print(mirai_timing)
#>    user  system elapsed 
#>   0.050   0.014  13.360
mirai::daemons(0)  # Clean up

bakerrr Implementation

bakerrr offers an object-oriented approach with built-in job management:

bakerrr_timing <- system.time({
  # Construct the job object and hand it straight to run_jobs().
  # NOTE(review): wait_for_results = TRUE is expected to block until every
  # job has finished -- confirm against the bakerrr documentation.
  baker <- bakerrr::run_jobs(
    bakerrr::bakerrr(
      long_stat_calc,
      args_list = args_list,
      n_daemons = 6
      # Optional: bg_args = list(stdout = "out.log", stderr = "error.log") # nolint
    ),
    wait_for_results = TRUE
  )

  # Results are read from the `results` slot of the returned object
  bakerrr_results <- baker@results
})

print(bakerrr_timing)
#>    user  system elapsed 
#>  14.192   2.939  13.732

Comparison

Performance

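Both approaches show similar wall-clock performance on this workload, which is dominated by Sys.sleep() pauses rather than CPU work (about 13 seconds elapsed for 10 jobs on 6 workers in both runs). Actual timing depends on: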

Task complexity
Number of workers
System resources
Overhead differences

API Design

mirai:

Functional programming style
Explicit daemon management
Direct result collection
Minimal syntax

bakerrr:

Object-oriented approach
Automatic resource management
Built-in logging options
Method chaining support

Use Cases

Choose mirai when:

You need fine-grained control over async operations
Working with streaming or reactive computations
Minimal dependencies are important
Direct integration with other async patterns

Choose bakerrr when:

You prefer object-oriented workflows
Built-in logging and error handling are valuable
Working within larger application frameworks
Method chaining fits your coding style

Results Inspection

# Inspect the structure of the first element of each result set
# (compare the two shapes before assuming they are interchangeable)
str(mirai_results[[1]])
#>  num -0.0305
str(bakerrr_results[[1]])
#> List of 3
#>  $ boot_mean: num -0.0963
#>  $ boot_sd  : num 0.107
#>  $ elapsed  : 'difftime' num 6.45031642913818
#>   ..- attr(*, "units")= chr "secs"
#>  - attr(*, "class")= chr "long_stat_calc"

# Print the first element from each result set
print(mirai_results[[1]])
#> [1] -0.03050123
print(bakerrr_results[[1]])
#> Bootstrap Mean: -0.09631596 
#> Bootstrap SD:   0.1072799 
#> Elapsed Time:   6.450316 seconds

Conclusion

Both mirai and bakerrr provide effective parallel processing capabilities. The choice depends on your specific requirements:

mirai: Lightweight, functional, explicit control
bakerrr: Object-oriented, feature-rich, automatic management

For production workflows requiring robust error handling and logging, bakerrr may be preferable. For performance-critical applications needing minimal overhead, mirai could be the better choice.

Session Info

# Record the R version, platform and package versions used for this run
sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Red Hat Enterprise Linux 8.10 (Ootpa)
#> 
#> Matrix products: default
#> BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] mirai_2.5.0   bakerrr_0.2.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] crayon_1.5.3      vctrs_0.6.5       cli_3.6.5         knitr_1.50       
#>  [5] rlang_1.1.6       xfun_0.53         processx_3.8.6    purrr_1.1.0      
#>  [9] S7_0.2.0          jsonlite_2.0.0    carrier_0.3.0.4   glue_1.8.0       
#> [13] nanonext_1.7.0    htmltools_0.5.8.1 ps_1.9.1          sass_0.4.10      
#> [17] rmarkdown_2.29    evaluate_1.0.5    jquerylib_0.1.4   fastmap_1.2.0    
#> [21] yaml_2.3.10       lifecycle_1.0.4   config_0.3.2      compiler_4.4.2   
#> [25] fs_1.6.6          rstudioapi_0.17.1 digest_0.6.37     R6_2.6.1         
#> [29] parallel_4.4.2    magrittr_2.0.4    callr_3.7.6       bslib_0.9.0      
#> [33] tools_4.4.2       cachem_1.1.0

mirai & bakerrr

Overview