--- title: "Performance Optimization" author: "Zaoqu Liu" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Performance Optimization} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, fig.align = "center", message = FALSE, warning = FALSE ) ``` ## Introduction CytoSPACER is designed for high performance through C++ implementations and parallel processing. This vignette provides guidance on optimizing performance for large datasets. ## Performance Architecture CytoSPACER uses several strategies for optimal performance: 1. **C++ Backend**: Core algorithms implemented in C++ via Rcpp 2. **Parallel Processing**: Multi-core support via the `future` framework 3. **Chunked Processing**: Memory-efficient handling of large datasets 4. **Sparse Matrix Support**: Efficient handling of sparse data ```{r load-library} library(CytoSPACER) ``` ## Benchmarking ### C++ vs R Implementation The C++ implementations provide significant speedups: ```{r benchmark-cpp} # Create test data set.seed(42) n_genes <- 500 n_cells <- 1000 n_spots <- 200 sc_data <- matrix(rnorm(n_genes * n_cells), nrow = n_genes, ncol = n_cells) st_data <- matrix(rnorm(n_genes * n_spots), nrow = n_genes, ncol = n_spots) rownames(sc_data) <- rownames(st_data) <- paste0("Gene", seq_len(n_genes)) colnames(sc_data) <- paste0("Cell", seq_len(n_cells)) colnames(st_data) <- paste0("Spot", seq_len(n_spots)) # Benchmark correlation computation time_cpp <- system.time({ cost_cpp <- compute_cost_matrix(sc_data, st_data, method = "pearson", use_cpp = TRUE) }) time_r <- system.time({ cost_r <- compute_cost_matrix(sc_data, st_data, method = "pearson", use_cpp = FALSE) }) cat("C++ implementation:", round(time_cpp["elapsed"], 3), "seconds\n") cat("R implementation:", round(time_r["elapsed"], 3), "seconds\n") cat("Speedup:", round(time_r["elapsed"] / time_cpp["elapsed"], 1), "x\n") ``` ### LAP Solver Performance ```{r benchmark-lap} # Create cost matrices of different sizes sizes <- c(100, 200, 500) times <- numeric(length(sizes)) for (i in seq_along(sizes)) { n <- sizes[i] cost <- matrix(runif(n * n), nrow = n, ncol = n) times[i] <- system.time({ assignment <- solve_lap(cost) })["elapsed"] } # Display results results_df <- data.frame( Size = paste0(sizes, "x", sizes), Time_sec = round(times, 4) ) print(results_df) ``` ## Memory Optimization ### Sparse Matrix Handling CytoSPACER efficiently handles sparse matrices: ```{r sparse-handling} library(Matrix) # Create sparse data (typical for scRNA-seq) sparse_data <- rsparsematrix(1000, 5000, density = 0.1) rownames(sparse_data) <- paste0("Gene", 1:1000) colnames(sparse_data) <- paste0("Cell", 1:5000) # Check memory usage dense_size <- object.size(as.matrix(sparse_data)) sparse_size <- object.size(sparse_data) cat("Dense matrix size:", format(dense_size, units = "MB"), "\n") cat("Sparse matrix size:", format(sparse_size, units = "MB"), "\n") cat("Memory savings:", round((1 - as.numeric(sparse_size)/as.numeric(dense_size)) * 100), "%\n") ``` ### Downsampling For very large scRNA-seq datasets, downsampling reduces memory and computation: ```{r downsampling} # Create large count matrix large_counts <- matrix(rpois(500 * 10000, lambda = 10), nrow = 500, ncol = 10000) rownames(large_counts) <- paste0("Gene", 1:500) colnames(large_counts) <- paste0("Cell", 1:10000) # Downsample to target count time_downsample <- system.time({ downsampled <- downsample_expression(large_counts, target_count = 1500, seed = 42) }) cat("Downsampling time:", round(time_downsample["elapsed"], 3), "seconds\n") cat("Original total counts:", sum(large_counts[,1]), "\n") cat("Downsampled total counts:", sum(downsampled[,1]), "\n") ``` ## Parallel Processing ### Setting Up Workers ```{r parallel-setup} library(future) # Check available cores n_cores <- parallel::detectCores() cat("Available cores:", n_cores, "\n") # Recommended: use n_cores - 1 for parallel processing n_workers <- max(1, n_cores - 1) cat("Recommended workers:", n_workers, "\n") ``` ### Parallel LAP Solving ```{r parallel-lap} # Create multiple cost matrices cost_list <- lapply(1:4, function(i) { matrix(runif(200 * 200), nrow = 200, ncol = 200) }) # Sequential processing time_seq <- system.time({ results_seq <- lapply(cost_list, solve_lap) }) # Parallel processing time_par <- system.time({ results_par <- solve_lap_parallel(cost_list, n_workers = 2, progress = FALSE) }) cat("Sequential time:", round(time_seq["elapsed"], 3), "seconds\n") cat("Parallel time:", round(time_par["elapsed"], 3), "seconds\n") ``` ### Using Future Plans ```{r future-plans, eval=FALSE} library(future) # Multisession (recommended for most cases) plan(multisession, workers = 4) # Multicore (Unix/macOS only, more efficient) plan(multicore, workers = 4) # Sequential (for debugging) plan(sequential) # Run CytoSPACER with custom plan results <- run_cytospace( sc_data = sc_data, cell_types = cell_types, st_data = st_data, coordinates = coordinates, n_workers = NULL # Uses current plan ) # Reset to default plan(sequential) ``` ## Chunked Processing For very large datasets, CytoSPACER uses chunked processing: ```{r chunked-processing, eval=FALSE} # Large dataset example results <- run_cytospace( sc_data = sc_data, cell_types = cell_types, st_data = st_data, coordinates = coordinates, chunk_size = 5000, # Process 5000 cells at a time n_workers = 4 ) ``` ### How Chunking Works 1. **Partition**: Cells are divided into chunks of `chunk_size` 2. **Subproblem Creation**: Each chunk creates a subproblem with relevant spots 3. **Parallel Solving**: Subproblems are solved independently 4. **Aggregation**: Results are combined into final assignment ```{r chunking-demo} # Demonstrate partitioning n_cells <- 15000 chunk_size <- 5000 n_chunks <- ceiling(n_cells / chunk_size) cat("Total cells:", n_cells, "\n") cat("Chunk size:", chunk_size, "\n") cat("Number of chunks:", n_chunks, "\n") ``` ## Optimization Guidelines ### Dataset Size Recommendations | Dataset Size | Recommended Settings | |--------------|---------------------| | < 10,000 cells | Default settings | | 10,000 - 50,000 cells | `n_workers = 4`, `chunk_size = 10000` | | 50,000 - 200,000 cells | `n_workers = 8`, `chunk_size = 5000`, `downsample = TRUE` | | > 200,000 cells | Subsample reference, `n_workers = max`, `chunk_size = 5000` | ### Memory Guidelines ```{r memory-guidelines} # Estimate memory requirements estimate_memory <- function(n_cells, n_spots, n_genes) { # Cost matrix: n_cells x n_spots (double precision) cost_matrix_mb <- (n_cells * n_spots * 8) / (1024^2) # Expression matrices sc_matrix_mb <- (n_genes * n_cells * 8) / (1024^2) st_matrix_mb <- (n_genes * n_spots * 8) / (1024^2) total_mb <- cost_matrix_mb + sc_matrix_mb + st_matrix_mb cat("Estimated memory requirements:\n") cat(" Cost matrix:", round(cost_matrix_mb), "MB\n") cat(" scRNA-seq matrix:", round(sc_matrix_mb), "MB\n") cat(" ST matrix:", round(st_matrix_mb), "MB\n") cat(" Total (approx):", round(total_mb), "MB\n") } # Example: 50,000 cells, 5,000 spots, 20,000 genes estimate_memory(50000, 5000, 20000) ``` ### Best Practices 1. **Use sparse matrices** when possible (typical scRNA-seq data is >90% zeros) 2. **Downsample scRNA-seq** to ~1500 counts per cell before analysis 3. **Subset genes** to highly variable genes if memory is limited 4. **Monitor memory** during analysis: ```{r monitor-memory} # Check current memory usage gc_info <- gc() mem_used <- sum(gc_info[, 2]) cat("Current memory used:", round(mem_used), "MB\n") ``` 5. **Use appropriate chunk size**: - Larger chunks = faster but more memory - Smaller chunks = slower but less memory ## Profiling ### Time Profiling ```{r time-profiling} # Profile individual steps set.seed(42) n_cells <- 50 n_spots <- 100 sc_test <- matrix(rpois(100 * n_cells, 10), nrow = 100) st_test <- matrix(rpois(100 * n_spots, 50), nrow = 100) rownames(sc_test) <- rownames(st_test) <- paste0("Gene", 1:100) colnames(sc_test) <- paste0("Cell", 1:n_cells) colnames(st_test) <- paste0("Spot", 1:n_spots) # Normalization time_norm <- system.time({ sc_norm <- normalize_expression(sc_test) st_norm <- normalize_expression(st_test) }) # Cost matrix time_cost <- system.time({ cost <- compute_cost_matrix(sc_norm, st_norm, method = "pearson") }) # LAP solving (rows must be <= cols) time_lap <- system.time({ assignment <- solve_lap(cost) }) cat("Timing breakdown:\n") cat(" Normalization:", round(time_norm["elapsed"], 4), "s\n") cat(" Cost matrix:", round(time_cost["elapsed"], 4), "s\n") cat(" LAP solving:", round(time_lap["elapsed"], 4), "s\n") ``` ## Troubleshooting Performance Issues ### Issue: Out of Memory ```{r oom-solutions, eval=FALSE} # Solution 1: Reduce chunk size results <- run_cytospace(..., chunk_size = 2000) # Solution 2: Downsample more aggressively results <- run_cytospace(..., downsample_target = 1000) # Solution 3: Subset to highly variable genes hvg <- VariableFeatures(seurat_obj)[1:2000] sc_subset <- sc_data[hvg, ] st_subset <- st_data[hvg, ] ``` ### Issue: Slow Performance ```{r slow-solutions, eval=FALSE} # Solution 1: Increase workers results <- run_cytospace(..., n_workers = 8) # Solution 2: Use larger chunks (if memory allows) results <- run_cytospace(..., chunk_size = 20000) # Solution 3: Use C++ implementations (default) cost <- compute_cost_matrix(..., use_cpp = TRUE) ``` ## Session Info ```{r session-info} sessionInfo() ```