---
title: "Advanced Usage"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
    fig_caption: true
vignette: >
  %\VignetteIndexEntry{Advanced Usage}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 6,
  fig.align = "center",
  message = FALSE,
  warning = FALSE
)
```

## Introduction

This vignette covers advanced NOVA usage including:

- Performance optimization
- Custom LR databases
- Parallel processing
- Integration with other tools
- Programmatic workflows

## Setup

```{r load}
library(NOVA)
library(data.table)
```

## Performance Optimization

### Parallel Processing

NOVA supports parallel computation via the `future` package:

```{r parallel, eval=FALSE}
# Enable parallel processing
library(future)

# Use all cores minus 1, but never fewer than one worker:
# detectCores() - 1 is 0 on a single-core machine and NA where the
# core count cannot be determined, and neither is a valid worker count.
n_workers <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
plan(multisession, workers = n_workers)

# Run analysis (automatically parallelized)
result <- ExtractEdges(
  expression = expr,
  annotation = annotation,
  species = "human"
)

# Reset to sequential
plan(sequential)
```

### NOVA Options

Configure global behavior:

```{r options}
# View current options
cat("Verbose:", getOption("nova.verbose", TRUE), "\n")
cat("Parallel:", getOption("nova.parallel", TRUE), "\n")
cat("Workers:", getOption("nova.workers", parallel::detectCores() - 1), "\n")

# Customize options
options(
  nova.verbose = TRUE,   # Print progress messages
  nova.parallel = TRUE,  # Enable parallelization
  nova.workers = 4       # Number of parallel workers
)
```

### Memory Efficiency

For large datasets, use sparse matrices:

```{r memory}
# Check if expression is sparse
expr_example <- Matrix::Matrix(matrix(0, 1000, 1000), sparse = TRUE)
cat("Dense size:", object.size(as.matrix(expr_example)), "bytes\n")
cat("Sparse size:", object.size(expr_example), "bytes\n")

# NOVA automatically handles sparse matrices efficiently
```

## Custom Ligand-Receptor Database

### Creating Custom Database

```{r custom_db}
# Create custom LR database
custom_lr <- data.table::data.table(
  ligand = c("CXCL12", "CCL2", "IL6", "TGFB1", "VEGFA"),
  receptor = c("CXCR4", "CCR2", "IL6R", "TGFBR1", "KDR"),
  category = c(
    "chemokine", "chemokine", "cytokine",
    "growth_factor", "growth_factor"
  ),
  source = rep("custom", 5)
)

print(custom_lr)
```

### Using Custom Database

```{r use_custom, eval=FALSE}
# Use custom database in analysis
result <- ExtractEdges(
  expression = expr,
  annotation = annotation,
  species = "human",
  lr_database = custom_lr  # Custom database
)
```

### Extending Built-in Database

```{r extend_db}
# Get built-in database
builtin_lr <- GetLRDatabase("lrc2p")

# Add custom pairs
custom_pairs <- data.table::data.table(
  ligand = c("CUSTOM_LIG1", "CUSTOM_LIG2"),
  receptor = c("CUSTOM_REC1", "CUSTOM_REC2")
)

# Combine (ensure matching columns)
extended_lr <- rbind(
  builtin_lr[, .(ligand, receptor)],
  custom_pairs,
  fill = TRUE
)

cat("Extended database size:", nrow(extended_lr), "pairs\n")
```

## Filtering and Subsetting

### Advanced Filtering

```{r filter_setup, eval=FALSE}
# Create example result
set.seed(42)
n_genes <- 100
n_cells <- 200

expr <- matrix(abs(rnorm(n_genes * n_cells)), n_genes, n_cells)
lr_db <- GetLRDatabase("lrc2p")
rownames(expr) <- c(
  unique(lr_db$ligand)[1:50],
  unique(lr_db$receptor)[1:50]
)
colnames(expr) <- paste0("Cell", seq_len(n_cells))
expr <- Matrix::Matrix(expr, sparse = TRUE)

clusters <- sample(c("A", "B", "C"), n_cells, replace = TRUE)
annotation <- data.frame(cell = colnames(expr), cluster = clusters)

result <- ExtractEdges(expr, annotation, species = "human")
```

```{r filtering, eval=FALSE}
# Filter by multiple criteria
filtered <- FilterEdges(
  result,
  min_pct = 0.1,
  min_mean = 0.5,
  min_specificity = 0.2
)

cat("Original edges:", nrow(result$edges), "\n")
cat("Filtered edges:", nrow(filtered$edges), "\n")
```

### Subsetting by Cluster

```{r subset, eval=FALSE}
# Get edges for specific cluster pairs
edges_A_to_B <- GetEdges(result, sending = "A", target = "B")
edges_from_A <- GetEdges(result, sending = "A")
edges_to_C <- GetEdges(result, target = "C")

cat("A -> B:", nrow(edges_A_to_B), "edges\n")
cat("A -> any:", nrow(edges_from_A), "edges\n")
cat("any -> C:", nrow(edges_to_C), "edges\n")
```

## Programmatic Workflows

### Batch Processing

```{r batch, eval=FALSE}
# Process multiple samples
samples <- c("sample1", "sample2", "sample3")

# Iterate over a named vector so the result list is keyed by sample name
results <- lapply(setNames(samples, samples), function(sample_id) {
  # Load data
  expr <- readRDS(paste0(sample_id, "_expression.rds"))
  ann <- read.csv(paste0(sample_id, "_annotation.csv"))

  # Run analysis
  res <- ExtractEdges(
    expression = expr,
    annotation = ann,
    species = "human"
  )

  cat("Processed", sample_id, ":", nrow(res$edges), "edges\n")
  res
})

# Combine results; `idcol` adds a leading `sample` column filled with the
# list names, so each row records which sample it came from
all_edges <- rbindlist(
  lapply(results, function(res) res$edges),
  idcol = "sample"
)
```

### Custom Analysis Pipeline

```{r pipeline, eval=FALSE}
# Define analysis function
#
# seurat_obj:  Seurat object to analyze
# cluster_col: name of the metadata column passed to SeuratToNOVA()
# species:     species passed to ExtractEdges()
# ...:         further arguments forwarded to ExtractEdges()
#
# Returns a list with the updated Seurat object (`seurat`) and the
# ExtractEdges() result (`nova`).
analyze_communication <- function(seurat_obj,
                                  cluster_col = "cell_type",
                                  species = "human",
                                  ...) {
  # Convert Seurat object
  nova_input <- SeuratToNOVA(seurat_obj, cluster_col = cluster_col)

  # Run analysis
  result <- ExtractEdges(
    expression = nova_input$expression,
    annotation = nova_input$annotation,
    species = species,
    ...
  )

  # Store back in Seurat
  seurat_obj <- AddNOVAResults(seurat_obj, result)

  list(seurat = seurat_obj, nova = result)
}

# Use the pipeline
output <- analyze_communication(
  seurat_obj,
  cluster_col = "cell_type",
  species = "mouse",
  min_pct = 0.1
)
```

## Integration with Other Tools

### Export for Cytoscape

```{r cytoscape, eval=FALSE}
# Export edges for Cytoscape visualization
edges <- result$edges[, .(
  source = sending_cluster,
  target = target_cluster,
  interaction = paste(ligand, receptor, sep = "-"),
  weight = edge_specificity_mean
)]

write.csv(edges, "cytoscape_edges.csv", row.names = FALSE)

# Export node attributes
# as.character() keeps cluster labels intact if the columns are factors;
# rep() sizes `type` to the ids so an empty edge table gives an empty
# node table instead of a "differing number of rows" error
node_ids <- unique(c(as.character(edges$source), as.character(edges$target)))
nodes <- data.frame(
  id = node_ids,
  type = rep("cluster", length(node_ids))
)

write.csv(nodes, "cytoscape_nodes.csv", row.names = FALSE)
```

### Integration with CellChat/LIANA

```{r integration, eval=FALSE}
# Convert NOVA results to CellChat format
#
# result: output of ExtractEdges(); only `result$edges` is used
#
# Returns a data.frame with one row per edge and the columns
# source, target, ligand, receptor, prob and pval.
nova_to_cellchat <- function(result) {
  edges <- result$edges

  # Create interaction data frame
  data.frame(
    source = edges$sending_cluster,
    target = edges$target_cluster,
    ligand = edges$ligand,
    receptor = edges$receptor,
    prob = edges$edge_specificity_mean,
    # NOVA doesn't compute p-values; a numeric NA per row keeps the column
    # numeric and also works when there are zero edges
    pval = rep(NA_real_, nrow(edges))
  )
}
```

## Troubleshooting

### Common Issues

**1. No edges detected**

```{r troubleshoot1, eval=FALSE}
# Check gene overlap with database
lr_db <- GetLRDatabase("lrc2p")
genes_in_data <- rownames(expr)

# Count distinct genes: the database has one row per ligand-receptor pair,
# so a gene that takes part in several pairs must only be counted once
ligand_overlap <- sum(unique(lr_db$ligand) %in% genes_in_data)
receptor_overlap <- sum(unique(lr_db$receptor) %in% genes_in_data)

cat("Ligands found:", ligand_overlap, "\n")
cat("Receptors found:", receptor_overlap, "\n")

# Lower thresholds if needed
result <- ExtractEdges(expr, annotation, species = "human", min_pct = 0)
```

**2. Memory issues**

```{r troubleshoot2, eval=FALSE}
# Use sparse matrix
# Leave an already-sparse matrix alone: as.matrix() would first expand it
# to a dense matrix, which is exactly what runs out of memory
expr_sparse <- if (inherits(expr, "sparseMatrix")) {
  expr
} else {
  Matrix::Matrix(as.matrix(expr), sparse = TRUE)
}

# Process clusters in batches
unique_clusters <- unique(annotation$cluster)
batch_id <- ceiling(seq_along(unique_clusters) / 5)  # batches of up to 5

for (subset_clusters in split(unique_clusters, batch_id)) {
  # Process subset...
}
```

**3. Species mapping issues**

```{r troubleshoot3, eval=FALSE}
# Check gene name format
head(rownames(expr))  # Should match species conventions

# Verify species parameter
result <- ExtractEdges(expr, annotation, species = "mouse")  # Not "Mouse" or "MOUSE"
```

## Session Info

```{r session}
sessionInfo()
```

## Author

**Zaoqu Liu**

- Email: liuzaoqu@163.com
- GitHub: [Zaoqu-Liu](https://github.com/Zaoqu-Liu)