This vignette covers advanced NOVA usage including:
NOVA supports parallel computation via the `future` package:
Configure global behavior:
# View current options. The second argument of getOption() is only the
# fallback that is shown when the option has not been set.
cat("Verbose:", getOption("nova.verbose", TRUE), "\n")
#> Verbose: TRUE
cat("Parallel:", getOption("nova.parallel", TRUE), "\n")
#> Parallel: TRUE
# detectCores() may return NA and is 1 on a single-core machine, so clamp the
# fallback worker count to at least one.
default_workers <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
cat("Workers:", getOption("nova.workers", default_workers), "\n")
#> Workers: 3

# Customize options
options(
  nova.verbose = TRUE,  # Print progress messages
  nova.parallel = TRUE, # Enable parallelization
  nova.workers = 4      # Number of parallel workers
)

# For large datasets, use sparse matrices:
# Build an all-zero 1000 x 1000 example matrix in sparse form and compare its
# memory footprint with that of the dense equivalent
expr_example <- Matrix::Matrix(
  matrix(0, nrow = 1000, ncol = 1000),
  sparse = TRUE
)
dense_bytes <- object.size(as.matrix(expr_example))
sparse_bytes <- object.size(expr_example)
cat("Dense size:", dense_bytes, "bytes\n")
#> Dense size: 8000216 bytes
cat("Sparse size:", sparse_bytes, "bytes\n")
#> Sparse size: 9240 bytes
# NOVA automatically handles sparse matrices efficiently

# Create custom LR database
# Hand-built ligand-receptor table: one row per pair, annotated with a
# category and tagged with "custom" as its source
custom_lr <- data.table::data.table(
  ligand   = c("CXCL12", "CCL2", "IL6", "TGFB1", "VEGFA"),
  receptor = c("CXCR4", "CCR2", "IL6R", "TGFBR1", "KDR"),
  category = rep(
    c("chemokine", "cytokine", "growth_factor"),
    times = c(2, 1, 2)
  ),
  source   = rep("custom", times = 5)
)
print(custom_lr)
#>    ligand receptor      category source
#>    <char>   <char>        <char> <char>
#> 1: CXCL12    CXCR4     chemokine custom
#> 2:   CCL2     CCR2     chemokine custom
#> 3:    IL6     IL6R      cytokine custom
#> 4:  TGFB1   TGFBR1 growth_factor custom
#> 5:  VEGFA      KDR growth_factor custom

# Get built-in database
# Start from the built-in "lrc2p" database
builtin_lr <- GetLRDatabase("lrc2p")

# Extra ligand-receptor pairs to append
custom_pairs <- data.table::data.table(
  ligand   = paste0("CUSTOM_LIG", 1:2),
  receptor = paste0("CUSTOM_REC", 1:2)
)

# Reduce the built-in table to the two columns both tables share, then stack
# the custom pairs underneath
builtin_pairs <- builtin_lr[, .(ligand, receptor)]
extended_lr <- rbind(builtin_pairs, custom_pairs, fill = TRUE)
cat("Extended database size:", nrow(extended_lr), "pairs\n")
#> Extended database size: 2295 pairs

# Create example result
# Simulate a small expression matrix whose row names are genes taken from the
# built-in database, assign every cell to a random cluster, and extract edges
set.seed(42)
n_genes <- 100
n_cells <- 200
expr <- matrix(
  abs(rnorm(n_genes * n_cells)),
  nrow = n_genes,
  ncol = n_cells
)

# Rows: the first 50 distinct ligands followed by the first 50 distinct
# receptors
lr_db <- GetLRDatabase("lrc2p")
first_50 <- seq_len(50)
ligand_genes <- unique(lr_db$ligand)[first_50]
receptor_genes <- unique(lr_db$receptor)[first_50]
rownames(expr) <- c(ligand_genes, receptor_genes)
colnames(expr) <- paste0("Cell", seq_len(n_cells))
expr <- Matrix::Matrix(expr, sparse = TRUE)

clusters <- sample(c("A", "B", "C"), size = n_cells, replace = TRUE)
annotation <- data.frame(cell = colnames(expr), cluster = clusters)
result <- ExtractEdges(expr, annotation, species = "human")

# Get edges for specific cluster pairs
# Query the result by sending cluster, by target cluster, or by both
edges_A_to_B <- GetEdges(result, sending = "A", target = "B")
edges_from_A <- GetEdges(result, sending = "A")
edges_to_C <- GetEdges(result, target = "C")

# Report how many edges each query returned
n_A_to_B <- nrow(edges_A_to_B)
n_from_A <- nrow(edges_from_A)
n_to_C <- nrow(edges_to_C)
cat("A -> B:", n_A_to_B, "edges\n")
cat("A -> any:", n_from_A, "edges\n")
cat("any -> C:", n_to_C, "edges\n")

# Process multiple samples
# Each sample is read from two files in the working directory:
# "<sample>_expression.rds" and "<sample>_annotation.csv"
samples <- c("sample1", "sample2", "sample3")
results <- list()

# The loop variables are deliberately not named `sample` or `expr`: the former
# masks base::sample(), the latter would overwrite the `expr` object that
# other chunks of this vignette read.
for (sample_id in samples) {
  # Load data
  sample_expr <- readRDS(paste0(sample_id, "_expression.rds"))
  sample_ann <- read.csv(paste0(sample_id, "_annotation.csv"))

  # Run analysis
  results[[sample_id]] <- ExtractEdges(
    expression = sample_expr,
    annotation = sample_ann,
    species = "human"
  )
  cat("Processed", sample_id, ":", nrow(results[[sample_id]]$edges), "edges\n")
}

# Combine results, tagging every edge with the sample it came from.
# rbindlist() is namespace-qualified like the other data.table calls here.
all_edges <- data.table::rbindlist(lapply(names(results), function(s) {
  edges <- results[[s]]$edges
  edges$sample <- s
  edges
}))

# Define analysis function
#' Run the NOVA workflow on a Seurat object
#'
#' Converts the object with `SeuratToNOVA()`, extracts edges with
#' `ExtractEdges()` and attaches the result with `AddNOVAResults()`.
#'
#' @param seurat_obj Object handed to `SeuratToNOVA()` and `AddNOVAResults()`.
#' @param cluster_col Forwarded to `SeuratToNOVA()` as `cluster_col`.
#' @param species Forwarded to `ExtractEdges()` as `species`.
#' @param ... Further arguments forwarded to `ExtractEdges()`.
#' @return A list with two elements: `seurat`, the value returned by
#'   `AddNOVAResults()`, and `nova`, the value returned by `ExtractEdges()`.
analyze_communication <- function(seurat_obj,
                                  cluster_col = "cell_type",
                                  species = "human",
                                  ...) {
  # Seurat object -> expression matrix + annotation
  converted <- SeuratToNOVA(seurat_obj, cluster_col = cluster_col)

  nova_result <- ExtractEdges(
    expression = converted$expression,
    annotation = converted$annotation,
    species = species,
    ...
  )

  # Hand back both the annotated Seurat object and the raw NOVA result
  list(
    seurat = AddNOVAResults(seurat_obj, nova_result),
    nova = nova_result
  )
}
# Use the pipeline. `seurat_obj` is not created anywhere in this vignette;
# presumably it is your own Seurat object (TODO confirm). `min_pct` travels
# through `...` to ExtractEdges().
output <- analyze_communication(
  seurat_obj = seurat_obj,
  species = "mouse",
  cluster_col = "cell_type",
  min_pct = 0.1
)

# Export edges for Cytoscape visualization
# Edge table: one row per ligand-receptor edge between two clusters, weighted
# by the mean edge specificity
edges <- result$edges[, .(
  source = sending_cluster,
  target = target_cluster,
  interaction = paste(ligand, receptor, sep = "-"),
  weight = edge_specificity_mean
)]
write.csv(edges, "cytoscape_edges.csv", row.names = FALSE)

# Export node attributes: every cluster that sends or receives an edge.
# `type` is repeated to the number of nodes so that an empty edge table gives
# an empty node table; data.frame() refuses to combine a zero-length column
# with a length-one column.
node_ids <- unique(c(edges$source, edges$target))
nodes <- data.frame(
  id = node_ids,
  type = rep("cluster", length(node_ids))
)
write.csv(nodes, "cytoscape_nodes.csv", row.names = FALSE)

# Convert NOVA results to CellChat format
#' Convert NOVA results to a CellChat-style interaction table
#'
#' @param result A NOVA result. Only its `edges` element is read; it must
#'   provide the columns `sending_cluster`, `target_cluster`, `ligand`,
#'   `receptor` and `edge_specificity_mean`.
#' @return A data.frame with one row per edge and the columns `source`,
#'   `target`, `ligand`, `receptor`, `prob` (the mean edge specificity) and
#'   `pval` (always `NA`). An empty edge table yields a zero-row data.frame.
nova_to_cellchat <- function(result) {
  edges <- result$edges
  n_edges <- length(edges$sending_cluster)

  # Create interaction data frame. `pval` is repeated to the number of edges
  # rather than passed as a single NA; otherwise data.frame() fails with
  # "arguments imply differing number of rows" when there are no edges.
  data.frame(
    source = edges$sending_cluster,
    target = edges$target_cluster,
    ligand = edges$ligand,
    receptor = edges$receptor,
    prob = edges$edge_specificity_mean,
    pval = rep(NA, n_edges) # NOVA doesn't compute p-values
  )
}

# 1. No edges detected
# Check gene overlap with database
lr_db <- GetLRDatabase("lrc2p")
genes_in_data <- rownames(expr)

# Count distinct genes: a ligand or receptor that takes part in several pairs
# sits on several rows of the database and must only be counted once.
ligand_overlap <- sum(unique(lr_db$ligand) %in% genes_in_data)
receptor_overlap <- sum(unique(lr_db$receptor) %in% genes_in_data)
cat("Ligands found:", ligand_overlap, "\n")
cat("Receptors found:", receptor_overlap, "\n")

# Lower thresholds if needed
result <- ExtractEdges(expr, annotation, species = "human", min_pct = 0)

# 2. Memory issues
# Use sparse matrix. An input that is already sparse is kept as is, because
# as.matrix() would first expand it into a full dense matrix.
if (methods::is(expr, "sparseMatrix")) {
  expr_sparse <- expr
} else {
  expr_sparse <- Matrix::Matrix(as.matrix(expr), sparse = TRUE)
}

# Process clusters in batches
batch_size <- 5L
unique_clusters <- unique(annotation$cluster)
n_clusters <- length(unique_clusters)

# First index of every batch: 1, 6, 11, ... With no clusters this is empty and
# the loop is skipped, whereas seq(1, 0, by = 5) stops with an error.
batch_starts <- seq.int(
  from = 1L,
  by = batch_size,
  length.out = ceiling(n_clusters / batch_size)
)
for (i in batch_starts) {
  subset_clusters <- unique_clusters[i:min(i + batch_size - 1L, n_clusters)]
  # Process subset...
}

# 3. Species mapping issues
# Print the R version, platform and package versions of the current session
utils::sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] data.table_1.18.4 NOVA_1.0.0 rmarkdown_2.31
#>
#> loaded via a namespace (and not attached):
#> [1] Matrix_1.7-5 gtable_0.3.6 jsonlite_2.0.0 dplyr_1.2.1
#> [5] compiler_4.6.0 tidyselect_1.2.1 Rcpp_1.1.1-1.1 parallel_4.6.0
#> [9] jquerylib_0.1.4 scales_1.4.0 yaml_2.3.12 fastmap_1.2.0
#> [13] lattice_0.22-9 ggplot2_4.0.3 R6_2.6.1 generics_0.1.4
#> [17] knitr_1.51 tibble_3.3.1 maketools_1.3.2 bslib_0.11.0
#> [21] pillar_1.11.1 RColorBrewer_1.1-3 rlang_1.2.0 cachem_1.1.0
#> [25] xfun_0.57 sass_0.4.10 sys_3.4.3 S7_0.2.2
#> [29] otel_0.2.0 cli_3.6.6 magrittr_2.0.5 digest_0.6.39
#> [33] grid_4.6.0 lifecycle_1.0.5 vctrs_0.7.3 evaluate_1.0.5
#> [37] glue_1.8.1 farver_2.1.2 buildtools_1.0.0 tools_4.6.0
#> [41] pkgconfig_2.0.3 htmltools_0.5.9