Advanced Usage

Introduction

This vignette covers advanced NOVA usage including:

Performance optimization
Custom LR databases
Parallel processing
Integration with other tools
Programmatic workflows

Setup

library(NOVA)
library(data.table)

Performance Optimization

Parallel Processing

NOVA supports parallel computation via the future package:

# Enable parallel processing
library(future)

# Use all cores minus 1.
# availableCores() respects container / scheduler / R-option limits and always
# returns at least 1, whereas parallel::detectCores() - 1 can evaluate to 0
# (single-core machine) or NA (core count unknown), neither of which is a
# usable worker count.
n_workers <- availableCores(omit = 1)
plan(multisession, workers = n_workers)

# Run analysis (automatically parallelized)
result <- ExtractEdges(
  expression = expr,
  annotation = annotation,
  species = "human"
)

# Reset to sequential
plan(sequential)

NOVA Options

Configure global behavior:

# Inspect the current settings; the second argument of getOption() is the
# fallback that is shown when the option has not been set
verbose_opt <- getOption("nova.verbose", TRUE)
cat("Verbose:", verbose_opt, "\n")
#> Verbose: TRUE
parallel_opt <- getOption("nova.parallel", TRUE)
cat("Parallel:", parallel_opt, "\n")
#> Parallel: TRUE
workers_opt <- getOption("nova.workers", parallel::detectCores() - 1)
cat("Workers:", workers_opt, "\n")
#> Workers: 3

# Override the settings in one call by handing options() a named list
nova_settings <- list(
  nova.verbose = TRUE,      # Print progress messages
  nova.parallel = TRUE,     # Enable parallelization
  nova.workers = 4          # Number of parallel workers
)
options(nova_settings)

Memory Efficiency

For large datasets, use sparse matrices:

# Compare the footprint of an all-zero 1000 x 1000 matrix stored densely
# versus sparsely
expr_example <- Matrix::Matrix(matrix(0, 1000, 1000), sparse = TRUE)
dense_bytes <- object.size(as.matrix(expr_example))
cat("Dense size:", dense_bytes, "bytes\n")
#> Dense size: 8000216 bytes
sparse_bytes <- object.size(expr_example)
cat("Sparse size:", sparse_bytes, "bytes\n")
#> Sparse size: 9240 bytes

# NOVA automatically handles sparse matrices efficiently

Custom Ligand-Receptor Database

Creating Custom Database

# Build a small ligand-receptor table. The vectors are position-matched:
# element i of each vector describes pair i.
lr_ligands <- c("CXCL12", "CCL2", "IL6", "TGFB1", "VEGFA")
lr_receptors <- c("CXCR4", "CCR2", "IL6R", "TGFBR1", "KDR")
lr_categories <- c(
  "chemokine", "chemokine", "cytokine", "growth_factor", "growth_factor"
)

custom_lr <- data.table::data.table(
  ligand = lr_ligands,
  receptor = lr_receptors,
  category = lr_categories,
  source = rep("custom", length(lr_ligands))
)

print(custom_lr)
#>    ligand receptor      category source
#>    <char>   <char>        <char> <char>
#> 1: CXCL12    CXCR4     chemokine custom
#> 2:   CCL2     CCR2     chemokine custom
#> 3:    IL6     IL6R      cytokine custom
#> 4:  TGFB1   TGFBR1 growth_factor custom
#> 5:  VEGFA      KDR growth_factor custom

Using Custom Database

# Use custom database in analysis
# NOTE(review): `expr` and `annotation` are not created in this section; they
# must already exist in the session before this call runs — confirm.
result <- ExtractEdges(
  expression = expr,
  annotation = annotation,
  species = "human",
  lr_database = custom_lr  # Custom database
)

Extending Built-in Database

# Fetch the built-in "lrc2p" database
builtin_lr <- GetLRDatabase("lrc2p")

# Extra pairs to append
custom_pairs <- data.table::data.table(
  ligand = paste0("CUSTOM_LIG", 1:2),
  receptor = paste0("CUSTOM_REC", 1:2)
)

# Reduce the built-in table to the two columns both tables share, then stack
builtin_pairs <- builtin_lr[, .(ligand, receptor)]
extended_lr <- rbind(builtin_pairs, custom_pairs, fill = TRUE)
n_pairs <- nrow(extended_lr)
cat("Extended database size:", n_pairs, "pairs\n")
#> Extended database size: 2295 pairs

Filtering and Subsetting

Advanced Filtering

# Create example result
set.seed(42)
n_genes <- 100
n_cells <- 200

# Half of the rows are named after ligands and half after receptors. Deriving
# the split from n_genes (instead of a literal 50) keeps the row names the
# right length if n_genes is changed; the count therefore has to be even.
stopifnot(n_genes %% 2 == 0)
n_per_role <- n_genes / 2

expr <- matrix(abs(rnorm(n_genes * n_cells)), n_genes, n_cells)
lr_db <- GetLRDatabase("lrc2p")
rownames(expr) <- c(
  unique(lr_db$ligand)[seq_len(n_per_role)],
  unique(lr_db$receptor)[seq_len(n_per_role)]
)
colnames(expr) <- paste0("Cell", seq_len(n_cells))
expr <- Matrix::Matrix(expr, sparse = TRUE)

# Assign every cell to one of three clusters at random
clusters <- sample(c("A", "B", "C"), n_cells, replace = TRUE)
annotation <- data.frame(cell = colnames(expr), cluster = clusters)

result <- ExtractEdges(expr, annotation, species = "human")

# Filter by multiple criteria
filtered <- FilterEdges(
  result,
  min_pct = 0.1,
  min_mean = 0.5,
  min_specificity = 0.2
)

# Compare edge counts before and after filtering
cat("Original edges:", nrow(result$edges), "\n")
cat("Filtered edges:", nrow(filtered$edges), "\n")

Subsetting by Cluster

# Query edges by direction: both ends fixed, sender only, or target only
edges_A_to_B <- GetEdges(result, sending = "A", target = "B")
edges_from_A <- GetEdges(result, sending = "A")
edges_to_C <- GetEdges(result, target = "C")

# Report the number of edges each query returned, in the order queried
edge_counts <- list(
  "A -> B:" = nrow(edges_A_to_B),
  "A -> any:" = nrow(edges_from_A),
  "any -> C:" = nrow(edges_to_C)
)
for (label in names(edge_counts)) {
  cat(label, edge_counts[[label]], "edges\n")
}

Programmatic Workflows

Batch Processing

# Process multiple samples
samples <- c("sample1", "sample2", "sample3")
# Pre-size the result list and name its slots after the samples
results <- setNames(vector("list", length(samples)), samples)

# The loop variables carry a sample_ prefix so that the loop neither masks
# base::sample() nor overwrites the session-level `expr` object, which other
# sections of this vignette pair with `annotation`.
for (sample_id in samples) {
  # Load data: <sample>_expression.rds and <sample>_annotation.csv are read
  # from the working directory
  sample_expr <- readRDS(paste0(sample_id, "_expression.rds"))
  sample_ann <- read.csv(paste0(sample_id, "_annotation.csv"))

  # Run analysis
  results[[sample_id]] <- ExtractEdges(
    expression = sample_expr,
    annotation = sample_ann,
    species = "human"
  )

  cat("Processed", sample_id, ":", nrow(results[[sample_id]]$edges), "edges\n")
}

# Combine results, tagging every edge with the sample it came from
all_edges <- rbindlist(lapply(names(results), function(s) {
  edges <- results[[s]]$edges
  edges$sample <- s
  edges
}))

Custom Analysis Pipeline

# Define analysis function
#' Run NOVA edge extraction on a Seurat object and attach the result
#'
#' @param seurat_obj Object passed to `SeuratToNOVA()` and `AddNOVAResults()`.
#' @param cluster_col Column name forwarded to `SeuratToNOVA()`.
#' @param species Species string forwarded to `ExtractEdges()`.
#' @param ... Further arguments forwarded unchanged to `ExtractEdges()`.
#' @return A list with two elements: `seurat`, the value returned by
#'   `AddNOVAResults()`, and `nova`, the value returned by `ExtractEdges()`.
analyze_communication <- function(seurat_obj,
                                  cluster_col = "cell_type",
                                  species = "human",
                                  ...) {
  # Step 1: pull expression and annotation out of the Seurat object
  converted <- SeuratToNOVA(seurat_obj, cluster_col = cluster_col)

  # Step 2: extract edges, passing any extra arguments straight through
  edges_result <- ExtractEdges(
    expression = converted$expression,
    annotation = converted$annotation,
    species = species,
    ...
  )

  # Step 3: hand back both the annotated Seurat object and the raw result
  list(
    seurat = AddNOVAResults(seurat_obj, edges_result),
    nova = edges_result
  )
}

# Use the pipeline
# NOTE(review): `seurat_obj` is not created in the code shown here — supply
# your own object. `min_pct` is not a named parameter of
# analyze_communication(); it travels through `...` into ExtractEdges().
output <- analyze_communication(
  seurat_obj,
  cluster_col = "cell_type",
  species = "mouse",
  min_pct = 0.1
)

Integration with Other Tools

Export for Cytoscape

# Export edges for Cytoscape visualization: one row per ligand-receptor edge,
# with the interaction labelled "<ligand>-<receptor>"
edges <- result$edges[, .(
  source = sending_cluster,
  target = target_cluster,
  interaction = paste(ligand, receptor, sep = "-"),
  weight = edge_specificity_mean
)]

write.csv(edges, "cytoscape_edges.csv", row.names = FALSE)

# Export node attributes: every cluster that appears as a source or a target.
# `type` is repeated to the number of ids because a bare length-1 value cannot
# be recycled against a zero-length `id`, which would make data.frame() fail
# when there are no edges.
cluster_ids <- unique(c(edges$source, edges$target))
nodes <- data.frame(
  id = cluster_ids,
  type = rep("cluster", length(cluster_ids))
)
write.csv(nodes, "cytoscape_nodes.csv", row.names = FALSE)

Integration with CellChat/LIANA

# Convert NOVA results to CellChat format
#' Reshape NOVA edges into a CellChat-style interaction table
#'
#' @param result A list-like object whose `edges` element has the columns
#'   `sending_cluster`, `target_cluster`, `ligand`, `receptor` and
#'   `edge_specificity_mean`.
#' @return A data.frame with columns `source`, `target`, `ligand`, `receptor`,
#'   `prob` and `pval`, one row per edge (zero rows when there are no edges).
#'   `pval` is always `NA`.
nova_to_cellchat <- function(result) {
  edges <- result$edges
  n_edges <- length(edges$sending_cluster)

  # Create interaction data frame
  data.frame(
    source = edges$sending_cluster,
    target = edges$target_cluster,
    ligand = edges$ligand,
    receptor = edges$receptor,
    prob = edges$edge_specificity_mean,
    # NOVA doesn't compute p-values. The NA is repeated to the edge count
    # because a bare length-1 NA cannot be recycled against zero-length
    # columns and would make data.frame() fail for an empty result; NA_real_
    # keeps the column numeric.
    pval = rep(NA_real_, n_edges)
  )
}

Troubleshooting

Common Issues

1. No edges detected

# Check gene overlap with database
lr_db <- GetLRDatabase("lrc2p")
genes_in_data <- rownames(expr)
# Count distinct genes. The database holds one row per ligand-receptor pair,
# so a gene that takes part in several pairs would be counted once per row by
# sum(x %in% genes); intersect() counts it once.
ligand_overlap <- length(intersect(lr_db$ligand, genes_in_data))
receptor_overlap <- length(intersect(lr_db$receptor, genes_in_data))

cat("Ligands found:", ligand_overlap, "\n")
cat("Receptors found:", receptor_overlap, "\n")

# Lower thresholds if needed
result <- ExtractEdges(expr, annotation, species = "human", min_pct = 0)

2. Memory issues

# Use sparse matrix. An input that is already sparse is kept as is: sending it
# through as.matrix() would first materialise the full dense matrix, which is
# exactly the allocation to avoid when memory is tight.
expr_sparse <- if (inherits(expr, "sparseMatrix")) {
  expr
} else {
  Matrix::Matrix(as.matrix(expr), sparse = TRUE)
}

# Process clusters in batches
batch_size <- 5
unique_clusters <- unique(annotation$cluster)
# split() yields consecutive groups of up to batch_size clusters and simply
# returns no groups for an empty cluster vector, whereas seq(1, 0, by = 5)
# would stop with an error.
cluster_batches <- split(
  unique_clusters,
  ceiling(seq_along(unique_clusters) / batch_size)
)
for (subset_clusters in cluster_batches) {
  # Process subset...
}

3. Species mapping issues

# Check gene name format
# Prints the first few row names of `expr` for a visual check.
head(rownames(expr))  # Should match species conventions

# Verify species parameter
# NOTE(review): the lowercase spelling is presumably required because the
# value is matched case-sensitively — confirm against ExtractEdges().
result <- ExtractEdges(expr, annotation, species = "mouse")  # Not "Mouse" or "MOUSE"

Session Info

# Print the R version, platform, locale and attached/loaded package versions
# of the current session
sessionInfo()
#> R version 4.6.1 (2026-06-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 26.04 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.32.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] data.table_1.18.4 NOVA_1.0.0        rmarkdown_2.31   
#> 
#> loaded via a namespace (and not attached):
#>  [1] Matrix_1.7-5       gtable_0.3.6       jsonlite_2.0.0     dplyr_1.2.1       
#>  [5] compiler_4.6.1     tidyselect_1.2.1   Rcpp_1.1.2         parallel_4.6.1    
#>  [9] jquerylib_0.1.4    scales_1.4.0       yaml_2.3.12        fastmap_1.2.0     
#> [13] lattice_0.22-9     ggplot2_4.0.3      R6_2.6.1           generics_0.1.4    
#> [17] knitr_1.51         tibble_3.3.1       maketools_1.3.2    bslib_0.11.0      
#> [21] pillar_1.11.1      RColorBrewer_1.1-3 rlang_1.3.0        cachem_1.1.0      
#> [25] xfun_0.60          sass_0.4.10        sys_3.4.3          S7_0.2.2          
#> [29] otel_0.2.0         cli_3.6.6          magrittr_2.0.5     digest_0.6.39     
#> [33] grid_4.6.1         lifecycle_1.0.5    vctrs_0.7.3        evaluate_1.0.5    
#> [37] glue_1.8.1         farver_2.1.2       buildtools_1.0.0   tools_4.6.1       
#> [41] pkgconfig_2.0.3    htmltools_0.5.9

Author

Zaoqu Liu

Email: [email protected]
GitHub: Zaoqu-Liu