---
title: "Advanced Usage"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output: 
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
    fig_caption: true
vignette: >
  %\VignetteIndexEntry{Advanced Usage}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
# Global chunk defaults for the rendered vignette.
knitr::opts_chunk$set(
  # Output formatting: merge code and output, prefix output lines with "#>"
  comment = "#>",
  collapse = TRUE,
  # Figure geometry: 8 x 6, horizontally centered
  fig.align = "center",
  fig.height = 6,
  fig.width = 8,
  # Keep messages and warnings out of the rendered document
  warning = FALSE,
  message = FALSE
)
```

## Introduction

This vignette covers advanced NOVA usage including:

- Performance optimization
- Custom LR databases
- Parallel processing
- Filtering and subsetting
- Integration with other tools
- Programmatic workflows
- Troubleshooting

## Setup

```{r load}
library(NOVA)
library(data.table)
```

## Performance Optimization

### Parallel Processing

NOVA supports parallel computation via the `future` package:

```{r parallel, eval=FALSE}
# Enable parallel processing
library(future)

# Leave one core free for the rest of the system.
# availableCores() is used instead of parallel::detectCores() - 1 because
# detectCores() may return NA, and "- 1" yields 0 workers on a single-core
# machine; availableCores(omit = 1) always returns at least 1.
n_workers <- availableCores(omit = 1)

# plan() invisibly returns the previously active plan; keep it so it can be
# restored afterwards instead of unconditionally forcing `sequential`.
old_plan <- plan(multisession, workers = n_workers)

# Run analysis (automatically parallelized)
result <- ExtractEdges(
  expression = expr,
  annotation = annotation,
  species = "human"
)

# Restore whatever plan was active before (sequential by default)
plan(old_plan)
```

### NOVA Options

Configure global behavior:

```{r options}
# Fallback worker count shown when `nova.workers` is unset: all cores but one,
# never fewer than 1. parallel::detectCores() may return NA (unknown) or 1,
# so a bare `detectCores() - 1` could display NA or 0.
default_workers <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)

# View current options (second argument of getOption() is the fallback
# printed when the option has not been set)
cat("Verbose:", getOption("nova.verbose", TRUE), "\n")
cat("Parallel:", getOption("nova.parallel", TRUE), "\n")
cat("Workers:", getOption("nova.workers", default_workers), "\n")

# Customize options
options(
  nova.verbose = TRUE,      # Print progress messages
  nova.parallel = TRUE,     # Enable parallelization
  nova.workers = 4          # Number of parallel workers
)
```

### Memory Efficiency

For large datasets, use sparse matrices:

```{r memory}
# Compare the memory footprint of an all-zero 1000 x 1000 matrix stored
# sparsely versus the same matrix converted back to dense form.
expr_example <- Matrix::Matrix(
  matrix(0, nrow = 1000, ncol = 1000),
  sparse = TRUE
)
dense_bytes <- object.size(as.matrix(expr_example))
sparse_bytes <- object.size(expr_example)
cat("Dense size:", dense_bytes, "bytes\n")
cat("Sparse size:", sparse_bytes, "bytes\n")

# NOVA automatically handles sparse matrices efficiently
```

## Custom Ligand-Receptor Database

### Creating Custom Database

```{r custom_db}
# Five hand-picked ligand-receptor pairs, one pair per row, each labelled
# with a category and tagged with source "custom".
custom_lr <- data.table::data.table(
  ligand   = c("CXCL12", "CCL2", "IL6", "TGFB1", "VEGFA"),
  receptor = c("CXCR4", "CCR2", "IL6R", "TGFBR1", "KDR"),
  category = rep(
    c("chemokine", "cytokine", "growth_factor"),
    times = c(2, 1, 2)
  ),
  source   = rep("custom", 5)
)

print(custom_lr)
```

### Using Custom Database

```{r use_custom, eval=FALSE}
# Hand the custom pair table to ExtractEdges() through `lr_database`
result <- ExtractEdges(
  expression = expr, annotation = annotation,
  species = "human",
  lr_database = custom_lr
)
```

### Extending Built-in Database

```{r extend_db}
# Start from the built-in "lrc2p" database
builtin_lr <- GetLRDatabase("lrc2p")

# Two user-defined pairs to append
custom_pairs <- data.table::data.table(
  ligand = c("CUSTOM_LIG1", "CUSTOM_LIG2"),
  receptor = c("CUSTOM_REC1", "CUSTOM_REC2")
)

# Reduce the built-in table to the ligand/receptor columns so both pieces
# have matching columns, then stack them by column name.
builtin_pairs <- builtin_lr[, .(ligand, receptor)]
extended_lr <- data.table::rbindlist(
  list(builtin_pairs, custom_pairs),
  use.names = TRUE,
  fill = TRUE
)
cat("Extended database size:", nrow(extended_lr), "pairs\n")
```

## Filtering and Subsetting

### Advanced Filtering

```{r filter_setup, eval=FALSE}
# Create example result
set.seed(42)
n_cells <- 200

# Pick up to 50 ligands and up to 50 receptors from the database.
# head() (rather than `[1:50]`) avoids NA row names when fewer than 50 unique
# genes exist, and setdiff() drops receptors already chosen as ligands so the
# expression matrix never gets duplicated row names.
lr_db <- GetLRDatabase("lrc2p")
ligands <- head(unique(lr_db$ligand), 50)
receptors <- head(setdiff(unique(lr_db$receptor), ligands), 50)
genes <- c(ligands, receptors)
n_genes <- length(genes)  # derived from the selection, not hard-coded

# Random non-negative expression values, genes in rows and cells in columns
expr <- matrix(
  abs(rnorm(n_genes * n_cells)),
  nrow = n_genes,
  ncol = n_cells,
  dimnames = list(genes, paste0("Cell", seq_len(n_cells)))
)
expr <- Matrix::Matrix(expr, sparse = TRUE)

# Randomly assign every cell to one of three clusters
clusters <- sample(c("A", "B", "C"), n_cells, replace = TRUE)
annotation <- data.frame(cell = colnames(expr), cluster = clusters)

result <- ExtractEdges(expr, annotation, species = "human")
```

```{r filtering, eval=FALSE}
# Apply three thresholds in a single FilterEdges() call
filtered <- FilterEdges(result, min_pct = 0.1, min_mean = 0.5, min_specificity = 0.2)

# Report how many edges exist before and after filtering
n_before <- nrow(result$edges)
n_after <- nrow(filtered$edges)
cat("Original edges:", n_before, "\n")
cat("Filtered edges:", n_after, "\n")
```

### Subsetting by Cluster

```{r subset, eval=FALSE}
# Query edges by sending cluster, target cluster, or both
edges_from_A <- GetEdges(result, sending = "A")
edges_to_C <- GetEdges(result, target = "C")
edges_A_to_B <- GetEdges(result, sending = "A", target = "B")

# Report the number of edges returned by each query
n_A_to_B <- nrow(edges_A_to_B)
n_from_A <- nrow(edges_from_A)
n_to_C <- nrow(edges_to_C)
cat("A -> B:", n_A_to_B, "edges\n")
cat("A -> any:", n_from_A, "edges\n")
cat("any -> C:", n_to_C, "edges\n")
```

## Programmatic Workflows

### Batch Processing

```{r batch, eval=FALSE}
# Process multiple samples
samples <- c("sample1", "sample2", "sample3")
results <- list()

# The loop variable is `sample_id` (not `sample`) so it does not mask
# base::sample().
for (sample_id in samples) {
  # Load data: "<sample>_expression.rds" and "<sample>_annotation.csv"
  expr <- readRDS(paste0(sample_id, "_expression.rds"))
  ann <- read.csv(paste0(sample_id, "_annotation.csv"))

  # Run analysis; results are stored under the sample name
  results[[sample_id]] <- ExtractEdges(
    expression = expr,
    annotation = ann,
    species = "human"
  )

  cat("Processed", sample_id, ":", nrow(results[[sample_id]]$edges), "edges\n")
}

# Combine results into one table. `idcol` takes the sample label from the
# list names, which also works for samples with zero edges (assigning a
# length-1 `sample` column to a zero-row data.frame would error) and avoids
# copying every per-sample table. Note: `sample` is the FIRST column here.
all_edges <- rbindlist(
  lapply(results, function(res) res$edges),
  idcol = "sample",
  use.names = TRUE,
  fill = TRUE
)
```

### Custom Analysis Pipeline

```{r pipeline, eval=FALSE}
# analyze_communication(): run the NOVA workflow on a Seurat object.
#
# Arguments:
#   seurat_obj   object passed to SeuratToNOVA() and AddNOVAResults()
#   cluster_col  forwarded to SeuratToNOVA() as `cluster_col`
#   species      forwarded to ExtractEdges() as `species`
#   ...          any further arguments, forwarded unchanged to ExtractEdges()
#
# Returns a list with two elements:
#   seurat  the value returned by AddNOVAResults(seurat_obj, <NOVA result>)
#   nova    the value returned by ExtractEdges()
analyze_communication <- function(seurat_obj, 
                                   cluster_col = "cell_type",
                                   species = "human",
                                   ...) {
  # Step 1: pull expression and annotation out of the Seurat object
  converted <- SeuratToNOVA(seurat_obj, cluster_col = cluster_col)

  # Step 2: edge extraction on the converted inputs
  nova_result <- ExtractEdges(
    expression = converted$expression,
    annotation = converted$annotation,
    species = species,
    ...
  )

  # Step 3: attach the result to the Seurat object and return both
  list(
    seurat = AddNOVAResults(seurat_obj, nova_result),
    nova = nova_result
  )
}

# Example call on mouse data; `min_pct` is not a named parameter of
# analyze_communication(), so it is passed on through `...`.
output <- analyze_communication(
  seurat_obj,
  species = "mouse",
  cluster_col = "cell_type",
  min_pct = 0.1
)
```

## Integration with Other Tools

### Export for Cytoscape

```{r cytoscape, eval=FALSE}
# Export edges for Cytoscape visualization.
# Cluster labels are converted to character so that combining source and
# target below yields labels even when the columns are factors (c() on
# factors returns integer codes on R < 4.1).
edges <- result$edges[, .(
  source = as.character(sending_cluster),
  target = as.character(target_cluster),
  interaction = paste(ligand, receptor, sep = "-"),
  weight = edge_specificity_mean
)]

write.csv(edges, "cytoscape_edges.csv", row.names = FALSE)

# Export node attributes: one row per cluster that appears as source or target.
# `type` is repeated explicitly so an empty edge table gives an empty node
# table instead of a "differing number of rows" error from data.frame().
node_ids <- unique(c(edges$source, edges$target))
nodes <- data.frame(
  id = node_ids,
  type = rep("cluster", length(node_ids))
)
write.csv(nodes, "cytoscape_nodes.csv", row.names = FALSE)
```

### Integration with CellChat/LIANA

```{r integration, eval=FALSE}
# Convert NOVA results to a CellChat-style long interaction table.
#
# Argument:
#   result  a list whose `edges` element is a data.frame/data.table with the
#           columns sending_cluster, target_cluster, ligand, receptor and
#           edge_specificity_mean
#
# Returns a data.frame with one row per edge and the columns
#   source, target, ligand, receptor, prob, pval
# where `prob` is edge_specificity_mean and `pval` is all NA.
#
# Stops with an error if any required column is missing (data.frame() would
# otherwise silently drop it). Works for zero-row edge tables.
nova_to_cellchat <- function(result) {
  edges <- result$edges

  required <- c(
    "sending_cluster", "target_cluster",
    "ligand", "receptor", "edge_specificity_mean"
  )
  missing_cols <- setdiff(required, names(edges))
  if (length(missing_cols) > 0) {
    stop(
      "`result$edges` is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Create interaction data frame
  data.frame(
    source = edges[["sending_cluster"]],
    target = edges[["target_cluster"]],
    ligand = edges[["ligand"]],
    receptor = edges[["receptor"]],
    prob = edges[["edge_specificity_mean"]],
    # NOVA doesn't compute p-values. The NA is numeric and sized to the edge
    # table so that zero-row input does not trigger a recycling error.
    pval = rep(NA_real_, nrow(edges))
  )
}
```

## Troubleshooting

### Common Issues

**1. No edges detected**
```{r troubleshoot1, eval=FALSE}
# Check gene overlap with database
lr_db <- GetLRDatabase("lrc2p")
genes_in_data <- rownames(expr)

# Count DISTINCT genes: the same ligand or receptor can occur in several
# database rows, so counting rows would overstate how many genes were found.
ligand_overlap <- sum(unique(lr_db$ligand) %in% genes_in_data)
receptor_overlap <- sum(unique(lr_db$receptor) %in% genes_in_data)

# Database rows whose ligand AND receptor are both present in the data.
# NOTE(review): presumably only such pairs can produce edges - confirm
# against ExtractEdges().
pair_overlap <- sum(
  lr_db$ligand %in% genes_in_data & lr_db$receptor %in% genes_in_data
)

cat("Ligands found:", ligand_overlap, "\n")
cat("Receptors found:", receptor_overlap, "\n")
cat("Complete pairs found:", pair_overlap, "\n")

# Lower thresholds if needed
result <- ExtractEdges(expr, annotation, species = "human", min_pct = 0)
```

**2. Memory issues**
```{r troubleshoot2, eval=FALSE}
# Use sparse matrix. Input that is already sparse is kept as is: routing it
# through as.matrix() would first build a full dense copy, which is exactly
# what a memory-constrained session cannot afford.
expr_sparse <- if (inherits(expr, "sparseMatrix")) {
  expr
} else {
  Matrix::Matrix(as.matrix(expr), sparse = TRUE)
}

# Process clusters in batches of `batch_size`.
# split() over a running batch index handles a final partial batch and an
# empty cluster vector (seq(1, 0, by = 5) would raise an error).
batch_size <- 5
unique_clusters <- unique(annotation$cluster)
cluster_batches <- split(
  unique_clusters,
  ceiling(seq_along(unique_clusters) / batch_size)
)
for (subset_clusters in cluster_batches) {
  # Process subset...
}
```

**3. Species mapping issues**
```{r troubleshoot3, eval=FALSE}
# Inspect the first few gene names; they should match species conventions
head(rownames(expr), n = 6L)

# Verify species parameter: pass it in lower case ("mouse"),
# not "Mouse" or "MOUSE"
result <- ExtractEdges(
  expr,
  annotation,
  species = "mouse"
)
```

## Session Info

```{r session}
# Record the R version and loaded packages used to build this vignette
utils::sessionInfo()
```

## Author

**Zaoqu Liu**

- Email: liuzaoqu@163.com
- GitHub: [Zaoqu-Liu](https://github.com/Zaoqu-Liu)