---
title: "Seurat Integration Guide"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output: 
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
    fig_caption: true
vignette: >
  %\VignetteIndexEntry{Seurat Integration Guide}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  collapse = TRUE,
  comment = "#>",
  fig.width = 10,
  fig.height = 6,
  out.width = "100%",
  eval = FALSE
)
```

## Introduction

SCEVAN seamlessly integrates with **Seurat**, the most widely used R toolkit for single-cell analysis. This guide demonstrates how to:

1. Extract count matrices from Seurat objects
2. Add SCEVAN results back to Seurat
3. Visualize CNA information in Seurat plots

**SCEVAN supports both Seurat v4 and v5 data structures.**

## Setup

```{r load-packages}
library(SCEVAN)
library(Seurat)
library(ggplot2)
library(dplyr)
```

## Extracting Data from Seurat

### Using getCountMtxFromSeurat()

SCEVAN provides a helper function that works with both Seurat v4 and v5:

```{r extract-counts}
# Load your Seurat object
seurat_obj <- readRDS("path/to/seurat_object.rds")

# Extract raw counts (v4/v5 compatible)
count_mtx <- getCountMtxFromSeurat(
  seurat_obj,
  assay = "RNA",      # Default assay
  layer = "counts"    # For Seurat v5
)

# Check dimensions
dim(count_mtx)
```

### Manual Extraction

For more control, you can extract manually:

```{r manual-extract}
# Seurat v4
count_mtx <- as.matrix(GetAssayData(seurat_obj, slot = "counts"))

# Seurat v5
count_mtx <- as.matrix(GetAssayData(seurat_obj, layer = "counts"))
```

## Running SCEVAN

### Standard Analysis

```{r run-scevan}
# Run SCEVAN pipeline
results <- pipelineCNA(
  count_mtx,
  sample = "MySample",
  par_cores = 4,
  organism = "human",
  SUBCLONES = TRUE
)

# View results
head(results)
```

### Using Seurat Clusters as Prior

You can use Seurat's clustering to identify potential normal cells:

```{r use-clusters}
# Get cluster assignments
clusters <- Idents(seurat_obj)

# Identify clusters with normal cell markers
# (e.g., immune clusters, fibroblast clusters)
normal_clusters <- c("T_cells", "Macrophages", "Fibroblasts")
potential_normals <- names(clusters)[clusters %in% normal_clusters]

# Run SCEVAN with known normals
results <- pipelineCNA(
  count_mtx,
  sample = "MySample_guided",
  norm_cell = potential_normals,
  SUBCLONES = TRUE
)
```

## Adding Results to Seurat

### Add Cell Classifications

```{r add-metadata}
# Ensure cell names match
results_matched <- results[colnames(seurat_obj), , drop = FALSE]

# Add all SCEVAN columns
seurat_obj <- AddMetaData(seurat_obj, metadata = results_matched)

# Check added columns
head(seurat_obj@meta.data)
```
### Add CNA Scores

```{r add-cna-scores}
# Load CNA matrix
load("./output/MySample_CNAmtx.RData")
load("./output/MySample_count_mtx_annot.RData")

# Calculate global CNA score per cell
global_cna <- colMeans(abs(CNA_mtx_relat))
seurat_obj$CNA_score <- global_cna[colnames(seurat_obj)]

# Calculate chromosome-specific scores
chr7_cna <- colMeans(CNA_mtx_relat[count_mtx_annot$seqnames == 7, ])
seurat_obj$chr7_cna <- chr7_cna[colnames(seurat_obj)]

chr10_cna <- colMeans(CNA_mtx_relat[count_mtx_annot$seqnames == 10, ])
seurat_obj$chr10_cna <- chr10_cna[colnames(seurat_obj)]
```

## Visualization in Seurat

### UMAP with Cell Classification

```{r umap-class}
# Color by malignant/normal classification
DimPlot(
  seurat_obj, 
  group.by = "class",
  cols = c("tumor" = "#E74C3C", "normal" = "#3498DB", "filtered" = "gray")
) +
  ggtitle("SCEVAN Cell Classification")
```

### UMAP with Subclones

```{r umap-subclones}
# Color by subclone (tumor cells only)
DimPlot(
  seurat_obj,
  group.by = "subclone",
  na.value = "lightgray"
) +
  ggtitle("Tumor Subclone Assignment")
```

### Feature Plot with CNA Scores

```{r feature-cna}
# Global CNA burden
FeaturePlot(
  seurat_obj,
  features = "CNA_score",
  cols = c("lightgray", "red")
) +
  ggtitle("Global CNA Score")

# Chromosome-specific
FeaturePlot(
  seurat_obj,
  features = c("chr7_cna", "chr10_cna"),
  cols = c("blue", "white", "red"),
  min.cutoff = -0.3,
  max.cutoff = 0.3
)
```

### Violin Plots

```{r violin-plots}
# CNA score by cell type
VlnPlot(
  seurat_obj,
  features = "CNA_score",
  group.by = "class",
  pt.size = 0
) +
  NoLegend()

# CNA by Seurat cluster
VlnPlot(
  seurat_obj,
  features = "CNA_score",
  group.by = "seurat_clusters",
  pt.size = 0
)
```

## Advanced Integration

### Region-Specific CNA Visualization

```{r region-specific}
# Define genomic regions of interest
regions <- list(
  EGFR = c(chr = 7, start = 55019017, end = 55211628),
  CDKN2A = c(chr = 9, start = 21967751, end = 21995301),
  PTEN = c(chr = 10, start = 87863113, end = 87971930)
)

# Calculate CNA for each region
for(gene in names(regions)) {
  r <- regions[[gene]]
  region_cna <- colMeans(CNA_mtx_relat[
    count_mtx_annot$seqnames == r["chr"] &
    count_mtx_annot$start >= r["start"] &
    count_mtx_annot$end <= r["end"], , drop = FALSE
  ])
  seurat_obj[[paste0(gene, "_cna")]] <- region_cna[colnames(seurat_obj)]
}

# Visualize
FeaturePlot(seurat_obj, features = c("EGFR_cna", "PTEN_cna"))
```

### Combined Visualization

```{r combined-viz}
# Create combined plot with classification and clustering
p1 <- DimPlot(seurat_obj, group.by = "class") + 
  ggtitle("SCEVAN Classification") + NoLegend()

p2 <- DimPlot(seurat_obj, group.by = "seurat_clusters") + 
  ggtitle("Seurat Clusters") + NoLegend()

p3 <- FeaturePlot(seurat_obj, features = "CNA_score") + 
  ggtitle("CNA Score")

p4 <- DimPlot(seurat_obj, group.by = "subclone", na.value = "lightgray") + 
  ggtitle("Subclones")

# Combine
(p1 | p2) / (p3 | p4)
```

## Downstream Analysis

### Differential Expression by Subclone

```{r de-subclone}
# Set identity to subclone
Idents(seurat_obj) <- "subclone"

# Find markers for each subclone
subclone_markers <- FindAllMarkers(
  seurat_obj,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)

# Top markers per subclone
top_markers <- subclone_markers %>%
  group_by(cluster) %>%
  top_n(10, avg_log2FC)

print(top_markers)
```

### Pathway Analysis per Subclone

```{r pathway-subclone}
# Use marker genes for pathway analysis
# Example with enrichR or clusterProfiler

# Get genes from subclone 1
s1_genes <- subclone_markers %>%
  filter(cluster == 1, p_val_adj < 0.05) %>%
  pull(gene)

# Run enrichment (requires clusterProfiler)
# enrichGO(s1_genes, OrgDb = org.Hs.eg.db, keyType = "SYMBOL")
```

## Creating New Seurat Object

### From Scratch with SCEVAN Results

```{r new-seurat}
# Create new Seurat object with SCEVAN metadata
seurat_new <- CreateSeuratObject(
  counts = count_mtx,
  meta.data = results,
  project = "SCEVAN_analysis"
)

# Standard Seurat workflow
seurat_new <- NormalizeData(seurat_new)
seurat_new <- FindVariableFeatures(seurat_new)
seurat_new <- ScaleData(seurat_new)
seurat_new <- RunPCA(seurat_new)
seurat_new <- RunUMAP(seurat_new, dims = 1:30)

# Visualize with SCEVAN results
DimPlot(seurat_new, group.by = "class")
```

## Best Practices

### Memory Management

```{r memory-tips}
# For large datasets, process in chunks
# or use sparse matrices

# Check memory usage
format(object.size(seurat_obj), units = "GB")

# Remove intermediate objects
rm(CNA_mtx_relat)
gc()
```

### Saving Results

```{r save-results}
# Save enhanced Seurat object
saveRDS(seurat_obj, "seurat_with_SCEVAN.rds")

# Save just the metadata
write.csv(
  seurat_obj@meta.data,
  "SCEVAN_metadata.csv",
  row.names = TRUE
)
```

## Troubleshooting

### Common Issues

| Issue | Solution |
|-------|----------|
| Cell name mismatch | Check for `-` vs `.` in barcodes |
| Missing cells | Verify cells passed SCEVAN filtering |
| Empty CNA values | Check if cell is classified as "filtered" |

### Cell Name Formatting

```{r fix-names}
# SCEVAN may convert "-" to "."
# Fix cell names if needed
colnames(count_mtx) <- gsub("-", ".", colnames(count_mtx))

# Or in Seurat object
seurat_obj <- RenameCells(
  seurat_obj,
  new.names = gsub("-", ".", colnames(seurat_obj))
)
```

## Session Info

```{r session-info, eval=TRUE}
sessionInfo()
```