--- title: "Seurat Integration Guide" author: "Zaoqu Liu" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 fig_caption: true vignette: > %\VignetteIndexEntry{Seurat Integration Guide} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, message = FALSE, warning = FALSE, collapse = TRUE, comment = "#>", fig.width = 10, fig.height = 6, out.width = "100%", eval = FALSE ) ``` ## Introduction SCEVAN seamlessly integrates with **Seurat**, the most widely used R toolkit for single-cell analysis. This guide demonstrates how to: 1. Extract count matrices from Seurat objects 2. Add SCEVAN results back to Seurat 3. Visualize CNA information in Seurat plots **SCEVAN supports both Seurat v4 and v5 data structures.** ## Setup ```{r load-packages} library(SCEVAN) library(Seurat) library(ggplot2) library(dplyr) ``` ## Extracting Data from Seurat ### Using getCountMtxFromSeurat() SCEVAN provides a helper function that works with both Seurat v4 and v5: ```{r extract-counts} # Load your Seurat object seurat_obj <- readRDS("path/to/seurat_object.rds") # Extract raw counts (v4/v5 compatible) count_mtx <- getCountMtxFromSeurat( seurat_obj, assay = "RNA", # Default assay layer = "counts" # For Seurat v5 ) # Check dimensions dim(count_mtx) ``` ### Manual Extraction For more control, you can extract manually: ```{r manual-extract} # Seurat v4 count_mtx <- as.matrix(GetAssayData(seurat_obj, slot = "counts")) # Seurat v5 count_mtx <- as.matrix(GetAssayData(seurat_obj, layer = "counts")) ``` ## Running SCEVAN ### Standard Analysis ```{r run-scevan} # Run SCEVAN pipeline results <- pipelineCNA( count_mtx, sample = "MySample", par_cores = 4, organism = "human", SUBCLONES = TRUE ) # View results head(results) ``` ### Using Seurat Clusters as Prior You can use Seurat's clustering to identify potential normal cells: ```{r use-clusters} # Get cluster assignments clusters <- Idents(seurat_obj) # Identify clusters with normal cell markers # (e.g., immune clusters, fibroblast clusters) normal_clusters <- c("T_cells", "Macrophages", "Fibroblasts") potential_normals <- names(clusters)[clusters %in% normal_clusters] # Run SCEVAN with known normals results <- pipelineCNA( count_mtx, sample = "MySample_guided", norm_cell = potential_normals, SUBCLONES = TRUE ) ``` ## Adding Results to Seurat ### Add Cell Classifications ```{r add-metadata} # Ensure cell names match results_matched <- results[colnames(seurat_obj), , drop = FALSE] # Add all SCEVAN columns seurat_obj <- AddMetaData(seurat_obj, metadata = results_matched) # Check added columns head(seurat_obj@meta.data) ``` ### Add CNA Scores ```{r add-cna-scores} # Load CNA matrix load("./output/MySample_CNAmtx.RData") load("./output/MySample_count_mtx_annot.RData") # Calculate global CNA score per cell global_cna <- colMeans(abs(CNA_mtx_relat)) seurat_obj$CNA_score <- global_cna[colnames(seurat_obj)] # Calculate chromosome-specific scores chr7_cna <- colMeans(CNA_mtx_relat[count_mtx_annot$seqnames == 7, ]) seurat_obj$chr7_cna <- chr7_cna[colnames(seurat_obj)] chr10_cna <- colMeans(CNA_mtx_relat[count_mtx_annot$seqnames == 10, ]) seurat_obj$chr10_cna <- chr10_cna[colnames(seurat_obj)] ``` ## Visualization in Seurat ### UMAP with Cell Classification ```{r umap-class} # Color by malignant/normal classification DimPlot( seurat_obj, group.by = "class", cols = c("tumor" = "#E74C3C", "normal" = "#3498DB", "filtered" = "gray") ) + ggtitle("SCEVAN Cell Classification") ``` ### UMAP with Subclones ```{r umap-subclones} # Color by subclone (tumor cells only) DimPlot( seurat_obj, group.by = "subclone", na.value = "lightgray" ) + ggtitle("Tumor Subclone Assignment") ``` ### Feature Plot with CNA Scores ```{r feature-cna} # Global CNA burden FeaturePlot( seurat_obj, features = "CNA_score", cols = c("lightgray", "red") ) + ggtitle("Global CNA Score") # Chromosome-specific FeaturePlot( seurat_obj, features = c("chr7_cna", "chr10_cna"), cols = c("blue", "white", "red"), min.cutoff = -0.3, max.cutoff = 0.3 ) ``` ### Violin Plots ```{r violin-plots} # CNA score by cell type VlnPlot( seurat_obj, features = "CNA_score", group.by = "class", pt.size = 0 ) + NoLegend() # CNA by Seurat cluster VlnPlot( seurat_obj, features = "CNA_score", group.by = "seurat_clusters", pt.size = 0 ) ``` ## Advanced Integration ### Region-Specific CNA Visualization ```{r region-specific} # Define genomic regions of interest regions <- list( EGFR = c(chr = 7, start = 55019017, end = 55211628), CDKN2A = c(chr = 9, start = 21967751, end = 21995301), PTEN = c(chr = 10, start = 87863113, end = 87971930) ) # Calculate CNA for each region for(gene in names(regions)) { r <- regions[[gene]] region_cna <- colMeans(CNA_mtx_relat[ count_mtx_annot$seqnames == r["chr"] & count_mtx_annot$start >= r["start"] & count_mtx_annot$end <= r["end"], , drop = FALSE ]) seurat_obj[[paste0(gene, "_cna")]] <- region_cna[colnames(seurat_obj)] } # Visualize FeaturePlot(seurat_obj, features = c("EGFR_cna", "PTEN_cna")) ``` ### Combined Visualization ```{r combined-viz} # Create combined plot with classification and clustering p1 <- DimPlot(seurat_obj, group.by = "class") + ggtitle("SCEVAN Classification") + NoLegend() p2 <- DimPlot(seurat_obj, group.by = "seurat_clusters") + ggtitle("Seurat Clusters") + NoLegend() p3 <- FeaturePlot(seurat_obj, features = "CNA_score") + ggtitle("CNA Score") p4 <- DimPlot(seurat_obj, group.by = "subclone", na.value = "lightgray") + ggtitle("Subclones") # Combine (p1 | p2) / (p3 | p4) ``` ## Downstream Analysis ### Differential Expression by Subclone ```{r de-subclone} # Set identity to subclone Idents(seurat_obj) <- "subclone" # Find markers for each subclone subclone_markers <- FindAllMarkers( seurat_obj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25 ) # Top markers per subclone top_markers <- subclone_markers %>% group_by(cluster) %>% top_n(10, avg_log2FC) print(top_markers) ``` ### Pathway Analysis per Subclone ```{r pathway-subclone} # Use marker genes for pathway analysis # Example with enrichR or clusterProfiler # Get genes from subclone 1 s1_genes <- subclone_markers %>% filter(cluster == 1, p_val_adj < 0.05) %>% pull(gene) # Run enrichment (requires clusterProfiler) # enrichGO(s1_genes, OrgDb = org.Hs.eg.db, keyType = "SYMBOL") ``` ## Creating New Seurat Object ### From Scratch with SCEVAN Results ```{r new-seurat} # Create new Seurat object with SCEVAN metadata seurat_new <- CreateSeuratObject( counts = count_mtx, meta.data = results, project = "SCEVAN_analysis" ) # Standard Seurat workflow seurat_new <- NormalizeData(seurat_new) seurat_new <- FindVariableFeatures(seurat_new) seurat_new <- ScaleData(seurat_new) seurat_new <- RunPCA(seurat_new) seurat_new <- RunUMAP(seurat_new, dims = 1:30) # Visualize with SCEVAN results DimPlot(seurat_new, group.by = "class") ``` ## Best Practices ### Memory Management ```{r memory-tips} # For large datasets, process in chunks # or use sparse matrices # Check memory usage format(object.size(seurat_obj), units = "GB") # Remove intermediate objects rm(CNA_mtx_relat) gc() ``` ### Saving Results ```{r save-results} # Save enhanced Seurat object saveRDS(seurat_obj, "seurat_with_SCEVAN.rds") # Save just the metadata write.csv( seurat_obj@meta.data, "SCEVAN_metadata.csv", row.names = TRUE ) ``` ## Troubleshooting ### Common Issues | Issue | Solution | |-------|----------| | Cell name mismatch | Check for `-` vs `.` in barcodes | | Missing cells | Verify cells passed SCEVAN filtering | | Empty CNA values | Check if cell is classified as "filtered" | ### Cell Name Formatting ```{r fix-names} # SCEVAN may convert "-" to "." # Fix cell names if needed colnames(count_mtx) <- gsub("-", ".", colnames(count_mtx)) # Or in Seurat object seurat_obj <- RenameCells( seurat_obj, new.names = gsub("-", ".", colnames(seurat_obj)) ) ``` ## Session Info ```{r session-info, eval=TRUE} sessionInfo() ```