---
title: "SpaGER: Seurat Integration Guide"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
vignette: >
  %\VignetteIndexEntry{SpaGER: Seurat Integration Guide}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
# Chunks are shown but not evaluated by default (eval = FALSE); chunks that
# must run set eval=TRUE in their own header.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  message = FALSE,
  warning = FALSE,
  eval = FALSE
)
```

## Overview

SpaGER provides seamless integration with **Seurat** (v4 and v5), the most widely used R package for single-cell analysis. This vignette demonstrates how to use SpaGER with Seurat objects.

## Prerequisites

```{r prereq, eval=TRUE}
library(SpaGER)

# Check Seurat availability
if (requireNamespace("Seurat", quietly = TRUE)) {
  check_seurat_version()
} else {
  message("Seurat not installed. Code examples shown but not executed.")
}
```

## Basic Workflow with Seurat

### Load Your Data

```{r load_data}
library(Seurat)

# Load your spatial and scRNA-seq Seurat objects
# Example: Visium + scRNA-seq reference
spatial_obj <- readRDS("path/to/spatial_seurat.rds")
scrna_obj <- readRDS("path/to/scrna_seurat.rds")

# Check objects
spatial_obj
scrna_obj
```

### Prepare Data (Optional)

```{r prepare}
# Ensure data is normalized
spatial_obj <- prepare_seurat(spatial_obj, normalize = TRUE)
scrna_obj <- prepare_seurat(scrna_obj, normalize = TRUE)
```

### Run SpaGE

```{r run_spage}
# Predict unmeasured genes
spatial_obj <- SpaGE.Seurat(
  spatial_seurat = spatial_obj,
  rna_seurat = scrna_obj,
  n_pv = 30,
  n_neighbors = 50,
  assay_name = "SpaGE",  # Name for new assay
  verbose = TRUE
)

# Check the new assay was added
Assays(spatial_obj)
```

### Access Predictions

```{r access}
# Switch to SpaGE assay
DefaultAssay(spatial_obj) <- "SpaGE"

# Get predicted expression matrix
# Seurat v4 style shown here; on Seurat v5 use `layer = "data"` instead of
# `slot = "data"` (see the v4 vs v5 table in this vignette).
predicted_expr <- GetAssayData(spatial_obj, slot = "data")
dim(predicted_expr)

# View specific genes
head(predicted_expr[c("Gad1", "Slc17a7", "Mbp"), ])
```

## Seurat v4 vs v5

SpaGER automatically detects your Seurat version and uses the appropriate interface:

| Feature | Seurat v4 | Seurat v5 |
|---------|-----------|-----------|
| Data access | `slot` parameter | `layer` parameter |
| Default slot/layer | "data" | "data" |
| Assay creation | `CreateAssayObject()` | `CreateAssay5Object()` |

### Explicit Version Control

```{r version_control}
# For Seurat v4 (explicit)
spatial_obj <- SpaGE.Seurat(
  spatial_seurat = spatial_obj,
  rna_seurat = scrna_obj,
  slot = "data",  # v4 style
  n_pv = 30
)

# For Seurat v5 (explicit)
spatial_obj <- SpaGE.Seurat(
  spatial_seurat = spatial_obj,
  rna_seurat = scrna_obj,
  layer = "data",  # v5 style
  n_pv = 30
)
```

## Predict Specific Genes

```{r specific_genes}
# Define genes of interest
marker_genes <- c(
  "Gad1", "Gad2",        # GABAergic markers
  "Slc17a7", "Slc17a6",  # Glutamatergic markers
  "Mbp", "Plp1",         # Oligodendrocyte markers
  "Aqp4", "Gfap"         # Astrocyte markers
)

# Predict only these genes
spatial_obj <- SpaGE.Seurat(
  spatial_seurat = spatial_obj,
  rna_seurat = scrna_obj,
  genes_to_predict = marker_genes,
  n_pv = 30,
  assay_name = "SpaGE_markers"
)
```

## Return Data Frame Instead

```{r return_df}
# Don't add to Seurat object, return data frame
predicted_df <- SpaGE.Seurat(
  spatial_seurat = spatial_obj,
  rna_seurat = scrna_obj,
  n_pv = 30,
  add_to_object = FALSE  # Return data frame
)

class(predicted_df)
dim(predicted_df)
```

## Working with Different Assays

```{r assays}
# Specify which assay to use from each object
spatial_obj <- SpaGE.Seurat(
  spatial_seurat = spatial_obj,
  rna_seurat = scrna_obj,
  assay_spatial = "Spatial",  # Use Spatial assay
  assay_rna = "RNA",          # Use RNA assay
  n_pv = 30
)
```

## Visualization After Prediction

```{r visualize}
# Set to predicted assay
DefaultAssay(spatial_obj) <- "SpaGE"

# Spatial feature plot
SpatialFeaturePlot(spatial_obj, features = "Gad1")

# Expression heatmap
# DoHeatmap() reads "scale.data" by default; the predictions are accessed from
# "data" elsewhere in this vignette, so point the heatmap at that slot.
DoHeatmap(spatial_obj, features = marker_genes, slot = "data")

# Violin plot
VlnPlot(spatial_obj, features = "Gad1", group.by = "seurat_clusters")
```

## Batch Processing Multiple Gene Sets

```{r batch}
# Define gene sets
gene_sets <- list(
  excitatory = c("Slc17a7", "Slc17a6", "Camk2a"),
  inhibitory = c("Gad1", "Gad2", "Slc32a1"),
  glial = c("Mbp", "Gfap", "Aqp4", "Cx3cr1")
)

# Predict all gene sets
# Note: For Seurat objects, process one set at a time
for (set_name in names(gene_sets)) {
  # Each gene set is stored in its own assay, e.g. "SpaGE_excitatory"
  assay_name <- paste0("SpaGE_", set_name)
  spatial_obj <- SpaGE.Seurat(
    spatial_seurat = spatial_obj,
    rna_seurat = scrna_obj,
    genes_to_predict = gene_sets[[set_name]],
    n_pv = 30,
    assay_name = assay_name,
    verbose = FALSE
  )
}

Assays(spatial_obj)
```

## Tips for Best Results

### 1. Matching Cell Types

Ensure your scRNA-seq reference contains cell types present in your spatial data:

```{r cell_types}
# Check cell type composition
table(scrna_obj$cell_type)

# Optionally subset to relevant cell types
scrna_subset <- subset(
  scrna_obj,
  subset = cell_type %in% c("Neuron", "Astrocyte", "Oligodendrocyte")
)
```

### 2. Gene Filtering

```{r filtering}
# Filter lowly expressed genes before SpaGE:
# keep genes detected (expression > 0) in more than 50 cells
n_cells_expressing <- rowSums(GetAssayData(scrna_obj) > 0)
scrna_obj <- scrna_obj[n_cells_expressing > 50, ]
```

### 3. Normalize Consistently

```{r normalize}
# Use same normalization for both datasets
spatial_obj <- NormalizeData(spatial_obj, normalization.method = "LogNormalize")
scrna_obj <- NormalizeData(scrna_obj, normalization.method = "LogNormalize")
```

## Complete Example Workflow

```{r complete}
# 1. Load data
# readRDS() works with both Seurat v4 and v5 and pairs with saveRDS() in step 6
spatial_obj <- readRDS("spatial.rds")
scrna_obj <- readRDS("scrna.rds")

# 2. Prepare (normalize if needed)
spatial_obj <- prepare_seurat(spatial_obj)
scrna_obj <- prepare_seurat(scrna_obj)

# 3. Define genes to predict
genes <- setdiff(rownames(scrna_obj), rownames(spatial_obj))

# 4. Run SpaGE
# head() avoids NA padding when fewer than 100 unmeasured genes exist
spatial_obj <- SpaGE.Seurat(
  spatial_seurat = spatial_obj,
  rna_seurat = scrna_obj,
  genes_to_predict = head(genes, 100),  # First 100 unmeasured genes
  n_pv = 30,
  n_neighbors = 50
)

# 5. Visualize
DefaultAssay(spatial_obj) <- "SpaGE"
SpatialFeaturePlot(spatial_obj, features = head(genes, 4))

# 6. Save
saveRDS(spatial_obj, "spatial_with_predictions.rds")
```

## Troubleshooting

### Common Issues

1. **"No shared genes"**: Ensure gene names match between datasets

```{r trouble1}
# Check gene name overlap
shared <- intersect(rownames(spatial_obj), rownames(scrna_obj))
length(shared)
```

2. **Memory issues**: For large datasets, predict genes in batches

```{r trouble2}
# Split genes into batches of up to 100 genes each
all_genes <- setdiff(rownames(scrna_obj), rownames(spatial_obj))
batches <- split(all_genes, ceiling(seq_along(all_genes) / 100))
```

3. **Slow performance**: Reduce n_pv or n_neighbors

```{r trouble3}
# Faster but potentially less accurate
spatial_obj <- SpaGE.Seurat(
  spatial_seurat = spatial_obj,
  rna_seurat = scrna_obj,
  n_pv = 20,
  n_neighbors = 30
)
```

## Session Information

```{r session, eval=TRUE}
sessionInfo()
```