---
title: "SpaGER: Quick Start Guide"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
vignette: >
  %\VignetteIndexEntry{SpaGER: Quick Start Guide}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
# Global chunk options: merge code and output, prefix output with "#>",
# and keep package messages/warnings out of the rendered vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  message = FALSE,
  warning = FALSE
)
```

## Introduction

**SpaGER** (Spatial Gene Expression in R) is a high-performance R implementation of the SpaGE algorithm for predicting genome-wide expression profiles in spatial transcriptomics data through integration with scRNA-seq reference datasets.

### Why SpaGER?

Spatial transcriptomics technologies provide invaluable spatial context but often measure only a limited panel of genes. SpaGER addresses this limitation by:

- Leveraging scRNA-seq data to impute unmeasured genes in spatial data
- Using domain adaptation via Principal Vectors (PVs)
- Providing C++ acceleration for high performance
- Supporting seamless Seurat integration

## Installation

```{r install, eval=FALSE}
# From R-Universe (recommended)
install.packages("SpaGER", repos = "https://zaoqu-liu.r-universe.dev")

# From GitHub
remotes::install_github("Zaoqu-Liu/SpaGER")
```

## Basic Usage

### Load Package

```{r load}
library(SpaGER)
```

### Generate Simulated Data

For demonstration, we create simulated spatial and scRNA-seq datasets:

```{r simulate}
set.seed(42)

# Dimensions of the simulated datasets
n_rna_cells <- 500
n_spatial_cells <- 200
n_shared_genes <- 100
n_rna_only_genes <- 50
n_rna_genes <- n_shared_genes + n_rna_only_genes

# Gene names are built once and reused by the later chunks.
# seq_len() is used instead of 1:n so a zero count yields an empty vector.
shared_genes <- paste0("SharedGene", seq_len(n_shared_genes))
rna_only_genes <- paste0("RNAOnlyGene", seq_len(n_rna_only_genes))

# scRNA-seq data: cells x genes (shared genes first, then RNA-only genes)
rna_data <- matrix(
  abs(rnorm(n_rna_cells * n_rna_genes, mean = 5, sd = 2)),
  nrow = n_rna_cells
)
colnames(rna_data) <- c(shared_genes, rna_only_genes)
rownames(rna_data) <- paste0("RNACell", seq_len(n_rna_cells))

# Spatial data: only shared genes
spatial_data <- matrix(
  abs(rnorm(n_spatial_cells * n_shared_genes, mean = 5, sd = 2)),
  nrow = n_spatial_cells
)
colnames(spatial_data) <- shared_genes
rownames(spatial_data) <- paste0("SpatialSpot", seq_len(n_spatial_cells))

cat("scRNA-seq data:", nrow(rna_data), "cells x", ncol(rna_data), "genes\n")
cat("Spatial data:", nrow(spatial_data), "cells x", ncol(spatial_data), "genes\n")
```

### Run SpaGE Prediction

```{r predict}
# Convert the matrices to data frames once; every call below reuses them
spatial_df <- as.data.frame(spatial_data)
rna_df <- as.data.frame(rna_data)

# Predict unmeasured genes
predicted <- SpaGE(
  spatial_data = spatial_df,
  rna_data = rna_df,
  n_pv = 30,         # Number of principal vectors
  n_neighbors = 50,  # k for KNN imputation
  verbose = TRUE
)

# Check results
cat("\nPredicted:", ncol(predicted), "genes for", nrow(predicted), "spatial spots\n")
head(predicted[, 1:5])
```

### Predict Specific Genes

```{r specific_genes}
# Predict only specific genes of interest
genes_of_interest <- c("RNAOnlyGene1", "RNAOnlyGene10", "RNAOnlyGene25")

predicted_specific <- SpaGE(
  spatial_data = spatial_df,
  rna_data = rna_df,
  n_pv = 30,
  genes_to_predict = genes_of_interest,
  verbose = FALSE
)

cat("Predicted genes:", colnames(predicted_specific), "\n")
```

## Cross-Validation

Evaluate prediction accuracy using leave-one-gene-out cross-validation:

```{r cv}
# Run CV on a subset of shared genes
cv_genes <- head(shared_genes, 10)

cv_results <- SpaGE_cv(
  spatial_data = spatial_df,
  # Restrict the reference to the shared genes; drop = FALSE keeps a data frame
  rna_data = rna_df[, shared_genes, drop = FALSE],
  n_pv = 20,
  genes = cv_genes,
  verbose = FALSE
)

# Summary
cv_cor <- cv_results$correlation
cat("Cross-validation Results:\n")
cat("Mean Spearman correlation:", round(mean(cv_cor), 3), "\n")
cat("Median Spearman correlation:", round(median(cv_cor), 3), "\n")
```

### Visualize CV Results

```{r cv_plot, fig.width=6, fig.height=4}
# Mean correlation, computed once for both the reference line and the legend
mean_cor <- mean(cv_results$correlation)

# Plot correlation distribution
hist(
  cv_results$correlation,
  breaks = 20,
  main = "Leave-One-Out Cross-Validation",
  xlab = "Spearman Correlation",
  col = "#3498db",
  border = "white"
)
abline(v = mean_cor, col = "red", lwd = 2, lty = 2)
legend(
  "topright",
  legend = paste("Mean =", round(mean_cor, 3)),
  col = "red",
  lty = 2,
  lwd = 2
)
```

## Accessing Metadata

SpaGE returns additional metadata as attributes:

```{r metadata}
# Access metadata from prediction result
cat("Number of PVs requested:", attr(predicted, "n_pv"), "\n")
cat("Number of PVs used:", attr(predicted, "n_pv_used"), "\n")
cat("Number of shared genes:", attr(predicted, "n_shared_genes"), "\n")
cat("Top PV similarities:", round(head(attr(predicted, "similarities"), 5), 3), "\n")
```

## Session Information

```{r session}
sessionInfo()
```