--- title: "Advanced Usage and Best Practices" author: "Zaoqu Liu" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Advanced Usage and Best Practices} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6, message = FALSE, warning = FALSE ) ``` ## Introduction This guide covers advanced features and best practices for using scGate effectively in complex analysis scenarios. ```{r load-packages} library(scGate) library(Seurat) library(ggplot2) library(patchwork) ``` ## Building Hierarchical Models ### Multi-Level Gating Strategy Similar to flow cytometry, scGate supports hierarchical gating: ```{r hierarchical-model} # Build a hierarchical model for CD8+ T cells cd8_model <- gating_model( level = 1, name = "Immune", signature = c("PTPRC") # CD45 ) cd8_model <- gating_model( model = cd8_model, level = 1, name = "notStromal", signature = c("COL1A1", "COL1A2", "FAP"), positive = FALSE ) cd8_model <- gating_model( model = cd8_model, level = 2, name = "Tcell", signature = c("CD3D", "CD3E", "CD2") ) cd8_model <- gating_model( model = cd8_model, level = 2, name = "notBcell", signature = c("MS4A1", "CD19"), positive = FALSE ) cd8_model <- gating_model( model = cd8_model, level = 3, name = "CD8", signature = c("CD8A", "CD8B") ) cd8_model <- gating_model( model = cd8_model, level = 3, name = "notCD4", signature = c("CD4"), positive = FALSE ) # View the model print(cd8_model) ``` ### Model Conversion Utilities ```{r model-conversion} # Models can be saved as TSV files # write.table(cd8_model, "cd8_model.tsv", sep = "\t", row.names = FALSE) # And loaded back # my_model <- load_scGate_model("cd8_model.tsv") ``` ## Multi-Class Classification ### Annotating Multiple Cell Types ```{r multiclass-setup, eval=FALSE} # Load example data data(query.seurat) # Define multiple models model_list <- list( "Bcell" = gating_model(name = "Bcell", signature = c("MS4A1", "CD19")), "Tcell" = gating_model(name = "Tcell", signature = c("CD3D", "CD3E")), "Myeloid" = gating_model(name = "Myeloid", signature = c("CD14", "LYZ")) ) # Apply all models simultaneously query.seurat <- scGate( data = query.seurat, model = model_list, reduction = "pca" ) # View multi-class results table(query.seurat$scGate_multi, useNA = "ifany") ``` ### Visualizing Multi-Class Results ```{r multiclass-viz, eval=FALSE, fig.width=10, fig.height=4} p1 <- DimPlot(query.seurat, group.by = "cell_type", label = TRUE, repel = TRUE) + ggtitle("Original") + NoLegend() p2 <- DimPlot(query.seurat, group.by = "scGate_multi", label = TRUE, repel = TRUE) + ggtitle("scGate Multi-class") p1 + p2 ``` ## Working with Integrated Data ### Using Pre-computed Reductions For batch-corrected data, use existing dimensionality reductions: ```{r integrated-data, eval=FALSE} # Example with Harmony-integrated data seurat_integrated <- scGate( data = seurat_integrated, model = my_model, reduction = "harmony" # Use Harmony reduction ) # Example with UMAP seurat_integrated <- scGate( data = seurat_integrated, model = my_model, reduction = "umap" ) ``` ### Assay Selection ```{r assay-selection, eval=FALSE} # For integrated objects, specify the assay seurat_obj <- scGate( data = seurat_obj, model = my_model, assay = "RNA", # Use RNA assay for UCell scoring reduction = "harmony" # Use Harmony for kNN ) ``` ## Parameter Optimization ### Key Parameters ```{r parameter-table, echo=FALSE} params <- data.frame( Parameter = c("pos.thr", "neg.thr", "k.param", "smooth.decay", "param_decay", "min.cells"), Default = c("0.2", "0.2", "30", "0.1", "0.25", "30"), Description = c( "Threshold for positive signatures", "Threshold for negative signatures", "Number of neighbors for kNN smoothing", "Decay rate for neighbor weights", "Parameter decay across levels", "Minimum cells for clustering" ), Recommendation = c( "Lower (0.1-0.15) for rare populations", "Higher (0.25-0.3) for stringent filtering", "Higher (50-100) for noisy data", "Lower (0.05) for more smoothing", "Higher (0.3-0.4) for deep hierarchies", "Lower (10-20) for small datasets" ) ) knitr::kable(params, caption = "scGate Parameter Guide") ``` ### Threshold Tuning ```{r threshold-tuning, eval=FALSE, fig.width=10, fig.height=4} # Test different thresholds data(query.seurat) model <- gating_model(name = "Tcell", signature = c("CD3D", "CD3E")) # Compare thresholds results <- list() for (thr in c(0.1, 0.2, 0.3)) { temp <- scGate(query.seurat, model = model, pos.thr = thr, reduction = "pca") results[[paste0("thr_", thr)]] <- table(temp$is.pure)["Pure"] } # Display results data.frame( Threshold = c(0.1, 0.2, 0.3), Pure_Cells = unlist(results) ) ``` ## Performance Evaluation ### Using Ground Truth ```{r performance-eval} # Demonstrate performance metrics with simulated data set.seed(42) n_cells <- 100 # Simulate ground truth and predictions ground_truth <- sample(c(TRUE, FALSE), n_cells, replace = TRUE, prob = c(0.3, 0.7)) predictions <- ground_truth # Add some noise predictions[sample(which(ground_truth), 3)] <- FALSE predictions[sample(which(!ground_truth), 5)] <- TRUE # Calculate metrics metrics <- performance.metrics( actual = ground_truth, pred = predictions ) print(metrics) ``` ### Interpreting Metrics ```{r metrics-interpretation} # Create interpretation guide interpretation <- data.frame( Metric = c("Precision (PREC)", "Recall (REC)", "MCC"), Range = c("0-1", "0-1", "-1 to 1"), Interpretation = c( "Fraction of 'Pure' cells that are true positives", "Fraction of true positives identified as 'Pure'", "Overall classification quality (0 = random, 1 = perfect)" ) ) knitr::kable(interpretation) ``` ## Parallel Processing ### Multi-core Processing ```{r parallel-processing, eval=FALSE} library(BiocParallel) # Simple multi-core result <- scGate( data = seurat_obj, model = model_list, ncores = 4 ) # Custom BPPARAM for more control result <- scGate( data = seurat_obj, model = model_list, BPPARAM = MulticoreParam( workers = 4, progressbar = TRUE ) ) ``` ## Gene Blacklisting ### Default Blacklist scGate excludes certain genes from dimensionality reduction: ```{r blacklist} # View default blacklist categories data(genes.blacklist.default) names(genes.blacklist.default) # View genes in each category lapply(genes.blacklist.default, head) ``` ### Custom Blacklist ```{r custom-blacklist, eval=FALSE} # Add custom genes to blacklist my_blacklist <- c( genes.blacklist.default$Mito, genes.blacklist.default$cellCycle, c("CUSTOM_GENE1", "CUSTOM_GENE2") ) result <- scGate( data = seurat_obj, model = my_model, genes.blacklist = my_blacklist ) # Or disable blacklisting entirely result <- scGate( data = seurat_obj, model = my_model, genes.blacklist = NULL ) ``` ## Troubleshooting ### Common Issues ```{r troubleshooting, echo=FALSE} issues <- data.frame( Problem = c( "All cells classified as Impure", "Too many cells classified as Pure", "Slow performance", "Memory issues" ), Possible_Cause = c( "Threshold too high or wrong markers", "Threshold too low", "Large dataset with many models", "Too many cells or features" ), Solution = c( "Lower pos.thr, check marker expression", "Increase pos.thr or add negative markers", "Use ncores > 1, pre-computed reduction", "Subset data, reduce maxRank" ) ) knitr::kable(issues, caption = "Troubleshooting Guide") ``` ### Diagnostic Checks ```{r diagnostics} # Check marker expression data(query.seurat) # Verify markers are present markers <- c("CD3D", "CD3E", "MS4A1") present <- markers %in% rownames(query.seurat) data.frame(Marker = markers, Present = present) ``` ## Best Practices Summary 1. **Start simple**: Begin with single markers, add complexity as needed 2. **Validate markers**: Ensure markers are expressed in your data 3. **Use negative markers**: Improve specificity with exclusion criteria 4. **Save levels**: Enable `save.levels = TRUE` for debugging 5. **Tune thresholds**: Adjust based on your specific dataset 6. **Leverage parallelization**: Use multiple cores for large datasets 7. **Document models**: Keep records of your gating strategies ## Session Info ```{r session-info} sessionInfo() ```