--- title: "Advanced Usage" author: "Zaoqu Liu" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Advanced Usage} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6, warning = FALSE, message = FALSE ) ``` ## Introduction This vignette covers advanced usage scenarios for scPharm, including: - Custom threshold calibration - Multi-drug analysis strategies - Combination therapy optimization - Integration with external tools - Performance tuning ```{r load-packages} library(ggplot2) library(dplyr) ``` ## 1. Threshold Calibration with Normal Tissue ### Using scPharmGenNullDist For accurate cell classification, calibrating thresholds using normal tissue samples is recommended: ```{r null-dist, eval=FALSE} library(scPharm) # Load normal tissue reference normal_seurat <- readRDS("normal_tissue.rds") # Generate null distribution null_dist <- scPharmGenNullDist( normal_seurat, cancer = "LUAD", drug = "Erlotinib", nmcs = 50, nfeatures = 200 ) # Extract calibrated thresholds threshold_s <- null_dist$threshold.s threshold_r <- null_dist$threshold.r cat("Sensitive threshold:", threshold_s, "\n") cat("Resistant threshold:", threshold_r, "\n") ``` ### Apply Calibrated Thresholds ```{r apply-thresholds, eval=FALSE} # Load tumor sample tumor_seurat <- readRDS("tumor_sample.rds") # Run analysis with calibrated thresholds result <- scPharmIdentify( tumor_seurat, type = "tissue", cancer = "LUAD", drug = "Erlotinib", threshold.s = threshold_s, threshold.r = threshold_r ) ``` ### Threshold Selection Strategy | Scenario | Recommendation | |----------|----------------| | Strict classification | Higher `threshold.s`, lower `threshold.r` | | Lenient classification | Lower `threshold.s`, higher `threshold.r` | | Balanced | Use `scPharmGenNullDist()` defaults | ## 2. Multi-Drug Analysis ### Pan-Drug Screening ```{r pan-drug, eval=FALSE} # Analyze all available drugs result_all <- scPharmIdentify( seurat_obj, type = "cellline", cancer = "BRCA", drug = "all", cores = 8 # Parallel processing ) # Get drug ranking dr_scores <- scPharmDr(result_all) print(head(dr_scores, 20)) ``` ### Drug Class Analysis ```{r drug-class, eval=FALSE} # Load drug metadata data(drug_info, package = "scPharm") # Filter by drug class chemo_drugs <- drug_info %>% filter(DRUG_TYPE == "chemotherapy") %>% pull(DRUG_NAME) targeted_drugs <- drug_info %>% filter(DRUG_TYPE == "targeted") %>% pull(DRUG_NAME) # Analyze specific drug classes result_chemo <- scPharmIdentify( seurat_obj, cancer = "BRCA", drug = chemo_drugs ) result_targeted <- scPharmIdentify( seurat_obj, cancer = "BRCA", drug = targeted_drugs ) ``` ### Multi-Cancer Analysis ```{r multi-cancer, eval=FALSE} # Analyze across multiple cancer contexts cancers <- c("BRCA", "LUAD", "COREAD") results <- lapply(cancers, function(cancer) { result <- scPharmIdentify( seurat_obj, type = "cellline", cancer = cancer, drug = "Paclitaxel" ) # Extract NES values nes_col <- grep("scPharm_nes_", colnames(result@meta.data), value = TRUE) data.frame( cancer = cancer, mean_nes = mean(result@meta.data[[nes_col]], na.rm = TRUE), sd_nes = sd(result@meta.data[[nes_col]], na.rm = TRUE) ) }) # Combine results multi_cancer_df <- do.call(rbind, results) print(multi_cancer_df) ``` ## 3. Combination Therapy Optimization ### scPharmCombo Analysis ```{r combo-basic, eval=FALSE} # Get drug ranking first dr_scores <- scPharmDr(result) # Identify combinations for top drugs combos <- scPharmCombo(result, dr_scores, topN = 10) # Examine results for (drug_name in names(combos)) { cat("\n=== Drug:", drug_name, "===\n") print(head(combos[[drug_name]]$combination_scores)) } ``` ### Combination Selection Criteria The combination score considers: 1. **Complementarity**: Do drugs target different resistant populations? 2. **Coverage**: What percentage of resistant cells become sensitive? 3. **Synergy potential**: Based on known drug interactions ```{r combo-viz, echo=FALSE, fig.cap="Drug combination evaluation"} # Simulate combination data set.seed(42) combo_data <- expand.grid( Drug1 = c("Docetaxel", "Paclitaxel", "Erlotinib"), Drug2 = c("Cisplatin", "Gefitinib", "Tamoxifen", "Imatinib") ) %>% mutate( combo_score = runif(n(), 0.2, 0.9), coverage = runif(n(), 0.4, 0.95) ) ggplot(combo_data, aes(x = Drug2, y = Drug1, fill = combo_score)) + geom_tile(color = "white", size = 1) + geom_text(aes(label = sprintf("%.0f%%", coverage * 100)), color = "white", fontface = "bold") + scale_fill_gradient(low = "#e74c3c", high = "#27ae60", name = "Combo Score") + labs( title = "Drug Combination Matrix", subtitle = "Numbers show coverage of resistant cells", x = "Secondary Drug", y = "Primary Drug" ) + theme_minimal() + theme( plot.title = element_text(face = "bold"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid = element_blank() ) ``` ## 4. Integration with Seurat Workflows ### Pre-computed Embeddings ```{r seurat-integration, eval=FALSE} # Use existing Seurat clustering seurat_obj <- FindVariableFeatures(seurat_obj) seurat_obj <- ScaleData(seurat_obj) seurat_obj <- RunPCA(seurat_obj) seurat_obj <- FindNeighbors(seurat_obj) seurat_obj <- FindClusters(seurat_obj) seurat_obj <- RunUMAP(seurat_obj, dims = 1:30) # Run scPharm (MCA is computed independently) result <- scPharmIdentify(seurat_obj, ...) # Visualize with Seurat functions DimPlot(result, group.by = "scPharm_label_Docetaxel") FeaturePlot(result, features = "scPharm_nes_Docetaxel") ``` ### Cluster-Level Analysis ```{r cluster-analysis, eval=FALSE} # Aggregate NES by cluster cluster_summary <- result@meta.data %>% group_by(seurat_clusters) %>% summarise( n_cells = n(), mean_nes = mean(scPharm_nes_Docetaxel, na.rm = TRUE), pct_sensitive = mean(scPharm_label_Docetaxel == "sensitive") * 100, pct_resistant = mean(scPharm_label_Docetaxel == "resistant") * 100 ) print(cluster_summary) ``` ## 5. Performance Tuning ### Memory Management For large datasets, consider: ```{r memory, eval=FALSE} # Process in chunks cell_chunks <- split(colnames(seurat_obj), ceiling(seq_along(colnames(seurat_obj)) / 5000)) results <- lapply(cell_chunks, function(cells) { subset_obj <- subset(seurat_obj, cells = cells) scPharmIdentify(subset_obj, ...) }) # Merge results final_result <- merge(results[[1]], results[-1]) ``` ### Parallel Processing ```{r parallel, eval=FALSE} # Detect available cores n_cores <- parallel::detectCores() - 1 # Run with multiple cores result <- scPharmIdentify( seurat_obj, drug = "all", cores = n_cores ) ``` ### Parameter Optimization | Parameter | Effect on Speed | Effect on Accuracy | |-----------|-----------------|-------------------| | `nmcs` ↑ | Slower | Higher (to a point) | | `nfeatures` ↑ | Minimal | Higher specificity | | `cores` ↑ | Faster | None | ## 6. Custom Drug Signatures ### Using External Gene Sets ```{r custom-signatures, eval=FALSE} # Load custom drug sensitivity signatures custom_signatures <- list( DrugA_sensitive = c("GENE1", "GENE2", "GENE3", ...), DrugA_resistant = c("GENE4", "GENE5", "GENE6", ...) ) # Note: This requires modifying internal functions # Contact maintainer for custom signature integration ``` ## 7. Quality Control ### Pre-analysis Checks ```{r qc-checks, eval=FALSE} # Check gene coverage data(bulkdata, package = "scPharm") gene_overlap <- intersect(rownames(seurat_obj), rownames(bulkdata)) cat("Gene overlap:", length(gene_overlap), "/", nrow(bulkdata), "\n") # Minimum recommended: 5000 genes if (length(gene_overlap) < 5000) { warning("Low gene overlap may affect accuracy") } # Check cell numbers if (ncol(seurat_obj) < 100) { warning("Small cell numbers may lead to unstable estimates") } ``` ### Post-analysis Validation ```{r validation, eval=FALSE} # Check NES distribution nes_values <- result@meta.data$scPharm_nes_Docetaxel # Should be roughly normal/bimodal hist(nes_values, breaks = 50, main = "NES Distribution") # Check classification balance table(result@meta.data$scPharm_label_Docetaxel) ``` ## 8. Troubleshooting ### Common Issues | Issue | Possible Cause | Solution | |-------|----------------|----------| | All cells classified as "other" | Thresholds too strict | Lower threshold_s, raise threshold_r | | No tumor cells detected | CNV detection failed | Provide tumor.cells manually | | Memory error | Dataset too large | Process in chunks | | Slow performance | Too many drugs | Use parallel processing | ### Debug Mode ```{r debug, eval=FALSE} # Verbose output options(scPharm.verbose = TRUE) # Step-by-step execution result <- scPharmIdentify( seurat_obj, type = "cellline", cancer = "BRCA", drug = "Docetaxel" ) ``` ## 9. Exporting Results ### To Data Frame ```{r export-df, eval=FALSE} # Extract all scPharm columns scpharm_cols <- grep("^scPharm_|^cell\\.label$", colnames(result@meta.data), value = TRUE) export_df <- result@meta.data[, scpharm_cols] export_df$cell_id <- rownames(export_df) write.csv(export_df, "scpharm_results.csv", row.names = FALSE) ``` ### To Seurat Object ```{r export-seurat, eval=FALSE} # Save annotated Seurat object saveRDS(result, "annotated_seurat.rds") ``` ## Session Info ```{r session-info} sessionInfo() ```