--- title: "Advanced Usage and Parameter Tuning" author: "Zaoqu Liu" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Advanced Usage and Parameter Tuning} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6, warning = FALSE, message = FALSE ) ``` ## Introduction This vignette covers advanced usage scenarios and parameter tuning for scTenifoldKnk. ```{r load} library(scTenifoldKnk) library(Matrix) # Load example data data_path <- system.file("single-cell/example.csv", package = "scTenifoldKnk") scRNAseq <- as.matrix(read.csv(data_path, row.names = 1)) ``` ## Parameter Reference ### Main Function Parameters | Parameter | Default | Description | |-----------|---------|-------------| | `qc_mtThreshold` | 0.1 | Max mitochondrial ratio | | `qc_minLSize` | 1000 | Min library size | | `nc_nNet` | 10 | Number of networks | | `nc_nCells` | 500 | Cells per network | | `nc_nComp` | 3 | Principal components | | `nc_q` | 0.9 | Quantile threshold | | `td_K` | 3 | Tensor rank | | `td_maxIter` | 1000 | Max iterations | | `ma_nDim` | 2 | Manifold dimensions | ## Using Individual Functions ### 1. Quality Control ```{r qc} # Custom quality control (using minLSize=0 for demo data) qc_data <- scQC(scRNAseq, mtThreshold = 0.2, minLSize = 0) cat("Before QC:", ncol(scRNAseq), "cells\n") cat("After QC:", ncol(qc_data), "cells\n") ``` ### 2. Network Construction ```{r network} # Build single network net_single <- pcNetFast( qc_data, nComp = 5, # More PCs for complex data scaleScores = TRUE, symmetric = FALSE, q = 0.95, # Higher threshold = sparser network verbose = FALSE ) # Check network properties cat("Non-zero edges:", sum(net_single != 0), "\n") cat("Sparsity:", round(1 - sum(net_single != 0) / length(net_single), 4), "\n") ``` ### 3. Multiple Networks ```{r multiple_nets} # Build multiple networks for tensor decomposition networks <- makeNetworksFast( qc_data, nNet = 10, nCells = min(200, ncol(qc_data)), nComp = 3, q = 0.9, nCores = 1, verbose = FALSE, seed = 42 ) cat("Generated", length(networks), "networks\n") ``` ### 4. Tensor Decomposition ```{r tensor} # Denoise networks denoised <- tensorDecomposition( xList = networks, K = 3, # Tensor rank maxIter = 500, maxError = 1e-5, useCpp = TRUE # Use C++ for speed ) cat("Denoised network dimensions:", dim(denoised$X), "\n") ``` ### 5. Manifold Alignment ```{r manifold} # Prepare networks WT <- as.matrix(denoised$X) diag(WT) <- 0 # Create knockout KO <- WT KO["G50", ] <- 0 # Align manifolds aligned <- manifoldAlignment( Matrix(WT, sparse = TRUE), Matrix(KO, sparse = TRUE), d = 3 # 3D embedding ) cat("Aligned dimensions:", dim(aligned), "\n") ``` ### 6. Differential Regulation ```{r diff_reg} # Analyze differential regulation dr <- dRegulation(aligned, gKO = "G50") head(dr) ``` ## Parameter Tuning Guidelines ### Network Construction Parameters ```{r tune_network, fig.width=10, fig.height=4} # Compare different nComp values par(mfrow = c(1, 3)) for (nc in c(3, 5, 10)) { net <- pcNetFast(qc_data, nComp = nc, q = 0.9, verbose = FALSE) nnz <- sum(net != 0) # Visualize edge weight distribution weights <- abs(net@x) hist(weights, breaks = 30, col = "#3498DB", border = "white", main = paste("nComp =", nc, "\n(", nnz, "edges)"), xlab = "Edge Weight", cex.main = 1.2) } ``` ### Quantile Threshold Effect ```{r tune_q, fig.width=10, fig.height=4} par(mfrow = c(1, 3)) q_values <- c(0.8, 0.9, 0.95) for (q in q_values) { net <- pcNetFast(qc_data, nComp = 3, q = q, verbose = FALSE) nnz <- sum(net != 0) sparsity <- round(1 - nnz / prod(dim(net)), 4) # Network degree distribution degrees <- rowSums(net != 0) hist(degrees, breaks = 20, col = "#2ECC71", border = "white", main = paste("q =", q, "\n(Sparsity:", sparsity, ")"), xlab = "Node Degree", cex.main = 1.2) } ``` ### Tensor Rank Selection ```{r tune_k, fig.width=8, fig.height=5} # Compare reconstruction error for different K values networks_small <- makeNetworksFast(qc_data, nNet = 5, nCells = 100, nComp = 3, verbose = FALSE, seed = 1) k_values <- c(2, 3, 5, 7, 10) errors <- numeric(length(k_values)) for (i in seq_along(k_values)) { td <- tensorDecomposition(networks_small, K = k_values[i], maxIter = 100, useCpp = TRUE) # Compute reconstruction error reconstructed <- as.matrix(td$X) original_mean <- Reduce(`+`, lapply(networks_small, as.matrix)) / length(networks_small) errors[i] <- sqrt(mean((reconstructed - original_mean)^2)) } plot(k_values, errors, type = "b", pch = 19, col = "#E74C3C", lwd = 2, xlab = "Tensor Rank (K)", ylab = "Reconstruction RMSE", main = "Tensor Rank Selection") grid() # Mark elbow point optimal_k <- k_values[which.min(diff(diff(errors)) > 0)] abline(v = optimal_k, lty = 2, col = "#3498DB") legend("topright", paste("Suggested K =", optimal_k), bty = "n") ``` ## Comparing Multiple Knockouts ```{r multi_ko, fig.width=10, fig.height=6} # Compare knockouts of different genes ko_genes <- c("G1", "G50", "G100") ko_results <- list() for (gene in ko_genes) { ko_results[[gene]] <- scTenifoldKnk( scRNAseq, gKO = gene, qc_minLSize = 0, nc_nNet = 3, nc_nCells = 100, verbose = FALSE ) } # Compare top affected genes par(mar = c(5, 8, 4, 2)) comparison_data <- sapply(ko_genes, function(g) { dr <- ko_results[[g]]$diffRegulation dr$FC[1:10] }) rownames(comparison_data) <- ko_results[[ko_genes[1]]]$diffRegulation$gene[1:10] barplot(t(comparison_data), beside = TRUE, col = c("#E74C3C", "#3498DB", "#2ECC71"), las = 2, main = "Top 10 Affected Genes by Different Knockouts", ylab = "Fold Change", cex.names = 0.8) legend("topright", legend = paste("KO:", ko_genes), fill = c("#E74C3C", "#3498DB", "#2ECC71"), bty = "n") ``` ## Performance Considerations ### Memory Usage ```{r memory} # Estimate memory for different dataset sizes estimate_memory <- function(n_genes, n_cells, n_networks) { # Network matrix: n_genes x n_genes x 8 bytes (double) net_size <- n_genes^2 * 8 / 1e6 # MB # Total for all networks total <- net_size * n_networks * 2 # WT + KO return(round(total, 2)) } sizes <- expand.grid( genes = c(100, 500, 1000, 2000), networks = c(5, 10, 20) ) sizes$memory_MB <- mapply(estimate_memory, sizes$genes, 1000, sizes$networks) knitr::kable(sizes, col.names = c("Genes", "Networks", "Est. Memory (MB)"), caption = "Estimated Memory Usage") ``` ### Parallel Processing ```{r parallel} # Check available cores cat("Available cores:", parallel::detectCores(), "\n") # Benchmark sequential vs parallel (if multiple cores available) if (parallel::detectCores() > 1) { # Sequential t1 <- system.time({ nets1 <- makeNetworksFast(qc_data, nNet = 5, nCells = 50, nCores = 1, verbose = FALSE) }) # Parallel t2 <- system.time({ nets2 <- makeNetworksFast(qc_data, nNet = 5, nCells = 50, nCores = 2, verbose = FALSE) }) cat("Sequential time:", round(t1[3], 2), "s\n") cat("Parallel time:", round(t2[3], 2), "s\n") cat("Speedup:", round(t1[3] / t2[3], 2), "x\n") } ``` ## Best Practices ### Recommended Workflow 1. **Quality Control**: Use appropriate thresholds based on your data 2. **Network Parameters**: Start with defaults, adjust based on network density 3. **Tensor Rank**: Use K=3-5 for most applications 4. **Validation**: Compare with known biology when possible ### Troubleshooting | Issue | Possible Cause | Solution | |-------|----------------|----------| | Empty network | High q threshold | Lower q to 0.8-0.9 | | Slow runtime | Large dataset | Reduce nCells or nNet | | Memory error | Too many genes | Filter low-variance genes | | No significant genes | Weak knockout effect | Check if gene is expressed | ## Session Info ```{r session} sessionInfo() ```