---
title: "Visualization and Result Interpretation"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
vignette: >
  %\VignetteIndexEntry{Visualization and Result Interpretation}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
# Global knitr options applied to every chunk in this vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 6,
  fig.align = "center",
  dpi = 100,
  message = FALSE,
  warning = FALSE
)
```

## Introduction

This vignette demonstrates how to visualize and interpret TorchDecon results. Effective visualization is crucial for understanding cell type composition patterns and validating deconvolution accuracy.

**Author**: Zaoqu Liu (liuzaoqu@163.com)

## Setup

```{r load-packages, eval=FALSE}
library(TorchDecon)
library(Seurat)
library(ggplot2)
library(reshape2)
library(pheatmap)
library(RColorBrewer)
library(ggpubr) # provides stat_compare_means(), used in the box plot section
```

## Generate Example Data and Run Deconvolution

```{r run-deconvolution, eval=FALSE}
# Generate example data (seeded so the example is reproducible)
set.seed(42)
example_data <- GenerateExampleData(
  n_cells = 1000,
  n_genes = 500,
  n_celltypes = 5,
  n_bulk_samples = 50
)

# Run deconvolution
result <- RunTorchDecon(
  seurat_object = example_data$seurat,
  bulk_data = example_data$bulk_data,
  n_samples = 1000,
  num_steps = 2000,
  verbose = FALSE
)

# The chunks below treat the rows of `predictions` as samples and its
# columns as cell types.
predictions <- result$predictions
```

## Visualization Methods

### 1. Stacked Bar Plot

The stacked bar plot is the most intuitive way to visualize cell type composition across samples.

```{r stacked-bar, eval=FALSE}
# Prepare data for plotting: expose the sample IDs (row names) as a column
plot_data <- predictions
plot_data$Sample <- rownames(plot_data)

# Convert to long format: one row per (sample, cell type) pair
plot_long <- melt(
  plot_data,
  id.vars = "Sample",
  variable.name = "CellType",
  value.name = "Fraction"
)

# Define color palette: one color per cell type column
colors <- brewer.pal(n = ncol(predictions), name = "Set2")

# Create stacked bar plot
ggplot(plot_long, aes(x = Sample, y = Fraction, fill = CellType)) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_fill_manual(values = colors) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    legend.position = "right",
    panel.grid.minor = element_blank()
  ) +
  labs(
    title = "Cell Type Composition Across Samples",
    subtitle = "TorchDecon Deconvolution Results",
    x = "Sample",
    y = "Cell Type Fraction",
    fill = "Cell Type"
  )
```

### 2. Heatmap Visualization

Heatmaps provide a comprehensive view of cell type proportions with hierarchical clustering.

```{r heatmap, eval=FALSE}
# Create annotation for samples (e.g., sample groups).
# The group vector is sized from the data instead of a hard-coded 25 per
# group, so the annotation always has exactly one entry per sample: the
# first half of the samples is Group1, the remainder Group2.
n_bulk <- nrow(predictions)
annotation_row <- data.frame(
  Group = factor(rep(
    c("Group1", "Group2"),
    each = ceiling(n_bulk / 2),
    length.out = n_bulk
  ))
)
rownames(annotation_row) <- rownames(predictions)

# Define colors
ann_colors <- list(
  Group = c(Group1 = "#E41A1C", Group2 = "#377EB8")
)

# Create heatmap (rows = samples, clustered; columns = cell types, fixed order)
pheatmap(
  as.matrix(predictions),
  color = colorRampPalette(c("white", "steelblue", "darkblue"))(100),
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  fontsize = 10,
  fontsize_row = 8,
  main = "Cell Type Fractions Heatmap"
)
```

### 3. Box Plot Comparison

Box plots are useful for comparing cell type proportions between groups.

```{r boxplot, eval=FALSE}
# Add group information.
# `plot_long` is in long format (all samples for the first cell type, then
# all samples for the next, ...), so groups must be assigned per sample, not
# by splitting the rows in half; a row-wise split would label whole cell
# types as "Control" or "Treatment".
sample_ids <- unique(as.character(plot_long$Sample))
sample_groups <- setNames(
  rep(
    c("Control", "Treatment"),
    each = ceiling(length(sample_ids) / 2),
    length.out = length(sample_ids)
  ),
  sample_ids
)
# Index by character ID so the lookup is by name even if Sample is a factor.
plot_long$Group <- unname(sample_groups[as.character(plot_long$Sample)])

# Create box plot
ggplot(plot_long, aes(x = CellType, y = Fraction, fill = Group)) +
  geom_boxplot(outlier.shape = 21, outlier.size = 2) +
  scale_fill_manual(values = c("Control" = "#66C2A5", "Treatment" = "#FC8D62")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  ) +
  labs(
    title = "Cell Type Composition by Group",
    x = "Cell Type",
    y = "Fraction"
  ) +
  # stat_compare_means() lives in ggpubr; it is namespace-qualified so this
  # chunk does not depend on ggpubr being attached. Groups are compared
  # within each cell type.
  ggpubr::stat_compare_means(
    aes(group = Group),
    method = "wilcox.test",
    label = "p.signif"
  )
```

### 4. Training History Plot

Monitoring training loss is essential for ensuring model convergence.

```{r training-history, eval=FALSE}
# Get training history; the plot below reads its `step`, `loss` and `model`
# columns
history <- GetTrainingHistory(result$ensemble)

# Plot training loss (one line per model, log-scaled y axis)
ggplot(history, aes(x = step, y = loss, color = model)) +
  geom_line(alpha = 0.7, size = 0.8) +
  scale_color_brewer(palette = "Set1") +
  scale_y_log10() +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "Training Loss Over Time",
    subtitle = "Ensemble Model Training Progress",
    x = "Training Step",
    y = "Loss (log scale)",
    color = "Model"
  )
```

### 5. Correlation Plot (with Ground Truth)

When ground truth is available, visualize prediction accuracy.

```{r correlation-plot, eval=FALSE}
# Assuming we have ground truth (e.g., from simulation)
true_fractions <- result$simulation$cell_fractions

# Flatten matrices for correlation.
# The ground truth is aligned to `predictions` by both sample (row) and
# cell type (column) name, so the flattened vectors pair up element by
# element even if the two tables are ordered differently.
pred_vec <- as.vector(as.matrix(predictions))
true_vec <- as.vector(as.matrix(
  true_fractions[rownames(predictions), colnames(predictions), drop = FALSE]
))

# Create data frame
cor_data <- data.frame(
  Predicted = pred_vec,
  True = true_vec
)

# Calculate correlation (Pearson, the default of cor())
r <- cor(pred_vec, true_vec)

# Create scatter plot; the dashed red line marks perfect agreement (y = x)
ggplot(cor_data, aes(x = True, y = Predicted)) +
  geom_point(alpha = 0.5, color = "steelblue") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
  geom_smooth(method = "lm", se = TRUE, color = "darkblue") +
  annotate(
    "text",
    x = 0.1,
    y = 0.9,
    label = paste0("r = ", round(r, 3)),
    size = 5,
    fontface = "bold"
  ) +
  coord_fixed(ratio = 1) +
  xlim(0, 1) +
  ylim(0, 1) +
  theme_minimal() +
  labs(
    title = "Prediction Accuracy",
    subtitle = "Predicted vs. True Cell Type Fractions",
    x = "True Fraction",
    y = "Predicted Fraction"
  )
```

### 6. Per-Cell Type Accuracy

Evaluate accuracy for each cell type separately.

```{r per-celltype-accuracy, eval=FALSE}
# Calculate metrics per cell type
celltypes <- colnames(predictions)

# Align the ground truth to the predicted samples (rows) and cell types
# (columns) by name, so every prediction is compared with the true value of
# the same sample. This indexing works for both matrices and data frames.
truth_aligned <- true_fractions[rownames(predictions), celltypes, drop = FALSE]

metrics_list <- lapply(celltypes, function(ct) {
  pred <- predictions[[ct]]
  truth <- truth_aligned[, ct]
  data.frame(
    CellType = ct,
    RMSE = sqrt(mean((pred - truth)^2)),
    MAE = mean(abs(pred - truth)),
    Correlation = cor(pred, truth)
  )
})
metrics_df <- do.call(rbind, metrics_list)

# Plot correlation by cell type (bars sorted from highest to lowest)
ggplot(
  metrics_df,
  aes(x = reorder(CellType, -Correlation), y = Correlation, fill = CellType)
) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_hline(yintercept = 0.9, linetype = "dashed", color = "red") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(
    title = "Prediction Accuracy by Cell Type",
    subtitle = "Pearson Correlation (dashed line = 0.9 threshold)",
    x = "Cell Type",
    y = "Correlation"
  )
```

### 7. Pie Chart for Individual Samples

Pie charts can effectively show composition for selected samples.

```{r pie-chart, eval=FALSE}
# Select a sample (here: the first one)
sample_name <- rownames(predictions)[1]
sample_data <- predictions[sample_name, ]

# Prepare data: one row per cell type plus a percentage label
pie_data <- data.frame(
  CellType = names(sample_data),
  Fraction = as.numeric(sample_data)
)
pie_data$Percentage <- paste0(round(pie_data$Fraction * 100, 1), "%")

# Create pie chart: a single stacked bar drawn in polar coordinates
ggplot(pie_data, aes(x = "", y = Fraction, fill = CellType)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  scale_fill_brewer(palette = "Set2") +
  geom_text(
    aes(label = Percentage),
    position = position_stack(vjust = 0.5),
    color = "white",
    fontface = "bold"
  ) +
  theme_void() +
  labs(
    title = paste("Cell Type Composition:", sample_name),
    fill = "Cell Type"
  )
```

## Advanced Visualization

### Ensemble Model Comparison

Compare predictions from individual models in the ensemble.

```{r ensemble-comparison, eval=FALSE}
# Get individual predictions
all_preds <- PredictFractions(
  result$ensemble,
  example_data$bulk_data,
  return_all = TRUE,
  verbose = FALSE
)

# Prepare comparison data: flatten each prediction table into one column
comparison <- data.frame(
  M256 = as.vector(as.matrix(all_preds$individual$m256)),
  M512 = as.vector(as.matrix(all_preds$individual$m512)),
  M1024 = as.vector(as.matrix(all_preds$individual$m1024)),
  Ensemble = as.vector(as.matrix(all_preds$average))
)

# Upper-panel helper: print the Pearson correlation in the panel center.
# The user coordinates are temporarily set to the unit square so the text
# can be placed at (0.5, 0.5), and are restored when the panel is done.
panel_cor <- function(x, y, ...) {
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))
  r <- round(cor(x, y), 3)
  text(0.5, 0.5, paste0("r=", r), cex = 1.5)
}

# Create pairs plot
pairs(
  comparison,
  lower.panel = panel.smooth,
  upper.panel = panel_cor,
  main = "Model Agreement in Ensemble"
)
```

## Summary

This vignette demonstrated various visualization techniques for TorchDecon results:

1. **Stacked bar plots** - Overview of composition across samples
2. **Heatmaps** - Clustered visualization with annotations
3. **Box plots** - Group comparisons
4. **Training curves** - Model convergence monitoring
5. **Correlation plots** - Accuracy assessment
6. **Per-cell type metrics** - Detailed performance evaluation
7. **Pie charts** - Individual sample composition

Effective visualization helps validate deconvolution results and communicate findings clearly.

---

**Package Author**: Zaoqu Liu
**Contact**: liuzaoqu@163.com
**GitHub**: https://github.com/Zaoqu-Liu/TorchDecon