---
title: "Visualization and Result Interpretation"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output: 
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
vignette: >
  %\VignetteIndexEntry{Visualization and Result Interpretation}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
# Chunk defaults for the whole vignette: console output formatting,
# figure geometry, and suppression of messages/warnings in the output.
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  fig.align = "center",
  fig.width = 8,
  fig.height = 6,
  dpi = 100,
  warning = FALSE,
  message = FALSE
)
```

## Introduction

This vignette demonstrates how to visualize and interpret TorchDecon results. Effective visualization is crucial for understanding cell type composition patterns and validating deconvolution accuracy.
 
**Author**: Zaoqu Liu (liuzaoqu@163.com)

## Setup

```{r load-packages, eval=FALSE}
library(TorchDecon)
library(Seurat)
library(ggplot2)
library(reshape2)
library(pheatmap)
library(RColorBrewer)
```

## Generate Example Data and Run Deconvolution

```{r run-deconvolution, eval=FALSE}
# Seed the RNG before generating the example data
set.seed(42)

# Example data set; its `seurat` and `bulk_data` elements are used below
example_data <- GenerateExampleData(
  n_cells = 1000, n_genes = 500,
  n_celltypes = 5, n_bulk_samples = 50
)

# Run the deconvolution on the example bulk data
result <- RunTorchDecon(
  seurat_object = example_data$seurat,
  bulk_data     = example_data$bulk_data,
  n_samples     = 1000,
  num_steps     = 2000,
  verbose       = FALSE
)

# Keep the predicted fractions; the plotting chunks below all start from this
predictions <- result$predictions
```

## Visualization Methods

### 1. Stacked Bar Plot

The stacked bar plot is the most intuitive way to visualize cell type composition across samples.

```{r stacked-bar, eval=FALSE}
# Carry the sample identifiers along as a regular column so they
# survive the reshape to long format
plot_data <- predictions
plot_data$Sample <- rownames(plot_data)

# Long format: one row per (sample, cell type) pair
plot_long <- melt(
  plot_data,
  id.vars = "Sample",
  variable.name = "CellType",
  value.name = "Fraction"
)

# One "Set2" color per cell type column
colors <- brewer.pal(n = ncol(predictions), name = "Set2")

# One bar per sample, stacked segments sized by the predicted fraction
ggplot(plot_long, aes(x = Sample, y = Fraction, fill = CellType)) +
  geom_col(width = 0.8) +
  scale_fill_manual(values = colors) +
  labs(
    title = "Cell Type Composition Across Samples",
    subtitle = "TorchDecon Deconvolution Results",
    x = "Sample",
    y = "Cell Type Fraction",
    fill = "Cell Type"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  )
```

### 2. Heatmap Visualization

Heatmaps provide a comprehensive view of cell type proportions with hierarchical clustering.

```{r heatmap, eval=FALSE}
# Create annotation for samples (e.g., sample groups).
# The group sizes are derived from the prediction table (first half of the
# rows -> Group1, remainder -> Group2) instead of being hard-coded, so the
# chunk keeps working when the number of bulk samples is changed.
n_samples <- nrow(predictions)
n_group1 <- ceiling(n_samples / 2)
annotation_row <- data.frame(
  Group = factor(
    rep(c("Group1", "Group2"), times = c(n_group1, n_samples - n_group1)),
    levels = c("Group1", "Group2")
  )
)
# pheatmap matches annotation rows to matrix rows by row name
rownames(annotation_row) <- rownames(predictions)

# Define colors for the annotation levels
ann_colors <- list(
  Group = c(Group1 = "#E41A1C", Group2 = "#377EB8")
)

# Create heatmap: samples in rows (clustered), cell types in columns
# (kept in their original order)
pheatmap(
  as.matrix(predictions),
  color = colorRampPalette(c("white", "steelblue", "darkblue"))(100),
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  fontsize = 10,
  fontsize_row = 8,
  main = "Cell Type Fractions Heatmap"
)
```

### 3. Box Plot Comparison

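Box plots are useful for comparing cell type proportions between groups. The significance labels are added with `stat_compare_means()` from the **ggpubr** package, which must be installed in addition to the packages loaded above.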

```{r boxplot, eval=FALSE}
# Add group information per SAMPLE.
# `plot_long` is in long format with the cell types stacked one after another,
# so splitting its rows in half would put whole cell types (not samples) into
# each group. Instead, build a sample -> group lookup (first half of the
# samples -> Control, remainder -> Treatment) and map it onto every row.
sample_ids <- unique(as.character(plot_long$Sample))
n_control <- ceiling(length(sample_ids) / 2)
sample_group <- setNames(
  rep(
    c("Control", "Treatment"),
    times = c(n_control, length(sample_ids) - n_control)
  ),
  sample_ids
)
plot_long$Group <- unname(sample_group[as.character(plot_long$Sample)])

# Create box plot: one pair of boxes (Control / Treatment) per cell type
ggplot(plot_long, aes(x = CellType, y = Fraction, fill = Group)) +
  geom_boxplot(outlier.shape = 21, outlier.size = 2) +
  scale_fill_manual(values = c("Control" = "#66C2A5", "Treatment" = "#FC8D62")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  ) +
  labs(
    title = "Cell Type Composition by Group",
    x = "Cell Type",
    y = "Fraction"
  ) +
  # stat_compare_means() lives in ggpubr, which is not attached by the setup
  # chunk, so it is called with its namespace. The explicit `group` aesthetic
  # requests a Control-vs-Treatment test within each cell type.
  ggpubr::stat_compare_means(
    aes(group = Group),
    method = "wilcox.test",
    label = "p.signif"
  )
```

### 4. Training History Plot

Monitoring the training loss is essential for checking that the models have converged.

```{r training-history, eval=FALSE}
# Training history of the ensemble; the plot below reads its
# `step`, `loss` and `model` columns
history <- GetTrainingHistory(result$ensemble)

# One loss curve per model, drawn on a log10 y axis
ggplot(history) +
  geom_line(
    aes(x = step, y = loss, color = model),
    alpha = 0.7,
    size = 0.8
  ) +
  scale_y_log10() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Training Loss Over Time",
    subtitle = "Ensemble Model Training Progress",
    x = "Training Step",
    y = "Loss (log scale)",
    color = "Model"
  ) +
  theme_minimal() +
  theme(legend.position = "top")
```

### 5. Correlation Plot (with Ground Truth)

When ground truth is available, visualize prediction accuracy.

```{r correlation-plot, eval=FALSE}
# Assuming we have ground truth (e.g., from simulation)
# NOTE(review): these fractions are read from `result$simulation`; confirm
# that they describe the same samples as `predictions` before trusting the
# accuracy numbers below.
true_fractions <- result$simulation$cell_fractions

# Fail early with a clear message if a predicted sample has no ground truth;
# otherwise the lookup below would either error obscurely or yield NA rows.
stopifnot(
  "every predicted sample needs a ground-truth row" =
    all(rownames(predictions) %in% rownames(true_fractions)),
  "every predicted cell type needs a ground-truth column" =
    all(colnames(predictions) %in% colnames(true_fractions))
)

# Align the ground truth to the predictions by sample (rows) AND cell type
# (columns): the matrices are flattened positionally, so both must be in the
# same order for the pairs (True, Predicted) to be meaningful.
true_aligned <- true_fractions[rownames(predictions), colnames(predictions)]

# Flatten matrices for correlation
pred_vec <- as.vector(as.matrix(predictions))
true_vec <- as.vector(as.matrix(true_aligned))

# Create data frame with one row per (sample, cell type) pair
cor_data <- data.frame(
  Predicted = pred_vec,
  True = true_vec
)

# Calculate Pearson correlation across all pairs
r <- cor(pred_vec, true_vec)

# Create scatter plot; the dashed red line marks perfect agreement (y = x)
ggplot(cor_data, aes(x = True, y = Predicted)) +
  geom_point(alpha = 0.5, color = "steelblue") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
  geom_smooth(method = "lm", se = TRUE, color = "darkblue") +
  annotate("text", x = 0.1, y = 0.9,
           label = paste0("r = ", round(r, 3)),
           size = 5, fontface = "bold") +
  coord_fixed(ratio = 1) +
  xlim(0, 1) + ylim(0, 1) +
  theme_minimal() +
  labs(
    title = "Prediction Accuracy",
    subtitle = "Predicted vs. True Cell Type Fractions",
    x = "True Fraction",
    y = "Predicted Fraction"
  )
```

### 6. Per-Cell Type Accuracy

Evaluate accuracy for each cell type separately.

```{r per-celltype-accuracy, eval=FALSE}
# Calculate metrics per cell type.
# The ground truth is looked up by sample name and cell type name so that
# each predicted value is compared with the true value of the SAME sample,
# regardless of the row order of `true_fractions` (and regardless of whether
# it is a data frame or a matrix).
celltypes <- colnames(predictions)
sample_ids <- rownames(predictions)
metrics_list <- lapply(celltypes, function(ct) {
  pred <- predictions[[ct]]
  truth <- true_fractions[sample_ids, ct]
  data.frame(
    CellType = ct,
    RMSE = sqrt(mean((pred - truth)^2)),
    MAE = mean(abs(pred - truth)),
    Correlation = cor(pred, truth)
  )
})
# One row per cell type
metrics_df <- do.call(rbind, metrics_list)

# Plot correlation by cell type, bars sorted from highest to lowest correlation
ggplot(metrics_df, aes(x = reorder(CellType, -Correlation), y = Correlation, fill = CellType)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_hline(yintercept = 0.9, linetype = "dashed", color = "red") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(
    title = "Prediction Accuracy by Cell Type",
    subtitle = "Pearson Correlation (dashed line = 0.9 threshold)",
    x = "Cell Type",
    y = "Correlation"
  )
```

### 7. Pie Chart for Individual Samples

Pie charts can effectively show composition for selected samples.

```{r pie-chart, eval=FALSE}
# Use the first sample of the prediction table as the example
sample_name <- rownames(predictions)[1]
sample_data <- predictions[sample_name, ]

# One row per cell type, plus a preformatted percentage label
fractions <- as.numeric(sample_data)
pie_data <- data.frame(
  CellType = names(sample_data),
  Fraction = fractions
)
pie_data$Percentage <- paste0(round(fractions * 100, 1), "%")

# A single stacked bar wrapped into polar coordinates gives the pie;
# labels are centered within each slice
ggplot(pie_data, aes(x = "", y = Fraction, fill = CellType)) +
  geom_col(width = 1) +
  geom_text(
    aes(label = Percentage),
    position = position_stack(vjust = 0.5),
    color = "white",
    fontface = "bold"
  ) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = paste("Cell Type Composition:", sample_name),
    fill = "Cell Type"
  ) +
  theme_void()
```

## Advanced Visualization

### Ensemble Model Comparison

Compare predictions from individual models in the ensemble.

```{r ensemble-comparison, eval=FALSE}
# Get individual predictions (per-model results plus their average)
all_preds <- PredictFractions(result$ensemble, example_data$bulk_data,
                               return_all = TRUE, verbose = FALSE)

# Prepare comparison data: each prediction table is flattened to one vector,
# giving one column per model and one row per (sample, cell type) value
comparison <- data.frame(
  M256 = as.vector(as.matrix(all_preds$individual$m256)),
  M512 = as.vector(as.matrix(all_preds$individual$m512)),
  M1024 = as.vector(as.matrix(all_preds$individual$m1024)),
  Ensemble = as.vector(as.matrix(all_preds$average))
)

# Create pairs plot: smoothed scatter plots below the diagonal,
# pairwise correlation coefficients above it
pairs(comparison,
      lower.panel = panel.smooth,
      upper.panel = function(x, y) {
        # Temporarily switch the panel to unit coordinates so the label can
        # be centered at (0.5, 0.5); restore the caller's coordinate system
        # on exit so the graphics state is not left modified.
        usr <- par("usr")
        on.exit(par(usr = usr), add = TRUE)
        par(usr = c(0, 1, 0, 1))
        r <- round(cor(x, y), 3)
        text(0.5, 0.5, paste0("r=", r), cex = 1.5)
      },
      main = "Model Agreement in Ensemble")
```

## Summary

This vignette demonstrated various visualization techniques for TorchDecon results:

1. **Stacked bar plots** - Overview of composition across samples
2. **Heatmaps** - Clustered visualization with annotations
3. **Box plots** - Group comparisons
4. **Training curves** - Model convergence monitoring
5. **Correlation plots** - Accuracy assessment
6. **Per-cell type metrics** - Detailed performance evaluation
7. **Pie charts** - Individual sample composition
8. **Ensemble comparison** - Agreement between the individual models and their average

Effective visualization helps validate deconvolution results and communicate findings clearly.

---

**Package Author**: Zaoqu Liu  
**Contact**: liuzaoqu@163.com  
**GitHub**: https://github.com/Zaoqu-Liu/TorchDecon