--- title: "Visualization Guide" author: "Zaoqu Liu" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Visualization Guide} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6, fig.align = "center", dpi = 100 ) ``` ## Introduction This vignette demonstrates various visualization approaches for CellProgramMapper results. We cover: 1. Usage matrix heatmaps 2. Program activity on UMAP/t-SNE 3. Score distributions 4. Program correlation analysis 5. Publication-ready figures ## Load Example Data ```{r load-libs, eval=TRUE, message=FALSE, warning=FALSE} library(CellProgramMapper) # For visualization if (!requireNamespace("ggplot2", quietly = TRUE)) { install.packages("ggplot2") } if (!requireNamespace("pheatmap", quietly = TRUE)) { install.packages("pheatmap") } library(ggplot2) library(pheatmap) ``` ## Simulated Example Data For demonstration, we create simulated results: ```{r simulate-data, eval=TRUE} set.seed(42) # Simulate usage matrix (cells × programs) n_cells <- 500 n_programs <- 8 # Create usage with structure group <- sample(1:4, n_cells, replace = TRUE) usage <- matrix(0, nrow = n_cells, ncol = n_programs) # Each group has dominant programs for (i in 1:n_cells) { base_usage <- runif(n_programs, 0.01, 0.1) dominant <- ((group[i] - 1) * 2 + 1):((group[i] - 1) * 2 + 2) base_usage[dominant] <- runif(2, 0.3, 0.5) usage[i, ] <- base_usage / sum(base_usage) # Normalize } colnames(usage) <- paste0("GEP", 1:n_programs) rownames(usage) <- paste0("Cell", 1:n_cells) # Simulate UMAP coordinates umap <- data.frame( UMAP1 = c(rnorm(n_cells/4, -3, 1), rnorm(n_cells/4, 3, 1), rnorm(n_cells/4, 0, 1), rnorm(n_cells/4, 0, 1)), UMAP2 = c(rnorm(n_cells/4, 0, 1), rnorm(n_cells/4, 0, 1), rnorm(n_cells/4, -3, 1), rnorm(n_cells/4, 3, 1)) ) rownames(umap) <- rownames(usage) # Simulate scores scores <- data.frame( Exhaustion = rowSums(usage[, c("GEP1", "GEP2")]), Cytotoxicity = rowSums(usage[, c("GEP3", "GEP4")]), Proliferation = rowSums(usage[, c("GEP5", "GEP6")]) ) ``` ## Usage Matrix Heatmap The usage matrix shows the contribution of each GEP to each cell's expression profile. ```{r usage-heatmap, eval=TRUE, fig.cap="Usage matrix heatmap showing GEP contributions across cells"} # Order cells by dominant program dominant_program <- apply(usage, 1, which.max) cell_order <- order(dominant_program, decreasing = FALSE) # Create annotation annotation_row <- data.frame( DominantGEP = factor(paste0("GEP", dominant_program)) ) rownames(annotation_row) <- rownames(usage) # Color palette colors <- colorRampPalette(c("#f7fbff", "#08306b"))(100) pheatmap( usage[cell_order, ], cluster_rows = FALSE, cluster_cols = TRUE, show_rownames = FALSE, annotation_row = annotation_row[cell_order, , drop = FALSE], color = colors, main = "Cell × GEP Usage Matrix", fontsize = 10 ) ``` ## Program Activity on UMAP Visualize GEP activity overlaid on dimensionality reduction. ```{r umap-activity, eval=TRUE, fig.height=8, fig.cap="GEP activity visualized on UMAP coordinates"} # Combine data plot_data <- cbind(umap, usage) # Create multi-panel plot par(mfrow = c(2, 4), mar = c(4, 4, 2, 2)) for (gep in colnames(usage)) { colors <- colorRampPalette(c("#f7f7f7", "#d73027"))(100) vals <- plot_data[[gep]] col_idx <- cut(vals, breaks = 100, labels = FALSE) plot(plot_data$UMAP1, plot_data$UMAP2, col = colors[col_idx], pch = 16, cex = 0.6, xlab = "UMAP1", ylab = "UMAP2", main = gep) } ``` ## Score Distributions ### Violin Plots ```{r score-violin, eval=TRUE, fig.cap="Score distributions across groups"} # Create data for violin plot scores$Group <- factor(paste0("Group", group)) # Reshape for ggplot library(reshape2) scores_long <- melt(scores, id.vars = "Group", variable.name = "Score", value.name = "Value") ggplot(scores_long, aes(x = Group, y = Value, fill = Score)) + geom_violin(alpha = 0.7, scale = "width") + geom_boxplot(width = 0.1, fill = "white", alpha = 0.8) + facet_wrap(~Score, scales = "free_y") + theme_minimal() + labs(title = "Score Distributions by Group", x = "Cell Group", y = "Score Value") + theme(legend.position = "none", strip.text = element_text(face = "bold", size = 12)) ``` ### Ridge Plot ```{r ridge-plot, eval=TRUE, fig.height=6, fig.cap="Ridge plot of score distributions"} # Simple ridge plot using base R par(mfrow = c(3, 1), mar = c(3, 4, 2, 1)) for (score_name in c("Exhaustion", "Cytotoxicity", "Proliferation")) { # Get density for each group densities <- lapply(1:4, function(g) { d <- density(scores[[score_name]][scores$Group == paste0("Group", g)]) data.frame(x = d$x, y = d$y, group = g) }) # Plot x_range <- range(sapply(densities, function(d) range(d$x))) y_max <- max(sapply(densities, function(d) max(d$y))) * 1.2 plot(NULL, xlim = x_range, ylim = c(-0.5, y_max * 4.5), xlab = score_name, ylab = "", yaxt = "n", main = paste0(score_name, " Score")) cols <- c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3") for (i in 1:4) { d <- densities[[i]] offset <- (i - 1) * y_max polygon(c(d$x, rev(d$x)), c(d$y + offset, rep(offset, length(d$x))), col = adjustcolor(cols[i], alpha.f = 0.6), border = cols[i]) } axis(2, at = (0:3) * y_max + y_max/2, labels = paste0("Group", 1:4), las = 1) } ``` ## Program Correlation Analysis ### Correlation Heatmap ```{r corr-heatmap, eval=TRUE, fig.cap="Correlation between GEPs"} # Compute correlation usage_cor <- cor(usage) # Custom color palette cor_colors <- colorRampPalette(c("#2166ac", "#f7f7f7", "#b2182b"))(100) pheatmap( usage_cor, color = cor_colors, breaks = seq(-1, 1, length.out = 101), display_numbers = TRUE, number_format = "%.2f", fontsize_number = 8, main = "GEP Correlation Matrix" ) ``` ### Network Visualization ```{r corr-network, eval=TRUE, fig.cap="GEP correlation network"} # Simple network plot par(mar = c(1, 1, 2, 1)) plot(NULL, xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5), xlab = "", ylab = "", axes = FALSE, main = "GEP Correlation Network") # Arrange nodes in circle n <- ncol(usage) angles <- seq(0, 2*pi, length.out = n + 1)[1:n] x_pos <- cos(angles) y_pos <- sin(angles) # Draw edges (correlations > 0.3) for (i in 1:(n-1)) { for (j in (i+1):n) { r <- usage_cor[i, j] if (abs(r) > 0.3) { col <- if (r > 0) "#b2182b" else "#2166ac" lwd <- abs(r) * 3 segments(x_pos[i], y_pos[i], x_pos[j], y_pos[j], col = adjustcolor(col, alpha.f = 0.5), lwd = lwd) } } } # Draw nodes points(x_pos, y_pos, pch = 19, cex = 4, col = "#4a4a4a") text(x_pos, y_pos, colnames(usage), col = "white", cex = 0.7, font = 2) legend("topright", legend = c("Positive corr.", "Negative corr."), col = c("#b2182b", "#2166ac"), lwd = 2, bty = "n") ``` ## Publication-Ready Figures ### Combined Panel Figure ```{r pub-figure, eval=TRUE, fig.width=10, fig.height=8, fig.cap="Publication-ready combined visualization"} par(mfrow = c(2, 2), mar = c(4, 4, 3, 2)) # Panel A: UMAP colored by dominant GEP dominant <- factor(paste0("GEP", apply(usage, 1, which.max))) cols <- rainbow(8, alpha = 0.7) plot(umap$UMAP1, umap$UMAP2, col = cols[dominant], pch = 16, xlab = "UMAP1", ylab = "UMAP2", main = "A. Dominant GEP") legend("topright", levels(dominant), col = cols, pch = 16, cex = 0.6, ncol = 2) # Panel B: Usage bar plot (top cells) top_cells <- head(order(usage[, "GEP1"], decreasing = TRUE), 20) barplot(t(usage[top_cells, ]), col = cols, border = NA, las = 2, xlab = "", ylab = "Usage", main = "B. Top GEP1 Cells") # Panel C: Score comparison boxplot(Exhaustion ~ Group, data = scores, col = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3"), xlab = "Group", ylab = "Exhaustion Score", main = "C. Exhaustion by Group") # Panel D: Program correlation image(1:8, 1:8, usage_cor[8:1, ], col = cor_colors, xlab = "", ylab = "", axes = FALSE, main = "D. GEP Correlations") axis(1, at = 1:8, labels = colnames(usage), las = 2, cex.axis = 0.8) axis(2, at = 1:8, labels = rev(colnames(usage)), las = 1, cex.axis = 0.8) ``` ## Saving High-Resolution Figures ```r # For publication (300 DPI) pdf("figures/usage_heatmap.pdf", width = 8, height = 6) pheatmap(usage[cell_order, ], ...) dev.off() # For presentations (PNG) png("figures/umap_panel.png", width = 2400, height = 1800, res = 300) # ... plotting code ... dev.off() # For vector graphics (SVG) svg("figures/correlation_network.svg", width = 8, height = 6) # ... plotting code ... dev.off() ``` ## Integration with Seurat When working with Seurat objects, use built-in visualization functions: ```r # Add results to Seurat seurat_obj <- add_results_to_seurat(seurat_obj, result) # Seurat's DimPlot Seurat::DimPlot(seurat_obj, group.by = "Multinomial_Label", label = TRUE, repel = TRUE) # Seurat's FeaturePlot for GEP usage Seurat::FeaturePlot(seurat_obj, features = c("GEP1", "GEP2", "GEP3", "GEP4"), ncol = 2) # Seurat's VlnPlot for scores Seurat::VlnPlot(seurat_obj, features = "Exhaustion_Score", group.by = "seurat_clusters") ``` ## Tips for Effective Visualization 1. **Color choice**: Use colorblind-friendly palettes 2. **Scale bars**: Always include when showing continuous values 3. **Labels**: Make text readable (≥8pt for publication) 4. **Consistency**: Use same color scheme across related figures 5. **Simplicity**: Avoid cluttering with unnecessary elements ## Session Info ```{r session} sessionInfo() ```