--- title: "Bulk RNA-seq Metabolic Flux Analysis" author: "Zaoqu Liu" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 vignette: > %\VignetteIndexEntry{Bulk RNA-seq Metabolic Flux Analysis} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE ) ``` ## Overview This vignette demonstrates the complete workflow for metabolic flux analysis from bulk RNA-seq data using METAFLUX. ## Load Package and Data ```{r load-package} library(METAFLUX) library(ggplot2) # Load example bulk RNA-seq data data("bulk_test_example") data("human_blood") data("human_gem") # Inspect data cat("Gene expression matrix:\n") cat(" Dimensions:", dim(bulk_test_example), "\n") cat(" Samples:", colnames(bulk_test_example), "\n") cat(" Example genes:", head(rownames(bulk_test_example), 5), "\n") ``` ## Step 1: Calculate MRAS Metabolic Reaction Activity Scores (MRAS) integrate gene expression with metabolic network topology. ```{r calc-mras} # Calculate MRAS mras <- calculate_reaction_score(bulk_test_example) # Check output cat("\nMRAS matrix:\n") cat(" Dimensions:", dim(mras), "\n") cat(" Score range:", range(as.matrix(mras)), "\n") ``` ### MRAS Distribution ```{r mras-dist, fig.cap="Distribution of MRAS values"} mras_values <- as.vector(as.matrix(mras)) mras_df <- data.frame(MRAS = mras_values[mras_values > 0]) ggplot(mras_df, aes(x = MRAS)) + geom_histogram(bins = 50, fill = "#3498db", color = "white", alpha = 0.8) + labs( title = "Distribution of Metabolic Reaction Activity Scores", x = "MRAS Value", y = "Count" ) + theme_minimal() + theme(plot.title = element_text(hjust = 0.5, face = "bold")) ``` ## Step 2: Compute Metabolic Fluxes ```{r compute-flux} # Run flux balance analysis flux <- compute_flux(mras = mras, medium = human_blood) # Check results cat("\nFlux matrix:\n") cat(" Dimensions:", dim(flux), "\n") cat(" Flux range:", range(flux), "\n") ``` ### Flux Interpretation ```{r flux-interpret, fig.cap="Interpretation of flux values"} # Classify reactions by flux direction flux_mean <- rowMeans(flux) flux_class <- data.frame( Direction = c("Production (+)", "Consumption (-)", "Inactive (≈0)"), Count = c( sum(flux_mean > 0.001), sum(flux_mean < -0.001), sum(abs(flux_mean) <= 0.001) ) ) ggplot(flux_class, aes(x = Direction, y = Count, fill = Direction)) + geom_bar(stat = "identity", width = 0.6) + geom_text(aes(label = Count), vjust = -0.5, size = 5) + scale_fill_manual(values = c("#27ae60", "#e74c3c", "#95a5a6")) + labs( title = "Reaction Classification by Flux Direction", x = "", y = "Number of Reactions" ) + theme_minimal() + theme( plot.title = element_text(hjust = 0.5, face = "bold"), legend.position = "none" ) + ylim(0, max(flux_class$Count) * 1.1) ``` ## Step 3: Key Metabolic Indicators ### Central Carbon Metabolism ```{r central-carbon, fig.cap="Central carbon metabolism flux"} # Define key reactions central_carbon <- c( "Glucose uptake" = "HMR_9034", "Lactate secretion" = "HMR_9135", "Pyruvate transport" = "HMR_9133", "Glutamine uptake" = "HMR_9063" ) # Extract and plot cc_flux <- flux[central_carbon, , drop = FALSE] rownames(cc_flux) <- names(central_carbon) cc_df <- data.frame( Reaction = rep(rownames(cc_flux), ncol(cc_flux)), Sample = rep(colnames(cc_flux), each = nrow(cc_flux)), Flux = as.vector(as.matrix(cc_flux)) ) ggplot(cc_df, aes(x = Reaction, y = Flux, fill = Sample)) + geom_bar(stat = "identity", position = "dodge") + geom_hline(yintercept = 0, linetype = "dashed") + scale_fill_brewer(palette = "Set2") + labs( title = "Central Carbon Metabolism", subtitle = "Key reaction fluxes across samples", x = "", y = "Flux" ) + theme_minimal() + theme( plot.title = element_text(hjust = 0.5, face = "bold"), axis.text.x = element_text(angle = 30, hjust = 1) ) ``` ### Biomass Production ```{r biomass, fig.cap="Biomass production flux (growth rate proxy)"} biomass_flux <- flux["biomass_human", ] biomass_df <- data.frame( Sample = names(biomass_flux), Flux = as.numeric(biomass_flux) ) ggplot(biomass_df, aes(x = Sample, y = Flux, fill = Sample)) + geom_bar(stat = "identity", width = 0.6) + scale_fill_brewer(palette = "Set2") + labs( title = "Biomass Production Rate", subtitle = "Proxy for cellular growth rate", x = "Sample", y = "Biomass Flux" ) + theme_minimal() + theme( plot.title = element_text(hjust = 0.5, face = "bold"), legend.position = "none" ) ``` ## Step 4: Pathway Analysis ```{r pathway-analysis, fig.cap="Top metabolic pathways by activity"} # Map reactions to pathways pathway_map <- human_gem$SUBSYSTEM names(pathway_map) <- human_gem$ID # Calculate pathway activity pathways <- unique(pathway_map) pathway_activity <- sapply(pathways, function(pw) { rxns <- names(pathway_map)[pathway_map == pw] rxns <- intersect(rxns, rownames(flux)) if (length(rxns) > 0) mean(abs(flux[rxns, ])) else NA }) # Top 12 pathways top_pw <- sort(pathway_activity[!is.na(pathway_activity)], decreasing = TRUE)[1:12] pw_df <- data.frame( Pathway = factor(names(top_pw), levels = rev(names(top_pw))), Activity = top_pw ) ggplot(pw_df, aes(x = Activity, y = Pathway)) + geom_bar(stat = "identity", fill = "#2c3e50") + labs( title = "Top 12 Active Metabolic Pathways", x = "Mean Absolute Flux", y = "" ) + theme_minimal() + theme( plot.title = element_text(hjust = 0.5, face = "bold"), axis.text.y = element_text(size = 9) ) ``` ## Step 5: Quality Control ### Steady-State Verification ```{r steady-state} # Load stoichiometric matrix Hgem <- METAFLUX:::Hgem # Check steady-state constraint: S*v = 0 sv_violations <- sapply(1:ncol(flux), function(i) { sv <- Hgem$S %*% flux[, i] max(abs(sv)) }) cat("Steady-state constraint check:\n") cat(" Max violations per sample:", round(sv_violations, 6), "\n") cat(" All < 0.001:", all(sv_violations < 0.001), "\n") ``` ### Gene Coverage ```{r gene-coverage, fig.cap="Metabolic gene coverage"} gene_num <- METAFLUX:::gene_num input_genes <- rownames(bulk_test_example) metabolic_genes <- rownames(gene_num) coverage <- sum(input_genes %in% metabolic_genes) total <- length(metabolic_genes) cov_df <- data.frame( Category = c("Detected", "Missing"), Count = c(coverage, total - coverage) ) ggplot(cov_df, aes(x = "", y = Count, fill = Category)) + geom_bar(stat = "identity", width = 1) + coord_polar("y") + scale_fill_manual(values = c("Detected" = "#27ae60", "Missing" = "#e74c3c")) + labs( title = "Metabolic Gene Coverage", subtitle = sprintf("%.1f%% (%d/%d genes)", coverage/total*100, coverage, total) ) + theme_void() + theme( plot.title = element_text(hjust = 0.5, face = "bold"), plot.subtitle = element_text(hjust = 0.5) ) ``` ## Summary ```{r summary} cat("================================================\n") cat("METAFLUX Bulk RNA-seq Analysis Complete\n") cat("================================================\n\n") cat("Input:\n") cat(sprintf(" Genes: %d\n", nrow(bulk_test_example))) cat(sprintf(" Samples: %d\n", ncol(bulk_test_example))) cat(sprintf(" Metabolic gene coverage: %.1f%%\n", coverage/total*100)) cat("\nOutput:\n") cat(sprintf(" Reactions: %d\n", nrow(flux))) cat(sprintf(" Flux range: [%.4f, %.4f]\n", min(flux), max(flux))) cat("\nKey findings:\n") cat(sprintf(" Active reactions: %d\n", sum(abs(rowMeans(flux)) > 0.001))) cat(sprintf(" Glucose uptake (mean): %.4f\n", mean(flux["HMR_9034", ]))) cat(sprintf(" Lactate secretion (mean): %.4f\n", mean(flux["HMR_9135", ]))) ``` ## Author **Zaoqu Liu** - Package maintainer and developer - GitHub: [Zaoqu-Liu](https://github.com/Zaoqu-Liu) - ORCID: [0000-0002-0452-742X](https://orcid.org/0000-0002-0452-742X) - Email: liuzaoqu@163.com ## Session Information ```{r session} sessionInfo() ```