---
title: "Bulk RNA-seq Metabolic Flux Analysis"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output: 
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
vignette: >
  %\VignetteIndexEntry{Bulk RNA-seq Metabolic Flux Analysis}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  warning = FALSE,
  message = FALSE
)
```

## Overview

This vignette demonstrates the complete workflow for metabolic flux analysis from bulk RNA-seq data using METAFLUX.

## Load Package and Data

```{r load-package}
library(METAFLUX)
library(ggplot2)

# Load example bulk RNA-seq data
data("bulk_test_example")
data("human_blood")
data("human_gem")

# Inspect data
cat("Gene expression matrix:\n")
cat("  Dimensions:", dim(bulk_test_example), "\n")
cat("  Samples:", colnames(bulk_test_example), "\n")
cat("  Example genes:", head(rownames(bulk_test_example), 5), "\n")
```

## Step 1: Calculate MRAS

Metabolic Reaction Activity Scores (MRAS) integrate gene expression with metabolic network topology.

```{r calc-mras}
# Calculate MRAS
mras <- calculate_reaction_score(bulk_test_example)

# Check output
cat("\nMRAS matrix:\n")
cat("  Dimensions:", dim(mras), "\n")
cat("  Score range:", range(as.matrix(mras)), "\n")
```

### MRAS Distribution

```{r mras-dist, fig.cap="Distribution of MRAS values"}
mras_values <- as.vector(as.matrix(mras))
mras_df <- data.frame(MRAS = mras_values[mras_values > 0])

ggplot(mras_df, aes(x = MRAS)) +
  geom_histogram(bins = 50, fill = "#3498db", color = "white", alpha = 0.8) +
  labs(
    title = "Distribution of Metabolic Reaction Activity Scores",
    x = "MRAS Value",
    y = "Count"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
```

## Step 2: Compute Metabolic Fluxes

```{r compute-flux}
# Run flux balance analysis
flux <- compute_flux(mras = mras, medium = human_blood)

# Check results
cat("\nFlux matrix:\n")
cat("  Dimensions:", dim(flux), "\n")
cat("  Flux range:", range(flux), "\n")
```

### Flux Interpretation

```{r flux-interpret, fig.cap="Interpretation of flux values"}
# Classify reactions by flux direction
flux_mean <- rowMeans(flux)
flux_class <- data.frame(
  Direction = c("Production (+)", "Consumption (-)", "Inactive (≈0)"),
  Count = c(
    sum(flux_mean > 0.001),
    sum(flux_mean < -0.001),
    sum(abs(flux_mean) <= 0.001)
  )
)

ggplot(flux_class, aes(x = Direction, y = Count, fill = Direction)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = Count), vjust = -0.5, size = 5) +
  scale_fill_manual(values = c("#27ae60", "#e74c3c", "#95a5a6")) +
  labs(
    title = "Reaction Classification by Flux Direction",
    x = "",
    y = "Number of Reactions"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  ) +
  ylim(0, max(flux_class$Count) * 1.1)
```

## Step 3: Key Metabolic Indicators

### Central Carbon Metabolism

```{r central-carbon, fig.cap="Central carbon metabolism flux"}
# Define key reactions
central_carbon <- c(
  "Glucose uptake" = "HMR_9034",
  "Lactate secretion" = "HMR_9135",
  "Pyruvate transport" = "HMR_9133",
  "Glutamine uptake" = "HMR_9063"
)

# Extract and plot
cc_flux <- flux[central_carbon, , drop = FALSE]
rownames(cc_flux) <- names(central_carbon)

cc_df <- data.frame(
  Reaction = rep(rownames(cc_flux), ncol(cc_flux)),
  Sample = rep(colnames(cc_flux), each = nrow(cc_flux)),
  Flux = as.vector(as.matrix(cc_flux))
)

ggplot(cc_df, aes(x = Reaction, y = Flux, fill = Sample)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Central Carbon Metabolism",
    subtitle = "Key reaction fluxes across samples",
    x = "",
    y = "Flux"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
```

### Biomass Production

```{r biomass, fig.cap="Biomass production flux (growth rate proxy)"}
biomass_flux <- flux["biomass_human", ]

biomass_df <- data.frame(
  Sample = names(biomass_flux),
  Flux = as.numeric(biomass_flux)
)

ggplot(biomass_df, aes(x = Sample, y = Flux, fill = Sample)) +
  geom_bar(stat = "identity", width = 0.6) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Biomass Production Rate",
    subtitle = "Proxy for cellular growth rate",
    x = "Sample",
    y = "Biomass Flux"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )
```

## Step 4: Pathway Analysis

```{r pathway-analysis, fig.cap="Top metabolic pathways by activity"}
# Map reactions to pathways
pathway_map <- human_gem$SUBSYSTEM
names(pathway_map) <- human_gem$ID

# Calculate pathway activity
pathways <- unique(pathway_map)
pathway_activity <- sapply(pathways, function(pw) {
  rxns <- names(pathway_map)[pathway_map == pw]
  rxns <- intersect(rxns, rownames(flux))
  if (length(rxns) > 0) mean(abs(flux[rxns, ])) else NA
})

# Top 12 pathways
top_pw <- sort(pathway_activity[!is.na(pathway_activity)], decreasing = TRUE)[1:12]

pw_df <- data.frame(
  Pathway = factor(names(top_pw), levels = rev(names(top_pw))),
  Activity = top_pw
)

ggplot(pw_df, aes(x = Activity, y = Pathway)) +
  geom_bar(stat = "identity", fill = "#2c3e50") +
  labs(
    title = "Top 12 Active Metabolic Pathways",
    x = "Mean Absolute Flux",
    y = ""
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text.y = element_text(size = 9)
  )
```

## Step 5: Quality Control

### Steady-State Verification

```{r steady-state}
# Load stoichiometric matrix
Hgem <- METAFLUX:::Hgem

# Check steady-state constraint: S*v = 0
sv_violations <- sapply(1:ncol(flux), function(i) {
  sv <- Hgem$S %*% flux[, i]
  max(abs(sv))
})

cat("Steady-state constraint check:\n")
cat("  Max violations per sample:", round(sv_violations, 6), "\n")
cat("  All < 0.001:", all(sv_violations < 0.001), "\n")
```

### Gene Coverage

```{r gene-coverage, fig.cap="Metabolic gene coverage"}
gene_num <- METAFLUX:::gene_num
input_genes <- rownames(bulk_test_example)
metabolic_genes <- rownames(gene_num)

coverage <- sum(input_genes %in% metabolic_genes)
total <- length(metabolic_genes)

cov_df <- data.frame(
  Category = c("Detected", "Missing"),
  Count = c(coverage, total - coverage)
)

ggplot(cov_df, aes(x = "", y = Count, fill = Category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y") +
  scale_fill_manual(values = c("Detected" = "#27ae60", "Missing" = "#e74c3c")) +
  labs(
    title = "Metabolic Gene Coverage",
    subtitle = sprintf("%.1f%% (%d/%d genes)", coverage/total*100, coverage, total)
  ) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )
```

## Summary

```{r summary}
cat("================================================\n")
cat("METAFLUX Bulk RNA-seq Analysis Complete\n")
cat("================================================\n\n")

cat("Input:\n")
cat(sprintf("  Genes: %d\n", nrow(bulk_test_example)))
cat(sprintf("  Samples: %d\n", ncol(bulk_test_example)))
cat(sprintf("  Metabolic gene coverage: %.1f%%\n", coverage/total*100))

cat("\nOutput:\n")
cat(sprintf("  Reactions: %d\n", nrow(flux)))
cat(sprintf("  Flux range: [%.4f, %.4f]\n", min(flux), max(flux)))

cat("\nKey findings:\n")
cat(sprintf("  Active reactions: %d\n", sum(abs(rowMeans(flux)) > 0.001)))
cat(sprintf("  Glucose uptake (mean): %.4f\n", mean(flux["HMR_9034", ])))
cat(sprintf("  Lactate secretion (mean): %.4f\n", mean(flux["HMR_9135", ])))
```

## Author

**Zaoqu Liu** - Package maintainer and developer

- GitHub: [Zaoqu-Liu](https://github.com/Zaoqu-Liu)
- ORCID: [0000-0002-0452-742X](https://orcid.org/0000-0002-0452-742X)
- Email: liuzaoqu@163.com

## Session Information

```{r session}
sessionInfo()
```