---
title: "Getting Started with MOFSR"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
vignette: >
  %\VignetteIndexEntry{Getting Started with MOFSR}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
# Global chunk defaults. `eval = FALSE` means the chunks below are displayed
# but not executed when the vignette is built.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  warning = FALSE,
  message = FALSE,
  eval = FALSE
)
```

## Introduction

**MOFSR** (Multi-Omics Fusion for Subtype Recognition) is a comprehensive R package for integrative analysis of multi-omics data. The package provides:

- **15 multi-omics clustering algorithms** with internal implementations
- **17 classification methods** for subtype prediction
- **Comprehensive visualization** tools
- **Parallel computing** support for large-scale analyses

### Author

**Zaoqu Liu**

- Email: liuzaoqu@163.com
- GitHub: [https://github.com/Zaoqu-Liu](https://github.com/Zaoqu-Liu)
- ORCID: [0000-0002-0452-742X](https://orcid.org/0000-0002-0452-742X)

## Installation

```{r eval=FALSE}
# From R-Universe (Recommended)
install.packages("MOFSR", repos = "https://zaoqu-liu.r-universe.dev")

# From GitHub
remotes::install_github("Zaoqu-Liu/MOFSR")
```

## Quick Start

### Generate Simulated Multi-Omics Data

```{r generate-data}
library(MOFSR)
set.seed(42)

# Simulate three-omics data with 3 subtypes
n_samples <- 60
n_features <- c(500, 200, 100) # mRNA, miRNA, methylation

# Generate cluster labels
true_clusters <- rep(1:3, each = 20)

# Generate one simulated omics layer with cluster structure.
#   n        number of samples; must equal length(clusters)
#   p        number of features
#   clusters cluster label per sample, used to index the rows of the
#            simulated centre matrix (so labels are expected to be 1..K)
# Returns a p x n numeric matrix (features x samples) with
# "Feature_*" row names and "Sample_*" column names.
generate_omics <- function(n, p, clusters) {
  # Fail early with a clear message instead of a dimnames error later on.
  stopifnot(length(clusters) == n)

  n_clusters <- length(unique(clusters))
  centers <- matrix(rnorm(n_clusters * p, sd = 2), n_clusters, p)

  # One noise column per sample. Building the matrix explicitly keeps the
  # result a p x n matrix even when p == 1.
  noise <- matrix(rnorm(n * p, sd = 1), nrow = p, ncol = n)
  data <- t(centers[clusters, , drop = FALSE]) + noise

  dimnames(data) <- list(
    paste0("Feature_", seq_len(p)),
    paste0("Sample_", seq_len(n))
  )
  data # features x samples
}

data_list <- list(
  mRNA = generate_omics(n_samples, n_features[1], true_clusters),
  miRNA = generate_omics(n_samples, n_features[2], true_clusters),
  methylation = generate_omics(n_samples, n_features[3], true_clusters)
)

cat("Data dimensions:\n")
vapply(data_list, dim, integer(2))
```

### Run SNF Clustering

<!-- NOTE(review): `.adjusted_rand_index()` is dot-prefixed and called without a namespace qualifier; confirm that MOFSR exports it, otherwise this chunk fails when evaluated. -->

```{r snf-clustering}
# Run Similarity Network Fusion
result_snf <- run_snf(data_list, n_clusters = 3)

# View results
head(result_snf)

# Calculate Adjusted Rand Index
ari <- .adjusted_rand_index(result_snf$Cluster, true_clusters)
cat("\nAdjusted Rand Index (SNF):", round(ari, 4))
```

### UMAP Visualization

```{r umap-viz, fig.cap="UMAP visualization of multi-omics clustering results"}
# Compute UMAP coordinates
umap_coords <- compute_umap(data_list, n_epochs = 100, seed = 42)

# Plot with cluster colors
plot_umap(umap_coords, result_snf, title = "SNF Clustering Results")
```

## Compare Multiple Algorithms

```{r compare-algorithms}
# Run multiple algorithms
algorithms <- c("SNF", "RGCCA", "CPCA")

results <- lapply(algorithms, function(alg) {
  run_integration(data_list, algorithm = alg, n_clusters = 3)
})
names(results) <- algorithms

# Compare clustering agreement
ari_matrix <- compare_clusterings(results)
print(round(ari_matrix, 3))
```

```{r compare-viz, fig.cap="Algorithm agreement measured by Adjusted Rand Index"}
plot_algorithm_comparison(results, title = "Algorithm Agreement (ARI)")
```

## Consensus Clustering

```{r consensus, fig.cap="Consensus matrix heatmap showing clustering stability"}
# Run consensus clustering on mRNA data
cc_result <- consensus_cluster(data_list$mRNA, maxK = 5, reps = 50, seed = 42)

# Plot consensus heatmap for K=3
plot_consensus_heatmap(
  cc_result[[3]]$consensusMatrix,
  title = "Consensus Matrix (K=3)"
)
```

```{r pac, fig.cap="PAC scores for optimal cluster number selection"}
# Calculate PAC for optimal K selection
pac_values <- calc_pac(cc_result)
print(pac_values)

# Plot quality metrics
plot_cluster_quality(pac_values, title = "Cluster Quality (PAC)")
```

## Data Preprocessing

```{r preprocessing}
# Normalize data
data_norm <- normalize_omics(data_list, method = "zscore")

# Filter low-variance features
data_filtered <- filter_low_variance(data_list, min_var = 0.01)

# Quality control summary
qc <- qc_summary(data_list)
print(qc)
```

## Session Info

```{r session}
sessionInfo()
```