Biological networks are fundamental representations of molecular interactions that underlie cellular processes. OmnipathR provides comprehensive tools for constructing, analyzing, and visualizing molecular interaction networks from the OmniPath database and other integrated resources.
This vignette demonstrates advanced network analysis workflows including:
Biological networks can be represented as graphs \(G = (V, E)\) where:
Key network metrics include:
| Metric | Formula | Biological Interpretation |
|---|---|---|
| Degree | \(k_i = \sum_j A_{ij}\) | Number of interaction partners |
| Betweenness | \(B_i = \sum_{s \neq i \neq t} \frac{\sigma_{st}(i)}{\sigma_{st}}\) | Information flow through a node |
| Clustering | \(C_i = \frac{2e_i}{k_i(k_i-1)}\) | Local network density |
OmniPath networks exhibit characteristic properties: 1. Scale-free topology: Degree distribution follows power law \(P(k) \sim k^{-\gamma}\) 2. Small-world property: Short average path length with high clustering 3. Modularity: Functional modules correspond to biological pathways
# Retrieve high-confidence protein-protein interactions
interactions <- omnipath(
resources = c("SIGNOR", "SignaLink3"),
organism = 9606 # Human
)
# Filter for directed, signed interactions
interactions_filtered <- interactions %>%
filter(
is_directed == 1,
!is.na(consensus_direction)
)
cat("Total interactions:", nrow(interactions), "\n")
#> Total interactions: 66412
cat("Filtered interactions:", nrow(interactions_filtered), "\n")
#> Filtered interactions: 66412For meaningful analysis, we typically focus on the largest connected component:
# Extract giant component
gc <- giant_component(network)
cat("Giant component nodes:", vcount(gc), "\n")
#> Giant component nodes: 6049
cat("Giant component edges:", ecount(gc), "\n")
#> Giant component edges: 66266
cat("Proportion of original:",
round(vcount(gc)/vcount(network) * 100, 1), "%\n")
#> Proportion of original: 96.2 %# Calculate centrality measures
V(gc)$degree <- degree(gc)
V(gc)$betweenness <- betweenness(gc, normalized = TRUE)
# Create centrality data frame
centrality_df <- data.frame(
gene = V(gc)$name,
degree = V(gc)$degree,
betweenness = V(gc)$betweenness
) %>%
arrange(desc(degree))
# Top hub genes
head(centrality_df, 15)
#> gene degree betweenness
#> 1 GNAS 372 0.0122322
#> 2 BIRC6_RPS27A 356 0.0000000
#> 3 BIRC6_UBA52 356 0.0000000
#> 4 BIRC6_UBB 356 0.0000000
#> 5 BIRC6_UBC 356 0.0000000
#> 6 RPS27A_UBE2A 356 0.0000000
#> 7 RPS27A_UBE2B 356 0.0000000
#> 8 RPS27A_UBE2C 356 0.0000000
#> 9 RPS27A_UBE2D1 356 0.0000000
#> 10 RPS27A_UBE2D2 356 0.0000000
#> 11 RPS27A_UBE2D3 356 0.0000000
#> 12 RPS27A_UBE2D4 356 0.0000000
#> 13 RPS27A_UBE2E1 356 0.0000000
#> 14 RPS27A_UBE2E2 356 0.0000000
#> 15 RPS27A_UBE2E3 356 0.0000000# Degree distribution
degree_dist <- data.frame(
degree = degree(gc)
) %>%
count(degree) %>%
mutate(
log_degree = log10(degree),
log_freq = log10(n)
)
# Plot degree distribution
ggplot(degree_dist, aes(x = log_degree, y = log_freq)) +
geom_point(color = "#007B7F", size = 2, alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE, color = "#E74C3C", linetype = "dashed") +
labs(
title = "Degree Distribution (Log-Log Scale)",
subtitle = "Scale-free networks show linear relationship",
x = expression(log[10](k)),
y = expression(log[10](P(k)))
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "gray50")
)Degree distribution of the OmniPath signaling network on log-log scale. The linear relationship indicates scale-free topology.
# Visualize top hub genes
top_hubs <- centrality_df %>% head(20)
ggplot(top_hubs, aes(x = reorder(gene, degree), y = degree)) +
geom_col(fill = "#007B7F", alpha = 0.8) +
geom_text(aes(label = degree), hjust = -0.2, size = 3) +
coord_flip() +
labs(
title = "Top 20 Hub Genes",
subtitle = "Ranked by degree centrality",
x = "Gene",
y = "Degree"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold"),
axis.text.y = element_text(size = 9)
) +
expand_limits(y = max(top_hubs$degree) * 1.1)Top 20 hub genes ranked by degree centrality in the OmniPath network.
# Find all paths between key signaling proteins
paths <- find_all_paths(
graph = gc,
start = c("EGFR"),
end = c("AKT1"),
attr = "name",
maxlen = 3
)
cat("Found", length(paths), "paths from EGFR to AKT1\n")
#> Found 26 paths from EGFR to AKT1
# Display example paths
if(length(paths) > 0) {
cat("\nExample paths:\n")
for(i in 1:min(5, length(paths))) {
cat("Path", i, ":", paste(paths[[i]], collapse = " -> "), "\n")
}
}
#>
#> Example paths:
#> Path 1 : EGFR -> ATM -> BRCA1 -> AKT1
#> Path 2 : EGFR -> ATM -> PRKDC -> AKT1
#> Path 3 : EGFR -> CALM1 -> CAMKK1 -> AKT1
#> Path 4 : EGFR -> CALM2 -> CAMKK1 -> AKT1
#> Path 5 : EGFR -> CALM3 -> CAMKK1 -> AKT1# Extract subnetwork around key nodes
key_genes <- c("TP53", "EGFR", "AKT1", "MTOR", "MAPK1", "SRC")
# Get first neighbors
subnet_nodes <- unique(unlist(lapply(key_genes, function(g) {
if(g %in% V(gc)$name) {
c(g, names(neighbors(gc, g)))
}
})))
# Create subnetwork
if(length(subnet_nodes) > 0) {
subnet <- induced_subgraph(gc, subnet_nodes[subnet_nodes %in% V(gc)$name])
cat("Subnetwork nodes:", vcount(subnet), "\n")
cat("Subnetwork edges:", ecount(subnet), "\n")
# Simple visualization
V(subnet)$color <- ifelse(V(subnet)$name %in% key_genes, "#E74C3C", "#3498DB")
V(subnet)$size <- ifelse(V(subnet)$name %in% key_genes, 12, 6)
plot(subnet,
vertex.label = ifelse(V(subnet)$name %in% key_genes, V(subnet)$name, ""),
vertex.label.cex = 0.8,
vertex.label.color = "black",
edge.arrow.size = 0.3,
edge.color = "gray70",
layout = layout_with_fr(subnet),
main = "Oncogenic Signaling Subnetwork")
}
#> Subnetwork nodes: 659
#> Subnetwork edges: 2428Subnetwork around key oncogenic signaling proteins showing first-order neighbors.
# Get top hub genes
hub_genes <- centrality_df %>%
head(10) %>%
pull(gene)
# Retrieve functional annotations
hub_annotations <- annotations(
proteins = hub_genes,
resources = c("HPA_subcellular")
)
if(nrow(hub_annotations) > 0) {
# Summarize annotations
annotation_summary <- hub_annotations %>%
group_by(genesymbol, label) %>%
summarise(value = first(value), .groups = "drop") %>%
head(20)
print(annotation_summary)
}
#> # A tibble: 5 × 3
#> genesymbol label value
#> <chr> <chr> <chr>
#> 1 GNAS additional_location Cytosol;Plasma membrane
#> 2 GNAS location Cytosol
#> 3 GNAS main_location Nucleoplasm
#> 4 GNAS reliability Approved
#> 5 GNAS status additionalsessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
#> [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] tidyr_1.3.2 tibble_3.3.1 bookdown_0.46 magrittr_2.0.5 ggraph_2.2.2 igraph_2.3.1
#> [7] ggplot2_4.0.3 dplyr_1.2.1 Matrix_1.7-5 OmnipathR_3.19.1 BiocStyle_2.41.0
#>
#> loaded via a namespace (and not attached):
#> [1] tidyselect_1.2.1 viridisLite_0.4.3 farver_2.1.2 blob_1.3.0 viridis_0.6.5
#> [6] R.utils_2.13.0 S7_0.2.2 fastmap_1.2.0 tweenr_2.0.3 XML_3.99-0.23
#> [11] digest_0.6.39 timechange_0.4.0 lifecycle_1.0.5 RSQLite_3.52.0 compiler_4.6.0
#> [16] rlang_1.2.0 sass_0.4.10 progress_1.2.3 tools_4.6.0 utf8_1.2.6
#> [21] yaml_2.3.12 knitr_1.51 labeling_0.4.3 graphlayouts_1.2.3 prettyunits_1.2.0
#> [26] bit_4.6.0 curl_7.1.0 xml2_1.5.2 RColorBrewer_1.1-3 R.matlab_3.7.0
#> [31] withr_3.0.2 purrr_1.2.2 sys_3.4.3 R.oo_1.27.1 polyclip_1.10-7
#> [36] grid_4.6.0 scales_1.4.0 MASS_7.3-65 cli_3.6.6 rmarkdown_2.31
#> [41] crayon_1.5.3 generics_0.1.4 otel_0.2.0 httr_1.4.8 tzdb_0.5.0
#> [46] sessioninfo_1.2.3 readxl_1.5.0 DBI_1.3.0 cachem_1.1.0 ggforce_0.5.0
#> [51] stringr_1.6.0 splines_4.6.0 rvest_1.0.5 parallel_4.6.0 BiocManager_1.30.27
#> [56] selectr_0.5-1 cellranger_1.1.0 vctrs_0.7.3 jsonlite_2.0.0 hms_1.1.4
#> [61] ggrepel_0.9.8 bit64_4.8.2 maketools_1.3.2 jquerylib_0.1.4 glue_1.8.1
#> [66] lubridate_1.9.5 stringi_1.8.7 gtable_0.3.6 later_1.4.8 logger_0.4.2
#> [71] pillar_1.11.1 rappdirs_0.3.4 htmltools_0.5.9 R6_2.6.1 httr2_1.2.2
#> [76] tcltk_4.6.0 tidygraph_1.3.1 vroom_1.7.1 evaluate_1.0.5 lattice_0.22-9
#> [81] readr_2.2.0 R.methodsS3_1.8.2 backports_1.5.1 memoise_2.0.1 bslib_0.11.0
#> [86] Rcpp_1.1.1-1.1 zip_2.3.3 nlme_3.1-169 gridExtra_2.3 checkmate_2.3.4
#> [91] mgcv_1.9-4 xfun_0.57 fs_2.1.0 buildtools_1.0.0 pkgconfig_2.0.3Türei D, Korcsmáros T, Saez-Rodriguez J. OmniPath: guidelines and gateway for literature-curated signaling pathway resources. Nature Methods 2016;13:966-967.
Türei D, et al. Integrated intra- and intercellular signaling knowledge for multicellular omics analysis. Molecular Systems Biology 2021;17:e9923.
Csardi G, Nepusz T. The igraph software package for complex network research. InterJournal Complex Systems 2006;1695.