Multi-Species Analysis

Introduction

NOVA supports cell-cell communication analysis across 21 species through integration with NCBI HomoloGene. This enables:

  • Analysis of model organism data (mouse, rat, zebrafish, etc.)
  • Cross-species comparative studies
  • Translational research applications

Supported Species

library(NOVA)

# View all supported species
# The result prints as a named character vector: each name is the species key
# passed to other NOVA functions (e.g. species = "mouse") and each value is
# the taxonomy ID stored as a string.
species <- supported_species()
print(species)
#>                human                mouse           chimpanzee 
#>               "9606"              "10090"               "9598" 
#>                  dog               monkey               cattle 
#>               "9615"               "9544"               "9913" 
#>                  rat              chicken                 frog 
#>              "10116"               "9031"               "8364" 
#>            zebrafish             fruitfly             mosquito 
#>               "7955"               "7227"               "7165" 
#>             nematode           thalecress                 rice 
#>               "6239"               "3702"               "4530" 
#>      riceblastfungus           bakeryeast     neurosporacrassa 
#>             "318829"               "4932"               "5141" 
#>         fissionyeast eremotheciumgossypii  kluyveromyceslactis 
#>               "4896"              "33169"              "28985"

Species Details

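| Species   | Common Name | Taxonomy ID | Model Use         |
|-----------|-------------|-------------|-------------------|
| human     | Human       | 9606        | Clinical research |
| mouse     | Mouse       | 10090       | Disease models    |
| rat       | Rat         | 10116       | Pharmacology      |
| zebrafish | Zebrafish   | 7955        | Development       |
| fruitfly  | Drosophila  | 7227        | Genetics          |
| nematode  | C. elegans  | 6239        | Neuroscience      |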

Homology Mapping

How It Works

NOVA uses NCBI HomoloGene to map gene symbols between species:

  1. Query species genes → HomoloGene IDs
  2. HomoloGene IDs → Target species orthologs
  3. Apply mapping to ligand-receptor database
# Get homology mapping from mouse to human
# The returned table has one row per mapped gene: `from_symbol` holds the
# symbol in the `from` species (mouse here) and `to_symbol` the symbol in the
# `to` species (human here).
mapping <- GetHomologyMapping(from = "mouse", to = "human")
head(mapping)
#>    from_symbol to_symbol
#>         <char>    <char>
#> 1:       Acadm     ACADM
#> 2:      Acadvl    ACADVL
#> 3:       Acat1     ACAT1
#> 4:       Acvr1     ACVR1
#> 5:        Sgca      SGCA
#> 6:        Adsl      ADSL
# Report how many symbol pairs the mapping contains
cat("\nTotal mappings:", nrow(mapping), "\n")
#> 
#> Total mappings: 16766

Converting Gene Symbols

# Mouse symbols to translate
mouse_genes <- c("Cd4", "Cd8a", "Ptprc", "Itgam", "Cd19")

# Look up the human symbol for each mouse symbol
human_genes <- ConvertGeneSymbols(
  mouse_genes,
  from = "mouse",
  to = "human"
)

# Show input and converted symbols side by side
symbol_table <- data.frame(mouse = mouse_genes, human = human_genes)
print(symbol_table)
#>       mouse human
#> Cd4     Cd4   CD4
#> Cd8a   Cd8a  CD8A
#> Ptprc Ptprc PTPRC
#> Itgam Itgam ITGAM
#> Cd19   Cd19  CD19

Analyzing Mouse Data

Standard Workflow

set.seed(123)

# Simulate mouse single-cell data
n_genes <- 200
n_cells <- 300

# Create the expression matrix: 25% of the entries receive a positive value,
# all others stay at zero
expr <- matrix(0, nrow = n_genes, ncol = n_cells)
expressed <- sample(length(expr), size = length(expr) * 0.25)
expr[expressed] <- abs(rnorm(length(expressed), mean = 2, sd = 1))

# Get the LR pairs and the human -> mouse ortholog table
lr_db <- GetLRDatabase("lrc2p")
mouse_mapping <- GetHomologyMapping("human", "mouse")

# Translate human symbols to mouse symbols. Unmapped symbols are dropped and
# repeated symbols collapsed so the result can be used directly as row names.
to_mouse <- function(human_symbols) {
  idx <- match(human_symbols, mouse_mapping$from_symbol)
  hits <- mouse_mapping$to_symbol[idx]
  unique(as.character(hits[!is.na(hits)]))
}

# Map some human ligands/receptors to mouse (at most 20 of each).
# head() never indexes past the end, so no NA is reintroduced when fewer
# than 20 symbols could be mapped.
mouse_ligands <- head(to_mouse(head(lr_db$ligand, 30)), 20)
mouse_receptors <- head(to_mouse(head(lr_db$receptor, 30)), 20)

# Set gene names: real LR genes first, then placeholder names so that the
# vector always has exactly n_genes unique entries
lr_genes <- unique(c(mouse_ligands, mouse_receptors))
filler <- paste0("MouseGene", seq(length(lr_genes) + 1, n_genes))
gene_names <- c(lr_genes, filler)
stopifnot(
  length(gene_names) == n_genes,
  anyDuplicated(gene_names) == 0L
)
rownames(expr) <- gene_names
colnames(expr) <- paste0("Cell", seq_len(n_cells))

# Create annotation: one randomly assigned cluster label per cell
clusters <- sample(
  c("T_cells", "B_cells", "Macrophages", "Fibroblasts"),
  n_cells,
  replace = TRUE
)
annotation <- data.frame(
  cell = colnames(expr),
  cluster = clusters
)

# Run analysis specifying mouse
result <- ExtractEdges(
  expression = Matrix::Matrix(expr, sparse = TRUE),
  annotation = annotation,
  species = "mouse",  # Specify species
  database = "lrc2p",
  min_pct = 0.05
)

print(result)

Cross-Species Comparison

Comparative Study Design

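When comparing communication across species, run the analysis separately for each species against the same database, then intersect the resulting ligand-receptor pairs: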

# NOTE: human_expr / human_ann and mouse_expr / mouse_ann are not created in
# this document as shown; substitute your own expression matrices and
# annotation tables.

# Human analysis
human_result <- ExtractEdges(
  expression = human_expr,
  annotation = human_ann,
  species = "human",
  database = "lrc2p"
)

# Mouse analysis (genes auto-converted)
mouse_result <- ExtractEdges(
  expression = mouse_expr,
  annotation = mouse_ann,
  species = "mouse",
  database = "lrc2p"
)

# Build one "ligand-receptor" key per edge so pairs can be compared as strings
pair_keys <- function(edges) {
  paste(edges$ligand, edges$receptor, sep = "-")
}

# Compare conserved interactions
# NOTE(review): the intersection is only meaningful if both results report
# ligand/receptor in the same symbol space -- confirm; otherwise convert one
# side with ConvertGeneSymbols() before intersecting.
human_pairs <- pair_keys(human_result$edges)
mouse_pairs <- pair_keys(mouse_result$edges)

conserved <- intersect(human_pairs, mouse_pairs)
cat("Conserved LR interactions:", length(conserved), "\n")

Gene ID Types

NOVA supports multiple gene identifier types:

# View supported ID types
# The result prints as a character vector of identifier-type names.
id_types <- supported_id_types()
print(id_types)
#> [1] "symbol"  "entrez"  "ensembl" "uniprot" "hgnc"    "mgi"     "custom"

Converting Between ID Types

# Ensembl gene IDs to translate
ensembl_ids <- c("ENSG00000153563", "ENSG00000010610")

# Convert Ensembl IDs to symbols (the result is stored, not printed)
symbols <- ConvertGeneIDs(
  ensembl_ids,
  from = "ensembl",
  to = "symbol",
  species = "human"
)

Special Considerations

1. One-to-Many Mappings

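Some genes can have multiple orthologs. Check whether a mapping contains repeated source symbols before relying on it (the mouse-to-human mapping shown here contains none):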

# Look for mouse symbols that occur more than once in the mouse -> human table
mapping <- GetHomologyMapping("mouse", "human")
is_repeat <- duplicated(mapping$from_symbol)
dup_genes <- mapping$from_symbol[is_repeat]

# Count each repeated symbol once, however many extra rows it has
n_multi <- length(unique(dup_genes))
cat("Genes with multiple human orthologs:", n_multi, "\n")
#> Genes with multiple human orthologs: 0

2. Missing Orthologs

Not all genes have orthologs:

# Example: the last symbol is made up and has no entry in the mapping
all_mouse_genes <- c("Actb", "Gapdh", "NoOrtholog123")
converted <- ConvertGeneSymbols(
  all_mouse_genes,
  from = "mouse",
  to = "human"
)

# Unmapped symbols come back as NA (see the last row of the printed table)
conversion_table <- data.frame(mouse = all_mouse_genes, human = converted)
print(conversion_table)
#>                       mouse human
#> Actb                   Actb  ACTB
#> Gapdh                 Gapdh GAPDH
#> NoOrtholog123 NoOrtholog123  <NA>

3. Species-Specific Genes

Some genes are species-specific and won’t have orthologs. These are automatically excluded from analysis.

Best Practices

Workflow Recommendations

  1. Start with human database: The LR database is human-centric
  2. Verify ortholog coverage: Check how many genes map successfully
  3. Report unmapped genes: Document genes that couldn’t be mapped
  4. Validate key interactions: Confirm important findings in species-specific literature

Quality Control

# Check ortholog mapping rate
# `mapping` goes mouse -> human, so `from_symbol` holds mouse symbols and
# `to_symbol` holds human symbols.
mapping <- GetHomologyMapping("mouse", "human")
lr_db <- GetLRDatabase("lrc2p")

# The LR database uses human symbols, so they must be looked up in the human
# column (`to_symbol`). Work on unique symbols so the numerator and the
# denominator count the same thing.
ligands <- unique(lr_db$ligand)
receptors <- unique(lr_db$receptor)

# How many ligands / receptors have a mouse ortholog?
ligand_mapped <- sum(ligands %in% mapping$to_symbol)
receptor_mapped <- sum(receptors %in% mapping$to_symbol)

cat("Ligands mappable to mouse:", ligand_mapped, "/", length(ligands), "\n")
cat("Receptors mappable to mouse:", receptor_mapped, "/", length(receptors), "\n")
# NOTE: the counts previously shown here (27 / 829 ligands, 3 / 690 receptors)
# came from matching human symbols against the mouse column and counting
# database rows instead of unique symbols; re-render to obtain the real rates.

Session Info

# Print the R version, platform, locale and package versions of this session
sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] ggplot2_4.0.3     data.table_1.18.4 NOVA_1.0.0        rmarkdown_2.31   
#> 
#> loaded via a namespace (and not attached):
#>  [1] Matrix_1.7-5       gtable_0.3.6       jsonlite_2.0.0     dplyr_1.2.1       
#>  [5] compiler_4.6.0     tidyselect_1.2.1   Rcpp_1.1.1-1.1     parallel_4.6.0    
#>  [9] jquerylib_0.1.4    scales_1.4.0       yaml_2.3.12        fastmap_1.2.0     
#> [13] lattice_0.22-9     R6_2.6.1           generics_0.1.4     knitr_1.51        
#> [17] tibble_3.3.1       maketools_1.3.2    bslib_0.11.0       pillar_1.11.1     
#> [21] RColorBrewer_1.1-3 rlang_1.2.0        cachem_1.1.0       xfun_0.57         
#> [25] sass_0.4.10        sys_3.4.3          S7_0.2.2           otel_0.2.0        
#> [29] cli_3.6.6          withr_3.0.2        magrittr_2.0.5     digest_0.6.39     
#> [33] grid_4.6.0         lifecycle_1.0.5    vctrs_0.7.3        evaluate_1.0.5    
#> [37] glue_1.8.1         farver_2.1.2       buildtools_1.0.0   tools_4.6.0       
#> [41] pkgconfig_2.0.3    htmltools_0.5.9

Author

Zaoqu Liu