Best Practices and Troubleshooting

Data Preparation

Input Requirements

Connectome requires a properly processed Seurat object:

library(Seurat)
library(Connectome)

# Print a summary of the object
seurat_obj

# Required components
Assays(seurat_obj)              # Should include "RNA"

# Name the arguments explicitly: SeuratObject >= 5.0 takes `assay` as the
# second positional argument of GetAssayData(), so a bare "data" would be read
# as an assay name there. `slot =` works on Seurat v3/v4 and is still accepted
# (soft-deprecated in favour of `layer =`) on SeuratObject >= 5.0.
GetAssayData(seurat_obj, assay = "RNA", slot = "data")        # Normalized data
GetAssayData(seurat_obj, assay = "RNA", slot = "scale.data")  # Scaled data (optional but recommended)
Idents(seurat_obj)              # Cell identities

Preprocessing Checklist

# 1. Quality control (before Connectome)
# The filter below needs a `percent.mt` metadata column, which Seurat does not
# create on its own; compute it here if it is not already present.
# "^MT-" matches human mitochondrial gene symbols (use "^mt-" for mouse).
if (!"percent.mt" %in% colnames(seurat_obj[[]])) {
  seurat_obj[["percent.mt"]] <- PercentageFeatureSet(seurat_obj, pattern = "^MT-")
}
seurat_obj <- subset(
  seurat_obj,
  subset = nFeature_RNA > 200 &
    nFeature_RNA < 5000 &
    percent.mt < 20
)

# 2. Normalize
seurat_obj <- NormalizeData(seurat_obj)

# 3. Scale (recommended for scaled edge weights)
seurat_obj <- FindVariableFeatures(seurat_obj)
seurat_obj <- ScaleData(seurat_obj)

# 4. Set identities
Idents(seurat_obj) <- "cell_type"  # Your cell type column

Cell Type Considerations

Minimum cells per cluster:

# Tabulate the number of cells assigned to each identity
table(Idents(seurat_obj))

# Aim for at least 50-100 cells per cluster;
# min.cells.per.ident excludes populations smaller than the given size
connectome <- CreateConnectome(
  seurat_obj,
  min.cells.per.ident = 50,
  species = "human"
)

Cell type naming:

# Avoid special characters in cell type names
# Good: "T_cell", "Macrophage_M1", "Epithelial"
# Bad: "T cell", "Macrophage/Monocyte", "Epi (type 1)"

# Fix naming issues
# `levels(seurat_obj) <- ...` can only reorder the existing identity levels; it
# stops with "NA's generated by missing levels" when handed new names. Rename
# through RenameIdents() with an old-name -> new-name map instead.
old_names <- levels(seurat_obj)
new_names <- gsub("[ /()]+", "_", old_names)  # spaces, slashes, parentheses
new_names <- gsub("^_+|_+$", "", new_names)   # no leading/trailing "_"
# Refuse to silently merge two cell types or to create an empty name
stopifnot(all(nzchar(new_names)), !anyDuplicated(new_names))
names(new_names) <- old_names
seurat_obj <- RenameIdents(seurat_obj, new_names)

Parameter Optimization

CreateConnectome Parameters

Parameter	Default	Recommendation
`min.cells.per.ident`	NULL	50-100 for robust estimates
`max.cells.per.ident`	NULL	500-1000 for large datasets
`p.values`	TRUE	FALSE for exploratory analysis
`calculate.DOR`	FALSE	TRUE for specificity analysis
`include.putative`	TRUE	FALSE for high-confidence only

# Standard analysis: p-values on, putative ligand-receptor pairs kept
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  include.putative = TRUE,
  p.values = TRUE,
  min.cells.per.ident = 50
)

# High-stringency analysis: larger clusters only, literature-supported
# pairs only, plus DOR calculation
connectome_strict <- CreateConnectome(
  seurat_obj,
  species = "human",
  include.putative = FALSE,
  calculate.DOR = TRUE,
  p.values = TRUE,
  min.cells.per.ident = 100
)

# Fast exploratory analysis: downsample each identity and skip p-values
connectome_fast <- CreateConnectome(
  seurat_obj,
  species = "human",
  include.putative = TRUE,
  p.values = FALSE,
  max.cells.per.ident = 500
)

FilterConnectome Parameters

Parameter	Description	Typical Range
`min.pct`	Minimum expression fraction	0.05 - 0.25
`min.z`	Minimum z-score	0 - 1
`min.exp`	Minimum expression level	0.1 - 0.5
`max.p`	Maximum adjusted p-value	0.01 - 0.05
`min.DOR`	Minimum log-DOR	0 - 2

# Discovery: lenient thresholds
conn_lenient <- FilterConnectome(connectome, min.z = 0, min.pct = 0.05)

# Default: standard thresholds
conn_standard <- FilterConnectome(
  connectome,
  max.p = 0.05,
  min.z = 0.25,
  min.pct = 0.1
)

# High confidence: stringent thresholds
conn_stringent <- FilterConnectome(
  connectome,
  max.p = 0.01,
  min.DOR = 1,
  min.z = 0.5,
  min.pct = 0.2
)

Performance Optimization

Large Datasets

# Cap every cluster at 500 cells and skip the p-value step for speed
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  p.values = FALSE,
  max.cells.per.ident = 500
)

# Report how much memory the resulting edge list occupies
format(x = object.size(connectome), units = "MB")

Parallel Processing

# For p-value calculation (requires the future package)
# NOTE(review): CreateConnectome() does not appear to define `parallel` or
# `n.cores` arguments -- confirm against the installed Connectome version.
# Seurat's future-aware functions pick up parallel workers from the active
# future plan, so register the plan before the call and restore it afterwards.
old_plan <- future::plan("multisession", workers = 4)
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  p.values = TRUE
)
future::plan(old_plan)

Common Issues and Solutions

Issue 1: No edges after filtering

# How many edges does the unfiltered connectome hold?
nrow(connectome)

# Inspect the distributions the filters act on
summary(connectome[["percent.source"]])
summary(connectome[["ligand.scale"]])

# Solution: loosen the thresholds
conn_filtered <- FilterConnectome(
  connectome,
  min.z = -Inf,   # Accept all z-scores
  min.pct = 0.05  # Lower threshold
)

Issue 2: Missing cell types in visualization

# Check which cell types are in the connectome
# Coerce to character first: if `source` / `target` are factors, c() returned
# their integer codes rather than their labels before R 4.1.0.
unique(c(as.character(connectome$source), as.character(connectome$target)))

# Solution: Use include.all.nodes in NetworkPlot
NetworkPlot(connectome, include.all.nodes = TRUE)

Issue 3: Memory errors

# Solution 1: keep at most 500 cells per identity
seurat_small <- subset(
  seurat_obj,
  cells = WhichCells(seurat_obj, downsample = 500)
)

# Solution 2: walk over every ordered pair of cell types
# (skeleton only -- the per-pair work is left to the reader)
cell_types <- unique(Idents(seurat_obj))
conn_list <- list()
for (i in seq_along(cell_types)) {
  for (j in seq_along(cell_types)) {
    # Handle the (i, j) pair on its own here
  }
}

Issue 4: Species mismatch

# Look at the first few feature names to see which symbol format is in use
head(rownames(seurat_obj))

# Mouse data whose genes carry human-format symbols:
# pass species = "human" so the symbols match

# Mouse data with proper mouse symbols
connectome <- CreateConnectome(
  seurat_obj,
  species = "mouse"
)

Issue 5: Custom ligand-receptor database

# Build a custom ligand-receptor table: one row per pair, with the
# columns in the order ligand, receptor, mode
lr_ligands <- c("MYL9", "VEGFA", "IL6")
lr_receptors <- c("ITGA1", "KDR", "IL6R")
lr_modes <- c("integrin", "growth_factor", "cytokine")
my_lr_db <- data.frame(
  ligand = lr_ligands,
  receptor = lr_receptors,
  mode = lr_modes
)

# Point CreateConnectome at the custom table instead of a built-in database
connectome <- CreateConnectome(
  seurat_obj,
  custom.list = my_lr_db,
  LR.database = "custom"
)

Quality Control

Sanity Checks

# 1. Check connectome dimensions
dim(connectome)
# Expected: (n_celltypes^2 * n_lr_pairs) rows × ~15 columns

# 2. Check for missing values
sum(is.na(connectome$ligand.expression))
sum(is.na(connectome$percent.source))

# 3. Verify cell types
# Coerce to character before combining: if `source` / `target` are factors,
# c() returned integer codes instead of labels before R 4.1.0, which would
# make this comparison fail spuriously.
setequal(
  unique(c(as.character(connectome$source), as.character(connectome$target))),
  unique(as.character(Idents(seurat_obj)))
)

# 4. Check expression distributions
hist(connectome$ligand.expression, breaks = 50, main = "Ligand Expression")
hist(connectome$ligand.scale, breaks = 50, main = "Ligand Z-scores")

Biological Validation

# Flag the rows that belong to well-established ligand-receptor pairs
is_known <- (connectome$ligand == "VEGFA" & connectome$receptor == "KDR") |
  (connectome$ligand == "IL6" & connectome$receptor == "IL6R") |
  (connectome$ligand == "TNF" & connectome$receptor == "TNFRSF1A")
# which() discards NA comparisons, just as subset() would
known_interactions <- connectome[which(is_known), ]

# Expect reasonable expression for these pairs in the relevant cell types
report_cols <- c(
  "source", "target", "pair",
  "ligand.expression", "recept.expression"
)
print(known_interactions[, report_cols])

Reproducibility

Setting Seeds

# For downsampling reproducibility
# WhichCells() reseeds the RNG from its own `seed` argument (default 1), so a
# set.seed() call placed before it does not change which cells are drawn.
# Pass the seed to WhichCells() directly.
seurat_obj <- subset(
  seurat_obj,
  cells = WhichCells(seurat_obj, downsample = 500, seed = 42)
)

# Fix the RNG state for anything stochastic inside CreateConnectome()
set.seed(42)
connectome <- CreateConnectome(seurat_obj, species = "human")

Saving Results

# Persist the full connectome as an R object
saveRDS(connectome, file = "connectome_analysis.rds")

# Write the full edge list to CSV for use outside R
write.csv(connectome, file = "connectome_edges.csv", row.names = FALSE)

# Filter, then write the reduced edge list as well
conn_filtered <- FilterConnectome(connectome, min.z = 0.25, min.pct = 0.1)
write.csv(conn_filtered, file = "connectome_filtered.csv", row.names = FALSE)

Session Documentation

# Document analysis parameters
# `filter_params` must mirror the FilterConnectome() call that produced
# `conn_filtered`. That call used min.pct and min.z only, so no max.p is
# recorded here; add it in both places if a p-value cutoff is applied.
analysis_params <- list(
  date = Sys.Date(),
  species = "human",
  n_cells = ncol(seurat_obj),
  n_clusters = length(unique(Idents(seurat_obj))),
  filter_params = list(min.pct = 0.1, min.z = 0.25),
  n_edges_raw = nrow(connectome),
  n_edges_filtered = nrow(conn_filtered)
)
saveRDS(analysis_params, "analysis_parameters.rds")

Session Info

# Print the R version, platform, and attached/loaded package versions
sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] rmarkdown_2.31
#> 
#> loaded via a namespace (and not attached):
#>  [1] digest_0.6.39    R6_2.6.1         fastmap_1.2.0    xfun_0.59       
#>  [5] maketools_1.3.2  cachem_1.1.0     knitr_1.51       htmltools_0.5.9 
#>  [9] buildtools_1.0.0 lifecycle_1.0.5  cli_3.6.6        sass_0.4.10     
#> [13] jquerylib_0.1.4  compiler_4.6.0   sys_3.4.3        tools_4.6.0     
#> [17] evaluate_1.0.5   bslib_0.11.0     yaml_2.3.12      otel_0.2.0      
#> [21] jsonlite_2.0.0   rlang_1.2.0