---
title: "Best Practices and Troubleshooting"
author: "Zaoqu Liu"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    toc_depth: 3
vignette: >
  %\VignetteIndexEntry{Best Practices and Troubleshooting}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
# Chunks are shown but not evaluated by default (eval = FALSE); only chunks
# that set eval = TRUE themselves are executed when the vignette is built.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  eval = FALSE
)
```

## Data Preparation

### Input Requirements

Connectome requires a properly processed Seurat object:

```{r requirements}
library(Seurat)
library(Connectome)

# Check your object
seurat_obj

# Required slots
# The data type is passed by name: the position of the second argument of
# GetAssayData() differs between Seurat versions.
# NOTE(review): Seurat v5 names this argument `layer` -- confirm for your
# installed version.
Assays(seurat_obj)                             # Should include "RNA"
GetAssayData(seurat_obj, slot = "data")        # Normalized data
GetAssayData(seurat_obj, slot = "scale.data")  # Scaled data (optional but recommended)
Idents(seurat_obj)                             # Cell identities
```

### Preprocessing Checklist

```{r preprocessing}
# 1. Quality control (before Connectome)
# `percent.mt` must already be present in the metadata, e.g. computed with
# PercentageFeatureSet(seurat_obj, pattern = "^MT-").
seurat_obj <- subset(
  seurat_obj,
  subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 20
)

# 2. Normalize
seurat_obj <- NormalizeData(seurat_obj)

# 3. Scale (recommended for scaled edge weights)
seurat_obj <- FindVariableFeatures(seurat_obj)
seurat_obj <- ScaleData(seurat_obj)

# 4. Set identities
Idents(seurat_obj) <- "cell_type"  # Your cell type column
```

### Cell Type Considerations

**Minimum cells per cluster:**

```{r min-cells}
# Check cell counts
table(Idents(seurat_obj))

# Recommended: at least 50-100 cells per cluster
# Use min.cells.per.ident to exclude small populations
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  min.cells.per.ident = 50
)
```

**Cell type naming:**

```{r naming}
# Avoid special characters in cell type names
# Good: "T_cell", "Macrophage_M1", "Epithelial"
# Bad: "T cell", "Macrophage/Monocyte", "Epi (type 1)"

# Fix naming issues
# `levels(seurat_obj) <- ...` can only reorder the existing identity levels and
# stops with an error when given new names, so the renaming is done on the
# identity factor itself and the result is assigned back with Idents<-.
idents <- Idents(seurat_obj)

# Replace every run of characters other than letters, digits and "_" (spaces,
# slashes, parentheses, ...) with a single "_", then trim "_" at either end.
clean_levels <- gsub("[^A-Za-z0-9_]+", "_", levels(idents))
clean_levels <- gsub("^_+|_+$", "", clean_levels)

# Two different names collapsing to the same cleaned name would silently merge
# clusters, so fail loudly instead.
stopifnot(anyDuplicated(clean_levels) == 0L)

levels(idents) <- clean_levels
Idents(seurat_obj) <- idents
```

## Parameter Optimization

### CreateConnectome Parameters

| Parameter | Default | Recommendation |
|-----------|---------|----------------|
| `min.cells.per.ident` | NULL | 50-100 for robust estimates |
| `max.cells.per.ident` | NULL | 500-1000 for large datasets |
| `p.values` | TRUE | FALSE for exploratory analysis |
| `calculate.DOR` | FALSE | TRUE for specificity analysis |
| `include.putative` | TRUE | FALSE for high-confidence only |

```{r create-params}
# Standard analysis
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  min.cells.per.ident = 50,
  p.values = TRUE,
  include.putative = TRUE
)

# High-stringency analysis
connectome_strict <- CreateConnectome(
  seurat_obj,
  species = "human",
  min.cells.per.ident = 100,
  p.values = TRUE,
  include.putative = FALSE,  # Literature-supported only
  calculate.DOR = TRUE
)

# Fast exploratory analysis
connectome_fast <- CreateConnectome(
  seurat_obj,
  species = "human",
  max.cells.per.ident = 500,  # Downsample
  p.values = FALSE,
  include.putative = TRUE
)
```

### FilterConnectome Parameters

| Parameter | Description | Typical Range |
|-----------|-------------|---------------|
| `min.pct` | Minimum expression fraction | 0.05 - 0.25 |
| `min.z` | Minimum z-score | 0 - 1 |
| `min.exp` | Minimum expression level | 0.1 - 0.5 |
| `max.p` | Maximum adjusted p-value | 0.01 - 0.05 |
| `min.DOR` | Minimum log-DOR | 0 - 2 |

```{r filter-params}
# Lenient filtering (discovery)
conn_lenient <- FilterConnectome(
  connectome,
  min.pct = 0.05,
  min.z = 0
)

# Standard filtering
conn_standard <- FilterConnectome(
  connectome,
  min.pct = 0.1,
  min.z = 0.25,
  max.p = 0.05
)

# Stringent filtering (high confidence)
conn_stringent <- FilterConnectome(
  connectome,
  min.pct = 0.2,
  min.z = 0.5,
  max.p = 0.01,
  min.DOR = 1
)
```

## Performance Optimization

### Large Datasets

```{r large-data}
# Downsample before analysis
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  max.cells.per.ident = 500,  # Downsample to 500 cells/cluster
  p.values = FALSE            # Skip p-values for speed
)

# Check memory usage
format(object.size(connectome), units = "MB")
```

### Parallel Processing

```{r parallel}
# For p-value calculation (requires future packages)
connectome <- CreateConnectome(
  seurat_obj,
  species = "human",
  p.values = TRUE,
  parallel = TRUE,
  n.cores = 4
)
```

## Common Issues and Solutions

### Issue 1: No edges after filtering

```{r issue1}
# Check raw connectome size
nrow(connectome)

# Check available values
summary(connectome$percent.source)
summary(connectome$ligand.scale)

# Solution: Relax filtering parameters
conn_filtered <- FilterConnectome(
  connectome,
  min.pct = 0.05,  # Lower threshold
  min.z = -Inf     # Accept all z-scores
)
```

### Issue 2: Missing cell types in visualization

```{r issue2}
# Check which cell types are in the connectome
unique(c(connectome$source, connectome$target))

# Solution: Use include.all.nodes in NetworkPlot
NetworkPlot(connectome, include.all.nodes = TRUE)
```

### Issue 3: Memory errors

```{r issue3}
# Solution 1: Downsample
seurat_small <- subset(seurat_obj, cells = WhichCells(seurat_obj, downsample = 500))

# Solution 2: Process in chunks
cell_types <- unique(Idents(seurat_obj))
conn_list <- list()
for (i in seq_along(cell_types)) {
  for (j in seq_along(cell_types)) {
    # Process pairs individually
  }
}
```

### Issue 4: Species mismatch

```{r issue4}
# Check gene names
head(rownames(seurat_obj))

# For mouse data whose genes are stored as human gene symbols,
# use species = "human": `species` should follow the gene symbol format

# For proper mouse symbols
connectome <- CreateConnectome(seurat_obj, species = "mouse")
```

### Issue 5: Custom ligand-receptor database

```{r issue5}
# Create custom database
my_lr_db <- data.frame(
  ligand = c("MYL9", "VEGFA", "IL6"),
  receptor = c("ITGA1", "KDR", "IL6R"),
  mode = c("integrin", "growth_factor", "cytokine")
)

connectome <- CreateConnectome(
  seurat_obj,
  LR.database = "custom",
  custom.list = my_lr_db
)
```

## Quality Control

### Sanity Checks

```{r qc}
# 1. Check connectome dimensions
dim(connectome)
# Expected: (n_celltypes^2 * n_lr_pairs) rows × ~15 columns

# 2. Check for missing values
sum(is.na(connectome$ligand.expression))
sum(is.na(connectome$percent.source))

# 3. Verify cell types
# Coerce to character before combining: if source/target are factors, c() on
# them returns integer codes on R < 4.1 and the comparison would always fail.
setequal(
  unique(c(as.character(connectome$source), as.character(connectome$target))),
  unique(as.character(Idents(seurat_obj)))
)

# 4. Check expression distributions
hist(connectome$ligand.expression, breaks = 50, main = "Ligand Expression")
hist(connectome$ligand.scale, breaks = 50, main = "Ligand Z-scores")
```

### Biological Validation

```{r validation}
# Check known interactions
known_interactions <- subset(
  connectome,
  (ligand == "VEGFA" & receptor == "KDR") |
    (ligand == "IL6" & receptor == "IL6R") |
    (ligand == "TNF" & receptor == "TNFRSF1A")
)

# These should have reasonable expression in relevant cell types
print(known_interactions[, c(
  "source", "target", "pair",
  "ligand.expression", "recept.expression"
)])
```

## Reproducibility

### Setting Seeds

```{r seeds}
# For downsampling reproducibility
# WhichCells() sets its own seed when downsampling (default `seed = 1`), so a
# preceding set.seed() call has no effect on which cells are drawn; pass the
# seed explicitly instead.
seurat_obj <- subset(
  seurat_obj,
  cells = WhichCells(seurat_obj, downsample = 500, seed = 42)
)

# Fix the RNG state for any random steps inside CreateConnectome()
# NOTE(review): presumably downsampling / p-value estimation -- confirm.
set.seed(42)
connectome <- CreateConnectome(seurat_obj, species = "human")
```

### Saving Results

```{r saving}
# Save connectome object
saveRDS(connectome, "connectome_analysis.rds")

# Export as CSV for external tools
write.csv(connectome, "connectome_edges.csv", row.names = FALSE)

# Save filtered version
conn_filtered <- FilterConnectome(connectome, min.pct = 0.1, min.z = 0.25)
write.csv(conn_filtered, "connectome_filtered.csv", row.names = FALSE)
```

### Session Documentation

```{r session-doc}
# Document analysis parameters
analysis_params <- list(
  date = Sys.Date(),
  species = "human",
  n_cells = ncol(seurat_obj),
  n_clusters = length(unique(Idents(seurat_obj))),
  filter_params = list(min.pct = 0.1, min.z = 0.25, max.p = 0.05),
  n_edges_raw = nrow(connectome),
  n_edges_filtered = nrow(conn_filtered)
)

saveRDS(analysis_params, "analysis_parameters.rds")
```

## Session Info

```{r session, eval=TRUE}
sessionInfo()
```