Analyzing CNVs across multiple samples enables:
This vignette demonstrates multi-sample workflows in fastCNV.
When samples share similar normal cell populations, use a pooled reference:
results <- fastCNV(
seuratObj = sample_list,
sampleName = sample_names,
referenceVar = "cell_type",
referenceLabel = c("Normal_epithelial", "Fibroblast"),
pooledReference = TRUE, # Combine references across samples
prepareCounts = TRUE,
getCNVPerChromosomeArm = TRUE,
getCNVClusters = TRUE
)When to use pooled reference:
| Scenario | Pooled Reference | Individual Reference |
|---|---|---|
| Same tissue type | ✓ Recommended | ✓ Also valid |
| Different tissues | ✗ Avoid | ✓ Recommended |
| Batch effects | ✗ May amplify | ✓ Reduces batch effects |
| Low reference cells | ✓ Increases power | ✗ May be noisy |
library(dplyr)
library(ggplot2)
# Extract CNV fractions from all samples
cnv_summary <- lapply(seq_along(results), function(i) {
data.frame(
sample = sample_names[i],
cnv_fraction = results[[i]]$cnv_fraction,
cnv_cluster = results[[i]]$cnv_clusters
)
}) %>% bind_rows()
# Compare CNV burden across samples
ggplot(cnv_summary, aes(x = sample, y = cnv_fraction, fill = sample)) +
geom_boxplot() +
theme_minimal() +
labs(
title = "CNV Burden Comparison Across Samples",
x = "Sample",
y = "CNV Fraction"
)# Extract arm-level CNVs
arm_summary <- lapply(seq_along(results), function(i) {
arm_data <- results[[i]]@meta.data %>%
select(starts_with("arm_")) %>%
colMeans()
data.frame(
sample = sample_names[i],
arm = names(arm_data),
mean_cnv = arm_data
)
}) %>% bind_rows()
# Heatmap of arm-level CNVs
library(tidyr)
arm_matrix <- arm_summary %>%
pivot_wider(names_from = sample, values_from = mean_cnv) %>%
column_to_rownames("arm") %>%
as.matrix()
heatmap(arm_matrix, scale = "none", col = colorRampPalette(c("blue", "white", "red"))(100))# Define CNV presence threshold
cnv_threshold <- 0.15
# Function to identify recurrent CNVs
identify_recurrent_cnvs <- function(results, threshold = 0.15) {
# Get CNV calls per sample
cnv_calls <- lapply(results, function(obj) {
scores <- as.matrix(obj@assays$CNVScoresTrimmed@data)
# Fraction of cells with CNV per window
colMeans(abs(scores) > threshold)
})
# Combine across samples
cnv_matrix <- do.call(cbind, cnv_calls)
colnames(cnv_matrix) <- sample_names
# Identify windows with CNV in multiple samples
recurrent <- rowSums(cnv_matrix > 0.1) >= 2 # CNV in ≥2 samples
list(
matrix = cnv_matrix,
recurrent_windows = which(recurrent)
)
}
recurrent <- identify_recurrent_cnvs(results)
message("Recurrent CNV regions: ", length(recurrent$recurrent_windows))# Create recurrence plot
recurrence_data <- data.frame(
window = 1:nrow(recurrent$matrix),
n_samples = rowSums(recurrent$matrix > 0.1)
)
ggplot(recurrence_data, aes(x = window, y = n_samples)) +
geom_bar(stat = "identity", fill = "steelblue") +
geom_hline(yintercept = 2, linetype = "dashed", color = "red") +
theme_minimal() +
labs(
title = "CNV Recurrence Across Samples",
x = "Genomic Window",
y = "Number of Samples with CNV"
)# PCA on CNV profiles
cnv_profiles <- lapply(results, function(obj) {
colMeans(as.matrix(obj@assays$CNVScores@data))
})
cnv_matrix <- do.call(rbind, cnv_profiles)
rownames(cnv_matrix) <- sample_names
# PCA
pca <- prcomp(cnv_matrix, scale = TRUE)
plot(pca$x[, 1:2], pch = 19, col = 1:length(sample_names))
text(pca$x[, 1:2], labels = sample_names, pos = 3)# Strategy 1: Use sample-specific references (default)
results_individual <- fastCNV(
seuratObj = sample_list,
sampleName = sample_names,
referenceVar = "cell_type",
referenceLabel = c("Normal"),
pooledReference = FALSE # Each sample uses its own reference
)
# Strategy 2: Regress out batch in downstream analysis
# After merging, use Seurat integration methods# Merge samples for combined visualization
merged_results <- merge(results[[1]], results[2:3])
# Add sample labels
merged_results$sample_id <- factor(
merged_results$orig.ident,
levels = sample_names
)
# Plot combined heatmap (shows all samples)
plotCNVResults(
seuratObj = merged_results,
referenceVar = "cell_type",
tumorLabel = "Tumor",
splitPlotOnVar = "sample_id"
)# For large cohorts, process samples sequentially
process_sample <- function(seurat_path, sample_name, ref_var, ref_label) {
# Load sample
obj <- readRDS(seurat_path)
# Run CNV analysis
result <- fastCNV(
seuratObj = obj,
sampleName = sample_name,
referenceVar = ref_var,
referenceLabel = ref_label,
prepareCounts = TRUE,
getCNVPerChromosomeArm = TRUE,
getCNVClusters = TRUE,
doPlot = FALSE # Skip plotting to save memory
)
# Save result
saveRDS(result, paste0(sample_name, "_cnv_result.rds"))
# Clean up
rm(obj)
invisible(gc())
return(invisible(NULL))
}
# Process cohort
sample_paths <- c("patient_A.rds", "patient_B.rds", "patient_C.rds")
sample_names <- c("Patient_A", "Patient_B", "Patient_C")
for (i in seq_along(sample_paths)) {
message("Processing: ", sample_names[i])
process_sample(
sample_paths[i],
sample_names[i],
ref_var = "cell_type",
ref_label = c("Normal")
)
}library(future)
library(future.apply)
# Set up parallel backend
plan(multisession, workers = 4)
# Process samples in parallel
results <- future_lapply(seq_along(sample_list), function(i) {
fastCNV(
seuratObj = sample_list[[i]],
sampleName = sample_names[i],
referenceVar = "cell_type",
referenceLabel = c("Normal"),
doPlot = FALSE
)
})
# Reset to sequential
plan(sequential)# 1. Load cohort data
cohort <- list(
tumor_1 = readRDS("GBM_patient_1.rds"),
tumor_2 = readRDS("GBM_patient_2.rds"),
tumor_3 = readRDS("GBM_patient_3.rds")
)
# 2. Run CNV analysis with pooled reference
cnv_results <- fastCNV(
seuratObj = cohort,
sampleName = names(cohort),
referenceVar = "cell_type",
referenceLabel = c("Oligodendrocyte", "Astrocyte"),
pooledReference = TRUE,
windowSize = 150,
getCNVClusters = TRUE
)
# 3. Identify recurrent GBM-associated CNVs
# Expected: chr7 gain, chr10 loss
# 4. Compare clonal composition across patients
clone_composition <- lapply(cnv_results, function(obj) {
table(obj$cnv_clusters) / ncol(obj)
})
# 5. Export results
saveRDS(cnv_results, "GBM_cohort_cnv_results.rds")Key points for multi-sample analysis:
sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] rmarkdown_2.31
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.39 R6_2.6.1 fastmap_1.2.0 xfun_0.59
#> [5] maketools_1.3.2 cachem_1.1.0 knitr_1.51 htmltools_0.5.9
#> [9] buildtools_1.0.0 lifecycle_1.0.5 cli_3.6.6 sass_0.4.10
#> [13] jquerylib_0.1.4 compiler_4.6.0 sys_3.4.3 tools_4.6.0
#> [17] evaluate_1.0.5 bslib_0.11.0 yaml_2.3.12 otel_0.2.0
#> [21] jsonlite_2.0.0 rlang_1.2.0