This vignette demonstrates how to analyze multiple biomarkers
simultaneously using the SeroCOPMulti class. We’ll simulate
three biomarkers with different correlates of protection:
We’ll create three biomarkers with different characteristics:
n <- 250 # Sample size
# Biomarker 1: Strong CoP (IgG)
# High ceiling-floor difference, clear dose-response
titre_IgG <- rnorm(n, mean = 2.5, sd = 1.2)
prob_IgG <- 0.02 + 0.68 / (1 + exp(2.5 * (titre_IgG - 2.0)))
# Biomarker 2: Weak CoP (IgA)
# Moderate ceiling-floor difference, weaker slope
titre_IgA <- rnorm(n, mean = 1.8, sd = 1.5)
prob_IgA <- 0.15 + 0.55 / (1 + exp(1.0 * (titre_IgA - 1.5)))
# Biomarker 3: No CoP (Non-specific)
# No relationship with infection - flat line
titre_Nonspec <- rnorm(n, mean = 3.0, sd = 1.0)
prob_Nonspec <- rep(0.35, n) # Constant probability
# Generate infection outcomes
# Use weighted average with noise
prob_combined <- 0.5 * prob_IgG + 0.3 * prob_IgA + 0.2 * prob_Nonspec
infected <- rbinom(n, 1, prob_combined)
# Combine into matrix
titre_matrix <- cbind(
IgG = titre_IgG,
IgA = titre_IgA,
Nonspecific = titre_Nonspec
)
# Store true parameters for later comparison
true_params <- list(
IgG = list(floor = 0.02, ceiling = 0.70, ec50 = 2.0, slope = 2.5),
IgA = list(floor = 0.15, ceiling = 0.70, ec50 = 1.5, slope = 1.0),
Nonspecific = list(floor = 0.35, ceiling = 0.70, ec50 = 3.0, slope = 0.01)
)
cat(sprintf("Simulated %d samples with 3 biomarkers\n", n))
#> Simulated 250 samples with 3 biomarkers
cat(sprintf("Overall infection rate: %.1f%%\n", mean(infected) * 100))
#> Overall infection rate: 30.4%
# Create visualization of true relationships
plot_data <- data.frame(
titre = c(titre_IgG, titre_IgA, titre_Nonspec),
prob = c(prob_IgG, prob_IgA, prob_Nonspec),
infected = rep(infected, 3),
biomarker = rep(c("IgG (Strong CoP)", "IgA (Weak CoP)",
"Nonspecific (No CoP)"), each = n)
)
ggplot(plot_data, aes(x = titre, y = prob)) +
geom_line(color = "red", linewidth = 1, alpha = 0.7) +
geom_point(aes(y = infected), alpha = 0.3, size = 0.8) +
facet_wrap(~biomarker, scales = "free_x") +
labs(
title = "True Infection Probability by Biomarker",
x = "Antibody Titre (log scale)",
y = "Probability of Infection"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"))
Now we’ll use the SeroCOPMulti class to fit models for
all biomarkers:
# Initialize multi-biomarker model
multi_model <- SeroCOPMulti$new(
titre = titre_matrix,
infected = infected,
biomarker_names = c("IgG", "IgA", "Nonspecific")
)
#> SeroCOPMulti initialized with 250 observations and 3 biomarkers
#> Biomarkers: IgG, IgA, Nonspecific
#> Infection rate: 30.4%
# Fit all models
# Note: Using reduced iterations for vignette speed
multi_model$fit_all(
chains = 4,
iter = 1000,
warmup = 500,
cores = 1
)
#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#bulk-ess
#> Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#tail-ess
#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#bulk-ess
#> Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#tail-ess
#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#bulk-ess
#> Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#tail-essNow let’s extract the correlate of protection directly from the Stan
model for each biomarker. The protection probability is calculated
within the model as 1 - (prob_infection / ceiling).
# Extract protection probabilities for each biomarker
cop_results <- list()
for (biomarker in multi_model$biomarker_names) {
model <- multi_model$models[[biomarker]]
# Get protection probabilities directly from Stan model
protection_samples <- model$predict_protection()
correlate_of_protection <- colMeans(protection_samples)
# Also get risk for comparison
risk_samples <- model$predict()
correlate_of_risk <- colMeans(risk_samples)
cop_results[[biomarker]] <- data.frame(
titre = model$titre,
correlate_of_risk = correlate_of_risk,
correlate_of_protection = correlate_of_protection
)
}
# Display summary for each biomarker
for (biomarker in multi_model$biomarker_names) {
cat(sprintf("\n=== %s ===\n", biomarker))
print(head(cop_results[[biomarker]], 5))
}
#>
#> === IgG ===
#> titre correlate_of_risk correlate_of_protection
#> 1 3.244908 0.2355256 0.7246047
#> 2 2.542770 0.2749729 0.6783408
#> 3 3.427785 0.2275813 0.7338912
#> 4 4.026987 0.2063750 0.7586709
#> 5 2.945171 0.2504316 0.7071590
#>
#> === IgA ===
#> titre correlate_of_risk correlate_of_protection
#> 1 2.835740 0.2766442 0.6337947
#> 2 2.701161 0.2808055 0.6284272
#> 3 2.604183 0.2838555 0.6244977
#> 4 3.136495 0.2676413 0.6454331
#> 5 1.077544 0.3376566 0.5558069
#>
#> === Nonspecific ===
#> titre correlate_of_risk correlate_of_protection
#> 1 1.706775 0.3512910 0.5200332
#> 2 4.967871 0.2709885 0.6271061
#> 3 3.522967 0.2990792 0.5882780
#> 4 3.957672 0.2898791 0.6008559
#> 5 4.374356 0.2817068 0.6121462
# Combine CoP data for all biomarkers
cop_plot_data <- list()
for (i in seq_along(multi_model$biomarker_names)) {
biomarker <- multi_model$biomarker_names[i]
model <- multi_model$models[[biomarker]]
# Create prediction grid
titre_grid <- seq(min(model$titre), max(model$titre), length.out = 100)
# Extract protection probabilities from Stan model
cop_matrix <- model$predict_protection(newdata = titre_grid)
# Calculate summary statistics
cop_mean <- colMeans(cop_matrix)
cop_lower <- apply(cop_matrix, 2, quantile, probs = 0.025)
cop_upper <- apply(cop_matrix, 2, quantile, probs = 0.975)
cop_plot_data[[i]] <- data.frame(
biomarker = biomarker,
titre = titre_grid,
cop = cop_mean,
lower = cop_lower,
upper = cop_upper
)
}
cop_plot_df <- do.call(rbind, cop_plot_data)
# Plot all CoP curves
ggplot(cop_plot_df, aes(x = titre, y = cop, color = biomarker, fill = biomarker)) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2, color = NA) +
geom_line(linewidth = 1) +
facet_wrap(~biomarker, scales = "free_x") +
labs(
title = "Correlate of Protection Curves by Biomarker",
x = "Antibody Titre (log scale)",
y = "Correlate of Protection"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
legend.position = "none"
)
The correlate of protection curves show how protection increases with titre for each biomarker.
comparison <- multi_model$compare_biomarkers()
#>
#> === Biomarker Comparison ===
#>
#> biomarker auc auc_lower auc_upper brier loo_elpd loo_se
#> IgG 0.6160012 0.5350460 0.6969564 0.2026267 -151.3358 6.852408
#> IgA 0.5559589 0.4795334 0.6323843 0.2105623 -154.8649 5.978793
#> Nonspecific 0.5062008 0.4277909 0.5846108 0.2125480 -156.0416 5.877900The comparison table shows:
This plot shows biomarkers positioned by their predictive performance:
multi_model$plot_comparison()
#>
#> === Biomarker Comparison ===
#>
#> biomarker auc auc_lower auc_upper brier loo_elpd loo_se
#> IgG 0.6160012 0.5350460 0.6969564 0.2026267 -151.3358 6.852408
#> IgA 0.5559589 0.4795334 0.6323843 0.2105623 -154.8649 5.978793
#> Nonspecific 0.5062008 0.4277909 0.5846108 0.2125480 -156.0416 5.877900
#> `height` was translated to `width`.
Interpretation:
Let’s check if we recovered the true parameters for each biomarker:
# Extract parameters for each biomarker
for (biomarker in c("IgG", "IgA", "Nonspecific")) {
cat(sprintf("\n=== %s ===\n", biomarker))
model <- multi_model$models[[biomarker]]
params <- extract_parameters(model)
cat("\nEstimated parameters:\n")
print(params[, c("parameter", "mean", "lower", "upper")])
cat("\nTrue parameters:\n")
true <- true_params[[biomarker]]
cat(sprintf(" floor: %.3f\n", true$floor))
cat(sprintf(" ceiling: %.3f\n", true$ceiling))
cat(sprintf(" ec50: %.3f\n", true$ec50))
cat(sprintf(" slope: %.3f\n", true$slope))
}
#>
#> === IgG ===
#>
#> Estimated parameters:
#> parameter mean lower upper
#> 1 floor 0.1591710 0.007286481 0.3278561
#> 2 ceiling 0.8701495 0.618644383 0.9957030
#> 3 ec50 0.5282921 -0.941247100 1.9136135
#> 4 slope 0.9383067 0.269414845 2.4420292
#>
#> True parameters:
#> floor: 0.020
#> ceiling: 0.700
#> ec50: 2.000
#> slope: 2.500
#>
#> === IgA ===
#>
#> Estimated parameters:
#> parameter mean lower upper
#> 1 floor 0.1012184 0.001955156 0.3324208
#> 2 ceiling 0.7860048 0.509050642 0.9947033
#> 3 ec50 -0.5379052 -3.434444065 3.4021940
#> 4 slope 0.3942397 0.021572361 2.0887926
#>
#> True parameters:
#> floor: 0.150
#> ceiling: 0.700
#> ec50: 1.500
#> slope: 1.000
#>
#> === Nonspecific ===
#>
#> Estimated parameters:
#> parameter mean lower upper
#> 1 floor 0.1430507 0.001287225 0.3747879
#> 2 ceiling 0.7598000 0.507571821 0.9861794
#> 3 ec50 1.2508301 -1.441352035 4.7272448
#> 4 slope 0.7290948 0.011401633 3.7262757
#>
#> True parameters:
#> floor: 0.350
#> ceiling: 0.700
#> ec50: 3.000
#> slope: 0.010
# Create combined recovery plot
recovery_list <- list()
for (i in seq_along(multi_model$biomarker_names)) {
biomarker <- multi_model$biomarker_names[i]
model <- multi_model$models[[biomarker]]
params <- extract_parameters(model)
recovery_list[[i]] <- data.frame(
biomarker = biomarker,
parameter = params$parameter,
estimated = params$mean,
lower = params$lower,
upper = params$upper,
true = c(
true_params[[biomarker]]$floor,
true_params[[biomarker]]$ceiling,
true_params[[biomarker]]$ec50,
true_params[[biomarker]]$slope
)
)
}
recovery_df <- do.call(rbind, recovery_list)
ggplot(recovery_df, aes(x = parameter, color = biomarker)) +
geom_pointrange(
aes(y = estimated, ymin = lower, ymax = upper),
position = position_dodge(width = 0.5),
size = 0.8
) +
geom_point(
aes(y = true),
shape = 4,
size = 3,
stroke = 1.5,
position = position_dodge(width = 0.5)
) +
facet_wrap(~parameter, scales = "free_y", ncol = 2) +
labs(
title = "Parameter Recovery Across Biomarkers",
subtitle = "Points with bars: Estimated (95% CI) | X: True value",
y = "Value",
color = "Biomarker"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.title.x = element_blank()
)
# Plot ROC curves for each biomarker
par(mfrow = c(1, 3))
for (biomarker in multi_model$biomarker_names) {
model <- multi_model$models[[biomarker]]
pred <- colMeans(model$predict())
roc_obj <- pROC::roc(model$infected, pred, quiet = TRUE)
plot(roc_obj,
main = sprintf("%s\nAUC = %.3f", biomarker, pROC::auc(roc_obj)),
col = "steelblue",
lwd = 2)
abline(a = 0, b = 1, lty = 2, col = "gray")
}
# Compare LOO-CV across biomarkers
cat("\n=== Leave-One-Out Cross-Validation Comparison ===\n\n")
#>
#> === Leave-One-Out Cross-Validation Comparison ===
for (biomarker in multi_model$biomarker_names) {
cat(sprintf("--- %s ---\n", biomarker))
model <- multi_model$models[[biomarker]]
print(model$loo)
cat("\n")
}
#> --- IgG ---
#>
#> Computed from 2000 by 250 log-likelihood matrix.
#>
#> Estimate SE
#> elpd_loo -151.3 6.9
#> p_loo 2.7 0.2
#> looic 302.7 13.7
#> ------
#> MCSE of elpd_loo is 0.1.
#> MCSE and ESS estimates assume MCMC draws (r_eff in [0.2, 0.8]).
#>
#> All Pareto k estimates are good (k < 0.7).
#> See help('pareto-k-diagnostic') for details.
#>
#> --- IgA ---
#>
#> Computed from 2000 by 250 log-likelihood matrix.
#>
#> Estimate SE
#> elpd_loo -154.9 6.0
#> p_loo 2.0 0.1
#> looic 309.7 12.0
#> ------
#> MCSE of elpd_loo is 0.0.
#> MCSE and ESS estimates assume MCMC draws (r_eff in [0.3, 0.7]).
#>
#> All Pareto k estimates are good (k < 0.7).
#> See help('pareto-k-diagnostic') for details.
#>
#> --- Nonspecific ---
#>
#> Computed from 2000 by 250 log-likelihood matrix.
#>
#> Estimate SE
#> elpd_loo -156.0 5.9
#> p_loo 1.9 0.2
#> looic 312.1 11.8
#> ------
#> MCSE of elpd_loo is 0.1.
#> MCSE and ESS estimates assume MCMC draws (r_eff in [0.2, 0.8]).
#>
#> All Pareto k estimates are good (k < 0.7).
#> See help('pareto-k-diagnostic') for details.This analysis demonstrates:
The SeroCOPMulti class enables efficient comparison of
multiple biomarkers to identify the best correlates of protection.
sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggplot2_4.0.2 seroCOP_0.1.0
#>
#> loaded via a namespace (and not attached):
#> [1] gtable_0.3.6 tensorA_0.36.2.1 xfun_0.56
#> [4] bslib_0.10.0 QuickJSR_1.9.0 processx_3.8.6
#> [7] inline_0.3.21 lattice_0.22-7 callr_3.7.6
#> [10] ps_1.9.1 vctrs_0.7.1 tools_4.5.2
#> [13] generics_0.1.4 stats4_4.5.2 parallel_4.5.2
#> [16] tibble_3.3.1 pkgconfig_2.0.3 brms_2.23.0
#> [19] Matrix_1.7-4 checkmate_2.3.4 RColorBrewer_1.1-3
#> [22] S7_0.2.1 desc_1.4.3 distributional_0.6.0
#> [25] RcppParallel_5.1.11-2 lifecycle_1.0.5 compiler_4.5.2
#> [28] farver_2.1.2 stringr_1.6.0 textshaping_1.0.5
#> [31] Brobdingnag_1.2-9 codetools_0.2-20 htmltools_0.5.9
#> [34] sass_0.4.10 bayesplot_1.15.0 yaml_2.3.12
#> [37] pillar_1.11.1 pkgdown_2.2.0 jquerylib_0.1.4
#> [40] cachem_1.1.0 StanHeaders_2.32.10 bridgesampling_1.2-1
#> [43] abind_1.4-8 nlme_3.1-168 posterior_1.6.1
#> [46] rstan_2.32.7 tidyselect_1.2.1 digest_0.6.39
#> [49] mvtnorm_1.3-3 stringi_1.8.7 dplyr_1.2.0
#> [52] labeling_0.4.3 fastmap_1.2.0 grid_4.5.2
#> [55] cli_3.6.5 magrittr_2.0.4 loo_2.9.0
#> [58] pkgbuild_1.4.8 withr_3.0.2 scales_1.4.0
#> [61] backports_1.5.0 rmarkdown_2.30 matrixStats_1.5.0
#> [64] gridExtra_2.3 ragg_1.5.1 coda_0.19-4.1
#> [67] evaluate_1.0.5 knitr_1.51 rstantools_2.6.0
#> [70] rlang_1.1.7 Rcpp_1.1.1 glue_1.8.0
#> [73] pROC_1.19.0.1 jsonlite_2.0.0 R6_2.6.1
#> [76] systemfonts_1.3.2 fs_1.6.7