This vignette demonstrates how to analyze multiple biomarkers
simultaneously using the SeroCOPMulti class. We’ll simulate
three biomarkers with different correlates of protection:
We’ll create three biomarkers with different characteristics:
n <- 250 # Sample size
# Biomarker 1: Strong CoP (IgG)
# High ceiling-floor difference, clear dose-response
titre_IgG <- rnorm(n, mean = 2.5, sd = 1.2)
prob_IgG <- 0.02 + 0.68 / (1 + exp(2.5 * (titre_IgG - 2.0)))
# Biomarker 2: Weak CoP (IgA)
# Moderate ceiling-floor difference, weaker slope
titre_IgA <- rnorm(n, mean = 1.8, sd = 1.5)
prob_IgA <- 0.15 + 0.55 / (1 + exp(1.0 * (titre_IgA - 1.5)))
# Biomarker 3: No CoP (Non-specific)
# No relationship with infection - flat line
titre_Nonspec <- rnorm(n, mean = 3.0, sd = 1.0)
prob_Nonspec <- rep(0.35, n) # Constant probability
# Generate infection outcomes
# Use weighted average with noise
prob_combined <- 0.5 * prob_IgG + 0.3 * prob_IgA + 0.2 * prob_Nonspec
infected <- rbinom(n, 1, prob_combined)
# Combine into matrix
titre_matrix <- cbind(
IgG = titre_IgG,
IgA = titre_IgA,
Nonspecific = titre_Nonspec
)
# Store true parameters for later comparison
true_params <- list(
IgG = list(floor = 0.02, ceiling = 0.70, ec50 = 2.0, slope = 2.5),
IgA = list(floor = 0.15, ceiling = 0.70, ec50 = 1.5, slope = 1.0),
Nonspecific = list(floor = 0.35, ceiling = 0.70, ec50 = 3.0, slope = 0.01)
)
cat(sprintf("Simulated %d samples with 3 biomarkers\n", n))
#> Simulated 250 samples with 3 biomarkers
cat(sprintf("Overall infection rate: %.1f%%\n", mean(infected) * 100))
#> Overall infection rate: 30.4%
# Create visualization of true relationships
plot_data <- data.frame(
titre = c(titre_IgG, titre_IgA, titre_Nonspec),
prob = c(prob_IgG, prob_IgA, prob_Nonspec),
infected = rep(infected, 3),
biomarker = rep(c("IgG (Strong CoP)", "IgA (Weak CoP)",
"Nonspecific (No CoP)"), each = n)
)
ggplot(plot_data, aes(x = titre, y = prob)) +
geom_line(color = "red", linewidth = 1, alpha = 0.7) +
geom_point(aes(y = infected), alpha = 0.3, size = 0.8) +
facet_wrap(~biomarker, scales = "free_x") +
labs(
title = "True Infection Probability by Biomarker",
x = "Antibody Titre (log scale)",
y = "Probability of Infection"
) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"))
Now we’ll use the SeroCOPMulti class to fit models for
all biomarkers:
# Initialize multi-biomarker model
multi_model <- SeroCOPMulti$new(
titre = titre_matrix,
infected = infected,
biomarker_names = c("IgG", "IgA", "Nonspecific")
)
#> SeroCOPMulti initialized with 250 observations and 3 biomarkers
#> Biomarkers: IgG, IgA, Nonspecific
#> Infection rate: 30.4%
# Fit all models
# Note: Using reduced iterations for vignette speed
multi_model$fit_all(
chains = 4,
iter = 1000,
warmup = 500,
cores = 1
)
#> Warning: There were 7 divergent transitions after warmup. See
#> https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
#> to find out why this is a problem and how to eliminate them.
#> Warning: Examine the pairs() plot to diagnose sampling problems
#> Warning: The largest R-hat is 1.06, indicating chains have not mixed.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#r-hat
#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#bulk-ess
#> Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
#> Running the chains for more iterations may help. See
#> https://mc-stan.org/misc/warnings.html#tail-essNow let’s extract the correlate of protection directly from the Stan
model for each biomarker. The protection probability is calculated
within the model as 1 - (prob_infection / ceiling).
# Extract protection probabilities for each biomarker
cop_results <- list()
for (biomarker in multi_model$biomarker_names) {
model <- multi_model$models[[biomarker]]
# Get protection probabilities directly from Stan model
protection_samples <- model$predict_protection()
correlate_of_protection <- colMeans(protection_samples)
# Also get risk for comparison
risk_samples <- model$predict()
correlate_of_risk <- colMeans(risk_samples)
cop_results[[biomarker]] <- data.frame(
titre = model$titre,
correlate_of_risk = correlate_of_risk,
correlate_of_protection = correlate_of_protection
)
}
# Display summary for each biomarker
for (biomarker in multi_model$biomarker_names) {
cat(sprintf("\n=== %s ===\n", biomarker))
print(head(cop_results[[biomarker]], 5))
}
#>
#> === IgG ===
#> titre correlate_of_risk correlate_of_protection
#> 1 3.244908 0.2357641 0.7245537
#> 2 2.542770 0.2755910 0.6779430
#> 3 3.427785 0.2278343 0.7338181
#> 4 4.026987 0.2069205 0.7582467
#> 5 2.945171 0.2507444 0.7070407
#>
#> === IgA ===
#> titre correlate_of_risk correlate_of_protection
#> 1 2.835740 0.2751900 0.6401453
#> 2 2.701161 0.2795020 0.6346020
#> 3 2.604183 0.2826617 0.6305464
#> 4 3.136495 0.2658576 0.6521772
#> 5 1.077544 0.3383128 0.5599809
#>
#> === Nonspecific ===
#> titre correlate_of_risk correlate_of_protection
#> 1 1.706775 0.3548008 0.5093546
#> 2 4.967871 0.2702419 0.6214752
#> 3 3.522967 0.3004944 0.5803874
#> 4 3.957672 0.2906375 0.5936726
#> 5 4.374356 0.2818339 0.6056241
# Combine CoP data for all biomarkers
cop_plot_data <- list()
for (i in seq_along(multi_model$biomarker_names)) {
biomarker <- multi_model$biomarker_names[i]
model <- multi_model$models[[biomarker]]
# Create prediction grid
titre_grid <- seq(min(model$titre), max(model$titre), length.out = 100)
# Extract protection probabilities from Stan model
cop_matrix <- model$predict_protection(newdata = titre_grid)
# Calculate summary statistics
cop_mean <- colMeans(cop_matrix)
cop_lower <- apply(cop_matrix, 2, quantile, probs = 0.025)
cop_upper <- apply(cop_matrix, 2, quantile, probs = 0.975)
cop_plot_data[[i]] <- data.frame(
biomarker = biomarker,
titre = titre_grid,
cop = cop_mean,
lower = cop_lower,
upper = cop_upper
)
}
cop_plot_df <- do.call(rbind, cop_plot_data)
# Plot all CoP curves
ggplot(cop_plot_df, aes(x = titre, y = cop, color = biomarker, fill = biomarker)) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2, color = NA) +
geom_line(linewidth = 1) +
facet_wrap(~biomarker, scales = "free_x") +
labs(
title = "Correlate of Protection Curves by Biomarker",
x = "Antibody Titre (log scale)",
y = "Correlate of Protection"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
legend.position = "none"
)
The correlate of protection curves show how protection increases with titre for each biomarker.
comparison <- multi_model$compare_biomarkers()
#>
#> === Biomarker Comparison ===
#>
#> biomarker auc auc_lower auc_upper brier loo_elpd loo_se
#> IgG 0.6160012 0.5350460 0.6969564 0.2025876 -151.2143 6.823139
#> IgA 0.5559589 0.4795334 0.6323843 0.2106491 -154.8922 6.000309
#> Nonspecific 0.5062008 0.4277909 0.5846108 0.2126332 -156.1159 5.824113The comparison table shows:
This plot shows biomarkers positioned by their predictive performance:
multi_model$plot_comparison()
#>
#> === Biomarker Comparison ===
#>
#> biomarker auc auc_lower auc_upper brier loo_elpd loo_se
#> IgG 0.6160012 0.5350460 0.6969564 0.2025876 -151.2143 6.823139
#> IgA 0.5559589 0.4795334 0.6323843 0.2106491 -154.8922 6.000309
#> Nonspecific 0.5062008 0.4277909 0.5846108 0.2126332 -156.1159 5.824113
#> `height` was translated to `width`.
Interpretation:
Let’s check if we recovered the true parameters for each biomarker:
# Extract parameters for each biomarker
for (biomarker in c("IgG", "IgA", "Nonspecific")) {
cat(sprintf("\n=== %s ===\n", biomarker))
model <- multi_model$models[[biomarker]]
params <- extract_parameters(model)
cat("\nEstimated parameters:\n")
print(params[, c("parameter", "mean", "lower", "upper")])
cat("\nTrue parameters:\n")
true <- true_params[[biomarker]]
cat(sprintf(" floor: %.3f\n", true$floor))
cat(sprintf(" ceiling: %.3f\n", true$ceiling))
cat(sprintf(" ec50: %.3f\n", true$ec50))
cat(sprintf(" slope: %.3f\n", true$slope))
}
#>
#> === IgG ===
#>
#> Estimated parameters:
#> parameter mean lower upper
#> 1 floor 0.1660107 0.01275554 0.3347004
#> 2 ceiling 0.8703555 0.60333227 0.9961154
#> 3 ec50 0.5421771 -0.88452657 1.8635541
#> 4 slope 0.9460338 0.26529839 2.3566466
#>
#> True parameters:
#> floor: 0.020
#> ceiling: 0.700
#> ec50: 2.000
#> slope: 2.500
#>
#> === IgA ===
#>
#> Estimated parameters:
#> parameter mean lower upper
#> 1 floor 0.1036086 0.002270747 0.3431934
#> 2 ceiling 0.7976040 0.510658992 0.9897215
#> 3 ec50 -0.6485946 -3.506394986 3.4382701
#> 4 slope 0.4313444 0.059038219 2.2865424
#>
#> True parameters:
#> floor: 0.150
#> ceiling: 0.700
#> ec50: 1.500
#> slope: 1.000
#>
#> === Nonspecific ===
#>
#> Estimated parameters:
#> parameter mean lower upper
#> 1 floor 0.1287951 0.001468574 0.3838501
#> 2 ceiling 0.7456344 0.501800563 0.9861539
#> 3 ec50 1.3476899 -1.428447632 4.8608415
#> 4 slope 0.6116823 0.008360370 3.2373810
#>
#> True parameters:
#> floor: 0.350
#> ceiling: 0.700
#> ec50: 3.000
#> slope: 0.010
# Create combined recovery plot
recovery_list <- list()
for (i in seq_along(multi_model$biomarker_names)) {
biomarker <- multi_model$biomarker_names[i]
model <- multi_model$models[[biomarker]]
params <- extract_parameters(model)
recovery_list[[i]] <- data.frame(
biomarker = biomarker,
parameter = params$parameter,
estimated = params$mean,
lower = params$lower,
upper = params$upper,
true = c(
true_params[[biomarker]]$floor,
true_params[[biomarker]]$ceiling,
true_params[[biomarker]]$ec50,
true_params[[biomarker]]$slope
)
)
}
recovery_df <- do.call(rbind, recovery_list)
ggplot(recovery_df, aes(x = parameter, color = biomarker)) +
geom_pointrange(
aes(y = estimated, ymin = lower, ymax = upper),
position = position_dodge(width = 0.5),
size = 0.8
) +
geom_point(
aes(y = true),
shape = 4,
size = 3,
stroke = 1.5,
position = position_dodge(width = 0.5)
) +
facet_wrap(~parameter, scales = "free_y", ncol = 2) +
labs(
title = "Parameter Recovery Across Biomarkers",
subtitle = "Points with bars: Estimated (95% CI) | X: True value",
y = "Value",
color = "Biomarker"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.title.x = element_blank()
)
# Plot ROC curves for each biomarker
par(mfrow = c(1, 3))
for (biomarker in multi_model$biomarker_names) {
model <- multi_model$models[[biomarker]]
pred <- colMeans(model$predict())
roc_obj <- pROC::roc(model$infected, pred, quiet = TRUE)
plot(roc_obj,
main = sprintf("%s\nAUC = %.3f", biomarker, pROC::auc(roc_obj)),
col = "steelblue",
lwd = 2)
abline(a = 0, b = 1, lty = 2, col = "gray")
}
# Compare LOO-CV across biomarkers
cat("\n=== Leave-One-Out Cross-Validation Comparison ===\n\n")
#>
#> === Leave-One-Out Cross-Validation Comparison ===
for (biomarker in multi_model$biomarker_names) {
cat(sprintf("--- %s ---\n", biomarker))
model <- multi_model$models[[biomarker]]
print(model$loo)
cat("\n")
}
#> --- IgG ---
#>
#> Computed from 2000 by 250 log-likelihood matrix.
#>
#> Estimate SE
#> elpd_loo -151.2 6.8
#> p_loo 2.6 0.2
#> looic 302.4 13.6
#> ------
#> MCSE of elpd_loo is 0.1.
#> MCSE and ESS estimates assume MCMC draws (r_eff in [0.3, 0.8]).
#>
#> All Pareto k estimates are good (k < 0.7).
#> See help('pareto-k-diagnostic') for details.
#>
#> --- IgA ---
#>
#> Computed from 2000 by 250 log-likelihood matrix.
#>
#> Estimate SE
#> elpd_loo -154.9 6.0
#> p_loo 2.0 0.1
#> looic 309.8 12.0
#> ------
#> MCSE of elpd_loo is 0.0.
#> MCSE and ESS estimates assume MCMC draws (r_eff in [0.4, 0.8]).
#>
#> All Pareto k estimates are good (k < 0.7).
#> See help('pareto-k-diagnostic') for details.
#>
#> --- Nonspecific ---
#>
#> Computed from 2000 by 250 log-likelihood matrix.
#>
#> Estimate SE
#> elpd_loo -156.1 5.8
#> p_loo 1.9 0.2
#> looic 312.2 11.6
#> ------
#> MCSE of elpd_loo is 0.1.
#> MCSE and ESS estimates assume MCMC draws (r_eff in [0.1, 0.4]).
#>
#> All Pareto k estimates are good (k < 0.7).
#> See help('pareto-k-diagnostic') for details.This analysis demonstrates:
The SeroCOPMulti class enables efficient comparison of
multiple biomarkers to identify the best correlates of protection.
sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggplot2_4.0.1 seroCOP_0.1.0
#>
#> loaded via a namespace (and not attached):
#> [1] gtable_0.3.6 tensorA_0.36.2.1 xfun_0.54
#> [4] bslib_0.9.0 QuickJSR_1.8.1 processx_3.8.6
#> [7] inline_0.3.21 lattice_0.22-7 callr_3.7.6
#> [10] ps_1.9.1 vctrs_0.6.5 tools_4.5.2
#> [13] generics_0.1.4 stats4_4.5.2 parallel_4.5.2
#> [16] tibble_3.3.0 pkgconfig_2.0.3 brms_2.23.0
#> [19] Matrix_1.7-4 checkmate_2.3.3 RColorBrewer_1.1-3
#> [22] S7_0.2.1 desc_1.4.3 distributional_0.5.0
#> [25] RcppParallel_5.1.11-1 lifecycle_1.0.4 compiler_4.5.2
#> [28] farver_2.1.2 stringr_1.6.0 textshaping_1.0.4
#> [31] Brobdingnag_1.2-9 codetools_0.2-20 htmltools_0.5.8.1
#> [34] sass_0.4.10 bayesplot_1.14.0 yaml_2.3.11
#> [37] pillar_1.11.1 pkgdown_2.2.0 jquerylib_0.1.4
#> [40] cachem_1.1.0 StanHeaders_2.32.10 bridgesampling_1.2-1
#> [43] abind_1.4-8 nlme_3.1-168 posterior_1.6.1
#> [46] rstan_2.32.7 tidyselect_1.2.1 digest_0.6.39
#> [49] mvtnorm_1.3-3 stringi_1.8.7 dplyr_1.1.4
#> [52] labeling_0.4.3 fastmap_1.2.0 grid_4.5.2
#> [55] cli_3.6.5 magrittr_2.0.4 loo_2.8.0
#> [58] pkgbuild_1.4.8 withr_3.0.2 scales_1.4.0
#> [61] backports_1.5.0 rmarkdown_2.30 matrixStats_1.5.0
#> [64] gridExtra_2.3 ragg_1.5.0 coda_0.19-4.1
#> [67] evaluate_1.0.5 knitr_1.50 rstantools_2.5.0
#> [70] rlang_1.1.6 Rcpp_1.1.0 glue_1.8.0
#> [73] pROC_1.19.0.1 jsonlite_2.0.0 R6_2.6.1
#> [76] systemfonts_1.3.1 fs_1.6.6