biokinetics.Rmd
The underlying biokinetics
model was developed in the
following paper: Russell TW et
al. Real-time estimation of immunological responses against emerging
SARS-CoV-2 variants in the UK: a mathematical modelling study. Lancet
Infect Dis. 2024 Sep 11:S1473-3099(24)00484-5
This vignette demonstrates how to use the epikinetics
package to replicate some of the key paper figures.
To initialise a model object, the only required argument is a path to the data in CSV format, or a data.table. See biokinetics for all available arguments. In this vignette we use a dataset representing the Delta wave which is installed with this package, specifying a regression model that just looks at the effect of infection history.
The fit
method then has the same function signature as
the underlying cmdstanr::sample
method. Here we specify a relatively small number of iterations of the
algorithm to limit the time it takes to compile this vignette.
dat <- data.table::fread(system.file("delta_full.rds", package = "epikinetics"))
mod <- epikinetics::biokinetics$new(data = dat,
lower_censoring_limit = 40,
covariate_formula = ~0 + infection_history)
delta <- mod$fit(parallel_chains = 4,
iter_warmup = 50,
iter_sampling = 200,
threads_per_chain = 4)
Figure 2 from the paper shows population level fits for each wave, disaggregated by infection history and titre type. Here we reproduce the facets for the Delta wave. Once the model has been fitted, simulate population trajectories using the fitted population parameters.
res <- mod$simulate_population_trajectories()
#> INFO [2025-06-17 14:51:05] Summarising fits
#> INFO [2025-06-17 14:51:08] Adjusting by regression coefficients
#> INFO [2025-06-17 14:51:10] Summarising into quantiles
head(res)
#> time_since_last_exp me lo hi titre_type infection_history
#> <int> <num> <num> <num> <char> <char>
#> 1: 0 112.8460 88.59188 143.0309 Ancestral Infection naive
#> 2: 1 143.0190 115.66984 177.9631 Ancestral Infection naive
#> 3: 2 181.7228 149.73768 219.3123 Ancestral Infection naive
#> 4: 3 229.5554 191.88651 274.0090 Ancestral Infection naive
#> 5: 4 290.3409 243.29860 348.8412 Ancestral Infection naive
#> 6: 5 365.8364 306.09625 452.1910 Ancestral Infection naive
See data for the definition of the returned columns.
Using ggplot2
:
library(ggplot2)
custom_theme <- theme_linedraw() + theme(
legend.position = "bottom",
text = element_text(size = 8, family = "Helvetica"),
strip.text.x.top = element_text(size = 8, family = "Helvetica"),
strip.text.x = element_text(size = 8, family = "Helvetica"),
strip.background = element_rect(fill = "white"),
strip.text = element_text(colour = 'black'),
strip.placement = "outside",
legend.box = "vertical",
legend.margin = margin(),
)
custom_palette <- c("#CC6677", "#DDCC77", "#88CCEE")
plot_data <- res
plot_data[, titre_type := forcats::fct_relevel(
titre_type,
c("Ancestral", "Alpha", "Delta"))]
ggplot(data = plot_data) +
geom_line(aes(x = time_since_last_exp,
y = me,
colour = titre_type)) +
geom_ribbon(aes(x = time_since_last_exp,
ymin = lo,
ymax = hi,
fill = titre_type), alpha = 0.65) +
scale_y_continuous(
trans = "log2",
breaks = c(40, 80, 160, 320, 640, 1280,
2560, 5120),
labels = c("40", "80", "160", "320", "640", "1280", "2560", "5120"),
limits = c(40, 10240)) +
scale_x_continuous(breaks = c(0, 30, 60, 90, 120),
labels = c("0", "30", "60", "90", "120"),
expand = c(0, 0)) +
coord_cartesian(clip = "off") +
labs(x = "Time since last exposure (days)",
y = expression(paste("Titre (IC"[50], ")"))) +
facet_wrap(infection_history ~ titre_type) +
scale_colour_manual(values = custom_palette) +
scale_fill_manual(values = custom_palette) +
guides(colour = "none", fill = "none") +
custom_theme
Figure 4 shows the relationship between infection history and titre values at peak and set points, for different titre types. We first simulate estimates for these from the fitted model:
res <- mod$population_stationary_points(n_draws = 2000)
#> INFO [2025-06-17 14:51:12] Extracting parameters
#> INFO [2025-06-17 14:51:12] Adjusting by covariates
#> INFO [2025-06-17 14:51:12] Calculating peak and switch titre values
#> INFO [2025-06-17 14:51:12] Recovering covariate names
#> INFO [2025-06-17 14:51:12] Calculating medians
head(res)
#> infection_history titre_type mu_p mu_s rel_drop_me mu_p_me mu_s_me
#> <char> <char> <num> <num> <num> <num> <num>
#> 1: Infection naive Ancestral 1168.928 225.6553 0.1893087 1233.49 234.331
#> 2: Infection naive Ancestral 1237.139 262.1662 0.1893087 1233.49 234.331
#> 3: Infection naive Ancestral 1275.875 243.4242 0.1893087 1233.49 234.331
#> 4: Infection naive Ancestral 1311.227 259.0683 0.1893087 1233.49 234.331
#> 5: Infection naive Ancestral 1295.196 206.9455 0.1893087 1233.49 234.331
#> 6: Infection naive Ancestral 1299.692 247.3273 0.1893087 1233.49 234.331
The values we’re going to plot are the mean peak titre values
(mu_p
) and mean set point titre values (mu_s
),
for different titre types and infection histories. See data for a full definition of all the returned
columns. Using ggplot2
:
plot_data <- res[, titre_type := forcats::fct_relevel(
titre_type,
c("Ancestral", "Alpha", "Delta"))]
ggplot(data = plot_data, aes(
x = mu_p, y = mu_s,
colour = titre_type)) +
geom_density_2d(
aes(
group = interaction(
infection_history,
titre_type))) +
geom_point(data = plot_data,
alpha = 0.05, size = 0.2) +
geom_point(aes(x = mu_p_me, y = mu_s_me,
shape = infection_history),
colour = "black") +
geom_path(aes(x = mu_p_me, y = mu_s_me,
group = titre_type),
colour = "black") +
geom_vline(xintercept = 2560, linetype = "twodash", colour = "gray30") +
scale_x_continuous(
trans = "log2",
breaks = c(40, 80, 160, 320, 640, 1280, 2560, 5120, 10240),
labels = c(expression(" " <= 40),
"80", "160", "320", "640", "1280", "2560", "5120", "10240"),
limits = c(NA, 10240)) +
geom_hline(yintercept = 2560, linetype = "twodash", colour = "gray30") +
scale_y_continuous(
trans = "log2",
breaks = c(40, 80, 160, 320, 640, 1280, 2560, 5120, 10240),
labels = c(expression(" " <= 40),
"80", "160", "320", "640", "1280", "2560", "5120", "10240"),
limits = c(NA, 5120)) +
custom_theme +
scale_shape_manual(values = c(1, 2, 3)) +
labs(x = expression(paste("Population-level titre value at peak (IC"[50], ")")),
y = expression(paste("Population-level titre value at set-point (IC"[50], ")"))) +
scale_colour_manual(values = custom_palette) +
guides(colour = guide_legend(title = "Titre type", override.aes = list(alpha = 1, size = 1)),
shape = guide_legend(title = "Infection history"))
Figure 5 uses simulations of individual trajectories to derive population level titre values. This can take a while so here we set the maximum number of draws to just 250 for speed.
res <- mod$simulate_individual_trajectories(n_draws = 250)
#> INFO [2025-06-17 14:51:13] Extracting parameters
#> INFO [2025-06-17 14:51:17] Simulating individual trajectories
#> INFO [2025-06-17 14:51:21] Recovering covariate names
#> INFO [2025-06-17 14:51:21] Calculating exposure days. Adjusting exposures by 0 days
#> INFO [2025-06-17 14:51:24] Resampling
#> Registered S3 method overwritten by 'mosaic':
#> method from
#> fortify.SpatialPolygonsDataFrame ggplot2
#> INFO [2025-06-17 14:51:33] Summarising into population quantiles
head(res)
#> calendar_day titre_type me lo hi time_shift
#> <IDat> <char> <num> <num> <num> <num>
#> 1: 2021-03-08 Ancestral 1185.991 890.1526 1534.772 0
#> 2: 2021-03-09 Ancestral 1128.211 887.7483 1444.668 0
#> 3: 2021-03-10 Ancestral 1190.466 942.7031 1514.890 0
#> 4: 2021-03-11 Ancestral 1165.086 888.5316 1481.972 0
#> 5: 2021-03-12 Ancestral 1160.733 879.8674 1452.659 0
#> 6: 2021-03-13 Ancestral 1183.584 926.5421 1498.611 0
See data for a definition of all the returned columns. Figure 5 A plots the derived population trajectories. Here we replicate a portion of the graph from the paper, from the minimum date in the dataset until the date at which the next variant emerged.
date_delta <- lubridate::ymd("2021-05-07")
date_ba2 <- lubridate::ymd("2022-01-24")
res$wave <- "Delta"
dat$wave <- "Delta"
plot_data <- merge(
res, dat[, .(
min_date = min(day), max_date = max(day)), by = wave])[
, .SD[calendar_day >= min_date & calendar_day <= date_ba2], by = wave]
plot_data[, titre_type := forcats::fct_relevel(
titre_type,
c("Ancestral", "Alpha", "Delta"))]
ggplot() + geom_line(
data = plot_data,
aes(x = calendar_day,
y = me,
group = interaction(titre_type, wave),
colour = titre_type),
alpha = 0.2) +
geom_ribbon(
data = plot_data,
aes(x = calendar_day,
ymin = lo,
ymax = hi,
group = interaction(titre_type, wave)
),
alpha = 0.2) +
labs(title = "Population-level titre values",
tag = "A",
x = "Date",
y = expression(paste("Titre (IC"[50], ")"))) +
scale_colour_manual(
values = custom_palette) +
scale_fill_manual(
values = custom_palette) +
scale_x_date(
date_labels = "%b %Y",
limits = c(min(dat$day), date_ba2)) +
geom_smooth(
data = plot_data,
aes(x = calendar_day,
y = me,
fill = titre_type,
colour = titre_type,
group = interaction(titre_type, wave)),
alpha = 0.5, span = 0.2) +
custom_theme +
guides(colour = guide_legend(title = "Titre type"), fill = "none",)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Figure 5 C shows counterfactual trajectories varying vaccination
timings. To do this, we simulate individual trajectories with different
time_shift
arguments. Here we set the maximum number of
draws to just 50 for speed.
time_shift_values <- seq(-75, 75, by = 15)
indices <- seq_along(time_shift_values)
results_list <- lapply(indices, function(index) {
shift <- time_shift_values[index]
mod$simulate_individual_trajectories(n_draws = 50, time_shift = shift)
})
#> INFO [2025-06-17 14:51:35] Extracting parameters
#> INFO [2025-06-17 14:51:38] Simulating individual trajectories
#> INFO [2025-06-17 14:51:39] Recovering covariate names
#> INFO [2025-06-17 14:51:39] Calculating exposure days. Adjusting exposures by -75 days
#> INFO [2025-06-17 14:51:39] Resampling
#> INFO [2025-06-17 14:51:41] Summarising into population quantiles
#> INFO [2025-06-17 14:51:42] Extracting parameters
#> INFO [2025-06-17 14:51:45] Simulating individual trajectories
#> INFO [2025-06-17 14:51:46] Recovering covariate names
#> INFO [2025-06-17 14:51:46] Calculating exposure days. Adjusting exposures by -60 days
#> INFO [2025-06-17 14:51:46] Resampling
#> INFO [2025-06-17 14:51:48] Summarising into population quantiles
#> INFO [2025-06-17 14:51:49] Extracting parameters
#> INFO [2025-06-17 14:51:52] Simulating individual trajectories
#> INFO [2025-06-17 14:51:53] Recovering covariate names
#> INFO [2025-06-17 14:51:53] Calculating exposure days. Adjusting exposures by -45 days
#> INFO [2025-06-17 14:51:53] Resampling
#> INFO [2025-06-17 14:51:55] Summarising into population quantiles
#> INFO [2025-06-17 14:51:56] Extracting parameters
#> INFO [2025-06-17 14:51:59] Simulating individual trajectories
#> INFO [2025-06-17 14:52:00] Recovering covariate names
#> INFO [2025-06-17 14:52:00] Calculating exposure days. Adjusting exposures by -30 days
#> INFO [2025-06-17 14:52:01] Resampling
#> INFO [2025-06-17 14:52:02] Summarising into population quantiles
#> INFO [2025-06-17 14:52:03] Extracting parameters
#> INFO [2025-06-17 14:52:06] Simulating individual trajectories
#> INFO [2025-06-17 14:52:07] Recovering covariate names
#> INFO [2025-06-17 14:52:07] Calculating exposure days. Adjusting exposures by -15 days
#> INFO [2025-06-17 14:52:07] Resampling
#> INFO [2025-06-17 14:52:09] Summarising into population quantiles
#> INFO [2025-06-17 14:52:10] Extracting parameters
#> INFO [2025-06-17 14:52:13] Simulating individual trajectories
#> INFO [2025-06-17 14:52:13] Recovering covariate names
#> INFO [2025-06-17 14:52:13] Calculating exposure days. Adjusting exposures by 0 days
#> INFO [2025-06-17 14:52:14] Resampling
#> INFO [2025-06-17 14:52:16] Summarising into population quantiles
#> INFO [2025-06-17 14:52:16] Extracting parameters
#> INFO [2025-06-17 14:52:20] Simulating individual trajectories
#> INFO [2025-06-17 14:52:20] Recovering covariate names
#> INFO [2025-06-17 14:52:20] Calculating exposure days. Adjusting exposures by 15 days
#> INFO [2025-06-17 14:52:21] Resampling
#> INFO [2025-06-17 14:52:22] Summarising into population quantiles
#> INFO [2025-06-17 14:52:23] Extracting parameters
#> INFO [2025-06-17 14:52:26] Simulating individual trajectories
#> INFO [2025-06-17 14:52:27] Recovering covariate names
#> INFO [2025-06-17 14:52:27] Calculating exposure days. Adjusting exposures by 30 days
#> INFO [2025-06-17 14:52:27] Resampling
#> INFO [2025-06-17 14:52:29] Summarising into population quantiles
#> INFO [2025-06-17 14:52:30] Extracting parameters
#> INFO [2025-06-17 14:52:33] Simulating individual trajectories
#> INFO [2025-06-17 14:52:34] Recovering covariate names
#> INFO [2025-06-17 14:52:34] Calculating exposure days. Adjusting exposures by 45 days
#> INFO [2025-06-17 14:52:34] Resampling
#> INFO [2025-06-17 14:52:36] Summarising into population quantiles
#> INFO [2025-06-17 14:52:37] Extracting parameters
#> INFO [2025-06-17 14:52:40] Simulating individual trajectories
#> INFO [2025-06-17 14:52:40] Recovering covariate names
#> INFO [2025-06-17 14:52:40] Calculating exposure days. Adjusting exposures by 60 days
#> INFO [2025-06-17 14:52:41] Resampling
#> INFO [2025-06-17 14:52:43] Summarising into population quantiles
#> INFO [2025-06-17 14:52:43] Extracting parameters
#> INFO [2025-06-17 14:52:46] Simulating individual trajectories
#> INFO [2025-06-17 14:52:47] Recovering covariate names
#> INFO [2025-06-17 14:52:47] Calculating exposure days. Adjusting exposures by 75 days
#> INFO [2025-06-17 14:52:48] Resampling
#> INFO [2025-06-17 14:52:49] Summarising into population quantiles
combined_data <- data.table::data.table(data.table::rbindlist(results_list))
Plotting the median values:
plot_data <- combined_data[calendar_day == date_delta]
plot_data <- plot_data[, titre_type := forcats::fct_relevel(
titre_type,
c("Ancestral", "Alpha", "Delta"))]
ggplot(
data = plot_data) +
geom_line(aes(
x = time_shift, y = me, colour = titre_type)) +
geom_pointrange(aes(
x = time_shift, y = me, ymin = lo,
ymax = hi, colour = titre_type)) +
scale_x_continuous(
breaks = time_shift_values) +
scale_y_continuous(
trans = "log2",
breaks = c(40, 80, 160, 320, 640, 1280),
labels = c("40", "80", "160", "320", "640", "1280")) +
scale_colour_manual(
values = custom_palette) +
labs(title = "Population titre values varying vaccine timings",
x = "Time shift (days)",
y = expression(paste("Titre (IC"[50], ")"))) +
custom_theme +
theme(legend.position = "none") +
guides(colour = guide_legend(title = "Titre Type", nrow = 1, override.aes = list(alpha = 1, size = 1))) +
labs(tag = "C",
title = "Counterfactual population-level titres varying vaccination timings")