biokinetics.Rmd
The underlying biokinetics
model was developed in the
following paper: Russell TW et
al. Real-time estimation of immunological responses against emerging
SARS-CoV-2 variants in the UK: a mathematical modelling study. Lancet
Infect Dis. 2024 Sep 11:S1473-3099(24)00484-5
This vignette demonstrates how to use the epikinetics
package to replicate some of the key paper figures.
To initialise a model object, the only required argument is a path to the data in CSV format, or a data.table. See biokinetics for all available arguments. In this vignette we use a dataset representing the Delta wave which is installed with this package, specifying a regression model that just looks at the effect of infection history.
The fit
method then has the same function signature as
the underlying cmdstanr::sample
method. Here we specify a relatively small number of iterations of the
algorithm to limit the time it takes to compile this vignette.
dat <- data.table::fread(system.file("delta_full.rds", package = "epikinetics"))
mod <- epikinetics::biokinetics$new(data = dat,
lower_censoring_limit = 40,
covariate_formula = ~0 + infection_history)
delta <- mod$fit(parallel_chains = 4,
iter_warmup = 50,
iter_sampling = 200,
threads_per_chain = 4)
Figure 2 from the paper shows population level fits for each wave, disaggregated by infection history and titre type. Here we reproduce the facets for the Delta wave. Once the model has been fitted, simulate population trajectories using the fitted population parameters.
res <- mod$simulate_population_trajectories()
#> INFO [2024-12-11 16:41:17] Summarising fits
#> INFO [2024-12-11 16:41:21] Adjusting by regression coefficients
#> INFO [2024-12-11 16:41:23] Summarising into quantiles
head(res)
#> time_since_last_exp me lo hi titre_type infection_history
#> <int> <num> <num> <num> <char> <char>
#> 1: 0 112.6822 84.4386 143.8971 Ancestral Infection naive
#> 2: 1 142.5849 110.8551 176.8130 Ancestral Infection naive
#> 3: 2 180.8341 145.1370 219.0680 Ancestral Infection naive
#> 4: 3 228.8394 188.6540 274.4430 Ancestral Infection naive
#> 5: 4 290.0701 242.2203 346.9681 Ancestral Infection naive
#> 6: 5 367.5040 308.0682 451.2246 Ancestral Infection naive
See data for the definition of the returned columns.
Using ggplot2
:
library(ggplot2)
custom_theme <- theme_linedraw() + theme(
legend.position = "bottom",
text = element_text(size = 8, family = "Helvetica"),
strip.text.x.top = element_text(size = 8, family = "Helvetica"),
strip.text.x = element_text(size = 8, family = "Helvetica"),
strip.background = element_rect(fill = "white"),
strip.text = element_text(colour = 'black'),
strip.placement = "outside",
legend.box = "vertical",
legend.margin = margin(),
)
custom_palette <- c("#CC6677", "#DDCC77", "#88CCEE")
plot_data <- res
plot_data[, titre_type := forcats::fct_relevel(
titre_type,
c("Ancestral", "Alpha", "Delta"))]
ggplot(data = plot_data) +
geom_line(aes(x = time_since_last_exp,
y = me,
colour = titre_type)) +
geom_ribbon(aes(x = time_since_last_exp,
ymin = lo,
ymax = hi,
fill = titre_type), alpha = 0.65) +
scale_y_continuous(
trans = "log2",
breaks = c(40, 80, 160, 320, 640, 1280,
2560, 5120),
labels = c("40", "80", "160", "320", "640", "1280", "2560", "5120"),
limits = c(40, 10240)) +
scale_x_continuous(breaks = c(0, 30, 60, 90, 120),
labels = c("0", "30", "60", "90", "120"),
expand = c(0, 0)) +
coord_cartesian(clip = "off") +
labs(x = "Time since last exposure (days)",
y = expression(paste("Titre (IC"[50], ")"))) +
facet_wrap(infection_history ~ titre_type) +
scale_colour_manual(values = custom_palette) +
scale_fill_manual(values = custom_palette) +
guides(colour = "none", fill = "none") +
custom_theme
Figure 4 shows the relationship between infection history and titre values at peak and set points, for different titre types. We first simulate estimates for these from the fitted model:
res <- mod$population_stationary_points(n_draws = 2000)
#> INFO [2024-12-11 16:41:24] Extracting parameters
#> INFO [2024-12-11 16:41:25] Adjusting by covariates
#> INFO [2024-12-11 16:41:25] Calculating peak and switch titre values
#> INFO [2024-12-11 16:41:25] Recovering covariate names
#> INFO [2024-12-11 16:41:25] Calculating medians
head(res)
#> infection_history titre_type mu_p mu_s rel_drop_me mu_p_me mu_s_me
#> <char> <char> <num> <num> <num> <num> <num>
#> 1: Infection naive Ancestral 1232.680 256.3040 0.1926824 1231.001 237.135
#> 2: Infection naive Ancestral 1203.460 222.4226 0.1926824 1231.001 237.135
#> 3: Infection naive Ancestral 1237.530 223.8834 0.1926824 1231.001 237.135
#> 4: Infection naive Ancestral 1287.212 231.6793 0.1926824 1231.001 237.135
#> 5: Infection naive Ancestral 1326.172 257.9224 0.1926824 1231.001 237.135
#> 6: Infection naive Ancestral 1353.322 270.6602 0.1926824 1231.001 237.135
The values we’re going to plot are the mean peak titre values
(mu_p
) and mean set point titre values (mu_s
),
for different titre types and infection histories. See data for a full definition of all the returned
columns. Using ggplot2
:
plot_data <- res[, titre_type := forcats::fct_relevel(
titre_type,
c("Ancestral", "Alpha", "Delta"))]
ggplot(data = plot_data, aes(
x = mu_p, y = mu_s,
colour = titre_type)) +
geom_density_2d(
aes(
group = interaction(
infection_history,
titre_type))) +
geom_point(data = plot_data,
alpha = 0.05, size = 0.2) +
geom_point(aes(x = mu_p_me, y = mu_s_me,
shape = infection_history),
colour = "black") +
geom_path(aes(x = mu_p_me, y = mu_s_me,
group = titre_type),
colour = "black") +
geom_vline(xintercept = 2560, linetype = "twodash", colour = "gray30") +
scale_x_continuous(
trans = "log2",
breaks = c(40, 80, 160, 320, 640, 1280, 2560, 5120, 10240),
labels = c(expression(" " <= 40),
"80", "160", "320", "640", "1280", "2560", "5120", "10240"),
limits = c(NA, 10240)) +
geom_hline(yintercept = 2560, linetype = "twodash", colour = "gray30") +
scale_y_continuous(
trans = "log2",
breaks = c(40, 80, 160, 320, 640, 1280, 2560, 5120, 10240),
labels = c(expression(" " <= 40),
"80", "160", "320", "640", "1280", "2560", "5120", "10240"),
limits = c(NA, 5120)) +
custom_theme +
scale_shape_manual(values = c(1, 2, 3)) +
labs(x = expression(paste("Population-level titre value at peak (IC"[50], ")")),
y = expression(paste("Population-level titre value at set-point (IC"[50], ")"))) +
scale_colour_manual(values = custom_palette) +
guides(colour = guide_legend(title = "Titre type", override.aes = list(alpha = 1, size = 1)),
shape = guide_legend(title = "Infection history"))
Figure 5 uses simulations of individual trajectories to derive population level titre values. This can take a while so here we set the maximum number of draws to just 250 for speed.
res <- mod$simulate_individual_trajectories(n_draws = 250)
#> INFO [2024-12-11 16:41:26] Extracting parameters
#> INFO [2024-12-11 16:41:30] Simulating individual trajectories
#> INFO [2024-12-11 16:41:34] Recovering covariate names
#> INFO [2024-12-11 16:41:34] Calculating exposure days. Adjusting exposures by 0 days
#> INFO [2024-12-11 16:41:37] Resampling
#> Registered S3 method overwritten by 'mosaic':
#> method from
#> fortify.SpatialPolygonsDataFrame ggplot2
#> INFO [2024-12-11 16:41:47] Summarising into population quantiles
head(res)
#> calendar_day titre_type me lo hi time_shift
#> <IDat> <char> <num> <num> <num> <num>
#> 1: 2021-03-08 Ancestral 1145.101 886.7598 1504.746 0
#> 2: 2021-03-09 Ancestral 1127.169 881.6767 1452.714 0
#> 3: 2021-03-10 Ancestral 1159.459 899.3294 1478.496 0
#> 4: 2021-03-11 Ancestral 1134.535 918.3473 1474.952 0
#> 5: 2021-03-12 Ancestral 1128.106 884.1373 1525.385 0
#> 6: 2021-03-13 Ancestral 1164.565 909.3966 1484.574 0
See data for a definition of all the returned columns. Figure 5 A plots the derived population trajectories. Here we replicate a portion of the graph from the paper, from the minimum date in the dataset until the date at which the next variant emerged.
date_delta <- lubridate::ymd("2021-05-07")
date_ba2 <- lubridate::ymd("2022-01-24")
res$wave <- "Delta"
dat$wave <- "Delta"
plot_data <- merge(
res, dat[, .(
min_date = min(day), max_date = max(day)), by = wave])[
, .SD[calendar_day >= min_date & calendar_day <= date_ba2], by = wave]
plot_data[, titre_type := forcats::fct_relevel(
titre_type,
c("Ancestral", "Alpha", "Delta"))]
ggplot() + geom_line(
data = plot_data,
aes(x = calendar_day,
y = me,
group = interaction(titre_type, wave),
colour = titre_type),
alpha = 0.2) +
geom_ribbon(
data = plot_data,
aes(x = calendar_day,
ymin = lo,
ymax = hi,
group = interaction(titre_type, wave)
),
alpha = 0.2) +
labs(title = "Population-level titre values",
tag = "A",
x = "Date",
y = expression(paste("Titre (IC"[50], ")"))) +
scale_colour_manual(
values = custom_palette) +
scale_fill_manual(
values = custom_palette) +
scale_x_date(
date_labels = "%b %Y",
limits = c(min(dat$day), date_ba2)) +
geom_smooth(
data = plot_data,
aes(x = calendar_day,
y = me,
fill = titre_type,
colour = titre_type,
group = interaction(titre_type, wave)),
alpha = 0.5, span = 0.2) +
custom_theme +
guides(colour = guide_legend(title = "Titre type"), fill = "none",)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Figure 5 C shows counterfactual trajectories varying vaccination
timings. To do this, we simulate individual trajectories with different
time_shift
arguments. Here we set the maximum number of
draws to just 50 for speed.
time_shift_values <- seq(-75, 75, by = 15)
indices <- seq_along(time_shift_values)
results_list <- lapply(indices, function(index) {
shift <- time_shift_values[index]
mod$simulate_individual_trajectories(n_draws = 50, time_shift = shift)
})
#> INFO [2024-12-11 16:41:48] Extracting parameters
#> INFO [2024-12-11 16:41:52] Simulating individual trajectories
#> INFO [2024-12-11 16:41:53] Recovering covariate names
#> INFO [2024-12-11 16:41:53] Calculating exposure days. Adjusting exposures by -75 days
#> INFO [2024-12-11 16:41:53] Resampling
#> INFO [2024-12-11 16:41:55] Summarising into population quantiles
#> INFO [2024-12-11 16:41:56] Extracting parameters
#> INFO [2024-12-11 16:41:59] Simulating individual trajectories
#> INFO [2024-12-11 16:42:00] Recovering covariate names
#> INFO [2024-12-11 16:42:00] Calculating exposure days. Adjusting exposures by -60 days
#> INFO [2024-12-11 16:42:00] Resampling
#> INFO [2024-12-11 16:42:02] Summarising into population quantiles
#> INFO [2024-12-11 16:42:03] Extracting parameters
#> INFO [2024-12-11 16:42:06] Simulating individual trajectories
#> INFO [2024-12-11 16:42:06] Recovering covariate names
#> INFO [2024-12-11 16:42:06] Calculating exposure days. Adjusting exposures by -45 days
#> INFO [2024-12-11 16:42:07] Resampling
#> INFO [2024-12-11 16:42:09] Summarising into population quantiles
#> INFO [2024-12-11 16:42:09] Extracting parameters
#> INFO [2024-12-11 16:42:13] Simulating individual trajectories
#> INFO [2024-12-11 16:42:13] Recovering covariate names
#> INFO [2024-12-11 16:42:13] Calculating exposure days. Adjusting exposures by -30 days
#> INFO [2024-12-11 16:42:14] Resampling
#> INFO [2024-12-11 16:42:16] Summarising into population quantiles
#> INFO [2024-12-11 16:42:16] Extracting parameters
#> INFO [2024-12-11 16:42:20] Simulating individual trajectories
#> INFO [2024-12-11 16:42:20] Recovering covariate names
#> INFO [2024-12-11 16:42:20] Calculating exposure days. Adjusting exposures by -15 days
#> INFO [2024-12-11 16:42:21] Resampling
#> INFO [2024-12-11 16:42:23] Summarising into population quantiles
#> INFO [2024-12-11 16:42:23] Extracting parameters
#> INFO [2024-12-11 16:42:26] Simulating individual trajectories
#> INFO [2024-12-11 16:42:27] Recovering covariate names
#> INFO [2024-12-11 16:42:27] Calculating exposure days. Adjusting exposures by 0 days
#> INFO [2024-12-11 16:42:27] Resampling
#> INFO [2024-12-11 16:42:29] Summarising into population quantiles
#> INFO [2024-12-11 16:42:30] Extracting parameters
#> INFO [2024-12-11 16:42:33] Simulating individual trajectories
#> INFO [2024-12-11 16:42:34] Recovering covariate names
#> INFO [2024-12-11 16:42:34] Calculating exposure days. Adjusting exposures by 15 days
#> INFO [2024-12-11 16:42:34] Resampling
#> INFO [2024-12-11 16:42:36] Summarising into population quantiles
#> INFO [2024-12-11 16:42:37] Extracting parameters
#> INFO [2024-12-11 16:42:40] Simulating individual trajectories
#> INFO [2024-12-11 16:42:41] Recovering covariate names
#> INFO [2024-12-11 16:42:41] Calculating exposure days. Adjusting exposures by 30 days
#> INFO [2024-12-11 16:42:41] Resampling
#> INFO [2024-12-11 16:42:43] Summarising into population quantiles
#> INFO [2024-12-11 16:42:44] Extracting parameters
#> INFO [2024-12-11 16:42:47] Simulating individual trajectories
#> INFO [2024-12-11 16:42:48] Recovering covariate names
#> INFO [2024-12-11 16:42:48] Calculating exposure days. Adjusting exposures by 45 days
#> INFO [2024-12-11 16:42:48] Resampling
#> INFO [2024-12-11 16:42:50] Summarising into population quantiles
#> INFO [2024-12-11 16:42:51] Extracting parameters
#> INFO [2024-12-11 16:42:54] Simulating individual trajectories
#> INFO [2024-12-11 16:42:54] Recovering covariate names
#> INFO [2024-12-11 16:42:54] Calculating exposure days. Adjusting exposures by 60 days
#> INFO [2024-12-11 16:42:55] Resampling
#> INFO [2024-12-11 16:42:57] Summarising into population quantiles
#> INFO [2024-12-11 16:42:57] Extracting parameters
#> INFO [2024-12-11 16:43:00] Simulating individual trajectories
#> INFO [2024-12-11 16:43:01] Recovering covariate names
#> INFO [2024-12-11 16:43:01] Calculating exposure days. Adjusting exposures by 75 days
#> INFO [2024-12-11 16:43:02] Resampling
#> INFO [2024-12-11 16:43:04] Summarising into population quantiles
combined_data <- data.table::data.table(data.table::rbindlist(results_list))
Plotting the median values:
plot_data <- combined_data[calendar_day == date_delta]
plot_data <- plot_data[, titre_type := forcats::fct_relevel(
titre_type,
c("Ancestral", "Alpha", "Delta"))]
ggplot(
data = plot_data) +
geom_line(aes(
x = time_shift, y = me, colour = titre_type)) +
geom_pointrange(aes(
x = time_shift, y = me, ymin = lo,
ymax = hi, colour = titre_type)) +
scale_x_continuous(
breaks = time_shift_values) +
scale_y_continuous(
trans = "log2",
breaks = c(40, 80, 160, 320, 640, 1280),
labels = c("40", "80", "160", "320", "640", "1280")) +
scale_colour_manual(
values = custom_palette) +
labs(title = "Population titre values varying vaccine timings",
x = "Time shift (days)",
y = expression(paste("Titre (IC"[50], ")"))) +
custom_theme +
theme(legend.position = "none") +
guides(colour = guide_legend(title = "Titre Type", nrow = 1, override.aes = list(alpha = 1, size = 1))) +
labs(tag = "C",
title = "Counterfactual population-level titres varying vaccination timings")