biokinetics case study • epikinetics

The underlying biokinetics model was developed in the following paper: Russell TW et al. Real-time estimation of immunological responses against emerging SARS-CoV-2 variants in the UK: a mathematical modelling study. Lancet Infect Dis. 2024 Sep 11:S1473-3099(24)00484-5

This vignette demonstrates how to use the epikinetics package to replicate some of the key paper figures.

Fitting the model

To initialise a model object, the only required argument is a path to the data in CSV format, or a data.table. See biokinetics for all available arguments. In this vignette we use a dataset representing the Delta wave which is installed with this package, specifying a regression model that just looks at the effect of infection history.

The fit method then has the same function signature as the underlying cmdstanr::sample method. Here we specify a relatively small number of iterations of the algorithm to limit the time it takes to compile this vignette.

dat <- data.table::fread(system.file("delta_full.rds", package = "epikinetics"))
mod <- epikinetics::biokinetics$new(data = dat,
                                    lower_censoring_limit = 40,
                                    covariate_formula = ~0 + infection_history)
delta <- mod$fit(parallel_chains = 4,
                 iter_warmup = 50,
                 iter_sampling = 200,
                 threads_per_chain = 4)

Figure 2

Figure 2 from the paper shows population level fits for each wave, disaggregated by infection history and titre type. Here we reproduce the facets for the Delta wave. Once the model has been fitted, simulate population trajectories using the fitted population parameters.

res <- mod$simulate_population_trajectories()
#> INFO [2024-12-11 16:41:17] Summarising fits
#> INFO [2024-12-11 16:41:21] Adjusting by regression coefficients
#> INFO [2024-12-11 16:41:23] Summarising into quantiles
head(res)
#>    time_since_last_exp       me       lo       hi titre_type infection_history
#>                  <int>    <num>    <num>    <num>     <char>            <char>
#> 1:                   0 112.6822  84.4386 143.8971  Ancestral   Infection naive
#> 2:                   1 142.5849 110.8551 176.8130  Ancestral   Infection naive
#> 3:                   2 180.8341 145.1370 219.0680  Ancestral   Infection naive
#> 4:                   3 228.8394 188.6540 274.4430  Ancestral   Infection naive
#> 5:                   4 290.0701 242.2203 346.9681  Ancestral   Infection naive
#> 6:                   5 367.5040 308.0682 451.2246  Ancestral   Infection naive

See data for the definition of the returned columns.

Using ggplot2:

library(ggplot2)
custom_theme <- theme_linedraw() + theme(
    legend.position = "bottom",
    text = element_text(size = 8, family = "Helvetica"),
    strip.text.x.top = element_text(size = 8, family = "Helvetica"),
    strip.text.x = element_text(size = 8, family = "Helvetica"),
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(colour = 'black'),
    strip.placement = "outside",
    legend.box = "vertical",
    legend.margin = margin(),
)

custom_palette <- c("#CC6677", "#DDCC77", "#88CCEE")

plot_data <- res
plot_data[, titre_type := forcats::fct_relevel(
  titre_type,
  c("Ancestral", "Alpha", "Delta"))]

ggplot(data = plot_data) +
  geom_line(aes(x = time_since_last_exp,
                y = me,
                colour = titre_type)) +
  geom_ribbon(aes(x = time_since_last_exp,
                  ymin = lo,
                  ymax = hi,
                  fill = titre_type), alpha = 0.65) +
  scale_y_continuous(
    trans = "log2",
    breaks = c(40, 80, 160, 320, 640, 1280,
               2560, 5120),
    labels = c("40", "80", "160", "320", "640", "1280", "2560", "5120"),
    limits = c(40, 10240)) +
  scale_x_continuous(breaks = c(0, 30, 60, 90, 120),
                     labels = c("0", "30", "60", "90", "120"),
                     expand = c(0, 0)) +
  coord_cartesian(clip = "off") +
  labs(x = "Time since last exposure (days)",
       y = expression(paste("Titre (IC"[50], ")"))) +
  facet_wrap(infection_history ~ titre_type) +
  scale_colour_manual(values = custom_palette) +
  scale_fill_manual(values = custom_palette) +
  guides(colour = "none", fill = "none") +
  custom_theme

Figure 4

Figure 4 shows the relationship between infection history and titre values at peak and set points, for different titre types. We first simulate estimates for these from the fitted model:

res <- mod$population_stationary_points(n_draws = 2000)
#> INFO [2024-12-11 16:41:24] Extracting parameters
#> INFO [2024-12-11 16:41:25] Adjusting by covariates
#> INFO [2024-12-11 16:41:25] Calculating peak and switch titre values
#> INFO [2024-12-11 16:41:25] Recovering covariate names
#> INFO [2024-12-11 16:41:25] Calculating medians
head(res)
#>    infection_history titre_type     mu_p     mu_s rel_drop_me  mu_p_me mu_s_me
#>               <char>     <char>    <num>    <num>       <num>    <num>   <num>
#> 1:   Infection naive  Ancestral 1232.680 256.3040   0.1926824 1231.001 237.135
#> 2:   Infection naive  Ancestral 1203.460 222.4226   0.1926824 1231.001 237.135
#> 3:   Infection naive  Ancestral 1237.530 223.8834   0.1926824 1231.001 237.135
#> 4:   Infection naive  Ancestral 1287.212 231.6793   0.1926824 1231.001 237.135
#> 5:   Infection naive  Ancestral 1326.172 257.9224   0.1926824 1231.001 237.135
#> 6:   Infection naive  Ancestral 1353.322 270.6602   0.1926824 1231.001 237.135

The values we’re going to plot are the mean peak titre values (mu_p) and mean set point titre values (mu_s), for different titre types and infection histories. See data for a full definition of all the returned columns. Using ggplot2:

plot_data <- res[, titre_type := forcats::fct_relevel(
  titre_type,
  c("Ancestral", "Alpha", "Delta"))]

ggplot(data = plot_data, aes(
  x = mu_p, y = mu_s,
  colour = titre_type)) +
  geom_density_2d(
    aes(
      group = interaction(
        infection_history,
        titre_type))) +
  geom_point(data = plot_data,
             alpha = 0.05, size = 0.2) +
  geom_point(aes(x = mu_p_me, y = mu_s_me,
                 shape = infection_history),
             colour = "black") +
  geom_path(aes(x = mu_p_me, y = mu_s_me,
                group = titre_type),
            colour = "black") +
  geom_vline(xintercept = 2560, linetype = "twodash", colour = "gray30") +
  scale_x_continuous(
    trans = "log2",
    breaks = c(40, 80, 160, 320, 640, 1280, 2560, 5120, 10240),
    labels = c(expression(" " <= 40),
               "80", "160", "320", "640", "1280", "2560", "5120", "10240"),
    limits = c(NA, 10240)) +
  geom_hline(yintercept = 2560, linetype = "twodash", colour = "gray30") +
  scale_y_continuous(
    trans = "log2",
    breaks = c(40, 80, 160, 320, 640, 1280, 2560, 5120, 10240),
    labels = c(expression(" " <= 40),
               "80", "160", "320", "640", "1280", "2560", "5120", "10240"),
    limits = c(NA, 5120)) +
  custom_theme +
  scale_shape_manual(values = c(1, 2, 3)) +
  labs(x = expression(paste("Population-level titre value at peak (IC"[50], ")")),
       y = expression(paste("Population-level titre value at set-point (IC"[50], ")"))) +
  scale_colour_manual(values = custom_palette) +
  guides(colour = guide_legend(title = "Titre type", override.aes = list(alpha = 1, size = 1)),
         shape = guide_legend(title = "Infection history"))

Figure 5

Figure 5 uses simulations of individual trajectories to derive population level titre values. This can take a while so here we set the maximum number of draws to just 250 for speed.

res <- mod$simulate_individual_trajectories(n_draws = 250)
#> INFO [2024-12-11 16:41:26] Extracting parameters
#> INFO [2024-12-11 16:41:30] Simulating individual trajectories
#> INFO [2024-12-11 16:41:34] Recovering covariate names
#> INFO [2024-12-11 16:41:34] Calculating exposure days. Adjusting exposures by 0 days
#> INFO [2024-12-11 16:41:37] Resampling
#> Registered S3 method overwritten by 'mosaic':
#>   method                           from   
#>   fortify.SpatialPolygonsDataFrame ggplot2
#> INFO [2024-12-11 16:41:47] Summarising into population quantiles
head(res)
#>    calendar_day titre_type       me       lo       hi time_shift
#>          <IDat>     <char>    <num>    <num>    <num>      <num>
#> 1:   2021-03-08  Ancestral 1145.101 886.7598 1504.746          0
#> 2:   2021-03-09  Ancestral 1127.169 881.6767 1452.714          0
#> 3:   2021-03-10  Ancestral 1159.459 899.3294 1478.496          0
#> 4:   2021-03-11  Ancestral 1134.535 918.3473 1474.952          0
#> 5:   2021-03-12  Ancestral 1128.106 884.1373 1525.385          0
#> 6:   2021-03-13  Ancestral 1164.565 909.3966 1484.574          0

See data for a definition of all the returned columns. Figure 5 A plots the derived population trajectories. Here we replicate a portion of the graph from the paper, from the minimum date in the dataset until the date at which the next variant emerged.

date_delta <- lubridate::ymd("2021-05-07")
date_ba2 <- lubridate::ymd("2022-01-24")

res$wave <- "Delta"
dat$wave <- "Delta"
plot_data <- merge(
  res, dat[, .(
    min_date = min(day), max_date = max(day)), by = wave])[
  , .SD[calendar_day >= min_date & calendar_day <= date_ba2], by = wave]

plot_data[, titre_type := forcats::fct_relevel(
  titre_type,
  c("Ancestral", "Alpha", "Delta"))]

ggplot() + geom_line(
  data = plot_data,
  aes(x = calendar_day,
      y = me,
      group = interaction(titre_type, wave),
      colour = titre_type),
  alpha = 0.2) +
  geom_ribbon(
    data = plot_data,
    aes(x = calendar_day,
        ymin = lo,
        ymax = hi,
        group = interaction(titre_type, wave)
    ),
    alpha = 0.2) +
  labs(title = "Population-level titre values",
       tag = "A",
       x = "Date",
       y = expression(paste("Titre (IC"[50], ")"))) +
  scale_colour_manual(
    values = custom_palette) +
  scale_fill_manual(
    values = custom_palette) +
  scale_x_date(
    date_labels = "%b %Y",
    limits = c(min(dat$day), date_ba2)) +
  geom_smooth(
    data = plot_data,
    aes(x = calendar_day,
        y = me,
        fill = titre_type,
        colour = titre_type,
        group = interaction(titre_type, wave)),
    alpha = 0.5, span = 0.2) +
  custom_theme +
  guides(colour = guide_legend(title = "Titre type"), fill = "none",)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Figure 5 C shows counterfactual trajectories varying vaccination timings. To do this, we simulate individual trajectories with different time_shift arguments. Here we set the maximum number of draws to just 50 for speed.

time_shift_values <- seq(-75, 75, by = 15)
indices <- seq_along(time_shift_values)

results_list <- lapply(indices, function(index) {
  shift <- time_shift_values[index]
  mod$simulate_individual_trajectories(n_draws = 50, time_shift = shift)
})
#> INFO [2024-12-11 16:41:48] Extracting parameters
#> INFO [2024-12-11 16:41:52] Simulating individual trajectories
#> INFO [2024-12-11 16:41:53] Recovering covariate names
#> INFO [2024-12-11 16:41:53] Calculating exposure days. Adjusting exposures by -75 days
#> INFO [2024-12-11 16:41:53] Resampling
#> INFO [2024-12-11 16:41:55] Summarising into population quantiles
#> INFO [2024-12-11 16:41:56] Extracting parameters
#> INFO [2024-12-11 16:41:59] Simulating individual trajectories
#> INFO [2024-12-11 16:42:00] Recovering covariate names
#> INFO [2024-12-11 16:42:00] Calculating exposure days. Adjusting exposures by -60 days
#> INFO [2024-12-11 16:42:00] Resampling
#> INFO [2024-12-11 16:42:02] Summarising into population quantiles
#> INFO [2024-12-11 16:42:03] Extracting parameters
#> INFO [2024-12-11 16:42:06] Simulating individual trajectories
#> INFO [2024-12-11 16:42:06] Recovering covariate names
#> INFO [2024-12-11 16:42:06] Calculating exposure days. Adjusting exposures by -45 days
#> INFO [2024-12-11 16:42:07] Resampling
#> INFO [2024-12-11 16:42:09] Summarising into population quantiles
#> INFO [2024-12-11 16:42:09] Extracting parameters
#> INFO [2024-12-11 16:42:13] Simulating individual trajectories
#> INFO [2024-12-11 16:42:13] Recovering covariate names
#> INFO [2024-12-11 16:42:13] Calculating exposure days. Adjusting exposures by -30 days
#> INFO [2024-12-11 16:42:14] Resampling
#> INFO [2024-12-11 16:42:16] Summarising into population quantiles
#> INFO [2024-12-11 16:42:16] Extracting parameters
#> INFO [2024-12-11 16:42:20] Simulating individual trajectories
#> INFO [2024-12-11 16:42:20] Recovering covariate names
#> INFO [2024-12-11 16:42:20] Calculating exposure days. Adjusting exposures by -15 days
#> INFO [2024-12-11 16:42:21] Resampling
#> INFO [2024-12-11 16:42:23] Summarising into population quantiles
#> INFO [2024-12-11 16:42:23] Extracting parameters
#> INFO [2024-12-11 16:42:26] Simulating individual trajectories
#> INFO [2024-12-11 16:42:27] Recovering covariate names
#> INFO [2024-12-11 16:42:27] Calculating exposure days. Adjusting exposures by 0 days
#> INFO [2024-12-11 16:42:27] Resampling
#> INFO [2024-12-11 16:42:29] Summarising into population quantiles
#> INFO [2024-12-11 16:42:30] Extracting parameters
#> INFO [2024-12-11 16:42:33] Simulating individual trajectories
#> INFO [2024-12-11 16:42:34] Recovering covariate names
#> INFO [2024-12-11 16:42:34] Calculating exposure days. Adjusting exposures by 15 days
#> INFO [2024-12-11 16:42:34] Resampling
#> INFO [2024-12-11 16:42:36] Summarising into population quantiles
#> INFO [2024-12-11 16:42:37] Extracting parameters
#> INFO [2024-12-11 16:42:40] Simulating individual trajectories
#> INFO [2024-12-11 16:42:41] Recovering covariate names
#> INFO [2024-12-11 16:42:41] Calculating exposure days. Adjusting exposures by 30 days
#> INFO [2024-12-11 16:42:41] Resampling
#> INFO [2024-12-11 16:42:43] Summarising into population quantiles
#> INFO [2024-12-11 16:42:44] Extracting parameters
#> INFO [2024-12-11 16:42:47] Simulating individual trajectories
#> INFO [2024-12-11 16:42:48] Recovering covariate names
#> INFO [2024-12-11 16:42:48] Calculating exposure days. Adjusting exposures by 45 days
#> INFO [2024-12-11 16:42:48] Resampling
#> INFO [2024-12-11 16:42:50] Summarising into population quantiles
#> INFO [2024-12-11 16:42:51] Extracting parameters
#> INFO [2024-12-11 16:42:54] Simulating individual trajectories
#> INFO [2024-12-11 16:42:54] Recovering covariate names
#> INFO [2024-12-11 16:42:54] Calculating exposure days. Adjusting exposures by 60 days
#> INFO [2024-12-11 16:42:55] Resampling
#> INFO [2024-12-11 16:42:57] Summarising into population quantiles
#> INFO [2024-12-11 16:42:57] Extracting parameters
#> INFO [2024-12-11 16:43:00] Simulating individual trajectories
#> INFO [2024-12-11 16:43:01] Recovering covariate names
#> INFO [2024-12-11 16:43:01] Calculating exposure days. Adjusting exposures by 75 days
#> INFO [2024-12-11 16:43:02] Resampling
#> INFO [2024-12-11 16:43:04] Summarising into population quantiles

combined_data <- data.table::data.table(data.table::rbindlist(results_list))

Plotting the median values:

plot_data <- combined_data[calendar_day == date_delta]
plot_data <- plot_data[, titre_type := forcats::fct_relevel(
    titre_type,
    c("Ancestral", "Alpha", "Delta"))]

ggplot(
  data = plot_data) +
  geom_line(aes(
    x = time_shift, y = me, colour = titre_type)) +
  geom_pointrange(aes(
    x = time_shift, y = me, ymin = lo,
    ymax = hi, colour = titre_type)) +
  scale_x_continuous(
    breaks = time_shift_values) +
  scale_y_continuous(
    trans = "log2",
    breaks = c(40, 80, 160, 320, 640, 1280),
    labels = c("40", "80", "160", "320", "640", "1280")) +
  scale_colour_manual(
    values = custom_palette) +
  labs(title = "Population titre values varying vaccine timings",
       x = "Time shift (days)",
       y = expression(paste("Titre (IC"[50], ")"))) +
  custom_theme +
  theme(legend.position = "none") +
  guides(colour = guide_legend(title = "Titre Type", nrow = 1, override.aes = list(alpha = 1, size = 1))) +
  labs(tag = "C",
       title = "Counterfactual population-level titres varying vaccination timings")