The underlying biokinetics model was developed in the following paper: Russell TW et al. Real-time estimation of immunological responses against emerging SARS-CoV-2 variants in the UK: a mathematical modelling study. Lancet Infect Dis. 2024 Sep 11:S1473-3099(24)00484-5

This vignette demonstrates how to use the epikinetics package to replicate some of the key paper figures.

Fitting the model

To initialise a model object, the only required argument is a path to the data in CSV format, or a data.table. See biokinetics for all available arguments. In this vignette we use a dataset representing the Delta wave which is installed with this package, specifying a regression model that just looks at the effect of infection history.

The fit method then has the same function signature as the underlying cmdstanr::sample method. Here we specify a relatively small number of iterations of the algorithm to limit the time it takes to compile this vignette.

dat <- data.table::fread(system.file("delta_full.rds", package = "epikinetics"))
mod <- epikinetics::biokinetics$new(data = dat, covariate_formula = ~0 + infection_history)
delta <- mod$fit(parallel_chains = 4,
                 iter_warmup = 50,
                 iter_sampling = 200,
                 threads_per_chain = 4)

Figure 2

Figure 2 from the paper shows population level fits for each wave, disaggregated by infection history and titre type. Here we reproduce the facets for the Delta wave. Once the model has been fitted, simulate population trajectories using the fitted population parameters.

res <- mod$simulate_population_trajectories()
head(res)
#>    time_since_last_exp       me        lo       hi titre_type infection_history
#>                  <int>    <num>     <num>    <num>     <char>            <char>
#> 1:                   0 120.2530  85.15613 154.0472  Ancestral   Infection naive
#> 2:                   1 149.3001 110.73969 187.5559  Ancestral   Infection naive
#> 3:                   2 186.2505 142.92775 228.2648  Ancestral   Infection naive
#> 4:                   3 233.1541 183.01946 279.9274  Ancestral   Infection naive
#> 5:                   4 291.1429 236.52809 348.3158  Ancestral   Infection naive
#> 6:                   5 363.2659 300.31515 433.2755  Ancestral   Infection naive

See data for the definition of the returned columns.

Using ggplot2:

library(ggplot2)
custom_theme <- theme_linedraw() + theme(
    legend.position = "bottom",
    text = element_text(size = 8, family = "Helvetica"),
    strip.text.x.top = element_text(size = 8, family = "Helvetica"),
    strip.text.x = element_text(size = 8, family = "Helvetica"),
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(colour = 'black'),
    strip.placement = "outside",
    legend.box = "vertical",
    legend.margin = margin(),
)

custom_palette <- c("#CC6677", "#DDCC77", "#88CCEE")

plot_data <- res
plot_data[, titre_type := forcats::fct_relevel(
  titre_type,
  c("Ancestral", "Alpha", "Delta"))]

ggplot(data = plot_data) +
  geom_line(aes(x = time_since_last_exp,
                y = me,
                colour = titre_type)) +
  geom_ribbon(aes(x = time_since_last_exp,
                  ymin = lo,
                  ymax = hi,
                  fill = titre_type), alpha = 0.65) +
  scale_y_continuous(
    trans = "log2",
    breaks = c(40, 80, 160, 320, 640, 1280,
               2560, 5120),
    labels = c("40", "80", "160", "320", "640", "1280", "2560", "5120"),
    limits = c(40, 10240)) +
  scale_x_continuous(breaks = c(0, 30, 60, 90, 120),
                     labels = c("0", "30", "60", "90", "120"),
                     expand = c(0, 0)) +
  coord_cartesian(clip = "off") +
  labs(x = "Time since last exposure (days)",
       y = expression(paste("Titre (IC"[50], ")"))) +
  facet_wrap(infection_history ~ titre_type) +
  scale_colour_manual(values = custom_palette) +
  scale_fill_manual(values = custom_palette) +
  guides(colour = "none", fill = "none") +
  custom_theme

Figure 4

Figure 4 shows the relationship between infection history and titre values at peak and set points, for different titre types. We first simulate estimates for these from the fitted model:

res <- mod$population_stationary_points(n_draws = 2000)
head(res)
#>    infection_history titre_type     mu_p     mu_s rel_drop_me  mu_p_me  mu_s_me
#>               <char>     <char>    <num>    <num>       <num>    <num>    <num>
#> 1:   Infection naive  Ancestral 1242.783 273.2506    0.189827 1227.795 229.0288
#> 2:   Infection naive  Ancestral 1227.432 249.1784    0.189827 1227.795 229.0288
#> 3:   Infection naive  Ancestral 1156.975 258.2754    0.189827 1227.795 229.0288
#> 4:   Infection naive  Ancestral 1133.219 253.3188    0.189827 1227.795 229.0288
#> 5:   Infection naive  Ancestral 1150.578 240.4378    0.189827 1227.795 229.0288
#> 6:   Infection naive  Ancestral 1214.544 244.0054    0.189827 1227.795 229.0288

The values we’re going to plot are the mean peak titre values (mu_p) and mean set point titre values (mu_s), for different titre types and infection histories. See data for a full definition of all the returned columns. Using ggplot2:

plot_data <- res[, titre_type := forcats::fct_relevel(
  titre_type,
  c("Ancestral", "Alpha", "Delta"))]

ggplot(data = plot_data, aes(
  x = mu_p, y = mu_s,
  colour = titre_type)) +
  geom_density_2d(
    aes(
      group = interaction(
        infection_history,
        titre_type))) +
  geom_point(data = plot_data,
             alpha = 0.05, size = 0.2) +
  geom_point(aes(x = mu_p_me, y = mu_s_me,
                 shape = infection_history),
             colour = "black") +
  geom_path(aes(x = mu_p_me, y = mu_s_me,
                group = titre_type),
            colour = "black") +
  geom_vline(xintercept = 2560, linetype = "twodash", colour = "gray30") +
  scale_x_continuous(
    trans = "log2",
    breaks = c(40, 80, 160, 320, 640, 1280, 2560, 5120, 10240),
    labels = c(expression(" " <= 40),
               "80", "160", "320", "640", "1280", "2560", "5120", "10240"),
    limits = c(NA, 10240)) +
  geom_hline(yintercept = 2560, linetype = "twodash", colour = "gray30") +
  scale_y_continuous(
    trans = "log2",
    breaks = c(40, 80, 160, 320, 640, 1280, 2560, 5120, 10240),
    labels = c(expression(" " <= 40),
               "80", "160", "320", "640", "1280", "2560", "5120", "10240"),
    limits = c(NA, 5120)) +
  custom_theme +
  scale_shape_manual(values = c(1, 2, 3)) +
  labs(x = expression(paste("Population-level titre value at peak (IC"[50], ")")),
       y = expression(paste("Population-level titre value at set-point (IC"[50], ")"))) +
  scale_colour_manual(values = custom_palette) +
  guides(colour = guide_legend(title = "Titre type", override.aes = list(alpha = 1, size = 1)),
         shape = guide_legend(title = "Infection history"))

Figure 5

Figure 5 uses simulations of individual trajectories to derive population level titre values. This can take a while so here we set the maximum number of draws to just 250 for speed.

res <- mod$simulate_individual_trajectories(n_draws = 250)
#> Registered S3 method overwritten by 'mosaic':
#>   method                           from   
#>   fortify.SpatialPolygonsDataFrame ggplot2
head(res)
#>    calendar_day titre_type       me       lo       hi time_shift
#>          <IDat>     <char>    <num>    <num>    <num>      <num>
#> 1:   2021-03-08  Ancestral 1139.289 850.1878 1530.986          0
#> 2:   2021-03-09  Ancestral 1131.811 887.2028 1522.606          0
#> 3:   2021-03-10  Ancestral 1170.492 891.3162 1533.867          0
#> 4:   2021-03-11  Ancestral 1134.420 904.7027 1453.014          0
#> 5:   2021-03-12  Ancestral 1151.549 873.5584 1494.535          0
#> 6:   2021-03-13  Ancestral 1190.612 909.6962 1536.645          0

See data for a definition of all the returned columns. Figure 5 A plots the derived population trajectories. Here we replicate a portion of the graph from the paper, from the minimum date in the dataset until the date at which the next variant emerged.

date_delta <- lubridate::ymd("2021-05-07")
date_ba2 <- lubridate::ymd("2022-01-24")

res$wave <- "Delta"
dat$wave <- "Delta"
plot_data <- merge(
  res, dat[, .(
    min_date = min(day), max_date = max(day)), by = wave])[
  , .SD[calendar_day >= min_date & calendar_day <= date_ba2], by = wave]

plot_data[, titre_type := forcats::fct_relevel(
  titre_type,
  c("Ancestral", "Alpha", "Delta"))]

ggplot() + geom_line(
  data = plot_data,
  aes(x = calendar_day,
      y = me,
      group = interaction(titre_type, wave),
      colour = titre_type),
  alpha = 0.2) +
  geom_ribbon(
    data = plot_data,
    aes(x = calendar_day,
        ymin = lo,
        ymax = hi,
        group = interaction(titre_type, wave)
    ),
    alpha = 0.2) +
  labs(title = "Population-level titre values",
       tag = "A",
       x = "Date",
       y = expression(paste("Titre (IC"[50], ")"))) +
  scale_colour_manual(
    values = custom_palette) +
  scale_fill_manual(
    values = custom_palette) +
  scale_x_date(
    date_labels = "%b %Y",
    limits = c(min(dat$day), date_ba2)) +
  geom_smooth(
    data = plot_data,
    aes(x = calendar_day,
        y = me,
        fill = titre_type,
        colour = titre_type,
        group = interaction(titre_type, wave)),
    alpha = 0.5, span = 0.2) +
  custom_theme +
  guides(colour = guide_legend(title = "Titre type"), fill = "none",)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Figure 5 C shows counterfactual trajectories varying vaccination timings. To do this, we simulate individual trajectories with different time_shift arguments. Here we set the maximum number of draws to just 50 for speed.

time_shift_values <- seq(-75, 75, by = 15)
indices <- seq_along(time_shift_values)

results_list <- lapply(indices, function(index) {
  shift <- time_shift_values[index]
  mod$simulate_individual_trajectories(n_draws = 50, time_shift = shift)
})

combined_data <- data.table::data.table(data.table::rbindlist(results_list))

Plotting the median values:

plot_data <- combined_data[calendar_day == date_delta]
plot_data <- plot_data[, titre_type := forcats::fct_relevel(
    titre_type,
    c("Ancestral", "Alpha", "Delta"))]

ggplot(
  data = plot_data) +
  geom_line(aes(
    x = time_shift, y = me, colour = titre_type)) +
  geom_pointrange(aes(
    x = time_shift, y = me, ymin = lo,
    ymax = hi, colour = titre_type)) +
  scale_x_continuous(
    breaks = time_shift_values) +
  scale_y_continuous(
    trans = "log2",
    breaks = c(40, 80, 160, 320, 640, 1280),
    labels = c("40", "80", "160", "320", "640", "1280")) +
  scale_colour_manual(
    values = custom_palette) +
  labs(title = "Population titre values varying vaccine timings",
       x = "Time shift (days)",
       y = expression(paste("Titre (IC"[50], ")"))) +
  custom_theme +
  theme(legend.position = "none") +
  guides(colour = guide_legend(title = "Titre Type", nrow = 1, override.aes = list(alpha = 1, size = 1))) +
  labs(tag = "C",
       title = "Counterfactual population-level titres varying vaccination timings")