Code Appendix: Full R Script

This file provides the complete R code to run the entire analysis from start to finish. You can copy and paste this entire script into your RStudio console or run it as a single file.

Make sure you have already created the opel_model.stan file as described in Module 4.

# -----------------------------------------------------------------------------
# Step 1: Load Libraries
# -----------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
library(rstan)
library(patchwork)
library(knitr)

# -----------------------------------------------------------------------------
# Step 2: Generate Synthetic Data
# -----------------------------------------------------------------------------
# Set a seed for reproducibility
set.seed(123)

# Define the time period for our data (2 years)
dates <- seq(ymd("2023-01-01"), ymd("2025-06-30"), by = "day")

# Generate synthetic data
n_days <- length(dates)

# Create a seasonal winter pressure effect
winter_pressure <- cos(2 * pi * (yday(dates) - 45) / 365) * 15 + 10
winter_pressure[winter_pressure < 0] <- 0

# Simulate daily admissions and staff absences
daily_admissions <- rpois(n_days, lambda = 50 + winter_pressure)
staff_absences <- rpois(n_days, lambda = 20 + winter_pressure / 2)

# Combine into a tibble
synthetic_data <- tibble(
  date = dates,
  daily_admissions = daily_admissions,
  staff_absences = staff_absences,
  winter_pressure = winter_pressure
)

# Model the probability of being in OPEL-4
prob_opel_4 <- plogis(-4.5 + 0.05 * daily_admissions + 0.08 * staff_absences)

# Assign an OPEL level based on probabilities
synthetic_data <- synthetic_data %>%
  mutate(
    opel_level = case_when(
      prob_opel_4 > 0.6 ~ 4,
      prob_opel_4 > 0.3 ~ 3,
      prob_opel_4 > 0.1 ~ 2,
      TRUE ~ sample(0:1, n(), replace = TRUE, prob = c(0.6, 0.4))
    )
  )

# Save the data to a file
write_csv(synthetic_data, "resources/nhs_opel_data.csv")


# -----------------------------------------------------------------------------
# Step 3: Prepare and Run the Stan Model
# -----------------------------------------------------------------------------
# Load the generated data
all_data <- read_csv("resources/nhs_opel_data.csv")

# Create the binary outcome variable
all_data <- all_data %>%
  mutate(is_opel_4 = ifelse(opel_level == 4, 1, 0))

# Run ARIMA forecasts for daily_admissions and staff_absences
library(forecast)

admissions_ts <- ts(all_data$daily_admissions, frequency = 7)
fit_admissions <- auto.arima(admissions_ts)
forecast_admissions <- forecast(fit_admissions, h = 10)

absences_ts <- ts(all_data$staff_absences, frequency = 7)
fit_absences <- auto.arima(absences_ts)
forecast_absences <- forecast(fit_absences, h = 10)

# Prepare data for Stan
stan_data <- list(
  N = nrow(all_data),
  y = all_data$is_opel_4,
  daily_admissions = all_data$daily_admissions,
  staff_absences = all_data$staff_absences,
  N_future = 10, # We want to predict 10 days into the future
  future_daily_admissions = as.array(forecast_admissions$mean),
  future_staff_absences = as.array(forecast_absences$mean)
)

# Compile and run the Stan model (ensure `opel_model.stan` is in the directory)
fit <- stan(
  file = "resources/opel_model.stan",
  data = stan_data,
  iter = 2000, chains = 4, cores = 1
)

# -----------------------------------------------------------------------------
# Step 4: Extract and Visualize Results
# -----------------------------------------------------------------------------
# The `y_pred` object is a matrix where each row is a simulation
# and each column is a day in the future (1=OPEL-4, 0=Not).
# The `p_opel_4_pred` object contains the underlying probabilities.
predictions_y <- rstan::extract(fit, pars = "y_pred")$y_pred
predictions_p <- rstan::extract(fit, pars = "p_opel_4_pred")$p_opel_4_pred

# Calculate the daily probability of OPEL-4 (median and 95% CI)
daily_prob_summary <- apply(predictions_p, 2, function(x) c(median = median(x), quantile(x, c(0.025, 0.975)))) %>%
  t() %>%
  as_tibble() %>%
  rename(median = `median`, lower = `2.5%`, upper = `97.5%`)

# 5. Visualize the Forecast (Cumulative Line with Daily Points and Credible Intervals)
forecast_df <- tibble(
  day = 1:10,
  daily_median = daily_prob_summary$median,
  daily_lower = daily_prob_summary$lower,
  daily_upper = daily_prob_summary$upper
)

# Generate the final plot
forecast_plot <- ggplot(forecast_df, aes(x = day)) +
  # Daily Probability with Credible Interval
  geom_ribbon(aes(ymin = daily_lower, ymax = daily_upper), fill = "gray", col="black",alpha=0.25) +
  geom_line(aes(y = daily_median), color = "#DC241F", linewidth = 1) +
  
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  scale_x_continuous(breaks = 1:10) +
  labs(
    title = "10-Day Forecast: Daily Probability of OPEL-4",
    subtitle = "Red line/ribbon: Daily Probability (median and 95% CI).",
    x = "Days from Today",
    y = "Probability"
  ) +
  theme_minimal(base_size = 14)

# Print the plot
print(forecast_plot)

# You can also print the model summary
print(fit)