# -----------------------------------------------------------------------------
# Step 1: Load Libraries
# -----------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
library(rstan)
library(patchwork)
library(knitr)
# -----------------------------------------------------------------------------
# Step 2: Generate Synthetic Data
# -----------------------------------------------------------------------------
# Set a seed for reproducibility
set.seed(123)
# Define the time period for our data (2 years)
<- seq(ymd("2023-01-01"), ymd("2025-06-30"), by = "day")
dates
# Generate synthetic data
<- length(dates)
n_days
# Create a seasonal winter pressure effect
<- cos(2 * pi * (yday(dates) - 45) / 365) * 15 + 10
winter_pressure < 0] <- 0
winter_pressure[winter_pressure
# Simulate daily admissions and staff absences
<- rpois(n_days, lambda = 50 + winter_pressure)
daily_admissions <- rpois(n_days, lambda = 20 + winter_pressure / 2)
staff_absences
# Combine into a tibble
<- tibble(
synthetic_data date = dates,
daily_admissions = daily_admissions,
staff_absences = staff_absences,
winter_pressure = winter_pressure
)
# Model the probability of being in OPEL-4
<- plogis(-4.5 + 0.05 * daily_admissions + 0.08 * staff_absences)
prob_opel_4
# Assign an OPEL level based on probabilities
<- synthetic_data %>%
synthetic_data mutate(
opel_level = case_when(
> 0.6 ~ 4,
prob_opel_4 > 0.3 ~ 3,
prob_opel_4 > 0.1 ~ 2,
prob_opel_4 TRUE ~ sample(0:1, n(), replace = TRUE, prob = c(0.6, 0.4))
)
)
# Save the data to a file
write_csv(synthetic_data, "resources/nhs_opel_data.csv")
# -----------------------------------------------------------------------------
# Step 3: Prepare and Run the Stan Model
# -----------------------------------------------------------------------------
# Load the generated data
<- read_csv("resources/nhs_opel_data.csv")
all_data
# Create the binary outcome variable
<- all_data %>%
all_data mutate(is_opel_4 = ifelse(opel_level == 4, 1, 0))
# Run ARIMA forecasts for daily_admissions and staff_absences
library(forecast)
<- ts(all_data$daily_admissions, frequency = 7)
admissions_ts <- auto.arima(admissions_ts)
fit_admissions <- forecast(fit_admissions, h = 10)
forecast_admissions
<- ts(all_data$staff_absences, frequency = 7)
absences_ts <- auto.arima(absences_ts)
fit_absences <- forecast(fit_absences, h = 10)
forecast_absences
# Prepare data for Stan
<- list(
stan_data N = nrow(all_data),
y = all_data$is_opel_4,
daily_admissions = all_data$daily_admissions,
staff_absences = all_data$staff_absences,
N_future = 10, # We want to predict 10 days into the future
future_daily_admissions = as.array(forecast_admissions$mean),
future_staff_absences = as.array(forecast_absences$mean)
)
# Compile and run the Stan model (ensure `opel_model.stan` is in the directory)
<- stan(
fit file = "resources/opel_model.stan",
data = stan_data,
iter = 2000, chains = 4, cores = 1
)
# -----------------------------------------------------------------------------
# Step 4: Extract and Visualize Results
# -----------------------------------------------------------------------------
# The `y_pred` object is a matrix where each row is a simulation
# and each column is a day in the future (1=OPEL-4, 0=Not).
# The `p_opel_4_pred` object contains the underlying probabilities.
<- rstan::extract(fit, pars = "y_pred")$y_pred
predictions_y <- rstan::extract(fit, pars = "p_opel_4_pred")$p_opel_4_pred
predictions_p
# Calculate the daily probability of OPEL-4 (median and 95% CI)
<- apply(predictions_p, 2, function(x) c(median = median(x), quantile(x, c(0.025, 0.975)))) %>%
daily_prob_summary t() %>%
as_tibble() %>%
rename(median = `median`, lower = `2.5%`, upper = `97.5%`)
# 5. Visualize the Forecast (Cumulative Line with Daily Points and Credible Intervals)
<- tibble(
forecast_df day = 1:10,
daily_median = daily_prob_summary$median,
daily_lower = daily_prob_summary$lower,
daily_upper = daily_prob_summary$upper
)
# Generate the final plot
<- ggplot(forecast_df, aes(x = day)) +
forecast_plot # Daily Probability with Credible Interval
geom_ribbon(aes(ymin = daily_lower, ymax = daily_upper), fill = "gray", col="black",alpha=0.25) +
geom_line(aes(y = daily_median), color = "#DC241F", linewidth = 1) +
scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
scale_x_continuous(breaks = 1:10) +
labs(
title = "10-Day Forecast: Daily Probability of OPEL-4",
subtitle = "Red line/ribbon: Daily Probability (median and 95% CI).",
x = "Days from Today",
y = "Probability"
+
) theme_minimal(base_size = 14)
# Print the plot
print(forecast_plot)
# You can also print the model summary
print(fit)
Code Appendix: Full R Script
This file provides the complete R code to run the entire analysis from start to finish. You can copy and paste this entire script into your RStudio console or run it as a single file.
Make sure you have already created the opel_model.stan
file as described in Module 4.