Net Monetary Benefit (NMB) Analysis

Libraries

Load Required Libraries

library(tidyverse)
library(purrr)
library(gsheet)
library(raster)
library(ncdf4)
library(lubridate)
library(readxl)
library(writexl)
library(caret)
library(tidyr)
library(r4pde)
library(refund)
library(readr)
library(fdatest)
library(dplyr)
library(rlang)
library(rms)
library(pROC)
library(PresenceAbsence)
library(OptimalCutpoints)
library(ggtext)
library(scales)
library(PRROC)
library(patchwork)

Meta model (Ensemble stack)

data <- read_xlsx("plan/weather_data_final.xlsx")
df_predictors <- read_xlsx("plan/df_predictors.xlsx")


models <- list(
  model1 = lrm(factor(epidemic) ~ tmin + rcs(rh, 4), data = df_predictors, x = TRUE, y = TRUE),
  model2 = lrm(factor(epidemic) ~ rcs(rh, 4) + rcs(dew, 3), data = df_predictors, x = TRUE, y = TRUE),
  model3 = lrm(factor(epidemic) ~ tmin + prec2, data = df_predictors, x = TRUE, y = TRUE)
)

# Predicted probabilities
p1 <- predict(models$model1, type = "fitted")
p2 <- predict(models$model2, type = "fitted")
p3 <- predict(models$model3, type = "fitted")

# Real
actual <- df_predictors$epidemic

# # Ensembles
stack_data     <- data.frame(p1 = p1, p2 = p2, p3 = p3, epidemic = factor(df_predictors$epidemic))
meta_model     <- glm(epidemic ~ p1 + p2 + p3, data = stack_data, family = binomial)
ensemble_stack <- predict(meta_model, type = "response")

Net Monetary Benefit (NMB)

# Predicted probabilities and epidemic indicator
p_hat <-  predict(meta_model, type = "response")  # Predicted probability of epidemic
y  <- df_predictors$epidemic                      # Predicted probability of epidemic

# Risk-conditional severity: average index per study
sev_epid <- data %>% 
  group_by(study) %>% 
  summarise(index = mean(index)) %>% 
  mutate(index = index/100) %>%                   # Convert to fraction
  filter(index > 0.1) %>%                         # Only meaningful severities
  pull(index)

# Define global model parameters for NMB calculation (Duffeck et al. 2020)
# Yield, yield loss per index point, fungicide efficacy, cost, and crop price

# Monetary values in USD/ha (1 BRL = 0.1878 USD)
# Conversion rate
brl_to_usd <- 0.1878

# Attainable yield (kg/ha)
Y0_kg_ha <- 3645.3

# Yield loss per index point (Duffeck et al. 2020)
slope_kg <- 49.1
d_pct <- 100 * slope_kg / Y0_kg_ha  # ~1.347% loss per index point

# Fungicide efficacy (randomized sampling)
rE <- function(S) rbeta(S, 46, 67)  # Machado et al. 2017, tebuconazole

# Cost (USD) per application
rC <- function(S) pmax(rnorm(S, mean = 150, sd = 20), 0) * brl_to_usd

# Wheat price (USD/kg), simulated with variability
rP <- function(S) {
  pmax(rnorm(S, mean = 1.3 * brl_to_usd, sd = 0.1 * brl_to_usd), 0)
}


# Threshold grid for decision-making
ths <- seq(0.05, 0.60, by = 0.01)
S   <- 10000  # PSA sample size
set.seed(123)

# Risk-conditional severity per threshold
# Draws severity points either aligned with observations or from population
h_bw <- 0.06  # bandwidth for kernel weighting

have_aligned_sev <- exists("sev") && length(sev) == length(y)

if (have_aligned_sev) {
  # Severity aligned with each observation
  base_idx <- which(y == 1 & is.finite(p_hat) & is.finite(sev))
  p_ep  <- p_hat[base_idx]
  s_ep  <- sev[base_idx]  # fraction 0–1

  draw_s_pts <- function(pt, S){
    w <- exp(- (p_ep - pt)^2 / (2 * h_bw^2))
    if (sum(w) == 0 || all(!is.finite(w))) w <- rep(1, length(p_ep))
    w <- pmax(w, 1e-9)
    s_frac <- sample(s_ep, size = S, replace = TRUE, prob = w)
    100 * s_frac  # Return in POINTS
  }

} else {
  # Use population-level severity (years/locations with epidemic)
  stopifnot(exists("sev_epid"))
  s_base <- sev_epid[is.finite(sev_epid)]
  s_base <- s_base[s_base >= 0 & s_base <= 1]

  k_prec <- 25
  q <- seq(0, 1, by = 0.1)
  mu_q <- stats::quantile(s_base, probs = q, na.rm = TRUE)
  fit_mu <- stats::splinefun(x = q, y = mu_q, method = "monoH.FC")

  draw_s_pts <- function(pt, S){
    r <- mean(p_hat[y==1] <= pt, na.rm = TRUE)
    m <- min(max(fit_mu(r), 0), 1)
    alpha <- max(m * k_prec, 1e-3)
    beta  <- max((1 - m) * k_prec, 1e-3)
    100 * rbeta(S, alpha, beta)
  }
}

# Probabilistic Sensitivity Analysis (PSA)
# Compute population-level and per-treated-unit NMB for each threshold

compute_psa <- function(y, p_hat, ths, slope_kg, P, rE, rC, S = 10000){
  N <- length(y)
  map_dfr(ths, function(pt){
    pred <- p_hat >= pt
    TP   <- sum(pred & y == 1)
    Tt   <- sum(pred)
    PPV  <- if (Tt > 0) TP / Tt else 0

    # Sampling of efficacy, cost, and severity points
    E      <- rE(S)
    C      <- rC(S)
    P      <- rP(S)       # wheat price in USD/kg
    P_saca <- P * 60
    s_pts  <- draw_s_pts(pt, S)

    # Benefit per epidemic (USD/ha)
    B <- (d_pct / 100) * E * s_pts * Y0_kg_ha * P

    # NMB per population unit and per treated unit
    NMB_pop  <- (TP / N) * B - (Tt / N) * C
    NMB_trat <- PPV * B - C

    req_PPV <- pmin(C / pmax(B, 1e-9), 1)

    tibble(
      pt,
      NMB_mean      = mean(NMB_pop),
      NMB_lo        = unname(quantile(NMB_pop, 0.025)),
      NMB_hi        = unname(quantile(NMB_pop, 0.975)),
      Pr_pos        = mean(NMB_pop > 0),
      Pr_ge_1saca   = mean(NMB_pop >= P_saca),
      NMB_trat_mean = mean(NMB_trat),
      NMB_trat_lo = unname(quantile(NMB_trat, 0.025)),
      NMB_trat_hi = unname(quantile(NMB_trat, 0.975)),
      Pr_trat_pos   = mean(NMB_trat > 0),
      PPV           = PPV,
      PPV_req_med   = median(req_PPV),
      PPV_req_lo    = quantile(req_PPV, 0.10),
      PPV_req_hi    = quantile(req_PPV, 0.90)
    )
  })
}

# Run PSA and save results
res <- compute_psa(y, p_hat, ths, slope_kg, P_usd_per_kg, rE, rC, S)
write_csv(res, "plan/res.csv")