library(tidyverse)
library(purrr)
library(gsheet)
library(raster)
library(ncdf4)
library(lubridate)
library(readxl)
library(writexl)
library(caret)
library(tidyr)
library(r4pde)
library(refund)
library(readr)
library(fdatest)
library(dplyr)
library(rlang)
library(rms)
library(pROC)
library(PresenceAbsence)
library(OptimalCutpoints)
library(ggtext)
library(scales)
library(PRROC)
library(patchwork)
Net Monetary Benefit (NMB) Analysis
Libraries
Load Required Libraries
Meta model (Ensemble stack)
<- read_xlsx("plan/weather_data_final.xlsx")
data <- read_xlsx("plan/df_predictors.xlsx")
df_predictors
<- list(
models model1 = lrm(factor(epidemic) ~ tmin + rcs(rh, 4), data = df_predictors, x = TRUE, y = TRUE),
model2 = lrm(factor(epidemic) ~ rcs(rh, 4) + rcs(dew, 3), data = df_predictors, x = TRUE, y = TRUE),
model3 = lrm(factor(epidemic) ~ tmin + prec2, data = df_predictors, x = TRUE, y = TRUE)
)
# Predicted probabilities
<- predict(models$model1, type = "fitted")
p1 <- predict(models$model2, type = "fitted")
p2 <- predict(models$model3, type = "fitted")
p3
# Real
<- df_predictors$epidemic
actual
# # Ensembles
<- data.frame(p1 = p1, p2 = p2, p3 = p3, epidemic = factor(df_predictors$epidemic))
stack_data <- glm(epidemic ~ p1 + p2 + p3, data = stack_data, family = binomial)
meta_model <- predict(meta_model, type = "response") ensemble_stack
Net Monetary Benefit (NMB)
# Predicted probabilities and epidemic indicator
<- predict(meta_model, type = "response") # Predicted probability of epidemic
p_hat <- df_predictors$epidemic # Predicted probability of epidemic
y
# Risk-conditional severity: average index per study
<- data %>%
sev_epid group_by(study) %>%
summarise(index = mean(index)) %>%
mutate(index = index/100) %>% # Convert to fraction
filter(index > 0.1) %>% # Only meaningful severities
pull(index)
# Define global model parameters for NMB calculation (Duffeck et al. 2020)
# Yield, yield loss per index point, fungicide efficacy, cost, and crop price
# Monetary values in USD/ha (1 BRL = 0.1878 USD)
# Conversion rate
<- 0.1878
brl_to_usd
# Attainable yield (kg/ha)
<- 3645.3
Y0_kg_ha
# Yield loss per index point (Duffeck et al. 2020)
<- 49.1
slope_kg <- 100 * slope_kg / Y0_kg_ha # ~1.347% loss per index point
d_pct
# Fungicide efficacy (randomized sampling)
<- function(S) rbeta(S, 46, 67) # Machado et al. 2017, tebuconazole
rE
# Cost (USD) per application
<- function(S) pmax(rnorm(S, mean = 150, sd = 20), 0) * brl_to_usd
rC
# Wheat price (USD/kg), simulated with variability
<- function(S) {
rP pmax(rnorm(S, mean = 1.3 * brl_to_usd, sd = 0.1 * brl_to_usd), 0)
}
# Threshold grid for decision-making
<- seq(0.05, 0.60, by = 0.01)
ths <- 10000 # PSA sample size
S set.seed(123)
# Risk-conditional severity per threshold
# Draws severity points either aligned with observations or from population
<- 0.06 # bandwidth for kernel weighting
h_bw
<- exists("sev") && length(sev) == length(y)
have_aligned_sev
if (have_aligned_sev) {
# Severity aligned with each observation
<- which(y == 1 & is.finite(p_hat) & is.finite(sev))
base_idx <- p_hat[base_idx]
p_ep <- sev[base_idx] # fraction 0–1
s_ep
<- function(pt, S){
draw_s_pts <- exp(- (p_ep - pt)^2 / (2 * h_bw^2))
w if (sum(w) == 0 || all(!is.finite(w))) w <- rep(1, length(p_ep))
<- pmax(w, 1e-9)
w <- sample(s_ep, size = S, replace = TRUE, prob = w)
s_frac 100 * s_frac # Return in POINTS
}
else {
} # Use population-level severity (years/locations with epidemic)
stopifnot(exists("sev_epid"))
<- sev_epid[is.finite(sev_epid)]
s_base <- s_base[s_base >= 0 & s_base <= 1]
s_base
<- 25
k_prec <- seq(0, 1, by = 0.1)
q <- stats::quantile(s_base, probs = q, na.rm = TRUE)
mu_q <- stats::splinefun(x = q, y = mu_q, method = "monoH.FC")
fit_mu
<- function(pt, S){
draw_s_pts <- mean(p_hat[y==1] <= pt, na.rm = TRUE)
r <- min(max(fit_mu(r), 0), 1)
m <- max(m * k_prec, 1e-3)
alpha <- max((1 - m) * k_prec, 1e-3)
beta 100 * rbeta(S, alpha, beta)
} }
# Probabilistic Sensitivity Analysis (PSA)
# Compute population-level and per-treated-unit NMB for each threshold
<- function(y, p_hat, ths, slope_kg, P, rE, rC, S = 10000){
compute_psa <- length(y)
N map_dfr(ths, function(pt){
<- p_hat >= pt
pred <- sum(pred & y == 1)
TP <- sum(pred)
Tt <- if (Tt > 0) TP / Tt else 0
PPV
# Sampling of efficacy, cost, and severity points
<- rE(S)
E <- rC(S)
C <- rP(S) # wheat price in USD/kg
P <- P * 60
P_saca <- draw_s_pts(pt, S)
s_pts
# Benefit per epidemic (USD/ha)
<- (d_pct / 100) * E * s_pts * Y0_kg_ha * P
B
# NMB per population unit and per treated unit
<- (TP / N) * B - (Tt / N) * C
NMB_pop <- PPV * B - C
NMB_trat
<- pmin(C / pmax(B, 1e-9), 1)
req_PPV
tibble(
pt,NMB_mean = mean(NMB_pop),
NMB_lo = unname(quantile(NMB_pop, 0.025)),
NMB_hi = unname(quantile(NMB_pop, 0.975)),
Pr_pos = mean(NMB_pop > 0),
Pr_ge_1saca = mean(NMB_pop >= P_saca),
NMB_trat_mean = mean(NMB_trat),
NMB_trat_lo = unname(quantile(NMB_trat, 0.025)),
NMB_trat_hi = unname(quantile(NMB_trat, 0.975)),
Pr_trat_pos = mean(NMB_trat > 0),
PPV = PPV,
PPV_req_med = median(req_PPV),
PPV_req_lo = quantile(req_PPV, 0.10),
PPV_req_hi = quantile(req_PPV, 0.90)
)
})
}
# Run PSA and save results
<- compute_psa(y, p_hat, ths, slope_kg, P_usd_per_kg, rE, rC, S)
res write_csv(res, "plan/res.csv")