library(dplyr)
library(purrr)
library(gsheet)
library(raster)
library(ncdf4)
library(lubridate)
library(readxl)
library(writexl)
library(caret)
library(tidyr)
library(r4pde)
library(refund)
library(ggplot2)
library(readr)
library(nasapower)
Anthesis Date Estimation from Climate Data
Libraries
Load required libraries
Importing and Processing the Experimental Data
# Import trial dataset containing municipalities, planting dates, and coordinates
<- gsheet2tbl("https://docs.google.com/spreadsheets/d/1MBiKsosQ8Hob6LkS65_1pPU25hx1CO9i42Sm_xf28ww/edit?gid=2128367221 gid=2128367221")
gib
<- gib |>
trials mutate(
planting_date = as.Date(planting_date, format = "%Y-%m-%d"),
pd120 = as.Date(planting_date + 120), # 120-day growing period
study = as.factor(study),
year = as.factor(year)) |>
::select(study, planting_date, lon, lat, pd120) |>
dplyrrename(longitude = lon,
latitude = lat)
Weather Data Acquisition
From NASA POWER
<- trials %>%
weather_nasa rowwise() %>%
mutate(
weather = list(
get_power(
community = "AG",
lonlat = c(longitude, latitude),
pars = c("T2M", "T2M_MAX", "T2M_MIN", "RH2M", "PRECTOTCORR"),
dates = c(as.character(planting_date), as.character(pd120)),
temporal_api = "DAILY"
)
)%>%
) unnest(weather) |>
rename(Tmax = T2M_MAX,
Tmin = T2M_MIN,
date = YYYYMMDD)
From Xavier Dataset (NetCDF)
This repository is intended to demonstrate the structure and workflow of the code used in the project. To keep the repository lightweight and accessible on GitHub, the original input data files (provided by Xavier et al. 2022) have been excluded because they exceed GitHub’s file size limits.
The corresponding lines in the code that load these large .nc files are commented out (using ). This way, the code can still be inspected and followed without requiring the full dataset. However, the scripts will not reproduce the complete analysis unless the data are available locally.
The full dataset can be obtained from Xavier et al. (2022) at the following link: Xavier Dataset – Download here
After downloading, place the files in the data/
directory of this repository and uncomment the relevant lines in the scripts to run the full analysis.
# Keep only studies with available NetCDF coverage
<- trials |>
trials filter(study %in% 1:91)
# # Generic function to extract daily climate variables from NetCDF
# extract_nc_var <- function(netcdf_path, trials, varname, outfile) {
# nc_data <- nc_open(netcdf_path)
# # Convert time variable to Date
# time_var <- ncvar_get(nc_data, "time")
# time_units <- ncatt_get(nc_data, "time", "units")$value
# origin_date <- as.POSIXct(sub("hours since ", "", time_units), tz = "UTC")
# nc_dates <- as.Date(origin_date + as.difftime(time_var, units = "hours"))
# Grid coordinates
# lon_vals <- ncvar_get(nc_data, "longitude")
# lat_vals <- ncvar_get(nc_data, "latitude")
# Internal extractor for each trial
# get_data_for_row <- function(row) {
# date_seq <- seq.Date(row$planting_date, row$pd120, by = "day")
# date_idx <- match(date_seq, nc_dates)
# date_idx <- date_idx[!is.na(date_idx)]
# if (length(date_idx) == 0) return(NULL)
# lon_idx <- which.min(abs(lon_vals - row$longitude))
# lat_idx <- which.min(abs(lat_vals - row$latitude))
# values <- sapply(date_idx, function(i) {
# ncvar_get(nc_data, varname, start = c(lon_idx, lat_idx, i), count = c(1, 1, 1))
# })
# if (length(values) == 0 || all(is.na(values))) return(NULL)
# df <- data.frame(
# date = date_seq[!is.na(match(date_seq, nc_dates))],
# value = values,
# study = row$study,
# year = format(row$planting_date, "%Y"),
# planting_date = row$planting_date
# )
# names(df)[2] <- varname
# return(df)
# }
# result <- trials %>%
# split(1:nrow(.)) %>%
# map_dfr(get_data_for_row)
# write_xlsx(result, outfile)
# nc_close(nc_data)
# return(result)
# }
# # Extract Tmax and Tmin
# tmax <- extract_nc_var("data/Tmax_20010101_20240320_BR-DWGD_UFES_UTEXAS_v_3.2.3.nc", trials, "Tmax", "data/br_dwgd_tmax.xlsx")
# tmin <- extract_nc_var("data/Tmin_20010101_20240320_BR-DWGD_UFES_UTEXAS_v_3.2.3.nc", trials, "Tmin", "data/br_dwgd_tmin.xlsx")
# # Merge and compute mean daily temperature
#weather_xavier <- tmax |>
# mutate(Tmin = tmin$Tmin, T2M = (Tmax + Tmin)/2)
#write_xlsx(weather_xavier, "plan/weather_xavier_flowering.xlsx")
<- read_xlsx("plan/weather_xavier_flowering.xlsx") weather_xavier
Estimation of Anthesis (Flowering Date)
General Functions
# Temperature limits for wheat growth
<- 0 #Lower limit
Tmin <- 40 #Upper limit
Tmax
# Daily growing degree-days (GDD) calculation
<- function(T2M, Tmin, Tmax, Tbase) {
calculate_gdd_phase if (T2M < Tmin || T2M > Tmax) return(0)
<- T2M - Tbase
GDD return(ifelse(GDD > 0, GDD, 0))
}
# Basal temperatures and cumulative thresholds (by growth stage)
<- c(2.1, 4.76, 0.86, 8.44) #Sm-Em, Em-DA, DA-ET, ET-AN
Tbase <- c(80, 100, 300, 750) #Accumulated GDD thresholds
thresholds
# Function to assign phenological phases by study
<- function(df) {
calculate_phases_by_study <- 0
total_gdd <- 1
phase_num <- numeric(nrow(df))
gd_values <- numeric(nrow(df))
gdcumul_values <- character(nrow(df))
phases
for (day in 1:nrow(df)) {
<- df$T2M[day]
T2M <- calculate_gdd_phase(T2M, Tmin, Tmax, Tbase[phase_num])
GDD <- total_gdd + GDD
total_gdd <- GDD
gd_values[day] <- total_gdd
gdcumul_values[day]
if (is.na(total_gdd)) {
<- NA
phases[day] else if (total_gdd < thresholds[1]) {
} <- "Planting"
phases[day] else if (total_gdd < thresholds[2]) {
} <- "Emergence"
phases[day] else if (total_gdd < thresholds[3]) {
} <- "Double Ridge"
phases[day] else if (total_gdd < thresholds[4]) {
} <- "Terminal Spikelet"
phases[day] else {
} <- "Anthesis"
phases[day]
}
if (phase_num <= length(thresholds) && total_gdd >= thresholds[phase_num]) {
<- phase_num + 1
phase_num
}
}
<- df %>%
df mutate(gd = gd_values, gdcumul = gdcumul_values, phase = phases)
return(df)
}
# Function to compute anthesis date per dataset
<- function(weather_data) {
compute_anthesis %>%
weather_data group_by(study) %>%
group_modify(~ calculate_phases_by_study(.x)) %>%
ungroup() %>%
group_by(study) %>%
mutate(flowering = as.Date(date[phase == "Anthesis"][1]),
dias_para_antese = as.numeric(flowering - planting_date)) %>%
distinct(study, dias_para_antese, .keep_all = TRUE) %>%
::select(study, dias_para_antese)
dplyr }
Apply to NASA and Xavier Datasets
<- compute_anthesis(weather_nasa)
nasa <- compute_anthesis(weather_xavier) xavier
Final Note on Flowering Date
For subsequent analyses, the flowering date was defined as the mean number of days to anthesis across the NASA POWER and Xavier datasets, when both were available. For studies where only NASA POWER data existed, flowering was estimated exclusively from that source. The anthesis date was calculated as:
Planting date + estimated days to anthesis.