-
Notifications
You must be signed in to change notification settings - Fork 4
/
example_drn.R
126 lines (95 loc) · 3.72 KB
/
example_drn.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
## File with exemplary usage of DRN function
# Load package (lubridate: used below for year() on initialization times)
library(lubridate)
# Set path to data
# NOTE(review): absolute path to the wind gust data set -- adjust to your
# local directory before running the full-data example
data_path <- "/wind_gust_data"
# Set path to github functions
# Assumes the working directory is the repository checkout containing
# pp_drn.R and the exemplary RData files
r_path <- getwd()
#### Example with full data ####
### Initialization
# Load station data (provides the `loc_data` object used for station IDs below)
# Idiom: file.path() instead of paste0(path, "/...") for path construction;
# the resulting strings are identical.
load(file = file.path(data_path, "loc_data.RData"))
# Load training and test data (provides `df_train` and `df_test`;
# presumably also `add_vars` used further below -- TODO confirm the
# contents of the RData file)
load(file = file.path(data_path, "ens_data", "ens_data_step18.RData"))
# Import postprocessing function (defines prep_data, assign_spatial_preds,
# drn_pp used in the remainder of this script)
source(file.path(r_path, "pp_drn.R"))
# Ensemble summary statistics of wind gust used as predictors
pred_ens <- c("ens_mean", "ens_sd")
# Mean and standard deviation of each additional meteorological variable:
# outer() builds an (add_vars x suffix) matrix; transposing and flattening
# yields the interleaved order var1_mean, var1_sd, var2_mean, var2_sd, ...
# NOTE(review): `add_vars` is not defined in this script -- presumably it is
# loaded from the RData file above; verify.
pred_add <- as.vector(t(outer(add_vars, c("_mean", "_sd"), paste0)))
# Temporal predictor: day of the year
pred_tm <- c("yday")
# Spatial (station-specific) predictors
pred_loc <- c("lat", "lon", "altitude", "orog_diff", "loc_cover", "loc_bias")
# Full predictor set passed to the network
pred_vars <- c(pred_ens, pred_add, pred_tm, pred_loc)
### Data preprocessing
# Prepare training data (needs to include the ensemble mean for bias
# prediction)
df_train <- prep_data(
  df = df_train,
  pred_ens = pred_ens,
  pred_add = pred_add,
  pred_tm = pred_tm,
  pred_loc = pred_loc,
  loc_path = data_path
)
# Prepare test data; bias and coverage are derived from the training
# observations, so they are dropped from the spatial predictors here
df_test <- prep_data(
  df = df_test,
  pred_ens = pred_ens,
  pred_add = pred_add,
  pred_tm = pred_tm,
  pred_loc = setdiff(pred_loc, c("loc_bias", "loc_cover")),
  loc_path = data_path
)
# Assign spatial predictors to the test set
# (bias and coverage are calculated on the training set!)
df_test <- assign_spatial_preds(
  preds = pred_loc,
  df_train = df_train,
  df_test = df_test
)
# Validation subset: all training samples initialized in 2015
i_valid <- which(year(df_train[["init_tm"]]) == 2015)
### Postprocessing application and evaluation
# Fit the DRN on the training data and postprocess the test forecasts
# (n_sim = 2 network replicates; verbose training output)
pred <- drn_pp(
  train = df_train,
  X = df_test,
  i_valid = i_valid,
  loc_id_vec = loc_data[["station_id"]],
  pred_vars = pred_vars,
  nn_ls = list(n_sim = 2, nn_verbose = TRUE)
)
# Inspect the first postprocessed forecasts
head(pred[["f"]])
# Summary of the evaluation measures of the postprocessed forecasts
summary(pred[["scores_pp"]])
# PIT histogram; dashed line marks the uniform (ideal calibration) reference
hist(x = pred[["scores_pp"]][["pit"]], freq = FALSE)
abline(h = 1, lty = 2)
# CRPSS w.r.t. the raw ensemble forecasts (in %)
crps_pp <- mean(pred[["scores_pp"]][["crps"]])
crps_ens <- mean(pred[["scores_ens"]][["crps"]])
print(100 * (1 - crps_pp / crps_ens))
#### Example with exemplary data from Github ####
# Load the small training and test data sets shipped with the repository
load(file = file.path(r_path, "df_train.RData"))
load(file = file.path(r_path, "df_test.RData"))
# Indices of the validation subset within the training data
i_valid <- 800:984
# Import postprocessing function
source(file.path(r_path, "pp_drn.R"))
# Predictors: every column except the observations and the 20 individual
# ensemble members
member_cols <- paste0("ens_", 1:20)
pred_vars <- names(df_train)[!names(df_train) %in% c("obs", member_cols)]
# Fit the DRN and postprocess the test forecasts
# (n_sim = 3 network replicates; verbose training output)
pred <- drn_pp(
  train = df_train,
  X = df_test,
  i_valid = i_valid,
  loc_id_vec = as.character(1:10),
  pred_vars = pred_vars,
  nn_ls = list(n_sim = 3, nn_verbose = TRUE)
)
# Inspect the first postprocessed forecasts
head(pred[["f"]])
# Summary of the evaluation measures of the postprocessed forecasts
summary(pred[["scores_pp"]])
# PIT histogram; dashed line marks the uniform (ideal calibration) reference
hist(x = pred[["scores_pp"]][["pit"]], freq = FALSE)
abline(h = 1, lty = 2)
# CRPSS w.r.t. the raw ensemble forecasts (in %)
crps_pp <- mean(pred[["scores_pp"]][["crps"]])
crps_ens <- mean(pred[["scores_ens"]][["crps"]])
print(100 * (1 - crps_pp / crps_ens))