-
Notifications
You must be signed in to change notification settings - Fork 51
/
default.config
170 lines (125 loc) · 7.77 KB
/
default.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
########################################################################################################################
# This is the configuration file for the MOS-X model. Parameters specified here are used to build and run the
# machine learning model to predict a few weather parameters at a given station location.
########################################################################################################################
# Global parameters go here.
# The root directory of this program
MOSX_ROOT =
# The root directory of the BUFRgruven executable program
BUFR_ROOT =
# The directory where site-specific data files are saved (defaults to %MOSX_ROOT/site_data)
SITE_ROOT =
# The 4-letter station ID of the location
station_id = KAUS
# Lowest pressure level (the level nearest the surface) for vertical profiles. For stations at high elevation, this
# level may have to be set higher in the atmosphere, i.e. to a smaller pressure value, because data will be missing at
# higher pressures.
lowest_p_level = 950
# Starting and ending dates for the model training. BUFR data are available beginning 2010-01-01. If the parameter
# 'is_season' is True, then the program will assume that you want seasonally-subset training data beginning in the year
# of 'data_start_date' and ending in the year of 'data_end_date', with seasons spanning from the start day to the end
# day. Dates should be provided as YYYYMMDD.
# NOTE(review): the 'data_start_date' below is earlier than the 2010-01-01 BUFR availability stated above; confirm
# that this is intended.
data_start_date = 20081120
data_end_date = 20160409
is_season = True
# This is the UTC hour at which the forecast period starts on any given day. The period ends 24 hours later. (Defaults
# to 6.)
forecast_hour_start = 6
# The hourly resolution of the time series prediction. Should be one of 1, 2, 3, 4, or 6 (an integer from 1 to 6,
# excluding 5). Ignored if the parameter 'predict_timeseries' under 'Model' is False. (Defaults to 3.)
time_series_interval = 3
# API token for MesoWest data (required). To get an API key, visit https://synopticlabs.org/api/
# NOTE(review): the token is a credential; presumably a filled-in copy of this file should not be committed to version
# control.
meso_token =
# Produce verbose output
verbose = True
########################################################################################################################
# In this section, provide parameters used to control the retrieval of BUFR profile model data.
[BUFR]
# (Optional) The station ID used in BUFR for the station (defaults to 'station_id', but may be different)
bufr_station_id =
# (Optional) Data directory where BUFKIT files are saved (defaults to %SITE_ROOT/bufkit)
bufr_data_dir =
# (Optional) Path to bufrgruven executable (defaults to %BUFR_ROOT/bufr_gruven.pl)
bufrgruven =
# BUFR models to use; should be a comma-separated list
models = GFS, NAM
########################################################################################################################
# In this section, provide parameters to control the retrieval of observations.
[Obs]
# If True, use upper-air sounding data
use_soundings = False
# Upper-air sounding station ID; required if 'use_soundings' is True
sounding_station_id = FWD
# (Optional) Data directory where sounding files are saved (defaults to %SITE_ROOT/soundings)
sounding_data_dir =
# Set this option to False to disable retrieval of NCDC and CF6 data for max wind values. It may need to be
# disabled if the forecast start hour ('forecast_hour_start') is not near 6 UTC; otherwise it should be True.
# (Defaults to True.)
use_climo_wind = True
########################################################################################################################
# In this section, provide the machine learning model parameters. Headers in double brackets below are subsections
# of this section.
[Model]
# Save the model estimator using pickle to this file. The %(NAME)s references are presumably substituted with the
# values of the global parameters of the same name by the config parser.
estimator_file = %(SITE_ROOT)s/%(station_id)s_mosx.pkl
# The base scikit-learn regressor
regressor = ensemble.RandomForestRegressor
# If True, trains a separate estimator for each weather parameter
train_individual = True
# If True, also predict a meteorology time series for the next day, in addition to high/low/wind/rain
predict_timeseries = False
# This is the type of rain forecast. 'quantity' predicts an actual amount of rain in inches, 'categorical'
# predicts a probabilistic category of rain (a la MOS), and 'pop' (probability of precipitation) predicts the
# fractional chance that there will be ANY measurable precipitation.
rain_forecast_type = pop
# Keyword arguments passed to the base regressor
[[Parameters]]
n_estimators = 1000
max_features = 0.75
n_jobs = 2
verbose = 1
# This section, if present, enables a post-processing algorithm to be trained on the raw rain predictions from a
# native ensemble regressor. This is usually desirable because rain has a very non-normal distribution and is
# therefore tricky for standard algorithms to predict. The parameter 'rain_estimator' is a string, just like
# 'regressor' above, which determines the scikit-learn algorithm to use (classifiers are an option!). The parameter
# 'use_raw_rain' determines whether the raw rainfall estimates from the BUFR models are also used as features for
# the rain post-processor, in addition to the model ensemble statistics. Any other parameters provided here are
# passed as kwargs to the initialization of the processor's scikit-learn algorithm. Because of certain methods
# used in Bootstrapping, 'use_raw_rain' is currently not compatible with Bootstrapping.
# [[Rain tuning]]
# rain_estimator = ensemble.RandomForestRegressor
# use_raw_rain = False
# n_estimators = 100
# Ada boosting may improve the performance of a model by selectively increasing the weight of training on samples
# with a large training error. If Ada boosting is desired, provide here any parameters passed to the Ada class;
# otherwise, remove or comment out this subsection.
# [[Ada boosting]]
# n_estimators = 50
# This last option allows for bootstrapping development of an ensemble of ML models. The training set is split
# according to a few options here, then an ensemble of n_members is generated by training on individual splits.
# Comment out this section to disable bootstrapping.
# [[Bootstrapping]]
# n_members = 10
# # The number of training samples per split (if int), or the fraction (if float)
# n_samples_split = 0.1
# # If 1, each split contains no sample present in any other split. Also overrides n_samples_split and sets it
# # as the maximum available per split. Otherwise, set to 0.
# unique_splits = 0
########################################################################################################################
# There are a few parameters here for validation of the model.
[Validate]
# Start and end dates for the validation, given as YYYYMMDD
start_date = 20161120
end_date = 20170409
########################################################################################################################
# The run executable allows for an upload to an FTP or SFTP server, for example, to post data to a website. The forecast
# data are aggregated over all runs and uploaded in the format(s) given by 'file_type' below, and if a plot of forecast
# ensemble distributions is requested, that plot is also uploaded.
# NOTE(review): this header used to state that only 'pickle' is implemented, which conflicts with the 'file_type'
# options and its 'json' value below; confirm which file types the upload code supports.
[Upload]
# Type of file to upload forecasts in. Can be 'pickle' (one file for all forecasts), 'json' (one file per forecast
# date), 'uw_text' (one file per forecast date, only a short version with high/pop), or a list of several.
file_type = json
# User name, server, and directory on server. Prompts for password, or set an ssh key. The 'forecast_directory'
# points to where the forecasts are uploaded, while the 'plot_directory' points to where plots are uploaded. If
# 'username' and 'server' are both empty, then the directories are assumed to be local.
username =
server =
forecast_directory =
plot_directory =