# -*- coding: UTF-8 -*-
"""
MATILDA Calibration Module "mspot"
--------------------------
This script provides tools for calibrating the MATILDA glacio-hydrological model using statistical parameter optimization
techniques. It makes extensive use of the SPOTPY package for sampling, optimization, and parameter uncertainty analysis. The module
is tailored for glacierized catchments and includes support for both catchment-wide and glacier-only calibration.
Key Features:
-------------
- Parameter Sampling: Flexible setup for sampling hydrological model parameters using various algorithms from SPOTPY.
- Objective Functions: Includes multiple objective functions such as Kling-Gupta Efficiency (KGE), Mean Absolute Error
(MAE), and custom user-defined functions.
- Calibration Targets: Supports calibration against observed discharge, glacier mass balance, and snow water equivalent.
- Statistical Analysis: Provides tools to analyze parameter posterior distributions and derive parameter bounds.
- Seasonal and Annual Analysis: Includes functions for evaluating seasonal (winter/summer) and annual metrics.
- Parallel Computing: Supports parallel sampling for computational efficiency.
Functions:
----------
1. `yesno`: Interactive function to confirm actions.
2. `dict2bounds`: Converts a dictionary of parameter values into bounds.
3. `loglike_kge`: Calculates a pseudo-log-likelihood using the Kling-Gupta Efficiency (KGE).
4. `winter`, `summer`, `annual`: Extract seasonal or annual data slices for evaluation.
5. `scaled_pdd`, `scaled_ndd`: Apply temperature lapse rates to calculate positive or negative degree days.
6. `spot_setup` and `spot_setup_glacier`: Set up SPOTPY sampling for catchment-wide and glacier-specific calibration.
7. `analyze_results`: Analyze SPOTPY results to identify the best-performing parameter set and generate diagnostic plots.
8. `psample`: Automates parameter sampling and optimization with various SPOTPY algorithms.
9. `load_parameters`: Loads the best-performing parameter set from SPOTPY results.
10. `get_par_bounds`: Derives parameter bounds from SPOTPY sampling results.
Reference:
-----------
SPOTPY Library:
- Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
- GitHub repository: https://github.com/thouska/spotpy
Dependencies
------------
- Python 3.7 or higher
- spotpy
- pandas
- numpy
- matplotlib
- hydroeval
- scipy
Usage:
------
This script is designed to be part of the MATILDA framework as calibration tool for the matilda.core model.
License:
--------
This software is released under the MIT License. See LICENSE file for details.
Contact
-------
For questions or contributions, please contact:
- Developer: Phillip Schuster
- Email: phillip.schuster@geo.hu-berlin.de
- Institution: Humboldt-Universität zu Berlin
"""
import sys
import os
from datetime import date
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import spotpy
import HydroErr as he
from scipy.stats import gamma
from spotpy.parameter import Uniform
from spotpy.objectivefunctions import mae
from matilda.core import (
matilda_simulation,
matilda_parameter,
matilda_preproc,
create_lookup_table,
updated_glacier_melt,
)
[docs]
class HiddenPrints:
"""
Suppress prints during multiple iterations or noisy function calls.
This utility class redirects standard output to `/dev/null`, effectively silencing
any print statements during its usage. It can be used as a context manager to suppress
output temporarily.
Example:
with HiddenPrints():
# Any print statements here will be suppressed
noisy_function()
Methods
-------
__enter__()
Redirects standard output to `/dev/null`.
__exit__(exc_type, exc_val, exc_tb)
Restores the original standard output.
"""
def __init__(self):
# Initialize the attribute in the constructor
self._original_stdout = None
[docs]
def __enter__(self):
self._original_stdout = sys.stdout
sys.stdout = open(os.devnull, "w", encoding="utf-8")
[docs]
def __exit__(self, exc_type, exc_val, exc_tb):
if sys.stdout:
sys.stdout.close()
sys.stdout = self._original_stdout
[docs]
def yesno(question):
"""
Prompt the user with a yes/no question and return the response.
This function repeatedly prompts the user with the given question until a valid
response ('y' or 'n') is entered.
Parameters
----------
question : str
The yes/no question to present to the user.
Returns
-------
bool
Returns True if the user responds with 'y', and False if the user responds with 'n'.
"""
prompt = f"{question} ? (y/n): "
ans = input(prompt).strip().lower()
if ans not in ["y", "n"]:
print(f"{ans} is invalid, please try again...")
return yesno(question)
if ans == "y":
return True
return False
[docs]
def dict2bounds(p_dict, drop=None):
"""
Convert a parameter dictionary to a bounds dictionary to be passed to spot_setup().
This function takes a dictionary of parameters and converts it into a dictionary
containing both lower (`_lo`) and upper (`_up`) bounds for each parameter. Optionally,
specific parameters can be excluded from the resulting dictionary.
Parameters
----------
p_dict : dict
A dictionary containing parameter names as keys and their initial values as values.
drop : list, optional
A list of parameter names to exclude from the resulting bounds dictionary.
Returns
-------
dict
A dictionary containing both lower (`_lo`) and upper (`_up`) bounds for the remaining
parameters in `p_dict`.
"""
if drop is None:
drop = []
for i in drop:
p_dict.pop(i, None) # Use pop to avoid KeyError if key doesn't exist
p = {
**dict(zip([i + "_lo" for i in p_dict.keys()], p_dict.values())),
**dict(zip([i + "_up" for i in p_dict.keys()], p_dict.values())),
}
return p
[docs]
def loglike_kge(qsim, qobs):
"""
Calculate a pseudo log-likelihood function using the Kling-Gupta Efficiency (KGE) and a gamma distribution.
This function computes a log-likelihood value based on the KGE between simulated and observed flow values,
and models the exceedance deviation (ED) using a gamma distribution.
Parameters
----------
qsim : array_like
Array of simulated flow values.
qobs : array_like
Array of observed flow values.
Returns
-------
float
The calculated value of the log-likelihood function.
Notes
-----
The log-likelihood function is based on the Kling-Gupta Efficiency (KGE) following Liu et al. (2022).
The exceedance deviation (ED) is derived from the KGE as `ED = 1 - KGE`. The gamma probability density
function (PDF) is then used to model the likelihood, with shape parameter `a=0.5` and scale parameter `scale=1`.
References
----------
- Liu, Y., Fernández-Ortega, J., Mudarra, M., and Hartmann, A. (2022). "Pitfalls and a feasible solution for
using KGE as an informal likelihood function in MCMC methods: DREAM(ZS) as an example." HESS, 26(20).
- Kling, H., Gupta, H., & Yilmaz, K. K. (2012). "Model selection criteria for rainfall-runoff
models: Representation of variability in Bayesian Model Averaging." Water Resources Research, 48(6), W06306.
- `hydroeval`: https://github.com/ThibHlln/hydroeval
- `scipy.stats.gamma`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
"""
# Calculate KGE
n = len(qobs)
KGE = he.kge_2012(qsim, qobs, remove_zero=True)
ED = 1 - KGE
# Calculate log-likelihood function
gammapdf = gamma.pdf(ED, a=0.5, scale=1)
log_L = 0.5 * n * np.log(gammapdf)
return log_L
[docs]
def winter(year, data):
"""
Extract the winter period for a given year from a record of the World Glacier Monitoring service (WGMS).
This function filters the input dataset for the specified year and returns a slice object representing the winter
period based on the `BEGIN_PERIOD` and `END_WINTER` columns.
Parameters
----------
year : int
The year for which the winter period is to be extracted.
data : pandas.DataFrame
The input dataset containing at least the following columns:
- `BEGIN_PERIOD`: The start of the winter period.
- `END_WINTER`: The end of the winter period.
- Index representing years.
Returns
-------
slice
A slice object representing the winter period for the specified year.
Notes
-----
The function assumes that the dataset is indexed by years and contains the `BEGIN_PERIOD` and `END_WINTER` columns
with appropriate datetime or timestamp values.
References
----------
- WGMS (2024): Fluctuations of Glaciers Database. World Glacier Monitoring Service, Zurich, Switzerland.
https://doi.org/10.5904/wgms-fog-2024-01
"""
data = data[data.index == year]
winter_slice = slice(data.BEGIN_PERIOD.squeeze(), data.END_WINTER.squeeze())
return winter_slice
[docs]
def summer(year, data):
"""
Extract the summer period for a given year from a record of the World Glacier Monitoring service (WGMS).
This function filters the input dataset for the specified year and returns a slice object representing the summer
period based on the `END_WINTER` and `END_PERIOD` columns.
Parameters
----------
year : int
The year for which the summer period is to be extracted.
data : pandas.DataFrame
The input dataset containing at least the following columns:
- `END_WINTER`: The end of the winter period.
- `END_PERIOD`: The end of the summer period.
- Index representing years.
Returns
-------
slice
A slice object representing the summer period for the specified year.
Notes
-----
The function assumes that the dataset is indexed by years and contains the `END_WINTER` and `END_PERIOD` columns
with appropriate datetime or timestamp values.
References
----------
- WGMS (2024): Fluctuations of Glaciers Database. World Glacier Monitoring Service, Zurich, Switzerland.
https://doi.org/10.5904/wgms-fog-2024-01
"""
data = data[data.index == year]
summer_slice = slice(data.END_WINTER.squeeze(), data.END_PERIOD.squeeze())
return summer_slice
[docs]
def annual(year, data):
"""
Extract the annual period for a given year from a record of the World Glacier Monitoring service (WGMS).
This function filters the input dataset for the specified year and returns a slice object representing the
annual period based on the `BEGIN_PERIOD` and `END_PERIOD` columns.
Parameters
----------
year : int
The year for which the annual period is to be extracted.
data : pandas.DataFrame
The input dataset containing at least the following columns:
- `BEGIN_PERIOD`: The start of the annual period.
- `END_PERIOD`: The end of the annual period.
- Index representing years.
Returns
-------
slice
A slice object representing the annual period for the specified year.
Notes
-----
The function assumes that the dataset is indexed by years and contains the `BEGIN_PERIOD` and `END_PERIOD` columns
with appropriate datetime or timestamp values.
References
----------
- WGMS (2024): Fluctuations of Glaciers Database. World Glacier Monitoring Service, Zurich, Switzerland.
https://doi.org/10.5904/wgms-fog-2024-01
"""
data = data[data.index == year]
annual_slice = slice(data.BEGIN_PERIOD.squeeze(), data.END_PERIOD.squeeze())
return annual_slice
[docs]
def scaled_pdd(data, elev, lr):
"""
Calculate scaled positive degree days (PDD) based on temperature lapse rate and elevation adjustments.
This function adjusts the input temperature data for elevation differences using a given lapse rate, then calculates
the positive degree days by setting all negative or zero temperatures to zero.
Parameters
----------
data : array_like
Array or series of temperature values (in °C).
elev : float
Elevation difference (in meters) used for scaling the temperature values.
lr : float
Temperature lapse rate (in °C/m) used for scaling.
Returns
-------
pdd : array_like
Scaled positive degree days (PDD), where negative values are replaced with zero.
Notes
-----
Positive degree days are commonly used in snow and ice melt models to estimate melt based on temperature inputs.
This function adjusts the temperature for elevation differences before computing PDD.
Examples
--------
>>> data = np.array([2, -1, 3, -2, 5])
>>> elev = 500 # meters
>>> lr = -0.006 # °C/m
>>> scaled_pdd(data, elev, lr)
array([5, 0, 6, 0, 8])
"""
s = data + elev * lr
pdd = np.where(s > 0, s, 0)
return pdd
[docs]
def scaled_ndd(data, elev, lr):
"""
Calculate scaled negative degree days (NDD) based on temperature lapse rate and elevation adjustments.
This function adjusts the input temperature data for elevation differences using a given lapse rate, then calculates
the negative degree days by setting all positive or zero temperatures to zero and marking negative temperatures as 1.
Parameters
----------
data : array_like
Array or series of temperature values (in °C).
elev : float
Elevation difference (in meters) used for scaling the temperature values.
lr : float
Temperature lapse rate (in °C/m) used for scaling.
Returns
-------
ndd : array_like
Scaled negative degree days (NDD), where negative temperatures are marked as 1 and others as 0.
Notes
-----
Negative degree days are used in some hydrological and glaciological models to track periods with freezing conditions.
This function adjusts the temperature for elevation differences before computing NDD.
Examples
--------
>>> data = np.array([2, -1, 3, -2, 5])
>>> elev = 500 # meters
>>> lr = -0.006 # °C/m
>>> scaled_ndd(data, elev, lr)
array([0, 1, 0, 1, 0])
"""
s = data + elev * lr
ndd = np.where(s < 0, 1, 0)
return ndd
[docs]
def spot_setup(
set_up_start=None,
set_up_end=None,
sim_start=None,
sim_end=None,
freq="D",
lat=None,
area_cat=None,
area_glac=None,
ele_dat=None,
ele_glac=None,
ele_cat=None,
glacier_profile=None,
elev_rescaling=True,
target_mb=None,
target_swe=None,
swe_scaling=None,
fix_param=None,
fix_val=None,
obj_func=None,
lr_temp_lo=-0.0065,
lr_temp_up=-0.0055,
lr_prec_lo=0,
lr_prec_up=0.002,
BETA_lo=1,
BETA_up=6,
CET_lo=0,
CET_up=0.3,
FC_lo=50,
FC_up=500,
K0_lo=0.01,
K0_up=0.4,
K1_lo=0.01,
K1_up=0.4,
K2_lo=0.001,
K2_up=0.15,
LP_lo=0.3,
LP_up=1,
MAXBAS_lo=2,
MAXBAS_up=7,
PERC_lo=0,
PERC_up=3,
UZL_lo=0,
UZL_up=500,
PCORR_lo=0.5,
PCORR_up=2,
TT_snow_lo=-1.5,
TT_snow_up=1.5,
TT_diff_lo=0.5,
TT_diff_up=2.5,
CFMAX_snow_lo=0.5,
CFMAX_snow_up=10,
CFMAX_rel_lo=1.2,
CFMAX_rel_up=2,
SFCF_lo=0.4,
SFCF_up=1,
CWH_lo=0,
CWH_up=0.2,
AG_lo=0,
AG_up=1,
CFR_lo=0.05,
CFR_up=0.25,
interf=4,
freqst=2,
):
"""
Spotpy-based setup for parameter optimization and calibration of the MATILDA model.
This class provides the framework to optimize the parameters of the MATILDA model using SPOTPY, a statistical parameter
optimization tool. It enables flexible parameter setup, handles fixed parameters, and evaluates simulation results
against observed data (e.g., streamflow, snow water equivalent, surface mass balance).
Parameters
----------
General inputs:
- set_up_start, set_up_end : str
Start and end dates of the setup period.
- sim_start, sim_end : str
Start and end dates of the simulation period.
- freq : str, optional
Frequency of the data, default is daily ("D").
- lat : float, optional
Latitude of the catchment for extraterrestrial radiation calculation.
- area_cat, area_glac : float
Catchment and glacierized areas in km².
- ele_dat, ele_glac, ele_cat : float
Mean elevations (m a.s.l.) of the data, glacier, and catchment.
- glacier_profile : DataFrame, optional
Glacier profile for scaling and elevation rescaling.
- elev_rescaling : bool, optional
Flag for enabling elevation rescaling, default is True.
- target_mb, target_swe : float, optional
Target surface mass balance (MB) or snow water equivalent (SWE) for calibration.
- swe_scaling : float, optional
Scaling factor for snow water equivalent. Used to account for the mismatch in resolution of the SWE calibration
data and the glacier mask used for MATILDA.
Parameter bounds for calibration:
- Fix parameter bounds (e.g., `lr_temp_lo`, `lr_temp_up` for lapse rates).
- `fix_param` : list, optional
List of parameters to fix during calibration.
- `fix_val` : dict, optional
Fixed parameter values (if applicable).
SPOTPY settings:
- `interf` : int, optional
Inference factor for parameter iterations, default is 4.
- `freqst` : int, optional
Frequency step for sensitivity analysis, default is 2.
Methods
-------
simulation(x, param_names, fix_param, fix_val, swe_scaling)
Executes the MATILDA simulation with sampled parameters and fixed parameters (if specified).
evaluation()
Prepares the observed data (e.g., streamflow, SWE) for comparison with simulation results.
objectivefunction(simulation, evaluation, params=None)
Defines the objective function for model evaluation using the Kling-Gupta Efficiency (KGE) or user-defined metrics.
Returns
-------
spot_setup_class : class
Configured SPOTPY setup class for MATILDA parameter optimization and calibration.
References
----------
- SPOTPY library:
- Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
- GitHub repository: https://github.com/thouska/spotpy
- Kling-Gupta Efficiency (KGE):
- Gupta, H. V., & Kling, H. (2011). "On typical range, sensitivity, and normalization of Mean Squared Error and
Nash-Sutcliffe Efficiency type metrics." Water Resources Research, 47(10). https://doi.org/10.1029/2011WR010962
- Kling, H., Fuchs, M., & Paulin, M. (2012). "Runoff conditions in the upper Danube basin under an ensemble of
climate change scenarios." Journal of Hydrology, 424-425, 264-277. https://doi.org/10.1016/j.jhydrol.2012.01.011
"""
class spot_setup_class:
"""
spot_setup_class: A SPOTPY setup class for MATILDA model parameter calibration.
This class is designed to facilitate the optimization and calibration of the MATILDA model parameters
using SPOTPY, a statistical parameter optimization library. It defines the parameter space, simulation
methods, evaluation functions, and the objective function to assess the model's performance.
Attributes
----------
param : list
List of SPOTPY parameter distributions for calibration.
param_names : list
List of names corresponding to the parameters being calibrated.
par_iter : int
Number of parameter iterations needed for sensitivity analysis and parameter inference.
Methods
-------
__init__(df, obs, swe, obj_func=None)
Initializes the setup class with input data, observations, and an optional objective function.
simulation(x, param_names=param_names, fix_param=fix_param, fix_val=fix_val, swe_scaling=swe_scaling)
Runs the MATILDA simulation with sampled parameters and any fixed parameters specified.
evaluation()
Prepares the observed data (e.g., streamflow, snow water equivalent, surface mass balance) for
comparison with simulation results.
objectivefunction(simulation, evaluation, params=None)
Calculates the objective function to evaluate model performance. Supports metrics such as
the Kling-Gupta Efficiency (KGE) and custom metrics defined by the user.
Usage
-----
The `spot_setup_class` is instantiated with observed data (`obs`) and optionally with snow water equivalent
(SWE) data and a custom objective function. Once instantiated, it can be passed to SPOTPY's optimization
and sensitivity analysis tools to calibrate the MATILDA model.
Example
-------
from spotpy.algorithms import sceua
spot_setup_instance = spot_setup(df, obs, swe, obj_func)
sampler = sceua(spot_setup_instance)
sampler.sample(1000)
References
----------
- SPOTPY library:
- Houska, T., et al. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE.
- GitHub: https://github.com/thouska/spotpy
- Kling-Gupta Efficiency (KGE):
- Gupta, H. V., & Kling, H. (2011). "On typical range, sensitivity, and normalization of Mean Squared Error
and Nash-Sutcliffe Efficiency type metrics." Water Resources Research.
- Kling, H., et al. (2012). "Runoff conditions in the upper Danube basin under an ensemble of climate
change scenarios." Journal of Hydrology.
"""
# defining all parameters and the distribution
lr_temp = Uniform(low=lr_temp_lo, high=lr_temp_up)
lr_prec = Uniform(low=lr_prec_lo, high=lr_prec_up)
BETA = Uniform(low=BETA_lo, high=BETA_up)
CET = Uniform(low=CET_lo, high=CET_up)
FC = Uniform(low=FC_lo, high=FC_up)
K0 = Uniform(low=K0_lo, high=K0_up)
K1 = Uniform(low=K1_lo, high=K1_up)
K2 = Uniform(low=K2_lo, high=K2_up)
LP = Uniform(low=LP_lo, high=LP_up)
MAXBAS = Uniform(low=MAXBAS_lo, high=MAXBAS_up)
PERC = Uniform(low=PERC_lo, high=PERC_up)
UZL = Uniform(low=UZL_lo, high=UZL_up)
PCORR = Uniform(low=PCORR_lo, high=PCORR_up)
TT_snow = Uniform(low=TT_snow_lo, high=TT_snow_up)
TT_diff = Uniform(low=TT_diff_lo, high=TT_diff_up)
CFMAX_snow = Uniform(low=CFMAX_snow_lo, high=CFMAX_snow_up)
CFMAX_rel = Uniform(low=CFMAX_rel_lo, high=CFMAX_rel_up)
SFCF = Uniform(low=SFCF_lo, high=SFCF_up)
CWH = Uniform(low=CWH_lo, high=CWH_up)
AG = Uniform(low=AG_lo, high=AG_up)
CFR = Uniform(low=CFR_lo, high=CFR_up)
# Create the list containing the variables
param = [
lr_temp,
lr_prec,
BETA,
CET,
FC,
K0,
K1,
K2,
LP,
MAXBAS,
PERC,
UZL,
PCORR,
TT_snow,
TT_diff,
CFMAX_snow,
CFMAX_rel,
SFCF,
CWH,
AG,
CFR,
]
# Exclude parameters defined in fix_param
param_names = [
"lr_temp",
"lr_prec",
"BETA",
"CET",
"FC",
"K0",
"K1",
"K2",
"LP",
"MAXBAS",
"PERC",
"UZL",
"PCORR",
"TT_snow",
"TT_diff",
"CFMAX_snow",
"CFMAX_rel",
"SFCF",
"CWH",
"AG",
"CFR",
]
# Exclude parameters that should be fixed
if fix_param:
param = [p for p, name in zip(param, param_names) if name not in fix_param]
param_names = [param for param in param_names if param not in fix_param]
for par in fix_param:
if par in globals():
del globals()[par]
if par in locals():
del locals()[par]
# Number of needed parameter iterations for parametrization and sensitivity analysis
M = interf # inference factor (default = 4)
d = freqst # frequency step (default = 2)
k = len(param) # number of parameters
par_iter = (1 + 4 * M**2 * (1 + (k - 2) * d)) * k
def __init__(
self,
df,
obs,
swe,
swe_scaling=swe_scaling,
fix_param=fix_param,
fix_val=fix_val,
obj_func=obj_func,
):
self.obj_func = obj_func
self.Input = df
self.obs = obs
self.swe = swe
self.swe_scaling = swe_scaling
self.fix_param = fix_param
self.fix_val = fix_val
def simulation(self, x, param_names=None):
if param_names is None:
param_names = self.param_names
with HiddenPrints():
# Setup all parameters for sampling
args = {par_name: x[par_name] for par_name in param_names}
# Fix parameters on desired values if defined
if self.fix_param is not None and self.fix_val is not None:
for p in self.fix_param:
if p in self.fix_val:
args[p] = self.fix_val[p]
sim = matilda_simulation(
self.Input,
obs=self.obs,
output=None,
set_up_start=set_up_start,
set_up_end=set_up_end,
sim_start=sim_start,
sim_end=sim_end,
freq=freq,
lat=lat,
area_cat=area_cat,
area_glac=area_glac,
ele_dat=ele_dat,
ele_glac=ele_glac,
ele_cat=ele_cat,
plots=False,
warn=False,
glacier_profile=glacier_profile,
elev_rescaling=elev_rescaling,
**args,
)
swe_sim = (
sim[0]
.snowpack_off_glaciers["2000-01-01":"2017-09-30"]
.to_frame(name="SWE_sim")
)
if self.swe_scaling is not None:
swe_sim = swe_sim * self.swe_scaling
if target_mb is None:
if target_swe is None:
return sim[0].total_runoff
return [sim[0].total_runoff, swe_sim.SWE_sim]
if target_swe is None:
return [sim[0].total_runoff, sim[5].smb_water_year.mean()]
return [
sim[0].total_runoff,
sim[5].smb_water_year.mean(),
swe_sim.SWE_sim,
]
def evaluation(self):
obs_preproc = self.obs.copy()
obs_preproc.set_index("Date", inplace=True)
obs_preproc.index = pd.to_datetime(obs_preproc.index)
obs_preproc = obs_preproc[sim_start:sim_end]
# Changing the input unit from m³/s to mm.
obs_preproc["Qobs"] = (
obs_preproc["Qobs"] * 86400 / (area_cat * 1000000) * 1000
)
# To daily resolution
obs_preproc = obs_preproc.resample("D").agg(pd.Series.sum, skipna=False)
# Expanding the observation period full years filling up with NAs
idx_first = obs_preproc.index.year[1]
idx_last = obs_preproc.index.year[-1]
idx = pd.date_range(
start=date(idx_first, 1, 1),
end=date(idx_last, 12, 31),
freq="D",
name=obs_preproc.index.name,
)
obs_preproc = obs_preproc.reindex(idx)
obs_preproc = obs_preproc.fillna(np.NaN)
if target_swe is not None:
swe_obs = self.swe
if "Date" in self.swe.columns:
swe_obs["Date"] = pd.to_datetime(swe_obs["Date"])
swe_obs.set_index("Date", inplace=True)
swe_obs = swe_obs * 1000
swe_obs = swe_obs["2000-01-01":"2017-09-30"]
if target_mb is None:
if target_swe is None:
return obs_preproc.Qobs
return [obs_preproc.Qobs, swe_obs.SWE_Mean]
if target_swe is None:
return [obs_preproc.Qobs, target_mb]
return [obs_preproc.Qobs, target_mb, swe_obs.SWE_Mean]
def objectivefunction(self, simulation, evaluation):
# SPOTPY expects to get one or multiple values back,
# that define the performance of the model run
if target_mb is not None:
if target_swe is not None:
sim_runoff, sim_smb, sim_swe = simulation
eval_runoff, eval_smb, eval_swe = evaluation
obj3 = he.kge_2012(sim_swe, eval_swe, remove_zero=False)
else:
sim_runoff, sim_smb = simulation
eval_runoff, eval_smb = evaluation
obj2 = abs(eval_smb - sim_smb)
simulation = sim_runoff
evaluation = eval_runoff
elif target_swe is not None:
sim_runoff, sim_swe = simulation
eval_runoff, eval_swe = evaluation
obj3 = he.kge_2012(sim_swe, eval_swe, remove_zero=False)
# Crop both timeseries to same periods without NAs
sim_new = pd.DataFrame()
sim_new["mod"] = pd.DataFrame(simulation)
sim_new["obs"] = evaluation
clean = sim_new.dropna()
simulation_clean = clean["mod"]
evaluation_clean = clean["obs"]
if not self.obj_func:
# This is used if not overwritten by user
# obj1 = kge(evaluation_clean, simulation_clean) # SPOTPY internal kge
obj1 = he.kge_2012(
simulation_clean, evaluation_clean, remove_zero=True
) # same as MATILDA
else:
# Way to ensure flexible spot setup class
obj1 = self.obj_func(evaluation_clean, simulation_clean)
if target_mb is None:
if target_swe is None:
return obj1
return [obj1, obj3]
if target_swe is None:
return [obj1, obj2]
return [obj1, obj2, obj3]
return spot_setup_class
[docs]
def spot_setup_glacier(
set_up_start=None,
set_up_end=None,
sim_start=None,
sim_end=None,
freq="D",
lat=None,
area_cat=None,
area_glac=None,
ele_dat=None,
ele_glac=None,
glacier_profile=None,
obs_type="annual",
obj_func=None,
lr_temp_lo=-0.0065,
lr_temp_up=-0.0055,
lr_prec_lo=0,
lr_prec_up=0.002,
PCORR_lo=0.5,
PCORR_up=2,
TT_snow_lo=-1.5,
TT_snow_up=1.5,
TT_diff_lo=0.5,
TT_diff_up=2.5,
CFMAX_snow_lo=0.5,
CFMAX_snow_up=10,
CFMAX_rel_lo=1.2,
CFMAX_rel_up=2,
SFCF_lo=0.4,
SFCF_up=1,
CFR_lo=0.05,
CFR_up=0.25,
interf=4,
freqst=2,
):
"""
Spotpy-based setup for parameter optimization and calibration of the MATILDA model for a (sub-)catchment entirely
covered by glaciers.
This class provides the framework to optimize the parameters of the MATILDA model using SPOTPY, a statistical parameter
optimization tool. It enables flexible parameter setup, handles fixed parameters, and evaluates simulation results
against observed data (e.g., streamflow, snow water equivalent, surface mass balance). It supports annual, winter,
and summer mass balance optimization based on observed data.
Parameters
----------
General inputs:
- set_up_start, set_up_end : str
Start and end dates of the setup period.
- sim_start, sim_end : str
Start and end dates of the simulation period.
- freq : str, optional
Frequency of the data, default is daily ("D").
- lat : float, optional
Latitude of the catchment for extraterrestrial radiation calculation.
- area_cat, area_glac : float
Catchment and glacierized areas in km².
- ele_dat, ele_glac, ele_cat : float
Mean elevations (m a.s.l.) of the data, glacier, and catchment.
- glacier_profile : DataFrame
Glacier profile used for elevation rescaling and look-up table generation.
- obs_type : str, optional
Type of observed mass balance data: "annual", "winter", or "summer". Default is "annual".
- obj_func : callable, optional
Custom objective function to compare simulation and observation. If None, the Mean Absolute Error (MAE) is used.
Parameter bounds for calibration:
- lr_temp_lo, lr_temp_up : float
Bounds for temperature lapse rate (°C/m).
- lr_prec_lo, lr_prec_up : float
Bounds for precipitation correction factor.
- PCORR_lo, PCORR_up : float
Bounds for precipitation correction factor.
- TT_snow_lo, TT_snow_up : float
Bounds for snow temperature threshold (°C).
- TT_diff_lo, TT_diff_up : float
Bounds for the difference between rain and snow temperature thresholds (°C).
- CFMAX_snow_lo, CFMAX_snow_up : float
Bounds for the degree-day factor for snow melt (mm/°C/day).
- CFMAX_rel_lo, CFMAX_rel_up : float
Bounds for the degree-day factor for relative snow and ice melt.
- SFCF_lo, SFCF_up : float
Bounds for snow correction factor.
- CFR_lo, CFR_up : float
Bounds for the refreezing factor.
SPOTPY settings:
- interf : int, optional
Inference factor for parameter iterations, default is 4.
- freqst : int, optional
Frequency step for sensitivity analysis, default is 2.
Methods
-------
simulation(x)
Executes the glacier mass balance simulation using the degree-day model (DDM).
evaluation()
Prepares observed mass balance data (annual, winter, or summer) for comparison.
objectivefunction(simulation, evaluation, params=None)
Defines the objective function to calculate the error between simulated and observed mass balance values.
Returns
-------
spot_setup_class : class
Configured SPOTPY setup class for glacier mass balance parameter optimization.
References
----------
- SPOTPY library:
- Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
- GitHub repository: https://github.com/thouska/spotpy
"""
class spot_setup_class:
"""
spot_setup_class: A SPOTPY setup class for MATILDA model glacier mass balance parameter calibration.
This class facilitates the calibration and optimization of the MATILDA model parameters for glacier mass balance
modeling. It uses SPOTPY, a statistical parameter optimization library, to define the parameter space, simulate
mass balance, and evaluate the model's performance against observed data.
Attributes
----------
param : list
List of SPOTPY parameter distributions for calibration, including lapse rates, temperature thresholds, and
degree-day factors.
par_iter : int
Number of parameter iterations needed for sensitivity analysis and parameter inference.
Methods
-------
__init__(df, obs, obj_func=None)
Initializes the class with input data (`df`), observed data (`obs`), and an optional custom objective function.
simulation(x)
Simulates glacier mass balance using the MATILDA model with the specified parameters.
evaluation()
Processes the observed mass balance data into a format compatible with the simulated outputs.
objectivefunction(simulation, evaluation, params=None)
Defines the objective function to evaluate the model's performance. By default, it uses the Mean Absolute
Error (MAE) or a custom user-defined function.
Usage
-----
The `spot_setup_class` is instantiated with observed data and passed to SPOTPY for parameter optimization and
sensitivity analysis.
Example
-------
from spotpy.algorithms import sceua
spot_setup_instance = spot_setup_glacier(df, obs, obj_func)
sampler = sceua(spot_setup_instance)
sampler.sample(1000)
Parameters
----------
General Inputs:
- set_up_start, set_up_end : str
Start and end dates of the setup period.
- sim_start, sim_end : str
Start and end dates of the simulation period.
- freq : str, optional
Frequency of the data, default is daily ("D").
- lat : float, optional
Latitude of the catchment for extraterrestrial radiation calculation.
- area_cat, area_glac : float
Catchment and glacierized areas in km².
- ele_dat, ele_glac, ele_cat : float
Mean elevations (m a.s.l.) of the data, glacier, and catchment.
- glacier_profile : DataFrame
Glacier profile used for elevation rescaling and look-up table generation.
- obs_type : str, optional
Type of observed mass balance data ("annual", "winter", or "summer"). Default is "annual".
- obj_func : callable, optional
Custom objective function to compare simulation and observation. Defaults to Mean Absolute Error (MAE).
Parameter Bounds for Calibration:
- lr_temp_lo, lr_temp_up : float
Bounds for the temperature lapse rate (°C/m).
- lr_prec_lo, lr_prec_up : float
Bounds for the precipitation correction factor.
- PCORR_lo, PCORR_up : float
Bounds for the precipitation correction factor.
- TT_snow_lo, TT_snow_up : float
Bounds for the snow temperature threshold (°C).
- TT_diff_lo, TT_diff_up : float
Bounds for the difference between rain and snow temperature thresholds (°C).
- CFMAX_snow_lo, CFMAX_snow_up : float
Bounds for the degree-day factor for snow melt (mm/°C/day).
- CFMAX_rel_lo, CFMAX_rel_up : float
Bounds for the degree-day factor for relative snow and ice melt.
- SFCF_lo, SFCF_up : float
Bounds for the snow correction factor.
- CFR_lo, CFR_up : float
Bounds for the refreezing factor.
SPOTPY Settings:
- interf : int, optional
Inference factor for parameter iterations. Default is 4.
- freqst : int, optional
Frequency step for sensitivity analysis. Default is 2.
Returns
-------
spot_setup_class : class
Configured SPOTPY setup class for glacier mass balance parameter optimization.
References
----------
- SPOTPY library:
- Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
- GitHub repository: https://github.com/thouska/spotpy
"""
# defining all parameters and the distribution
param = (
lr_temp,
lr_prec,
PCORR,
TT_snow,
TT_diff,
CFMAX_snow,
CFMAX_rel,
SFCF,
CFR,
) = [
Uniform(low=lr_temp_lo, high=lr_temp_up), # lr_temp
Uniform(low=lr_prec_lo, high=lr_prec_up), # lr_prec
Uniform(low=PCORR_lo, high=PCORR_up), # PCORR
Uniform(low=TT_snow_lo, high=TT_snow_up), # TT_snow
Uniform(low=TT_diff_lo, high=TT_diff_up), # TT_diff
Uniform(low=CFMAX_snow_lo, high=CFMAX_snow_up), # CFMAX_snow
Uniform(low=CFMAX_rel_lo, high=CFMAX_rel_up), # CFMAX_rel
Uniform(low=SFCF_lo, high=SFCF_up), # SFCF
Uniform(low=CFR_lo, high=CFR_up), # CFR
]
# Number of needed parameter iterations for parametrization and sensitivity analysis
M = interf # inference factor (default = 4)
d = freqst # frequency step (default = 2)
k = len(param) # number of parameters
par_iter = (1 + 4 * M**2 * (1 + (k - 2) * d)) * k
def __init__(self, df, obs, obj_func=obj_func):
self.obj_func = obj_func
self.Input = df
self.obs = obs
def simulation(self, x):
with HiddenPrints():
parameter = matilda_parameter(
self.Input,
set_up_start=set_up_start,
set_up_end=set_up_end,
sim_start=sim_start,
sim_end=sim_end,
freq=freq,
lat=lat,
area_cat=area_cat,
area_glac=area_glac,
ele_dat=ele_dat,
ele_glac=ele_glac,
ele_cat=None,
lr_temp=x.lr_temp,
lr_prec=x.lr_prec,
PCORR=x.PCORR,
TT_snow=x.TT_snow,
TT_diff=x.TT_diff,
CFMAX_snow=x.CFMAX_snow,
CFMAX_rel=x.CFMAX_rel,
SFCF=x.SFCF,
CFR=x.CFR,
)
df_preproc = matilda_preproc(self.Input, parameter)
lookup_table = create_lookup_table(glacier_profile, parameter)
output_DDM = updated_glacier_melt(
df_preproc, lookup_table, glacier_profile, parameter
)[0]
sim = output_DDM.DDM_smb
return sim
def evaluation(self):
obs_preproc = self.obs.copy()
obs_preproc.set_index("YEAR", inplace=True)
obs_preproc.index = pd.to_datetime(obs_preproc.index)
return obs_preproc
def objectivefunction(self, simulation, evaluation):
# Aggregate MBs to fit the calibration data
sim = []
obs = []
sim_new = pd.DataFrame()
if obs_type == "winter":
for i in evaluation.index:
mb = simulation[winter(i, evaluation)].sum()
sim.append(mb)
mb_obs = evaluation[evaluation.index == i].WINTER_BALANCE.squeeze()
obs.append(mb_obs)
if obs_type == "summer":
for i in evaluation.index:
mb = simulation[summer(i, evaluation)].sum()
sim.append(mb)
mb_obs = evaluation[evaluation.index == i].SUMMER_BALANCE.squeeze()
obs.append(mb_obs)
if obs_type == "annual":
for i in evaluation.index:
mb_sim = simulation[annual(i, evaluation)].sum()
sim.append(mb_sim)
mb_obs = evaluation[evaluation.index == i].ANNUAL_BALANCE.squeeze()
obs.append(mb_obs)
# Crop both timeseries to same periods without NAs
sim_new["mod"] = pd.DataFrame(sim)
sim_new["obs"] = pd.DataFrame(obs)
clean = sim_new.dropna()
simulation_clean = clean["mod"]
evaluation_clean = clean["obs"]
if not self.obj_func:
# This is used if not overwritten by user
like = mae(evaluation_clean, simulation_clean)
print("MAE: " + str(like))
else:
# Way to ensure flexible spot setup class
like = self.obj_func(evaluation_clean, simulation_clean)
return like
return spot_setup_class
[docs]
def analyze_results(
sampling_data,
obs,
algorithm,
obj_dir="maximize",
fig_path=None,
dbname="mspot_results",
glacier_only=False,
target_mb=None,
):
"""
Analyze the results of SPOTPY sampling for model calibration of the MATILDA model.
This function processes sampling results from SPOTPY to identify the best parameter set, evaluate performance,
and optionally generate visualizations of the results.
Parameters
----------
sampling_data : str or SPOTPY result object
The file path to a SPOTPY results CSV file or a SPOTPY result object.
obs : str or pandas.DataFrame
The file path to observed data (in CSV format with 'Date' column) or a DataFrame containing observed data.
algorithm : str
The SPOTPY algorithm used for sampling (e.g., 'abc', 'dream', 'sceua').
obj_dir : str, optional
Direction of the objective function optimization, either 'maximize' or 'minimize'. Default is 'maximize'.
fig_path : str, optional
Path to save generated plots. If None, plots are not saved.
dbname : str, optional
Name for saving the database and plots. Default is 'mspot_results'.
glacier_only : bool, optional
If True, optimizes only for glacier-specific metrics (e.g., mass balance). Default is False.
target_mb : float, optional
Target mean annual mass balance for melt model calibration. If None, this metric is not considered. Default is None.
Returns
-------
dict
A dictionary containing:
- 'best_param': dict
Best parameter set identified during sampling.
- 'best_index': int
Index of the best run in the SPOTPY results.
- 'best_model_run': array
Model output corresponding to the best parameter set.
- 'best_objf': float
Value of the objective function for the best parameter set.
- 'best_simulation': pandas.Series (if glacier_only is False and target_mb is None)
Simulated time series corresponding to the best parameter set.
- 'sampling_plot': matplotlib.Figure (if applicable)
Iteration vs. objective function plot.
- 'best_run_plot': matplotlib.Figure (if applicable)
Best simulation vs. observed data plot.
- 'par_uncertain_plot': matplotlib.Figure (if applicable)
Parameter uncertainty plot.
Notes
-----
- The objective function can be maximized (e.g., Kling-Gupta Efficiency) or minimized (e.g., Mean Absolute Error)
based on the algorithm and obj_dir setting.
- For glacier-specific calibration, only glacier mass balance data is considered.
References
----------
- SPOTPY library:
- Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
- GitHub repository: https://github.com/thouska/spotpy
"""
if isinstance(obs, str):
if glacier_only:
obs = pd.read_csv(obs)
else:
obs = pd.read_csv(obs, index_col="Date", parse_dates=["Date"])
if isinstance(sampling_data, str):
if sampling_data.endswith(".csv"):
sampling_data = sampling_data[: len(sampling_data) - 4]
results = spotpy.analyser.load_csv_results(sampling_data)
else:
results = sampling_data.getdata()
maximize = ["abc", "dds", "demcz", "dream", "rope", "sa", "fscabc", "mcmc", "mle"]
minimize = ["nsgaii", "padds", "sceua"]
both = ["fast", "lhs", "mc"]
if algorithm in maximize:
best_param = spotpy.analyser.get_best_parameterset(results)
elif algorithm in minimize:
best_param = spotpy.analyser.get_best_parameterset(results, maximize=False)
elif algorithm in both:
print(
"WARNING: The selected algorithm "
+ algorithm
+ " can either maximize or minimize the objective function."
" You can specify the direction by passing obj_dir to analyze_results(). The default is 'maximize'."
)
if obj_dir == "maximize":
best_param = spotpy.analyser.get_best_parameterset(results)
elif obj_dir == "minimize":
best_param = spotpy.analyser.get_best_parameterset(results, maximize=False)
else:
raise ValueError(
"Invalid argument for obj_dir. Choose 'minimize' or 'maximize'."
)
else:
raise ValueError(
"Invalid argument for algorithm. Available algorithms: ['abc', 'dds', 'demcz', 'dream', 'rope', 'sa',"
"'fscabc', 'mcmc', 'mle', 'nsgaii', 'padds', 'sceua', 'fast', 'lhs', 'mc']"
)
par_names = spotpy.analyser.get_parameternames(best_param)
param_zip = zip(par_names, best_param[0])
best_param = dict(param_zip)
if glacier_only:
bestindex, bestobjf = spotpy.analyser.get_minlikeindex(
results
) # Run with lowest MAE
best_model_run = results[bestindex]
elif target_mb is not None:
if obj_dir == "minimize":
bestindex, bestobjf = spotpy.analyser.get_minlikeindex(results)
else:
bestindex, bestobjf = spotpy.analyser.get_maxlikeindex(results)
best_model_run = results[bestindex]
else:
sim_start = obs.index[0]
sim_end = obs.index[-1]
bestindex, bestobjf = spotpy.analyser.get_maxlikeindex(
results
) # Run with highest KGE
best_model_run = results[bestindex]
fields = [word for word in best_model_run.dtype.names if word.startswith("sim")]
best_simulation = pd.Series(
list(list(best_model_run[fields])[0]),
index=pd.date_range(sim_start, sim_end),
)
# Only necessary because obs has a datetime. Thus, both need a datetime.
if not glacier_only and target_mb is None:
fig1 = plt.figure(1, figsize=(9, 5))
plt.plot(results["like1"])
plt.ylabel("KGE")
plt.xlabel("Iteration")
if fig_path is not None:
plt.savefig(fig_path + "/" + dbname + "_sampling_plot.png")
fig2 = plt.figure(figsize=(16, 9))
ax = plt.subplot(1, 1, 1)
ax.plot(
best_simulation,
color="black",
linestyle="solid",
label="Best objf.=" + str(bestobjf),
)
ax.plot(obs, "r.", markersize=3, label="Observation data")
plt.xlabel("Date")
plt.ylabel("Discharge [mm d-1]")
plt.legend(loc="upper right")
if fig_path is not None:
plt.savefig(fig_path + "/" + dbname + "_best_run_plot.png")
fig3 = plt.figure(figsize=(16, 9))
ax = plt.subplot(1, 1, 1)
q5, q95 = [], []
for field in fields:
q5.append(np.percentile(results[field][-100:-1], 2.5))
q95.append(np.percentile(results[field][-100:-1], 97.5))
ax.plot(q5, color="dimgrey", linestyle="solid")
ax.plot(q95, color="dimgrey", linestyle="solid")
ax.fill_between(
np.arange(0, len(q5), 1),
list(q5),
list(q95),
facecolor="dimgrey",
zorder=0,
linewidth=0,
label="parameter uncertainty",
)
ax.plot(
np.array(obs), "r.", label="data"
) # Need to remove Timestamp from Evaluation to make comparable
# ax.set_ylim(0, 100)
ax.set_xlim(0, len(obs))
ax.legend()
if fig_path is not None:
plt.savefig(fig_path + "/" + dbname + "_par_uncertain_plot.png")
return {
"best_param": best_param,
"best_index": bestindex,
"best_model_run": best_model_run,
"best_objf": bestobjf,
"best_simulation": best_simulation,
"sampling_plot": fig1,
"best_run_plot": fig2,
"par_uncertain_plot": fig3,
}
return {
"best_param": best_param,
"best_index": bestindex,
"best_model_run": best_model_run,
"best_objf": bestobjf,
}
[docs]
def psample(
df,
obs,
rep=10,
output=None,
dbname="matilda_par_smpl",
dbformat=None,
obj_func=None,
opt_iter=False,
fig_path=None, # savefig=False,
set_up_start=None,
set_up_end=None,
sim_start=None,
sim_end=None,
freq="D",
lat=None,
area_cat=None,
area_glac=None,
ele_dat=None,
ele_glac=None,
ele_cat=None,
glacier_profile=None,
interf=4,
freqst=2,
parallel=False,
cores=2,
save_sim=True,
elev_rescaling=True,
glacier_only=False,
obs_type="annual",
target_mb=None,
target_swe=None,
swe_scaling=None,
algorithm="lhs",
obj_dir="maximize",
fix_param=None,
fix_val=None,
demcz_args: dict = None, # DEMCz settings
**kwargs,
):
"""
Run parameter sampling for the MATILDA model using SPOTPY.
This function sets up a SPOTPY sampling routine for parameter optimization or sensitivity analysis.
It supports parallel and serial computation, custom objective functions, and a variety of SPOTPY algorithms.
Parameters
----------
df : pandas.DataFrame
Input data for the MATILDA model.
obs : pandas.DataFrame or str
Observed data as a DataFrame or a file path to a CSV file containing observations.
rep : int, optional
Number of sampling repetitions (default is 10).
output : str, optional
Path to save the output files (default is None).
dbname : str, optional
Name of the database for saving results (default is 'matilda_par_smpl').
dbformat : str, optional
Format for the SPOTPY database (e.g., 'csv', 'sql'). Default is None (no database saved).
obj_func : callable, optional
Custom objective function for parameter optimization. If None, defaults are used.
opt_iter : bool, optional
If True, samples the optimum number of iterations based on the setup (default is False).
fig_path : str, optional
Path to save generated plots (default is None).
set_up_start, set_up_end : str, optional
Start and end dates for the model setup period (e.g., '2000-01-01').
sim_start, sim_end : str, optional
Start and end dates for the simulation period.
freq : str, optional
Temporal resolution of the input data (default is 'D').
lat : float, optional
Latitude of the study area (used for potential evapotranspiration calculations).
area_cat : float, optional
Total catchment area in km².
area_glac : float, optional
Glacierized area in km².
ele_dat, ele_glac, ele_cat : float, optional
Elevations (mean, glacier, and catchment).
glacier_profile : pandas.DataFrame, optional
Glacier profile data for rescaling and optimization routines.
interf : int, optional
Inference factor for SPOTPY (default is 4).
freqst : int, optional
Frequency step for SPOTPY (default is 2).
parallel : bool, optional
If True, runs sampling in parallel using MPI (default is False).
cores : int, optional
Number of cores to use for parallel sampling (default is 2).
save_sim : bool, optional
Whether to save simulation results in the database (default is True).
elev_rescaling : bool, optional
If True, applies elevation-based rescaling for glacier simulations (default is True).
glacier_only : bool, optional
If True, runs sampling specifically for glacier-related parameters (default is False).
obs_type : str, optional
Type of observed data for glacier calibration ('annual', 'winter', or 'summer'). Default is 'annual'.
target_mb : float, optional
Target mass balance for glacier calibration (default is None).
target_swe : float, optional
Target snow water equivalent for calibration (default is None).
swe_scaling : float, optional
Scaling factor for snow water equivalent (default is None).
algorithm : str, optional
SPOTPY algorithm to use (e.g., 'lhs', 'mcmc', 'dream'). Default is 'lhs'.
obj_dir : str, optional
Direction of the objective function optimization ('maximize' or 'minimize'). Default is 'maximize'.
fix_param : list of str, optional
List of parameters to fix during sampling (default is None).
fix_val : dict, optional
Dictionary of fixed parameter values (default is None).
demcz_args : dict, optional
Additional arguments for the DEMCz algorithm (default is None).
kwargs : dict, optional
Additional parameters for the MATILDA model.
Returns
-------
dict
Results of the sampling, including:
- 'best_param': dict
Best parameter set identified during sampling.
- 'best_index': int
Index of the best run in the SPOTPY results.
- 'best_model_run': array
Model output corresponding to the best parameter set.
- 'best_objf': float
Value of the objective function for the best parameter set.
- 'best_simulation': pandas.Series (if applicable)
Simulated time series corresponding to the best parameter set.
- 'sampling_plot': matplotlib.Figure (if applicable)
Iteration vs. objective function plot.
- 'best_run_plot': matplotlib.Figure (if applicable)
Best simulation vs. observed data plot.
- 'par_uncertain_plot': matplotlib.Figure (if applicable)
Parameter uncertainty plot.
Notes
-----
- This function uses the SPOTPY library for parameter sampling and optimization.
- Parallel computation is supported for specific algorithms ('mc', 'lhs', 'fast', 'rope', 'sceua', 'demcz').
References
----------
- SPOTPY library:
- Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
- GitHub repository: https://github.com/thouska/spotpy
"""
cwd = os.getcwd()
if output is not None:
os.chdir(output)
# Setup model class:
if glacier_only:
setup = spot_setup_glacier(
set_up_start=set_up_start,
set_up_end=set_up_end,
sim_start=sim_start,
sim_end=sim_end,
freq=freq,
area_cat=area_cat,
area_glac=area_glac,
ele_dat=ele_dat,
ele_glac=ele_glac,
lat=lat,
interf=interf,
freqst=freqst,
glacier_profile=glacier_profile,
obs_type=obs_type,
**kwargs,
)
else:
setup = spot_setup(
set_up_start=set_up_start,
set_up_end=set_up_end,
sim_start=sim_start,
sim_end=sim_end,
freq=freq,
area_cat=area_cat,
area_glac=area_glac,
ele_dat=ele_dat,
ele_glac=ele_glac,
ele_cat=ele_cat,
lat=lat,
interf=interf,
freqst=freqst,
glacier_profile=glacier_profile,
elev_rescaling=elev_rescaling,
target_mb=target_mb,
fix_param=fix_param,
fix_val=fix_val,
target_swe=target_swe,
swe_scaling=swe_scaling,
**kwargs,
)
psample_setup = setup(
df, obs, target_swe, obj_func
) # Define custom objective function using obj_func=
alg_selector = {
"mc": spotpy.algorithms.mc,
"sceua": spotpy.algorithms.sceua,
"mcmc": spotpy.algorithms.mcmc,
"mle": spotpy.algorithms.mle,
"abc": spotpy.algorithms.abc,
"sa": spotpy.algorithms.sa,
"dds": spotpy.algorithms.dds,
"demcz": spotpy.algorithms.demcz,
"dream": spotpy.algorithms.dream,
"fscabc": spotpy.algorithms.fscabc,
"lhs": spotpy.algorithms.lhs,
"padds": spotpy.algorithms.padds,
"rope": spotpy.algorithms.rope,
"fast": spotpy.algorithms.fast,
"nsgaii": spotpy.algorithms.nsgaii,
}
if target_mb is not None: # Format errors in database csv when saving simulations
save_sim = False
if parallel:
sampler = alg_selector[algorithm](
psample_setup,
dbname=dbname,
dbformat=dbformat,
parallel="mpi",
optimization_direction=obj_dir,
save_sim=save_sim,
)
if algorithm in ("mc", "lhs", "fast", "rope"):
sampler.sample(rep)
elif algorithm == "sceua":
sampler.sample(rep, ngs=cores)
elif algorithm == "demcz":
sampler.sample(rep, nChains=cores, **demcz_args)
else:
raise ValueError(
"The selected algorithm is ineligible for parallel computing."
'Either select a different algorithm (mc, lhs, fast, rope, sceua or demcz) or set "parallel = False".'
)
else:
sampler = alg_selector[algorithm](
psample_setup,
dbname=dbname,
dbformat=dbformat,
save_sim=save_sim,
optimization_direction=obj_dir,
)
if opt_iter:
if yesno(
f"\n******** WARNING! Your optimum # of iterations is {psample_setup.par_iter}. "
"This may take a long time.\n******** Do you wish to proceed"
):
sampler.sample(
psample_setup.par_iter
) # ideal number of reps = psample_setup.par_iter
else:
print(f"Proceeding with {rep} iterations.")
sampler.sample(rep)
else:
sampler.sample(rep)
# Change dbformat to None for short tests but to 'csv' or 'sql' to avoid data loss in case off long calculations.
if target_mb is None:
psample_setup.evaluation().to_csv(dbname + "_observations.csv")
else:
psample_setup.evaluation()[0].to_csv(dbname + "_observations.csv")
if fix_param is not None:
print("Fixed parameters:\n")
defaults = {
"lr_temp": -0.006,
"lr_prec": 0,
"TT_snow": 0,
"TT_diff": 2,
"CFMAX_snow": 2.5,
"CFMAX_rel": 2,
"BETA": 1.0,
"CET": 0.15,
"FC": 250,
"K0": 0.055,
"K1": 0.055,
"K2": 0.04,
"LP": 0.7,
"MAXBAS": 3.0,
"PERC": 1.5,
"UZL": 120,
"PCORR": 1.0,
"SFCF": 0.7,
"CWH": 0.1,
"AG": 0.7,
"CFR": 0.15,
}
print_par = {}
for p in fix_param:
if p in fix_val:
print_par[p] = fix_val[p]
else:
print_par[p] = defaults[p]
for key, value in print_par.items():
print(f"{key}: {value}")
print("\nNOTE: Fixed parameters are not listed in the final parameter set.\n")
if not parallel:
if target_mb is None:
results = analyze_results(
sampler,
psample_setup.evaluation(),
algorithm=algorithm,
obj_dir=obj_dir,
fig_path=fig_path,
dbname=dbname,
glacier_only=glacier_only,
)
else:
results = analyze_results(
sampler,
psample_setup.evaluation()[0],
algorithm=algorithm,
obj_dir=obj_dir,
fig_path=fig_path,
dbname=dbname,
glacier_only=glacier_only,
target_mb=target_mb,
)
os.chdir(cwd)
return results
os.chdir(cwd)
[docs]
def load_parameters(path, algorithm, obj_dir="maximize", glacier_only=False):
"""
Load the best parameter set from SPOTPY sampling results.
This function retrieves the best parameter set from a SPOTPY sampling results file, based on the specified algorithm
and objective function direction.
Parameters
----------
path : str
File path to the SPOTPY sampling results (e.g., 'results.csv').
algorithm : str
SPOTPY algorithm used for sampling (e.g., 'lhs', 'mcmc', 'dream').
obj_dir : str, optional
Direction of the objective function optimization ('maximize' or 'minimize'). Default is 'maximize'.
glacier_only : bool, optional
If True, processes only glacier-specific sampling results (default is False).
Returns
-------
dict
A dictionary containing the best parameter set identified from the sampling results.
Notes
-----
- This function uses the `analyze_results` function to extract the best parameters.
- It assumes that the corresponding observation file is named `<path_without_extension>_observations.csv`.
- Depending on the number of samples loading of the sampling file might be slow.
References
----------
- SPOTPY library:
- Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
- GitHub repository: https://github.com/thouska/spotpy
"""
sampling_csv = path
sampling_obs = path.split(".")[0] + "_observations.csv"
results = analyze_results(
sampling_csv, sampling_obs, algorithm, obj_dir, glacier_only=glacier_only
)
return results["best_param"]
[docs]
def get_par_bounds(path, threshold=10, percentage=True, drop=None):
"""
Generate parameter bounds from SPOTPY sampling results.
This function extracts the minimum and maximum values of model parameters from the best-performing runs in
SPOTPY sampling results. The best-performing runs are determined based on a likelihood threshold or percentage.
Parameters
----------
path : str
File path to the SPOTPY sampling results (e.g., 'results.csv').
threshold : float, optional
The threshold for selecting the best-performing runs:
- If `percentage` is True, `threshold` represents the percentage of the top-performing runs to consider.
- If `percentage` is False, `threshold` represents the numerical threshold for the objective function.
Default is 10.
percentage : bool, optional
Determines whether to interpret `threshold` as a percentage (True) or a numerical threshold (False).
Default is True.
drop : list of str, optional
A list of parameter names to exclude from the resulting bounds. Default is None.
Returns
-------
dict
A dictionary containing the lower (`_lo`) and upper (`_up`) bounds for each parameter, derived from the
best-performing runs.
Notes
-----
- The function loads SPOTPY sampling results and identifies the best-performing runs using the specified threshold
and criteria.
- Parameters specified in the `drop` list are excluded from the output bounds.
- The resulting dictionary contains keys in the format `<parameter_name>_lo` and `<parameter_name>_up`.
References
----------
- SPOTPY library:
- Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
- GitHub repository: https://github.com/thouska/spotpy
"""
if drop is None:
drop = [] # Initialize to an empty list if not provided
result_path = path
results = spotpy.analyser.load_csv_results(result_path)
# Get parameters of best model runs
if percentage:
best = spotpy.analyser.get_posterior(
results, maximize=False, percentage=threshold
) # get best xx% runs
else:
best = results[
np.where(results["like1"] <= threshold)
] # get all runs below a treshhold
params = spotpy.analyser.get_parameters(best)
# Write min and max parameter values of best model runs to dictionary
par_bounds = {}
for i in spotpy.analyser.get_parameter_fields(best):
p = params[i]
par_bounds[i.split("par")[1] + "_lo"] = min(p)
par_bounds[i.split("par")[1] + "_up"] = max(p)
for i in drop:
del par_bounds[i + "_lo"]
del par_bounds[i + "_up"]
return par_bounds