Source code for matilda.mspot_glacier

# -*- coding: UTF-8 -*-
"""
MATILDA Calibration Module "mspot"
--------------------------
This script provides tools for calibrating the MATILDA glacio-hydrological model using statistical parameter optimization
techniques. It makes extensive use of the SPOTPY package for sampling, optimization, and parameter uncertainty analysis. The module
is tailored for glacierized catchments and includes support for both catchment-wide and glacier-only calibration.

Key Features:
-------------
- Parameter Sampling: Flexible setup for sampling hydrological model parameters using various algorithms from SPOTPY.
- Objective Functions: Includes multiple objective functions such as Kling-Gupta Efficiency (KGE), Mean Absolute Error
  (MAE), and custom user-defined functions.
- Calibration Targets: Supports calibration against observed discharge, glacier mass balance, and snow water equivalent.
- Statistical Analysis: Provides tools to analyze parameter posterior distributions and derive parameter bounds.
- Seasonal and Annual Analysis: Includes functions for evaluating seasonal (winter/summer) and annual metrics.
- Parallel Computing: Supports parallel sampling for computational efficiency.

Functions:
----------
1. `yesno`: Interactive function to confirm actions.
2. `dict2bounds`: Converts a dictionary of parameter values into bounds.
3. `loglike_kge`: Calculates a pseudo-log-likelihood using the Kling-Gupta Efficiency (KGE).
4. `winter`, `summer`, `annual`: Extract seasonal or annual data slices for evaluation.
5. `scaled_pdd`, `scaled_ndd`: Apply temperature lapse rates to calculate positive or negative degree days.
6. `spot_setup` and `spot_setup_glacier`: Set up SPOTPY sampling for catchment-wide and glacier-specific calibration.
7. `analyze_results`: Analyze SPOTPY results to identify the best-performing parameter set and generate diagnostic plots.
8. `psample`: Automates parameter sampling and optimization with various SPOTPY algorithms.
9. `load_parameters`: Loads the best-performing parameter set from SPOTPY results.
10. `get_par_bounds`: Derives parameter bounds from SPOTPY sampling results.

Reference:
-----------
SPOTPY Library:
   - Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made
     Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180
   - GitHub repository: https://github.com/thouska/spotpy

Dependencies
------------
- Python 3.7 or higher
- spotpy
- pandas
- numpy
- matplotlib
- hydroeval
- scipy

Usage:
------
This script is designed to be part of the MATILDA framework as calibration tool for the matilda.core model.

License:
--------
This software is released under the MIT License. See LICENSE file for details.

Contact
-------
For questions or contributions, please contact:
- Developer: Phillip Schuster
- Email: phillip.schuster@geo.hu-berlin.de
- Institution: Humboldt-Universität zu Berlin
"""

import sys
import os
from datetime import date
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import spotpy
import HydroErr as he
from scipy.stats import gamma
from spotpy.parameter import Uniform
from spotpy.objectivefunctions import mae

from matilda.core import (
    matilda_simulation,
    matilda_parameter,
    matilda_preproc,
    create_lookup_table,
    updated_glacier_melt,
)


[docs] class HiddenPrints: """ Suppress prints during multiple iterations or noisy function calls. This utility class redirects standard output to `/dev/null`, effectively silencing any print statements during its usage. It can be used as a context manager to suppress output temporarily. Example: with HiddenPrints(): # Any print statements here will be suppressed noisy_function() Methods ------- __enter__() Redirects standard output to `/dev/null`. __exit__(exc_type, exc_val, exc_tb) Restores the original standard output. """ def __init__(self): # Initialize the attribute in the constructor self._original_stdout = None
[docs] def __enter__(self): self._original_stdout = sys.stdout sys.stdout = open(os.devnull, "w", encoding="utf-8")
[docs] def __exit__(self, exc_type, exc_val, exc_tb): if sys.stdout: sys.stdout.close() sys.stdout = self._original_stdout
[docs] def yesno(question): """ Prompt the user with a yes/no question and return the response. This function repeatedly prompts the user with the given question until a valid response ('y' or 'n') is entered. Parameters ---------- question : str The yes/no question to present to the user. Returns ------- bool Returns True if the user responds with 'y', and False if the user responds with 'n'. """ prompt = f"{question} ? (y/n): " ans = input(prompt).strip().lower() if ans not in ["y", "n"]: print(f"{ans} is invalid, please try again...") return yesno(question) if ans == "y": return True return False
[docs] def dict2bounds(p_dict, drop=None): """ Convert a parameter dictionary to a bounds dictionary to be passed to spot_setup(). This function takes a dictionary of parameters and converts it into a dictionary containing both lower (`_lo`) and upper (`_up`) bounds for each parameter. Optionally, specific parameters can be excluded from the resulting dictionary. Parameters ---------- p_dict : dict A dictionary containing parameter names as keys and their initial values as values. drop : list, optional A list of parameter names to exclude from the resulting bounds dictionary. Returns ------- dict A dictionary containing both lower (`_lo`) and upper (`_up`) bounds for the remaining parameters in `p_dict`. """ if drop is None: drop = [] for i in drop: p_dict.pop(i, None) # Use pop to avoid KeyError if key doesn't exist p = { **dict(zip([i + "_lo" for i in p_dict.keys()], p_dict.values())), **dict(zip([i + "_up" for i in p_dict.keys()], p_dict.values())), } return p
[docs] def loglike_kge(qsim, qobs): """ Calculate a pseudo log-likelihood function using the Kling-Gupta Efficiency (KGE) and a gamma distribution. This function computes a log-likelihood value based on the KGE between simulated and observed flow values, and models the exceedance deviation (ED) using a gamma distribution. Parameters ---------- qsim : array_like Array of simulated flow values. qobs : array_like Array of observed flow values. Returns ------- float The calculated value of the log-likelihood function. Notes ----- The log-likelihood function is based on the Kling-Gupta Efficiency (KGE) following Liu et al. (2022). The exceedance deviation (ED) is derived from the KGE as `ED = 1 - KGE`. The gamma probability density function (PDF) is then used to model the likelihood, with shape parameter `a=0.5` and scale parameter `scale=1`. References ---------- - Liu, Y., Fernández-Ortega, J., Mudarra, M., and Hartmann, A. (2022). "Pitfalls and a feasible solution for using KGE as an informal likelihood function in MCMC methods: DREAM(ZS) as an example." HESS, 26(20). - Kling, H., Gupta, H., & Yilmaz, K. K. (2012). "Model selection criteria for rainfall-runoff models: Representation of variability in Bayesian Model Averaging." Water Resources Research, 48(6), W06306. - `hydroeval`: https://github.com/ThibHlln/hydroeval - `scipy.stats.gamma`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html """ # Calculate KGE n = len(qobs) KGE = he.kge_2012(qsim, qobs, remove_zero=True) ED = 1 - KGE # Calculate log-likelihood function gammapdf = gamma.pdf(ED, a=0.5, scale=1) log_L = 0.5 * n * np.log(gammapdf) return log_L
[docs] def winter(year, data): """ Extract the winter period for a given year from a record of the World Glacier Monitoring service (WGMS). This function filters the input dataset for the specified year and returns a slice object representing the winter period based on the `BEGIN_PERIOD` and `END_WINTER` columns. Parameters ---------- year : int The year for which the winter period is to be extracted. data : pandas.DataFrame The input dataset containing at least the following columns: - `BEGIN_PERIOD`: The start of the winter period. - `END_WINTER`: The end of the winter period. - Index representing years. Returns ------- slice A slice object representing the winter period for the specified year. Notes ----- The function assumes that the dataset is indexed by years and contains the `BEGIN_PERIOD` and `END_WINTER` columns with appropriate datetime or timestamp values. References ---------- - WGMS (2024): Fluctuations of Glaciers Database. World Glacier Monitoring Service, Zurich, Switzerland. https://doi.org/10.5904/wgms-fog-2024-01 """ data = data[data.index == year] winter_slice = slice(data.BEGIN_PERIOD.squeeze(), data.END_WINTER.squeeze()) return winter_slice
[docs] def summer(year, data): """ Extract the summer period for a given year from a record of the World Glacier Monitoring service (WGMS). This function filters the input dataset for the specified year and returns a slice object representing the summer period based on the `END_WINTER` and `END_PERIOD` columns. Parameters ---------- year : int The year for which the summer period is to be extracted. data : pandas.DataFrame The input dataset containing at least the following columns: - `END_WINTER`: The end of the winter period. - `END_PERIOD`: The end of the summer period. - Index representing years. Returns ------- slice A slice object representing the summer period for the specified year. Notes ----- The function assumes that the dataset is indexed by years and contains the `END_WINTER` and `END_PERIOD` columns with appropriate datetime or timestamp values. References ---------- - WGMS (2024): Fluctuations of Glaciers Database. World Glacier Monitoring Service, Zurich, Switzerland. https://doi.org/10.5904/wgms-fog-2024-01 """ data = data[data.index == year] summer_slice = slice(data.END_WINTER.squeeze(), data.END_PERIOD.squeeze()) return summer_slice
[docs] def annual(year, data): """ Extract the annual period for a given year from a record of the World Glacier Monitoring service (WGMS). This function filters the input dataset for the specified year and returns a slice object representing the annual period based on the `BEGIN_PERIOD` and `END_PERIOD` columns. Parameters ---------- year : int The year for which the annual period is to be extracted. data : pandas.DataFrame The input dataset containing at least the following columns: - `BEGIN_PERIOD`: The start of the annual period. - `END_PERIOD`: The end of the annual period. - Index representing years. Returns ------- slice A slice object representing the annual period for the specified year. Notes ----- The function assumes that the dataset is indexed by years and contains the `BEGIN_PERIOD` and `END_PERIOD` columns with appropriate datetime or timestamp values. References ---------- - WGMS (2024): Fluctuations of Glaciers Database. World Glacier Monitoring Service, Zurich, Switzerland. https://doi.org/10.5904/wgms-fog-2024-01 """ data = data[data.index == year] annual_slice = slice(data.BEGIN_PERIOD.squeeze(), data.END_PERIOD.squeeze()) return annual_slice
[docs] def scaled_pdd(data, elev, lr): """ Calculate scaled positive degree days (PDD) based on temperature lapse rate and elevation adjustments. This function adjusts the input temperature data for elevation differences using a given lapse rate, then calculates the positive degree days by setting all negative or zero temperatures to zero. Parameters ---------- data : array_like Array or series of temperature values (in °C). elev : float Elevation difference (in meters) used for scaling the temperature values. lr : float Temperature lapse rate (in °C/m) used for scaling. Returns ------- pdd : array_like Scaled positive degree days (PDD), where negative values are replaced with zero. Notes ----- Positive degree days are commonly used in snow and ice melt models to estimate melt based on temperature inputs. This function adjusts the temperature for elevation differences before computing PDD. Examples -------- >>> data = np.array([2, -1, 3, -2, 5]) >>> elev = 500 # meters >>> lr = -0.006 # °C/m >>> scaled_pdd(data, elev, lr) array([5, 0, 6, 0, 8]) """ s = data + elev * lr pdd = np.where(s > 0, s, 0) return pdd
[docs] def scaled_ndd(data, elev, lr): """ Calculate scaled negative degree days (NDD) based on temperature lapse rate and elevation adjustments. This function adjusts the input temperature data for elevation differences using a given lapse rate, then calculates the negative degree days by setting all positive or zero temperatures to zero and marking negative temperatures as 1. Parameters ---------- data : array_like Array or series of temperature values (in °C). elev : float Elevation difference (in meters) used for scaling the temperature values. lr : float Temperature lapse rate (in °C/m) used for scaling. Returns ------- ndd : array_like Scaled negative degree days (NDD), where negative temperatures are marked as 1 and others as 0. Notes ----- Negative degree days are used in some hydrological and glaciological models to track periods with freezing conditions. This function adjusts the temperature for elevation differences before computing NDD. Examples -------- >>> data = np.array([2, -1, 3, -2, 5]) >>> elev = 500 # meters >>> lr = -0.006 # °C/m >>> scaled_ndd(data, elev, lr) array([0, 1, 0, 1, 0]) """ s = data + elev * lr ndd = np.where(s < 0, 1, 0) return ndd
[docs] def spot_setup( set_up_start=None, set_up_end=None, sim_start=None, sim_end=None, freq="D", lat=None, area_cat=None, area_glac=None, ele_dat=None, ele_glac=None, ele_cat=None, glacier_profile=None, elev_rescaling=True, target_mb=None, target_swe=None, swe_scaling=None, fix_param=None, fix_val=None, obj_func=None, lr_temp_lo=-0.0065, lr_temp_up=-0.0055, lr_prec_lo=0, lr_prec_up=0.002, BETA_lo=1, BETA_up=6, CET_lo=0, CET_up=0.3, FC_lo=50, FC_up=500, K0_lo=0.01, K0_up=0.4, K1_lo=0.01, K1_up=0.4, K2_lo=0.001, K2_up=0.15, LP_lo=0.3, LP_up=1, MAXBAS_lo=2, MAXBAS_up=7, PERC_lo=0, PERC_up=3, UZL_lo=0, UZL_up=500, PCORR_lo=0.5, PCORR_up=2, TT_snow_lo=-1.5, TT_snow_up=1.5, TT_diff_lo=0.5, TT_diff_up=2.5, CFMAX_snow_lo=0.5, CFMAX_snow_up=10, CFMAX_rel_lo=1.2, CFMAX_rel_up=2, SFCF_lo=0.4, SFCF_up=1, CWH_lo=0, CWH_up=0.2, AG_lo=0, AG_up=1, CFR_lo=0.05, CFR_up=0.25, interf=4, freqst=2, ): """ Spotpy-based setup for parameter optimization and calibration of the MATILDA model. This class provides the framework to optimize the parameters of the MATILDA model using SPOTPY, a statistical parameter optimization tool. It enables flexible parameter setup, handles fixed parameters, and evaluates simulation results against observed data (e.g., streamflow, snow water equivalent, surface mass balance). Parameters ---------- General inputs: - set_up_start, set_up_end : str Start and end dates of the setup period. - sim_start, sim_end : str Start and end dates of the simulation period. - freq : str, optional Frequency of the data, default is daily ("D"). - lat : float, optional Latitude of the catchment for extraterrestrial radiation calculation. - area_cat, area_glac : float Catchment and glacierized areas in km². - ele_dat, ele_glac, ele_cat : float Mean elevations (m a.s.l.) of the data, glacier, and catchment. - glacier_profile : DataFrame, optional Glacier profile for scaling and elevation rescaling. - elev_rescaling : bool, optional Flag for enabling elevation rescaling, default is True. - target_mb, target_swe : float, optional Target surface mass balance (MB) or snow water equivalent (SWE) for calibration. - swe_scaling : float, optional Scaling factor for snow water equivalent. Used to account for the mismatch in resolution of the SWE calibration data and the glacier mask used for MATILDA. Parameter bounds for calibration: - Fix parameter bounds (e.g., `lr_temp_lo`, `lr_temp_up` for lapse rates). - `fix_param` : list, optional List of parameters to fix during calibration. - `fix_val` : dict, optional Fixed parameter values (if applicable). SPOTPY settings: - `interf` : int, optional Inference factor for parameter iterations, default is 4. - `freqst` : int, optional Frequency step for sensitivity analysis, default is 2. Methods ------- simulation(x, param_names, fix_param, fix_val, swe_scaling) Executes the MATILDA simulation with sampled parameters and fixed parameters (if specified). evaluation() Prepares the observed data (e.g., streamflow, SWE) for comparison with simulation results. objectivefunction(simulation, evaluation, params=None) Defines the objective function for model evaluation using the Kling-Gupta Efficiency (KGE) or user-defined metrics. Returns ------- spot_setup_class : class Configured SPOTPY setup class for MATILDA parameter optimization and calibration. References ---------- - SPOTPY library: - Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180 - GitHub repository: https://github.com/thouska/spotpy - Kling-Gupta Efficiency (KGE): - Gupta, H. V., & Kling, H. (2011). "On typical range, sensitivity, and normalization of Mean Squared Error and Nash-Sutcliffe Efficiency type metrics." Water Resources Research, 47(10). https://doi.org/10.1029/2011WR010962 - Kling, H., Fuchs, M., & Paulin, M. (2012). "Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios." Journal of Hydrology, 424-425, 264-277. https://doi.org/10.1016/j.jhydrol.2012.01.011 """ class spot_setup_class: """ spot_setup_class: A SPOTPY setup class for MATILDA model parameter calibration. This class is designed to facilitate the optimization and calibration of the MATILDA model parameters using SPOTPY, a statistical parameter optimization library. It defines the parameter space, simulation methods, evaluation functions, and the objective function to assess the model's performance. Attributes ---------- param : list List of SPOTPY parameter distributions for calibration. param_names : list List of names corresponding to the parameters being calibrated. par_iter : int Number of parameter iterations needed for sensitivity analysis and parameter inference. Methods ------- __init__(df, obs, swe, obj_func=None) Initializes the setup class with input data, observations, and an optional objective function. simulation(x, param_names=param_names, fix_param=fix_param, fix_val=fix_val, swe_scaling=swe_scaling) Runs the MATILDA simulation with sampled parameters and any fixed parameters specified. evaluation() Prepares the observed data (e.g., streamflow, snow water equivalent, surface mass balance) for comparison with simulation results. objectivefunction(simulation, evaluation, params=None) Calculates the objective function to evaluate model performance. Supports metrics such as the Kling-Gupta Efficiency (KGE) and custom metrics defined by the user. Usage ----- The `spot_setup_class` is instantiated with observed data (`obs`) and optionally with snow water equivalent (SWE) data and a custom objective function. Once instantiated, it can be passed to SPOTPY's optimization and sensitivity analysis tools to calibrate the MATILDA model. Example ------- from spotpy.algorithms import sceua spot_setup_instance = spot_setup(df, obs, swe, obj_func) sampler = sceua(spot_setup_instance) sampler.sample(1000) References ---------- - SPOTPY library: - Houska, T., et al. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE. - GitHub: https://github.com/thouska/spotpy - Kling-Gupta Efficiency (KGE): - Gupta, H. V., & Kling, H. (2011). "On typical range, sensitivity, and normalization of Mean Squared Error and Nash-Sutcliffe Efficiency type metrics." Water Resources Research. - Kling, H., et al. (2012). "Runoff conditions in the upper Danube basin under an ensemble of climate change scenarios." Journal of Hydrology. """ # defining all parameters and the distribution lr_temp = Uniform(low=lr_temp_lo, high=lr_temp_up) lr_prec = Uniform(low=lr_prec_lo, high=lr_prec_up) BETA = Uniform(low=BETA_lo, high=BETA_up) CET = Uniform(low=CET_lo, high=CET_up) FC = Uniform(low=FC_lo, high=FC_up) K0 = Uniform(low=K0_lo, high=K0_up) K1 = Uniform(low=K1_lo, high=K1_up) K2 = Uniform(low=K2_lo, high=K2_up) LP = Uniform(low=LP_lo, high=LP_up) MAXBAS = Uniform(low=MAXBAS_lo, high=MAXBAS_up) PERC = Uniform(low=PERC_lo, high=PERC_up) UZL = Uniform(low=UZL_lo, high=UZL_up) PCORR = Uniform(low=PCORR_lo, high=PCORR_up) TT_snow = Uniform(low=TT_snow_lo, high=TT_snow_up) TT_diff = Uniform(low=TT_diff_lo, high=TT_diff_up) CFMAX_snow = Uniform(low=CFMAX_snow_lo, high=CFMAX_snow_up) CFMAX_rel = Uniform(low=CFMAX_rel_lo, high=CFMAX_rel_up) SFCF = Uniform(low=SFCF_lo, high=SFCF_up) CWH = Uniform(low=CWH_lo, high=CWH_up) AG = Uniform(low=AG_lo, high=AG_up) CFR = Uniform(low=CFR_lo, high=CFR_up) # Create the list containing the variables param = [ lr_temp, lr_prec, BETA, CET, FC, K0, K1, K2, LP, MAXBAS, PERC, UZL, PCORR, TT_snow, TT_diff, CFMAX_snow, CFMAX_rel, SFCF, CWH, AG, CFR, ] # Exclude parameters defined in fix_param param_names = [ "lr_temp", "lr_prec", "BETA", "CET", "FC", "K0", "K1", "K2", "LP", "MAXBAS", "PERC", "UZL", "PCORR", "TT_snow", "TT_diff", "CFMAX_snow", "CFMAX_rel", "SFCF", "CWH", "AG", "CFR", ] # Exclude parameters that should be fixed if fix_param: param = [p for p, name in zip(param, param_names) if name not in fix_param] param_names = [param for param in param_names if param not in fix_param] for par in fix_param: if par in globals(): del globals()[par] if par in locals(): del locals()[par] # Number of needed parameter iterations for parametrization and sensitivity analysis M = interf # inference factor (default = 4) d = freqst # frequency step (default = 2) k = len(param) # number of parameters par_iter = (1 + 4 * M**2 * (1 + (k - 2) * d)) * k def __init__( self, df, obs, swe, swe_scaling=swe_scaling, fix_param=fix_param, fix_val=fix_val, obj_func=obj_func, ): self.obj_func = obj_func self.Input = df self.obs = obs self.swe = swe self.swe_scaling = swe_scaling self.fix_param = fix_param self.fix_val = fix_val def simulation(self, x, param_names=None): if param_names is None: param_names = self.param_names with HiddenPrints(): # Setup all parameters for sampling args = {par_name: x[par_name] for par_name in param_names} # Fix parameters on desired values if defined if self.fix_param is not None and self.fix_val is not None: for p in self.fix_param: if p in self.fix_val: args[p] = self.fix_val[p] sim = matilda_simulation( self.Input, obs=self.obs, output=None, set_up_start=set_up_start, set_up_end=set_up_end, sim_start=sim_start, sim_end=sim_end, freq=freq, lat=lat, area_cat=area_cat, area_glac=area_glac, ele_dat=ele_dat, ele_glac=ele_glac, ele_cat=ele_cat, plots=False, warn=False, glacier_profile=glacier_profile, elev_rescaling=elev_rescaling, **args, ) swe_sim = ( sim[0] .snowpack_off_glaciers["2000-01-01":"2017-09-30"] .to_frame(name="SWE_sim") ) if self.swe_scaling is not None: swe_sim = swe_sim * self.swe_scaling if target_mb is None: if target_swe is None: return sim[0].total_runoff return [sim[0].total_runoff, swe_sim.SWE_sim] if target_swe is None: return [sim[0].total_runoff, sim[5].smb_water_year.mean()] return [ sim[0].total_runoff, sim[5].smb_water_year.mean(), swe_sim.SWE_sim, ] def evaluation(self): obs_preproc = self.obs.copy() obs_preproc.set_index("Date", inplace=True) obs_preproc.index = pd.to_datetime(obs_preproc.index) obs_preproc = obs_preproc[sim_start:sim_end] # Changing the input unit from m³/s to mm. obs_preproc["Qobs"] = ( obs_preproc["Qobs"] * 86400 / (area_cat * 1000000) * 1000 ) # To daily resolution obs_preproc = obs_preproc.resample("D").agg(pd.Series.sum, skipna=False) # Expanding the observation period full years filling up with NAs idx_first = obs_preproc.index.year[1] idx_last = obs_preproc.index.year[-1] idx = pd.date_range( start=date(idx_first, 1, 1), end=date(idx_last, 12, 31), freq="D", name=obs_preproc.index.name, ) obs_preproc = obs_preproc.reindex(idx) obs_preproc = obs_preproc.fillna(np.NaN) if target_swe is not None: swe_obs = self.swe if "Date" in self.swe.columns: swe_obs["Date"] = pd.to_datetime(swe_obs["Date"]) swe_obs.set_index("Date", inplace=True) swe_obs = swe_obs * 1000 swe_obs = swe_obs["2000-01-01":"2017-09-30"] if target_mb is None: if target_swe is None: return obs_preproc.Qobs return [obs_preproc.Qobs, swe_obs.SWE_Mean] if target_swe is None: return [obs_preproc.Qobs, target_mb] return [obs_preproc.Qobs, target_mb, swe_obs.SWE_Mean] def objectivefunction(self, simulation, evaluation): # SPOTPY expects to get one or multiple values back, # that define the performance of the model run if target_mb is not None: if target_swe is not None: sim_runoff, sim_smb, sim_swe = simulation eval_runoff, eval_smb, eval_swe = evaluation obj3 = he.kge_2012(sim_swe, eval_swe, remove_zero=False) else: sim_runoff, sim_smb = simulation eval_runoff, eval_smb = evaluation obj2 = abs(eval_smb - sim_smb) simulation = sim_runoff evaluation = eval_runoff elif target_swe is not None: sim_runoff, sim_swe = simulation eval_runoff, eval_swe = evaluation obj3 = he.kge_2012(sim_swe, eval_swe, remove_zero=False) # Crop both timeseries to same periods without NAs sim_new = pd.DataFrame() sim_new["mod"] = pd.DataFrame(simulation) sim_new["obs"] = evaluation clean = sim_new.dropna() simulation_clean = clean["mod"] evaluation_clean = clean["obs"] if not self.obj_func: # This is used if not overwritten by user # obj1 = kge(evaluation_clean, simulation_clean) # SPOTPY internal kge obj1 = he.kge_2012( simulation_clean, evaluation_clean, remove_zero=True ) # same as MATILDA else: # Way to ensure flexible spot setup class obj1 = self.obj_func(evaluation_clean, simulation_clean) if target_mb is None: if target_swe is None: return obj1 return [obj1, obj3] if target_swe is None: return [obj1, obj2] return [obj1, obj2, obj3] return spot_setup_class
[docs] def spot_setup_glacier( set_up_start=None, set_up_end=None, sim_start=None, sim_end=None, freq="D", lat=None, area_cat=None, area_glac=None, ele_dat=None, ele_glac=None, glacier_profile=None, obs_type="annual", obj_func=None, lr_temp_lo=-0.0065, lr_temp_up=-0.0055, lr_prec_lo=0, lr_prec_up=0.002, PCORR_lo=0.5, PCORR_up=2, TT_snow_lo=-1.5, TT_snow_up=1.5, TT_diff_lo=0.5, TT_diff_up=2.5, CFMAX_snow_lo=0.5, CFMAX_snow_up=10, CFMAX_rel_lo=1.2, CFMAX_rel_up=2, SFCF_lo=0.4, SFCF_up=1, CFR_lo=0.05, CFR_up=0.25, interf=4, freqst=2, ): """ Spotpy-based setup for parameter optimization and calibration of the MATILDA model for a (sub-)catchment entirely covered by glaciers. This class provides the framework to optimize the parameters of the MATILDA model using SPOTPY, a statistical parameter optimization tool. It enables flexible parameter setup, handles fixed parameters, and evaluates simulation results against observed data (e.g., streamflow, snow water equivalent, surface mass balance). It supports annual, winter, and summer mass balance optimization based on observed data. Parameters ---------- General inputs: - set_up_start, set_up_end : str Start and end dates of the setup period. - sim_start, sim_end : str Start and end dates of the simulation period. - freq : str, optional Frequency of the data, default is daily ("D"). - lat : float, optional Latitude of the catchment for extraterrestrial radiation calculation. - area_cat, area_glac : float Catchment and glacierized areas in km². - ele_dat, ele_glac, ele_cat : float Mean elevations (m a.s.l.) of the data, glacier, and catchment. - glacier_profile : DataFrame Glacier profile used for elevation rescaling and look-up table generation. - obs_type : str, optional Type of observed mass balance data: "annual", "winter", or "summer". Default is "annual". - obj_func : callable, optional Custom objective function to compare simulation and observation. If None, the Mean Absolute Error (MAE) is used. Parameter bounds for calibration: - lr_temp_lo, lr_temp_up : float Bounds for temperature lapse rate (°C/m). - lr_prec_lo, lr_prec_up : float Bounds for precipitation correction factor. - PCORR_lo, PCORR_up : float Bounds for precipitation correction factor. - TT_snow_lo, TT_snow_up : float Bounds for snow temperature threshold (°C). - TT_diff_lo, TT_diff_up : float Bounds for the difference between rain and snow temperature thresholds (°C). - CFMAX_snow_lo, CFMAX_snow_up : float Bounds for the degree-day factor for snow melt (mm/°C/day). - CFMAX_rel_lo, CFMAX_rel_up : float Bounds for the degree-day factor for relative snow and ice melt. - SFCF_lo, SFCF_up : float Bounds for snow correction factor. - CFR_lo, CFR_up : float Bounds for the refreezing factor. SPOTPY settings: - interf : int, optional Inference factor for parameter iterations, default is 4. - freqst : int, optional Frequency step for sensitivity analysis, default is 2. Methods ------- simulation(x) Executes the glacier mass balance simulation using the degree-day model (DDM). evaluation() Prepares observed mass balance data (annual, winter, or summer) for comparison. objectivefunction(simulation, evaluation, params=None) Defines the objective function to calculate the error between simulated and observed mass balance values. Returns ------- spot_setup_class : class Configured SPOTPY setup class for glacier mass balance parameter optimization. References ---------- - SPOTPY library: - Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180 - GitHub repository: https://github.com/thouska/spotpy """ class spot_setup_class: """ spot_setup_class: A SPOTPY setup class for MATILDA model glacier mass balance parameter calibration. This class facilitates the calibration and optimization of the MATILDA model parameters for glacier mass balance modeling. It uses SPOTPY, a statistical parameter optimization library, to define the parameter space, simulate mass balance, and evaluate the model's performance against observed data. Attributes ---------- param : list List of SPOTPY parameter distributions for calibration, including lapse rates, temperature thresholds, and degree-day factors. par_iter : int Number of parameter iterations needed for sensitivity analysis and parameter inference. Methods ------- __init__(df, obs, obj_func=None) Initializes the class with input data (`df`), observed data (`obs`), and an optional custom objective function. simulation(x) Simulates glacier mass balance using the MATILDA model with the specified parameters. evaluation() Processes the observed mass balance data into a format compatible with the simulated outputs. objectivefunction(simulation, evaluation, params=None) Defines the objective function to evaluate the model's performance. By default, it uses the Mean Absolute Error (MAE) or a custom user-defined function. Usage ----- The `spot_setup_class` is instantiated with observed data and passed to SPOTPY for parameter optimization and sensitivity analysis. Example ------- from spotpy.algorithms import sceua spot_setup_instance = spot_setup_glacier(df, obs, obj_func) sampler = sceua(spot_setup_instance) sampler.sample(1000) Parameters ---------- General Inputs: - set_up_start, set_up_end : str Start and end dates of the setup period. - sim_start, sim_end : str Start and end dates of the simulation period. - freq : str, optional Frequency of the data, default is daily ("D"). - lat : float, optional Latitude of the catchment for extraterrestrial radiation calculation. - area_cat, area_glac : float Catchment and glacierized areas in km². - ele_dat, ele_glac, ele_cat : float Mean elevations (m a.s.l.) of the data, glacier, and catchment. - glacier_profile : DataFrame Glacier profile used for elevation rescaling and look-up table generation. - obs_type : str, optional Type of observed mass balance data ("annual", "winter", or "summer"). Default is "annual". - obj_func : callable, optional Custom objective function to compare simulation and observation. Defaults to Mean Absolute Error (MAE). Parameter Bounds for Calibration: - lr_temp_lo, lr_temp_up : float Bounds for the temperature lapse rate (°C/m). - lr_prec_lo, lr_prec_up : float Bounds for the precipitation correction factor. - PCORR_lo, PCORR_up : float Bounds for the precipitation correction factor. - TT_snow_lo, TT_snow_up : float Bounds for the snow temperature threshold (°C). - TT_diff_lo, TT_diff_up : float Bounds for the difference between rain and snow temperature thresholds (°C). - CFMAX_snow_lo, CFMAX_snow_up : float Bounds for the degree-day factor for snow melt (mm/°C/day). - CFMAX_rel_lo, CFMAX_rel_up : float Bounds for the degree-day factor for relative snow and ice melt. - SFCF_lo, SFCF_up : float Bounds for the snow correction factor. - CFR_lo, CFR_up : float Bounds for the refreezing factor. SPOTPY Settings: - interf : int, optional Inference factor for parameter iterations. Default is 4. - freqst : int, optional Frequency step for sensitivity analysis. Default is 2. Returns ------- spot_setup_class : class Configured SPOTPY setup class for glacier mass balance parameter optimization. References ---------- - SPOTPY library: - Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180 - GitHub repository: https://github.com/thouska/spotpy """ # defining all parameters and the distribution param = ( lr_temp, lr_prec, PCORR, TT_snow, TT_diff, CFMAX_snow, CFMAX_rel, SFCF, CFR, ) = [ Uniform(low=lr_temp_lo, high=lr_temp_up), # lr_temp Uniform(low=lr_prec_lo, high=lr_prec_up), # lr_prec Uniform(low=PCORR_lo, high=PCORR_up), # PCORR Uniform(low=TT_snow_lo, high=TT_snow_up), # TT_snow Uniform(low=TT_diff_lo, high=TT_diff_up), # TT_diff Uniform(low=CFMAX_snow_lo, high=CFMAX_snow_up), # CFMAX_snow Uniform(low=CFMAX_rel_lo, high=CFMAX_rel_up), # CFMAX_rel Uniform(low=SFCF_lo, high=SFCF_up), # SFCF Uniform(low=CFR_lo, high=CFR_up), # CFR ] # Number of needed parameter iterations for parametrization and sensitivity analysis M = interf # inference factor (default = 4) d = freqst # frequency step (default = 2) k = len(param) # number of parameters par_iter = (1 + 4 * M**2 * (1 + (k - 2) * d)) * k def __init__(self, df, obs, obj_func=obj_func): self.obj_func = obj_func self.Input = df self.obs = obs def simulation(self, x): with HiddenPrints(): parameter = matilda_parameter( self.Input, set_up_start=set_up_start, set_up_end=set_up_end, sim_start=sim_start, sim_end=sim_end, freq=freq, lat=lat, area_cat=area_cat, area_glac=area_glac, ele_dat=ele_dat, ele_glac=ele_glac, ele_cat=None, lr_temp=x.lr_temp, lr_prec=x.lr_prec, PCORR=x.PCORR, TT_snow=x.TT_snow, TT_diff=x.TT_diff, CFMAX_snow=x.CFMAX_snow, CFMAX_rel=x.CFMAX_rel, SFCF=x.SFCF, CFR=x.CFR, ) df_preproc = matilda_preproc(self.Input, parameter) lookup_table = create_lookup_table(glacier_profile, parameter) output_DDM = updated_glacier_melt( df_preproc, lookup_table, glacier_profile, parameter )[0] sim = output_DDM.DDM_smb return sim def evaluation(self): obs_preproc = self.obs.copy() obs_preproc.set_index("YEAR", inplace=True) obs_preproc.index = pd.to_datetime(obs_preproc.index) return obs_preproc def objectivefunction(self, simulation, evaluation): # Aggregate MBs to fit the calibration data sim = [] obs = [] sim_new = pd.DataFrame() if obs_type == "winter": for i in evaluation.index: mb = simulation[winter(i, evaluation)].sum() sim.append(mb) mb_obs = evaluation[evaluation.index == i].WINTER_BALANCE.squeeze() obs.append(mb_obs) if obs_type == "summer": for i in evaluation.index: mb = simulation[summer(i, evaluation)].sum() sim.append(mb) mb_obs = evaluation[evaluation.index == i].SUMMER_BALANCE.squeeze() obs.append(mb_obs) if obs_type == "annual": for i in evaluation.index: mb_sim = simulation[annual(i, evaluation)].sum() sim.append(mb_sim) mb_obs = evaluation[evaluation.index == i].ANNUAL_BALANCE.squeeze() obs.append(mb_obs) # Crop both timeseries to same periods without NAs sim_new["mod"] = pd.DataFrame(sim) sim_new["obs"] = pd.DataFrame(obs) clean = sim_new.dropna() simulation_clean = clean["mod"] evaluation_clean = clean["obs"] if not self.obj_func: # This is used if not overwritten by user like = mae(evaluation_clean, simulation_clean) print("MAE: " + str(like)) else: # Way to ensure flexible spot setup class like = self.obj_func(evaluation_clean, simulation_clean) return like return spot_setup_class
[docs] def analyze_results( sampling_data, obs, algorithm, obj_dir="maximize", fig_path=None, dbname="mspot_results", glacier_only=False, target_mb=None, ): """ Analyze the results of SPOTPY sampling for model calibration of the MATILDA model. This function processes sampling results from SPOTPY to identify the best parameter set, evaluate performance, and optionally generate visualizations of the results. Parameters ---------- sampling_data : str or SPOTPY result object The file path to a SPOTPY results CSV file or a SPOTPY result object. obs : str or pandas.DataFrame The file path to observed data (in CSV format with 'Date' column) or a DataFrame containing observed data. algorithm : str The SPOTPY algorithm used for sampling (e.g., 'abc', 'dream', 'sceua'). obj_dir : str, optional Direction of the objective function optimization, either 'maximize' or 'minimize'. Default is 'maximize'. fig_path : str, optional Path to save generated plots. If None, plots are not saved. dbname : str, optional Name for saving the database and plots. Default is 'mspot_results'. glacier_only : bool, optional If True, optimizes only for glacier-specific metrics (e.g., mass balance). Default is False. target_mb : float, optional Target mean annual mass balance for melt model calibration. If None, this metric is not considered. Default is None. Returns ------- dict A dictionary containing: - 'best_param': dict Best parameter set identified during sampling. - 'best_index': int Index of the best run in the SPOTPY results. - 'best_model_run': array Model output corresponding to the best parameter set. - 'best_objf': float Value of the objective function for the best parameter set. - 'best_simulation': pandas.Series (if glacier_only is False and target_mb is None) Simulated time series corresponding to the best parameter set. - 'sampling_plot': matplotlib.Figure (if applicable) Iteration vs. objective function plot. - 'best_run_plot': matplotlib.Figure (if applicable) Best simulation vs. observed data plot. - 'par_uncertain_plot': matplotlib.Figure (if applicable) Parameter uncertainty plot. Notes ----- - The objective function can be maximized (e.g., Kling-Gupta Efficiency) or minimized (e.g., Mean Absolute Error) based on the algorithm and obj_dir setting. - For glacier-specific calibration, only glacier mass balance data is considered. References ---------- - SPOTPY library: - Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180 - GitHub repository: https://github.com/thouska/spotpy """ if isinstance(obs, str): if glacier_only: obs = pd.read_csv(obs) else: obs = pd.read_csv(obs, index_col="Date", parse_dates=["Date"]) if isinstance(sampling_data, str): if sampling_data.endswith(".csv"): sampling_data = sampling_data[: len(sampling_data) - 4] results = spotpy.analyser.load_csv_results(sampling_data) else: results = sampling_data.getdata() maximize = ["abc", "dds", "demcz", "dream", "rope", "sa", "fscabc", "mcmc", "mle"] minimize = ["nsgaii", "padds", "sceua"] both = ["fast", "lhs", "mc"] if algorithm in maximize: best_param = spotpy.analyser.get_best_parameterset(results) elif algorithm in minimize: best_param = spotpy.analyser.get_best_parameterset(results, maximize=False) elif algorithm in both: print( "WARNING: The selected algorithm " + algorithm + " can either maximize or minimize the objective function." " You can specify the direction by passing obj_dir to analyze_results(). The default is 'maximize'." ) if obj_dir == "maximize": best_param = spotpy.analyser.get_best_parameterset(results) elif obj_dir == "minimize": best_param = spotpy.analyser.get_best_parameterset(results, maximize=False) else: raise ValueError( "Invalid argument for obj_dir. Choose 'minimize' or 'maximize'." ) else: raise ValueError( "Invalid argument for algorithm. Available algorithms: ['abc', 'dds', 'demcz', 'dream', 'rope', 'sa'," "'fscabc', 'mcmc', 'mle', 'nsgaii', 'padds', 'sceua', 'fast', 'lhs', 'mc']" ) par_names = spotpy.analyser.get_parameternames(best_param) param_zip = zip(par_names, best_param[0]) best_param = dict(param_zip) if glacier_only: bestindex, bestobjf = spotpy.analyser.get_minlikeindex( results ) # Run with lowest MAE best_model_run = results[bestindex] elif target_mb is not None: if obj_dir == "minimize": bestindex, bestobjf = spotpy.analyser.get_minlikeindex(results) else: bestindex, bestobjf = spotpy.analyser.get_maxlikeindex(results) best_model_run = results[bestindex] else: sim_start = obs.index[0] sim_end = obs.index[-1] bestindex, bestobjf = spotpy.analyser.get_maxlikeindex( results ) # Run with highest KGE best_model_run = results[bestindex] fields = [word for word in best_model_run.dtype.names if word.startswith("sim")] best_simulation = pd.Series( list(list(best_model_run[fields])[0]), index=pd.date_range(sim_start, sim_end), ) # Only necessary because obs has a datetime. Thus, both need a datetime. if not glacier_only and target_mb is None: fig1 = plt.figure(1, figsize=(9, 5)) plt.plot(results["like1"]) plt.ylabel("KGE") plt.xlabel("Iteration") if fig_path is not None: plt.savefig(fig_path + "/" + dbname + "_sampling_plot.png") fig2 = plt.figure(figsize=(16, 9)) ax = plt.subplot(1, 1, 1) ax.plot( best_simulation, color="black", linestyle="solid", label="Best objf.=" + str(bestobjf), ) ax.plot(obs, "r.", markersize=3, label="Observation data") plt.xlabel("Date") plt.ylabel("Discharge [mm d-1]") plt.legend(loc="upper right") if fig_path is not None: plt.savefig(fig_path + "/" + dbname + "_best_run_plot.png") fig3 = plt.figure(figsize=(16, 9)) ax = plt.subplot(1, 1, 1) q5, q95 = [], [] for field in fields: q5.append(np.percentile(results[field][-100:-1], 2.5)) q95.append(np.percentile(results[field][-100:-1], 97.5)) ax.plot(q5, color="dimgrey", linestyle="solid") ax.plot(q95, color="dimgrey", linestyle="solid") ax.fill_between( np.arange(0, len(q5), 1), list(q5), list(q95), facecolor="dimgrey", zorder=0, linewidth=0, label="parameter uncertainty", ) ax.plot( np.array(obs), "r.", label="data" ) # Need to remove Timestamp from Evaluation to make comparable # ax.set_ylim(0, 100) ax.set_xlim(0, len(obs)) ax.legend() if fig_path is not None: plt.savefig(fig_path + "/" + dbname + "_par_uncertain_plot.png") return { "best_param": best_param, "best_index": bestindex, "best_model_run": best_model_run, "best_objf": bestobjf, "best_simulation": best_simulation, "sampling_plot": fig1, "best_run_plot": fig2, "par_uncertain_plot": fig3, } return { "best_param": best_param, "best_index": bestindex, "best_model_run": best_model_run, "best_objf": bestobjf, }
[docs] def psample( df, obs, rep=10, output=None, dbname="matilda_par_smpl", dbformat=None, obj_func=None, opt_iter=False, fig_path=None, # savefig=False, set_up_start=None, set_up_end=None, sim_start=None, sim_end=None, freq="D", lat=None, area_cat=None, area_glac=None, ele_dat=None, ele_glac=None, ele_cat=None, glacier_profile=None, interf=4, freqst=2, parallel=False, cores=2, save_sim=True, elev_rescaling=True, glacier_only=False, obs_type="annual", target_mb=None, target_swe=None, swe_scaling=None, algorithm="lhs", obj_dir="maximize", fix_param=None, fix_val=None, demcz_args: dict = None, # DEMCz settings **kwargs, ): """ Run parameter sampling for the MATILDA model using SPOTPY. This function sets up a SPOTPY sampling routine for parameter optimization or sensitivity analysis. It supports parallel and serial computation, custom objective functions, and a variety of SPOTPY algorithms. Parameters ---------- df : pandas.DataFrame Input data for the MATILDA model. obs : pandas.DataFrame or str Observed data as a DataFrame or a file path to a CSV file containing observations. rep : int, optional Number of sampling repetitions (default is 10). output : str, optional Path to save the output files (default is None). dbname : str, optional Name of the database for saving results (default is 'matilda_par_smpl'). dbformat : str, optional Format for the SPOTPY database (e.g., 'csv', 'sql'). Default is None (no database saved). obj_func : callable, optional Custom objective function for parameter optimization. If None, defaults are used. opt_iter : bool, optional If True, samples the optimum number of iterations based on the setup (default is False). fig_path : str, optional Path to save generated plots (default is None). set_up_start, set_up_end : str, optional Start and end dates for the model setup period (e.g., '2000-01-01'). sim_start, sim_end : str, optional Start and end dates for the simulation period. freq : str, optional Temporal resolution of the input data (default is 'D'). lat : float, optional Latitude of the study area (used for potential evapotranspiration calculations). area_cat : float, optional Total catchment area in km². area_glac : float, optional Glacierized area in km². ele_dat, ele_glac, ele_cat : float, optional Elevations (mean, glacier, and catchment). glacier_profile : pandas.DataFrame, optional Glacier profile data for rescaling and optimization routines. interf : int, optional Inference factor for SPOTPY (default is 4). freqst : int, optional Frequency step for SPOTPY (default is 2). parallel : bool, optional If True, runs sampling in parallel using MPI (default is False). cores : int, optional Number of cores to use for parallel sampling (default is 2). save_sim : bool, optional Whether to save simulation results in the database (default is True). elev_rescaling : bool, optional If True, applies elevation-based rescaling for glacier simulations (default is True). glacier_only : bool, optional If True, runs sampling specifically for glacier-related parameters (default is False). obs_type : str, optional Type of observed data for glacier calibration ('annual', 'winter', or 'summer'). Default is 'annual'. target_mb : float, optional Target mass balance for glacier calibration (default is None). target_swe : float, optional Target snow water equivalent for calibration (default is None). swe_scaling : float, optional Scaling factor for snow water equivalent (default is None). algorithm : str, optional SPOTPY algorithm to use (e.g., 'lhs', 'mcmc', 'dream'). Default is 'lhs'. obj_dir : str, optional Direction of the objective function optimization ('maximize' or 'minimize'). Default is 'maximize'. fix_param : list of str, optional List of parameters to fix during sampling (default is None). fix_val : dict, optional Dictionary of fixed parameter values (default is None). demcz_args : dict, optional Additional arguments for the DEMCz algorithm (default is None). kwargs : dict, optional Additional parameters for the MATILDA model. Returns ------- dict Results of the sampling, including: - 'best_param': dict Best parameter set identified during sampling. - 'best_index': int Index of the best run in the SPOTPY results. - 'best_model_run': array Model output corresponding to the best parameter set. - 'best_objf': float Value of the objective function for the best parameter set. - 'best_simulation': pandas.Series (if applicable) Simulated time series corresponding to the best parameter set. - 'sampling_plot': matplotlib.Figure (if applicable) Iteration vs. objective function plot. - 'best_run_plot': matplotlib.Figure (if applicable) Best simulation vs. observed data plot. - 'par_uncertain_plot': matplotlib.Figure (if applicable) Parameter uncertainty plot. Notes ----- - This function uses the SPOTPY library for parameter sampling and optimization. - Parallel computation is supported for specific algorithms ('mc', 'lhs', 'fast', 'rope', 'sceua', 'demcz'). References ---------- - SPOTPY library: - Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180 - GitHub repository: https://github.com/thouska/spotpy """ cwd = os.getcwd() if output is not None: os.chdir(output) # Setup model class: if glacier_only: setup = spot_setup_glacier( set_up_start=set_up_start, set_up_end=set_up_end, sim_start=sim_start, sim_end=sim_end, freq=freq, area_cat=area_cat, area_glac=area_glac, ele_dat=ele_dat, ele_glac=ele_glac, lat=lat, interf=interf, freqst=freqst, glacier_profile=glacier_profile, obs_type=obs_type, **kwargs, ) else: setup = spot_setup( set_up_start=set_up_start, set_up_end=set_up_end, sim_start=sim_start, sim_end=sim_end, freq=freq, area_cat=area_cat, area_glac=area_glac, ele_dat=ele_dat, ele_glac=ele_glac, ele_cat=ele_cat, lat=lat, interf=interf, freqst=freqst, glacier_profile=glacier_profile, elev_rescaling=elev_rescaling, target_mb=target_mb, fix_param=fix_param, fix_val=fix_val, target_swe=target_swe, swe_scaling=swe_scaling, **kwargs, ) psample_setup = setup( df, obs, target_swe, obj_func ) # Define custom objective function using obj_func= alg_selector = { "mc": spotpy.algorithms.mc, "sceua": spotpy.algorithms.sceua, "mcmc": spotpy.algorithms.mcmc, "mle": spotpy.algorithms.mle, "abc": spotpy.algorithms.abc, "sa": spotpy.algorithms.sa, "dds": spotpy.algorithms.dds, "demcz": spotpy.algorithms.demcz, "dream": spotpy.algorithms.dream, "fscabc": spotpy.algorithms.fscabc, "lhs": spotpy.algorithms.lhs, "padds": spotpy.algorithms.padds, "rope": spotpy.algorithms.rope, "fast": spotpy.algorithms.fast, "nsgaii": spotpy.algorithms.nsgaii, } if target_mb is not None: # Format errors in database csv when saving simulations save_sim = False if parallel: sampler = alg_selector[algorithm]( psample_setup, dbname=dbname, dbformat=dbformat, parallel="mpi", optimization_direction=obj_dir, save_sim=save_sim, ) if algorithm in ("mc", "lhs", "fast", "rope"): sampler.sample(rep) elif algorithm == "sceua": sampler.sample(rep, ngs=cores) elif algorithm == "demcz": sampler.sample(rep, nChains=cores, **demcz_args) else: raise ValueError( "The selected algorithm is ineligible for parallel computing." 'Either select a different algorithm (mc, lhs, fast, rope, sceua or demcz) or set "parallel = False".' ) else: sampler = alg_selector[algorithm]( psample_setup, dbname=dbname, dbformat=dbformat, save_sim=save_sim, optimization_direction=obj_dir, ) if opt_iter: if yesno( f"\n******** WARNING! Your optimum # of iterations is {psample_setup.par_iter}. " "This may take a long time.\n******** Do you wish to proceed" ): sampler.sample( psample_setup.par_iter ) # ideal number of reps = psample_setup.par_iter else: print(f"Proceeding with {rep} iterations.") sampler.sample(rep) else: sampler.sample(rep) # Change dbformat to None for short tests but to 'csv' or 'sql' to avoid data loss in case off long calculations. if target_mb is None: psample_setup.evaluation().to_csv(dbname + "_observations.csv") else: psample_setup.evaluation()[0].to_csv(dbname + "_observations.csv") if fix_param is not None: print("Fixed parameters:\n") defaults = { "lr_temp": -0.006, "lr_prec": 0, "TT_snow": 0, "TT_diff": 2, "CFMAX_snow": 2.5, "CFMAX_rel": 2, "BETA": 1.0, "CET": 0.15, "FC": 250, "K0": 0.055, "K1": 0.055, "K2": 0.04, "LP": 0.7, "MAXBAS": 3.0, "PERC": 1.5, "UZL": 120, "PCORR": 1.0, "SFCF": 0.7, "CWH": 0.1, "AG": 0.7, "CFR": 0.15, } print_par = {} for p in fix_param: if p in fix_val: print_par[p] = fix_val[p] else: print_par[p] = defaults[p] for key, value in print_par.items(): print(f"{key}: {value}") print("\nNOTE: Fixed parameters are not listed in the final parameter set.\n") if not parallel: if target_mb is None: results = analyze_results( sampler, psample_setup.evaluation(), algorithm=algorithm, obj_dir=obj_dir, fig_path=fig_path, dbname=dbname, glacier_only=glacier_only, ) else: results = analyze_results( sampler, psample_setup.evaluation()[0], algorithm=algorithm, obj_dir=obj_dir, fig_path=fig_path, dbname=dbname, glacier_only=glacier_only, target_mb=target_mb, ) os.chdir(cwd) return results os.chdir(cwd)
[docs] def load_parameters(path, algorithm, obj_dir="maximize", glacier_only=False): """ Load the best parameter set from SPOTPY sampling results. This function retrieves the best parameter set from a SPOTPY sampling results file, based on the specified algorithm and objective function direction. Parameters ---------- path : str File path to the SPOTPY sampling results (e.g., 'results.csv'). algorithm : str SPOTPY algorithm used for sampling (e.g., 'lhs', 'mcmc', 'dream'). obj_dir : str, optional Direction of the objective function optimization ('maximize' or 'minimize'). Default is 'maximize'. glacier_only : bool, optional If True, processes only glacier-specific sampling results (default is False). Returns ------- dict A dictionary containing the best parameter set identified from the sampling results. Notes ----- - This function uses the `analyze_results` function to extract the best parameters. - It assumes that the corresponding observation file is named `<path_without_extension>_observations.csv`. - Depending on the number of samples loading of the sampling file might be slow. References ---------- - SPOTPY library: - Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180 - GitHub repository: https://github.com/thouska/spotpy """ sampling_csv = path sampling_obs = path.split(".")[0] + "_observations.csv" results = analyze_results( sampling_csv, sampling_obs, algorithm, obj_dir, glacier_only=glacier_only ) return results["best_param"]
[docs] def get_par_bounds(path, threshold=10, percentage=True, drop=None): """ Generate parameter bounds from SPOTPY sampling results. This function extracts the minimum and maximum values of model parameters from the best-performing runs in SPOTPY sampling results. The best-performing runs are determined based on a likelihood threshold or percentage. Parameters ---------- path : str File path to the SPOTPY sampling results (e.g., 'results.csv'). threshold : float, optional The threshold for selecting the best-performing runs: - If `percentage` is True, `threshold` represents the percentage of the top-performing runs to consider. - If `percentage` is False, `threshold` represents the numerical threshold for the objective function. Default is 10. percentage : bool, optional Determines whether to interpret `threshold` as a percentage (True) or a numerical threshold (False). Default is True. drop : list of str, optional A list of parameter names to exclude from the resulting bounds. Default is None. Returns ------- dict A dictionary containing the lower (`_lo`) and upper (`_up`) bounds for each parameter, derived from the best-performing runs. Notes ----- - The function loads SPOTPY sampling results and identifies the best-performing runs using the specified threshold and criteria. - Parameters specified in the `drop` list are excluded from the output bounds. - The resulting dictionary contains keys in the format `<parameter_name>_lo` and `<parameter_name>_up`. References ---------- - SPOTPY library: - Houska, T., Kraft, P., Chamorro-Chavez, A., & Breuer, L. (2015). "SPOTting Model Parameters Using a Ready-Made Python Package." PLOS ONE, 10(12), 1-22. https://doi.org/10.1371/journal.pone.0145180 - GitHub repository: https://github.com/thouska/spotpy """ if drop is None: drop = [] # Initialize to an empty list if not provided result_path = path results = spotpy.analyser.load_csv_results(result_path) # Get parameters of best model runs if percentage: best = spotpy.analyser.get_posterior( results, maximize=False, percentage=threshold ) # get best xx% runs else: best = results[ np.where(results["like1"] <= threshold) ] # get all runs below a treshhold params = spotpy.analyser.get_parameters(best) # Write min and max parameter values of best model runs to dictionary par_bounds = {} for i in spotpy.analyser.get_parameter_fields(best): p = params[i] par_bounds[i.split("par")[1] + "_lo"] = min(p) par_bounds[i.split("par")[1] + "_up"] = max(p) for i in drop: del par_bounds[i + "_lo"] del par_bounds[i + "_up"] return par_bounds