Models

pyextremes.models.models.get_model(model, extremes, distribution, distribution_kwargs=None, **kwargs)

Get distribution fitting model and fit it to given extreme values.

Parameters:

model : str, required
    Name of model. Supported models:
        MLE - Maximum Likelihood Estimate (MLE) model.
            Based on the 'scipy' package (scipy.stats.rv_continuous.fit).
        Emcee - Markov Chain Monte Carlo (MCMC) model.
            Based on the 'emcee' package by Daniel Foreman-Mackey.
extremes : pandas.Series, required
    Time series of extreme events.
distribution : str or scipy.stats.rv_continuous, required
    Distribution name compatible with scipy.stats
    or a subclass of scipy.stats.rv_continuous.
    See https://docs.scipy.org/doc/scipy/reference/stats.html
distribution_kwargs : dict, optional (default=None)
    Special keyword arguments, passed to the .fit method of the distribution.
    These keyword arguments represent parameters to be held fixed.
    Names of parameters to be fixed must have 'f' prefixes. Valid parameters:
        - shape(s): 'fc', e.g. fc=0
        - location: 'floc', e.g. floc=0
        - scale: 'fscale', e.g. fscale=1
    By default, no parameters are fixed.
    See documentation of a specific scipy.stats distribution
    for names of available parameters.
kwargs : optional
    Keyword arguments passed to a model's .fit method.
    MLE model:
        takes no additional arguments.
    Emcee model:
        n_walkers : int, optional
            The number of walkers in the ensemble (default=100).
        n_samples : int, optional
            The number of steps to run (default=500).
        progress : bool or str, optional
            If True, a progress bar is shown as the sampler progresses.
            If a string, selects a specific tqdm progress bar;
            most notable is 'notebook', which is suitable for Jupyter notebooks.
            If False (default), no progress bar is shown.
            The progress bar is part of the 'emcee' package.

Returns:

model : MLE or Emcee
    Distribution fitting model fitted to the extremes.
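
For illustration, a minimal usage sketch (the synthetic extremes series below is invented for the example and is not part of pyextremes):

```python
# Sketch: fit an MLE model to synthetic annual block maxima.
import pandas as pd
import scipy.stats

from pyextremes.models.models import get_model

# Synthetic block maxima (one value per year) drawn from a GEV distribution
extremes = pd.Series(
    data=scipy.stats.genextreme.rvs(
        c=0.1, loc=10.0, scale=2.0, size=50, random_state=42
    ),
    index=pd.date_range(start="1970", periods=50, freq="YS"),
)

# Fit by maximum likelihood, holding the GEV shape parameter fixed at 0
model = get_model(
    model="MLE",
    extremes=extremes,
    distribution="genextreme",
    distribution_kwargs={"fc": 0},
)
print(model.fit_parameters)  # free parameters, e.g. {'loc': ..., 'scale': ...}
```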

Source code in src/pyextremes/models/models.py
def get_model(
    model: Literal["MLE", "Emcee"],
    extremes: pd.Series,
    distribution: Union[str, scipy.stats.rv_continuous],
    distribution_kwargs: Optional[dict] = None,
    **kwargs,
) -> Union[MLE, Emcee]:
    """
    Get distribution fitting model and fit it to given extreme values.

    Parameters
    ----------
    model : str
        Name of model.
        Supported models:
            MLE - Maximum Likelihood Estimate (MLE) model.
                Based on 'scipy' package (scipy.stats.rv_continuous.fit).
            Emcee - Markov Chain Monte Carlo (MCMC) model.
                Based on 'emcee' package by Daniel Foreman-Mackey.
    extremes : pandas.Series
        Time series of extreme events.
    distribution : str or scipy.stats.rv_continuous
        Distribution name compatible with scipy.stats
        or a subclass of scipy.stats.rv_continuous.
        See https://docs.scipy.org/doc/scipy/reference/stats.html
    distribution_kwargs : dict, optional
        Special keyword arguments, passed to the `.fit` method of the distribution.
        These keyword arguments represent parameters to be held fixed.
        Names of parameters to be fixed must have 'f' prefixes. Valid parameters:
            - shape(s): 'fc', e.g. fc=0
            - location: 'floc', e.g. floc=0
            - scale: 'fscale', e.g. fscale=1
        By default, no parameters are fixed.
        See documentation of a specific scipy.stats distribution
        for names of available parameters.
    kwargs
        Keyword arguments passed to a model .fit method.
        MLE model:
            MLE model takes no additional arguments.
        Emcee model:
            n_walkers : int, optional
                The number of walkers in the ensemble (default=100).
            n_samples : int, optional
                The number of steps to run (default=500).
            progress : bool or str, optional
                If True, a progress bar will be shown as the sampler progresses.
                If a string, will select a specific tqdm progress bar.
                Most notable is 'notebook', which shows a progress bar
                suitable for Jupyter notebooks.
                If False (default), no progress bar will be shown.
                This progress bar is a part of the `emcee` package.

    Returns
    -------
    model : MLE or Emcee
        Distribution fitting model fitted to the `extremes`.

    """
    distribution_model_kwargs = {
        "extremes": extremes,
        "distribution": distribution,
        "distribution_kwargs": distribution_kwargs,
        **kwargs,
    }

    if model == "MLE":
        return MLE(**distribution_model_kwargs)
    if model == "Emcee":
        return Emcee(**distribution_model_kwargs)
    raise ValueError(
        f"invalid value in '{model}' for the 'model' argument, "
        f"available models: 'MLE', 'Emcee'"
    )

pyextremes.models.model_base.AbstractModelBaseClass

Bases: ABC
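
Concrete models (MLE and Emcee below) implement the abstract name, fit, and get_return_value members. A hypothetical sketch of a minimal subclass, modeled on the source shown below (PointEstimateModel is invented for illustration):

```python
# Hypothetical sketch: a minimal concrete model that reuses the
# distribution's MLE parameters and returns point estimates only.
import typing

from pyextremes.models.model_base import AbstractModelBaseClass


class PointEstimateModel(AbstractModelBaseClass):
    @property
    def name(self) -> str:
        return "PointEstimate"

    def fit(self, **kwargs) -> None:
        # Reuse the scipy MLE estimates held by the Distribution object,
        # as the MLE model on this page does
        self._fit_parameters = self.distribution.mle_parameters

    def get_return_value(
        self,
        exceedance_probability: float,
        alpha: typing.Optional[float] = None,
        **kwargs,
    ) -> tuple:
        # Inverse survival function at the fitted parameters,
        # mirroring the concrete models on this page
        rv = self.distribution.distribution.isf(
            q=exceedance_probability,
            **self.fit_parameters,
            **self.distribution._fixed_parameters,
        )
        return rv, None, None
```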

Source code in src/pyextremes/models/model_base.py
class AbstractModelBaseClass(abc.ABC):
    def __init__(
        self,
        extremes: pd.Series,
        distribution: typing.Union[str, scipy.stats.rv_continuous],
        distribution_kwargs: typing.Optional[dict] = None,
        **kwargs,
    ) -> None:
        """
        Initialize the model.

        Parameters
        ----------
        extremes : pandas.Series
            Time series of extreme events.
        distribution : str or scipy.stats.rv_continuous
            Distribution name compatible with scipy.stats
            or a subclass of scipy.stats.rv_continuous.
            See https://docs.scipy.org/doc/scipy/reference/stats.html
        distribution_kwargs : dict, optional
            Special keyword arguments, passed to the `.fit` method of the distribution.
            These keyword arguments represent parameters to be held fixed.
            Names of parameters to be fixed must have 'f' prefixes. Valid parameters:
                - shape(s): 'fc', e.g. fc=0
                - location: 'floc', e.g. floc=0
                - scale: 'fscale', e.g. fscale=1
            By default, no parameters are fixed.
            See documentation of a specific scipy.stats distribution
            for names of available parameters.
        kwargs
            Keyword arguments passed to a model .fit method.
            MLE model:
                MLE model takes no additional arguments.
            Emcee model:
                n_walkers : int, optional
                    The number of walkers in the ensemble (default=100).
                n_samples : int, optional
                    The number of steps to run (default=500).
                progress : bool or str, optional
                    If True, a progress bar will be shown as the sampler progresses.
                    If a string, will select a specific tqdm progress bar.
                    Most notable is 'notebook', which shows a progress bar
                    suitable for Jupyter notebooks.
                    If False (default), no progress bar will be shown.
                    This progress bar is a part of the `emcee` package.

        """
        self.extremes = extremes

        # Declare extreme value distribution
        distribution_kwargs = distribution_kwargs or {}
        self.distribution = Distribution(
            extremes=self.extremes, distribution=distribution, **distribution_kwargs
        )

        # Fit the distribution to extremes
        self._fit_parameters: typing.Optional[dict] = None
        self._trace: typing.Optional[np.ndarray] = None
        self.fit(**kwargs)

        # Initialize 'return_value_cache'
        self.return_value_cache: typing.Dict[tuple, tuple] = {}

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Return model name."""
        raise NotImplementedError

    @abc.abstractmethod
    def fit(self, **kwargs) -> None:
        """
        Set values for self.fit_parameters and self.trace.

        self.trace is set only for MCMC-like models.
        self.fit_parameters is a dictionary with {parameter_name: value},
        e.g. {'c': 0.1, 'loc': -7, 'scale': 0.3}
        self.trace is a numpy.ndarray with shape of
        (n_walkers, n_samples, n_free_parameters)

        """
        raise NotImplementedError

    @property
    def fit_parameters(self) -> typing.Dict[str, float]:
        if self._fit_parameters is None:
            raise AssertionError
        else:
            return self._fit_parameters

    @property
    def trace(self) -> np.ndarray:
        if self._trace is None:
            raise TypeError(f"trace property is not applicable for '{self.name}' model")
        else:
            return self._trace

    @property
    def loglikelihood(self) -> float:
        return np.sum(self.logpdf(x=self.extremes.values))

    @property
    def AIC(self) -> float:
        """
        Return corrected Akaike Information Criterion (AIC) of the model.

        Smaller AIC value corresponds to better model.
        This formula scales well for small sample sizes.
        See https://en.wikipedia.org/wiki/Akaike_information_criterion

        """
        k = self.distribution.number_of_parameters
        n = len(self.extremes)
        aic = 2 * k - 2 * self.loglikelihood
        correction = (2 * k**2 + 2 * k) / (n - k - 1)
        return aic + correction

    @abc.abstractmethod
    def get_return_value(
        self,
        exceedance_probability: float,
        alpha: typing.Optional[float] = None,
        **kwargs,
    ) -> tuple:
        """
        Calculate return value and confidence interval bounds.

        Parameters
        ----------
        exceedance_probability : array-like
            Exceedance probability or 1D array of exceedance probabilities.
            Each exceedance probability must be in the [0, 1) range.
        alpha : float, optional
            Width of confidence interval (0, 1).
            If None (default), return None
            for upper and lower confidence interval bounds.
        kwargs
            Model-specific keyword arguments.
            If alpha is None, keyword arguments are ignored
            (error still raised for unrecognized arguments).
            MLE model:
                n_samples : int, optional
                    Number of bootstrap samples used to estimate
                    confidence interval bounds (default=100).
            Emcee model:
                burn_in : int
                    Burn-in value (number of first steps to discard for each walker).

        Returns
        -------
        return_value : array-like
            Return values.
        ci_lower : array-like
            Lower confidence interval bounds.
        ci_upper : array-like
            Upper confidence interval bounds.

        """
        raise NotImplementedError

    def _get_prop(self, prop: str, x):
        return self.distribution.get_prop(
            prop=prop, x=x, free_parameters=self.fit_parameters
        )

    def pdf(self, x):
        return self._get_prop(prop="pdf", x=x)

    def logpdf(self, x):
        return self._get_prop(prop="logpdf", x=x)

    def cdf(self, x):
        return self._get_prop(prop="cdf", x=x)

    def ppf(self, x):
        return self._get_prop(prop="ppf", x=x)

    def isf(self, x):
        return self._get_prop(prop="isf", x=x)

AIC: float property

Return corrected Akaike Information Criterion (AIC) of the model.

A smaller AIC value corresponds to a better model. This corrected formula (AICc) remains accurate for small sample sizes. See https://en.wikipedia.org/wiki/Akaike_information_criterion
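
Because of this, AIC can be used to compare candidate distributions fitted to the same extremes. A sketch, reusing the synthetic extremes series from the get_model example above:

```python
# Sketch: rank candidate scipy.stats distributions by corrected AIC;
# 'extremes' is the synthetic series from the get_model example above.
from pyextremes.models.models import get_model

for name in ("genextreme", "gumbel_r"):
    candidate = get_model(model="MLE", extremes=extremes, distribution=name)
    print(f"{name}: AIC={candidate.AIC:.2f}")
```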

name: str abstractmethod property

Return model name.

__init__(extremes, distribution, distribution_kwargs=None, **kwargs)

Initialize the model.

Parameters:

extremes : pandas.Series, required
    Time series of extreme events.
distribution : str or scipy.stats.rv_continuous, required
    Distribution name compatible with scipy.stats
    or a subclass of scipy.stats.rv_continuous.
    See https://docs.scipy.org/doc/scipy/reference/stats.html
distribution_kwargs : dict, optional (default=None)
    Special keyword arguments, passed to the .fit method of the distribution.
    These keyword arguments represent parameters to be held fixed.
    Names of parameters to be fixed must have 'f' prefixes. Valid parameters:
        - shape(s): 'fc', e.g. fc=0
        - location: 'floc', e.g. floc=0
        - scale: 'fscale', e.g. fscale=1
    By default, no parameters are fixed.
    See documentation of a specific scipy.stats distribution
    for names of available parameters.
kwargs : optional
    Keyword arguments passed to a model's .fit method.
    MLE model:
        takes no additional arguments.
    Emcee model:
        n_walkers : int, optional
            The number of walkers in the ensemble (default=100).
        n_samples : int, optional
            The number of steps to run (default=500).
        progress : bool or str, optional
            If True, a progress bar is shown as the sampler progresses.
            If a string, selects a specific tqdm progress bar;
            most notable is 'notebook', which is suitable for Jupyter notebooks.
            If False (default), no progress bar is shown.
            The progress bar is part of the 'emcee' package.
Source code in src/pyextremes/models/model_base.py
def __init__(
    self,
    extremes: pd.Series,
    distribution: typing.Union[str, scipy.stats.rv_continuous],
    distribution_kwargs: typing.Optional[dict] = None,
    **kwargs,
) -> None:
    """
    Initialize the model.

    Parameters
    ----------
    extremes : pandas.Series
        Time series of extreme events.
    distribution : str or scipy.stats.rv_continuous
        Distribution name compatible with scipy.stats
        or a subclass of scipy.stats.rv_continuous.
        See https://docs.scipy.org/doc/scipy/reference/stats.html
    distribution_kwargs : dict, optional
        Special keyword arguments, passed to the `.fit` method of the distribution.
        These keyword arguments represent parameters to be held fixed.
        Names of parameters to be fixed must have 'f' prefixes. Valid parameters:
            - shape(s): 'fc', e.g. fc=0
            - location: 'floc', e.g. floc=0
            - scale: 'fscale', e.g. fscale=1
        By default, no parameters are fixed.
        See documentation of a specific scipy.stats distribution
        for names of available parameters.
    kwargs
        Keyword arguments passed to a model .fit method.
        MLE model:
            MLE model takes no additional arguments.
        Emcee model:
            n_walkers : int, optional
                The number of walkers in the ensemble (default=100).
            n_samples : int, optional
                The number of steps to run (default=500).
            progress : bool or str, optional
                If True, a progress bar will be shown as the sampler progresses.
                If a string, will select a specific tqdm progress bar.
                Most notable is 'notebook', which shows a progress bar
                suitable for Jupyter notebooks.
                If False (default), no progress bar will be shown.
                This progress bar is a part of the `emcee` package.

    """
    self.extremes = extremes

    # Declare extreme value distribution
    distribution_kwargs = distribution_kwargs or {}
    self.distribution = Distribution(
        extremes=self.extremes, distribution=distribution, **distribution_kwargs
    )

    # Fit the distribution to extremes
    self._fit_parameters: typing.Optional[dict] = None
    self._trace: typing.Optional[np.ndarray] = None
    self.fit(**kwargs)

    # Initialize 'return_value_cache'
    self.return_value_cache: typing.Dict[tuple, tuple] = {}

fit(**kwargs) abstractmethod

Set values for self.fit_parameters and self.trace.

self.trace is set only for MCMC-like models.
self.fit_parameters is a dictionary of {parameter_name: value} pairs,
e.g. {'c': 0.1, 'loc': -7, 'scale': 0.3}.
self.trace is a numpy.ndarray with shape
(n_walkers, n_samples, n_free_parameters).

Source code in src/pyextremes/models/model_base.py
@abc.abstractmethod
def fit(self, **kwargs) -> None:
    """
    Set values for self.fit_parameters and self.trace.

    self.trace is set only for MCMC-like models.
    self.fit_parameters is a dictionary with {parameter_name: value},
    e.g. {'c': 0.1, 'loc': -7, 'scale': 0.3}
    self.trace is a numpy.ndarray with shape of
    (n_walkers, n_samples, n_free_parameters)

    """
    raise NotImplementedError

get_return_value(exceedance_probability, alpha=None, **kwargs) abstractmethod

Calculate return value and confidence interval bounds.

Parameters:

exceedance_probability : array-like, required
    Exceedance probability or 1D array of exceedance probabilities.
    Each exceedance probability must be in the [0, 1) range.
alpha : float, optional (default=None)
    Width of confidence interval (0, 1).
    If None (default), return None
    for upper and lower confidence interval bounds.
kwargs : optional
    Model-specific keyword arguments.
    If alpha is None, keyword arguments are ignored
    (an error is still raised for unrecognized arguments).
    MLE model:
        n_samples : int, optional
            Number of bootstrap samples used to estimate
            confidence interval bounds (default=100).
    Emcee model:
        burn_in : int
            Burn-in value (number of first steps to discard for each walker).

Returns:

return_value : array-like
    Return values.
ci_lower : array-like
    Lower confidence interval bounds.
ci_upper : array-like
    Upper confidence interval bounds.
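
With annual block maxima (one extreme per year), the N-year return value corresponds to an exceedance probability of 1/N. A sketch using the MLE model fitted in the get_model example above:

```python
# Sketch: 100-year return value with a 95% confidence interval;
# 'model' is the MLE model from the get_model example above.
return_value, ci_lower, ci_upper = model.get_return_value(
    exceedance_probability=1 / 100,
    alpha=0.95,
    n_samples=100,  # MLE-specific: number of bootstrap samples
)
print(return_value, ci_lower, ci_upper)
```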

Source code in src/pyextremes/models/model_base.py
@abc.abstractmethod
def get_return_value(
    self,
    exceedance_probability: float,
    alpha: typing.Optional[float] = None,
    **kwargs,
) -> tuple:
    """
    Calculate return value and confidence interval bounds.

    Parameters
    ----------
    exceedance_probability : array-like
        Exceedance probability or 1D array of exceedance probabilities.
        Each exceedance probability must be in the [0, 1) range.
    alpha : float, optional
        Width of confidence interval (0, 1).
        If None (default), return None
        for upper and lower confidence interval bounds.
    kwargs
        Model-specific keyword arguments.
        If alpha is None, keyword arguments are ignored
        (error still raised for unrecognized arguments).
        MLE model:
            n_samples : int, optional
                Number of bootstrap samples used to estimate
                confidence interval bounds (default=100).
        Emcee model:
            burn_in : int
                Burn-in value (number of first steps to discard for each walker).

    Returns
    -------
    return_value : array-like
        Return values.
    ci_lower : array-like
        Lower confidence interval bounds.
    ci_upper : array-like
        Upper confidence interval bounds.

    """
    raise NotImplementedError

pyextremes.models.model_emcee.Emcee

Bases: AbstractModelBaseClass

Source code in src/pyextremes/models/model_emcee.py
class Emcee(AbstractModelBaseClass):
    def __init__(
        self,
        extremes: pd.Series,
        distribution: typing.Union[str, scipy.stats.rv_continuous],
        distribution_kwargs: typing.Optional[dict] = None,
        n_walkers: int = 100,
        n_samples: int = 500,
        progress: bool = False,
    ) -> None:
        """
        Markov Chain Monte Carlo (MCMC) model.

        Built around the 'emcee' package by Daniel Foreman-Mackey

        """
        super().__init__(
            extremes=extremes,
            distribution=distribution,
            distribution_kwargs=distribution_kwargs,
            n_walkers=n_walkers,
            n_samples=n_samples,
            progress=progress,
        )
        self.n_walkers = n_walkers
        self.n_samples = n_samples

    @property
    def name(self) -> str:
        return "Emcee"

    def fit(self, **kwargs) -> None:
        # Parse kwargs
        n_walkers: int = kwargs.pop("n_walkers")
        n_samples: int = kwargs.pop("n_samples")
        progress: bool = kwargs.pop("progress")
        if len(kwargs) != 0:
            raise TypeError(
                f"unrecognized arguments passed in: {', '.join(kwargs.keys())}"
            )

        # Declare Emcee ensemble sampler
        sampler = emcee.EnsembleSampler(
            nwalkers=n_walkers,
            ndim=self.distribution.number_of_parameters,
            log_prob_fn=self.distribution.log_probability,
        )

        # Run the ensemble sampler
        logger.debug(
            "running ensemble sampler with %d walkers and %d samples",
            n_walkers,
            n_samples,
        )
        with warnings.catch_warnings():
            warnings.simplefilter(action="ignore", category=RuntimeWarning)
            sampler.run_mcmc(
                initial_state=self.distribution.get_initial_state(n_walkers=n_walkers),
                nsteps=n_samples,
                progress=progress,
            )
        logger.debug(
            "finished run for ensemble sampler with %d walkers and %d samples",
            n_walkers,
            n_samples,
        )

        # Extract ensemble sampler chain
        self._trace: np.ndarray = sampler.get_chain().transpose((1, 0, 2))

        # Calculate fit parameters as MAP of distribution parameters
        kernel = scipy.stats.gaussian_kde(np.vstack(self._trace).transpose())

        def kde_func(x):
            return -kernel(x)[0]

        fit_parameters = self._trace.mean(axis=(0, 1))
        solution = scipy.optimize.minimize(
            kde_func,
            x0=fit_parameters,
            method="Nelder-Mead",
        )
        if solution.success:
            fit_parameters = solution.x
        else:  # pragma: no cover
            warnings.warn(
                message=(
                    "cannot calculate MAP using Gaussian KDE, "
                    "setting fit parameters as mean"
                ),
                category=RuntimeWarning,
            )
        self._fit_parameters = dict(
            zip(self.distribution.free_parameters, fit_parameters)
        )

    @property
    def trace_map(self) -> tuple:
        return tuple(
            self.fit_parameters[parameter]
            for parameter in self.distribution.free_parameters
        )

    def get_return_value(
        self, exceedance_probability, alpha: typing.Optional[float] = None, **kwargs
    ) -> tuple:
        """
        Calculate return value and confidence interval bounds.

        Parameters
        ----------
        exceedance_probability : array-like
            Exceedance probability or 1D array of exceedance probabilities.
            Each exceedance probability must be in the [0, 1) range.
        alpha : float, optional
            Width of confidence interval (0, 1).
            If None (default), return None
            for upper and lower confidence interval bounds.
        kwargs
            burn_in : int, optional
                Burn-in value (number of first steps to discard for each walker).
                By default it is 0 (no values are discarded).

        Returns
        -------
        return_value : array-like
            Return values.
        ci_lower : array-like
            Lower confidence interval bounds.
        ci_upper : array-like
            Upper confidence interval bounds.

        """
        # Parse 'kwargs'
        burn_in = kwargs.pop("burn_in", 0)
        if len(kwargs) != 0:
            raise TypeError(
                f"unrecognized arguments passed in: {', '.join(kwargs.keys())}"
            )

        # Convert 'exceedance_probability' to ndarray
        exceedance_probability = np.asarray(
            a=exceedance_probability, dtype=np.float64
        ).copy()
        if exceedance_probability.ndim == 0:
            exceedance_probability = exceedance_probability[np.newaxis]
        if exceedance_probability.ndim != 1:
            raise ValueError(
                f"invalid shape in {exceedance_probability.shape} "
                f"for the 'exceedance_probability' argument, must be 1D array"
            )

        # Calculate return values
        return_value = np.full(
            shape=exceedance_probability.shape, fill_value=np.nan, dtype=np.float64
        )
        ci_lower = return_value.copy()
        ci_upper = return_value.copy()
        for i, ep in enumerate(exceedance_probability):
            key: typing.Tuple[float, typing.Optional[float], int] = (
                ep,
                alpha,
                burn_in,
            )
            try:
                # Try to fetch pre-calculated values from cache
                rv, cil, ciu = self.return_value_cache[key]
                logger.debug(
                    "fetched return value for %s from cache as (%s, %s, %s)",
                    key,
                    rv,
                    cil,
                    ciu,
                )
            except KeyError:
                # Value not in cache - calculate new return value
                rv = self.distribution.distribution.isf(
                    q=ep,
                    **self.fit_parameters,
                    **self.distribution._fixed_parameters,
                )

                # Calculate confidence intervals
                if alpha is None:
                    cil = None
                    ciu = None
                else:
                    # Calculate confidence intervals
                    rv_sample = self.distribution.get_prop(
                        prop="isf",
                        x=ep,
                        free_parameters=np.vstack(self.trace[:, burn_in:, :]),
                    )
                    cil, ciu = np.quantile(
                        a=rv_sample, q=[(1 - alpha) / 2, (1 + alpha) / 2]
                    )

                # Add calculated return value and intervals to cache
                self.return_value_cache[key] = (rv, cil, ciu)
                logger.debug(
                    "calculated return value for %s as (%s, %s, %s)",
                    key,
                    rv,
                    cil,
                    ciu,
                )

            return_value[i] = rv
            ci_lower[i] = cil
            ci_upper[i] = ciu

        # Return results
        if len(return_value) == 1:
            return return_value[0], ci_lower[0], ci_upper[0]
        else:
            return return_value, ci_lower, ci_upper

    def __repr__(self) -> str:
        free_parameters = ", ".join(
            [
                f"{parameter}={self.fit_parameters[parameter]:.3f}"
                for parameter in self.distribution.free_parameters
            ]
        )

        fixed_parameters = ", ".join(
            [
                f"{key}={value:.3f}"
                for key, value in self.distribution.fixed_parameters.items()
            ]
        )
        if fixed_parameters == "":
            fixed_parameters = "all parameters are free"

        summary = [
            "Emcee model",
            "",
            f"free parameters: {free_parameters}",
            f"fixed parameters: {fixed_parameters}",
            f"AIC: {self.AIC:.3f}",
            f"loglikelihood: {self.loglikelihood:.3f}",
            f"number of walkers: {self.n_walkers:d}",
            f"number of samples: {self.n_samples:d}",
            f"return value cache size: {len(self.return_value_cache):,d}",
        ]

        longest_row = max(map(len, summary))
        summary[1] = "-" * longest_row
        summary.append(summary[1])
        summary[0] = " " * ((longest_row - len(summary[0])) // 2) + summary[0]
        for i, row in enumerate(summary):
            summary[i] += " " * (longest_row - len(row))

        return "\n".join(summary)

__init__(extremes, distribution, distribution_kwargs=None, n_walkers=100, n_samples=500, progress=False)

Markov Chain Monte Carlo (MCMC) model.

Built around the 'emcee' package by Daniel Foreman-Mackey.
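
A sketch of constructing the Emcee model directly, reusing the synthetic extremes series from the get_model example above (n_walkers and n_samples repeat the documented defaults):

```python
# Sketch: fit the MCMC model; 'extremes' is the synthetic series
# from the get_model example above.
from pyextremes.models.model_emcee import Emcee

emcee_model = Emcee(
    extremes=extremes,
    distribution="genextreme",
    n_walkers=100,  # documented default, made explicit
    n_samples=500,  # documented default, made explicit
)
# The sampler chain has shape (n_walkers, n_samples, n_free_parameters)
print(emcee_model.trace.shape)
```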

Source code in src/pyextremes/models/model_emcee.py
def __init__(
    self,
    extremes: pd.Series,
    distribution: typing.Union[str, scipy.stats.rv_continuous],
    distribution_kwargs: typing.Optional[dict] = None,
    n_walkers: int = 100,
    n_samples: int = 500,
    progress: bool = False,
) -> None:
    """
    Markov Chain Monte Carlo (MCMC) model.

    Built around the 'emcee' package by Daniel Foreman-Mackey

    """
    super().__init__(
        extremes=extremes,
        distribution=distribution,
        distribution_kwargs=distribution_kwargs,
        n_walkers=n_walkers,
        n_samples=n_samples,
        progress=progress,
    )
    self.n_walkers = n_walkers
    self.n_samples = n_samples

get_return_value(exceedance_probability, alpha=None, **kwargs)

Calculate return value and confidence interval bounds.

Parameters:

exceedance_probability : array-like, required
    Exceedance probability or 1D array of exceedance probabilities.
    Each exceedance probability must be in the [0, 1) range.
alpha : float, optional (default=None)
    Width of confidence interval (0, 1).
    If None (default), return None
    for upper and lower confidence interval bounds.
kwargs : optional
    burn_in : int, optional
        Burn-in value (number of first steps to discard for each walker).
        By default it is 0 (no values are discarded).

Returns:

return_value : array-like
    Return values.
ci_lower : array-like
    Lower confidence interval bounds.
ci_upper : array-like
    Upper confidence interval bounds.
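
A sketch using the Emcee model constructed above, discarding early walker steps as burn-in:

```python
# Sketch: return value from the Emcee model above; the first 100
# steps of each walker are discarded before the confidence interval
# is computed from the remaining posterior sample.
return_value, ci_lower, ci_upper = emcee_model.get_return_value(
    exceedance_probability=1 / 100,
    alpha=0.95,
    burn_in=100,
)
print(return_value, ci_lower, ci_upper)
```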

Source code in src/pyextremes/models/model_emcee.py
def get_return_value(
    self, exceedance_probability, alpha: typing.Optional[float] = None, **kwargs
) -> tuple:
    """
    Calculate return value and confidence interval bounds.

    Parameters
    ----------
    exceedance_probability : array-like
        Exceedance probability or 1D array of exceedance probabilities.
        Each exceedance probability must be in the [0, 1) range.
    alpha : float, optional
        Width of confidence interval (0, 1).
        If None (default), return None
        for upper and lower confidence interval bounds.
    kwargs
        burn_in : int, optional
            Burn-in value (number of first steps to discard for each walker).
            By default it is 0 (no values are discarded).

    Returns
    -------
    return_value : array-like
        Return values.
    ci_lower : array-like
        Lower confidence interval bounds.
    ci_upper : array-like
        Upper confidence interval bounds.

    """
    # Parse 'kwargs'
    burn_in = kwargs.pop("burn_in", 0)
    if len(kwargs) != 0:
        raise TypeError(
            f"unrecognized arguments passed in: {', '.join(kwargs.keys())}"
        )

    # Convert 'exceedance_probability' to ndarray
    exceedance_probability = np.asarray(
        a=exceedance_probability, dtype=np.float64
    ).copy()
    if exceedance_probability.ndim == 0:
        exceedance_probability = exceedance_probability[np.newaxis]
    if exceedance_probability.ndim != 1:
        raise ValueError(
            f"invalid shape in {exceedance_probability.shape} "
            f"for the 'exceedance_probability' argument, must be 1D array"
        )

    # Calculate return values
    return_value = np.full(
        shape=exceedance_probability.shape, fill_value=np.nan, dtype=np.float64
    )
    ci_lower = return_value.copy()
    ci_upper = return_value.copy()
    for i, ep in enumerate(exceedance_probability):
        key: typing.Tuple[float, typing.Optional[float], int] = (
            ep,
            alpha,
            burn_in,
        )
        try:
            # Try to fetch pre-calculated values from cache
            rv, cil, ciu = self.return_value_cache[key]
            logger.debug(
                "fetched return value for %s from cache as (%s, %s, %s)",
                key,
                rv,
                cil,
                ciu,
            )
        except KeyError:
            # Value not in cache - calculate new return value
            rv = self.distribution.distribution.isf(
                q=ep,
                **self.fit_parameters,
                **self.distribution._fixed_parameters,
            )

            # Calculate confidence intervals
            if alpha is None:
                cil = None
                ciu = None
            else:
                # Calculate confidence intervals
                rv_sample = self.distribution.get_prop(
                    prop="isf",
                    x=ep,
                    free_parameters=np.vstack(self.trace[:, burn_in:, :]),
                )
                cil, ciu = np.quantile(
                    a=rv_sample, q=[(1 - alpha) / 2, (1 + alpha) / 2]
                )

            # Add calculated return value and intervals to cache
            self.return_value_cache[key] = (rv, cil, ciu)
            logger.debug(
                "calculated return value for %s as (%s, %s, %s)",
                key,
                rv,
                cil,
                ciu,
            )

        return_value[i] = rv
        ci_lower[i] = cil
        ci_upper[i] = ciu

    # Return results
    if len(return_value) == 1:
        return return_value[0], ci_lower[0], ci_upper[0]
    else:
        return return_value, ci_lower, ci_upper

pyextremes.models.model_mle.MLE

Bases: AbstractModelBaseClass

Source code in src/pyextremes/models/model_mle.py
class MLE(AbstractModelBaseClass):
    def __init__(
        self,
        extremes: pd.Series,
        distribution: typing.Union[str, scipy.stats.rv_continuous],
        distribution_kwargs: typing.Optional[dict] = None,
    ) -> None:
        """
        Maximum Likelihood Estimate (MLE) model.

        Built around the scipy.stats.rv_continuous.fit method.

        """
        super().__init__(
            extremes=extremes,
            distribution=distribution,
            distribution_kwargs=distribution_kwargs,
        )

        # Initialize 'fit_parameter_cache' and 'seed_cache'
        self.fit_parameter_cache: typing.List[typing.Tuple[float, ...]] = []
        self.seed_cache: typing.Set[int] = set()

    @property
    def name(self) -> str:
        return "MLE"

    def fit(self, **kwargs) -> None:
        if len(kwargs) != 0:
            raise TypeError(
                f"unrecognized arguments passed in: {', '.join(kwargs.keys())}"
            )
        self._fit_parameters = self.distribution.mle_parameters
        logger.debug(
            "fit %s distribution with %s parameters",
            self.distribution.name,
            len(self._fit_parameters),
        )

    def get_return_value(
        self, exceedance_probability, alpha: typing.Optional[float] = None, **kwargs
    ) -> tuple:
        """
        Calculate return value and confidence interval bounds.

        Parameters
        ----------
        exceedance_probability : array-like
            Exceedance probability or 1D array of exceedance probabilities.
            Each exceedance probability must be in the [0, 1) range.
        alpha : float, optional
            Width of confidence interval (0, 1).
            If None (default), return None
            for upper and lower confidence interval bounds.
        kwargs
            n_samples : int, optional
                Number of bootstrap samples used to estimate
                confidence interval bounds (default=100).

        Returns
        -------
        return_value : array-like
            Return values.
        ci_lower : array-like
            Lower confidence interval bounds.
        ci_upper : array-like
            Upper confidence interval bounds.

        """
        # Parse 'kwargs'
        n_samples = kwargs.pop("n_samples", 100)
        if not n_samples > 0:
            raise ValueError(
                f"invalid value in {n_samples} for the 'n_samples' "
                f"argument, must be positive number"
            )
        if len(kwargs) != 0:
            raise TypeError(
                f"unrecognized arguments passed in: {', '.join(kwargs.keys())}"
            )

        # Convert 'exceedance_probability' to ndarray
        exceedance_probability = np.asarray(
            a=exceedance_probability, dtype=np.float64
        ).copy()
        if exceedance_probability.ndim == 0:
            exceedance_probability = exceedance_probability[np.newaxis]
        if exceedance_probability.ndim != 1:
            raise ValueError(
                f"invalid shape in {exceedance_probability.shape} "
                f"for the 'exceedance_probability' argument, must be 1D array"
            )

        # If cache doesn't have enough values, calculate new fit parameters
        if alpha is not None:
            n_extra_fit_parameters = n_samples - len(self.fit_parameter_cache)
            if n_extra_fit_parameters > 0:
                self._extend_fit_parameter_cache(n=n_extra_fit_parameters)

        # Calculate return values
        return_value = np.full(
            shape=exceedance_probability.shape, fill_value=np.nan, dtype=np.float64
        )
        ci_lower = return_value.copy()
        ci_upper = return_value.copy()
        for i, ep in enumerate(exceedance_probability):
            key: typing.Tuple[float, typing.Optional[float], int] = (
                ep,
                alpha,
                n_samples,
            )
            try:
                # Try to fetch pre-calculated values from cache
                rv, cil, ciu = self.return_value_cache[key]
                logger.debug(
                    "fetched return value for %s from cache as (%s, %s, %s)",
                    key,
                    rv,
                    cil,
                    ciu,
                )
            except KeyError:
                # Value not in cache - calculate new return value
                rv = self.distribution.distribution.isf(
                    q=ep,
                    **self.fit_parameters,
                    **self.distribution._fixed_parameters,
                )

                # Calculate confidence intervals
                if alpha is None:
                    cil = None
                    ciu = None
                else:
                    # Calculate confidence intervals
                    rv_sample = self.distribution.distribution.isf(
                        ep, *np.transpose(self.fit_parameter_cache[:n_samples])
                    )
                    cil, ciu = np.quantile(
                        a=rv_sample, q=[(1 - alpha) / 2, (1 + alpha) / 2]
                    )

                # Add calculated return value and intervals to cache
                self.return_value_cache[key] = (rv, cil, ciu)
                logger.debug(
                    "calculated return value for %s as (%s, %s, %s)",
                    key,
                    rv,
                    cil,
                    ciu,
                )

            return_value[i] = rv
            ci_lower[i] = cil
            ci_upper[i] = ciu

        # Return results
        if len(return_value) == 1:
            return return_value[0], ci_lower[0], ci_upper[0]
        else:
            return return_value, ci_lower, ci_upper

    def _extend_fit_parameter_cache(self, n: int) -> None:
        # Prepare local variables used by fit parameter calculator
        extremes = self.extremes.values
        distribution = self.distribution.distribution
        fixed_parameters = self.distribution.fixed_parameters

        min_samples_per_core = 50
        if n <= min_samples_per_core:
            # Calculate without multiprocessing
            logger.debug("getting random seed value for fit parameter sampler")
            seed = None
            while seed is None:
                _seed = np.random.randint(low=0, high=1e6, size=None)
                if _seed not in self.seed_cache:
                    seed = _seed
                    self.seed_cache.add(_seed)

            logger.debug(
                "calculating %d additional fit parameters using single core",
                n,
            )
            new_fit_parameters = get_fit_parameters(
                params=(
                    n,
                    distribution,
                    extremes,
                    fixed_parameters,
                    seed,
                )
            )
        else:
            # Find number of cores
            n_cores = min(
                os.cpu_count() or 2,
                int(np.ceil(n / min_samples_per_core)),
            )

            # Calculate number of samples per core
            min_samples_per_core = int(n / n_cores)
            core_samples = [min_samples_per_core for _ in range(n_cores)]

            # Distribute remaining samples evenly across cores
            for i in range(n - sum(core_samples)):
                core_samples[i] += 1

            # Get unique random seed for each core and add it to `self.seed_cache`
            logger.debug("getting random seed values for each core")
            seeds: typing.List[int] = []
            while len(seeds) < n_cores:
                seed = np.random.randint(low=0, high=1e6, size=None)
                if seed not in self.seed_cache:
                    seeds.append(seed)
                    self.seed_cache.add(seed)

            # Calculate new fit parameters using processor pool
            logger.debug(
                "calculating %d additional fit parameters using %d cores "
                "having %s samples accordingly",
                n,
                n_cores,
                core_samples,
            )
            with multiprocessing.Pool(processes=n_cores) as pool:
                new_fit_parameters = list(
                    itertools.chain(
                        *pool.map(
                            get_fit_parameters,
                            zip(
                                core_samples,
                                [distribution for _ in range(n_cores)],
                                [extremes for _ in range(n_cores)],
                                [fixed_parameters for _ in range(n_cores)],
                                seeds,
                            ),
                        )
                    )
                )

        # Extend fit parameter cache
        logger.debug("extending fit parameter cache with %d new entries", n)
        self.fit_parameter_cache.extend(new_fit_parameters)
        return None

    def __repr__(self) -> str:
        free_parameters = ", ".join(
            [
                f"{parameter}={self.fit_parameters[parameter]:.3f}"
                for parameter in self.distribution.free_parameters
            ]
        )

        fixed_parameters = ", ".join(
            [
                f"{key}={value:.3f}"
                for key, value in self.distribution.fixed_parameters.items()
            ]
        )
        if fixed_parameters == "":
            fixed_parameters = "all parameters are free"

        summary = [
            "MLE model",
            "",
            f"free parameters: {free_parameters}",
            f"fixed parameters: {fixed_parameters}",
            f"AIC: {self.AIC:.3f}",
            f"loglikelihood: {self.loglikelihood:.3f}",
            f"return value cache size: {len(self.return_value_cache):,d}",
            f"fit parameter cache size: {len(self.fit_parameter_cache):,d}",
        ]

        longest_row = max(map(len, summary))
        summary[1] = "-" * longest_row
        summary.append(summary[1])
        summary[0] = " " * ((longest_row - len(summary[0])) // 2) + summary[0]
        for i, row in enumerate(summary):
            summary[i] += " " * (longest_row - len(row))

        return "\n".join(summary)

__init__(extremes, distribution, distribution_kwargs=None)

Maximum Likelihood Estimate (MLE) model.

Built around the scipy.stats.rv_continuous.fit method.
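
A sketch of constructing the MLE model directly instead of through get_model, reusing the synthetic extremes series from the get_model example above:

```python
# Sketch: direct construction; 'extremes' is the synthetic series
# from the get_model example above.
from pyextremes.models.model_mle import MLE

mle_model = MLE(
    extremes=extremes,
    distribution="genextreme",
    distribution_kwargs={"fc": 0},  # hold the GEV shape parameter fixed
)
print(mle_model)  # __repr__ prints a summary of the fit
```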

Source code in src/pyextremes/models/model_mle.py
def __init__(
    self,
    extremes: pd.Series,
    distribution: typing.Union[str, scipy.stats.rv_continuous],
    distribution_kwargs: typing.Optional[dict] = None,
) -> None:
    """
    Maximum Likelihood Estimate (MLE) model.

    Built around the scipy.stats.rv_continuous.fit method.

    """
    super().__init__(
        extremes=extremes,
        distribution=distribution,
        distribution_kwargs=distribution_kwargs,
    )

    # Initialize 'fit_parameter_cache' and 'seed_cache'
    self.fit_parameter_cache: typing.List[typing.Tuple[float, ...]] = []
    self.seed_cache: typing.Set[int] = set()

get_return_value(exceedance_probability, alpha=None, **kwargs)

Calculate return value and confidence interval bounds.

Parameters:

exceedance_probability : array-like, required
    Exceedance probability or 1D array of exceedance probabilities.
    Each exceedance probability must be in the [0, 1) range.
alpha : float, optional (default=None)
    Width of confidence interval (0, 1).
    If None (default), return None
    for upper and lower confidence interval bounds.
kwargs : optional
    n_samples : int, optional
        Number of bootstrap samples used to estimate
        confidence interval bounds (default=100).

Returns:

return_value : array-like
    Return values.
ci_lower : array-like
    Lower confidence interval bounds.
ci_upper : array-like
    Upper confidence interval bounds.
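
Bootstrap fit parameters are stored in fit_parameter_cache, so repeated calls that need at most the cached number of samples do not refit the distribution. A sketch using the MLE model constructed above:

```python
# Sketch: bootstrap confidence intervals from the MLE model above.
# The first call populates 'fit_parameter_cache' with up to 1000
# bootstrap fits; later calls with n_samples <= 1000 reuse them.
return_value, ci_lower, ci_upper = mle_model.get_return_value(
    exceedance_probability=[1 / 10, 1 / 100],  # 10- and 100-year events
    alpha=0.95,
    n_samples=1000,
)
print(return_value, ci_lower, ci_upper)
```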

Source code in src/pyextremes/models/model_mle.py
def get_return_value(
    self, exceedance_probability, alpha: typing.Optional[float] = None, **kwargs
) -> tuple:
    """
    Calculate return value and confidence interval bounds.

    Parameters
    ----------
    exceedance_probability : array-like
        Exceedance probability or 1D array of exceedance probabilities.
        Each exceedance probability must be in the [0, 1) range.
    alpha : float, optional
        Width of confidence interval (0, 1).
        If None (default), return None
        for upper and lower confidence interval bounds.
    kwargs
        n_samples : int, optional
            Number of bootstrap samples used to estimate
            confidence interval bounds (default=100).

    Returns
    -------
    return_value : array-like
        Return values.
    ci_lower : array-like
        Lower confidence interval bounds.
    ci_upper : array-like
        Upper confidence interval bounds.

    """
    # Parse 'kwargs'
    n_samples = kwargs.pop("n_samples", 100)
    if not n_samples > 0:
        raise ValueError(
            f"invalid value in {n_samples} for the 'n_samples' "
            f"argument, must be positive number"
        )
    if len(kwargs) != 0:
        raise TypeError(
            f"unrecognized arguments passed in: {', '.join(kwargs.keys())}"
        )

    # Convert 'exceedance_probability' to ndarray
    exceedance_probability = np.asarray(
        a=exceedance_probability, dtype=np.float64
    ).copy()
    if exceedance_probability.ndim == 0:
        exceedance_probability = exceedance_probability[np.newaxis]
    if exceedance_probability.ndim != 1:
        raise ValueError(
            f"invalid shape in {exceedance_probability.shape} "
            f"for the 'exceedance_probability' argument, must be 1D array"
        )

    # If cache doesn't have enough values, calculate new fit parameters
    if alpha is not None:
        n_extra_fit_parameters = n_samples - len(self.fit_parameter_cache)
        if n_extra_fit_parameters > 0:
            self._extend_fit_parameter_cache(n=n_extra_fit_parameters)

    # Calculate return values
    return_value = np.full(
        shape=exceedance_probability.shape, fill_value=np.nan, dtype=np.float64
    )
    ci_lower = return_value.copy()
    ci_upper = return_value.copy()
    for i, ep in enumerate(exceedance_probability):
        key: typing.Tuple[float, typing.Optional[float], int] = (
            ep,
            alpha,
            n_samples,
        )
        try:
            # Try to fetch pre-calculated values from cache
            rv, cil, ciu = self.return_value_cache[key]
            logger.debug(
                "fetched return value for %s from cache as (%s, %s, %s)",
                key,
                rv,
                cil,
                ciu,
            )
        except KeyError:
            # Value not in cache - calculate new return value
            rv = self.distribution.distribution.isf(
                q=ep,
                **self.fit_parameters,
                **self.distribution._fixed_parameters,
            )

            # Calculate confidence intervals
            if alpha is None:
                cil = None
                ciu = None
            else:
                # Calculate confidence intervals
                rv_sample = self.distribution.distribution.isf(
                    ep, *np.transpose(self.fit_parameter_cache[:n_samples])
                )
                cil, ciu = np.quantile(
                    a=rv_sample, q=[(1 - alpha) / 2, (1 + alpha) / 2]
                )

            # Add calculated return value and intervals to cache
            self.return_value_cache[key] = (rv, cil, ciu)
            logger.debug(
                "calculated return value for %s as (%s, %s, %s)",
                key,
                rv,
                cil,
                ciu,
            )

        return_value[i] = rv
        ci_lower[i] = cil
        ci_upper[i] = ciu

    # Return results
    if len(return_value) == 1:
        return return_value[0], ci_lower[0], ci_upper[0]
    else:
        return return_value, ci_lower, ci_upper