Skip to content

Extremes

pyextremes.extremes.get_extremes(ts, method, extremes_type='high', **kwargs)

Get extreme events from time series.

Parameters:

Name Type Description Default
ts Series

Time series of the signal.

required
method str

Extreme value extraction method. Supported values: BM (Block Maxima) or POT (Peaks Over Threshold).

required
extremes_type str

high (default) - get extreme high values low - get extreme low values

'high'
kwargs

If method is BM: block_size (str or pandas.Timedelta, optional) - block size (default='365.2425D'); errors (str, optional) - 'raise' (default) raises an exception when encountering a block with no data, 'ignore' ignores blocks with no data, 'coerce' gets extreme values for blocks with no data as the mean of all other extreme events in the series, with the index being the middle point of the corresponding interval; min_last_block (float, optional) - minimum data availability ratio (0 to 1) in the last block for an extreme value to be extracted from it. This is used to discard the last block when it is too short. If None (default), the last block is always used. If method is POT: threshold (float) - threshold used to find exceedances; r (pandas.Timedelta or value convertible to timedelta, optional) - duration of the window used to decluster the exceedances. By default r='24H' (24 hours). See pandas.to_timedelta for more information.

{}

Returns:

Name Type Description
extremes Series

Time series of extreme events.

Source code in src/pyextremes/extremes/extremes.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def get_extremes(
    ts: pd.Series,
    method: Literal["BM", "POT"],
    extremes_type: Literal["high", "low"] = "high",
    **kwargs,
) -> pd.Series:
    """
    Extract extreme events from a time series using the selected method.

    This is a thin dispatcher: it validates `method` and forwards `ts`,
    `extremes_type`, and all keyword arguments to the method-specific extractor.

    Parameters
    ----------
    ts : pandas.Series
        Time series of the signal.
    method : str
        Extreme value extraction method.
        Supported values:
            BM - Block Maxima
            POT - Peaks Over Threshold
    extremes_type : str, optional
        high (default) - get extreme high values
        low - get extreme low values
    kwargs
        Forwarded unchanged to the extractor selected by `method`.
        if method is BM:
            block_size : str or pandas.Timedelta, optional
                Block size (default='365.2425D').
            errors : str, optional
                raise (default) - raise an exception
                    when encountering a block with no data
                ignore - ignore blocks with no data
                coerce - get extreme values for blocks with no data
                    as mean of all other extreme events in the series
                    with index being the middle point of corresponding interval
            min_last_block : float, optional
                Minimum data availability ratio (0 to 1) in the last block
                for it to be used to extract extreme value from.
                This is used to discard last block when it is too short.
                If None (default), last block is always used.
        if method is POT:
            threshold : float
                Threshold used to find exceedances.
            r : pandas.Timedelta or value convertible to timedelta, optional
                Duration of window used to decluster the exceedances.
                By default r='24H' (24 hours).
                See pandas.to_timedelta for more information.

    Returns
    -------
    extremes : pandas.Series
        Time series of extreme events.

    Raises
    ------
    ValueError
        If `method` is neither 'BM' nor 'POT'.

    """
    # Guard clause: reject unsupported methods before touching any extractor
    if method not in ("BM", "POT"):
        raise ValueError(
            f"invalid value in '{method}' for the 'method' argument, "
            f"available methods: 'BM', 'POT'"
        )

    # Pick the extractor matching the (now known to be valid) method
    extractor = (
        get_extremes_block_maxima
        if method == "BM"
        else get_extremes_peaks_over_threshold
    )
    return extractor(ts=ts, extremes_type=extremes_type, **kwargs)

pyextremes.extremes.get_return_periods(ts, extremes, extremes_method, extremes_type, block_size=None, return_period_size='365.2425D', plotting_position='weibull')

Calculate return periods for given extreme values using given plotting position.

Return periods are multiples of return_period_size. Plotting positions were taken from https://matplotlib.org/mpl-probscale/tutorial/closer_look_at_plot_pos.html

Parameters:

Name Type Description Default
ts Series

Time series of the signal.

required
extremes Series

Time series of extreme events.

required
extremes_method str

Extreme value extraction method. Supported values: BM (Block Maxima) or POT (Peaks Over Threshold).

required
extremes_type str

high - provided extreme values are extreme high values low - provided extreme values are extreme low values

required
block_size str or Timedelta

Block size in the 'BM' extremes_method (default=None). If None, then it is calculated as the median distance between extreme events.

None
return_period_size str or Timedelta

Size of return periods (default='365.2425D'). If set to '30D', then a return period of 12 would be roughly equivalent to a 1 year return period (360 days).

'365.2425D'
plotting_position str

Plotting position name (default='weibull'), not case-sensitive. Supported plotting positions: ecdf, hazen, weibull, tukey, blom, median, cunnane, gringorten, beard

'weibull'

Returns:

Name Type Description
extreme_events DataFrame

A DataFrame with extreme values, exceedance probabilities, and return periods as multiples of return_period_size.

Source code in src/pyextremes/extremes/return_periods.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def _parse_timedelta_argument(value, argument_name):
    """Return `value` as a pandas.Timedelta; only str and pandas.Timedelta are accepted."""
    if isinstance(value, pd.Timedelta):
        return value
    if isinstance(value, str):
        return pd.to_timedelta(value)
    raise TypeError(
        f"invalid type in {type(value)} for the '{argument_name}' argument"
    )


def get_return_periods(
    ts: pd.Series,
    extremes: pd.Series,
    extremes_method: Literal["BM", "POT"],
    extremes_type: Literal["high", "low"],
    block_size: Optional[Union[str, pd.Timedelta]] = None,
    return_period_size: Union[str, pd.Timedelta] = "365.2425D",
    plotting_position: Literal[
        "ecdf",
        "hazen",
        "weibull",
        "tukey",
        "blom",
        "median",
        "cunnane",
        "gringorten",
        "beard",
    ] = "weibull",
) -> pd.DataFrame:
    """
    Calculate return periods for given extreme values using given plotting position.

    Return periods are multiples of `return_period_size`.
    Plotting positions were taken from
    https://matplotlib.org/mpl-probscale/tutorial/closer_look_at_plot_pos.html

    Parameters
    ----------
    ts : pandas.Series
        Time series of the signal.
        Only its index span (max - min) is used, and only for the 'POT' method.
    extremes : pandas.Series
        Time series of extreme events.
    extremes_method : str
        Extreme value extraction method.
        Supported values:
            BM - Block Maxima
            POT - Peaks Over Threshold
    extremes_type : str
        high - provided extreme values are extreme high values
        low - provided extreme values are extreme low values
    block_size : str or pandas.Timedelta, optional
        Block size in the 'BM' `extremes_method` (default=None).
        If None, then it is calculated as the median distance
        between extreme events.
        Must be None when `extremes_method` is not 'BM'.
    return_period_size : str or pandas.Timedelta, optional
        Size of return periods (default='365.2425D').
        If set to '30D', then a return period of 12
        would be roughly equivalent to a 1 year return period (360 days).
    plotting_position : str, optional
        Plotting position name (default='weibull'), not case-sensitive.
        Supported plotting positions:
            ecdf, hazen, weibull, tukey, blom, median, cunnane, gringorten, beard

    Returns
    -------
    extreme_events : pandas.DataFrame
        A float64 DataFrame indexed like `extremes` with three columns:
        the extreme values (named after `extremes.name`),
        'exceedance probability', and 'return period'
        (as multiples of `return_period_size`).

    Raises
    ------
    ValueError
        If `extremes_method`, `extremes_type`, or `plotting_position`
        has an unsupported value, or if `block_size` is provided
        while `extremes_method` is not 'BM'.
    TypeError
        If `block_size` or `return_period_size` is neither
        a str nor a pandas.Timedelta.

    """
    # Validate 'extremes_method' up front so that an unsupported method is always
    # reported as such, rather than surfacing as a misleading 'block_size' error
    if extremes_method not in ("BM", "POT"):
        raise ValueError(
            f"invalid value in '{extremes_method}' for the 'extremes_method' argument"
        )

    # Parse the 'block_size' argument
    if extremes_method == "BM":
        if block_size is None:
            # Calculate 'block_size' as median distance between extremes
            block_size = pd.to_timedelta(np.quantile(np.diff(extremes.index), 0.5))
        else:
            block_size = _parse_timedelta_argument(block_size, "block_size")
    elif block_size is not None:
        raise ValueError(
            f"'block_size' value is used only if 'extremes_method' is 'BM', "
            f"provided 'extremes_method' is {extremes_method}"
        )

    # Parse the 'return_period_size' argument
    return_period_size = _parse_timedelta_argument(
        return_period_size, "return_period_size"
    )

    # Calculate rate of extreme events as number of events per one return period
    if extremes_method == "BM":
        extremes_rate = return_period_size / block_size
    else:
        # 'POT': number of events divided by the record length in return periods
        n_periods = (ts.index.max() - ts.index.min()) / return_period_size
        extremes_rate = len(extremes) / n_periods

    # Rank extreme values from most extreme (1) to least extreme (len(extremes))
    if extremes_type == "high":
        ranks = (
            len(extremes) + 1 - scipy.stats.rankdata(extremes.values, method="average")
        )
    elif extremes_type == "low":
        ranks = scipy.stats.rankdata(extremes.values, method="average")
    else:
        raise ValueError(
            f"invalid value in '{extremes_type}' for the 'extremes_type' argument"
        )

    # Get plotting position parameters
    try:
        alpha, beta = plotting_positions[plotting_position.lower()]
    except KeyError as _error:
        raise ValueError(
            f"invalid value in '{plotting_position}' "
            f"for the 'plotting_position' argument"
        ) from _error

    # Calculate exceedance probabilities
    exceedance_probability = (ranks - alpha) / (len(extremes) + 1 - alpha - beta)

    # Calculate return periods
    return_periods = 1 / exceedance_probability / extremes_rate

    # Copy `extremes` to make the returned DataFrame independent from the original
    extremes = extremes.copy(deep=True)

    return pd.DataFrame(
        data={
            extremes.name: extremes.values,
            "exceedance probability": exceedance_probability,
            "return period": return_periods,
        },
        index=extremes.index,
        dtype=np.float64,
    )