Source code for aerosandbox.tools.pretty_plots.plots.plot_with_bootstrapped_uncertainty

from typing import Union, Iterable, Tuple, Optional, Callable
import matplotlib.pyplot as plt

import numpy as np
from aerosandbox.tools.statistics import time_series_uncertainty_quantification as tsuq


def _resolve_legend_label(label, default):
    """Translate a bool-or-string label option into a Matplotlib label.

    Matplotlib stringifies any non-None label, so a raw `False` would show up
    in the legend as the text "False". `True` selects `default`, `False`
    selects no label (None), and anything else is passed through unchanged.
    """
    if isinstance(label, (bool, np.bool_)):
        return default if label else None
    return label


def plot_with_bootstrapped_uncertainty(
        x: np.ndarray,
        y: np.ndarray,
        ci: Optional[Union[float, Iterable[float], np.ndarray]] = 0.95,
        x_stdev: Union[None, float] = 0.,
        y_stdev: Union[None, float] = None,
        color: Optional[Union[str, Tuple[float, ...]]] = None,
        draw_data: bool = True,
        label_line: Union[bool, str] = "Best Estimate",
        label_ci: bool = True,
        label_data: Union[bool, str] = "Raw Data",
        line_alpha: float = 0.9,
        ci_to_alpha_mapping: Callable[[float], float] = lambda ci: 0.8 * (1 - ci) ** 0.4,
        n_bootstraps=2000,
        n_fit_points=500,
        spline_degree=3,
        normalize: bool = True,
        x_log_scale: bool = False,
        y_log_scale: bool = False,
):
    """
    Plots a bootstrapped fit of (x, y) data on the current Matplotlib axes,
    with shaded equal-tailed confidence bands around the median fit.

    The fits themselves are produced by `tsuq.bootstrap_fits`; this function
    handles input conditioning, optional log-scaling, and drawing.

    Args:

        x: The x-values of the data.

        y: The y-values of the data.

        ci: Confidence level(s) to shade. May be a single float, any iterable
            of floats (including a generator), or None to draw no bands.
            Every value must lie strictly inside (0, 1).

        x_stdev: Forwarded to `tsuq.bootstrap_fits` as `x_noise_stdev`.

        y_stdev: Forwarded to `tsuq.bootstrap_fits` as `y_noise_stdev`.

        color: Color of the line and bands. If None, the color Matplotlib
            assigns to the median line is reused for the bands.

        draw_data: Whether to draw the raw data points.

        label_line: Legend label of the median line. A string is used as-is;
            True selects "Best Estimate"; False omits the legend entry.

        label_ci: Whether the confidence bands get legend labels
            (formatted like "95% CI").

        label_data: Legend label of the raw data. A string is used as-is;
            True selects "Raw Data"; False omits the legend entry.

        line_alpha: Opacity of the median line.

        ci_to_alpha_mapping: Maps a confidence level to the opacity of its band.

        n_bootstraps: Forwarded to `tsuq.bootstrap_fits` as `n_bootstraps`.

        n_fit_points: Forwarded to `tsuq.bootstrap_fits` as `fit_points`.

        spline_degree: Forwarded to `tsuq.bootstrap_fits` as `spline_degree`.

        normalize: Forwarded to `tsuq.bootstrap_fits` as `normalize`.

        x_log_scale: If True, the fit is done on log(x) and the x-axis is
            set to a log scale.

        y_log_scale: If True, the fit is done on log(y) and the y-axis is
            set to a log scale.

    Returns: A tuple `(x_fit, y_bootstrap_fits)` as returned by
        `tsuq.bootstrap_fits`, mapped back out of log-space if a log scale
        was requested. Quantiles are taken over axis 0 of `y_bootstrap_fits`.

    Raises:
        ValueError: If any value in `ci` is outside of (0, 1).

    """
    x = np.array(x)
    y = np.array(y)

    ### Log-transform the data if desired
    if x_log_scale:
        x = np.log(x)
    if y_log_scale:
        y = np.log(y)

    ### Make sure `ci` is a NumPy array
    if ci is None:
        ci = []
    else:
        try:
            # Materialize first: np.array() on a generator yields a 0-d object
            # array rather than the values, which would break the sort below.
            ci = list(ci)
        except TypeError:  # Not iterable, so `ci` is a single value
            ci = [ci]
    ci = np.array(ci)

    ### Make sure `ci` is sorted
    ci = np.sort(ci)

    ### Make sure `ci` is in bounds
    if not (np.all(ci > 0) and np.all(ci < 1)):
        raise ValueError("Confidence interval values in `ci` should all be in the range of (0, 1).")

    ### Do the bootstrap fits
    x_fit, y_bootstrap_fits = tsuq.bootstrap_fits(
        x=x,
        y=y,
        x_noise_stdev=x_stdev,
        y_noise_stdev=y_stdev,
        n_bootstraps=n_bootstraps,
        fit_points=n_fit_points,
        spline_degree=spline_degree,
        normalize=normalize,
    )

    ### Undo the log-transform if desired
    if x_log_scale:
        x = np.exp(x)
        x_fit = np.exp(x_fit)
    if y_log_scale:
        y = np.exp(y)
        y_bootstrap_fits = np.exp(y_bootstrap_fits)

    ### Plot the best-estimator line
    line, = plt.plot(
        x_fit,
        np.nanquantile(y_bootstrap_fits, q=0.5, axis=0),
        color=color,
        label=_resolve_legend_label(label_line, default="Best Estimate"),
        zorder=2,
        alpha=line_alpha,
    )

    if color is None:
        # Reuse the automatically-assigned line color for the bands
        color = line.get_color()

    if x_log_scale:
        plt.xscale('log')
    if y_log_scale:
        plt.yscale('log')

    ### Plot the confidence intervals
    if len(ci) != 0:

        ### Using the method of equal-tails confidence intervals
        # The median is prepended so that band `i` spans from the previous
        # (narrower) level's edge out to this level's edge.
        lower_quantiles = np.concatenate([[0.5], (1 - ci) / 2])
        upper_quantiles = np.concatenate([[0.5], 1 - (1 - ci) / 2])

        lower_ci = np.nanquantile(y_bootstrap_fits, q=lower_quantiles, axis=0)
        upper_ci = np.nanquantile(y_bootstrap_fits, q=upper_quantiles, axis=0)

        for i, ci_val in enumerate(ci):
            settings = dict(
                color=color,
                alpha=ci_to_alpha_mapping(ci_val),
                linewidth=0,
                zorder=1.5
            )

            # Only the lower half carries the label, so each confidence level
            # appears once in the legend.
            plt.fill_between(
                x_fit,
                lower_ci[i],
                lower_ci[i + 1],
                label=f"{ci_val:.0%} CI" if label_ci else None,
                **settings
            )
            plt.fill_between(
                x_fit,
                upper_ci[i],
                upper_ci[i + 1],
                **settings
            )

    ### Plot the data
    if draw_data:
        plt.plot(
            x,
            y,
            ".k",
            label=_resolve_legend_label(label_data, default="Raw Data"),
            alpha=0.25,
            markersize=5,
            markeredgewidth=0,
            zorder=1,
        )

    return x_fit, y_bootstrap_fits
if __name__ == '__main__':
    import matplotlib.pyplot as plt
    import aerosandbox.tools.pretty_plots as p

    # Fixed seed so that both demos below draw the same noise on every run.
    np.random.seed(0)

    ### Demo 1: a shifted sine wave with additive Gaussian noise
    x = np.linspace(0, 20, 1001)
    y_true = np.sin(x - 5)
    y_stdev = 0.5
    additive_noise = y_stdev * np.random.randn(len(x))
    y_noisy = y_true + additive_noise

    ### Draw the bootstrapped spline fit, then overlay the noise-free function
    fig, ax = plt.subplots(dpi=300)
    x_fit, y_bootstrap_fits = plot_with_bootstrapped_uncertainty(
        x,
        y_noisy,
        ci=[0.75, 0.95],
        label_line="Best Estimate",
        label_data="Data (True Function + Noise)",
    )
    ax.plot(
        x,
        y_true,
        "k--",
        label="True Function (Hidden)",
        alpha=0.8,
        zorder=1
    )
    # The legend is built here, so show_plot() is told not to add another one.
    plt.legend(ncols=2)
    p.show_plot(
        "Spline Bootstrapping Test",
        r"$x$",
        r"$y$",
        legend=False
    )

    ### Demo 2: a power law with lognormal noise, fitted on log-log axes
    x = np.geomspace(10, 1000, 1000)
    y_true = 3 * x ** 0.5
    y_stdev = 0.1
    # NOTE(review): the noise here is multiplicative and additionally scaled by
    # y_stdev, although the legend label below says "+ Noise" - confirm intent.
    lognormal_factor = np.random.lognormal(size=len(x))
    y_noisy = y_true * y_stdev * lognormal_factor

    fig, ax = plt.subplots()
    x_fit, y_bootstrap_fits = plot_with_bootstrapped_uncertainty(
        x,
        y_noisy,
        ci=[0.75, 0.95],
        label_line="Best Estimate",
        label_data="Data (True Function + Noise)",
        x_log_scale=True,
        y_log_scale=True,
    )
    p.show_plot()