Source code for traval.plots

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import norm


class ComparisonPlots:
    """Mix-in class for plots for comparing timeseries.

    The plotting methods read their data from the comparison object stored
    in ``self.cp`` and their line/marker styling from ``self.color_dict``.
    """

    # Pristine default styles. Never mutated; only copied from.
    _DEFAULT_COLOR_DICT = {
        "only_in_s1": {"color": "orange"},
        "only_in_s2": {"color": "blue"},
        "identical": {"color": "LimeGreen", "alpha": 0.5},
        "different": {"color": "Red", "alpha": 0.3},
        "flagged_in_both": {"color": "DarkOrchid"},
        "introduced": {"color": "Coral"},
    }

    # Class-level palette, used by every instance that has not customised
    # or reset its own colors yet.
    color_dict = {
        key: dict(style) for key, style in _DEFAULT_COLOR_DICT.items()
    }

    def __init__(self, cp):
        """Initialize comparison plots mix-in class.

        Parameters
        ----------
        cp : SeriesComparison
            traval comparison object
        """
        self.cp = cp

    @staticmethod
    def _copy_color_dict(colors):
        """Return a copy of `colors` in which each style dict is copied too."""
        return {key: dict(style) for key, style in colors.items()}

    @staticmethod
    def _unique_legend_entries(handles, labels):
        """Drop repeated legend labels, keeping the last artist per label."""
        seen = set()
        kept = []
        # Walk backwards so that the *last* occurrence of a label wins, then
        # restore the original drawing order.
        for handle, label in zip(reversed(handles), reversed(labels)):
            if label not in seen:
                seen.add(label)
                kept.append((handle, label))
        kept.reverse()
        return [h for h, _ in kept], [lbl for _, lbl in kept]

    def update_color_dict(self, key, color=None, alpha=None):
        """Update colors for plots.

        The change only affects this instance: the first update gives the
        instance its own copy of the palette, so the class-level defaults
        (and therefore other instances) are left untouched.

        Parameters
        ----------
        key : str
            name of category to update, see
            `ComparisonPlots.color_dict.keys()` for options
        color : str, optional
            color name, by default None
        alpha : float, optional
            alpha value, by default None

        Raises
        ------
        KeyError
            if `key` is not a category in the color dictionary.
        """
        # Copy-on-write: never mutate the dictionary shared via the class.
        if "color_dict" not in vars(self):
            self.color_dict = self._copy_color_dict(self.color_dict)

        style = self.color_dict[key]
        if color is not None:
            style["color"] = color
        if alpha is not None:
            style["alpha"] = alpha

    def reset_color_dict(self):
        """Reset color_dict to default values."""
        self.color_dict = self._copy_color_dict(self._DEFAULT_COLOR_DICT)

    def plot_series_comparison(self, mark_unique=True, mark_different=True,
                               mark_identical=True, ax=None):
        """Plot comparison between two timeseries.

        Parameters
        ----------
        mark_unique : bool, optional
            mark unique values with colored X's, by default True
        mark_different : bool, optional
            highlight where timeseries differ with red, by default True
        mark_identical : bool, optional
            highlight where timeseries are identical with green,
            by default True
        ax : axis, optional
            axis object to plot on, by default None

        Returns
        -------
        ax : axis
            axis object
        """
        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=(12, 5))
        else:
            fig = ax.figure

        plot_handles = []

        # Plot both series
        for s, c, ls in zip([self.cp.s1n, self.cp.s2n],
                            ["gray", "k"],
                            ["solid", "dashed"]):
            (p1,) = ax.plot(s.index, s, c=c, marker=None, ls=ls,
                            label=s.name)
            plot_handles.append(p1)

            # Mark differences between both in red (do for both lines)
            if mark_different:
                s_diff = s.copy()
                not_diff = s_diff.index.difference(
                    self.cp.idx_in_both_different)
                # blank out everything that is not different so that only
                # the differing stretches are drawn
                s_diff.loc[not_diff] = np.nan
                (p2,) = ax.plot(s_diff.index, s_diff, lw=3, marker=None,
                                ls="solid", label="different",
                                **self.color_dict["different"])

        # add to legend once (both highlight lines share one style)
        if mark_different:
            plot_handles.append(p2)

        # Mark sections with identical measurements in green (do for one
        # line)
        if mark_identical:
            s_identical = self.cp.s1n.copy()
            not_identical = s_identical.index.difference(
                self.cp.idx_in_both_identical)
            s_identical.loc[not_identical] = np.nan
            (p5,) = ax.plot(s_identical.index, s_identical, marker=None,
                            ls="solid", label="identical", lw=3,
                            **self.color_dict["identical"])
            plot_handles.append(p5)

        # Mark unique observations with x's if they exist
        if mark_unique:
            if self.cp.idx_in_s1.size > 0:
                (p3,) = ax.plot(
                    self.cp.idx_in_s1,
                    self.cp.s1.loc[self.cp.idx_in_s1],
                    marker="x", ms=5, ls="none",
                    **self.color_dict["only_in_s1"],
                    label=f"only in series 1: {self.cp.s1n.name}")
                plot_handles.append(p3)
            if self.cp.idx_in_s2.size > 0:
                (p4,) = ax.plot(
                    self.cp.idx_in_s2,
                    self.cp.s2.loc[self.cp.idx_in_s2],
                    marker="x", ms=5, ls="none",
                    **self.color_dict["only_in_s2"],
                    label=f"only in series 2: {self.cp.s2.name}")
                plot_handles.append(p4)

        # Add legend and other plot stuff
        plot_labels = [handle.get_label() for handle in plot_handles]
        ax.legend(plot_handles, plot_labels, loc="best",
                  ncol=int(np.ceil(len(plot_handles) / 2.)))
        ax.grid(visible=True)
        fig.tight_layout()

        return ax

    def plot_relative_comparison(self, mark_unique=True, mark_different=True,
                                 mark_identical=True, mark_introduced=False,
                                 ax=None):
        """Plot comparison between two timeseries relative to base timeseries.

        Parameters
        ----------
        mark_unique : bool, optional
            mark unique observations with colored X's, by default True
        mark_different : bool, optional
            highlight where series are different in red, by default True
        mark_identical : bool, optional
            highlight where series are identical with green,
            by default True
        mark_introduced : bool, optional
            mark observations that are not in the base timeseries with X's,
            by default False
        ax : axis, optional
            axis to plot on, by default None

        Returns
        -------
        ax : axis
            axis handle
        """
        ax = self.plot_series_comparison(mark_unique=mark_unique,
                                         mark_different=mark_different,
                                         mark_identical=mark_identical,
                                         ax=ax)

        # The axes report every labelled artist, so labels drawn more than
        # once (e.g. "different" for both series) have to be collapsed.
        plot_handles, plot_labels = self._unique_legend_entries(
            *ax.get_legend_handles_labels())

        # Add a base series (i.e. raw data) to the plot
        (p0,) = ax.plot(self.cp.basen.index, self.cp.basen, lw=0.5, c="k",
                        label="base series", ls="solid", zorder=2)

        # insert entry at beginning
        plot_handles.insert(0, p0)
        plot_labels.insert(0, p0.get_label())

        # mark flagged in both
        flagged_in_both = self.cp.idx_r_flagged_in_both
        if flagged_in_both.size > 0:
            s_base = pd.Series(index=self.cp.basen.index, data=np.nan,
                               dtype=float)
            s_base.loc[flagged_in_both] = self.cp.basen.loc[flagged_in_both]
            (p6,) = ax.plot(s_base.index, s_base, lw=0.5,
                            **self.color_dict["flagged_in_both"],
                            ls="none", marker="x", ms=5,
                            label="flagged in both")
            plot_handles.append(p6)
            plot_labels.append(p6.get_label())

        if mark_introduced:
            intro_s1_idx = self.cp.idx_r_introduced_in_s1
            intro_idx = self.cp.idx_r_introduced_in_s2.union(
                self.cp.idx_r_introduced_in_both)

            if (intro_s1_idx.size > 0) or (intro_idx.size > 0):
                intro_s1 = self.cp.s1n.loc[intro_s1_idx]
                ax.plot(intro_s1.index, intro_s1,
                        ls="none", marker="x", ms=5,
                        **self.color_dict["introduced"],
                        label="introduced in s1/s2")

                # Take x and y from the same series: looking these labels
                # up in s1n raises KeyError for any label that s1n lacks.
                intro_s2 = self.cp.s2n.loc[intro_idx]
                (p7,) = ax.plot(intro_s2.index, intro_s2,
                                ls="none", marker="x", ms=5,
                                **self.color_dict["introduced"],
                                label="introduced in s1/s2")
                plot_handles.append(p7)
                plot_labels.append(p7.get_label())

        ax.legend(plot_handles, plot_labels, loc="best",
                  ncol=int(np.ceil(len(plot_handles) / 3.)))

        return ax

    def plot_validation_result(self, ax=None):
        """Plot the base series with circles marking flagged observations.

        Observations in the base series are circled by index set:

        - ``cp.idx_r_flagged_in_both``: green, "Correctly flagged (TP)"
        - ``cp.idx_r_flagged_in_s1``: dark orange, "Incorrectly flagged (FP)"
        - ``cp.idx_r_flagged_in_s2``: red, "Wrongly kept (FN)"

        The colors are assigned in that order, so if an index occurs in
        more than one set the later assignment wins.

        Parameters
        ----------
        ax : axis, optional
            axis to plot on, by default None, which creates a new figure

        Returns
        -------
        ax : axis
            axis handle
        """
        # Some plot settings
        ms_valid = 6  # markersize validation result
        mew = 1.25  # markeredgewidth validation result

        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=(12, 5))
        else:
            fig = ax.figure

        plot_handles = []

        # Add an original series i.e. raw data to the plot
        (p0,) = ax.plot(self.cp.basen.index, self.cp.basen, lw=0.5, c="k",
                        marker=".", ms=3, label="base series", ls="solid")
        plot_handles.append(p0)

        # set marker colors; an empty string means "not flagged"
        c = pd.Series(index=self.cp.basen.index, data='')
        c.loc[self.cp.idx_r_flagged_in_both] = "Green"
        c.loc[self.cp.idx_r_flagged_in_s1] = "DarkOrange"
        c.loc[self.cp.idx_r_flagged_in_s2] = "Red"

        mask = c != ""
        s = self.cp.basen.loc[mask]
        c = c.loc[mask]

        sc = ax.scatter(s.index, s.values, c=c.values, s=ms_valid**2,
                        linewidths=mew, marker="o", edgecolor=c.values,
                        zorder=10)
        # open circles: keep the colored edge, drop the fill
        sc.set_facecolor("none")

        # Empty lines that only exist to provide legend entries
        (dummy1,) = ax.plot([], [], c="Green", marker="o", mfc="none",
                            mew=mew, ls="none", ms=ms_valid,
                            label="Correctly flagged (TP)")
        (dummy2,) = ax.plot([], [], c="DarkOrange", marker="o", mfc="none",
                            mew=mew, ls="none", ms=ms_valid,
                            label="Incorrectly flagged (FP)")
        (dummy3,) = ax.plot([], [], c="Red", marker="o", mfc="none",
                            mew=mew, ls="none", ms=ms_valid,
                            label="Wrongly kept (FN)")
        plot_handles += [dummy1, dummy2, dummy3]

        # Add legend and other plot stuff
        plot_labels = [handle.get_label() for handle in plot_handles]
        ax.legend(plot_handles, plot_labels, loc=(0, 1), markerscale=1.25,
                  ncol=len(plot_handles), frameon=False)
        ax.grid(visible=True)
        fig.tight_layout()

        return ax
def roc_plot(tpr, fpr, labels, colors=None, ax=None, plot_diagonal=True,
             colorbar_label=None, **kwargs):
    """Receiver operator characteristic plot.

    Plots the false positive rate (x-axis) versus the true positive rate
    (y-axis). The 'tpr' and 'fpr' can be passed as:

    - values: outcome of a single error detection algorithm
    - arrays: outcomes of error detection algorithm in which a detection
      parameter is varied.
    - lists: for passing multiple results, entries can be values or arrays,
      as listed above.

    Parameters
    ----------
    tpr : list or value or array
        true positive rate. If passed as a list loops through each
        entry and plots it. Otherwise just plots the array or value.
    fpr : list or value or array
        false positive rate. If passed as a list loops through each
        entry and plots it. Otherwise just plots the array or value.
    labels : list or str
        label for each tpr/fpr entry.
    colors : optional
        passed as `c` to ax.scatter for every entry. If not None, a
        colorbar is added for the last scatter that was drawn.
        By default None.
    ax : matplotlib.pyplot.Axes, optional
        axes to plot on, default is None, which creates new figure
    plot_diagonal : bool, optional
        whether to plot the diagonal (useful for combining multiple
        ROC plots)
    colorbar_label : str, optional
        label for the colorbar, only used when `colors` is given,
        by default None
    **kwargs
        passed to ax.scatter

    Returns
    -------
    ax : matplotlib.pyplot.Axes
        axes instance
    """
    # Promote single values/arrays to one-element lists
    if not isinstance(tpr, list):
        tpr = [tpr]
    if not isinstance(fpr, list):
        fpr = [fpr]
    if not isinstance(labels, list):
        labels = [labels]

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    else:
        fig = ax.figure

    ax.set_aspect("equal")

    if plot_diagonal:
        ax.plot([0, 1], [0, 1], ls="dashed", lw=1.0, c="k",
                label="random guess")

    # Stays None when there is nothing to plot (empty input lists), so the
    # colorbar step below can be skipped instead of failing on an unbound
    # name.
    sc = None
    for itpr, ifpr, ilbl in zip(tpr, fpr, labels):
        sc = ax.scatter(ifpr, itpr, s=6**2, c=colors, marker="o",
                        label=ilbl, **kwargs)

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.grid(visible=True)
    ax.legend(loc="lower right")

    ax.set_ylabel("True Positive Rate (sensitivity)")
    ax.set_xlabel("False Positive Rate (1-specificity)")
    ax.set_title("receiver operator characteristic plot")

    if colors is not None and sc is not None:
        # dedicated axes to the right of the plot for the colorbar
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", "5%", pad="3%")
        cbar = fig.colorbar(sc, cax=cax)
        if colorbar_label is not None:
            cbar.set_label(colorbar_label)

    fig.tight_layout()

    return ax
def det_plot(fpr, fnr, labels, ax=None, **kwargs):
    """Detection Error Tradeoff plot.

    Both rates are drawn on a normal-deviate scale (``norm.ppf`` of the
    rate), with tick labels given as percentages.

    Adapted from scikitlearn `DetCurveDisplay`.

    Parameters
    ----------
    fpr : list or value or array
        false positive rate. If passed as a list loops through each
        entry and plots it. Otherwise just plots the array or value.
    fnr : list or value or array
        false negative rate. If passed as a list loops through each
        entry and plots it. Otherwise just plots the array or value.
    labels : list or str
        label for each fpr/fnr entry.
    ax : matplotlib.pyplot.Axes, optional
        axes handle to plot on, by default None, which creates a new figure
    **kwargs
        passed to ax.plot

    Returns
    -------
    ax : matplotlib.pyplot.Axes
        axes handle
    """
    # Wrap anything that is not already a list into a one-element list
    fpr = fpr if isinstance(fpr, list) else [fpr]
    fnr = fnr if isinstance(fnr, list) else [fnr]
    labels = labels if isinstance(labels, list) else [labels]

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(10, 8))

    ax.set_aspect("equal")

    for rate_fp, rate_fn, entry_label in zip(fpr, fnr, labels):
        x_dev = norm.ppf(rate_fp)
        y_dev = norm.ppf(rate_fn)
        ax.plot(x_dev, y_dev, marker="o", ls="none", label=entry_label,
                **kwargs)

    ax.set(xlabel="False Positive Rate", ylabel="False Negative Rate")

    ticks = [0.001, 0.01, 0.05, 0.20, 0.5, 0.80, 0.95, 0.99, 0.999]
    tick_locations = norm.ppf(ticks)

    # Whole percentages are printed without decimals, the others with one
    tick_labels = []
    for frac in ticks:
        template = '{:.0%}' if (100 * frac).is_integer() else '{:.1%}'
        tick_labels.append(template.format(frac))

    ax.set_xticks(tick_locations)
    ax.set_xticklabels(tick_labels)
    ax.set_xlim(-3, 3)

    ax.set_yticks(tick_locations)
    ax.set_yticklabels(tick_labels)
    ax.set_ylim(-3, 3)

    ax.grid(visible=True)
    ax.set_title("detection error tradeoff plot")

    return ax