# Source code for traval.plots

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import norm


class ComparisonPlots:
    """Mix-in class for plots for comparing timeseries.

    The plotting methods read their data from the comparison object passed
    to the constructor (``self.cp``) and their styles from ``color_dict``.
    """

    # Pristine plot styles. Never mutated, so reset_color_dict can always
    # restore them.
    _DEFAULT_COLORS = {
        "only_in_s1": {"color": "orange"},
        "only_in_s2": {"color": "blue"},
        "identical": {"color": "LimeGreen", "alpha": 0.5},
        "different": {"color": "Red", "alpha": 0.3},
        "flagged_in_both": {"color": "DarkOrchid"},
        "introduced": {"color": "Coral"},
    }

    # Class-level styles, read by every instance that has not customized
    # its own copy (see update_color_dict).
    color_dict = {key: dict(style) for key, style in _DEFAULT_COLORS.items()}

    def __init__(self, cp):
        """Initialize comparison plots mix-in class.

        Parameters
        ----------
        cp : SeriesComparison
            traval comparison object
        """
        self.cp = cp

    @staticmethod
    def _copy_colors(colors):
        """Return a copy of a color dictionary including its nested dicts."""
        return {key: dict(style) for key, style in colors.items()}

    @staticmethod
    def _drop_duplicate_labels(handles, labels):
        """Collapse repeated legend labels, keeping the last occurrence.

        Parameters
        ----------
        handles : list
            legend handles
        labels : list of str
            legend labels, same length and order as handles

        Returns
        -------
        handles, labels : tuple of lists
            new lists in which each label appears exactly once
        """
        seen = set()
        kept_handles = []
        kept_labels = []
        # walk backwards so that the last entry of each label survives
        for handle, label in zip(reversed(handles), reversed(labels)):
            if label in seen:
                continue
            seen.add(label)
            kept_handles.append(handle)
            kept_labels.append(label)
        kept_handles.reverse()
        kept_labels.reverse()
        return kept_handles, kept_labels

    def update_color_dict(self, key, color=None, alpha=None):
        """Update colors for plots.

        Only this instance is affected: on the first update the instance
        receives its own copy of the color dictionary, so other instances
        and the class-level defaults stay untouched.

        Parameters
        ----------
        key : str
            name of category to update, see
            `ComparisonPlots.color_dict.keys()` for options
        color : str, optional
            color name, by default None
        alpha : float, optional
            alpha value, by default None

        Raises
        ------
        KeyError
            if key is not a category in the color dictionary
        """
        # copy-on-write: never mutate the dictionary shared through the class
        if "color_dict" not in vars(self):
            self.color_dict = self._copy_colors(self.color_dict)

        d = self.color_dict[key]
        if color is not None:
            d["color"] = color
        if alpha is not None:
            d["alpha"] = alpha

    def reset_color_dict(self):
        """Reset color_dict of this instance to default values."""
        self.color_dict = self._copy_colors(self._DEFAULT_COLORS)

    def plot_series_comparison(self, mark_unique=True, mark_different=True,
                               mark_identical=True, ax=None):
        """Plot comparison between two timeseries.

        Parameters
        ----------
        mark_unique : bool, optional
            mark unique values with colored X's, by default True
        mark_different : bool, optional
            highlight where timeseries differ with red, by default True
        mark_identical : bool, optional
            highlight where timeseries are identical with green,
            by default True
        ax : axis, optional
            axis object to plot on, by default None

        Returns
        -------
        ax : axis
            axis object
        """

        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=(12, 5))
        else:
            fig = ax.figure

        plot_handles = []
        diff_handle = None

        # Plot both series
        for s, c, ls in zip([self.cp.s1n, self.cp.s2n],
                            ["gray", "k"],
                            ["solid", "dashed"]):
            line, = ax.plot(s.index, s, c=c, marker=None, ls=ls, label=s.name)
            plot_handles.append(line)

            # Mark differences between both in red (do for both lines)
            if mark_different:
                s_diff = s.copy()
                not_diff = s_diff.index.difference(
                    self.cp.idx_in_both_different)
                # blank everything except the differing observations
                s_diff.loc[not_diff] = np.nan
                diff_handle, = ax.plot(s_diff.index, s_diff, lw=3,
                                       marker=None, ls="solid",
                                       label="different",
                                       **self.color_dict["different"])
        # add to legend once (both highlight lines share label and style)
        if diff_handle is not None:
            plot_handles.append(diff_handle)

        # Mark sections with identical measurements in green (do for one line)
        if mark_identical:
            s_identical = self.cp.s1n.copy()
            not_identical = s_identical.index.difference(
                self.cp.idx_in_both_identical)
            s_identical.loc[not_identical] = np.nan
            identical_handle, = ax.plot(s_identical.index, s_identical,
                                        marker=None, ls="solid",
                                        label="identical", lw=3,
                                        **self.color_dict["identical"])
            plot_handles.append(identical_handle)

        # Mark unique observations with x's if they exist
        if mark_unique:
            if self.cp.idx_in_s1.size > 0:
                only_s1_handle, = ax.plot(
                    self.cp.idx_in_s1,
                    self.cp.s1.loc[self.cp.idx_in_s1],
                    marker="x", ms=5, ls="none",
                    **self.color_dict["only_in_s1"],
                    label="only in series 1: {}".format(self.cp.s1n.name))
                plot_handles.append(only_s1_handle)
            if self.cp.idx_in_s2.size > 0:
                only_s2_handle, = ax.plot(
                    self.cp.idx_in_s2,
                    self.cp.s2.loc[self.cp.idx_in_s2],
                    marker="x", ms=5, ls="none",
                    **self.color_dict["only_in_s2"],
                    label="only in series 2: {}".format(self.cp.s2.name))
                plot_handles.append(only_s2_handle)

        # Add legend and other plot stuff
        plot_labels = [i.get_label() for i in plot_handles]
        ax.legend(plot_handles, plot_labels, loc="best",
                  ncol=int(np.ceil(len(plot_handles) / 2.)))
        ax.grid(visible=True)
        fig.tight_layout()
        return ax

    def plot_relative_comparison(self, mark_unique=True, mark_different=True,
                                 mark_identical=True, mark_introduced=False,
                                 ax=None):
        """Plot comparison between two timeseries relative to base timeseries.

        Parameters
        ----------
        mark_unique : bool, optional
            mark unique observations with colored X's, by default True
        mark_different : bool, optional
            highlight where series are different in red, by default True
        mark_identical : bool, optional
            highlight where series are identical with green, by default True
        mark_introduced : bool, optional
            mark observations that are not in the base timeseries with X's,
            by default False
        ax : axis, optional
            axis to plot on, by default None

        Returns
        -------
        ax : axis
            axis handle
        """

        ax = self.plot_series_comparison(mark_unique=mark_unique,
                                         mark_different=mark_different,
                                         mark_identical=mark_identical,
                                         ax=ax)

        # the "different" highlight is drawn once per series, so the axis
        # reports its label twice; keep a single legend entry per label
        plot_handles, plot_labels = self._drop_duplicate_labels(
            *ax.get_legend_handles_labels())

        # Add a base series (i.e. raw data) to the plot
        p0, = ax.plot(self.cp.basen.index, self.cp.basen, lw=0.5, c="k",
                      label="base series", ls="solid", zorder=2)
        # insert entry at beginning
        plot_handles.insert(0, p0)
        plot_labels.insert(0, p0.get_label())

        # mark flagged in both
        if self.cp.idx_r_flagged_in_both.size > 0:
            s_base = pd.Series(index=self.cp.basen.index,
                               data=np.nan, dtype=float)
            s_base.loc[self.cp.idx_r_flagged_in_both] = \
                self.cp.basen.loc[self.cp.idx_r_flagged_in_both]
            p6, = ax.plot(s_base.index,
                          s_base, lw=0.5,
                          **self.color_dict["flagged_in_both"],
                          ls="none", marker="x", ms=5,
                          label="flagged in both")
            plot_handles.append(p6)
            plot_labels.append(p6.get_label())

        if mark_introduced:
            intro_idx = (self.cp.idx_r_introduced_in_s2
                         .union(self.cp.idx_r_introduced_in_both))
            if ((self.cp.idx_r_introduced_in_s1.size > 0) or
                    (intro_idx.size > 0)):
                s1_intro = self.cp.s1n.loc[self.cp.idx_r_introduced_in_s1]
                ax.plot(s1_intro.index, s1_intro,
                        ls="none", marker="x", ms=5,
                        **self.color_dict["introduced"],
                        label="introduced in s1/s2")
                # x-values come from the s2 selection itself so x and y
                # always align, even if s1n lacks some of these labels
                s2_intro = self.cp.s2n.loc[intro_idx]
                p7, = ax.plot(s2_intro.index, s2_intro,
                              ls="none", marker="x", ms=5,
                              **self.color_dict["introduced"],
                              label="introduced in s1/s2")
                # both marker sets share label and style: one legend entry
                plot_handles.append(p7)
                plot_labels.append(p7.get_label())

        ax.legend(plot_handles, plot_labels, loc="best",
                  ncol=int(np.ceil(len(plot_handles) / 3.)))

        return ax

    def plot_validation_result(self, ax=None):
        """Plot base timeseries with the flagged observations circled.

        Observations are circled in green when their index is in
        `idx_r_flagged_in_both` (legend: "Correctly flagged (TP)"), in
        dark orange for `idx_r_flagged_in_s1` ("Incorrectly flagged (FP)")
        and in red for `idx_r_flagged_in_s2` ("Wrongly kept (FN)").

        Parameters
        ----------
        ax : axis, optional
            axis object to plot on, by default None, which creates a new
            figure

        Returns
        -------
        ax : axis
            axis object
        """
        # Some plot settings
        ms_valid = 6  # markersize validation result
        mew = 1.25  # markeredgewidth validation result

        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=(12, 5))
        else:
            fig = ax.figure

        plot_handles = []

        # Add an original series i.e. raw data to the plot
        p0, = ax.plot(self.cp.basen.index, self.cp.basen, lw=0.5, c="k",
                      marker=".", ms=3, label="base series", ls="solid")
        plot_handles.append(p0)

        # set marker colors; an empty string means "not flagged".
        # Later assignments win if an index occurs in more than one set.
        c = pd.Series(index=self.cp.basen.index, data='')
        c.loc[self.cp.idx_r_flagged_in_both] = "Green"
        c.loc[self.cp.idx_r_flagged_in_s1] = "DarkOrange"
        c.loc[self.cp.idx_r_flagged_in_s2] = "Red"

        # keep only the observations that received a color
        mask = c != ""

        s = self.cp.basen.loc[mask]
        c = c.loc[mask]

        sc = ax.scatter(s.index, s.values, c=c.values, s=ms_valid**2,
                        linewidths=mew, marker="o", edgecolor=c.values,
                        zorder=10)
        # open circles: only the colored edge is drawn
        sc.set_facecolor("none")

        # empty lines that only serve as legend entries for the circles
        dummy1, = ax.plot([], [], c="Green", marker="o", mfc="none", mew=mew,
                          ls="none", ms=ms_valid, label="Correctly flagged (TP)")
        dummy2, = ax.plot([], [], c="DarkOrange", marker="o", mfc="none", mew=mew,
                          ls="none", ms=ms_valid, label="Incorrectly flagged (FP)")
        dummy3, = ax.plot([], [], c="Red", marker="o", mfc="none", mew=mew,
                          ls="none", ms=ms_valid, label="Wrongly kept (FN)")

        plot_handles += [dummy1, dummy2, dummy3]

        # Add legend and other plot stuff
        plot_labels = [i.get_label() for i in plot_handles]
        ax.legend(plot_handles, plot_labels, loc=(0, 1), markerscale=1.25,
                  ncol=len(plot_handles), frameon=False)
        ax.grid(visible=True)
        fig.tight_layout()

        return ax


def roc_plot(tpr, fpr, labels, colors=None, ax=None,
             plot_diagonal=True, colorbar_label=None, **kwargs):
    """Receiver operator characteristic plot.

    Plots the false positive rate (x-axis) versus the
    true positive rate (y-axis). The 'tpr' and 'fpr' can be passed as:
    -  values: outcome of a single error detection algorithm
    -  arrays: outcomes of error detection algorithm in which a detection
       parameter is varied.
    -  lists: for passing multiple results, entries can be values or
       arrays, as listed above.

    Parameters
    ----------
    tpr : list or value or array
        true positive rate. If passed as a list loops through each
        entry and plots it. Otherwise just plots the array or value.
    fpr : list or value or array
        false positive rate. If passed as a list loops through each
        entry and plots it. Otherwise just plots the array or value.
    labels : list or str
        label for each tpr/fpr entry.
    colors : optional
        forwarded unchanged as the `c` argument of ax.scatter for every
        entry, by default None. When given, a colorbar is added to the
        right of the axes.
    ax : matplotlib.pyplot.Axes, optional
        axes to plot on, default is None, which creates new figure
    plot_diagonal : bool, optional
        whether to plot the diagonal (useful for combining multiple
        ROC plots)
    colorbar_label : str, optional
        label for the colorbar, only used when `colors` is given,
        by default None
    **kwargs
        passed to ax.scatter

    Returns
    -------
    ax : matplotlib.pyplot.Axes
        axes instance
    """
    if not isinstance(tpr, list):
        tpr = [tpr]
    if not isinstance(fpr, list):
        fpr = [fpr]
    if not isinstance(labels, list):
        labels = [labels]

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    else:
        fig = ax.figure

    ax.set_aspect("equal")
    if plot_diagonal:
        ax.plot([0, 1], [0, 1], ls="dashed", lw=1.0, c="k",
                label="random guess")

    # stays None when there is nothing to plot, so the colorbar step
    # below can be skipped instead of failing on an unbound name
    sc = None
    for itpr, ifpr, ilbl in zip(tpr, fpr, labels):
        sc = ax.scatter(ifpr, itpr, s=6**2, c=colors,
                        marker="o", label=ilbl, **kwargs)

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.grid(visible=True)
    ax.legend(loc="lower right")
    ax.set_ylabel("True Positive Rate (sensitivity)")
    ax.set_xlabel("False Positive Rate (1-specificity)")
    ax.set_title("receiver operator characteristic plot")

    if colors is not None and sc is not None:
        # the colorbar is tied to the last scatter that was drawn
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", "5%", pad="3%")
        cbar = fig.colorbar(sc, cax=cax)
        if colorbar_label is not None:
            cbar.set_label(colorbar_label)

    fig.tight_layout()
    return ax


def det_plot(fpr, fnr, labels, ax=None, **kwargs):
    """Detection Error Tradeoff plot.

    Both rates are transformed with the inverse of the standard normal
    cumulative distribution (`scipy.stats.norm.ppf`) before plotting.

    Adapted from scikitlearn `DetCurveDisplay`.

    Parameters
    ----------
    fpr : list or value or array
        false positive rate. A list is plotted entry by entry, anything
        else is plotted as a single entry.
    fnr : list or value or array
        false negative rate. A list is plotted entry by entry, anything
        else is plotted as a single entry.
    labels : list or str
        label for each fpr/fnr entry.
    ax : matplotlib.pyplot.Axes, optional
        axes handle to plot on, by default None, which
        creates a new figure
    **kwargs
        passed to ax.plot

    Returns
    -------
    ax : matplotlib.pyplot.Axes
        axes handle
    """
    # wrap single entries so that all three inputs can be zipped
    fpr, fnr, labels = (
        arg if isinstance(arg, list) else [arg]
        for arg in (fpr, fnr, labels)
    )

    if ax is None:
        ax = plt.subplots(1, 1, figsize=(10, 8))[1]

    ax.set_aspect("equal")

    for rate_fp, rate_fn, entry_label in zip(fpr, fnr, labels):
        x_vals = norm.ppf(rate_fp)
        y_vals = norm.ppf(rate_fn)
        ax.plot(x_vals, y_vals, marker="o", ls="none", label=entry_label,
                **kwargs)

    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("False Negative Rate")

    # ticks are given as rates and placed at their transformed positions
    rates = [0.001, 0.01, 0.05, 0.20, 0.5, 0.80, 0.95, 0.99, 0.999]
    positions = norm.ppf(rates)
    percent_labels = []
    for rate in rates:
        # whole percentages without decimals, all others with one decimal
        if (100 * rate).is_integer():
            percent_labels.append('{:.0%}'.format(rate))
        else:
            percent_labels.append('{:.1%}'.format(rate))

    ax.set_xticks(positions)
    ax.set_xticklabels(percent_labels)
    ax.set_xlim(-3, 3)

    ax.set_yticks(positions)
    ax.set_yticklabels(percent_labels)
    ax.set_ylim(-3, 3)

    ax.grid(visible=True)
    ax.set_title("detection error tradeoff plot")

    return ax