Source code for naima.plot

# Licensed under a 3-clause BSD style license - see LICENSE.rst
import astropy.units as u
import numpy as np
from astropy import log
from emcee import autocorr

from .extern.interruptible_pool import InterruptiblePool as Pool
from .extern.validator import validate_array
from .utils import sed_conversion, validate_data_table

__all__ = ["plot_chain", "plot_fit", "plot_data", "plot_blob", "plot_corner"]

marker_cycle = ["o", "s", "d", "p", "*"]
# from seaborn: sns.color_palette('deep',6)
color_cycle = [
    (0.2980392156862745, 0.4470588235294118, 0.6901960784313725),
    (0.3333333333333333, 0.6588235294117647, 0.40784313725490196),
    (0.7686274509803922, 0.3058823529411765, 0.3215686274509804),
    (0.5058823529411764, 0.4470588235294118, 0.6980392156862745),
    (0.8, 0.7254901960784313, 0.4549019607843137),
    (0.39215686274509803, 0.7098039215686275, 0.803921568627451),
]


[docs]def plot_chain(sampler, p=None, **kwargs):
    """Generate a diagnostic plot of the sampler chains.

    Parameters
    ----------
    sampler : `emcee.EnsembleSampler`
        Sampler containing the chains to be plotted.
    p : int (optional)
        Index of the parameter to plot. If omitted, all chains are plotted.
    last_step : bool (optional)
        Whether to plot the last step of the chain or the complete chain
        (default).

    Returns
    -------
    figure : `matplotlib.figure.Figure`
        Figure
    """
    if p is None:
        npars = sampler.get_chain().shape[-1]
        for pp in range(npars):
            _plot_chain_func(sampler, pp, **kwargs)
        fig = None
    else:
        fig = _plot_chain_func(sampler, p, **kwargs)

    return fig


def _latex_float(f, format=".3g"):
    """http://stackoverflow.com/a/13490601"""
    float_str = "{{0:{0}}}".format(format).format(f)
    if "e" in float_str:
        base, exponent = float_str.split("e")
        return r"{0}\times 10^{{{1}}}".format(base, int(exponent))
    else:
        return float_str


def round2(x, n):
    y = round(x, n)
    if n < 1:
        y = str(int(y))
    else:
        # preserve trailing zeroes
        y = ("{{0:.{0}f}}".format(n)).format(x)
    return y


def _latex_value_error(val, elo, ehi=0, tol=0.25):
    order = int(np.log10(abs(val)))
    if order > 2 or order < -2:
        val /= 10 ** order
        elo /= 10 ** order
        ehi /= 10 ** order
    else:
        order = 0
    nlo = -int(np.floor(np.log10(elo)))
    if elo * 10 ** nlo < 2:
        nlo += 1
    if ehi:
        # elo = round(elo,nlo)
        nhi = -int(np.floor(np.log10(ehi)))
        if ehi * 10 ** nhi < 2:
            nhi += 1
        # ehi = round(ehi,nhi)
        if np.abs(elo - ehi) / ((elo + ehi) / 2.0) > tol:
            n = max(nlo, nhi)
            string = "{0}^{{+{1}}}_{{-{2}}}".format(
                *[
                    round2(x, nn)
                    for x, nn in zip([val, ehi, elo], [n, nhi, nlo])
                ]
            )
        else:
            e = (elo + ehi) / 2.0
            n = -int(np.floor(np.log10(e)))
            if e * 10 ** n < 2:
                n += 1
            string = "{0} \\pm {1}".format(*[round2(x, n) for x in [val, e]])
    else:
        string = "{0} \\pm {1}".format(*[round2(x, nlo) for x in [val, elo]])
    if order != 0:
        string = "(" + string + r")\times10^{{{0}}}".format(order)

    return "$" + string + "$"


def _plot_chain_func(sampler, p, last_step=False):
    chain = sampler.get_chain()
    label = sampler.labels[p]

    import matplotlib.pyplot as plt
    from scipy import stats

    if len(chain.shape) > 2:
        # transpose from (step, walker) to (walker, step)
        traces = chain[:, :, p].T
        if last_step:
            # keep only last step
            dist = traces[:, -1]
        else:
            # convert chain to flatchain
            dist = traces.flatten()
    else:
        log.warning(
            "we need the full chain to plot the traces, not a flatchain!"
        )
        return None

    nwalkers = traces.shape[0]
    nsteps = traces.shape[1]

    f = plt.figure()

    ax1 = f.add_subplot(221)
    ax2 = f.add_subplot(122)

    f.subplots_adjust(left=0.1, bottom=0.15, right=0.95, top=0.9)

    # plot five percent of the traces darker

    if nwalkers < 60:
        thresh = 1 - 3.0 / nwalkers
    else:
        thresh = 0.95
    red = np.arange(nwalkers) / float(nwalkers) >= thresh

    ax1.set_rasterization_zorder(1)
    for t in traces[~red]:  # range(nwalkers):
        ax1.plot(t, color=(0.1,) * 3, lw=1.0, alpha=0.25, zorder=0)
    for t in traces[red]:
        ax1.plot(t, color=color_cycle[0], lw=1.5, alpha=0.75, zorder=0)
    ax1.set_xlabel("step number")
    # [l.set_rotation(45) for l in ax1.get_yticklabels()]
    ax1.set_ylabel(label)
    ax1.yaxis.set_label_coords(-0.15, 0.5)
    ax1.set_title("Walker traces")

    nbins = min(max(25, int(len(dist) / 100.0)), 100)
    xlabel = label
    n, x, _ = ax2.hist(
        dist,
        nbins,
        histtype="stepfilled",
        color=color_cycle[0],
        lw=0,
        density=True,
    )
    kde = stats.kde.gaussian_kde(dist)
    ax2.plot(x, kde(x), color="k", label="KDE")
    quant = [16, 50, 84]
    xquant = np.percentile(dist, quant)
    quantiles = dict(zip(quant, xquant))

    ax2.axvline(
        quantiles[50],
        ls="--",
        color="k",
        alpha=0.5,
        lw=2,
        label="50% quantile",
    )
    ax2.axvspan(
        quantiles[16],
        quantiles[84],
        color=(0.5,) * 3,
        alpha=0.25,
        label="68% CI",
        lw=0,
    )
    # ax2.legend()
    for xticklabel in ax2.get_xticklabels():
        xticklabel.set_rotation(45)
    ax2.set_xlabel(xlabel)
    ax2.xaxis.set_label_coords(0.5, -0.1)
    ax2.set_title("posterior distribution")
    ax2.set_ylim(top=n.max() * 1.05)

    # Print distribution parameters on lower-left

    try:
        autocorr_message = "{0:.1f}".format(autocorr.integrated_time(chain)[p])
    except autocorr.AutocorrError:
        # Raised when chain is too short for meaningful auto-correlation
        # estimation
        autocorr_message = None

    if last_step:
        clen = "last ensemble"
    else:
        clen = "whole chain"

    chain_props = "Walkers: {0} \nSteps in chain: {1} \n".format(
        nwalkers, nsteps
    )
    if autocorr_message is not None:
        chain_props += "Autocorrelation time: {0}\n".format(autocorr_message)
    chain_props += "Mean acceptance fraction: {0:.3f}\n".format(
        np.mean(sampler.acceptance_fraction)
    ) + "Distribution properties for the {clen}:\n \
    $-$ median: ${median}$, std: ${std}$ \n \
    $-$ median with uncertainties based on \n \
      the 16th and 84th percentiles ($\\sim$1$\\sigma$):\n".format(
        median=_latex_float(quantiles[50]),
        std=_latex_float(np.std(dist)),
        clen=clen,
    )

    info_line = (
        " " * 10
        + label
        + " = "
        + _latex_value_error(
            quantiles[50],
            quantiles[50] - quantiles[16],
            quantiles[84] - quantiles[50],
        )
    )

    chain_props += info_line

    if "log10(" in label or "log(" in label:
        nlabel = label.split("(")[-1].split(")")[0]
        ltype = label.split("(")[0]
        if ltype == "log10":
            new_dist = 10 ** dist
        elif ltype == "log":
            new_dist = np.exp(dist)

        quant = [16, 50, 84]
        quantiles = dict(zip(quant, np.percentile(new_dist, quant)))

        label_template = "\n" + " " * 10 + "{{label:>{0}}}".format(len(label))

        new_line = label_template.format(label=nlabel)
        new_line += " = " + _latex_value_error(
            quantiles[50],
            quantiles[50] - quantiles[16],
            quantiles[84] - quantiles[50],
        )

        chain_props += new_line
        info_line += new_line

    log.info("{0:-^50}\n".format(label) + info_line)
    f.text(0.05, 0.45, chain_props, ha="left", va="top")

    return f


def _process_blob(sampler, modelidx, last_step=False, energy=None):
    """
    Process binary blob in sampler. If blob in position modelidx is:

    - a Quantity array of len(blob[i])=len(data['energy']: use blob as model,
      data['energy'] as modelx
    - a tuple: use first item as modelx, second as model
    - a Quantity scalar: return array of scalars
    """

    # Allow process blob to be used by _calc_samples and _calc_ML by sending
    # only blobs, not full sampler
    try:
        blobs = sampler.get_blobs()
        blob0 = blobs[-1][0][modelidx]
        energy = sampler.data["energy"]
    except AttributeError:
        blobs = [sampler]
        blob0 = sampler[0][modelidx]
        last_step = True

    if isinstance(blob0, u.Quantity):
        if blob0.size == energy.size:
            # Energy array for blob is not provided, use data['energy']
            modelx = energy
        elif blob0.size == 1:
            modelx = None

        if last_step:
            model = u.Quantity([m[modelidx] for m in blobs[-1]])
        else:
            model = []
            for step in blobs:
                for walkerblob in step:
                    model.append(walkerblob[modelidx])
            model = u.Quantity(model)
    elif np.isscalar(blob0):
        modelx = None

        if last_step:
            model = u.Quantity([m[modelidx] for m in blobs[-1]])
        else:
            model = []
            for step in blobs:
                for walkerblob in step:
                    model.append(walkerblob[modelidx])
            model = u.Quantity(model)
    elif isinstance(blob0, list) or isinstance(blob0, tuple):
        if (
            len(blob0) == 2
            and isinstance(blob0[0], u.Quantity)
            and isinstance(blob0[1], u.Quantity)
        ):
            # Energy array for model is item 0 in blob, model flux is item 1
            modelx = blob0[0]

            if last_step:
                model = u.Quantity([m[modelidx][1] for m in blobs[-1]])
            else:
                model = []
                for step in blobs:
                    for walkerblob in step:
                        model.append(walkerblob[modelidx][1])
                model = u.Quantity(model)
        else:
            raise TypeError("Model {0} has wrong blob format".format(modelidx))

    else:
        raise TypeError("Model {0} has wrong blob format".format(modelidx))

    return modelx, model


def _read_or_calc_samples(
    sampler,
    modelidx=0,
    n_samples=100,
    last_step=False,
    e_range=None,
    e_npoints=100,
    threads=None,
):
    """Get samples from blob or compute them from chain and sampler.modelfn"""

    if e_range is None:
        # return the results saved in blobs
        modelx, model = _process_blob(sampler, modelidx, last_step=last_step)
    else:
        # prepare bogus data for calculation
        e_range = validate_array(
            "e_range", u.Quantity(e_range), physical_type="energy"
        )
        e_unit = e_range.unit
        energy = (
            np.logspace(
                np.log10(e_range[0].value),
                np.log10(e_range[1].value),
                e_npoints,
            )
            * e_unit
        )
        data = {
            "energy": energy,
            "flux": np.zeros(energy.shape) * sampler.data["flux"].unit,
        }
        # init pool and select parameters
        chain = (
            sampler.get_chain()[-1]
            if last_step
            else sampler.get_chain(flat=True)
        )
        pars = chain[np.random.randint(len(chain), size=n_samples)]
        args = ((p, data) for p in pars)
        blobs = []

        pool = Pool(threads)
        modelouts = pool.starmap(sampler.modelfn, args)
        pool.close()
        pool.terminate()

        for modelout in modelouts:
            if isinstance(modelout, np.ndarray):
                blobs.append([modelout])
            else:
                blobs.append(modelout)

        modelx, model = _process_blob(
            blobs, modelidx=modelidx, energy=data["energy"]
        )

    return modelx, model


def _calc_ML(sampler, modelidx=0, e_range=None, e_npoints=100):
    """Get ML model from blob or compute them from chain and sampler.modelfn"""

    ML, MLp, MLerr, ML_model = find_ML(sampler, modelidx)

    if e_range is not None:
        # prepare bogus data for calculation
        e_range = validate_array(
            "e_range", u.Quantity(e_range), physical_type="energy"
        )
        e_unit = e_range.unit
        energy = (
            np.logspace(
                np.log10(e_range[0].value),
                np.log10(e_range[1].value),
                e_npoints,
            )
            * e_unit
        )
        data = {
            "energy": energy,
            "flux": np.zeros(energy.shape) * sampler.data["flux"].unit,
        }
        modelout = sampler.modelfn(MLp, data)

        if isinstance(modelout, np.ndarray):
            blob = modelout
        else:
            blob = modelout[modelidx]

        if isinstance(blob, u.Quantity):
            modelx = data["energy"].copy()
            model_ML = blob.copy()
        elif len(blob) == 2:
            modelx = blob[0].copy()
            model_ML = blob[1].copy()
        else:
            raise TypeError("Model {0} has wrong blob format".format(modelidx))

        ML_model = (modelx, model_ML)

    return ML, MLp, MLerr, ML_model


def _calc_CI(
    sampler,
    modelidx=0,
    confs=[3, 1],
    last_step=False,
    e_range=None,
    e_npoints=100,
    threads=None,
):
    """Calculate confidence interval."""
    from scipy import stats

    # If we are computing the samples for the confidence intervals, we need at
    # least one sample to constrain the highest confidence band
    # 1 sigma -> 6 samples
    # 2 sigma -> 43 samples
    # 3 sigma -> 740 samples
    # 4 sigma -> 31574 samples
    # 5 sigma -> 3488555 samples
    # We limit it to 1000 samples and warn that it might not be enough
    if e_range is not None:
        maxconf = np.max(confs)
        minsamples = min(100, int(1 / stats.norm.cdf(-maxconf) + 1))
        if minsamples > 1000:
            log.warning(
                "In order to sample the confidence band for {0} sigma,"
                " {1} new samples need to be computed, but we are limiting"
                " it to 1000 samples, so the confidence band might not be"
                " well constrained."
                " Consider reducing the maximum"
                " confidence significance or using the samples stored in"
                " the sampler by setting e_range"
                " to None".format(maxconf, minsamples)
            )
            minsamples = 1000
    else:
        minsamples = None

    modelx, model = _read_or_calc_samples(
        sampler,
        modelidx,
        last_step=last_step,
        e_range=e_range,
        e_npoints=e_npoints,
        n_samples=minsamples,
        threads=threads,
    )

    nwalkers = len(model) - 1
    CI = []
    for conf in confs:
        fmin = stats.norm.cdf(-conf)
        fmax = stats.norm.cdf(conf)
        ymin, ymax = [], []
        for fr, y in ((fmin, ymin), (fmax, ymax)):
            nf = int((fr * nwalkers))
            for i in range(len(modelx)):
                ysort = np.sort(model[:, i])
                y.append(ysort[nf])

        # create an array from lists ymin and ymax preserving units
        CI.append((u.Quantity(ymin), u.Quantity(ymax)))

    return modelx, CI


def _plot_MLmodel(ax, sampler, modelidx, e_range, e_npoints, e_unit, sed):
    """compute and plot ML model"""

    ML, MLp, MLerr, ML_model = _calc_ML(
        sampler, modelidx, e_range=e_range, e_npoints=e_npoints
    )
    f_unit, sedf = sed_conversion(ML_model[0], ML_model[1].unit, sed)

    ax.loglog(
        ML_model[0].to(e_unit).value,
        (ML_model[1] * sedf).to(f_unit).value,
        color="k",
        lw=2,
        alpha=0.8,
    )


def plot_CI(
    ax,
    sampler,
    modelidx=0,
    sed=True,
    confs=[3, 1, 0.5],
    e_unit=u.eV,
    label=None,
    e_range=None,
    e_npoints=100,
    threads=None,
    last_step=False,
):
    """Plot confidence interval.

    Parameters
    ----------
    ax : `matplotlib.Axes`
        Axes to plot on.
    sampler : `emcee.EnsembleSampler`
        Sampler
    modelidx : int, optional
        Model index. Default is 0
    sed : bool, optional
        Whether to plot SED or differential spectrum. If `None`, the units of
        the observed spectrum will be used.
    confs : list, optional
        List of confidence levels (in sigma) to use for generating the
        confidence intervals. Default is `[3,1,0.5]`
    e_unit : :class:`~astropy.units.Unit` or str parseable to unit
        Unit in which to plot energy axis.
    e_npoints : int, optional
        How many points to compute for the model samples and ML model if
        `e_range` is set.
    threads : int, optional
        How many parallel processing threads to use when computing the samples.
        Defaults to the number of available cores.
    last_step : bool, optional
        Whether to only use the positions in the final step of the run (True,
        default) or the whole chain (False).
    """
    confs.sort(reverse=True)

    modelx, CI = _calc_CI(
        sampler,
        modelidx=modelidx,
        confs=confs,
        e_range=e_range,
        e_npoints=e_npoints,
        last_step=last_step,
        threads=threads,
    )
    # pick first confidence interval curve for units
    f_unit, sedf = sed_conversion(modelx, CI[0][0].unit, sed)

    for (ymin, ymax), conf in zip(CI, confs):
        color = np.log(conf) / np.log(20) + 0.4
        ax.fill_between(
            modelx.to(e_unit).value,
            (ymax * sedf).to(f_unit).value,
            (ymin * sedf).to(f_unit).value,
            lw=0.001,
            color=(color,) * 3,
            alpha=0.6,
            zorder=-10,
        )

    _plot_MLmodel(ax, sampler, modelidx, e_range, e_npoints, e_unit, sed)

    if label is not None:
        ax.set_ylabel(
            "{0} [{1}]".format(label, f_unit.to_string("latex_inline"))
        )


def plot_samples(
    ax,
    sampler,
    modelidx=0,
    sed=True,
    n_samples=100,
    e_unit=u.eV,
    e_range=None,
    e_npoints=100,
    threads=None,
    label=None,
    last_step=False,
):
    """Plot a number of samples from the sampler chain.

    Parameters
    ----------
    ax : `matplotlib.Axes`
        Axes to plot on.
    sampler : `emcee.EnsembleSampler`
        Sampler
    modelidx : int, optional
        Model index. Default is 0
    sed : bool, optional
        Whether to plot SED or differential spectrum. If `None`, the units of
        the observed spectrum will be used.
    n_samples : int, optional
        Number of samples to plot. Default is 100.
    e_unit : :class:`~astropy.units.Unit` or str parseable to unit
        Unit in which to plot energy axis.
    e_range : list of `~astropy.units.Quantity`, length 2, optional
        Limits in energy for the computation of the model samples and ML model.
        Note that setting this parameter will mean that the samples for the
        model are recomputed and depending on the model speed might be quite
        slow.
    e_npoints : int, optional
        How many points to compute for the model samples and ML model if
        `e_range` is set.
    threads : int, optional
        How many parallel processing threads to use when computing the samples.
        Defaults to the number of available cores.
    last_step : bool, optional
        Whether to only use the positions in the final step of the run (True,
        default) or the whole chain (False).
    """

    modelx, model = _read_or_calc_samples(
        sampler,
        modelidx,
        last_step=last_step,
        e_range=e_range,
        e_npoints=e_npoints,
        threads=threads,
    )
    # pick first model sample for units
    f_unit, sedf = sed_conversion(modelx, model[0].unit, sed)

    sample_alpha = min(5.0 / n_samples, 0.5)
    for my in model[np.random.randint(len(model), size=n_samples)]:
        ax.loglog(
            modelx.to(e_unit).value,
            (my * sedf).to(f_unit).value,
            color=(0.1,) * 3,
            alpha=sample_alpha,
            lw=1.0,
        )

    _plot_MLmodel(ax, sampler, modelidx, e_range, e_npoints, e_unit, sed)

    if label is not None:
        ax.set_ylabel(
            "{0} [{1}]".format(label, f_unit.to_string("latex_inline"))
        )


def find_ML(sampler, modelidx):
    """
    Find Maximum Likelihood parameters as those in the chain with a highest log
    probability.
    """
    lnprobability = sampler.get_log_prob()
    index = np.unravel_index(np.argmax(lnprobability), lnprobability.shape)
    MLp = sampler.get_chain()[index]
    blobs = sampler.get_blobs()
    if modelidx is not None and blobs is not None:
        blob = blobs[index][modelidx]
        if isinstance(blob, u.Quantity):
            modelx = sampler.data["energy"].copy()
            model_ML = blob.copy()
        elif len(blob) == 2:
            modelx = blob[0].copy()
            model_ML = blob[1].copy()
        else:
            raise TypeError("Model {0} has wrong blob format".format(modelidx))
    elif modelidx is not None and hasattr(sampler, "modelfn"):
        blob = _process_blob(
            [sampler.modelfn(MLp, sampler.data)],
            modelidx,
            energy=sampler.data["energy"],
        )
        modelx, model_ML = blob[0], blob[1][0]
    else:
        modelx, model_ML = None, None

    MLerr = []
    for dist in sampler.get_chain(flat=True).T:
        hilo = np.percentile(dist, [16.0, 84.0])
        MLerr.append((hilo[1] - hilo[0]) / 2.0)
    ML = lnprobability[index]

    return ML, MLp, MLerr, (modelx, model_ML)


[docs]def plot_blob(
    sampler, blobidx=0, label=None, last_step=False, figure=None, **kwargs
):
    """
    Plot a metadata blob as a fit to spectral data or value distribution

    Additional ``kwargs`` are passed to `plot_fit`.

    Parameters
    ----------
    sampler : `emcee.EnsembleSampler`
        Sampler with a stored chain.
    blobidx : int, optional
        Metadata blob index to plot.
    label : str, optional
        Label for the value distribution. Labels for the fit plot can be passed
        as ``xlabel`` and ``ylabel`` and will be passed to `plot_fit`.

    Returns
    -------
    figure : `matplotlib.pyplot.Figure`
        `matplotlib` figure instance containing the plot.
    """

    modelx, model = _process_blob(sampler, blobidx, last_step)
    if label is None:
        label = "Model output {0}".format(blobidx)

    if modelx is None:
        # Blob is scalar, plot distribution
        f = plot_distribution(model, label, figure=figure)
    else:
        f = plot_fit(
            sampler,
            modelidx=blobidx,
            last_step=last_step,
            label=label,
            figure=figure,
            **kwargs
        )

    return f


[docs]def plot_fit(
    sampler,
    modelidx=0,
    label=None,
    sed=True,
    last_step=False,
    n_samples=100,
    confs=None,
    ML_info=False,
    figure=None,
    plotdata=None,
    plotresiduals=None,
    e_unit=None,
    e_range=None,
    e_npoints=100,
    threads=None,
    xlabel=None,
    ylabel=None,
    ulim_opts={},
    errorbar_opts={},
):
    """
    Plot data with fit confidence regions.

    Parameters
    ----------
    sampler : `emcee.EnsembleSampler`
        Sampler with a stored chain.
    modelidx : int, optional
        Model index to plot.
    label : str, optional
        Label for the title of the plot.
    sed : bool, optional
        Whether to plot SED or differential spectrum.
    last_step : bool, optional
        Whether to use only the samples of the last step in the run when
        showing either the model samples or the confidence intervals.
    n_samples : int, optional
        If not ``None``, number of sample models to plot. If ``None``,
        confidence bands will be plotted instead of samples. Default is 100.
    confs : list, optional
        List of confidence levels (in sigma) to use for generating the
        confidence intervals. Default is to plot sample models instead of
        confidence bands.
    ML_info : bool, optional
        Whether to plot information about the maximum likelihood parameters and
        the standard deviation of their distributions. Default is True.
    figure : `matplotlib.figure.Figure`, optional
        `matplotlib` figure to plot on. If omitted a new one will be generated.
    plotdata : bool, optional
        Wheter to plot data on top of model confidence intervals. Default is
        True if the physical types of the data and the model match.
    plotresiduals : bool, optional
        Wheter to plot the residuals with respect to the maximum likelihood
        model. Default is True if ``plotdata`` is True and either ``confs`` or
        ``n_samples`` are set.
    e_unit : `~astropy.units.Unit`, optional
        Units for the energy axis of the plot. The default is to use the units
        of the energy array of the observed data.
    e_range : list of `~astropy.units.Quantity`, length 2, optional
        Limits in energy for the computation of the model samples and ML model.
        Note that setting this parameter will mean that the samples for the
        model are recomputed and depending on the model speed might be quite
        slow.
    e_npoints : int, optional
        How many points to compute for the model samples and ML model if
        `e_range` is set.
    threads : int, optional
        How many parallel processing threads to use when computing the samples.
        Defaults to the number of available cores.
    xlabel : str, optional
        Label for the ``x`` axis of the plot.
    ylabel : str, optional
        Label for the ``y`` axis of the plot.
    ulim_opts : dict
        Option for upper-limit plotting. Available options are capsize (arrow
        width) and height_fraction (arrow length in fraction of flux value).
    errorbar_opts : dict
        Addtional options to pass to `matplotlib.plt.errorbar` for plotting the
        spectral flux points.

    """
    import matplotlib.pyplot as plt

    ML, MLp, MLerr, model_ML = find_ML(sampler, modelidx)
    infostr = "Maximum log probability: {0:.3g}\n".format(ML)
    infostr += "Maximum Likelihood values:\n"
    maxlen = np.max([len(ilabel) for ilabel in sampler.labels])
    vartemplate = "{{2:>{0}}}: {{0:>8.3g}} +/- {{1:<8.3g}}\n".format(maxlen)
    for p, v, ilabel in zip(MLp, MLerr, sampler.labels):
        infostr += vartemplate.format(p, v, ilabel)

    # log.info(infostr)

    data = sampler.data

    if e_range is None and not hasattr(sampler, "blobs"):
        e_range = data["energy"][[0, -1]] * np.array((1.0 / 3.0, 3.0))

    if plotdata is None and len(model_ML[0]) == len(data["energy"]):
        model_unit, _ = sed_conversion(model_ML[0], model_ML[1].unit, sed)
        data_unit, _ = sed_conversion(data["energy"], data["flux"].unit, sed)
        plotdata = model_unit.is_equivalent(data_unit)
    elif plotdata is None:
        plotdata = False

    if plotresiduals is None and plotdata and (confs is not None or n_samples):
        plotresiduals = True

    if confs is None and not n_samples and plotdata and not plotresiduals:
        # We actually only want to plot the data, so let's go there
        return plot_data(
            sampler.data,
            xlabel=xlabel,
            ylabel=ylabel,
            sed=sed,
            figure=figure,
            e_unit=e_unit,
            ulim_opts=ulim_opts,
            errorbar_opts=errorbar_opts,
        )

    if figure is None:
        f = plt.figure()
    else:
        f = figure

    if plotdata and plotresiduals:
        ax1 = plt.subplot2grid((4, 1), (0, 0), rowspan=3)
        ax2 = plt.subplot2grid((4, 1), (3, 0), sharex=ax1)
        for subp in [ax1, ax2]:
            f.add_subplot(subp)
    else:
        ax1 = f.add_subplot(111)

    if e_unit is None:
        e_unit = data["energy"].unit

    if confs is not None:
        plot_CI(
            ax1,
            sampler,
            modelidx,
            sed=sed,
            confs=confs,
            e_unit=e_unit,
            label=label,
            e_range=e_range,
            e_npoints=e_npoints,
            last_step=last_step,
            threads=threads,
        )
    elif n_samples:
        plot_samples(
            ax1,
            sampler,
            modelidx,
            sed=sed,
            n_samples=n_samples,
            e_unit=e_unit,
            label=label,
            e_range=e_range,
            e_npoints=e_npoints,
            last_step=last_step,
            threads=threads,
        )
    else:
        # plot only ML model
        _plot_MLmodel(ax1, sampler, modelidx, e_range, e_npoints, e_unit, sed)

    xlaxis = ax1
    if plotdata:
        _plot_data_to_ax(
            data,
            ax1,
            e_unit=e_unit,
            sed=sed,
            ylabel=ylabel,
            ulim_opts=ulim_opts,
            errorbar_opts=errorbar_opts,
        )
        if plotresiduals:
            _plot_residuals_to_ax(
                data,
                model_ML,
                ax2,
                e_unit=e_unit,
                sed=sed,
                errorbar_opts=errorbar_opts,
            )
            xlaxis = ax2
            for tl in ax1.get_xticklabels():
                tl.set_visible(False)
        xmin = 10 ** np.floor(
            np.log10(
                np.min(data["energy"] - data["energy_error_lo"])
                .to(e_unit)
                .value
            )
        )
        xmax = 10 ** np.ceil(
            np.log10(
                np.max(data["energy"] + data["energy_error_hi"])
                .to(e_unit)
                .value
            )
        )
        ax1.set_xlim(xmin, xmax)
    else:
        ax1.set_xscale("log")
        ax1.set_yscale("log")
        if sed:
            ndecades = 10
        else:
            ndecades = 20
        # restrict y axis to ndecades to avoid autoscaling deep exponentials
        xmin, xmax, ymin, ymax = ax1.axis()
        ymin = max(ymin, ymax / 10 ** ndecades)
        ax1.set_ylim(bottom=ymin)
        # scale x axis to largest model_ML x point within ndecades decades of
        # maximum
        f_unit, sedf = sed_conversion(model_ML[0], model_ML[1].unit, sed)
        hi = np.where((model_ML[1] * sedf).to(f_unit).value > ymin)
        xmax = np.max(model_ML[0][hi])
        ax1.set_xlim(right=10 ** np.ceil(np.log10(xmax.to(e_unit).value)))

    if e_range is not None:
        # ensure that xmin/xmax contains e_range
        xmin, xmax, ymin, ymax = ax1.axis()
        xmin = min(xmin, e_range[0].to(e_unit).value)
        xmax = max(xmax, e_range[1].to(e_unit).value)
        ax1.set_xlim(xmin, xmax)

    if ML_info and (confs is not None or n_samples):
        ax1.text(
            0.05,
            0.05,
            infostr,
            ha="left",
            va="bottom",
            transform=ax1.transAxes,
            family="monospace",
        )

    if label is not None:
        ax1.set_title(label)

    if xlabel is None:
        xlaxis.set_xlabel(
            "Energy [{0}]".format(e_unit.to_string("latex_inline"))
        )
    else:
        xlaxis.set_xlabel(xlabel)

    f.subplots_adjust(hspace=0)

    return f


def _plot_ulims(
    ax, x, y, xerr, color, capsize=5, height_fraction=0.25, elinewidth=2
):
    """
    Plot upper limits as arrows with cap at value of upper limit.

    uplim behaviour has been fixed in matplotlib 1.4
    """
    ax.errorbar(
        x, y, xerr=xerr, ls="", color=color, elinewidth=elinewidth, capsize=0
    )

    from distutils.version import LooseVersion

    import matplotlib

    mpl_version = LooseVersion(matplotlib.__version__)
    if mpl_version >= LooseVersion("1.4.0"):
        ax.errorbar(
            x,
            y,
            yerr=height_fraction * y,
            ls="",
            uplims=True,
            color=color,
            elinewidth=elinewidth,
            capsize=capsize,
            zorder=10,
        )
    else:
        ax.errorbar(
            x,
            (1 - height_fraction) * y,
            yerr=height_fraction * y,
            ls="",
            lolims=True,
            color=color,
            elinewidth=elinewidth,
            capsize=capsize,
            zorder=10,
        )


def _plot_data_to_ax(
    data_all,
    ax1,
    e_unit=None,
    sed=True,
    ylabel=None,
    ulim_opts={},
    errorbar_opts={},
):
    """Plots data errorbars and upper limits onto ax.
    X label is left to plot_data and plot_fit because they depend on whether
    residuals are plotted.
    """

    if e_unit is None:
        e_unit = data_all["energy"].unit

    f_unit, sedf = sed_conversion(
        data_all["energy"], data_all["flux"].unit, sed
    )

    if "group" not in data_all.keys():
        data_all["group"] = np.zeros(len(data_all))

    groups = np.unique(data_all["group"])

    for g in groups:
        data = data_all[np.where(data_all["group"] == g)]
        _, sedfg = sed_conversion(data["energy"], data["flux"].unit, sed)

        # wrap around color and marker cycles
        color = color_cycle[int(g) % len(color_cycle)]
        marker = marker_cycle[int(g) % len(marker_cycle)]

        ul = data["ul"]
        notul = ~ul

        # Hack to show y errors compatible with 0 in loglog plot
        yerr_lo = data["flux_error_lo"][notul]
        y = data["flux"][notul].to(yerr_lo.unit)
        bad_err = np.where((y - yerr_lo) <= 0.0)
        yerr_lo[bad_err] = y[bad_err] * (1.0 - 1e-7)
        yerr = u.Quantity((yerr_lo, data["flux_error_hi"][notul]))
        xerr = u.Quantity((data["energy_error_lo"], data["energy_error_hi"]))

        opts = dict(
            zorder=100,
            marker=marker,
            ls="",
            elinewidth=2,
            capsize=0,
            mec=color,
            mew=0.1,
            ms=5,
            color=color,
        )
        opts.update(**errorbar_opts)

        ax1.errorbar(
            data["energy"][notul].to(e_unit).value,
            (data["flux"][notul] * sedfg[notul]).to(f_unit).value,
            yerr=(yerr * sedfg[notul]).to(f_unit).value,
            xerr=xerr[:, notul].to(e_unit).value,
            **opts
        )

        if np.any(ul):
            if "elinewidth" in errorbar_opts:
                ulim_opts["elinewidth"] = errorbar_opts["elinewidth"]

            _plot_ulims(
                ax1,
                data["energy"][ul].to(e_unit).value,
                (data["flux"][ul] * sedfg[ul]).to(f_unit).value,
                (xerr[:, ul]).to(e_unit).value,
                color,
                **ulim_opts
            )

    ax1.set_xscale("log")
    ax1.set_yscale("log")
    xmin = 10 ** np.floor(
        np.log10(
            np.min(data["energy"] - data["energy_error_lo"]).to(e_unit).value
        )
    )
    xmax = 10 ** np.ceil(
        np.log10(
            np.max(data["energy"] + data["energy_error_hi"]).to(e_unit).value
        )
    )
    ax1.set_xlim(xmin, xmax)
    # avoid autoscaling to errorbars to 0
    notul = ~data_all["ul"]
    if np.any(data_all["flux_error_lo"][notul] >= data_all["flux"][notul]):
        elo = (data_all["flux"][notul] * sedf[notul]).to(f_unit).value - (
            data_all["flux_error_lo"][notul] * sedf[notul]
        ).to(f_unit).value
        gooderr = np.where(
            data_all["flux_error_lo"][notul] < data_all["flux"][notul]
        )
        ymin = 10 ** np.floor(np.log10(np.min(elo[gooderr])))
        ax1.set_ylim(bottom=ymin)

    if ylabel is None:
        if sed:
            ax1.set_ylabel(
                r"$E^2\mathrm{{d}}N/\mathrm{{d}}E$"
                " [{0}]".format(u.Unit(f_unit).to_string("latex_inline"))
            )
        else:
            ax1.set_ylabel(
                r"$\mathrm{{d}}N/\mathrm{{d}}E$"
                " [{0}]".format(u.Unit(f_unit).to_string("latex_inline"))
            )
    else:
        ax1.set_ylabel(ylabel)


def _plot_residuals_to_ax(
    data_all, model_ML, ax, e_unit=u.eV, sed=True, errorbar_opts={}
):
    """Function to compute and plot residuals in units of the uncertainty"""
    if "group" not in data_all.keys():
        data_all["group"] = np.zeros(len(data_all))

    groups = np.unique(data_all["group"])

    MLf_unit, MLsedf = sed_conversion(model_ML[0], model_ML[1].unit, sed)
    MLene = model_ML[0].to(e_unit)
    MLflux = (model_ML[1] * MLsedf).to(MLf_unit)

    ax.axhline(0, color="k", lw=1, ls="--")

    interp = False
    if data_all["energy"].size != MLene.size or not np.allclose(
        data_all["energy"].value, MLene.value
    ):
        interp = True
        from scipy.interpolate import interp1d

        modelfunc = interp1d(MLene.value, MLflux.value, bounds_error=False)

    for g in groups:
        groupidx = np.where(data_all["group"] == g)

        data = data_all[groupidx]

        notul = ~data["ul"]
        df_unit, dsedf = sed_conversion(data["energy"], data["flux"].unit, sed)
        ene = data["energy"].to(e_unit)
        xerr = u.Quantity((data["energy_error_lo"], data["energy_error_hi"]))
        flux = (data["flux"] * dsedf).to(df_unit)
        dflux = (data["flux_error_lo"] + data["flux_error_hi"]) / 2.0
        dflux = (dflux * dsedf).to(df_unit)[notul]

        if interp:
            difference = flux[notul] - modelfunc(ene[notul]) * flux.unit
        else:
            difference = flux[notul] - MLflux[groupidx][notul]

        # wrap around color and marker cycles
        color = color_cycle[int(g) % len(color_cycle)]
        marker = marker_cycle[int(g) % len(marker_cycle)]

        opts = dict(
            zorder=100,
            marker=marker,
            ls="",
            elinewidth=2,
            capsize=0,
            mec=color,
            mew=0.1,
            ms=6,
            color=color,
        )
        opts.update(errorbar_opts)

        ax.errorbar(
            ene[notul].value,
            (difference / dflux).decompose().value,
            yerr=(dflux / dflux).decompose().value,
            xerr=xerr[:, notul].to(e_unit).value,
            **opts
        )

    from matplotlib.ticker import MaxNLocator

    ax.yaxis.set_major_locator(
        MaxNLocator(5, integer="True", prune="upper", symmetric=True)
    )

    ax.set_ylabel(r"$\Delta\sigma$")
    ax.set_xscale("log")


[docs]def plot_data(
    input_data,
    xlabel=None,
    ylabel=None,
    sed=True,
    figure=None,
    e_unit=None,
    ulim_opts={},
    errorbar_opts={},
):
    """
    Plot spectral data.

    Parameters
    ----------
    input_data : `emcee.EnsembleSampler`, `astropy.table.Table`, or `dict`
        Spectral data to plot. Can be given as a data table, a dict generated
        with `validate_data_table` or a `emcee.EnsembleSampler` with a data
        property.
    xlabel : str, optional
        Label for the ``x`` axis of the plot.
    ylabel : str, optional
        Label for the ``y`` axis of the plot.
    sed : bool, optional
        Whether to plot SED or differential spectrum.
    figure : `matplotlib.figure.Figure`, optional
        `matplotlib` figure to plot on. If omitted a new one will be generated.
    e_unit : `astropy.unit.Unit`, optional
        Units for energy axis. Defaults to those of the data.
    ulim_opts : dict
        Options for upper-limit plotting. Available options are capsize (arrow
        width) and height_fraction (arrow length in fraction of flux value).
    errorbar_opts : dict
        Addtional options to pass to `matplotlib.plt.errorbar` for plotting the
        spectral flux points.
    """

    import matplotlib.pyplot as plt

    try:
        data = validate_data_table(input_data)
    except TypeError as exc:
        if hasattr(input_data, "data"):
            data = input_data.data
        elif isinstance(input_data, dict) and "energy" in input_data.keys():
            data = input_data
        else:
            log.warning(
                "input_data format unknown, no plotting data! "
                "Data loading exception: {}".format(exc)
            )
            raise

    if figure is None:
        f = plt.figure()
    else:
        f = figure

    if len(f.axes) > 0:
        ax1 = f.axes[0]
    else:
        ax1 = f.add_subplot(111)

    # try to get units from previous plot in figure
    try:
        old_e_unit = u.Unit(ax1.get_xlabel().split("[")[-1].split("]")[0])
    except ValueError:
        old_e_unit = u.Unit("")

    if e_unit is None and old_e_unit.physical_type == "energy":
        e_unit = old_e_unit
    elif e_unit is None:
        e_unit = data["energy"].unit

    _plot_data_to_ax(
        data,
        ax1,
        e_unit=e_unit,
        sed=sed,
        ylabel=ylabel,
        ulim_opts=ulim_opts,
        errorbar_opts=errorbar_opts,
    )

    if xlabel is not None:
        ax1.set_xlabel(xlabel)
    elif xlabel is None and ax1.get_xlabel() == "":
        ax1.set_xlabel(
            r"$\mathrm{Energy}$"
            + " [{0}]".format(e_unit.to_string("latex_inline"))
        )

    ax1.autoscale()

    return f


def plot_distribution(samples, label, figure=None):
    """Plot a distribution and print statistics about it"""

    import matplotlib.pyplot as plt
    from scipy import stats

    quant = [16, 50, 84]
    quantiles = dict(zip(quant, np.percentile(samples, quant)))
    std = np.std(samples)

    if isinstance(samples[0], u.Quantity):
        unit = samples[0].unit
        std = std.value
        quantiles = {k: v.value for k, v in quantiles.items()}
    else:
        unit = ""

    dist_props = "{label} distribution properties:\n \
    $-$ median: ${median}$ {unit}, std: ${std}$ {unit}\n \
    $-$ Median with uncertainties based on \n \
      the 16th and 84th percentiles ($\\sim$1$\\sigma$):\n\
          {label} = {value_error} {unit}".format(
        label=label,
        median=_latex_float(quantiles[50]),
        std=_latex_float(std),
        value_error=_latex_value_error(
            quantiles[50],
            quantiles[50] - quantiles[16],
            quantiles[84] - quantiles[50],
        ),
        unit=unit,
    )

    if figure is None:
        f = plt.figure()
    else:
        f = figure

    ax = f.add_subplot(111)
    f.subplots_adjust(bottom=0.40, top=0.93, left=0.06, right=0.95)

    f.text(0.2, 0.27, dist_props, ha="left", va="top")

    histnbins = min(max(25, int(len(samples) / 100.0)), 100)
    xlabel = "" if label is None else label

    if isinstance(samples, u.Quantity):
        samples_nounit = samples.value
    else:
        samples_nounit = samples

    n, x, _ = ax.hist(
        samples_nounit,
        histnbins,
        histtype="stepfilled",
        color=color_cycle[0],
        lw=0,
        density=True,
    )

    kde = stats.kde.gaussian_kde(samples_nounit)
    ax.plot(x, kde(x), color="k", label="KDE")

    ax.axvline(
        quantiles[50],
        ls="--",
        color="k",
        alpha=0.5,
        lw=2,
        label="50% quantile",
    )
    ax.axvspan(
        quantiles[16],
        quantiles[84],
        color=(0.5,) * 3,
        alpha=0.25,
        label="68% CI",
        lw=0,
    )
    for xticklabel in ax.get_xticklabels():
        xticklabel.set_rotation(45)
    if unit != "":
        xlabel += " [{0}]".format(unit)
    ax.set_xlabel(xlabel)
    ax.set_title("Posterior distribution of {0}".format(label))
    ax.set_ylim(top=n.max() * 1.05)

    return f


[docs]def plot_corner(sampler, show_ML=True, **kwargs):
    """
    A plot that summarizes the parameter samples by showing them as individual
    histograms and 2D histograms against each other. The maximum likelihood
    parameter vector is indicated by a cross.

    This function is a thin wrapper around `corner.corner`, found at
    https://github.com/corner/corner.py.

    Parameters
    ----------
    sampler : `emcee.EnsembleSampler`
        Sampler with a stored chain.
    show_ML : bool, optional
        Whether to show the maximum likelihood parameter vector as a cross on
        the 2D histograms.
    """
    import matplotlib.pyplot as plt

    oldlw = plt.rcParams["lines.linewidth"]
    plt.rcParams["lines.linewidth"] = 0.7
    try:
        from corner import corner

        if show_ML:
            _, MLp, _, _ = find_ML(sampler, 0)
        else:
            MLp = None

        corner_opts = {
            "labels": sampler.labels,
            "truths": MLp,
            "quantiles": [0.16, 0.5, 0.84],
            "verbose": False,
            "truth_color": color_cycle[0],
        }

        corner_opts.update(kwargs)

        f = corner(sampler.get_chain(flat=True), **corner_opts)
    except ImportError:
        log.warning(
            "The corner package is not installed;" " corner plot not available"
        )
        f = None

    plt.rcParams["lines.linewidth"] = oldlw

    return f
Source code for naima.plot

naima

Navigation

Related Topics