Source code for limix_plot._qqplot

from ._plt import get_pyplot


[docs]def qqplot(
    a,
    label=None,
    alpha=0.05,
    cutoff=0.1,
    line=True,
    pts_kws=None,
    band_kws=None,
    ax=None,
    show_lambda=True,
):
    """
    Quantile-Quantile plot of observed p-values versus theoretical ones.

    Parameters
    ----------
    a : Series, 1d-array, list
        Observed p-values.
    label : string, optional
        Legend label for the relevent component of the plot.
    alpha : float, optional
        Significance level defining the confidfence interval. Set to ``None``
        to disable plotting. Defaults to ``0.05``.
    cutoff : float, optional
        P-values higher than `cutoff` will not be plotted. Defaults to ``0.1``.
    line : bool, optional
        Whether or not plot a straight line. Defaults to ``True``.
    pts_kws : dict, optional
        Keyword arguments forwarded to the matplotlib function used for
        plotting the points.
    band_kws : dict, optional
        Keyword arguments forwarded to the fill_between function used for
        plotting the confidence band.
    ax : matplotlib Axes, optional
        The target handle for this figure. If ``None``, the current axes is
        set.

    Example
    -------
    .. plot::

        >>> import limix_plot as lp
        >>> from numpy.random import RandomState
        >>>
        >>> random = RandomState(1)
        >>>
        >>> pv0 = random.rand(10000)
        >>> pv0[0] = 1e-6
        >>>
        >>> pv1 = random.rand(10000)
        >>> pv2 = random.rand(10000)
        >>>
        >>> lp.qqplot(pv0)
        >>>
        >>> lp.qqplot(pv0)
        >>> lp.qqplot(pv1, line=False, alpha=None)
        >>>
        >>> lp.qqplot(pv1)
        >>> lp.qqplot(pv2, line=False, alpha=None)
        >>> lp.box_aspect()
        >>>

        >>> lp.qqplot(pv0, label='label0', band_kws=dict(color='#EE0000',
        ...           alpha=0.2));
        >>> lp.qqplot(pv1, label='label1', line=False, alpha=None);
        >>> lp.qqplot(pv2, label='label2', line=False,
        ...           alpha=None, pts_kws=dict(marker='*'));
        >>> _ = lp.get_pyplot().legend()
    """
    from numpy import asarray, sort, log10, arange

    plt = get_pyplot()

    a = asarray(a)
    if a.ndim > 1:
        a = a.squeeze()

    if ax is None:
        ax = plt.gca()

    if pts_kws is None:
        pts_kws = dict()
    if "marker" not in pts_kws:
        pts_kws["marker"] = "o"
    if "linestyle" not in pts_kws:
        pts_kws["linestyle"] = ""
    if "markeredgecolor" not in pts_kws:
        pts_kws["markeredgecolor"] = None
    if label is not None:
        pts_kws["label"] = label

    if band_kws is None:
        band_kws = dict()
    if "facecolor" not in band_kws:
        band_kws["facecolor"] = "#DDDDDD"
    if "linewidth" not in band_kws:
        band_kws["linewidth"] = 0
    if "zorder" not in band_kws:
        band_kws["zorder"] = -1
    if "alpha" not in band_kws:
        band_kws["alpha"] = 1.0

    pv = sort(a)
    ok = _subsample(pv, cutoff)

    qnull = -log10((0.5 + arange(len(pv))) / len(pv))
    qemp = -log10(pv)

    ax.plot(qnull[ok], qemp[ok], **pts_kws)

    qmax = max(qnull[ok].max(), qemp[ok].max())

    xmin = qnull[ok].min()
    xmax = qnull[ok].max()

    if line:
        ax.plot([xmin, xmax], [xmin, xmax], color="black", zorder=0)

    if alpha is not None:
        _plot_confidence_band(ok, qnull, alpha, ax, qmax, band_kws)

    if show_lambda:
        _plot_lambda(pv, ax)
        _adjust_lambda_texts(ax)

    ax.set_ylabel("-log$_{10}$pv observed")
    ax.set_xlabel("-log$_{10}$pv expected")

    ax.xaxis.set_ticks_position("both")
    ax.yaxis.set_ticks_position("both")


def _plot_lambda(pv, ax):
    from numpy import median
    import scipy.stats as st

    chi2 = st.chi2(df=1)
    lamb = chi2.isf(median(pv)) / chi2.median()
    text = "$\\lambda={:.3f}$".format(lamb)
    ax.text(
        0.40,
        0.75,
        text,
        horizontalalignment="center",
        verticalalignment="center",
        transform=ax.transAxes,
    )


def _adjust_lambda_texts(ax):
    from adjustText import adjust_text

    texts = []
    for t in ax.texts:

        if "$\\lambda" in t.get_text():
            texts.append(t)

    if len(texts) > 1:
        y = texts[0].get_position()[1]
        for (i, t) in enumerate(texts[1:]):
            xy = t.get_position()
            t.set_position((xy[0], y - (i + 1) * 0.05))
        adjust_text(
            texts, autoalign="y", only_move={"text": "y"}, text_from_points=False
        )


def _expected(n):
    from numpy import linspace, flipud, log10

    lnpv = linspace(1 / (n + 1), n / (n + 1), n, endpoint=True)
    return flipud(-log10(lnpv))


def _rank_confidence_band(nranks, significance_level, ok):
    from numpy import arange, flipud, ascontiguousarray
    from scipy.special import betaincinv

    alpha = significance_level

    k0 = arange(1, nranks + 1)
    k1 = flipud(k0).copy()

    k0 = ascontiguousarray(k0[ok])
    k1 = ascontiguousarray(k1[ok])

    my_ok = k1 / k0 / (k1[0] / k0[0]) > 1e-4
    k0 = ascontiguousarray(k0[my_ok])
    k1 = ascontiguousarray(k1[my_ok])

    top = betaincinv(k0, k1, 1 - alpha)
    bottom = betaincinv(k0, k1, alpha)

    return (my_ok, bottom, top)


def _plot_confidence_band(ok, null_qvals, significance_level, ax, qmax, band_kws):
    from numpy import log10

    (cb_ok, bo, to) = _rank_confidence_band(len(null_qvals), significance_level, ok)

    bo = -log10(bo)
    to = -log10(to)

    m = null_qvals[ok][cb_ok]

    ax.fill_between(m, bo, to, **band_kws)


def _subsample(pvalues, cutoff):
    from numpy import ones, percentile, log10, linspace, searchsorted, sum, where

    resolution = 500

    if len(pvalues) <= resolution:
        return ones(len(pvalues), dtype=bool)

    ok = pvalues <= percentile(pvalues, cutoff)
    nok = ~ok

    qv = -log10(pvalues[nok])
    qv_min = qv[-1]
    qv_max = qv[0]

    snok = sum(nok)

    resolution = min(snok, resolution)

    qv_chosen = linspace(qv_min, qv_max, resolution)
    pv_chosen = 10 ** (-qv_chosen)

    idx = searchsorted(pvalues[nok], pv_chosen)
    n = sum(nok)
    i = 0
    while i < len(idx) and idx[i] == n:
        i += 1
        idx = idx[i:]

    ok[where(nok)[0][idx]] = True

    ok[0] = True
    ok[-1] = True

    return ok