Source code for limix_plot._qqplot

from ._plt import get_pyplot


[docs]def qqplot( a, label=None, alpha=0.05, cutoff=0.1, line=True, pts_kws=None, band_kws=None, ax=None, show_lambda=True, ): """ Quantile-Quantile plot of observed p-values versus theoretical ones. Parameters ---------- a : Series, 1d-array, list Observed p-values. label : string, optional Legend label for the relevent component of the plot. alpha : float, optional Significance level defining the confidfence interval. Set to ``None`` to disable plotting. Defaults to ``0.05``. cutoff : float, optional P-values higher than `cutoff` will not be plotted. Defaults to ``0.1``. line : bool, optional Whether or not plot a straight line. Defaults to ``True``. pts_kws : dict, optional Keyword arguments forwarded to the matplotlib function used for plotting the points. band_kws : dict, optional Keyword arguments forwarded to the fill_between function used for plotting the confidence band. ax : matplotlib Axes, optional The target handle for this figure. If ``None``, the current axes is set. Example ------- .. plot:: >>> import limix_plot as lp >>> from numpy.random import RandomState >>> >>> random = RandomState(1) >>> >>> pv0 = random.rand(10000) >>> pv0[0] = 1e-6 >>> >>> pv1 = random.rand(10000) >>> pv2 = random.rand(10000) >>> >>> lp.qqplot(pv0) >>> >>> lp.qqplot(pv0) >>> lp.qqplot(pv1, line=False, alpha=None) >>> >>> lp.qqplot(pv1) >>> lp.qqplot(pv2, line=False, alpha=None) >>> lp.box_aspect() >>> >>> lp.qqplot(pv0, label='label0', band_kws=dict(color='#EE0000', ... alpha=0.2)); >>> lp.qqplot(pv1, label='label1', line=False, alpha=None); >>> lp.qqplot(pv2, label='label2', line=False, ... alpha=None, pts_kws=dict(marker='*')); >>> _ = lp.get_pyplot().legend() """ from numpy import asarray, sort, log10, arange plt = get_pyplot() a = asarray(a) if a.ndim > 1: a = a.squeeze() if ax is None: ax = plt.gca() if pts_kws is None: pts_kws = dict() if "marker" not in pts_kws: pts_kws["marker"] = "o" if "linestyle" not in pts_kws: pts_kws["linestyle"] = "" if "markeredgecolor" not in pts_kws: pts_kws["markeredgecolor"] = None if label is not None: pts_kws["label"] = label if band_kws is None: band_kws = dict() if "facecolor" not in band_kws: band_kws["facecolor"] = "#DDDDDD" if "linewidth" not in band_kws: band_kws["linewidth"] = 0 if "zorder" not in band_kws: band_kws["zorder"] = -1 if "alpha" not in band_kws: band_kws["alpha"] = 1.0 pv = sort(a) ok = _subsample(pv, cutoff) qnull = -log10((0.5 + arange(len(pv))) / len(pv)) qemp = -log10(pv) ax.plot(qnull[ok], qemp[ok], **pts_kws) qmax = max(qnull[ok].max(), qemp[ok].max()) xmin = qnull[ok].min() xmax = qnull[ok].max() if line: ax.plot([xmin, xmax], [xmin, xmax], color="black", zorder=0) if alpha is not None: _plot_confidence_band(ok, qnull, alpha, ax, qmax, band_kws) if show_lambda: _plot_lambda(pv, ax) _adjust_lambda_texts(ax) ax.set_ylabel("-log$_{10}$pv observed") ax.set_xlabel("-log$_{10}$pv expected") ax.xaxis.set_ticks_position("both") ax.yaxis.set_ticks_position("both")
def _plot_lambda(pv, ax): from numpy import median import scipy.stats as st chi2 = st.chi2(df=1) lamb = chi2.isf(median(pv)) / chi2.median() text = "$\\lambda={:.3f}$".format(lamb) ax.text( 0.40, 0.75, text, horizontalalignment="center", verticalalignment="center", transform=ax.transAxes, ) def _adjust_lambda_texts(ax): from adjustText import adjust_text texts = [] for t in ax.texts: if "$\\lambda" in t.get_text(): texts.append(t) if len(texts) > 1: y = texts[0].get_position()[1] for (i, t) in enumerate(texts[1:]): xy = t.get_position() t.set_position((xy[0], y - (i + 1) * 0.05)) adjust_text( texts, autoalign="y", only_move={"text": "y"}, text_from_points=False ) def _expected(n): from numpy import linspace, flipud, log10 lnpv = linspace(1 / (n + 1), n / (n + 1), n, endpoint=True) return flipud(-log10(lnpv)) def _rank_confidence_band(nranks, significance_level, ok): from numpy import arange, flipud, ascontiguousarray from scipy.special import betaincinv alpha = significance_level k0 = arange(1, nranks + 1) k1 = flipud(k0).copy() k0 = ascontiguousarray(k0[ok]) k1 = ascontiguousarray(k1[ok]) my_ok = k1 / k0 / (k1[0] / k0[0]) > 1e-4 k0 = ascontiguousarray(k0[my_ok]) k1 = ascontiguousarray(k1[my_ok]) top = betaincinv(k0, k1, 1 - alpha) bottom = betaincinv(k0, k1, alpha) return (my_ok, bottom, top) def _plot_confidence_band(ok, null_qvals, significance_level, ax, qmax, band_kws): from numpy import log10 (cb_ok, bo, to) = _rank_confidence_band(len(null_qvals), significance_level, ok) bo = -log10(bo) to = -log10(to) m = null_qvals[ok][cb_ok] ax.fill_between(m, bo, to, **band_kws) def _subsample(pvalues, cutoff): from numpy import ones, percentile, log10, linspace, searchsorted, sum, where resolution = 500 if len(pvalues) <= resolution: return ones(len(pvalues), dtype=bool) ok = pvalues <= percentile(pvalues, cutoff) nok = ~ok qv = -log10(pvalues[nok]) qv_min = qv[-1] qv_max = qv[0] snok = sum(nok) resolution = min(snok, resolution) qv_chosen = linspace(qv_min, qv_max, resolution) pv_chosen = 10 ** (-qv_chosen) idx = searchsorted(pvalues[nok], pv_chosen) n = sum(nok) i = 0 while i < len(idx) and idx[i] == n: i += 1 idx = idx[i:] ok[where(nok)[0][idx]] = True ok[0] = True ok[-1] = True return ok