Source code for limix_plot._manhattan

from ._plt import get_pyplot


[docs]def manhattan(data, colora="#5689AC", colorb="#21334F", pts_kws=None, ax=None): """ Produce a manhattan plot. Parameters ---------- data : DataFrame, dict DataFrame containing the chromosome, base-pair positions, and p-values. colora : matplotlib color Points color of the first group. colorb : matplotlib color Points color of the second group. pts_kws : dict, optional Keyword arguments forwarded to the matplotlib function used for plotting the points. ax : matplotlib Axes, optional The target handle for this figure. If ``None``, the current axes is set. Example ------- .. plot:: >>> import limix_plot as lp >>> from numpy import log10 >>> >>> df = lp.load_dataset('gwas') >>> df = df.rename(columns={"chr": "chrom"}) >>> print(df.head()) chrom pos pv 234 10 224239 0.00887 239 10 229681 0.00848 253 10 240788 0.00721 258 10 246933 0.00568 266 10 255222 0.00593 >>> lp.manhattan(df) >>> plt = lp.get_pyplot() >>> _ = plt.axhline(-log10(1e-7), color='red') >>> _ = plt.ylim(2, plt.ylim()[1]) """ from numpy import log10, unique from xarray import DataArray import pandas as pd plt = get_pyplot() if isinstance(data, pd.DataFrame): data = DataArray( data=data["pv"], dims=["candidate"], coords={k: ("candidate", data[k]) for k in data.columns}, ) else: data = DataArray(data=data) if len(data) == 0: raise ValueError("DataFrame is empty.") if pts_kws is None: pts_kws = dict() ax = plt.gca() if ax is None else ax data["chrom"] = data["chrom"].astype(str) data["pos"] = data["pos"].astype(int) chr_order = _chr_precedence(data) data["order"] = ("candidate", [chr_order[i] for i in data["chrom"].values]) data = data.sortby(["order", "pos"]) data = _abs_pos(data) if "markersize" not in pts_kws: pts_kws["markersize"] = 2 if "marker" not in pts_kws: pts_kws["marker"] = "." if "linestyle" not in pts_kws: pts_kws["linestyle"] = "" colors = {0: colora, 1: colorb} for i, c in enumerate(unique(data["order"])): ok = data["order"] == c pts_kws["color"] = colors[i % 2] x = data.loc[ok]["abs_pos"] y = -log10(data.loc[ok].values) ax.plot(x, y, **pts_kws) ax.set_xlim(data["abs_pos"].min(), data["abs_pos"].max()) ax.set_ylim(0, ax.get_ylim()[1]) ax.set_ylabel("-log$_{10}$pv") ax.set_xlabel("chromosome") u = unique(data["chrom"].values) chrom_labels = sorted(u, key=lambda x: chr_order[x]) _set_ticks(ax, _chrom_bounds(data), chrom_labels)
def _plot_points(ax, data, alpha, null_style, alt_style): from numpy import log10 null = data.loc[data.values >= alpha, :] alt = data.loc[data.values < alpha, :] ax.plot(null["abs_pos"], -log10(null.values), ".", ms=7, **null_style) ax.plot(alt["abs_pos"], -log10(alt.values), ".", ms=7, **alt_style) def _set_ticks(ax, chrom_bounds, chrom_labels): from numpy import asarray, mean n = len(chrom_bounds) xticks = asarray([mean(chrom_bounds[i]) for i in range(n)]) ax.set_xticks(xticks) ax.set_xticklabels(chrom_labels) def _abs_pos(data): from numpy import cumsum, flipud, unique order = unique(data["order"].values) chrom_ends = [data["pos"].values[data["order"].values == c].max() for c in order] offset = flipud(cumsum(chrom_ends)[:-1]) data["abs_pos"] = data["pos"].copy() order = list(reversed(order)) for i, oi in enumerate(offset): ix = data["order"] == order[i] data["abs_pos"].values[ix] = data.loc[ix]["abs_pos"] + oi return data def _chrom_bounds(data): from numpy import unique order = unique(data["order"]) v = [] for c in order: vals = data["abs_pos"][data["order"] == c] v += [(vals.min(), vals.max())] return v def _isint(i): try: int(i) except ValueError: return False else: return True def _chr_precedence(data): from numpy import unique uchr = unique(data["chrom"].values) nchr = [int(i) for i in uchr if _isint(i)] if len(nchr) > 0: offset = max(nchr) else: offset = -1 precedence = {str(i): i for i in nchr} schr = sorted([i for i in uchr if not _isint(i)]) for i, s in enumerate(schr): precedence[s] = offset + i + 1 return precedence