Source code for chromosight.utils.plotting

"""Chromosight's plotting submodule contains utilities to visualize the pileup
of detected patterns or the input matrix. It also implements an interactive map
recording the coordinates of double clicks."""

import os
import sys
import numpy as np
from matplotlib import pyplot as plt


[docs]def pileup_plot(pileup_pattern, output_prefix, name="pileup_patterns"): """ Wrapper around matplotlib.pyplot.imshow to visualize the pileup of patterns detected by chromosight """ plt.imshow( pileup_pattern, interpolation="none", vmin=0.0, vmax=2.0, cmap="seismic", ) plt.title("{} pileup".format(name)) plt.colorbar() plt.xlabel(output_prefix) plt.savefig(output_prefix + ".pdf", dpi=100, format="pdf") plt.close("all")
[docs]def plot_whole_matrix( clr, patterns, out=None, region=None, region2=None, log_transform=False ): """ Visualise the input matrix with a set of patterns overlaid on top. Can optionally restrict the visualisation to a region. Parameters ---------- mat : scipy.sparse.csr_matrix The whole genome Hi-C matrix to be visualized. patterns : pandas.DataFrame The set of patterns to be plotted on top of the matrix. One pattern per row, 3 columns: bin1, bin2 and score of types int, int and float, respectively. region : str The genomic range, in UCSC format, corresponding to rows to be plotted in the matrix. If not given, the whole matrix is used. It only region is given, but not region2, the matrix is subsetted on rows and columns to show a region on the diagonal. region2 : str The genomic range, in UCSC format, of columns to be plotted in the matrix. Region must also be provided, or this will be ignored. log_transform : bool Whether to log transform the matrix. """ if region is not None and region2 is None: mat = clr.matrix().fetch(region) s1, e1 = clr.extent(region) s2, e2 = s1, e1 elif region is not None and region2 is not None: mat = clr.matrix().fetch(region, region2) s1, e1 = clr.extent(region) s2, e2 = clr.extent(region2) else: mat = clr.matrix()[:] s1, e1 = 0, clr.shape[0] s2, e2 = 0, clr.shape[1] pat = patterns.copy() pat = pat.loc[ (pat.bin1 > s1) & (pat.bin1 < e1) & (pat.bin2 > s2) & (pat.bin2 < e2), :, ] if log_transform: mat = np.log(mat) mat[mat == 0] = np.nan plt.figure(dpi=1200) plt.imshow( mat, cmap="Reds", vmax=np.percentile(mat[~np.isnan(mat)], 99), ) plt.scatter( pat.bin1 - s1, pat.bin2 - s2, facecolors="none", edgecolors="blue", s=0.05, ) if out is None: plt.show() else: plt.savefig(out)
[docs]def click_finder(mat, half_w=8, xlab=None, ylab=None): """ Given an input Hi-C matrix, show an interactive window and record coordinates where the user double-clicks. When the interactive window is closed, the stack of windows around recorded coordinates is returned. Parameters ---------- mat : scipy.sparse.csr_matrix The input Hi-C matrix to display interactively. half_w : int Half width of the windows to return. The resulting windows xlab: str Horizontal label to display below the matrix. ylab: str Vertical label to display next to the matrix. Returns ------- numpy.array : 3D stack of images around coordinates recorded interactively. The shape of the stack is (N, w, w) where N is the number of coordinates and w is 2*half_w. """ global coords coords = [] def onclick(event): global ix, iy global coords try: ix, iy = int(event.xdata), int(event.ydata) except TypeError: return None try: if coords[-1] == (ix, iy): print(f"x = {ix}, y = {iy}") except IndexError: pass coords.append((ix, iy)) return coords fig = plt.figure() plt.imshow(mat.toarray(), cmap="afmhot_r", vmax=np.percentile(mat.data, 95)) plt.title("Double click to record pattern positions") if xlab: plt.xlabel(xlab) if ylab: plt.ylabel(ylab) # Setup click listener cid = fig.canvas.mpl_connect("button_press_event", onclick) plt.show() fig.canvas.mpl_disconnect(cid) # Keep coordinates that were clicked twice consecutively (double-click) double_clicked = set() for c in range(1, len(coords)): if coords[c - 1] == coords[c]: double_clicked.add(coords[c]) # initialize empty image stack, will store windows around coords img_stack = np.zeros((len(double_clicked), half_w * 2 + 1, half_w * 2 + 1)) bad_coords = np.zeros(len(double_clicked), dtype=bool) # Fill the image stack with windows around each coord for i, (center_v, center_h) in enumerate(double_clicked): high, low = center_h - half_w, center_h + half_w + 1 left, right = center_v - half_w, center_v + half_w + 1 try: img_stack[i] = mat[high:low, left:right].toarray() except ValueError: bad_coords[i] = True sys.stderr.write( f"Discarding {(center_v, center_h)}: Too close " "to the edge of the matrix\n" ) # Discard images associated with coords too close to the edge img_stack = img_stack[~bad_coords] return img_stack