Source code for chromosight.utils.preprocessing

#!/usr/bin/env python3
# coding: utf-8

"""Chromosight's preprocessing submodule implements a number of functions to
operate on Hi-C contact maps before detection. These functions can be used to
improve the signal or filter unneeded signal. There are also functions to edit
(zoom, crop, factorize) kernel matrices.
"""

import sys
import numpy as np
import numpy.linalg as la
import scipy.stats as ss
import scipy.sparse as sp
import scipy.ndimage as ndi
from sklearn.isotonic import IsotonicRegression


[docs]def erase_missing(signal, valid_rows, valid_cols, sym_upper=True): """ Given a sparse matrix, set all pixels in missing (invalid) bins to 0. Parameters ---------- signal : scipy.sparse.csr_matrix of floats Input signal on which to erase values. valid_rows : numpy.ndarray of ints Indices of rows considered valid (not missing). valid_cols : numpy.ndarray of ints Indices of columns considered valid (not missing). sym_upper : bool Define if the input signal is upper symmetric. Returns ------- scipy.sparse.csr_matrix The input signal with all values in missing bins set to 0 """ if sym_upper and sp.issparse(signal): if np.any(valid_rows != valid_cols): raise ValueError( "Valid rows and columns must be identical with sym_upper=True" ) if signal.shape[0] != signal.shape[1]: raise ValueError( "Input matrix must be square when using sym_upper=True" ) # Make a boolean mask from good bins good_mask = np.isin(range(signal.shape[0]), valid_rows) # Set all pixels in a nondetectable bin to 0 # For faster masking of bins, mask bins using dot product with an # identify matrix where bad bins have been masked on the diagonal # E.g. if removing the second bin (row and column): # 1 0 0 9 6 5 1 0 0 9 0 5 # 0 0 0 X 6 8 7 X 0 0 0 = 0 0 0 # 0 0 1 6 7 8 0 0 1 6 0 8 mask_mat = sp.eye(signal.shape[0]) mask_mat.data[0][~good_mask] = 0 erased = mask_mat.dot(signal).dot(mask_mat) else: # Get a boolean array of missing (1) and valid (0) rows missing_rows = valid_to_missing(valid_rows, signal.shape[0]) missing_cols = valid_to_missing(valid_cols, signal.shape[1]) erased = signal.copy() erased[missing_rows, :] = 0 erased[:, missing_cols] = 0 return erased
[docs]def set_mat_diag(mat, diag=0, val=0): """ Set the nth diagonal of a symmetric 2D numpy array to a fixed value. Operates in place. Parameters ---------- mat : numpy.ndarray Symmetric 2D array of floats. diag : int 0-based index of the diagonal to modify. Use negative values for the lower half. val : float Value to use for filling the diagonal """ m = mat.shape[0] step = m + 1 start = diag end = m ** 2 - diag * m mat.flat[start:end:step] = val
[docs]def diag_trim(mat, n): """ Trim an upper triangle sparse matrix so that only the first n diagonals are kept. Parameters ---------- mat : scipy.sparse.csr_matrix or numpy.ndarray The sparse matrix to be trimmed n : int The number of diagonals from the center to keep (0-based). Returns ------- scipy.sparse.dia_matrix or numpy.ndarray: The diagonally trimmed upper triangle matrix with only the first n diagonal. """ if sp.issparse(mat): if mat.format != "csr": raise ValueError("input type must be scipy.sparse.csr_matrix") # Trim diagonals by removing all elements further than n in the # upper triangle trimmed = sp.tril(mat, n, format="csr") trimmed = sp.triu(trimmed, format="csr") else: trimmed = mat.copy() n_diags = trimmed.shape[0] for diag in range(n, n_diags): set_mat_diag(trimmed, diag, 0) return trimmed return trimmed
[docs]def distance_law( matrix, detectable_bins=None, max_dist=None, smooth=True, fun=np.nanmean ): """ Computes genomic distance law by averaging over each diagonal in the upper triangle matrix. If a list of detectable bins is provided, pixels in missing bins will be excluded from the averages. A maximum distance can be specified to define how many diagonals should be computed. parameters ---------- matrix: scipy.sparse.csr_matrix the input matrix to compute distance law from. detectable_bins : numpy.ndarray of ints An array of detectable bins indices to consider when computing distance law. max_dist : int Maximum distance from diagonal, in number of bins in which to compute distance law smooth : bool Whether to use isotonic regression to smooth the distance law. fun : callable A function to apply on each diagonal. Defaults to mean. Returns ------- dist: np.ndarray the output genomic distance law. example ------- >>> m = np.ones((3,3)) >>> m += np.array([1,2,3]) >>> m array([[2., 3., 4.], [2., 3., 4.], [2., 3., 4.]]) >>> distance_law(csr_matrix(m)) array([3. , 3.5, 4. ]) """ mat_n = matrix.shape[0] if max_dist is None: max_dist = mat_n n_diags = min(mat_n, max_dist + 1) dist = np.zeros(mat_n) if detectable_bins is None: detectable_bins = np.array(range(mat_n)) for diag in range(n_diags): # Find detectable which fall in diagonal detect_mask = np.zeros(mat_n, dtype=bool) detect_mask[detectable_bins] = 1 # Find bins which are detectable in the diagonal (intersect of # hori and verti) detect_mask_h = detect_mask[: (mat_n - diag)] detect_mask_v = detect_mask[mat_n - (mat_n - diag) :] detect_mask_diag = detect_mask_h & detect_mask_v detect_diag = matrix.diagonal(diag)[detect_mask_diag] dist[diag] = fun(detect_diag[detect_diag > 0]) # Smooth the curve using isotonic regression: Find closest approximation # with the condition that point n+1 cannot be higher than point n. # (i.e. contacts can only decrease when increasing distance) if smooth and mat_n > 2: ir = IsotonicRegression(increasing=False) dist[~np.isfinite(dist)] = 0 dist = ir.fit_transform(range(len(dist)), dist) return dist
[docs]def get_detectable_bins(mat, n_mads=3, inter=False): """ Returns lists of detectable indices after excluding low interacting bins based on the proportion of zero pixel values in the matrix bins. Parameters ---------- mat : scipy.sparse.coo_matrix A Hi-C matrix in tihe form of a 2D numpy array or coo matrix n_mads : int Number of median absolute deviation below the median required to consider bins non-detectable. inter : bool Whether the matrix is interchromosomal. Default is to consider the matrix is intrachromosomal (i.e. upper symmetric). Returns ------- numpy ndarray : tuple of 2 1D arrays containing indices of low interacting rows and columns, respectively. """ matrix = mat.copy() matrix.eliminate_zeros() def mad(x): return ss.median_abs_deviation(x, nan_policy="omit") if not inter: if matrix.shape[0] != matrix.shape[1]: raise ValueError("Intrachromosomal matrices must be symmetric.") # Replace nonzero pixels by ones to work on prop. of nonzero pixels matrix.data = np.ones(matrix.data.shape) # Compute number of nonzero values in each bin sum_bins = sum_mat_bins(matrix) # Compute variation in the number of nonzero pixels sum_mad = mad(sum_bins) # Find poor interacting rows and columns sum_med = np.median(sum_bins) detect_threshold = max(1, sum_med - sum_mad * n_mads) # Removal of poor interacting rows and columns good_bins = np.flatnonzero(sum_bins >= detect_threshold) good_bins = (good_bins, good_bins) else: # Adapted for asymetric matrices (need to compute rows and columns) sum_rows, sum_cols = matrix.sum(axis=1).A1, matrix.sum(axis=0).A1 mad_rows, mad_cols = mad(sum_rows), mad(sum_cols) med_rows, med_cols = np.median(sum_rows), np.median(sum_cols) detect_threshold_rows = max(1, med_rows - mad_rows * n_mads) detect_threshold_cols = max(1, med_cols - mad_cols * n_mads) good_rows = np.flatnonzero(sum_rows > detect_threshold_rows) good_cols = np.flatnonzero(sum_cols > detect_threshold_cols) good_bins = (good_rows, good_cols) return good_bins
[docs]def detrend( matrix, detectable_bins=None, max_dist=None, smooth=False, fun=np.nanmean, max_val=10, ): """ Detrends a Hi-C matrix by the distance law. The input matrix should have been normalised beforehandand. Parameters ---------- matrix : scipy.sparse.csr_matrix The normalised intrachromosomal Hi-C matrix to detrend. detectable_bins : tuple Tuple containing a list of detectable rows and a list of columns on which to perform detrending. Poorly interacting indices have been excluded. max_dist : int Maximum number of bins from the diagonal at which to compute trend. smooth : bool Whether to use isotonic regression to smooth the trend. fun : callable Function to use on each diagonal to compute the trend. max_val : float or None Maximum value in the detrended matrix. Set to None to disable Returns ------- numpy.ndarray : The detrended Hi-C matrix. """ matrix = matrix.tocsr() y = distance_law( matrix, detectable_bins=detectable_bins, max_dist=max_dist, smooth=smooth, fun=fun, ) y[np.isnan(y)] = 0.0 # Detrending by the distance law clean_mat = matrix.tocoo() # clean_mat.data /= y_savgol[abs(clean_mat.row - clean_mat.col)] try: clean_mat.data = clean_mat.data / y[abs(clean_mat.row - clean_mat.col)] # If no nonzero value in matrix, do nothing except TypeError: pass clean_mat = clean_mat.tocsr() if max_val is not None: clean_mat[clean_mat >= max_val] = 1 return clean_mat
[docs]def ztransform(matrix): """ Z transformation for Hi-C matrices. Parameters ---------- matrix : scipy.sparse.coo_matrix A Hi-C matrix in sparse format. Returns ------- scipy.sparse.coo_matrix: The detrended Hi-C matrix """ mat = matrix.copy() mu = np.mean(mat.data) sd = np.std(mat.data) mat.data -= mu mat.data /= sd return mat
[docs]def sum_mat_bins(mat): """ Compute the sum of matrices bins (i.e. rows or columns) using only the upper triangle, assuming symmetrical matrices. Parameters ---------- mat : scipy.sparse.coo_matrix Contact map in sparse format, either in upper triangle or full matrix. Returns ------- numpy.ndarray : 1D array of bin sums. """ # Equivalaent to row or col sum on a full matrix # Note: mat.sum returns a 'matrix' object. A1 extracts the 1D flat array # from the matrix return mat.sum(axis=0).A1 + mat.sum(axis=1).A1 - mat.diagonal(0)
[docs]def subsample_contacts(M, n_contacts): """Bootstrap sampling of contacts in a sparse Hi-C map. Parameters ---------- M : scipy.sparse.coo_matrix The input Hi-C contact map in sparse format. n_contacts : int The number of contacts to sample. Returns ------- scipy.sparse.coo_matrix A new matrix with a fraction of the original contacts. """ S = M.data.copy() # Match cell idx to cumulative number of contacts cum_counts = np.cumsum(S) # Total number of contacts to sample tot_contacts = int(cum_counts[-1]) # Sample desired number of contacts from the range(0, n_contacts) array sampled_contacts = np.random.choice( int(tot_contacts), size=(n_contacts), replace=False ) # Get indices of sampled contacts in the cum_counts array idx = np.searchsorted(cum_counts, sampled_contacts, side="right") # Bin those indices to the same dimensions as matrix data to get counts sampled_counts = np.bincount(idx, minlength=S.shape[0]) # Get nonzero values to build new sparse matrix nnz_mask = sampled_counts > 0 sampled_counts = sampled_counts[nnz_mask].astype(np.float64) sampled_rows = M.row[nnz_mask] sampled_cols = M.col[nnz_mask] return sp.coo_matrix( (sampled_counts, (sampled_rows, sampled_cols)), shape=(M.shape[0], M.shape[1]), )
[docs]def frame_missing_mask(mask, kernel_shape, sym_upper=False, max_dist=None): """ Adds a frame around input mask, given a kernel. The goal of this frame is define margins around the matrix where the kernel will not perform convolution (denoted by 1). If the matrix is upper symmetric, a margin of half the kernel's width is added below the diagonal and a maximum distance from the diagonal above which margins need not be drawn can be considered. Otherwise Margins are simply added on all 4 sides of the matrix. :: signal kernel _________ ______ ____ |#######| | | | | ==> |# #| | | |___| |# #| | | |# #| |_____| |# #| |#######| -------- Parameters ---------- mask : scipy.sparse.csr_matrix of bool The mask around which to add margins. kernels_shape : tuple of ints The number of rows and kernel in the input kernel. Margins will be half these values. sym_upper : bool Whether the signal is a symmetric upper triangle matrix. If so, values on a margin below the diagonal will be masked. max_dist : int or None Number of diagonals to keep Returns ------- framed_mask : scipy.sparse.csr_matrix of bool The input mask with a padding of True around the edges. If sym_upper is True, a padding is also added below the diagonal. """ if mask.dtype != bool: raise ValueError("Mask must contain boolean values") if not sp.issparse(mask): raise ValueError("Mask must be a sparse matrix") framed_mask = mask.copy() ms, ns = mask.shape mk, nk = kernel_shape if sym_upper and (max_dist is not None): # Remove diagonals further than scan distance in the input mask framed_mask = diag_trim(framed_mask, max_dist + max(nk, mk)).tocsr() max_m = max_dist + mk max_n = max_dist + nk else: max_m, max_n = ms, ns # Up and down margins initialized with zeros and filled as needed margin_1 = sp.csr_matrix((mk - 1, ns), dtype=bool) margin_2 = sp.csr_matrix((mk - 1, ns), dtype=bool) if sym_upper and (max_dist is not None): # Margin 1 (top) is in upper triangle -> fill missing up to scan dist margin_1[:, :max_n] = 1 else: margin_1[:, :] = 1 margin_2[:, :] = 1 framed_mask = sp.vstack([margin_1, framed_mask, margin_2], format="csr") # Left and right margin_1 = sp.csr_matrix((ms + 2 * (mk - 1), nk - 1), dtype=bool) margin_2 = sp.csr_matrix((ms + 2 * (mk - 1), nk - 1), dtype=bool) if sym_upper and (max_dist is not None): # Margin 2 (right) is in upper triangle-> fill missing up to scan dist margin_2[-(max_m + 1) :, :] = 1 # Fill only the start of left margin for the top-left corner margin_1[: mk - 1, :] = 1 else: margin_1[:, :] = 1 margin_2[:, :] = 1 framed_mask = sp.hstack([margin_1, framed_mask, margin_2], format="csr") if sym_upper: # LIL format is much faster when changing sparsity framed_mask = framed_mask.tolil() # Add margin below diagonal big_k = max(nk, mk) dia_margins = np.ones(big_k) dia_offsets = np.arange(-1, -big_k-1, -1) framed_mask += sp.diags( dia_margins, dia_offsets, shape=framed_mask.shape, format="lil", dtype=bool, ) framed_mask = framed_mask.tocsr() return framed_mask
[docs]def check_missing_mask(signal, mask): """ Ensure all elements defined as missing by the mask are set to zero in the signal. If this is not the case, raises an error. Parameters ---------- signal : numpy.ndarray of floats or scipy.sparse.csr_matrix of floats The signal to be checked. mask : numpy.ndarray of bools or scipy.sparse.csr_matrix of bools The mask defining missing values as True and valid values as False. """ if sp.issparse(mask): # Check if there are nonzero values in the signal reported as missing # by the mask missing_with_signal = np.nonzero( abs(signal[mask.nonzero()[0], mask.nonzero()[1]]) > 0 )[0] if len(missing_with_signal) > 0: raise ValueError( "There are", len(missing_with_signal), "non-zero elements reported as missing.", ) else: if np.sum(abs(signal[mask > 0])) > 1e-10: raise ValueError( "There are", str(np.sum(abs(signal[mask > 0]))), "non-zero elements reported as missing.", )
[docs]def make_missing_mask( shape, valid_rows, valid_cols, max_dist=None, sym_upper=False ): """ Given lists of valid rows and columns, generate a sparse matrix mask with missing pixels denoted as 1 and valid pixels as 0. If a max_dist is provided, upper symmetric matrices will only be flagged up to max_dist pixels from the diagonal. Parameters ---------- shape : tuple of ints Shape of the mask to generate. valid_rows : numpy.ndarray of ints Array with the indices of valid rows that should be set to 0 in the mask. valid_cols : numpy.ndarray of ints Array with the indices of valid rows that should be set to 0 in the mask. max_dist : int or None The maximum diagonal distance at which masking should take place. sym_upper : bool Whether the matrix is symmetric upper. If so, max_dist is ignored Returns ------- scipy.sparse.csr_matrix of bool The mask containing False values where pixels are valid and True valid where pixels are missing """ # Error if the matrix upper symmetric but shape is rectangle or missing # rows and cols are different sm, sn = shape if sym_upper and (sm != sn or len(valid_rows) != len(valid_cols)): raise ValueError("Rectangular matrices cannot be upper symmetric") # Get a boolean array of missing (1) and valid (0) rows missing_rows = valid_to_missing(valid_rows, sm) # When matrix is sym., rows and cols are synonym, no need to compute 2x if sym_upper: missing_cols = missing_rows else: missing_cols = valid_to_missing(valid_cols, sn) # If upper sym., fill only upper diag up to max_dist. # E. g. with bins 1 and 3 missing # and a max_dist of 1: # 0 1 0 0 0 # 0 1 1 0 0 # 0 0 0 1 0 # 0 0 0 1 1 # 0 0 0 0 0 # For each missing bin, mask is apply 1 pixel upwards and 1 to the right # to fill only the upper triangle up to max_dist if sym_upper: # If no max dist has been specified, fill the whole upper triangle if max_dist is None: max_dist = min(shape) # Generate matrix of distance shifts by row. # Shape is len(missing_rows) x (max_dist + 1) # e.g.: 2 missing rows and max dist of 1 # 0 0 # 1 1 row_shifts = np.tile( np.array(range(max_dist + 1)), (len(missing_rows), 1) ).T # Compute row positions upwards to diagonal by subtracting missing rows # to the shifts. Following the previous example, if missing rows are # bins 1 and 3: # 1 3 # 0 2 rows_before = (missing_rows - row_shifts).flatten("F") # looking at pixels up from the bins, cols remain the same: # 1 3 # 1 3 cols_before = np.repeat(missing_rows, max_dist+1) # Compute col position to the right until diagonal by adding the shift # Note: upper symmetric, so row_shifts = col_shift_ # 1 3 # 2 4 cols_after = (missing_cols + row_shifts).flatten("F") # This time, rows remain constant since we are computing positions to # the right rows_after = np.repeat(missing_cols, max_dist+1) # Combine positions to the right and upwards rows = np.concatenate([rows_before, rows_after]) cols = np.concatenate([cols_before, cols_after]) data = np.ones(rows.shape, dtype=bool) # Remove entries where rows or cols are negative or larger than shape valid = (cols < sm) & (cols >= 0) & (rows < sm) & (rows >= 0) # Build mask mat with miss bins up to max scan dist in upper triangle mask = sp.coo_matrix( (data[valid], (rows[valid], cols[valid])), shape=shape, dtype=bool ).tocsr() else: mask = sp.csr_matrix(shape, dtype=bool) mask[missing_rows, :] = 1 mask[:, missing_cols] = 1 return mask
[docs]def zero_pad_sparse(mat, margin_h, margin_v, fmt="coo"): """ Adds margin of zeros around an input sparse matrix. Parameters ---------- mat : scipy.sparse.csr_matrix The matrix to be padded. margin_h : int The width of the horizontal margin to add on the left and right of the matrix. margin_v : int The width of the vertical margin to add on the top and bottom of the matrix. fmt : string The desired scipy sparse format of the output matrix Returns ------- scipy.sparse.csr_matrix : The padded matrix of dimensions (m + 2 * margin_h, n + 2 * margin_v). Examples -------- >>> m = sp.csr_matrix(np.array([[1, 2], [10, 20]])) >>> zero_pad_sparse(m, 2, 1).toarray() array([[ 0, 0, 0, 0, 0, 0], [ 0, 0, 1, 2, 0, 0], [ 0, 0, 10, 20, 0, 0], [ 0, 0, 0, 0, 0, 0]]) """ sm, sn = mat.shape padded_mat = mat.copy() # Up and down margins initialized with zeros and filled as needed margin_h_0 = sp.csr_matrix((sm, margin_h), dtype=mat.dtype) margin_v_0 = sp.csr_matrix((margin_v, sn + 2 * margin_h), dtype=mat.dtype) padded_mat = sp.hstack([margin_h_0, padded_mat, margin_h_0], format="csr") padded_mat = sp.vstack([margin_v_0, padded_mat, margin_v_0], format="csr") return padded_mat
[docs]def crop_kernel(kernel, target_size): """ Crop a kernel matrix to target size horizontally and vertically. If the target size is even, the target size is adjusted to the next integer up. Parameters ---------- kernel : numpy.ndarray of floats Image to crop. target_size : tuple of ints Tuple defining the target shape of the kernel, takes the form (rows, cols) where rows and cols are odd numbers. Returns ------- cropped : numpy.ndarray of floats New image no larger than target dimensions """ # Use list for mutability target = [d for d in target_size] adjusted = False for dim in range(len(target)): if not target[dim] % 2: target[dim] += 1 adjusted = True if adjusted: sys.stderr.write( "WARNING: Cropped kernel size adjusted to " f"{target[0]}x{target[1]} to keep odd dimensions.\n" ) source_m, source_n = kernel.shape target_m, target_n = target # Define horizontal and vertical margins to trim if source_m > target_m: margin_rows = (source_m - target_m) // 2 else: margin_rows = 0 if source_n > target_n: margin_cols = (source_n - target_n) // 2 else: margin_cols = 0 cropped = kernel[ margin_rows : (source_m - margin_rows), margin_cols : (source_n - margin_cols), ] return cropped
[docs]def resize_kernel( kernel, kernel_res=None, signal_res=None, factor=None, min_size=7, quiet=False, ): """ Resize a kernel matrix based on the resolution at which it was defined and the signal resolution. E.g. if a kernel matrix was generated for 10kb and the input signal is 20kb, kernel size will be divided by two. If the kernel is enlarged, pixels are interpolated with a spline of degree 1. Alternatively, a resize factor can be provided. In the example above, the factor would be 0.5. Parameters ---------- kernel : numpy.ndarray Kernel matrix. kernel_res : int Resolution for which the kernel was designed. Mutually exclusive with factor. signal_res : int Resolution of the signal matrix in basepair per matrix bin. Mutually exclusive with factor. factor : float Resize factor. Can be provided as an alternative to kernel_res and signal_res. Values above 1 will enlarge the kernel, values below 1 will shrink it. min_size : int Lower bound, in number of rows/column allowed when resizing the kernel. quiet : bool Suppress warnings if resize factor was adjusted. Returns ------- resized_kernel : numpy.ndarray The resized input kernel. """ km, kn = kernel.shape if km != kn: raise ValueError("kernel must be square.") if not (km % 2) or not (kn % 2): raise ValueError("kernel size must be odd.") if factor is not None: if kernel_res is not None or signal_res is not None: raise ValueError( "factor is mutually exclusive with resolution " "parameters (kernel_res and signal_res)." ) resize_factor = factor else: if kernel_res is None or signal_res is None: raise ValueError( "You must provide either a resize factor or the signal and " "kernel resolutions." ) # Define by how many times kernel must be enlarged for its pixels to # match the signal's pixels resize_factor = kernel_res / signal_res if km * resize_factor < min_size: resize_factor = min_size / km resized_kernel = ndi.zoom(kernel, resize_factor, order=1) if not resized_kernel.shape[0] % 2: # Compute the factor required to yield a dimension smaller by one adj_resize_factor = (resized_kernel.shape[0] - 1) / km if not quiet: sys.stderr.write( f"Adjusting resize factor from {resize_factor} to {adj_resize_factor}.\n" ) resized_kernel = ndi.zoom(kernel, adj_resize_factor, order=1) return resized_kernel
[docs]def factorise_kernel(kernel, prop_info=0.999): """ Performs truncated SVD on an input kernel, returning the singular vectors necessary to retain a given proportion of information contained in the kernel. Parameters ---------- kernel : numpy.ndarray of floats The input 2D kernel to factorise. prop_info : float Proportion of information to retain. Returns ------- tuple of numpy.ndarrays of floats A tuple containing the truncated left and right singular matrices, where each singular vector has been multiplied by the square root of their respective singular values. """ u, sigma, v = la.svd(kernel) total_info = np.sum(sigma ** 2) # Compute min. number of singular vectors to retain enough info keep_k = np.flatnonzero(np.cumsum(sigma ** 2) > prop_info * total_info)[0] + 1 if keep_k > np.floor(min(kernel.shape) / 2): sys.stderr.write( f"Warning: Kernel factorisation required {keep_k} singular," "vectors this may result in slow operations.\n", ) # Truncate singular matrix to the keep only required vectors u = u[:, :keep_k] v = v[:keep_k, :] # Multiply each singular vector by the sqrt of its singular value for i in range(keep_k): u[:, i] *= np.sqrt(sigma[i]) v[i, :] *= np.sqrt(sigma[i]) return (u, v)
[docs]def valid_to_missing(valid, size): """ Given an array of valid indices, return the corrsesponding array of missing indices. Parameters --------- valid : numpy.ndarray of ints The valid indices. size : int The size of the matrix (maximum possible index + 1). Return ------ missing : numpy.ndarray of ints The missing indices. """ missing = np.ones(size, dtype=bool) try: missing[valid] = False # In case there is no valid index except IndexError: pass missing = np.flatnonzero(missing) return missing