Source code for bullkpy.pp.normalize
from __future__ import annotations
from typing import Literal
import numpy as np
import scipy.sparse as sp
import anndata as ad
from ..logging import info, warn
[docs]
def set_raw_counts(
    adata: ad.AnnData,
    *,
    layer: str = "counts",
    overwrite: bool = False,
) -> None:
    """
    Snapshot ``adata.X`` into ``adata.layers[layer]`` as the raw counts.

    Intended to be called once, immediately after loading counts and
    before any normalization touches ``adata.X``.

    Parameters
    ----------
    adata
        Object whose ``X`` is copied.
    layer
        Name of the destination layer.
    overwrite
        If False and the layer already exists, a warning is logged and
        nothing is written. If True, an existing layer is replaced.
    """
    already_present = layer in adata.layers

    if overwrite or not already_present:
        # Copy so that later in-place edits of adata.X leave the snapshot intact.
        adata.layers[layer] = adata.X.copy()
        info(f"Stored raw counts in adata.layers['{layer}'].")
        return

    warn(f"adata.layers['{layer}'] already exists; not overwriting.")
[docs]
def normalize_cpm(
    adata: ad.AnnData,
    *,
    layer: str | None = "counts",
    target_sum: float = 1e6,
    out_layer: str = "cpm",
    inplace_X: bool = False,
    eps: float = 1e-12,
) -> None:
    """
    CPM normalize counts per sample.

    Each row of the input is divided by its row sum (library size) and
    multiplied by ``target_sum``.

    Parameters
    ----------
    adata
        Object to normalize; modified in place.
    layer
        Input layer. If None, uses adata.X.
    target_sum
        Scale factor (1e6 = CPM).
    out_layer
        Where to write normalized values (adata.layers[out_layer]).
    inplace_X
        If True, also write normalized values into adata.X. ``adata.X``
        receives its own copy, so it does not share memory with
        ``adata.layers[out_layer]``.
    eps
        Small constant to avoid division by zero. Row sums are clamped to
        be at least ``eps`` before dividing.

    Notes
    -----
    Sparse input yields a CSR result; dense input yields a float array.
    ``adata.obs["libsize"]`` is set to the (``eps``-clamped) row sums.
    """
    X = adata.layers[layer] if layer is not None else adata.X

    if sp.issparse(X):
        X = X.tocsr()
        libsize = np.asarray(X.sum(axis=1)).ravel()
        libsize = np.maximum(libsize, eps)
        scale = target_sum / libsize
        # multiply() against a dense column vector returns COO, which cannot
        # be row/column indexed; convert back to CSR before storing.
        X_norm = X.multiply(scale[:, None]).tocsr()
    else:
        # Coerce to a flat ndarray so the row sums are 1-D, as on the sparse path.
        libsize = np.asarray(X.sum(axis=1)).ravel()
        libsize = np.maximum(libsize, eps)
        scale = (target_sum / libsize).astype(float)
        X_norm = X.astype(float) * scale[:, None]

    adata.layers[out_layer] = X_norm
    info(f"Wrote CPM-normalized data to adata.layers['{out_layer}'].")

    if inplace_X:
        # Store an independent copy: sharing one object between X and the
        # layer would let later in-place edits of X corrupt the layer.
        adata.X = X_norm.copy()
        info("Updated adata.X with CPM-normalized data.")

    # Store library size for reference (bulk QC)
    adata.obs["libsize"] = libsize
[docs]
def log1p(
    adata: ad.AnnData,
    *,
    layer: str | None = "cpm",
    out_layer: str = "log1p_cpm",
    inplace_X: bool = False,
) -> None:
    """
    log1p-transform a layer (default: CPM) and store as a new layer.

    Works with dense or sparse matrices. The input is never modified: the
    transform is applied to a copy.

    Parameters
    ----------
    adata
        Object to transform; modified in place.
    layer
        Input layer. If None, uses adata.X.
    out_layer
        Where to write transformed values (adata.layers[out_layer]).
    inplace_X
        If True, also write transformed values into adata.X. ``adata.X``
        receives its own copy, so it does not share memory with
        ``adata.layers[out_layer]``.
    """
    X = adata.layers[layer] if layer is not None else adata.X

    if sp.issparse(X):
        # log1p(0) == 0, so only the explicitly stored entries need
        # transforming and the sparsity pattern is preserved.
        X_log = X.copy()
        X_log.data = np.log1p(X_log.data)
    else:
        X_log = np.log1p(X.astype(float))

    adata.layers[out_layer] = X_log
    info(f"Wrote log1p-transformed data to adata.layers['{out_layer}'].")

    if inplace_X:
        # Store an independent copy: sharing one object between X and the
        # layer would let later in-place edits of X corrupt the layer.
        adata.X = X_log.copy()
        info("Updated adata.X with log1p-transformed data.")