# unisonus-livekit/choir-mixer/mixer.py

import numpy as np

# Number of samples in one audio frame; also the length of the silent frame
# that mix_streams() returns when it is given no streams.
SAMPLES_PER_FRAME = 960  # 48 kHz * 20 ms


def normalize_rms(samples: np.ndarray, target_dbfs: float = -20.0) -> np.ndarray:
    """Scale audio samples so their RMS level matches ``target_dbfs``.

    Three kinds of input are not scaled:

    * Empty input: an empty float32 array is returned. No mean is taken, so
      NumPy emits no "Mean of empty slice" warning and no NaN gain is computed.
    * Digital silence (RMS below 1e-10): the input array itself is returned,
      unmodified and without copying.
    * Quiet input (RMS below -40 dBFS): an all-zero array shaped like the
      input is returned, so background noise is muted rather than amplified
      up to the target level.

    Args:
        samples: float32 PCM audio samples.
        target_dbfs: target loudness in dBFS (e.g. -20.0).

    Returns:
        float32 samples scaled to the target loudness, or one of the
        pass-through / muted results described above.
    """
    # Guard first: np.mean() of an empty array warns and yields NaN, which
    # would slip past both threshold checks below and produce a NaN gain.
    if samples.size == 0:
        return samples.astype(np.float32)

    rms = np.sqrt(np.mean(samples ** 2))
    if rms < 1e-10:
        # Effectively silent: dividing by this RMS would give a huge gain.
        return samples

    # Noise gate: don't amplify signals below -40 dBFS (background noise,
    # keyboard clicks); mute them instead.
    rms_dbfs = 20 * np.log10(rms + 1e-10)
    if rms_dbfs < -40.0:
        return np.zeros_like(samples)

    # Convert the dBFS target to a linear amplitude and derive the gain that
    # moves the measured RMS onto it.
    target_rms = 10 ** (target_dbfs / 20)
    gain = target_rms / rms
    return (samples * gain).astype(np.float32)


def mix_streams(streams: list[np.ndarray]) -> np.ndarray:
    """Mix several audio streams by adding them sample-by-sample.

    Every stream is cut down to the length of the shortest one before the
    streams are added together, so trailing samples of longer streams are
    dropped.

    Args:
        streams: float32 PCM arrays at a common sample rate; their lengths
            may differ.

    Returns:
        float32 array holding the element-wise sum, as long as the shortest
        input stream. When ``streams`` is empty, a silent (all-zero) frame of
        SAMPLES_PER_FRAME samples is returned instead.
    """
    if len(streams) == 0:
        # Nothing to mix: hand back one frame of silence.
        return np.zeros(SAMPLES_PER_FRAME, dtype=np.float32)

    # Length of the shortest stream decides how much of each one is used.
    common_len = min(stream.shape[0] for stream in streams)

    # Stack the truncated streams as rows, then collapse the rows by summing.
    stacked = np.stack([stream[:common_len] for stream in streams], axis=0)
    mixed = stacked.sum(axis=0)
    return mixed.astype(np.float32)


def soft_limit(samples: np.ndarray) -> np.ndarray:
    """Soft-limit audio by passing every sample through tanh.

    tanh is close to the identity for small magnitudes and flattens out
    towards +/-1 as the magnitude grows (for example tanh(0.5) is about
    0.46), so quiet material is changed only slightly while loud peaks are
    squeezed smoothly instead of being hard-clipped.

    Args:
        samples: float32 PCM audio samples.

    Returns:
        float32 samples of the same shape, each within [-1.0, 1.0].
    """
    saturated = np.tanh(samples)
    return saturated.astype(np.float32)