import numpy as np

SAMPLES_PER_FRAME = 960  # 48kHz * 20ms

# RMS below this linear level is treated as digital silence and left untouched.
_SILENCE_RMS = 1e-10
# Signals quieter than this level are muted by the noise gate instead of boosted.
_NOISE_GATE_DBFS = -40.0


def normalize_rms(samples: np.ndarray, target_dbfs: float = -20.0) -> np.ndarray:
    """Scale audio samples so their RMS matches target_dbfs.

    The RMS is measured over the whole array. Three cases bypass the scaling:

    * empty input is returned as-is (there is nothing to measure);
    * digital silence (RMS below 1e-10) is returned as-is;
    * anything quieter than -40 dBFS is treated as background noise and
      replaced with zeros (noise gate) rather than amplified.

    Args:
        samples: float32 PCM audio samples
        target_dbfs: target loudness in dBFS (e.g. -20.0)

    Returns:
        Scaled float32 samples at target loudness. The empty/silence cases
        return the input object itself; the gated case returns zeros with
        the input's shape and dtype.

    Note:
        NaN/inf samples are not sanitized and propagate into the result.
    """
    # np.mean() on an empty slice warns and yields NaN, which would then flow
    # through every comparison below; bail out before measuring anything.
    if samples.size == 0:
        return samples

    # Square and accumulate in float64 so integer input cannot overflow in its
    # own dtype and long float32 buffers do not lose precision in the mean.
    mean_square = np.mean(np.square(samples, dtype=np.float64))
    rms = float(np.sqrt(mean_square))
    if rms < _SILENCE_RMS:
        return samples

    # Noise gate: don't amplify background noise / keyboard clicks. rms is
    # already bounded away from zero here, so log10 needs no epsilon.
    rms_dbfs = 20.0 * np.log10(rms)
    if rms_dbfs < _NOISE_GATE_DBFS:
        return np.zeros_like(samples)

    target_rms = 10.0 ** (target_dbfs / 20.0)
    # Plain Python float keeps the multiply in the array's own precision for
    # floating-point input.
    gain = target_rms / rms
    return (samples * gain).astype(np.float32)


def mix_streams(streams: list[np.ndarray]) -> np.ndarray:
    """Sum multiple audio streams into one.

    Streams are truncated to the length of the shortest one and added
    sample-by-sample. No gain compensation or limiting is applied, so the
    result can exceed [-1.0, 1.0].

    Args:
        streams: list of float32 PCM arrays (same sample rate, possibly
            different lengths)

    Returns:
        Summed float32 array. Length matches the shortest stream.
        Empty list returns silence of SAMPLES_PER_FRAME length.
    """
    if not streams:
        return np.zeros(SAMPLES_PER_FRAME, dtype=np.float32)

    # Trim along the first axis so every stream has the same length.
    shortest = min(stream.shape[0] for stream in streams)
    aligned = [stream[:shortest] for stream in streams]
    return np.sum(aligned, axis=0).astype(np.float32)


def soft_limit(samples: np.ndarray) -> np.ndarray:
    """Apply tanh-based soft limiter to prevent clipping.

    The curve is tanh(x): small signals pass almost unchanged (0.25 -> ~0.245),
    mid-level signals are mildly compressed (0.5 -> ~0.462), a full-scale 1.0
    comes out at ~0.762, and anything larger approaches +/-1.0 smoothly
    instead of hard-clipping.

    Args:
        samples: float32 PCM audio samples

    Returns:
        Limited float32 samples within [-1.0, 1.0] (NaN input stays NaN).
    """
    return np.tanh(samples).astype(np.float32)