Files
unisonus-livekit/choir-mixer/mixer.py
2026-03-29 22:02:20 -04:00

62 lines
1.8 KiB
Python

import numpy as np
# Number of samples in one audio frame (48 kHz sample rate * 20 ms frame).
# mix_streams() uses it as the length of the silent frame it returns when
# it is given no input streams.
SAMPLES_PER_FRAME = 960 # 48kHz * 20ms
def normalize_rms(
    samples: np.ndarray,
    target_dbfs: float = -20.0,
    gate_dbfs: float = -40.0,
) -> np.ndarray:
    """Scale audio samples so their RMS matches target_dbfs.

    Args:
        samples: PCM audio samples, expected to be float32. Other numeric
            dtypes are converted to float32 before processing.
        target_dbfs: target loudness in dBFS (e.g. -20.0)
        gate_dbfs: noise-gate threshold in dBFS. Input whose RMS is below
            this level is replaced by silence instead of being amplified.

    Returns:
        float32 samples, in one of three forms:
          * empty or (near-)silent input (RMS < 1e-10): returned unscaled;
            a float32 input array is returned as the same object.
          * input quieter than gate_dbfs: an all-zero array of the same shape.
          * otherwise: the input scaled so that its RMS equals target_dbfs.
    """
    # Convert once so every return path yields float32. No copy is made when
    # the input already is a float32 array.
    pcm = np.asarray(samples, dtype=np.float32)

    # An empty frame has no RMS; np.mean would warn and produce NaN.
    if pcm.size == 0:
        return pcm

    # Square and average in float64: avoids wrap-around for integer PCM input
    # and precision loss when accumulating float32 values.
    rms = float(np.sqrt(np.mean(np.square(pcm, dtype=np.float64))))
    if rms < 1e-10:
        return pcm

    # Noise gate: don't amplify very quiet input (background noise, keyboard
    # clicks). rms >= 1e-10 here, so log10 is safe without an epsilon.
    if 20.0 * np.log10(rms) < gate_dbfs:
        return np.zeros_like(pcm)

    # dBFS -> linear amplitude, then the gain that moves rms onto the target.
    gain = 10.0 ** (target_dbfs / 20.0) / rms
    return (pcm * gain).astype(np.float32)
def mix_streams(streams: list[np.ndarray]) -> np.ndarray:
    """Mix several audio streams by adding them sample by sample.

    Args:
        streams: list of float32 PCM arrays (same sample rate, lengths may differ)

    Returns:
        float32 array holding the element-wise sum. Every stream is cut to
        the length of the shortest one before summing. When no streams are
        given, a silent frame of SAMPLES_PER_FRAME samples is returned.
    """
    if streams:
        # Only the region covered by every stream can be mixed.
        common_len = min(stream.shape[0] for stream in streams)
        aligned = np.asarray([stream[:common_len] for stream in streams])
        return aligned.sum(axis=0).astype(np.float32)

    # Nothing to mix: emit one frame of silence.
    return np.zeros(SAMPLES_PER_FRAME, dtype=np.float32)
def soft_limit(samples: np.ndarray) -> np.ndarray:
    """Soft-limit audio by passing every sample through tanh.

    tanh is close to the identity for small values and flattens out towards
    +/-1.0 as the magnitude grows, so quiet material is barely altered while
    peaks are compressed smoothly rather than hard-clipped.

    Args:
        samples: float32 PCM audio samples

    Returns:
        float32 samples of the same shape, bounded to [-1.0, 1.0].
    """
    saturated = np.tanh(samples)
    return saturated.astype(np.float32)