62 lines
1.8 KiB
Python
62 lines
1.8 KiB
Python
import numpy as np
|
|
|
|
SAMPLES_PER_FRAME = 48_000 * 20 // 1000  # samples in one 20 ms frame at 48 kHz
|
|
|
|
|
|
def normalize_rms(samples: np.ndarray, target_dbfs: float = -20.0) -> np.ndarray:
    """Scale audio samples so their RMS matches target_dbfs.

    Args:
        samples: float32 PCM audio samples
        target_dbfs: target loudness in dBFS (e.g. -20.0)

    Returns:
        Scaled float32 samples at target loudness. Empty input and digital
        silence (RMS below 1e-10) are returned as-is. Input whose RMS is
        below -40 dBFS is treated as noise and replaced by zeros of the
        same shape and dtype.
    """
    # An empty frame has no level; np.mean would warn and produce NaN,
    # so hand it back untouched.
    if samples.size == 0:
        return samples

    # Accumulate the squares in float64 and keep the level as a Python
    # float so the gain multiply below stays in the input's dtype.
    rms = float(np.sqrt(np.mean(np.square(samples, dtype=np.float64))))
    if rms < 1e-10:
        return samples

    # Noise gate: frames quieter than -40 dBFS (background noise, keyboard
    # clicks) are muted instead of being boosted up to the target level.
    # rms >= 1e-10 here, so log10 needs no epsilon.
    rms_dbfs = 20 * np.log10(rms)
    if rms_dbfs < -40.0:
        return np.zeros_like(samples)

    target_rms = 10 ** (target_dbfs / 20)
    gain = target_rms / rms
    return (samples * gain).astype(np.float32)
|
|
|
|
|
|
def mix_streams(streams: list[np.ndarray]) -> np.ndarray:
    """Mix several audio streams by adding them sample-by-sample.

    Args:
        streams: list of float32 PCM arrays (same sample rate, possibly
            different lengths)

    Returns:
        float32 array holding the element-wise sum. Every stream is cut to
        the length of the shortest one before summing, so the result has
        that length. An empty list yields SAMPLES_PER_FRAME zeros.
    """
    if streams:
        # Align all inputs on the shortest stream, then add along the
        # stream axis.
        shortest = min(stream.shape[0] for stream in streams)
        stacked = np.asarray([stream[:shortest] for stream in streams])
        return stacked.sum(axis=0).astype(np.float32)

    # Nothing to mix: emit one frame of silence.
    return np.zeros(SAMPLES_PER_FRAME, dtype=np.float32)
|
|
|
|
|
|
def soft_limit(samples: np.ndarray) -> np.ndarray:
    """Soft-limit audio by passing every sample through tanh.

    tanh is close to linear for small amplitudes, compresses samples more
    strongly as they approach full scale, and saturates smoothly towards
    +/-1.0 for anything larger, so the output never hard-clips.

    Args:
        samples: float32 PCM audio samples

    Returns:
        float32 samples of the same shape. For any non-NaN input every
        value lies within [-1.0, 1.0]; NaN samples stay NaN.
    """
    saturated = np.tanh(samples)
    return saturated.astype(np.float32)
|