chore: initial commit

2026-03-29 22:02:20 -04:00
commit edc367b31a
9 changed files with 360 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,9 @@
 node_modules/
 __pycache__/
 *.pyc
 *.pyo
 .env
 *.env
 .venv/
 venv/
 *.log
--- a/choir-mixer/Dockerfile
+++ b/choir-mixer/Dockerfile
@@ -0,0 +1,10 @@
 FROM python:3.11-slim
 WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
 CMD ["python", "main.py", "start"]
--- a/choir-mixer/main.py
+++ b/choir-mixer/main.py
@@ -0,0 +1,141 @@
 import asyncio
 import logging
 import time
 import numpy as np
 from livekit import agents, rtc
 from mixer import normalize_rms, mix_streams, soft_limit
 logger = logging.getLogger("choir-mixer")
 logging.basicConfig(level=logging.INFO)
 SAMPLE_RATE = 48000
 NUM_CHANNELS = 1
 FRAME_DURATION_MS = 20
 SAMPLES_PER_FRAME = SAMPLE_RATE * FRAME_DURATION_MS // 1000  # 960
 MAX_STREAMS = 6
 TARGET_DBFS = -20.0
 FRAME_MAX_AGE_S = 0.06  # Only use frames received in the last 60ms
 server = agents.AgentServer()
@server.rtc_session(agent_name="choir-mixer")
 async def choir_mixer_agent(ctx: agents.JobContext):
    """Choir mixer agent: subscribes to participant audio, mixes, and publishes."""
    room = ctx.room
    # Track active audio streams: {track_sid: AudioStream}
    audio_streams: dict[str, rtc.AudioStream] = {}
    # Latest frame per track: {track_sid: (timestamp, np.ndarray)}
    latest_frames: dict[str, tuple[float, np.ndarray]] = {}
    lock = asyncio.Lock()
    # Set up audio output
    source = rtc.AudioSource(SAMPLE_RATE, NUM_CHANNELS)
    track = rtc.LocalAudioTrack.create_audio_track("choir_mix", source)
    options = rtc.TrackPublishOptions()
    options.source = rtc.TrackSource.SOURCE_MICROPHONE
    await ctx.connect(auto_subscribe=agents.AutoSubscribe.AUDIO_ONLY)
    await room.local_participant.publish_track(track, options)
    logger.info("Choir mixer joined room: %s", room.name)
    async def read_track(track_sid: str, stream: rtc.AudioStream):
        """Continuously read frames from one participant's audio stream."""
        try:
            async for event in stream:
                frame = event.frame
                # Convert int16 PCM to float32 [-1.0, 1.0]
                pcm = np.frombuffer(frame.data, dtype=np.int16).astype(np.float32) / 32768.0
                async with lock:
                    latest_frames[track_sid] = (time.monotonic(), pcm)
        except Exception as e:
            logger.warning("Stream read error for %s: %s", track_sid, e)
        finally:
            async with lock:
                latest_frames.pop(track_sid, None)
                audio_streams.pop(track_sid, None)
            logger.info("Stream ended: %s (active: %d)", track_sid, len(audio_streams))
    @room.on("track_subscribed")
    def on_track_subscribed(
        subscribed_track: rtc.Track,
        publication: rtc.RemoteTrackPublication,
        participant: rtc.RemoteParticipant,
    ):
        if subscribed_track.kind != rtc.TrackKind.KIND_AUDIO:
            return
        if len(audio_streams) >= MAX_STREAMS:
            logger.info("At max streams (%d), ignoring track from %s",
                        MAX_STREAMS, participant.identity)
            return
        sid = subscribed_track.sid
        stream = rtc.AudioStream(
            subscribed_track,
            sample_rate=SAMPLE_RATE,
            num_channels=NUM_CHANNELS,
        )
        audio_streams[sid] = stream
        asyncio.create_task(read_track(sid, stream))
        logger.info("Subscribed to %s from %s (active: %d)",
                     sid, participant.identity, len(audio_streams))
    @room.on("track_unsubscribed")
    def on_track_unsubscribed(
        unsubscribed_track: rtc.Track,
        publication: rtc.RemoteTrackPublication,
        participant: rtc.RemoteParticipant,
    ):
        sid = unsubscribed_track.sid
        stream = audio_streams.get(sid)
        if stream:
            asyncio.create_task(stream.aclose())
            logger.info("Unsubscribed from %s (%s)", sid, participant.identity)
    # Mixing loop: runs at frame rate (~20ms intervals)
    async def mixing_loop():
        while True:
            now = time.monotonic()
            async with lock:
                # Only use frames that arrived recently (discard stale ones)
                frames = [
                    pcm for ts, pcm in latest_frames.values()
                    if now - ts < FRAME_MAX_AGE_S
                ]
            if frames:
                # Normalize each stream, mix, and limit
                normalized = [normalize_rms(f, TARGET_DBFS) for f in frames]
                mixed = mix_streams(normalized)
                limited = soft_limit(mixed)
            else:
                limited = np.zeros(SAMPLES_PER_FRAME, dtype=np.float32)
            # Convert float32 back to int16 PCM
            pcm_int16 = (limited * 32767).astype(np.int16)
            audio_frame = rtc.AudioFrame(
                data=pcm_int16.tobytes(),
                sample_rate=SAMPLE_RATE,
                num_channels=NUM_CHANNELS,
                samples_per_channel=len(pcm_int16),
            )
            await source.capture_frame(audio_frame)
            await asyncio.sleep(FRAME_DURATION_MS / 1000)
    # Start mixing loop
    mix_task = asyncio.create_task(mixing_loop())
    # Keep agent alive until mixing_loop exits (room disconnect cancels it)
    try:
        await mix_task
    except asyncio.CancelledError:
        pass
    finally:
        for stream in list(audio_streams.values()):
            await stream.aclose()
        logger.info("Choir mixer exiting room: %s", room.name)
 if __name__ == "__main__":
    agents.cli.run_app(server)
--- a/choir-mixer/mixer.py
+++ b/choir-mixer/mixer.py
@@ -0,0 +1,61 @@
 import numpy as np
 SAMPLES_PER_FRAME = 960  # 48kHz * 20ms
 def normalize_rms(samples: np.ndarray, target_dbfs: float = -20.0) -> np.ndarray:
    """Scale audio samples so their RMS matches target_dbfs.
    Args:
        samples: float32 PCM audio samples
        target_dbfs: target loudness in dBFS (e.g. -20.0)
    Returns:
        Scaled float32 samples at target loudness. Silence is returned as-is.
    """
    rms = np.sqrt(np.mean(samples ** 2))
    if rms < 1e-10:
        return samples
    # Noise gate: don't amplify signals below -40 dBFS (background noise, keyboard clicks)
    rms_dbfs = 20 * np.log10(rms + 1e-10)
    if rms_dbfs < -40.0:
        return np.zeros_like(samples)
    target_rms = 10 ** (target_dbfs / 20)
    gain = target_rms / rms
    return (samples * gain).astype(np.float32)
 def mix_streams(streams: list[np.ndarray]) -> np.ndarray:
    """Sum multiple audio streams into one.
    Args:
        streams: list of float32 PCM arrays (same sample rate, possibly different lengths)
    Returns:
        Summed float32 array. Length matches the shortest stream.
        Empty list returns silence of SAMPLES_PER_FRAME length.
    """
    if not streams:
        return np.zeros(SAMPLES_PER_FRAME, dtype=np.float32)
    min_len = min(s.shape[0] for s in streams)
    trimmed = [s[:min_len] for s in streams]
    return np.sum(trimmed, axis=0).astype(np.float32)
 def soft_limit(samples: np.ndarray) -> np.ndarray:
    """Apply tanh-based soft limiter to prevent clipping.
    Gentle saturation: signals below ~0.5 pass nearly unchanged,
    signals approaching 1.0 are compressed, signals above 1.0 are
    smoothly clamped.
    Args:
        samples: float32 PCM audio samples
    Returns:
        Limited float32 samples guaranteed within [-1.0, 1.0].
    """
    return np.tanh(samples).astype(np.float32)
--- a/choir-mixer/requirements.txt
+++ b/choir-mixer/requirements.txt
@@ -0,0 +1,3 @@
 livekit-agents>=1.0.0
 livekit>=1.0.0
 numpy>=1.26.0
--- a/choir-mixer/tests/conftest.py
+++ b/choir-mixer/tests/conftest.py
@@ -0,0 +1,31 @@
 import numpy as np
 import pytest
 SAMPLE_RATE = 48000
 FRAME_DURATION_MS = 20
 SAMPLES_PER_FRAME = SAMPLE_RATE * FRAME_DURATION_MS // 1000  # 960
@pytest.fixture
 def silence():
    """960 samples of silence."""
    return np.zeros(SAMPLES_PER_FRAME, dtype=np.float32)
@pytest.fixture
 def quiet_tone():
    """960 samples of a quiet 440Hz sine wave at -40 dBFS."""
    t = np.arange(SAMPLES_PER_FRAME, dtype=np.float32) / SAMPLE_RATE
    amplitude = 10 ** (-40 / 20)  # ~0.01
    return (amplitude * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
@pytest.fixture
 def loud_tone():
    """960 samples of a loud 440Hz sine wave at -6 dBFS."""
    t = np.arange(SAMPLES_PER_FRAME, dtype=np.float32) / SAMPLE_RATE
    amplitude = 10 ** (-6 / 20)  # ~0.5
    return (amplitude * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
@pytest.fixture
 def clipping_tone():
    """960 samples of a sine wave that exceeds [-1, 1] range."""
    t = np.arange(SAMPLES_PER_FRAME, dtype=np.float32) / SAMPLE_RATE
    return (1.5 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
--- a/choir-mixer/tests/test_mixer.py
+++ b/choir-mixer/tests/test_mixer.py
@@ -0,0 +1,66 @@
 import numpy as np
 from mixer import normalize_rms, mix_streams, soft_limit
 TARGET_DBFS = -20
 class TestNormalizeRms:
    def test_silence_stays_silent(self, silence):
        result = normalize_rms(silence, TARGET_DBFS)
        assert np.allclose(result, 0.0)
    def test_quiet_tone_gets_louder(self, quiet_tone):
        original_rms = np.sqrt(np.mean(quiet_tone ** 2))
        result = normalize_rms(quiet_tone, TARGET_DBFS)
        result_rms = np.sqrt(np.mean(result ** 2))
        assert result_rms > original_rms
    def test_loud_tone_gets_quieter(self, loud_tone):
        original_rms = np.sqrt(np.mean(loud_tone ** 2))
        result = normalize_rms(loud_tone, TARGET_DBFS)
        result_rms = np.sqrt(np.mean(result ** 2))
        assert result_rms < original_rms
    def test_normalized_to_target(self, loud_tone):
        result = normalize_rms(loud_tone, TARGET_DBFS)
        result_rms = np.sqrt(np.mean(result ** 2))
        result_dbfs = 20 * np.log10(result_rms + 1e-10)
        assert abs(result_dbfs - TARGET_DBFS) < 1.0  # within 1 dB
 class TestMixStreams:
    def test_single_stream_unchanged(self, loud_tone):
        result = mix_streams([loud_tone])
        assert np.allclose(result, loud_tone)
    def test_two_streams_summed(self, loud_tone):
        result = mix_streams([loud_tone, loud_tone])
        # Two identical streams summed should be louder
        assert np.max(np.abs(result)) > np.max(np.abs(loud_tone))
    def test_empty_list_returns_silence(self, silence):
        result = mix_streams([])
        assert result.shape[0] == 960
        assert np.allclose(result, 0.0)
    def test_different_lengths_uses_shortest(self):
        short = np.ones(480, dtype=np.float32) * 0.5
        long = np.ones(960, dtype=np.float32) * 0.5
        result = mix_streams([short, long])
        assert result.shape[0] == 480
 class TestSoftLimit:
    def test_quiet_signal_unchanged(self, quiet_tone):
        result = soft_limit(quiet_tone)
        assert np.allclose(result, quiet_tone, atol=0.001)
    def test_clipping_signal_contained(self, clipping_tone):
        result = soft_limit(clipping_tone)
        assert np.max(np.abs(result)) <= 1.0
    def test_preserves_sign(self, clipping_tone):
        result = soft_limit(clipping_tone)
        # Signs should match where input is non-zero
        nonzero = np.abs(clipping_tone) > 0.01
        assert np.all(np.sign(result[nonzero]) == np.sign(clipping_tone[nonzero]))
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,24 @@
 version: "3.9"
 services:
  livekit:
    image: livekit/livekit-server:latest
    restart: unless-stopped
    ports:
      - "7880:7880"
      - "7881:7881"
      - "50000-50100:50000-50100/udp"
    volumes:
      - ./livekit.yaml:/etc/livekit.yaml
    command: --config /etc/livekit.yaml
  choir-mixer:
    build:
      context: ./choir-mixer
      dockerfile: Dockerfile
    environment:
      - LIVEKIT_URL=ws://livekit:7880
      - LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
      - LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
    depends_on:
      - livekit
    restart: unless-stopped
--- a/livekit.yaml
+++ b/livekit.yaml
@@ -0,0 +1,15 @@
 port: 7880
 rtc:
  tcp_port: 7881
  port_range_start: 50000
  port_range_end: 50100
  use_external_ip: false
  node_ip: 192.168.0.241
  stun_servers: []
 keys:
  6c0e48f597799858028a4d7c88bdf830: cb43b9cc77ea24378af12cf3c4a0e52e7de267ca0d9d33ac7919354685e56e8d
 logging:
  level: info
 room:
  empty_timeout: 300
  max_participants: 50