Last active
January 17, 2026 21:01
-
-
Save ErikDeBruijn/395c92a03218e709aebe5d55fc33a126 to your computer and use it in GitHub Desktop.
Jarvis Voice Assistant Daemon - Always-on voice daemon for Claude Code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Jarvis Voice Assistant Daemon Configuration
wake_words: ["hey jarvis", "jarvis"]
language: "nl"
llm:
  command: "/Users/erik/.claude/local/claude"  # Full path to the correct version
  jarvis_prompt: "/jarvis"  # activates voice mode
  extra_args: []  # e.g. ["--model", "opus"]
  tmux_session: "jarvis"  # tmux session name
whisper:
  url: "http://10.1.1.64:8081/inference"
  local_fallback: true
  model: "base"  # for the local fallback
audio:
  device_name: "Yeti X"  # Explicitly pick a microphone (or null for default)
  native_sample_rate: 48000  # Native sample rate of the microphone
  target_sample_rate: 16000  # Sample rate for VAD/Whisper
  channels: 1
  silence_seconds: 1.5  # seconds of silence = end of utterance
  max_listen_seconds: 30  # maximum recording time
  vad_threshold: 0.5  # Silero VAD threshold
session:
  idle_timeout_seconds: 60  # No input -> close session
  end_phrases: ["tot ziens", "bedankt jarvis", "klaar", "goodbye", "exit"]
tts:
  command: "say"
  voice: "Xander"
  rate: 180
sounds:
  wake: "~/.config/jarvis/sounds/wake.wav"
  listening: "~/.config/jarvis/sounds/listening.wav"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Jarvis Voice Assistant Daemon | |
| An always-on voice daemon that orchestrates Claude Code sessions. | |
| Wake word triggers a new conversation, daemon manages the entire session | |
| including follow-up responses. | |
| Usage: | |
| jarvis-daemon # Run in foreground | |
| jarvis-daemon --test-wake # Test wake word detection | |
| jarvis-daemon --test-listen # Test audio capture + VAD | |
| jarvis-daemon --active # Active mode (voice enabled) | |
| jarvis-daemon --passive # Passive mode (monitoring only) | |
| """ | |
| import argparse | |
| import io | |
| import logging | |
| import os | |
| import signal | |
| import subprocess | |
| import sys | |
| import tempfile | |
| import threading | |
| import time | |
| import wave | |
| from dataclasses import dataclass | |
| from enum import Enum, auto | |
| from pathlib import Path | |
| from typing import Optional | |
| import numpy as np | |
| import requests | |
| import sounddevice as sd | |
| import yaml | |
# Optional imports with graceful fallback: the daemon still runs with
# reduced functionality when these heavy dependencies are missing.
try:
    import openwakeword
    from openwakeword.model import Model as WakeWordModel
    WAKE_WORD_AVAILABLE = True
except ImportError:
    WAKE_WORD_AVAILABLE = False
    logging.warning("openwakeword not installed, wake word detection disabled")
try:
    import torch  # required for the Silero VAD model (loaded via torch.hub)
    SILERO_AVAILABLE = True
except ImportError:
    SILERO_AVAILABLE = False
    logging.warning("torch not installed, using simple VAD")
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class DaemonState(Enum):
    """States of the daemon's main-loop state machine (see JarvisDaemon.run)."""
    IDLE = auto()        # Waiting for wake word
    LISTENING = auto()   # Recording user speech
    PROCESSING = auto()  # Sending to Whisper/Claude
    SPEAKING = auto()    # TTS playing
    FOLLOW_UP = auto()   # Waiting for follow-up (no wake word needed)
@dataclass
class Config:
    """Configuration loaded from YAML.

    Every field maps to a key in the config file; Config.load supplies a
    default for any missing key or section.
    """
    wake_words: list[str]
    language: str
    llm_command: str
    llm_jarvis_prompt: str
    llm_extra_args: list[str]
    tmux_session: str
    whisper_url: str
    whisper_local_fallback: bool
    device_name: Optional[str]       # substring match against input devices; None = default
    native_sample_rate: int          # microphone's native rate (Hz)
    target_sample_rate: int          # rate fed to VAD/Whisper (Hz)
    channels: int
    silence_seconds: float           # trailing silence that ends an utterance
    max_listen_seconds: float
    vad_threshold: float
    idle_timeout_seconds: int
    end_phrases: list[str]
    tts_command: str
    tts_voice: str
    tts_rate: int
    sound_wake: str                  # path, ~ expanded at load time
    sound_listening: str             # path, ~ expanded at load time

    @classmethod
    def load(cls, path: Path) -> 'Config':
        """Load configuration from a YAML file.

        Tolerates an empty file (yaml.safe_load returns None) and missing
        sections/keys by falling back to built-in defaults.

        Args:
            path: Path to the YAML config file.

        Raises:
            OSError: if the file cannot be opened.
        """
        with open(path) as f:
            # safe_load returns None for an empty file; treat that as {}
            # so the .get() chain below does not raise AttributeError.
            data = yaml.safe_load(f) or {}
        # Hoist section dicts once instead of re-fetching them per key.
        llm = data.get('llm', {})
        whisper = data.get('whisper', {})
        audio = data.get('audio', {})
        session = data.get('session', {})
        tts = data.get('tts', {})
        sounds = data.get('sounds', {})
        return cls(
            wake_words=data.get('wake_words', ['hey jarvis', 'jarvis']),
            language=data.get('language', 'nl'),
            llm_command=llm.get('command', 'claude'),
            llm_jarvis_prompt=llm.get('jarvis_prompt', '/jarvis'),
            llm_extra_args=llm.get('extra_args', []),
            tmux_session=llm.get('tmux_session', 'jarvis'),
            whisper_url=whisper.get('url', 'http://10.1.1.64:8081/inference'),
            whisper_local_fallback=whisper.get('local_fallback', True),
            device_name=audio.get('device_name'),
            native_sample_rate=audio.get('native_sample_rate', 48000),
            target_sample_rate=audio.get('target_sample_rate', 16000),
            channels=audio.get('channels', 1),
            silence_seconds=audio.get('silence_seconds', 1.5),
            max_listen_seconds=audio.get('max_listen_seconds', 30),
            vad_threshold=audio.get('vad_threshold', 0.5),
            idle_timeout_seconds=session.get('idle_timeout_seconds', 60),
            end_phrases=session.get('end_phrases', ['tot ziens', 'bedankt jarvis', 'klaar']),
            tts_command=tts.get('command', 'say'),
            tts_voice=tts.get('voice', 'Xander'),
            tts_rate=tts.get('rate', 180),
            sound_wake=os.path.expanduser(sounds.get('wake', '~/.config/jarvis/sounds/wake.wav')),
            sound_listening=os.path.expanduser(sounds.get('listening', '~/.config/jarvis/sounds/listening.wav')),
        )
class AudioCapture:
    """Handles audio capture with VAD (Voice Activity Detection).

    Records at the microphone's native sample rate, runs Silero VAD (or an
    energy fallback) on 16 kHz chunks, and returns audio resampled to
    config.target_sample_rate for Whisper.
    """
    # Silero VAD requires exactly 512 samples at 16kHz (32ms)
    VAD_CHUNK_SAMPLES = 512

    def __init__(self, config: Config):
        self.config = config
        self.vad_model = None  # Silero model; None -> energy-based fallback
        self.device_index = self._find_device()
        self._load_vad()

    def _find_device(self) -> Optional[int]:
        """Find audio device by name.

        Returns the index of the first input-capable device whose name
        contains config.device_name (substring match), or None to use the
        system default device.
        """
        if not self.config.device_name:
            logger.info("Using default audio input device")
            return None
        devices = sd.query_devices()
        for i, d in enumerate(devices):
            if self.config.device_name in d['name'] and d['max_input_channels'] > 0:
                logger.info(f"Using audio device [{i}]: {d['name']}")
                return i
        logger.warning(f"Device '{self.config.device_name}' not found, using default")
        return None

    def _resample(self, audio: np.ndarray) -> np.ndarray:
        """Resample audio from native to target sample rate.

        No-op when both rates match. scipy is imported lazily so it is only
        required when resampling actually happens.
        """
        if self.config.native_sample_rate == self.config.target_sample_rate:
            return audio
        from scipy import signal
        target_length = int(len(audio) * self.config.target_sample_rate / self.config.native_sample_rate)
        return signal.resample(audio, target_length).astype(np.float32)

    def _load_vad(self):
        """Load Silero VAD model via torch.hub; fall back to None on failure."""
        if SILERO_AVAILABLE:
            try:
                self.vad_model, utils = torch.hub.load(
                    repo_or_dir='snakers4/silero-vad',
                    model='silero_vad',
                    force_reload=False,
                    trust_repo=True
                )
                # utils[0] is get_speech_timestamps per the silero-vad hub API.
                self.get_speech_timestamps = utils[0]
                logger.info("Silero VAD loaded successfully")
            except Exception as e:
                logger.warning(f"Failed to load Silero VAD: {e}")
                self.vad_model = None

    def is_speech(self, audio_chunk_16k: np.ndarray) -> bool:
        """Check if audio chunk contains speech using VAD.

        Args:
            audio_chunk_16k: Audio chunk already at 16kHz sample rate

        Returns:
            True if any 512-sample window scores above vad_threshold.
            Without a VAD model (or on VAD error) falls back to an RMS
            energy threshold of 0.01.
        """
        if self.vad_model is None:
            # Fallback: simple energy-based detection (RMS of the chunk)
            energy = np.sqrt(np.mean(audio_chunk_16k ** 2))
            return energy > 0.01
        try:
            # Silero VAD expects exactly 512 samples at 16kHz
            # Process in 512-sample windows and return True if any has speech
            chunk_size = self.VAD_CHUNK_SAMPLES
            has_speech = False
            for i in range(0, len(audio_chunk_16k) - chunk_size + 1, chunk_size):
                window = audio_chunk_16k[i:i + chunk_size]
                audio_tensor = torch.from_numpy(window.astype(np.float32))
                speech_prob = self.vad_model(audio_tensor, 16000).item()
                if speech_prob > self.config.vad_threshold:
                    has_speech = True
                    break
            return has_speech
        except Exception as e:
            logger.warning(f"VAD error: {e}")
            # Fallback to energy-based
            energy = np.sqrt(np.mean(audio_chunk_16k ** 2))
            return energy > 0.01

    def record_until_silence(self, max_duration: Optional[float] = None, min_duration: float = 0.5) -> np.ndarray:
        """Record audio until silence is detected.

        Records at native sample rate, performs real-time VAD at 16kHz,
        returns audio resampled to target sample rate.

        Args:
            max_duration: Maximum recording duration in seconds
            min_duration: Minimum recording duration before silence detection activates

        Returns:
            Audio array at target_sample_rate (16kHz for Whisper); empty
            array if nothing was captured.
        """
        max_duration = max_duration or self.config.max_listen_seconds
        # Calculate chunk sizes for native and 16kHz
        # We want ~32ms chunks (512 samples at 16kHz)
        vad_chunk_duration = self.VAD_CHUNK_SAMPLES / 16000  # ~32ms
        native_chunk_samples = int(self.config.native_sample_rate * vad_chunk_duration)
        silence_chunks_needed = int(self.config.silence_seconds / vad_chunk_duration)
        recorded_chunks = []     # raw chunks at native rate
        silence_count = 0        # consecutive non-speech chunks after speech began
        total_duration = 0
        speech_started = False
        speech_chunk_count = 0
        # Reset VAD state before recording (Silero keeps internal RNN state)
        if self.vad_model is not None:
            self.vad_model.reset_states()
        logger.info(f"Recording at {self.config.native_sample_rate}Hz... (waiting for speech)")

        def audio_callback(indata, frames, time_info, status):
            # Runs on the sounddevice audio thread; communicates with the
            # main loop via the nonlocal counters below.
            nonlocal silence_count, speech_started, total_duration, speech_chunk_count
            if status:
                logger.warning(f"Audio status: {status}")
            chunk = indata[:, 0].copy()  # first channel only
            recorded_chunks.append(chunk)
            total_duration += vad_chunk_duration
            # Resample chunk to 16kHz for VAD
            chunk_16k = self._resample(chunk) if self.config.native_sample_rate != 16000 else chunk
            if self.is_speech(chunk_16k):
                if not speech_started:
                    logger.info("Speech detected!")
                    speech_started = True
                speech_chunk_count += 1
                silence_count = 0
            elif speech_started:
                silence_count += 1

        with sd.InputStream(
            samplerate=self.config.native_sample_rate,
            channels=self.config.channels,
            dtype=np.float32,
            device=self.device_index,
            blocksize=native_chunk_samples,
            callback=audio_callback
        ):
            while total_duration < max_duration:
                # Only stop on silence if:
                # 1. Speech was detected
                # 2. We've recorded for at least min_duration
                # 3. We've had enough silence chunks
                can_stop = (
                    speech_started and
                    total_duration >= min_duration and
                    silence_count >= silence_chunks_needed
                )
                if can_stop:
                    logger.info(f"Silence detected after {speech_chunk_count} speech chunks, stopping")
                    break
                time.sleep(vad_chunk_duration)
        if recorded_chunks:
            # Concatenate and resample to target sample rate
            audio_native = np.concatenate(recorded_chunks)
            audio = self._resample(audio_native)
            duration = len(audio) / self.config.target_sample_rate
            logger.info(f"Recorded {duration:.1f}s of audio (speech detected: {speech_started})")
            return audio
        return np.array([], dtype=np.float32)
class WakeWordDetector:
    """Detects wake words in audio stream using openWakeWord."""

    # OpenWakeWord expects 16kHz audio
    WAKE_WORD_SAMPLE_RATE = 16000

    def __init__(self, config: Config):
        self.config = config
        self.model = None  # openWakeWord model; None -> detection disabled
        self.device_index = self._find_device()
        self._load_model()

    def _find_device(self) -> Optional[int]:
        """Find audio device by name (substring match), or None for default."""
        if not self.config.device_name:
            return None
        devices = sd.query_devices()
        for i, d in enumerate(devices):
            if self.config.device_name in d['name'] and d['max_input_channels'] > 0:
                return i
        return None

    def _load_model(self):
        """Load OpenWakeWord model; leaves self.model as None on any failure."""
        if not WAKE_WORD_AVAILABLE:
            logger.warning("Wake word detection not available")
            return
        try:
            # Download and load the "hey jarvis" model
            openwakeword.utils.download_models(['hey_jarvis_v0.1'])
            self.model = WakeWordModel(
                wakeword_models=['hey_jarvis_v0.1'],
                inference_framework='onnx'
            )
            logger.info("Wake word model loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load wake word model: {e}")
            self.model = None

    def listen_for_wake_word(self, timeout: Optional[float] = None) -> tuple[bool, np.ndarray]:
        """Listen for wake word, return (detected, buffered_audio).

        Returns:
            Tuple of (wake_word_detected, audio_buffer).
            NOTE(review): despite what the buffering comment below may
            suggest, the returned buffer contains up to ~5s of audio from
            BEFORE the wake word (including the wake word itself) plus ~3s
            captured after detection — the rolling-buffer trim stops once
            the wake word fires. Callers feed the whole buffer to Whisper.
            With no model loaded, returns (True, empty) immediately so the
            daemon degrades to push-to-talk behavior.
        """
        if self.model is None:
            logger.warning("Wake word model not loaded, triggering immediately")
            return True, np.array([], dtype=np.float32)
        # OpenWakeWord expects 16kHz, 80ms chunks (1280 samples)
        chunk_duration = 0.08
        chunk_samples_16k = int(self.WAKE_WORD_SAMPLE_RATE * chunk_duration)
        # If recording at native rate, calculate native chunk size
        native_rate = self.config.native_sample_rate
        chunk_samples_native = int(native_rate * chunk_duration)
        detected = threading.Event()
        start_time = time.time()
        # Buffer to store audio chunks (native rate)
        audio_buffer = []
        wake_word_time = [None]  # Use list to allow modification in callback
        POST_WAKE_BUFFER_SECONDS = 3.0  # Continue capturing for 3s after wake word

        def audio_callback(indata, frames, time_info, status):
            # Audio-thread callback: buffers audio and runs wake-word inference.
            if status:
                logger.warning(f"Audio status: {status}")
            chunk_native = indata[:, 0].copy()
            # Always buffer audio (keep last ~5 seconds before wake word)
            audio_buffer.append(chunk_native)
            max_buffer_chunks = int(5.0 / chunk_duration)
            if len(audio_buffer) > max_buffer_chunks and wake_word_time[0] is None:
                audio_buffer.pop(0)
            # Resample to 16kHz for wake word detection
            if native_rate != self.WAKE_WORD_SAMPLE_RATE:
                from scipy import signal
                target_len = int(len(chunk_native) * self.WAKE_WORD_SAMPLE_RATE / native_rate)
                chunk_16k = signal.resample(chunk_native, target_len).astype(np.float32)
            else:
                chunk_16k = chunk_native
            # Convert to int16 for openwakeword
            audio_int16 = (chunk_16k * 32767).astype(np.int16)
            prediction = self.model.predict(audio_int16)
            # Check all wake word scores
            for model_name, score in prediction.items():
                if score > 0.5 and wake_word_time[0] is None:
                    logger.info(f"Wake word detected: {model_name} (score: {score:.2f})")
                    wake_word_time[0] = time.time()
                    detected.set()

        try:
            with sd.InputStream(
                samplerate=native_rate,
                channels=self.config.channels,
                dtype=np.float32,
                device=self.device_index,
                blocksize=chunk_samples_native,
                callback=audio_callback
            ):
                # Wait for wake word
                while not detected.is_set():
                    if timeout and (time.time() - start_time) > timeout:
                        return False, np.array([], dtype=np.float32)
                    time.sleep(0.05)
                # Continue capturing audio after wake word for a bit
                # This captures the rest of the user's sentence
                logger.info(f"Buffering post-wake-word audio for {POST_WAKE_BUFFER_SECONDS}s...")
                post_wake_start = time.time()
                while time.time() - post_wake_start < POST_WAKE_BUFFER_SECONDS:
                    time.sleep(0.05)
        except Exception as e:
            logger.error(f"Wake word detection error: {e}")
            return False, np.array([], dtype=np.float32)
        # Return the buffered audio (at native sample rate)
        if audio_buffer:
            buffered_audio = np.concatenate(audio_buffer)
            logger.info(f"Captured {len(buffered_audio) / native_rate:.1f}s of buffered audio")
            return True, buffered_audio
        return True, np.array([], dtype=np.float32)
class WhisperClient:
    """Transcribe audio using Whisper server or local fallback.

    Audio is expected as a float32 numpy array at config.target_sample_rate.
    """

    def __init__(self, config: Config):
        self.config = config
        self.local_model = None  # lazily-loaded faster-whisper model

    def transcribe(self, audio: np.ndarray) -> str:
        """Transcribe audio to text.

        Tries the remote Whisper server first; on any failure falls back to
        local faster-whisper (if enabled in config), otherwise returns "".
        """
        # Try remote Whisper server first
        try:
            return self._transcribe_remote(audio)
        except Exception as e:
            logger.warning(f"Remote Whisper failed: {e}")
        # Fallback to local if enabled
        if self.config.whisper_local_fallback:
            return self._transcribe_local(audio)
        return ""

    def _transcribe_remote(self, audio: np.ndarray) -> str:
        """Transcribe using remote Whisper server.

        Encodes the float audio as 16-bit PCM WAV in memory and POSTs it to
        config.whisper_url (whisper.cpp-style /inference endpoint).

        Raises:
            requests.RequestException: on network/HTTP errors (handled by caller).
        """
        # Convert to WAV bytes
        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, 'wb') as wav_file:
            wav_file.setnchannels(self.config.channels)
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(self.config.target_sample_rate)
            wav_file.writeframes((audio * 32767).astype(np.int16).tobytes())
        wav_buffer.seek(0)
        response = requests.post(
            self.config.whisper_url,
            files={'file': ('audio.wav', wav_buffer, 'audio/wav')},
            data={
                'language': self.config.language,
                'response_format': 'json'
            },
            timeout=30
        )
        response.raise_for_status()
        result = response.json()
        text = result.get('text', '').strip()
        logger.info(f"Transcription: {text}")
        return text

    def _transcribe_local(self, audio: np.ndarray) -> str:
        """Transcribe using local faster-whisper.

        NOTE(review): the model name "base" is hardcoded here; the config
        file's whisper.model setting is not plumbed through Config — confirm
        whether it should be.
        """
        try:
            from faster_whisper import WhisperModel
            if self.local_model is None:
                logger.info("Loading local Whisper model...")
                self.local_model = WhisperModel("base", compute_type="int8")
            segments, _ = self.local_model.transcribe(
                audio,
                language=self.config.language[:2],  # 'nl' from 'nl'
                beam_size=5
            )
            text = " ".join(segment.text for segment in segments).strip()
            logger.info(f"Local transcription: {text}")
            return text
        except Exception as e:
            logger.error(f"Local Whisper failed: {e}")
            return ""
| class TTS: | |
| """Text-to-Speech using macOS say command.""" | |
| def __init__(self, config: Config): | |
| self.config = config | |
| def speak(self, text: str): | |
| """Speak text using TTS (blocking).""" | |
| if not text: | |
| return | |
| logger.info(f"Speaking: {text[:50]}...") | |
| try: | |
| subprocess.run([ | |
| self.config.tts_command, | |
| '-v', self.config.tts_voice, | |
| '-r', str(self.config.tts_rate), | |
| text | |
| ], check=True) | |
| except subprocess.CalledProcessError as e: | |
| logger.error(f"TTS failed: {e}") | |
class TmuxSession:
    """Manage Claude Code session via tmux.

    The Claude CLI runs interactively inside a detached tmux session; input
    is injected with `tmux send-keys` and output read with `capture-pane`.
    """

    def __init__(self, config: Config):
        self.config = config
        self.session_name = config.tmux_session
        self._last_output_line = 0  # cursor into captured pane output

    def exists(self) -> bool:
        """Check if tmux session exists (tmux has-session exit status)."""
        result = subprocess.run(
            ['tmux', 'has-session', '-t', self.session_name],
            capture_output=True
        )
        return result.returncode == 0

    def start(self, initial_text: str):
        """Start a new Claude session in tmux.

        Kills any existing session of the same name first, launches Claude
        interactively (so the session is resumable), then sends the jarvis
        prompt plus the user's first utterance.
        """
        if self.exists():
            logger.info(f"Killing existing tmux session: {self.session_name}")
            subprocess.run(['tmux', 'kill-session', '-t', self.session_name])
        # Start Claude interactively (no -p flag) so session is resumable
        import shlex
        cmd_parts = [self.config.llm_command]
        cmd_parts.extend(self.config.llm_extra_args)
        cmd = shlex.join(cmd_parts)
        logger.info(f"Starting tmux session: {self.session_name}")
        logger.info(f"Command: {cmd}")
        # Start Claude in interactive mode
        subprocess.run([
            'tmux', 'new-session', '-d', '-s', self.session_name, cmd
        ], check=True)
        self._last_output_line = 0
        time.sleep(2)  # Give Claude time to start and show prompt
        # Now send the initial prompt via tmux send-keys
        prompt_text = f'{self.config.llm_jarvis_prompt} {initial_text}'
        logger.info(f"Sending initial prompt: {prompt_text[:80]}...")
        self.send_input(prompt_text)

    def send_input(self, text: str):
        """Send text input to the Claude session (types text, presses Enter)."""
        if not self.exists():
            logger.warning("No tmux session to send input to")
            return
        logger.info(f"Sending to Claude: {text[:50]}...")
        subprocess.run([
            'tmux', 'send-keys', '-t', self.session_name, text, 'Enter'
        ], check=True)

    def get_new_output(self) -> str:
        """Get new output from the session since last check.

        Captures up to the last 1000 lines of the pane and returns only the
        lines past the previously recorded cursor position.
        """
        if not self.exists():
            return ""
        result = subprocess.run(
            ['tmux', 'capture-pane', '-t', self.session_name, '-p', '-S', '-1000'],
            capture_output=True,
            text=True
        )
        if result.returncode != 0:
            return ""
        lines = result.stdout.split('\n')
        new_lines = lines[self._last_output_line:]
        self._last_output_line = len(lines)
        return '\n'.join(new_lines).strip()

    def is_claude_ready(self) -> bool:
        """Check if Claude has finished responding (waiting for input).

        Heuristic: inspects the last few pane lines for the '>' input
        prompt. NOTE(review): '>' appearing in ordinary output on the last
        line would also count as ready — confirm against the Claude CLI UI.
        """
        if not self.exists():
            return False
        # Capture the last few lines to check for the prompt
        result = subprocess.run(
            ['tmux', 'capture-pane', '-t', self.session_name, '-p', '-S', '-5'],
            capture_output=True,
            text=True
        )
        output = result.stdout.strip()
        # Claude shows '>' when ready for input
        return output.endswith('>') or '>' in output.split('\n')[-1]

    def kill(self):
        """Kill the tmux session if it exists."""
        if self.exists():
            subprocess.run(['tmux', 'kill-session', '-t', self.session_name])
class JarvisDaemon:
    """Main daemon orchestrating the voice assistant.

    Implements a state machine over DaemonState: IDLE waits for the wake
    word, LISTENING records speech, PROCESSING transcribes and forwards to
    Claude (via tmux), SPEAKING waits for Claude's reply, FOLLOW_UP listens
    again without requiring a wake word until the idle timeout expires.
    Audio in flight between states is carried in self._current_audio.
    """

    def __init__(self, config: Config, passive: bool = False):
        self.config = config
        self.passive = passive  # passive = monitoring only, no audio capture
        self.state = DaemonState.IDLE
        self.running = True
        self.last_activity = time.time()  # drives the follow-up idle timeout
        # Initialize components
        self.audio = AudioCapture(config)
        self.wake_word = WakeWordDetector(config)
        self.whisper = WhisperClient(config)
        self.tts = TTS(config)
        self.tmux = TmuxSession(config)
        # Setup signal handlers
        signal.signal(signal.SIGINT, self._handle_signal)
        signal.signal(signal.SIGTERM, self._handle_signal)

    def _handle_signal(self, signum, frame):
        """Handle shutdown signals by letting the main loop exit cleanly."""
        logger.info(f"Received signal {signum}, shutting down...")
        self.running = False

    def _play_sound(self, sound_path: str):
        """Play a sound file via afplay (macOS); silently skipped if missing."""
        if os.path.exists(sound_path):
            subprocess.run(['afplay', sound_path], capture_output=True)

    def _check_end_phrase(self, text: str) -> bool:
        """Check if text contains an end phrase (case-insensitive substring)."""
        text_lower = text.lower()
        return any(phrase in text_lower for phrase in self.config.end_phrases)

    def run(self):
        """Main daemon loop: dispatch on current state until shutdown.

        Any exception from a state handler is logged and the loop continues
        after a 1s pause, so transient errors don't kill the daemon.
        """
        logger.info("Jarvis daemon starting...")
        logger.info(f"Mode: {'passive' if self.passive else 'active'}")
        while self.running:
            try:
                if self.state == DaemonState.IDLE:
                    self._handle_idle()
                elif self.state == DaemonState.LISTENING:
                    self._handle_listening()
                elif self.state == DaemonState.PROCESSING:
                    self._handle_processing()
                elif self.state == DaemonState.SPEAKING:
                    self._handle_speaking()
                elif self.state == DaemonState.FOLLOW_UP:
                    self._handle_follow_up()
            except Exception as e:
                logger.error(f"Error in state {self.state}: {e}")
                time.sleep(1)
        logger.info("Jarvis daemon stopped")

    def _handle_idle(self):
        """Wait for wake word, then jump to PROCESSING (if audio was
        buffered during detection) or LISTENING."""
        logger.info("Waiting for wake word...")
        detected, buffered_audio = self.wake_word.listen_for_wake_word()
        if detected:
            self._play_sound(self.config.sound_wake)
            self.last_activity = time.time()
            # If we have buffered audio from wake word detection, use it directly
            if len(buffered_audio) > 0:
                # Resample buffered audio to target sample rate
                resampled = self.audio._resample(buffered_audio)
                logger.info(f"Using {len(resampled) / self.config.target_sample_rate:.1f}s of wake-word buffered audio")
                self._current_audio = resampled
                self.state = DaemonState.PROCESSING
            else:
                # No buffered audio, go to listening mode
                self.state = DaemonState.LISTENING

    def _handle_listening(self):
        """Record user speech (only used when no buffered audio from wake word)."""
        if self.passive:
            logger.info("Passive mode, skipping audio capture")
            time.sleep(1)
            return
        self._play_sound(self.config.sound_listening)
        logger.info("Listening...")
        audio = self.audio.record_until_silence()
        # Check if we got meaningful audio (at least 0.3s with speech detected)
        min_audio_samples = int(self.config.target_sample_rate * 0.3)
        if len(audio) > min_audio_samples:
            self._current_audio = audio
            self.state = DaemonState.PROCESSING
        else:
            logger.warning("No meaningful audio captured, returning to idle")
            self.state = DaemonState.IDLE

    def _is_meaningful_transcription(self, text: str) -> bool:
        """Check if transcription contains actual speech content.

        Filters empty strings, very short strings, pure punctuation/noise,
        and known Whisper no-speech/hallucination patterns (Dutch+English).
        """
        if not text:
            logger.debug("Empty transcription")
            return False
        # Strip whitespace and common noise patterns
        cleaned = text.strip()
        # Too short to be meaningful (need at least 4 chars)
        if len(cleaned) < 4:
            logger.debug(f"Transcription too short: {len(cleaned)} chars")
            return False
        # Check if it's only asterisks, dots, or other noise characters
        if all(c in '*.-… \t\n' for c in cleaned):
            logger.debug(f"Transcription is only noise chars: {cleaned!r}")
            return False
        # Common "no speech" patterns from Whisper
        noise_patterns = [
            '***', '****', '*****', '* * *',
            '...', '....', '…', '. . .',
            '[muziek]', '(muziek)', '*muziek*', '[music]', '[ music ]',
            '[stilte]', '(stilte)', '[silence]', '[ silence ]',
            '[geluid]', '(geluid)', '[sound]',
            '[applaus]', '(applaus)',
            'you', 'thank you', 'thanks for watching',  # Common Whisper hallucinations
        ]
        cleaned_lower = cleaned.lower().strip('*[]() .')
        for pattern in noise_patterns:
            pattern_clean = pattern.lower().strip('*[]() .')
            if cleaned_lower == pattern_clean:
                logger.debug(f"Transcription matches noise pattern: {pattern}")
                return False
        # Check if it's mostly punctuation/symbols (need at least 3 letters)
        alpha_count = sum(1 for c in cleaned if c.isalpha())
        if alpha_count < 3:
            logger.debug(f"Transcription has too few letters: {alpha_count}")
            return False
        return True

    def _handle_processing(self):
        """Transcribe self._current_audio and send the text to Claude.

        Noise/empty transcriptions drop back to FOLLOW_UP (session open) or
        IDLE; an end phrase speaks a goodbye and kills the session.
        """
        text = self.whisper.transcribe(self._current_audio)
        if not text:
            logger.warning("No transcription result")
            self.state = DaemonState.FOLLOW_UP if self.tmux.exists() else DaemonState.IDLE
            return
        # Check if transcription is meaningful (not just noise)
        if not self._is_meaningful_transcription(text):
            logger.info(f"Ignoring noise/empty transcription: {text!r}")
            self.state = DaemonState.FOLLOW_UP if self.tmux.exists() else DaemonState.IDLE
            return
        # Check for end phrase
        if self._check_end_phrase(text):
            logger.info("End phrase detected, closing session")
            self.tts.speak("Tot ziens!")
            self.tmux.kill()
            self.state = DaemonState.IDLE
            return
        # Start or continue session
        if not self.tmux.exists():
            self.tmux.start(text)
        else:
            self.tmux.send_input(text)
        self.state = DaemonState.SPEAKING
        self.last_activity = time.time()

    def _handle_speaking(self):
        """Wait for Claude response and speak it.

        NOTE(review): speech output is assumed to be produced by the /jarvis
        skill inside the Claude session, so this handler only waits for the
        prompt to return and then moves on — confirm the skill does TTS.
        """
        # Wait for Claude to finish
        max_wait = 120  # Max wait for Claude response
        start_time = time.time()
        while not self.tmux.is_claude_ready():
            if time.time() - start_time > max_wait:
                logger.warning("Timeout waiting for Claude response")
                break
            time.sleep(0.5)
        # Get the response (Claude outputs TTS via /jarvis skill)
        # The TTS is handled by the skill, so we just wait and move to follow-up
        time.sleep(1)  # Give TTS time to complete
        self.state = DaemonState.FOLLOW_UP
        self.last_activity = time.time()

    def _handle_follow_up(self):
        """Wait for follow-up speech (no wake word needed)."""
        # Check for idle timeout
        if time.time() - self.last_activity > self.config.idle_timeout_seconds:
            logger.info("Idle timeout, returning to wake word mode")
            self.tts.speak("Ik ga slapen. Zeg Hey Jarvis om me te wekken.")
            self.state = DaemonState.IDLE
            return
        if self.passive:
            time.sleep(1)
            return
        # Listen for follow-up (shorter timeout, no wake word)
        logger.info("Listening for follow-up...")
        self._play_sound(self.config.sound_listening)
        audio = self.audio.record_until_silence(max_duration=10)
        if len(audio) > self.config.target_sample_rate * 0.5:  # At least 0.5s of audio
            self._current_audio = audio
            self.state = DaemonState.PROCESSING
        else:
            # No speech detected, continue waiting
            time.sleep(0.5)
def test_wake_word(config: Config):
    """Interactively test wake word detection (30s timeout).

    Fix: listen_for_wake_word returns a (detected, audio_buffer) tuple; a
    tuple is always truthy, so the original `if detector.listen_for_wake_word(...)`
    could never reach the timeout branch. Unpack the flag explicitly.
    """
    print("Testing wake word detection. Say 'Hey Jarvis'...")
    detector = WakeWordDetector(config)
    detected, _ = detector.listen_for_wake_word(timeout=30)
    if detected:
        print("Wake word detected!")
    else:
        print("Timeout, no wake word detected")
def test_listen(config: Config):
    """Interactively test audio capture followed by transcription."""
    print("Testing audio capture. Speak something...")
    capture = AudioCapture(config)
    transcriber = WhisperClient(config)
    samples = capture.record_until_silence()
    if len(samples) == 0:
        print("No audio captured")
        return
    duration = len(samples) / config.target_sample_rate
    print(f"Captured {duration:.1f}s of audio")
    transcription = transcriber.transcribe(samples)
    print(f"Transcription: {transcription}")
def write_pid(pid_file: Optional[Path] = None) -> None:
    """Write this process's PID to a file.

    Generalized to accept an explicit destination while keeping the original
    zero-argument call working.

    Args:
        pid_file: Destination path. Defaults to
            ~/.local/state/jarvis/jarvis.pid. Missing parent directories
            are created.
    """
    if pid_file is None:
        pid_file = Path.home() / '.local' / 'state' / 'jarvis' / 'jarvis.pid'
    pid_file.parent.mkdir(parents=True, exist_ok=True)
    pid_file.write_text(str(os.getpid()))
def main():
    """CLI entry point: parse arguments, load config, run tests or the daemon."""
    parser = argparse.ArgumentParser(description='Jarvis Voice Assistant Daemon')
    parser.add_argument('--config', type=Path,
                        default=Path.home() / '.config' / 'jarvis' / 'config.yaml',
                        help='Path to config file')
    parser.add_argument('--test-wake', action='store_true',
                        help='Test wake word detection')
    parser.add_argument('--test-listen', action='store_true',
                        help='Test audio capture and transcription')
    parser.add_argument('--active', action='store_true',
                        help='Run in active mode (voice enabled)')
    parser.add_argument('--passive', action='store_true',
                        help='Run in passive mode (monitoring only)')
    parser.add_argument('--no-wake-word', action='store_true',
                        help='Skip wake word, start listening immediately')
    args = parser.parse_args()
    # Load config
    config = Config.load(args.config)
    # Run tests if requested (each exits without starting the daemon)
    if args.test_wake:
        test_wake_word(config)
        return
    if args.test_listen:
        test_listen(config)
        return
    # Write PID file
    write_pid()
    # Create and run daemon
    # NOTE: --active is accepted but unused here; active is the default mode.
    daemon = JarvisDaemon(config, passive=args.passive)
    # Skip wake word if requested
    if args.no_wake_word:
        daemon.state = DaemonState.LISTENING
    daemon.run()


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Start the Jarvis daemon in the background.
#
# Refuses to start when a live daemon is already recorded in the PID file,
# launches jarvis-daemon via nohup, and waits up to 10s for the daemon to
# write its own PID file before reporting success.
set -e

PID_FILE="$HOME/.local/state/jarvis/jarvis.pid"
LOG_FILE="$HOME/.local/state/jarvis/jarvis.log"

# Check if already running
if [ -f "$PID_FILE" ]; then
    PID=$(cat "$PID_FILE")
    if kill -0 "$PID" 2>/dev/null; then
        echo "Jarvis daemon is already running (PID: $PID)"
        exit 1
    fi
    # Stale PID file from a dead daemon: remove it so the startup wait loop
    # below cannot be fooled by an unrelated process that recycled the PID.
    rm -f "$PID_FILE"
fi

# Create log directory if needed
mkdir -p "$(dirname "$LOG_FILE")"

# Parse arguments. MODE keeps last-one-wins semantics for --passive/--active.
MODE=""
NO_WAKE=""
while [[ $# -gt 0 ]]; do
    case $1 in
        --passive)
            MODE="--passive"
            shift
            ;;
        --active)
            MODE="--active"
            shift
            ;;
        --no-wake-word)
            NO_WAKE="--no-wake-word"
            shift
            ;;
        *)
            echo "Unknown option: $1"
            exit 1
            ;;
    esac
done

# Collect options in an array: empty options contribute zero words, and
# quoting "${ARGS[@]}" avoids relying on unquoted word splitting.
# (Plain "if" statements are used instead of `[ ... ] && ...` so a false
# test does not trip `set -e`.)
ARGS=()
if [ -n "$MODE" ]; then
    ARGS+=("$MODE")
fi
if [ -n "$NO_WAKE" ]; then
    ARGS+=("$NO_WAKE")
fi

echo "Starting Jarvis daemon..."
nohup jarvis-daemon "${ARGS[@]}" > "$LOG_FILE" 2>&1 &
NOHUP_PID=$!

# Wait for daemon to start and write PID file (may take a few seconds for model loading)
echo "Waiting for daemon to initialize..."
for i in {1..10}; do
    if [ -f "$PID_FILE" ]; then
        PID=$(cat "$PID_FILE")
        if kill -0 "$PID" 2>/dev/null; then
            echo "Jarvis daemon started (PID: $PID)"
            echo "Log file: $LOG_FILE"
            echo ""
            echo "Use 'jarvis-status' to check status"
            echo "Use 'jarvis-stop' to stop the daemon"
            echo "Use 'tmux attach -t jarvis' to view Claude session"
            exit 0
        fi
    fi
    sleep 1
done

# PID file never appeared; distinguish "still loading" from "crashed".
if kill -0 "$NOHUP_PID" 2>/dev/null; then
    echo "Daemon is starting but PID file not yet created."
    echo "Check progress with: tail -f $LOG_FILE"
    echo "Nohup PID: $NOHUP_PID"
else
    echo "Failed to start Jarvis daemon. Check $LOG_FILE for errors."
    tail -10 "$LOG_FILE"
    exit 1
fi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Check status of Jarvis daemon and session.
# Prints daemon liveness, the tmux Claude session state, the default audio
# input device, recent log lines, and a command cheat sheet.

PID_FILE="$HOME/.local/state/jarvis/jarvis.pid"
LOG_FILE="$HOME/.local/state/jarvis/jarvis.log"

# Report whether the daemon recorded in the PID file is alive.
print_daemon_status() {
    echo "Daemon:"
    if [ -f "$PID_FILE" ]; then
        local pid
        pid=$(cat "$PID_FILE")
        if kill -0 "$pid" 2>/dev/null; then
            echo " Status: Running (PID: $pid)"
            # CPU / memory / uptime of the daemon process
            ps -p "$pid" -o %cpu,%mem,etime | tail -1 | while read cpu mem time; do
                echo " CPU: $cpu% Memory: $mem% Uptime: $time"
            done
        else
            echo " Status: Not running (stale PID file)"
        fi
    else
        echo " Status: Not running"
    fi
}

# Report the tmux "jarvis" session and echo its last few lines of output.
print_session_status() {
    echo ""
    echo "Claude Session (tmux):"
    if ! tmux has-session -t jarvis 2>/dev/null; then
        echo " Status: No active session"
        return
    fi
    echo " Status: Active"
    echo " Attach: tmux attach -t jarvis"
    echo ""
    echo " Last output:"
    tmux capture-pane -t jarvis -p -S -5 | sed 's/^/ /'
}

# Query the default input device via sounddevice (best effort).
print_audio_status() {
    echo ""
    echo "Audio:"
    if command -v python3 &>/dev/null; then
        python3 -c "
import sounddevice as sd
try:
    default_input = sd.query_devices(kind='input')
    print(f\" Input device: {default_input['name']}\")
except Exception as e:
    print(f\" Input device: Error - {e}\")
" 2>/dev/null || echo " Input device: Unable to query"
    fi
}

# Show the tail of the daemon log, if one exists.
print_recent_log() {
    if [ -f "$LOG_FILE" ]; then
        echo ""
        echo "Recent log entries:"
        tail -5 "$LOG_FILE" | sed 's/^/ /'
    fi
}

print_commands() {
    echo ""
    echo "Commands:"
    echo " jarvis-start Start daemon"
    echo " jarvis-start --no-wake-word Start without wake word (immediate listen)"
    echo " jarvis-stop Stop daemon (keep tmux session)"
    echo " jarvis-stop --kill-session Stop daemon and kill session"
    echo " tmux attach -t jarvis View Claude session"
}

echo "=== Jarvis Voice Assistant Status ==="
echo ""
print_daemon_status
print_session_status
print_audio_status
print_recent_log
print_commands
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Stop the Jarvis daemon.
#
# Terminates the daemon recorded in the PID file (SIGTERM first, SIGKILL
# after ~5s of waiting). When no PID file exists, sweeps for orphaned
# jarvis-daemon processes instead. The tmux "jarvis" session is preserved
# unless --kill-session is given.

PID_FILE="$HOME/.local/state/jarvis/jarvis.pid"

# Check for --kill-session flag
KILL_SESSION=false
while [[ $# -gt 0 ]]; do
    case $1 in
        --kill-session)
            KILL_SESSION=true
            shift
            ;;
        *)
            echo "Unknown option: $1"
            echo "Usage: jarvis-stop [--kill-session]"
            echo " --kill-session Also kill the tmux Claude session"
            exit 1
            ;;
    esac
done

if [ ! -f "$PID_FILE" ]; then
    echo "No PID file found, checking for orphan processes..."
    # Find any running jarvis-daemon processes (pgrep may exit non-zero
    # when nothing matches, hence the `|| true`)
    ORPHAN_PIDS=$(pgrep -f "jarvis-daemon" 2>/dev/null || true)
    if [ -n "$ORPHAN_PIDS" ]; then
        echo "Found orphan jarvis-daemon process(es): $ORPHAN_PIDS"
        # First pass: polite SIGTERM to every orphan
        for OPID in $ORPHAN_PIDS; do
            echo "Killing orphan PID: $OPID"
            kill "$OPID" 2>/dev/null || true
        done
        sleep 1
        # Second pass: force kill any orphan that survived the SIGTERM
        for OPID in $ORPHAN_PIDS; do
            if kill -0 "$OPID" 2>/dev/null; then
                echo "Force killing PID: $OPID"
                kill -9 "$OPID" 2>/dev/null || true
            fi
        done
        echo "Orphan processes cleaned up"
    else
        echo "Jarvis daemon is not running"
    fi
    # Still check for tmux session: the session can outlive the daemon
    if tmux has-session -t jarvis 2>/dev/null; then
        if [ "$KILL_SESSION" = true ]; then
            tmux kill-session -t jarvis
            echo "Killed tmux jarvis session"
        else
            echo "Note: tmux jarvis session still exists (use --kill-session to remove)"
        fi
    fi
    exit 0
fi

PID=$(cat "$PID_FILE")
if kill -0 "$PID" 2>/dev/null; then
    echo "Stopping Jarvis daemon (PID: $PID)..."
    kill "$PID"
    # Wait for graceful shutdown (10 x 0.5s = up to ~5 seconds)
    for i in {1..10}; do
        if ! kill -0 "$PID" 2>/dev/null; then
            break
        fi
        sleep 0.5
    done
    # Force kill if still running after the grace period
    if kill -0 "$PID" 2>/dev/null; then
        echo "Force killing daemon..."
        kill -9 "$PID"
    fi
    echo "Jarvis daemon stopped"
else
    echo "Jarvis daemon was not running (stale PID file)"
fi

# Clean up PID file (also removes a stale one)
rm -f "$PID_FILE"

# Handle tmux session: keep it by default so Claude context survives restarts
if tmux has-session -t jarvis 2>/dev/null; then
    if [ "$KILL_SESSION" = true ]; then
        tmux kill-session -t jarvis
        echo "Killed tmux jarvis session"
    else
        echo "Note: tmux jarvis session still exists"
        echo " - Use 'tmux attach -t jarvis' to resume manually"
        echo " - Use 'jarvis-stop --kill-session' to also kill it"
    fi
fi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Jarvis Voice Assistant Daemon Dependencies | |
| # Audio capture | |
| sounddevice>=0.4.6 | |
| numpy>=1.24.0 | |
| # Wake word detection | |
| openwakeword>=0.6.0 | |
| # Voice Activity Detection | |
| silero-vad>=4.0 | |
| # HTTP client for Whisper server | |
| requests>=2.31.0 | |
| # Local Whisper fallback | |
| faster-whisper>=1.0.0 | |
| # Config parsing | |
| PyYAML>=6.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment