Bladeren bron

Fix: Follow-Up Audio-Streaming + RecordingComplete

Der FOLLOW_UP State im WakewordService pufferte Audio nur lokal,
streamte es aber NICHT an den Server. Der Server empfing nie
Audio-Daten und konnte kein STT/NLP durchfuehren.

Fixes:
1. FOLLOW_UP State: Audio an Server streamen (wie RECORDING)
2. _finish_follow_up: RecordingComplete Command senden
3. Audio-Unduck vor TTS-Antwort

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
patrick 5 dagen geleden
bovenliggende
commit
0c349ec98b
1 gewijzigde bestanden met toevoegingen van 31 en 7 verwijderingen
  1. 31 7
      trixy_core/wakeword/service.py

+ 31 - 7
trixy_core/wakeword/service.py

@@ -713,10 +713,14 @@ class WakewordService(IService):
                 self._schedule_async(self._finish_recording())
 
         elif state == ServiceState.FOLLOW_UP:
-            # Rückfrage-Aufnahme
+            # Rückfrage-Aufnahme: Buffer + VAD + Streaming (wie RECORDING)
             self._audio_buffer.add_chunk(audio_data)
             vad_state = self._vad.process_frame(audio_data)
 
+            # Audio an Server streamen (gleich wie bei RECORDING)
+            if self._audio_streamer:
+                self._schedule_async(self._audio_streamer(audio_data))
+
             if vad_state == VADState.TIMEOUT:
                 self._set_state(ServiceState.PROCESSING)
                 self._schedule_async(self._finish_follow_up())
@@ -1080,14 +1084,38 @@ class WakewordService(IService):
         self._vad.start()
 
     async def _finish_follow_up(self) -> None:
-        """Beendet Follow-Up-Aufnahme."""
+        """Beendet Follow-Up-Aufnahme und sendet RecordingComplete."""
         if not self._current_session:
             return
 
         audio_data = self._audio_buffer.stop_recording()
         self._vad.stop()
 
-        # Sende Follow-Up-Antwort
+        duration = len(audio_data) / (16000 * 2)  # 16kHz, 16-bit
+        pinfo(f"Follow-Up Aufnahme beendet: {len(audio_data)} Bytes, {duration:.1f}s")
+
+        # Audio-Ducking: Lautstaerke wiederherstellen vor TTS
+        self._unduck_audio()
+
+        # Im Client-Modus: RecordingComplete senden (Audio wurde bereits gestreamt)
+        if self._connection:
+            try:
+                from trixy_core.network.cmd.wakeword import RecordingComplete
+                cmd = RecordingComplete(
+                    session_id=self._current_session.session_id,
+                    speech_detected=self._vad.has_speech,
+                    duration_seconds=duration,
+                    audio_level=self._vad._peak_level if hasattr(self._vad, "_peak_level") else 0.0,
+                )
+                await self._connection.send_message(cmd)
+                pinfo("Follow-Up RecordingComplete gesendet — warte auf Server")
+                self._schedule_conversation_end_timeout()
+            except Exception as e:
+                perror(f"Follow-Up RecordingComplete Fehler: {e}")
+                self._complete_session()
+            return
+
+        # Standalone-Modus: wie bisher
         if self._send_to_server:
             try:
                 response = await self._send_to_server("follow_up_response", {
@@ -1095,16 +1123,12 @@ class WakewordService(IService):
                     "audio_data": audio_data.hex(),
                     "follow_up_number": self._current_session.follow_up_count,
                 })
-
-                # Noch eine Rückfrage?
                 if response.get("follow_up"):
                     await self._handle_follow_up(response)
                     return
-
             except Exception as e:
                 perror(f"Follow-Up-Fehler: {e}")
 
-        # Session beenden
         self._complete_session()
 
     def _schedule_conversation_end_timeout(self) -> None: