From 1a570f4a08de4e6272150c8a2d39c570e81ca261 Mon Sep 17 00:00:00 2001
From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com>
Date: Mon, 3 Nov 2025 00:36:25 +0530
Subject: [PATCH] feat(voice-input): improve server-side speech detection with
 silence auto-stop

---
 .../chat/services/voice_input_service.dart    | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)
diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart
index 47b1238..a817a12 100644
--- a/lib/features/chat/services/voice_input_service.dart
+++ b/lib/features/chat/services/voice_input_service.dart
@@ -44,6 +44,8 @@ class VoiceInputService {
       _intensityController?.stream ?? const Stream<int>.empty();
   int _lastIntensity = 0;
   Timer? _intensityDecayTimer;
+  Timer? _silenceTimer;
+  bool _hasDetectedSpeech = false;
 
   /// Public stream of partial/final transcript strings and special audio tokens.
   Stream<String> get textStream =>
@@ -331,6 +333,9 @@ class VoiceInputService {
     _autoStopTimer?.cancel();
     _autoStopTimer = null;
 
+    _silenceTimer?.cancel();
+    _silenceTimer = null;
+
     if (_usingServerStt) {
       await _finalizeServerRecording();
     } else {
@@ -354,6 +359,7 @@ class VoiceInputService {
     _serverRecorderActive = false;
     _serverRecordingPath = null;
     _serverRecordingMimeType = null;
+    _hasDetectedSpeech = false;
   }
 
   Future<void> _stopLocalStt() async {
@@ -425,6 +431,7 @@ class VoiceInputService {
 
     await _recorder.start(config, path: path);
     _serverRecorderActive = true;
+    _hasDetectedSpeech = false;
 
     await _ampSub?.cancel();
     _ampSub = _recorder
@@ -435,9 +442,34 @@ class VoiceInputService {
           try {
             _intensityController?.add(_lastIntensity);
           } catch (_) {}
+
+          // Detect silence and auto-stop for server-side STT
+          _handleServerAmplitude(amplitude.current);
         }, onError: (_) {});
   }
 
+  /// Auto-stops server STT after 2 s of silence following detected speech.
+  void _handleServerAmplitude(double? amplitude) {
+    if (!(_usingServerStt && _isListening)) return;
+
+    // A missing reading counts as -100.0 dB, below the -45.0 dB speech cutoff.
+    final isSpeech = (amplitude ?? -100.0) > -45.0;
+    if (isSpeech) {
+      // Sound above the cutoff: remember it and drop any pending countdown.
+      _hasDetectedSpeech = true;
+      _silenceTimer?.cancel();
+      _silenceTimer = null;
+      return;
+    }
+
+    // Quiet: arm the countdown once speech was heard and none is pending.
+    if (!_hasDetectedSpeech || _silenceTimer != null) return;
+    _silenceTimer = Timer(const Duration(seconds: 2), () {
+      if (!_isListening || !_usingServerStt) return;
+      unawaited(_stopListening());
+    });
+  }
+
   Future<(String, String)> _createRecordingTarget() async {
     final directory = await getTemporaryDirectory();
     final timestamp = DateTime.now().millisecondsSinceEpoch;
@@ -657,6 +689,7 @@ class VoiceInputService {
 
   void dispose() {
     stopListening();
+    _silenceTimer?.cancel();
     try {
       _speech.dispose().catchError((_) {});
     } catch (_) {}