From 1a570f4a08de4e6272150c8a2d39c570e81ca261 Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Mon, 3 Nov 2025 00:36:25 +0530 Subject: [PATCH] feat(voice-input): improve server-side speech detection with silence auto-stop --- .../chat/services/voice_input_service.dart | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart index 47b1238..a817a12 100644 --- a/lib/features/chat/services/voice_input_service.dart +++ b/lib/features/chat/services/voice_input_service.dart @@ -44,6 +44,8 @@ class VoiceInputService { _intensityController?.stream ?? const Stream.empty(); int _lastIntensity = 0; Timer? _intensityDecayTimer; + Timer? _silenceTimer; + bool _hasDetectedSpeech = false; /// Public stream of partial/final transcript strings and special audio tokens. Stream get textStream => @@ -331,6 +333,9 @@ class VoiceInputService { _autoStopTimer?.cancel(); _autoStopTimer = null; + _silenceTimer?.cancel(); + _silenceTimer = null; + if (_usingServerStt) { await _finalizeServerRecording(); } else { @@ -354,6 +359,7 @@ class VoiceInputService { _serverRecorderActive = false; _serverRecordingPath = null; _serverRecordingMimeType = null; + _hasDetectedSpeech = false; } Future _stopLocalStt() async { @@ -425,6 +431,7 @@ class VoiceInputService { await _recorder.start(config, path: path); _serverRecorderActive = true; + _hasDetectedSpeech = false; await _ampSub?.cancel(); _ampSub = _recorder @@ -435,9 +442,34 @@ class VoiceInputService { try { _intensityController?.add(_lastIntensity); } catch (_) {} + + // Detect silence and auto-stop for server-side STT + _handleServerAmplitude(amplitude.current); }, onError: (_) {}); } + void _handleServerAmplitude(double? amplitude) { + if (!_usingServerStt || !_isListening) return; + + // Threshold for detecting speech (in dB) + const double speechThreshold = -45.0; + final double currentDb = amplitude ?? -100.0; + + // If we detect speech, mark it and reset silence timer + if (currentDb > speechThreshold) { + _hasDetectedSpeech = true; + _silenceTimer?.cancel(); + _silenceTimer = null; + } else if (_hasDetectedSpeech && _silenceTimer == null) { + // Start silence timer only after we've detected speech at least once + _silenceTimer = Timer(const Duration(seconds: 2), () { + if (_isListening && _usingServerStt) { + unawaited(_stopListening()); + } + }); + } + } + Future<(String, String)> _createRecordingTarget() async { final directory = await getTemporaryDirectory(); final timestamp = DateTime.now().millisecondsSinceEpoch; @@ -657,6 +689,7 @@ class VoiceInputService { void dispose() { stopListening(); + _silenceTimer?.cancel(); try { _speech.dispose().catchError((_) {}); } catch (_) {}