From de0f195aea98080b706a435069c8977e48c2fd55 Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Thu, 30 Oct 2025 21:42:35 +0530 Subject: [PATCH] feat(tts): Improve text-to-speech service with enhanced error handling and state management --- .../providers/text_to_speech_provider.dart | 29 ++++- .../chat/services/text_to_speech_service.dart | 64 ++++++++-- .../voice_call_notification_service.dart | 13 +- .../chat/services/voice_call_service.dart | 78 ++++++++++-- lib/features/chat/views/voice_call_page.dart | 112 ++++++++++++------ 5 files changed, 238 insertions(+), 58 deletions(-) diff --git a/lib/features/chat/providers/text_to_speech_provider.dart b/lib/features/chat/providers/text_to_speech_provider.dart index 8ed89c1..2f53bd6 100644 --- a/lib/features/chat/providers/text_to_speech_provider.dart +++ b/lib/features/chat/providers/text_to_speech_provider.dart @@ -188,10 +188,19 @@ class TextToSpeechController extends Notifier { return; } + final isPausedActive = + state.activeMessageId == messageId && + state.status == TtsPlaybackStatus.paused; + if (isPausedActive) { + await resume(); + return; + } + final isCurrentlyActive = state.activeMessageId == messageId && state.status != TtsPlaybackStatus.idle && - state.status != TtsPlaybackStatus.error; + state.status != TtsPlaybackStatus.error && + state.status != TtsPlaybackStatus.paused; if (isCurrentlyActive) { await stop(); @@ -294,6 +303,24 @@ class TextToSpeechController extends Notifier { await _service.pause(); } + Future resume() async { + if (!state.initialized || !state.available) { + return; + } + try { + await _service.resume(); + } catch (e) { + if (!ref.mounted) { + return; + } + state = state.copyWith( + status: TtsPlaybackStatus.error, + errorMessage: e.toString(), + clearActiveMessageId: true, + ); + } + } + Future stop() async { await _service.stop(); if (!ref.mounted) { diff --git a/lib/features/chat/services/text_to_speech_service.dart b/lib/features/chat/services/text_to_speech_service.dart index 2d4277d..c2ec6fe 100644 --- a/lib/features/chat/services/text_to_speech_service.dart +++ b/lib/features/chat/services/text_to_speech_service.dart @@ -24,6 +24,7 @@ class TextToSpeechService { int _expectedChunks = 0; int _currentIndex = -1; bool _waitingNext = false; + bool _deviceEngineAvailable = false; String? _serverDefaultVoice; Future? _serverDefaultVoiceFuture; @@ -42,8 +43,20 @@ class TextToSpeechService { TextToSpeechService({ApiService? api}) : _api = api { // Wire minimal player events to callbacks _player.onPlayerComplete.listen((_) => _onAudioComplete()); - _player.onPlayerStateChanged.listen((s) { - if (s == PlayerState.playing) _handleStart(); + _player.onPlayerStateChanged.listen((state) { + switch (state) { + case PlayerState.playing: + _handleStart(); + break; + case PlayerState.paused: + _handlePause(); + break; + case PlayerState.stopped: + _handleCancel(); + break; + default: + break; + } }); } @@ -118,14 +131,22 @@ class TextToSpeechService { ]); } - // Set the voice (specific or default) - await _setVoiceByName(voice); - _available = true; + // Set the voice (specific or default) when using device engine + if (_engine == TtsEngine.device) { + await _setVoiceByName(voice); + } + _deviceEngineAvailable = true; } catch (e) { - _available = false; - _onError?.call(e.toString()); + _deviceEngineAvailable = false; + if (_engine != TtsEngine.server) { + _available = false; + _onError?.call(e.toString()); + _initialized = true; + return _available; + } } + _available = _engine == TtsEngine.server || _deviceEngineAvailable; _initialized = true; return _available; } @@ -145,7 +166,11 @@ class TextToSpeechService { await _startServerChunkedPlayback(text); } catch (e) { _onError?.call(e.toString()); - await _speakOnDevice(text); + if (_deviceEngineAvailable) { + await _speakOnDevice(text); + } else { + throw StateError('Server text-to-speech failed: $e'); + } } return; } @@ -155,8 +180,8 @@ class TextToSpeechService { } Future _speakOnDevice(String text) async { - if (!_available) { - throw StateError('Text-to-speech is unavailable on this device'); + if (!_deviceEngineAvailable) { + throw StateError('Device text-to-speech is unavailable'); } await _tts.stop(); if (!_voiceConfigured) { @@ -174,7 +199,8 @@ class TextToSpeechService { try { if (_engine == TtsEngine.server) { await _player.pause(); - } else if (_available) { + _handlePause(); + } else if (_deviceEngineAvailable) { await _tts.pause(); } } catch (e) { @@ -182,6 +208,22 @@ class TextToSpeechService { } } + Future resume() async { + if (!_initialized) return; + try { + if (_engine == TtsEngine.server) { + if (_waitingNext && (_currentIndex + 1) < _buffered.length) { + _waitingNext = false; + await _playNextIfBuffered(_session); + } else { + await _player.resume(); + } + } + } catch (e) { + _onError?.call(e.toString()); + } + } + Future stop() async { if (!_initialized) { return; diff --git a/lib/features/chat/services/voice_call_notification_service.dart b/lib/features/chat/services/voice_call_notification_service.dart index b2620a4..2134878 100644 --- a/lib/features/chat/services/voice_call_notification_service.dart +++ b/lib/features/chat/services/voice_call_notification_service.dart @@ -99,12 +99,19 @@ class VoiceCallNotificationService { required String modelName, required bool isMuted, required bool isSpeaking, + required bool isPaused, }) async { if (!_initialized) { await initialize(); } - final status = isSpeaking ? 'Speaking...' : 'Listening...'; + final status = isSpeaking + ? 'Speaking...' + : isMuted + ? 'Muted' + : isPaused + ? 'Paused' + : 'Listening...'; final muteAction = isMuted ? 'Unmute' : 'Mute'; final muteActionId = isMuted ? _actionUnmute : _actionMute; @@ -131,7 +138,7 @@ class VoiceCallNotificationService { muteActionId, muteAction, icon: DrawableResourceAndroidBitmap( - isMuted ? '@drawable/ic_mic_on' : '@drawable/ic_mic_off', + isMuted ? '@drawable/ic_mic_off' : '@drawable/ic_mic_on', ), showsUserInterface: false, cancelNotification: false, @@ -176,11 +183,13 @@ class VoiceCallNotificationService { required String modelName, required bool isMuted, required bool isSpeaking, + required bool isPaused, }) async { await showCallNotification( modelName: modelName, isMuted: isMuted, isSpeaking: isSpeaking, + isPaused: isPaused, ); } diff --git a/lib/features/chat/services/voice_call_service.dart b/lib/features/chat/services/voice_call_service.dart index 1f5b684..1037b38 100644 --- a/lib/features/chat/services/voice_call_service.dart +++ b/lib/features/chat/services/voice_call_service.dart @@ -19,12 +19,15 @@ enum VoiceCallState { idle, connecting, listening, + paused, processing, speaking, error, disconnected, } +enum VoiceCallPauseReason { user, mute, system } + class VoiceCallService { static const String _voiceCallStreamId = 'voice-call'; @@ -42,6 +45,8 @@ class VoiceCallService { String _accumulatedTranscript = ''; bool _isDisposed = false; bool _isMuted = false; + bool _listeningPaused = false; + final Set _pauseReasons = {}; SocketEventSubscription? _socketSubscription; Timer? _keepAliveTimer; @@ -83,6 +88,9 @@ class VoiceCallService { Future initialize() async { if (_isDisposed) return; + _pauseReasons.clear(); + _listeningPaused = false; + // Initialize notification service await _notificationService.initialize(); @@ -183,6 +191,15 @@ class VoiceCallService { if (_isDisposed) return; try { + if (_pauseReasons.isNotEmpty) { + _listeningPaused = true; + if (_state != VoiceCallState.paused) { + _updateState(VoiceCallState.paused); + } + return; + } + + _listeningPaused = false; _accumulatedTranscript = ''; // Check if voice input is available @@ -291,8 +308,12 @@ class VoiceCallService { _speakResponse(_accumulatedResponse); _accumulatedResponse = ''; } else if (_accumulatedResponse.isEmpty) { - // No response, restart listening - _startListening(); + // No response, restart listening unless paused + if (_pauseReasons.isEmpty) { + _startListening(); + } else if (_state != VoiceCallState.paused) { + _updateState(VoiceCallState.paused); + } } } } @@ -341,7 +362,12 @@ class VoiceCallService { void _handleTtsComplete() { if (_isDisposed) return; _isSpeaking = false; - // After assistant finishes speaking, start listening for user again + // After assistant finishes speaking, resume only if not paused + if (_pauseReasons.isNotEmpty) { + _listeningPaused = true; + _updateState(VoiceCallState.paused); + return; + } _startListening(); } @@ -379,24 +405,52 @@ class VoiceCallService { _sessionId = null; _accumulatedTranscript = ''; _isMuted = false; + _listeningPaused = false; + _pauseReasons.clear(); _updateState(VoiceCallState.disconnected); } - Future pauseListening() async { + Future pauseListening({ + VoiceCallPauseReason reason = VoiceCallPauseReason.user, + }) async { if (_isDisposed) return; + + final wasEmpty = _pauseReasons.isEmpty; + _pauseReasons.add(reason); + if (!wasEmpty) { + return; + } + + _listeningPaused = true; await _voiceInput.stopListening(); await _transcriptSubscription?.cancel(); await _intensitySubscription?.cancel(); + + if (_state == VoiceCallState.listening) { + _updateState(VoiceCallState.paused); + } } - Future resumeListening() async { + Future resumeListening({ + VoiceCallPauseReason reason = VoiceCallPauseReason.user, + }) async { if (_isDisposed) return; - await _startListening(); + + _pauseReasons.remove(reason); + if (_pauseReasons.isNotEmpty) { + return; + } + + if (_state == VoiceCallState.paused || _listeningPaused) { + await _startListening(); + } } Future cancelSpeaking() async { if (_isDisposed) return; await _tts.stop(); + _isSpeaking = false; + _accumulatedResponse = ''; // Immediately restart listening await _startListening(); } @@ -428,6 +482,9 @@ class VoiceCallService { modelName: modelName, isMuted: _isMuted, isSpeaking: _state == VoiceCallState.speaking, + isPaused: + _state == VoiceCallState.paused || + (_pauseReasons.isNotEmpty && !_isSpeaking), ); } catch (e) { // Silently ignore notification errors @@ -451,9 +508,14 @@ class VoiceCallService { void _toggleMute() { _isMuted = !_isMuted; if (_isMuted) { - pauseListening(); + if (_isSpeaking) { + unawaited(_tts.stop()); + _isSpeaking = false; + _accumulatedResponse = ''; + } + pauseListening(reason: VoiceCallPauseReason.mute); } else { - resumeListening(); + resumeListening(reason: VoiceCallPauseReason.mute); } _updateNotification(); } diff --git a/lib/features/chat/views/voice_call_page.dart b/lib/features/chat/views/voice_call_page.dart index 1768ade..2780386 100644 --- a/lib/features/chat/views/voice_call_page.dart +++ b/lib/features/chat/views/voice_call_page.dart @@ -348,45 +348,83 @@ class _VoiceCallPageState extends ConsumerState Widget _buildControlButtons(Color primaryColor) { final errorColor = Theme.of(context).colorScheme.error; final warningColor = Colors.orange; + final successColor = Theme.of(context).colorScheme.secondary; + + final buttons = []; + + // Retry button (only show in error state) + if (_currentState == VoiceCallState.error) { + buttons.add( + _buildActionButton( + icon: CupertinoIcons.arrow_clockwise, + label: 'Retry', + color: primaryColor, + onPressed: () async { + await _initializeCall(); + }, + ), + ); + } + + final canPause = _currentState == VoiceCallState.listening; + final canResume = _currentState == VoiceCallState.paused; + + if (canPause) { + buttons.add( + _buildActionButton( + icon: CupertinoIcons.pause_fill, + label: 'Pause', + color: warningColor, + onPressed: () async { + await _service?.pauseListening(); + }, + ), + ); + } else if (canResume) { + buttons.add( + _buildActionButton( + icon: CupertinoIcons.play_fill, + label: 'Resume', + color: successColor, + onPressed: () async { + await _service?.resumeListening(); + }, + ), + ); + } + + // Cancel speaking button (only show when speaking) + if (_currentState == VoiceCallState.speaking) { + buttons.add( + _buildActionButton( + icon: CupertinoIcons.stop_fill, + label: 'Stop', + color: warningColor, + onPressed: () async { + await _service?.cancelSpeaking(); + }, + ), + ); + } + + // End call button + buttons.add( + _buildActionButton( + icon: CupertinoIcons.phone_down_fill, + label: 'End Call', + color: errorColor, + onPressed: () async { + await _service?.stopCall(); + if (mounted) { + Navigator.of(context).pop(); + } + }, + ), + ); return Row( mainAxisAlignment: MainAxisAlignment.spaceEvenly, - children: [ - // Retry button (only show in error state) - if (_currentState == VoiceCallState.error) - _buildActionButton( - icon: CupertinoIcons.arrow_clockwise, - label: 'Retry', - color: primaryColor, - onPressed: () async { - await _initializeCall(); - }, - ), - - // Cancel speaking button (only show when speaking) - if (_currentState == VoiceCallState.speaking) - _buildActionButton( - icon: CupertinoIcons.stop_fill, - label: 'Stop', - color: warningColor, - onPressed: () async { - await _service?.cancelSpeaking(); - }, - ), - - // End call button - _buildActionButton( - icon: CupertinoIcons.phone_down_fill, - label: 'End Call', - color: errorColor, - onPressed: () async { - await _service?.stopCall(); - if (mounted) { - Navigator.of(context).pop(); - } - }, - ), - ], + children: buttons, ); } @@ -422,6 +460,8 @@ class _VoiceCallPageState extends ConsumerState return 'Connecting...'; case VoiceCallState.listening: return 'Listening'; + case VoiceCallState.paused: + return 'Paused'; case VoiceCallState.processing: return 'Thinking...'; case VoiceCallState.speaking: