feat(tts): Improve text-to-speech service with enhanced error handling and state management

This commit is contained in:
cogwheel0
2025-10-30 21:42:35 +05:30
parent d7f58498f9
commit de0f195aea
5 changed files with 238 additions and 58 deletions

View File

@@ -188,10 +188,19 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
return; return;
} }
final isPausedActive =
state.activeMessageId == messageId &&
state.status == TtsPlaybackStatus.paused;
if (isPausedActive) {
await resume();
return;
}
final isCurrentlyActive = final isCurrentlyActive =
state.activeMessageId == messageId && state.activeMessageId == messageId &&
state.status != TtsPlaybackStatus.idle && state.status != TtsPlaybackStatus.idle &&
state.status != TtsPlaybackStatus.error; state.status != TtsPlaybackStatus.error &&
state.status != TtsPlaybackStatus.paused;
if (isCurrentlyActive) { if (isCurrentlyActive) {
await stop(); await stop();
@@ -294,6 +303,24 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
await _service.pause(); await _service.pause();
} }
/// Asks the underlying TTS service to continue paused playback.
///
/// Does nothing unless the controller state reports both `initialized` and
/// `available`. On success this method does not touch [state] itself.
///
/// If the service call throws and the notifier is still mounted, the state
/// is switched to [TtsPlaybackStatus.error], the error's string form is
/// stored as the error message, and `clearActiveMessageId: true` is passed
/// to `copyWith`.
Future<void> resume() async {
  final ready = state.initialized && state.available;
  if (!ready) {
    return;
  }

  try {
    await _service.resume();
  } catch (error) {
    // Only write state while the notifier is still mounted.
    if (ref.mounted) {
      state = state.copyWith(
        status: TtsPlaybackStatus.error,
        errorMessage: error.toString(),
        clearActiveMessageId: true,
      );
    }
  }
}
Future<void> stop() async { Future<void> stop() async {
await _service.stop(); await _service.stop();
if (!ref.mounted) { if (!ref.mounted) {

View File

@@ -24,6 +24,7 @@ class TextToSpeechService {
int _expectedChunks = 0; int _expectedChunks = 0;
int _currentIndex = -1; int _currentIndex = -1;
bool _waitingNext = false; bool _waitingNext = false;
bool _deviceEngineAvailable = false;
String? _serverDefaultVoice; String? _serverDefaultVoice;
Future<String?>? _serverDefaultVoiceFuture; Future<String?>? _serverDefaultVoiceFuture;
@@ -42,8 +43,20 @@ class TextToSpeechService {
TextToSpeechService({ApiService? api}) : _api = api { TextToSpeechService({ApiService? api}) : _api = api {
// Wire minimal player events to callbacks // Wire minimal player events to callbacks
_player.onPlayerComplete.listen((_) => _onAudioComplete()); _player.onPlayerComplete.listen((_) => _onAudioComplete());
_player.onPlayerStateChanged.listen((s) { _player.onPlayerStateChanged.listen((state) {
if (s == PlayerState.playing) _handleStart(); switch (state) {
case PlayerState.playing:
_handleStart();
break;
case PlayerState.paused:
_handlePause();
break;
case PlayerState.stopped:
_handleCancel();
break;
default:
break;
}
}); });
} }
@@ -118,14 +131,22 @@ class TextToSpeechService {
]); ]);
} }
// Set the voice (specific or default) // Set the voice (specific or default) when using device engine
await _setVoiceByName(voice); if (_engine == TtsEngine.device) {
_available = true; await _setVoiceByName(voice);
}
_deviceEngineAvailable = true;
} catch (e) { } catch (e) {
_available = false; _deviceEngineAvailable = false;
_onError?.call(e.toString()); if (_engine != TtsEngine.server) {
_available = false;
_onError?.call(e.toString());
_initialized = true;
return _available;
}
} }
_available = _engine == TtsEngine.server || _deviceEngineAvailable;
_initialized = true; _initialized = true;
return _available; return _available;
} }
@@ -145,7 +166,11 @@ class TextToSpeechService {
await _startServerChunkedPlayback(text); await _startServerChunkedPlayback(text);
} catch (e) { } catch (e) {
_onError?.call(e.toString()); _onError?.call(e.toString());
await _speakOnDevice(text); if (_deviceEngineAvailable) {
await _speakOnDevice(text);
} else {
throw StateError('Server text-to-speech failed: $e');
}
} }
return; return;
} }
@@ -155,8 +180,8 @@ class TextToSpeechService {
} }
Future<void> _speakOnDevice(String text) async { Future<void> _speakOnDevice(String text) async {
if (!_available) { if (!_deviceEngineAvailable) {
throw StateError('Text-to-speech is unavailable on this device'); throw StateError('Device text-to-speech is unavailable');
} }
await _tts.stop(); await _tts.stop();
if (!_voiceConfigured) { if (!_voiceConfigured) {
@@ -174,7 +199,8 @@ class TextToSpeechService {
try { try {
if (_engine == TtsEngine.server) { if (_engine == TtsEngine.server) {
await _player.pause(); await _player.pause();
} else if (_available) { _handlePause();
} else if (_deviceEngineAvailable) {
await _tts.pause(); await _tts.pause();
} }
} catch (e) { } catch (e) {
@@ -182,6 +208,22 @@ class TextToSpeechService {
} }
} }
/// Continues playback that was previously paused.
///
/// Returns immediately when the service has not been initialized. Only the
/// server engine is handled here:
///
/// * when playback was waiting for the next chunk and a chunk after
///   `_currentIndex` is already buffered, the waiting flag is cleared and
///   that chunk is started for the current session;
/// * otherwise the audio player is resumed.
///
/// Anything thrown along the way is forwarded to the error callback as a
/// string and is not rethrown.
///
/// NOTE(review): for any other engine this method is a silent no-op —
/// confirm that a device-engine resume is handled elsewhere.
Future<void> resume() async {
  if (!_initialized || _engine != TtsEngine.server) {
    return;
  }

  try {
    final nextIndex = _currentIndex + 1;
    final shouldAdvance = _waitingNext && nextIndex < _buffered.length;
    if (!shouldAdvance) {
      await _player.resume();
      return;
    }
    // Clear the flag before starting the buffered chunk.
    _waitingNext = false;
    await _playNextIfBuffered(_session);
  } catch (error) {
    _onError?.call(error.toString());
  }
}
Future<void> stop() async { Future<void> stop() async {
if (!_initialized) { if (!_initialized) {
return; return;

View File

@@ -99,12 +99,19 @@ class VoiceCallNotificationService {
required String modelName, required String modelName,
required bool isMuted, required bool isMuted,
required bool isSpeaking, required bool isSpeaking,
required bool isPaused,
}) async { }) async {
if (!_initialized) { if (!_initialized) {
await initialize(); await initialize();
} }
final status = isSpeaking ? 'Speaking...' : 'Listening...'; final status = isSpeaking
? 'Speaking...'
: isMuted
? 'Muted'
: isPaused
? 'Paused'
: 'Listening...';
final muteAction = isMuted ? 'Unmute' : 'Mute'; final muteAction = isMuted ? 'Unmute' : 'Mute';
final muteActionId = isMuted ? _actionUnmute : _actionMute; final muteActionId = isMuted ? _actionUnmute : _actionMute;
@@ -131,7 +138,7 @@ class VoiceCallNotificationService {
muteActionId, muteActionId,
muteAction, muteAction,
icon: DrawableResourceAndroidBitmap( icon: DrawableResourceAndroidBitmap(
isMuted ? '@drawable/ic_mic_on' : '@drawable/ic_mic_off', isMuted ? '@drawable/ic_mic_off' : '@drawable/ic_mic_on',
), ),
showsUserInterface: false, showsUserInterface: false,
cancelNotification: false, cancelNotification: false,
@@ -176,11 +183,13 @@ class VoiceCallNotificationService {
required String modelName, required String modelName,
required bool isMuted, required bool isMuted,
required bool isSpeaking, required bool isSpeaking,
required bool isPaused,
}) async { }) async {
await showCallNotification( await showCallNotification(
modelName: modelName, modelName: modelName,
isMuted: isMuted, isMuted: isMuted,
isSpeaking: isSpeaking, isSpeaking: isSpeaking,
isPaused: isPaused,
); );
} }

View File

@@ -19,12 +19,15 @@ enum VoiceCallState {
idle, idle,
connecting, connecting,
listening, listening,
paused,
processing, processing,
speaking, speaking,
error, error,
disconnected, disconnected,
} }
/// Why voice-call listening is currently paused.
///
/// The service keeps a set of these reasons; listening is only restarted
/// once that set is empty again.
///
/// `user` is the default reason of `pauseListening` / `resumeListening`, and
/// `mute` is the reason passed when mute is toggled. `system` is not used in
/// the code visible here — NOTE(review): confirm where it is raised.
enum VoiceCallPauseReason { user, mute, system }
class VoiceCallService { class VoiceCallService {
static const String _voiceCallStreamId = 'voice-call'; static const String _voiceCallStreamId = 'voice-call';
@@ -42,6 +45,8 @@ class VoiceCallService {
String _accumulatedTranscript = ''; String _accumulatedTranscript = '';
bool _isDisposed = false; bool _isDisposed = false;
bool _isMuted = false; bool _isMuted = false;
bool _listeningPaused = false;
final Set<VoiceCallPauseReason> _pauseReasons = <VoiceCallPauseReason>{};
SocketEventSubscription? _socketSubscription; SocketEventSubscription? _socketSubscription;
Timer? _keepAliveTimer; Timer? _keepAliveTimer;
@@ -83,6 +88,9 @@ class VoiceCallService {
Future<void> initialize() async { Future<void> initialize() async {
if (_isDisposed) return; if (_isDisposed) return;
_pauseReasons.clear();
_listeningPaused = false;
// Initialize notification service // Initialize notification service
await _notificationService.initialize(); await _notificationService.initialize();
@@ -183,6 +191,15 @@ class VoiceCallService {
if (_isDisposed) return; if (_isDisposed) return;
try { try {
if (_pauseReasons.isNotEmpty) {
_listeningPaused = true;
if (_state != VoiceCallState.paused) {
_updateState(VoiceCallState.paused);
}
return;
}
_listeningPaused = false;
_accumulatedTranscript = ''; _accumulatedTranscript = '';
// Check if voice input is available // Check if voice input is available
@@ -291,8 +308,12 @@ class VoiceCallService {
_speakResponse(_accumulatedResponse); _speakResponse(_accumulatedResponse);
_accumulatedResponse = ''; _accumulatedResponse = '';
} else if (_accumulatedResponse.isEmpty) { } else if (_accumulatedResponse.isEmpty) {
// No response, restart listening // No response, restart listening unless paused
_startListening(); if (_pauseReasons.isEmpty) {
_startListening();
} else if (_state != VoiceCallState.paused) {
_updateState(VoiceCallState.paused);
}
} }
} }
} }
@@ -341,7 +362,12 @@ class VoiceCallService {
void _handleTtsComplete() { void _handleTtsComplete() {
if (_isDisposed) return; if (_isDisposed) return;
_isSpeaking = false; _isSpeaking = false;
// After assistant finishes speaking, start listening for user again // After assistant finishes speaking, resume only if not paused
if (_pauseReasons.isNotEmpty) {
_listeningPaused = true;
_updateState(VoiceCallState.paused);
return;
}
_startListening(); _startListening();
} }
@@ -379,24 +405,52 @@ class VoiceCallService {
_sessionId = null; _sessionId = null;
_accumulatedTranscript = ''; _accumulatedTranscript = '';
_isMuted = false; _isMuted = false;
_listeningPaused = false;
_pauseReasons.clear();
_updateState(VoiceCallState.disconnected); _updateState(VoiceCallState.disconnected);
} }
Future<void> pauseListening() async { Future<void> pauseListening({
VoiceCallPauseReason reason = VoiceCallPauseReason.user,
}) async {
if (_isDisposed) return; if (_isDisposed) return;
final wasEmpty = _pauseReasons.isEmpty;
_pauseReasons.add(reason);
if (!wasEmpty) {
return;
}
_listeningPaused = true;
await _voiceInput.stopListening(); await _voiceInput.stopListening();
await _transcriptSubscription?.cancel(); await _transcriptSubscription?.cancel();
await _intensitySubscription?.cancel(); await _intensitySubscription?.cancel();
if (_state == VoiceCallState.listening) {
_updateState(VoiceCallState.paused);
}
} }
Future<void> resumeListening() async { Future<void> resumeListening({
VoiceCallPauseReason reason = VoiceCallPauseReason.user,
}) async {
if (_isDisposed) return; if (_isDisposed) return;
await _startListening();
_pauseReasons.remove(reason);
if (_pauseReasons.isNotEmpty) {
return;
}
if (_state == VoiceCallState.paused || _listeningPaused) {
await _startListening();
}
} }
Future<void> cancelSpeaking() async { Future<void> cancelSpeaking() async {
if (_isDisposed) return; if (_isDisposed) return;
await _tts.stop(); await _tts.stop();
_isSpeaking = false;
_accumulatedResponse = '';
// Immediately restart listening // Immediately restart listening
await _startListening(); await _startListening();
} }
@@ -428,6 +482,9 @@ class VoiceCallService {
modelName: modelName, modelName: modelName,
isMuted: _isMuted, isMuted: _isMuted,
isSpeaking: _state == VoiceCallState.speaking, isSpeaking: _state == VoiceCallState.speaking,
isPaused:
_state == VoiceCallState.paused ||
(_pauseReasons.isNotEmpty && !_isSpeaking),
); );
} catch (e) { } catch (e) {
// Silently ignore notification errors // Silently ignore notification errors
@@ -451,9 +508,14 @@ class VoiceCallService {
void _toggleMute() { void _toggleMute() {
_isMuted = !_isMuted; _isMuted = !_isMuted;
if (_isMuted) { if (_isMuted) {
pauseListening(); if (_isSpeaking) {
unawaited(_tts.stop());
_isSpeaking = false;
_accumulatedResponse = '';
}
pauseListening(reason: VoiceCallPauseReason.mute);
} else { } else {
resumeListening(); resumeListening(reason: VoiceCallPauseReason.mute);
} }
_updateNotification(); _updateNotification();
} }

View File

@@ -348,45 +348,83 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
Widget _buildControlButtons(Color primaryColor) { Widget _buildControlButtons(Color primaryColor) {
final errorColor = Theme.of(context).colorScheme.error; final errorColor = Theme.of(context).colorScheme.error;
final warningColor = Colors.orange; final warningColor = Colors.orange;
final successColor = Theme.of(context).colorScheme.secondary;
final buttons = <Widget>[];
// Retry button (only show in error state)
if (_currentState == VoiceCallState.error) {
buttons.add(
_buildActionButton(
icon: CupertinoIcons.arrow_clockwise,
label: 'Retry',
color: primaryColor,
onPressed: () async {
await _initializeCall();
},
),
);
}
final canPause = _currentState == VoiceCallState.listening;
final canResume = _currentState == VoiceCallState.paused;
if (canPause) {
buttons.add(
_buildActionButton(
icon: CupertinoIcons.pause_fill,
label: 'Pause',
color: warningColor,
onPressed: () async {
await _service?.pauseListening();
},
),
);
} else if (canResume) {
buttons.add(
_buildActionButton(
icon: CupertinoIcons.play_fill,
label: 'Resume',
color: successColor,
onPressed: () async {
await _service?.resumeListening();
},
),
);
}
// Cancel speaking button (only show when speaking)
if (_currentState == VoiceCallState.speaking) {
buttons.add(
_buildActionButton(
icon: CupertinoIcons.stop_fill,
label: 'Stop',
color: warningColor,
onPressed: () async {
await _service?.cancelSpeaking();
},
),
);
}
// End call button
buttons.add(
_buildActionButton(
icon: CupertinoIcons.phone_down_fill,
label: 'End Call',
color: errorColor,
onPressed: () async {
await _service?.stopCall();
if (mounted) {
Navigator.of(context).pop();
}
},
),
);
return Row( return Row(
mainAxisAlignment: MainAxisAlignment.spaceEvenly, mainAxisAlignment: MainAxisAlignment.spaceEvenly,
children: [ children: buttons,
// Retry button (only show in error state)
if (_currentState == VoiceCallState.error)
_buildActionButton(
icon: CupertinoIcons.arrow_clockwise,
label: 'Retry',
color: primaryColor,
onPressed: () async {
await _initializeCall();
},
),
// Cancel speaking button (only show when speaking)
if (_currentState == VoiceCallState.speaking)
_buildActionButton(
icon: CupertinoIcons.stop_fill,
label: 'Stop',
color: warningColor,
onPressed: () async {
await _service?.cancelSpeaking();
},
),
// End call button
_buildActionButton(
icon: CupertinoIcons.phone_down_fill,
label: 'End Call',
color: errorColor,
onPressed: () async {
await _service?.stopCall();
if (mounted) {
Navigator.of(context).pop();
}
},
),
],
); );
} }
@@ -422,6 +460,8 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
return 'Connecting...'; return 'Connecting...';
case VoiceCallState.listening: case VoiceCallState.listening:
return 'Listening'; return 'Listening';
case VoiceCallState.paused:
return 'Paused';
case VoiceCallState.processing: case VoiceCallState.processing:
return 'Thinking...'; return 'Thinking...';
case VoiceCallState.speaking: case VoiceCallState.speaking: