Merge pull request #115 from cogwheel0/tts-service-enhancement

feat(tts): Improve text-to-speech service with enhanced error handling and state management
This commit is contained in:
cogwheel
2025-10-30 21:42:56 +05:30
committed by GitHub
5 changed files with 238 additions and 58 deletions

View File

@@ -188,10 +188,19 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
return;
}
final isPausedActive =
state.activeMessageId == messageId &&
state.status == TtsPlaybackStatus.paused;
if (isPausedActive) {
await resume();
return;
}
final isCurrentlyActive =
state.activeMessageId == messageId &&
state.status != TtsPlaybackStatus.idle &&
state.status != TtsPlaybackStatus.error;
state.status != TtsPlaybackStatus.error &&
state.status != TtsPlaybackStatus.paused;
if (isCurrentlyActive) {
await stop();
@@ -294,6 +303,24 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
await _service.pause();
}
/// Resumes playback through the underlying TTS service.
///
/// Returns immediately unless the current state is both `initialized` and
/// `available`. If the service call throws, the state is switched to
/// [TtsPlaybackStatus.error] with the error text as `errorMessage` and
/// `clearActiveMessageId: true` — provided `ref.mounted` is still true.
Future<void> resume() async {
  final ready = state.initialized && state.available;
  if (!ready) return;

  try {
    await _service.resume();
  } catch (e) {
    // Skip the state write when the ref is no longer mounted after the await.
    if (ref.mounted) {
      state = state.copyWith(
        status: TtsPlaybackStatus.error,
        errorMessage: e.toString(),
        clearActiveMessageId: true,
      );
    }
  }
}
Future<void> stop() async {
await _service.stop();
if (!ref.mounted) {

View File

@@ -24,6 +24,7 @@ class TextToSpeechService {
int _expectedChunks = 0;
int _currentIndex = -1;
bool _waitingNext = false;
bool _deviceEngineAvailable = false;
String? _serverDefaultVoice;
Future<String?>? _serverDefaultVoiceFuture;
@@ -42,8 +43,20 @@ class TextToSpeechService {
TextToSpeechService({ApiService? api}) : _api = api {
// Wire minimal player events to callbacks
_player.onPlayerComplete.listen((_) => _onAudioComplete());
_player.onPlayerStateChanged.listen((s) {
if (s == PlayerState.playing) _handleStart();
_player.onPlayerStateChanged.listen((state) {
switch (state) {
case PlayerState.playing:
_handleStart();
break;
case PlayerState.paused:
_handlePause();
break;
case PlayerState.stopped:
_handleCancel();
break;
default:
break;
}
});
}
@@ -118,14 +131,22 @@ class TextToSpeechService {
]);
}
// Set the voice (specific or default)
await _setVoiceByName(voice);
_available = true;
// Set the voice (specific or default) when using device engine
if (_engine == TtsEngine.device) {
await _setVoiceByName(voice);
}
_deviceEngineAvailable = true;
} catch (e) {
_available = false;
_onError?.call(e.toString());
_deviceEngineAvailable = false;
if (_engine != TtsEngine.server) {
_available = false;
_onError?.call(e.toString());
_initialized = true;
return _available;
}
}
_available = _engine == TtsEngine.server || _deviceEngineAvailable;
_initialized = true;
return _available;
}
@@ -145,7 +166,11 @@ class TextToSpeechService {
await _startServerChunkedPlayback(text);
} catch (e) {
_onError?.call(e.toString());
await _speakOnDevice(text);
if (_deviceEngineAvailable) {
await _speakOnDevice(text);
} else {
throw StateError('Server text-to-speech failed: $e');
}
}
return;
}
@@ -155,8 +180,8 @@ class TextToSpeechService {
}
Future<void> _speakOnDevice(String text) async {
if (!_available) {
throw StateError('Text-to-speech is unavailable on this device');
if (!_deviceEngineAvailable) {
throw StateError('Device text-to-speech is unavailable');
}
await _tts.stop();
if (!_voiceConfigured) {
@@ -174,7 +199,8 @@ class TextToSpeechService {
try {
if (_engine == TtsEngine.server) {
await _player.pause();
} else if (_available) {
_handlePause();
} else if (_deviceEngineAvailable) {
await _tts.pause();
}
} catch (e) {
@@ -182,6 +208,22 @@ class TextToSpeechService {
}
}
/// Resumes server-engine playback.
///
/// Does nothing until the service is initialized, and does nothing for any
/// engine other than [TtsEngine.server].
/// NOTE(review): no device-engine resume path exists here — confirm whether
/// playback paused through the device engine needs one.
///
/// Errors are not rethrown; they are forwarded to `_onError` as text.
Future<void> resume() async {
  if (!_initialized || _engine != TtsEngine.server) return;

  try {
    final nextChunkReady =
        _waitingNext && _currentIndex + 1 < _buffered.length;
    if (!nextChunkReady) {
      // Nothing pending to advance to: continue the player where it was.
      await _player.resume();
      return;
    }
    // We were waiting on the following chunk and it is buffered now, so
    // move on to it instead of resuming the current player position.
    _waitingNext = false;
    await _playNextIfBuffered(_session);
  } catch (e) {
    _onError?.call(e.toString());
  }
}
Future<void> stop() async {
if (!_initialized) {
return;

View File

@@ -99,12 +99,19 @@ class VoiceCallNotificationService {
required String modelName,
required bool isMuted,
required bool isSpeaking,
required bool isPaused,
}) async {
if (!_initialized) {
await initialize();
}
final status = isSpeaking ? 'Speaking...' : 'Listening...';
final status = isSpeaking
? 'Speaking...'
: isMuted
? 'Muted'
: isPaused
? 'Paused'
: 'Listening...';
final muteAction = isMuted ? 'Unmute' : 'Mute';
final muteActionId = isMuted ? _actionUnmute : _actionMute;
@@ -131,7 +138,7 @@ class VoiceCallNotificationService {
muteActionId,
muteAction,
icon: DrawableResourceAndroidBitmap(
isMuted ? '@drawable/ic_mic_on' : '@drawable/ic_mic_off',
isMuted ? '@drawable/ic_mic_off' : '@drawable/ic_mic_on',
),
showsUserInterface: false,
cancelNotification: false,
@@ -176,11 +183,13 @@ class VoiceCallNotificationService {
required String modelName,
required bool isMuted,
required bool isSpeaking,
required bool isPaused,
}) async {
await showCallNotification(
modelName: modelName,
isMuted: isMuted,
isSpeaking: isSpeaking,
isPaused: isPaused,
);
}

View File

@@ -19,12 +19,15 @@ enum VoiceCallState {
idle,
connecting,
listening,
paused,
processing,
speaking,
error,
disconnected,
}
/// Reasons for which voice-call listening can be paused.
enum VoiceCallPauseReason {
  /// Default reason used by `pauseListening` and `resumeListening`.
  user,

  /// Passed when listening is paused or resumed by toggling mute.
  mute,

  /// NOTE(review): no use of this value is visible here — presumably for
  /// pauses not initiated by the user; confirm against callers.
  system,
}
class VoiceCallService {
static const String _voiceCallStreamId = 'voice-call';
@@ -42,6 +45,8 @@ class VoiceCallService {
String _accumulatedTranscript = '';
bool _isDisposed = false;
bool _isMuted = false;
bool _listeningPaused = false;
final Set<VoiceCallPauseReason> _pauseReasons = <VoiceCallPauseReason>{};
SocketEventSubscription? _socketSubscription;
Timer? _keepAliveTimer;
@@ -83,6 +88,9 @@ class VoiceCallService {
Future<void> initialize() async {
if (_isDisposed) return;
_pauseReasons.clear();
_listeningPaused = false;
// Initialize notification service
await _notificationService.initialize();
@@ -183,6 +191,15 @@ class VoiceCallService {
if (_isDisposed) return;
try {
if (_pauseReasons.isNotEmpty) {
_listeningPaused = true;
if (_state != VoiceCallState.paused) {
_updateState(VoiceCallState.paused);
}
return;
}
_listeningPaused = false;
_accumulatedTranscript = '';
// Check if voice input is available
@@ -291,8 +308,12 @@ class VoiceCallService {
_speakResponse(_accumulatedResponse);
_accumulatedResponse = '';
} else if (_accumulatedResponse.isEmpty) {
// No response, restart listening
_startListening();
// No response, restart listening unless paused
if (_pauseReasons.isEmpty) {
_startListening();
} else if (_state != VoiceCallState.paused) {
_updateState(VoiceCallState.paused);
}
}
}
}
@@ -341,7 +362,12 @@ class VoiceCallService {
/// Runs after the assistant finishes speaking.
///
/// Clears the speaking flag, then either starts listening again or — when
/// at least one pause reason is recorded — marks listening as paused and
/// moves the call to [VoiceCallState.paused].
void _handleTtsComplete() {
  if (_isDisposed) return;
  _isSpeaking = false;

  if (_pauseReasons.isEmpty) {
    // Nothing is holding the call paused: give the turn back to the user.
    _startListening();
    return;
  }

  // Still paused; do not reopen the microphone.
  _listeningPaused = true;
  _updateState(VoiceCallState.paused);
}
@@ -379,24 +405,52 @@ class VoiceCallService {
_sessionId = null;
_accumulatedTranscript = '';
_isMuted = false;
_listeningPaused = false;
_pauseReasons.clear();
_updateState(VoiceCallState.disconnected);
}
Future<void> pauseListening() async {
/// Pauses listening on behalf of [reason] (defaults to
/// [VoiceCallPauseReason.user]).
///
/// Each call records its reason in `_pauseReasons`. Only the call that finds
/// the set empty stops the voice input and cancels the transcript and
/// intensity subscriptions; calls made while a reason is already recorded
/// just add theirs and return. The state changes to [VoiceCallState.paused]
/// only if the call was in [VoiceCallState.listening].
Future<void> pauseListening({
  VoiceCallPauseReason reason = VoiceCallPauseReason.user,
}) async {
  if (_isDisposed) return;

  final alreadyPaused = _pauseReasons.isNotEmpty;
  _pauseReasons.add(reason);
  if (alreadyPaused) return;

  _listeningPaused = true;

  await _voiceInput.stopListening();
  await _transcriptSubscription?.cancel();
  await _intensitySubscription?.cancel();

  if (_state != VoiceCallState.listening) return;
  _updateState(VoiceCallState.paused);
}
Future<void> resumeListening() async {
Future<void> resumeListening({
VoiceCallPauseReason reason = VoiceCallPauseReason.user,
}) async {
if (_isDisposed) return;
await _startListening();
_pauseReasons.remove(reason);
if (_pauseReasons.isNotEmpty) {
return;
}
if (_state == VoiceCallState.paused || _listeningPaused) {
await _startListening();
}
}
/// Stops the spoken response and restarts listening.
///
/// Any accumulated response text is discarded. Does nothing once the
/// service has been disposed.
Future<void> cancelSpeaking() async {
  if (!_isDisposed) {
    await _tts.stop();
    _accumulatedResponse = '';
    _isSpeaking = false;

    // Go straight back to listening rather than waiting for a TTS callback.
    await _startListening();
  }
}
@@ -428,6 +482,9 @@ class VoiceCallService {
modelName: modelName,
isMuted: _isMuted,
isSpeaking: _state == VoiceCallState.speaking,
isPaused:
_state == VoiceCallState.paused ||
(_pauseReasons.isNotEmpty && !_isSpeaking),
);
} catch (e) {
// Silently ignore notification errors
@@ -451,9 +508,14 @@ class VoiceCallService {
void _toggleMute() {
_isMuted = !_isMuted;
if (_isMuted) {
pauseListening();
if (_isSpeaking) {
unawaited(_tts.stop());
_isSpeaking = false;
_accumulatedResponse = '';
}
pauseListening(reason: VoiceCallPauseReason.mute);
} else {
resumeListening();
resumeListening(reason: VoiceCallPauseReason.mute);
}
_updateNotification();
}

View File

@@ -348,45 +348,83 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
Widget _buildControlButtons(Color primaryColor) {
final errorColor = Theme.of(context).colorScheme.error;
final warningColor = Colors.orange;
final successColor = Theme.of(context).colorScheme.secondary;
final buttons = <Widget>[];
// Retry button (only show in error state)
if (_currentState == VoiceCallState.error) {
buttons.add(
_buildActionButton(
icon: CupertinoIcons.arrow_clockwise,
label: 'Retry',
color: primaryColor,
onPressed: () async {
await _initializeCall();
},
),
);
}
final canPause = _currentState == VoiceCallState.listening;
final canResume = _currentState == VoiceCallState.paused;
if (canPause) {
buttons.add(
_buildActionButton(
icon: CupertinoIcons.pause_fill,
label: 'Pause',
color: warningColor,
onPressed: () async {
await _service?.pauseListening();
},
),
);
} else if (canResume) {
buttons.add(
_buildActionButton(
icon: CupertinoIcons.play_fill,
label: 'Resume',
color: successColor,
onPressed: () async {
await _service?.resumeListening();
},
),
);
}
// Cancel speaking button (only show when speaking)
if (_currentState == VoiceCallState.speaking) {
buttons.add(
_buildActionButton(
icon: CupertinoIcons.stop_fill,
label: 'Stop',
color: warningColor,
onPressed: () async {
await _service?.cancelSpeaking();
},
),
);
}
// End call button
buttons.add(
_buildActionButton(
icon: CupertinoIcons.phone_down_fill,
label: 'End Call',
color: errorColor,
onPressed: () async {
await _service?.stopCall();
if (mounted) {
Navigator.of(context).pop();
}
},
),
);
return Row(
mainAxisAlignment: MainAxisAlignment.spaceEvenly,
children: [
// Retry button (only show in error state)
if (_currentState == VoiceCallState.error)
_buildActionButton(
icon: CupertinoIcons.arrow_clockwise,
label: 'Retry',
color: primaryColor,
onPressed: () async {
await _initializeCall();
},
),
// Cancel speaking button (only show when speaking)
if (_currentState == VoiceCallState.speaking)
_buildActionButton(
icon: CupertinoIcons.stop_fill,
label: 'Stop',
color: warningColor,
onPressed: () async {
await _service?.cancelSpeaking();
},
),
// End call button
_buildActionButton(
icon: CupertinoIcons.phone_down_fill,
label: 'End Call',
color: errorColor,
onPressed: () async {
await _service?.stopCall();
if (mounted) {
Navigator.of(context).pop();
}
},
),
],
children: buttons,
);
}
@@ -422,6 +460,8 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
return 'Connecting...';
case VoiceCallState.listening:
return 'Listening';
case VoiceCallState.paused:
return 'Paused';
case VoiceCallState.processing:
return 'Thinking...';
case VoiceCallState.speaking: