Merge pull request #115 from cogwheel0/tts-service-enhancement
feat(tts): Improve text-to-speech service with enhanced error handling and state management
This commit is contained in:
@@ -188,10 +188,19 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
||||
return;
|
||||
}
|
||||
|
||||
final isPausedActive =
|
||||
state.activeMessageId == messageId &&
|
||||
state.status == TtsPlaybackStatus.paused;
|
||||
if (isPausedActive) {
|
||||
await resume();
|
||||
return;
|
||||
}
|
||||
|
||||
final isCurrentlyActive =
|
||||
state.activeMessageId == messageId &&
|
||||
state.status != TtsPlaybackStatus.idle &&
|
||||
state.status != TtsPlaybackStatus.error;
|
||||
state.status != TtsPlaybackStatus.error &&
|
||||
state.status != TtsPlaybackStatus.paused;
|
||||
|
||||
if (isCurrentlyActive) {
|
||||
await stop();
|
||||
@@ -294,6 +303,24 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
||||
await _service.pause();
|
||||
}
|
||||
|
||||
/// Asks the underlying TTS service to continue playback.
///
/// Returns immediately unless the current state reports both `initialized`
/// and `available`. If the service call throws, the state is switched to
/// [TtsPlaybackStatus.error] carrying the error text and
/// `clearActiveMessageId: true`; that write is skipped when `ref.mounted` is
/// false.
Future<void> resume() async {
  final ready = state.initialized && state.available;
  if (!ready) return;

  try {
    await _service.resume();
  } catch (error) {
    // The await above may complete after `ref` is no longer mounted; only
    // touch `state` while it still is.
    if (ref.mounted) {
      state = state.copyWith(
        status: TtsPlaybackStatus.error,
        errorMessage: error.toString(),
        clearActiveMessageId: true,
      );
    }
  }
}
|
||||
|
||||
Future<void> stop() async {
|
||||
await _service.stop();
|
||||
if (!ref.mounted) {
|
||||
|
||||
@@ -24,6 +24,7 @@ class TextToSpeechService {
|
||||
int _expectedChunks = 0;
|
||||
int _currentIndex = -1;
|
||||
bool _waitingNext = false;
|
||||
bool _deviceEngineAvailable = false;
|
||||
String? _serverDefaultVoice;
|
||||
Future<String?>? _serverDefaultVoiceFuture;
|
||||
|
||||
@@ -42,8 +43,20 @@ class TextToSpeechService {
|
||||
/// Creates the service and wires the audio player's events to the internal
/// playback callbacks.
///
/// [api] is stored in `_api` and may be null.
TextToSpeechService({ApiService? api}) : _api = api {
  // Wire minimal player events to callbacks
  _player.onPlayerComplete.listen((_) => _onAudioComplete());

  // A single state listener maps each player state to its handler. The
  // earlier playing-only listener is dropped so `_handleStart` is not
  // invoked twice per transition.
  _player.onPlayerStateChanged.listen((state) {
    switch (state) {
      case PlayerState.playing:
        _handleStart();
        break;
      case PlayerState.paused:
        _handlePause();
        break;
      case PlayerState.stopped:
        _handleCancel();
        break;
      default:
        // Other states are not handled by this listener.
        break;
    }
  });
}
|
||||
|
||||
@@ -118,14 +131,22 @@ class TextToSpeechService {
|
||||
]);
|
||||
}
|
||||
|
||||
// Set the voice (specific or default)
|
||||
await _setVoiceByName(voice);
|
||||
_available = true;
|
||||
// Set the voice (specific or default) when using device engine
|
||||
if (_engine == TtsEngine.device) {
|
||||
await _setVoiceByName(voice);
|
||||
}
|
||||
_deviceEngineAvailable = true;
|
||||
} catch (e) {
|
||||
_available = false;
|
||||
_onError?.call(e.toString());
|
||||
_deviceEngineAvailable = false;
|
||||
if (_engine != TtsEngine.server) {
|
||||
_available = false;
|
||||
_onError?.call(e.toString());
|
||||
_initialized = true;
|
||||
return _available;
|
||||
}
|
||||
}
|
||||
|
||||
_available = _engine == TtsEngine.server || _deviceEngineAvailable;
|
||||
_initialized = true;
|
||||
return _available;
|
||||
}
|
||||
@@ -145,7 +166,11 @@ class TextToSpeechService {
|
||||
await _startServerChunkedPlayback(text);
|
||||
} catch (e) {
|
||||
_onError?.call(e.toString());
|
||||
await _speakOnDevice(text);
|
||||
if (_deviceEngineAvailable) {
|
||||
await _speakOnDevice(text);
|
||||
} else {
|
||||
throw StateError('Server text-to-speech failed: $e');
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -155,8 +180,8 @@ class TextToSpeechService {
|
||||
}
|
||||
|
||||
Future<void> _speakOnDevice(String text) async {
|
||||
if (!_available) {
|
||||
throw StateError('Text-to-speech is unavailable on this device');
|
||||
if (!_deviceEngineAvailable) {
|
||||
throw StateError('Device text-to-speech is unavailable');
|
||||
}
|
||||
await _tts.stop();
|
||||
if (!_voiceConfigured) {
|
||||
@@ -174,7 +199,8 @@ class TextToSpeechService {
|
||||
try {
|
||||
if (_engine == TtsEngine.server) {
|
||||
await _player.pause();
|
||||
} else if (_available) {
|
||||
_handlePause();
|
||||
} else if (_deviceEngineAvailable) {
|
||||
await _tts.pause();
|
||||
}
|
||||
} catch (e) {
|
||||
@@ -182,6 +208,22 @@ class TextToSpeechService {
|
||||
}
|
||||
}
|
||||
|
||||
/// Continues playback after a pause.
///
/// Does nothing until the service is initialized, and only acts when the
/// engine is [TtsEngine.server]; for any other engine the call is a no-op.
/// Errors are reported through `_onError` rather than rethrown.
Future<void> resume() async {
  if (!_initialized) {
    return;
  }

  try {
    if (_engine != TtsEngine.server) {
      return;
    }

    // If the service was waiting for the next chunk and one is already
    // buffered, move on to it instead of resuming the current player.
    final advanceToNextChunk =
        _waitingNext && (_currentIndex + 1) < _buffered.length;
    if (advanceToNextChunk) {
      _waitingNext = false;
      await _playNextIfBuffered(_session);
      return;
    }

    await _player.resume();
  } catch (error) {
    _onError?.call(error.toString());
  }
}
|
||||
|
||||
Future<void> stop() async {
|
||||
if (!_initialized) {
|
||||
return;
|
||||
|
||||
@@ -99,12 +99,19 @@ class VoiceCallNotificationService {
|
||||
required String modelName,
|
||||
required bool isMuted,
|
||||
required bool isSpeaking,
|
||||
required bool isPaused,
|
||||
}) async {
|
||||
if (!_initialized) {
|
||||
await initialize();
|
||||
}
|
||||
|
||||
final status = isSpeaking ? 'Speaking...' : 'Listening...';
|
||||
final status = isSpeaking
|
||||
? 'Speaking...'
|
||||
: isMuted
|
||||
? 'Muted'
|
||||
: isPaused
|
||||
? 'Paused'
|
||||
: 'Listening...';
|
||||
final muteAction = isMuted ? 'Unmute' : 'Mute';
|
||||
final muteActionId = isMuted ? _actionUnmute : _actionMute;
|
||||
|
||||
@@ -131,7 +138,7 @@ class VoiceCallNotificationService {
|
||||
muteActionId,
|
||||
muteAction,
|
||||
icon: DrawableResourceAndroidBitmap(
|
||||
isMuted ? '@drawable/ic_mic_on' : '@drawable/ic_mic_off',
|
||||
isMuted ? '@drawable/ic_mic_off' : '@drawable/ic_mic_on',
|
||||
),
|
||||
showsUserInterface: false,
|
||||
cancelNotification: false,
|
||||
@@ -176,11 +183,13 @@ class VoiceCallNotificationService {
|
||||
required String modelName,
|
||||
required bool isMuted,
|
||||
required bool isSpeaking,
|
||||
required bool isPaused,
|
||||
}) async {
|
||||
await showCallNotification(
|
||||
modelName: modelName,
|
||||
isMuted: isMuted,
|
||||
isSpeaking: isSpeaking,
|
||||
isPaused: isPaused,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -19,12 +19,15 @@ enum VoiceCallState {
|
||||
idle,
|
||||
connecting,
|
||||
listening,
|
||||
paused,
|
||||
processing,
|
||||
speaking,
|
||||
error,
|
||||
disconnected,
|
||||
}
|
||||
|
||||
/// Identifies who asked for listening to be paused.
enum VoiceCallPauseReason {
  /// Default reason of `pauseListening` / `resumeListening`.
  user,

  /// Passed by `_toggleMute` when muting and unmuting.
  mute,

  /// Not referenced in the visible code; presumably reserved for
  /// system-initiated pauses (to be confirmed).
  system,
}
|
||||
|
||||
class VoiceCallService {
|
||||
static const String _voiceCallStreamId = 'voice-call';
|
||||
|
||||
@@ -42,6 +45,8 @@ class VoiceCallService {
|
||||
String _accumulatedTranscript = '';
|
||||
bool _isDisposed = false;
|
||||
bool _isMuted = false;
|
||||
bool _listeningPaused = false;
|
||||
final Set<VoiceCallPauseReason> _pauseReasons = <VoiceCallPauseReason>{};
|
||||
SocketEventSubscription? _socketSubscription;
|
||||
Timer? _keepAliveTimer;
|
||||
|
||||
@@ -83,6 +88,9 @@ class VoiceCallService {
|
||||
Future<void> initialize() async {
|
||||
if (_isDisposed) return;
|
||||
|
||||
_pauseReasons.clear();
|
||||
_listeningPaused = false;
|
||||
|
||||
// Initialize notification service
|
||||
await _notificationService.initialize();
|
||||
|
||||
@@ -183,6 +191,15 @@ class VoiceCallService {
|
||||
if (_isDisposed) return;
|
||||
|
||||
try {
|
||||
if (_pauseReasons.isNotEmpty) {
|
||||
_listeningPaused = true;
|
||||
if (_state != VoiceCallState.paused) {
|
||||
_updateState(VoiceCallState.paused);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
_listeningPaused = false;
|
||||
_accumulatedTranscript = '';
|
||||
|
||||
// Check if voice input is available
|
||||
@@ -291,8 +308,12 @@ class VoiceCallService {
|
||||
_speakResponse(_accumulatedResponse);
|
||||
_accumulatedResponse = '';
|
||||
} else if (_accumulatedResponse.isEmpty) {
|
||||
// No response, restart listening
|
||||
_startListening();
|
||||
// No response, restart listening unless paused
|
||||
if (_pauseReasons.isEmpty) {
|
||||
_startListening();
|
||||
} else if (_state != VoiceCallState.paused) {
|
||||
_updateState(VoiceCallState.paused);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -341,7 +362,12 @@ class VoiceCallService {
|
||||
/// Called when the assistant's speech has finished.
///
/// Clears the speaking flag, then either restarts listening (no pause
/// reasons recorded) or marks listening as paused and moves to
/// [VoiceCallState.paused].
void _handleTtsComplete() {
  if (_isDisposed) {
    return;
  }
  _isSpeaking = false;

  // Listening resumes only when nothing has asked for a pause.
  if (_pauseReasons.isEmpty) {
    _startListening();
    return;
  }

  _listeningPaused = true;
  _updateState(VoiceCallState.paused);
}
|
||||
|
||||
@@ -379,24 +405,52 @@ class VoiceCallService {
|
||||
_sessionId = null;
|
||||
_accumulatedTranscript = '';
|
||||
_isMuted = false;
|
||||
_listeningPaused = false;
|
||||
_pauseReasons.clear();
|
||||
_updateState(VoiceCallState.disconnected);
|
||||
}
|
||||
|
||||
/// Pauses voice input on behalf of [reason].
///
/// Each reason is recorded in `_pauseReasons`. Only the first recorded reason
/// actually stops the recognizer and cancels the transcript and intensity
/// subscriptions; further reasons arriving while already paused are just
/// remembered, so listening stays paused until every reason is released
/// through [resumeListening].
///
/// The state is switched to [VoiceCallState.paused] only when the call was
/// in [VoiceCallState.listening]; other states are left unchanged here.
Future<void> pauseListening({
  VoiceCallPauseReason reason = VoiceCallPauseReason.user,
}) async {
  if (_isDisposed) return;

  final wasEmpty = _pauseReasons.isEmpty;
  _pauseReasons.add(reason);
  if (!wasEmpty) {
    // Already paused for another reason; nothing more to tear down.
    return;
  }

  _listeningPaused = true;
  await _voiceInput.stopListening();
  await _transcriptSubscription?.cancel();
  await _intensitySubscription?.cancel();

  if (_state == VoiceCallState.listening) {
    _updateState(VoiceCallState.paused);
  }
}
|
||||
|
||||
/// Releases the pause held by [reason] and restarts listening when no other
/// pause reason remains.
///
/// Listening is restarted only if the call is in [VoiceCallState.paused] or
/// the `_listeningPaused` flag is set. There is deliberately no
/// unconditional restart before the reason is removed: the restart must
/// stay behind the pause-reason check below.
Future<void> resumeListening({
  VoiceCallPauseReason reason = VoiceCallPauseReason.user,
}) async {
  if (_isDisposed) return;

  _pauseReasons.remove(reason);
  if (_pauseReasons.isNotEmpty) {
    // Someone else still wants listening paused.
    return;
  }

  if (_state == VoiceCallState.paused || _listeningPaused) {
    await _startListening();
  }
}
|
||||
|
||||
/// Interrupts the assistant's current speech.
///
/// Stops TTS, discards any accumulated response text, clears the speaking
/// flag and then hands control back to `_startListening`.
Future<void> cancelSpeaking() async {
  if (_isDisposed) {
    return;
  }

  await _tts.stop();

  _accumulatedResponse = '';
  _isSpeaking = false;

  // Go straight back to listening.
  await _startListening();
}
|
||||
@@ -428,6 +482,9 @@ class VoiceCallService {
|
||||
modelName: modelName,
|
||||
isMuted: _isMuted,
|
||||
isSpeaking: _state == VoiceCallState.speaking,
|
||||
isPaused:
|
||||
_state == VoiceCallState.paused ||
|
||||
(_pauseReasons.isNotEmpty && !_isSpeaking),
|
||||
);
|
||||
} catch (e) {
|
||||
// Silently ignore notification errors
|
||||
@@ -451,9 +508,14 @@ class VoiceCallService {
|
||||
/// Flips the mute flag and pauses or resumes listening accordingly.
///
/// Muting while the assistant is speaking also stops TTS and drops the
/// pending response text. Pause and resume are requested with
/// [VoiceCallPauseReason.mute] only, so toggling mute never adds or releases
/// the `user` pause reason. The notification is refreshed afterwards.
void _toggleMute() {
  _isMuted = !_isMuted;
  if (_isMuted) {
    if (_isSpeaking) {
      // Fire-and-forget: the toggle itself is synchronous.
      unawaited(_tts.stop());
      _isSpeaking = false;
      _accumulatedResponse = '';
    }
    pauseListening(reason: VoiceCallPauseReason.mute);
  } else {
    resumeListening(reason: VoiceCallPauseReason.mute);
  }
  _updateNotification();
}
|
||||
|
||||
@@ -348,45 +348,83 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
|
||||
/// Builds the row of call-control buttons for the current call state.
///
/// Buttons are collected into a list so that each is added at most once:
///  * Retry — only in [VoiceCallState.error]; tinted with [primaryColor].
///  * Pause — only in [VoiceCallState.listening].
///  * Resume — only in [VoiceCallState.paused].
///  * Stop — only in [VoiceCallState.speaking].
///  * End Call — always present.
Widget _buildControlButtons(Color primaryColor) {
  final errorColor = Theme.of(context).colorScheme.error;
  final warningColor = Colors.orange;
  final successColor = Theme.of(context).colorScheme.secondary;

  final buttons = <Widget>[];

  // Retry button (only show in error state)
  if (_currentState == VoiceCallState.error) {
    buttons.add(
      _buildActionButton(
        icon: CupertinoIcons.arrow_clockwise,
        label: 'Retry',
        color: primaryColor,
        onPressed: () async {
          await _initializeCall();
        },
      ),
    );
  }

  final canPause = _currentState == VoiceCallState.listening;
  final canResume = _currentState == VoiceCallState.paused;

  // Pause and Resume are mutually exclusive.
  if (canPause) {
    buttons.add(
      _buildActionButton(
        icon: CupertinoIcons.pause_fill,
        label: 'Pause',
        color: warningColor,
        onPressed: () async {
          await _service?.pauseListening();
        },
      ),
    );
  } else if (canResume) {
    buttons.add(
      _buildActionButton(
        icon: CupertinoIcons.play_fill,
        label: 'Resume',
        color: successColor,
        onPressed: () async {
          await _service?.resumeListening();
        },
      ),
    );
  }

  // Cancel speaking button (only show when speaking)
  if (_currentState == VoiceCallState.speaking) {
    buttons.add(
      _buildActionButton(
        icon: CupertinoIcons.stop_fill,
        label: 'Stop',
        color: warningColor,
        onPressed: () async {
          await _service?.cancelSpeaking();
        },
      ),
    );
  }

  // End call button
  buttons.add(
    _buildActionButton(
      icon: CupertinoIcons.phone_down_fill,
      label: 'End Call',
      color: errorColor,
      onPressed: () async {
        await _service?.stopCall();
        // The widget may have been removed while the call was stopping.
        if (mounted) {
          Navigator.of(context).pop();
        }
      },
    ),
  );

  return Row(
    mainAxisAlignment: MainAxisAlignment.spaceEvenly,
    children: buttons,
  );
}
|
||||
|
||||
@@ -422,6 +460,8 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
|
||||
return 'Connecting...';
|
||||
case VoiceCallState.listening:
|
||||
return 'Listening';
|
||||
case VoiceCallState.paused:
|
||||
return 'Paused';
|
||||
case VoiceCallState.processing:
|
||||
return 'Thinking...';
|
||||
case VoiceCallState.speaking:
|
||||
|
||||
Reference in New Issue
Block a user