feat(tts): add auto mode for text-to-speech engine selection
This commit is contained in:
@@ -107,11 +107,9 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
||||
// Listen to settings changes and update TTS when initialized
|
||||
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
|
||||
if (_service.isInitialized && _service.isAvailable) {
|
||||
final selectedVoice = next.ttsEngine == TtsEngine.server
|
||||
? next.ttsServerVoiceId
|
||||
: next.ttsVoice;
|
||||
_service.updateSettings(
|
||||
voice: selectedVoice,
|
||||
voice: next.ttsVoice,
|
||||
serverVoice: next.ttsServerVoiceId,
|
||||
speechRate: next.ttsSpeechRate,
|
||||
pitch: next.ttsPitch,
|
||||
volume: next.ttsVolume,
|
||||
@@ -137,9 +135,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
||||
final settings = ref.read(appSettingsProvider);
|
||||
final future = _service
|
||||
.initialize(
|
||||
voice: settings.ttsEngine == TtsEngine.server
|
||||
? settings.ttsServerVoiceId
|
||||
: settings.ttsVoice,
|
||||
deviceVoice: settings.ttsVoice,
|
||||
serverVoice: settings.ttsServerVoiceId,
|
||||
speechRate: settings.ttsSpeechRate,
|
||||
pitch: settings.ttsPitch,
|
||||
volume: settings.ttsVolume,
|
||||
|
||||
@@ -16,8 +16,9 @@ class TextToSpeechService {
|
||||
final FlutterTts _tts = FlutterTts();
|
||||
final AudioPlayer _player = AudioPlayer();
|
||||
final ApiService? _api;
|
||||
TtsEngine _engine = TtsEngine.device;
|
||||
TtsEngine _engine = TtsEngine.auto;
|
||||
String? _preferredVoice;
|
||||
String? _serverPreferredVoice;
|
||||
bool _initialized = false;
|
||||
bool _available = false;
|
||||
bool _voiceConfigured = false;
|
||||
@@ -41,6 +42,8 @@ class TextToSpeechService {
|
||||
|
||||
bool get isInitialized => _initialized;
|
||||
bool get isAvailable => _available;
|
||||
bool get deviceEngineAvailable => _deviceEngineAvailable;
|
||||
bool get serverEngineAvailable => _api != null;
|
||||
|
||||
TextToSpeechService({ApiService? api}) : _api = api {
|
||||
// Wire minimal player events to callbacks
|
||||
@@ -59,6 +62,69 @@ class TextToSpeechService {
|
||||
});
|
||||
}
|
||||
|
||||
/// Applies playback parameters to the on-device TTS engine and records
/// whether that engine is usable.
///
/// Sets [volume], [speechRate] and [pitch] on the underlying TTS instance.
/// On iOS (non-web) it additionally enables the shared instance and sets the
/// playback audio category with its mixing/speaker/Bluetooth options.
/// Unless the selected engine is [TtsEngine.server], [voice] is then applied
/// through `_setVoiceByName`; for the server engine the device voice is
/// marked as not configured instead.
///
/// `_deviceEngineAvailable` is reset to `false` on entry and only becomes
/// `true` once every step has completed. If any step throws, both
/// `_voiceConfigured` and `_deviceEngineAvailable` are cleared and the error
/// is rethrown to the caller.
Future<void> _configureDeviceEngine({
  required String? voice,
  required double speechRate,
  required double pitch,
  required double volume,
}) async {
  // Pessimistic default: the engine only counts as available after a fully
  // successful configuration pass.
  _deviceEngineAvailable = false;
  try {
    await _tts.awaitSpeakCompletion(false);
    await _tts.setVolume(volume);
    await _tts.setSpeechRate(speechRate);
    await _tts.setPitch(pitch);

    if (!kIsWeb && Platform.isIOS) {
      await _tts.setSharedInstance(true);
      await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
        IosTextToSpeechAudioCategoryOptions.mixWithOthers,
        IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
        IosTextToSpeechAudioCategoryOptions.allowBluetooth,
        IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
      ]);
    }

    if (_engine != TtsEngine.server) {
      // Use the voice the caller asked for rather than silently reading the
      // `_preferredVoice` field, so the required parameter is honored.
      await _setVoiceByName(voice);
    } else {
      _voiceConfigured = false;
    }

    _deviceEngineAvailable = true;
  } catch (_) {
    _voiceConfigured = false;
    _deviceEngineAvailable = false;
    rethrow;
  }
}
|
||||
|
||||
/// Whether the currently selected engine is able to speak.
///
/// The server engine counts as available when an API client is present, the
/// device engine when `_deviceEngineAvailable` is set, and auto mode when
/// either of the two is.
bool _computeAvailability() {
  final hasServerBackend = _api != null;
  if (_engine == TtsEngine.device) {
    return _deviceEngineAvailable;
  }
  if (_engine == TtsEngine.server) {
    return hasServerBackend;
  }
  // Auto mode: any working backend is good enough.
  return _deviceEngineAvailable || hasServerBackend;
}
|
||||
|
||||
/// Whether speech should be routed through the server backend.
///
/// Always `false` without an API client or when the device engine is
/// explicitly selected. With the server engine selected it is `true`
/// whenever an API client exists. In auto mode the device engine is
/// preferred, so the server is only used when the device engine is not
/// available.
bool _shouldUseServer() {
  // No API client means no server playback regardless of the engine, and an
  // explicit device selection never uses the server.
  if (_api == null || _engine == TtsEngine.device) {
    return false;
  }
  // An API client exists here: the server engine always uses it, auto mode
  // only as a fallback for an unavailable device engine.
  return _engine == TtsEngine.server || !_deviceEngineAvailable;
}
|
||||
|
||||
/// Register callbacks for TTS lifecycle events
|
||||
void bindHandlers({
|
||||
VoidCallback? onStart,
|
||||
@@ -96,56 +162,58 @@ class TextToSpeechService {
|
||||
|
||||
/// Initialize the native TTS engine lazily
|
||||
Future<bool> initialize({
|
||||
String? voice,
|
||||
String? deviceVoice,
|
||||
String? serverVoice,
|
||||
double speechRate = 0.5,
|
||||
double pitch = 1.0,
|
||||
double volume = 1.0,
|
||||
TtsEngine engine = TtsEngine.device,
|
||||
TtsEngine engine = TtsEngine.auto,
|
||||
}) async {
|
||||
if (_initialized) {
|
||||
_engine = engine;
|
||||
if (deviceVoice != null) {
|
||||
_preferredVoice = deviceVoice;
|
||||
_voiceConfigured = false;
|
||||
}
|
||||
if (serverVoice != null) {
|
||||
_serverPreferredVoice = serverVoice;
|
||||
}
|
||||
_available = _computeAvailability();
|
||||
return _available;
|
||||
}
|
||||
|
||||
try {
|
||||
_engine = engine;
|
||||
_preferredVoice = voice;
|
||||
await _tts.awaitSpeakCompletion(false);
|
||||
_engine = engine;
|
||||
_preferredVoice = deviceVoice;
|
||||
_serverPreferredVoice = serverVoice;
|
||||
_voiceConfigured = false;
|
||||
|
||||
// Set volume
|
||||
await _tts.setVolume(volume);
|
||||
|
||||
// Set speech rate
|
||||
await _tts.setSpeechRate(speechRate);
|
||||
|
||||
// Set pitch
|
||||
await _tts.setPitch(pitch);
|
||||
|
||||
if (!kIsWeb && Platform.isIOS) {
|
||||
await _tts.setSharedInstance(true);
|
||||
await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
|
||||
IosTextToSpeechAudioCategoryOptions.mixWithOthers,
|
||||
IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
|
||||
IosTextToSpeechAudioCategoryOptions.allowBluetooth,
|
||||
IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
|
||||
]);
|
||||
if (_engine != TtsEngine.server || _api == null) {
|
||||
try {
|
||||
await _configureDeviceEngine(
|
||||
voice: deviceVoice,
|
||||
speechRate: speechRate,
|
||||
pitch: pitch,
|
||||
volume: volume,
|
||||
);
|
||||
} catch (e) {
|
||||
if (_engine == TtsEngine.device) {
|
||||
_available = false;
|
||||
_onError?.call(e.toString());
|
||||
_initialized = true;
|
||||
return _available;
|
||||
}
|
||||
}
|
||||
|
||||
// Set the voice (specific or default) when using device engine
|
||||
if (_engine == TtsEngine.device) {
|
||||
await _setVoiceByName(voice);
|
||||
}
|
||||
_deviceEngineAvailable = true;
|
||||
} catch (e) {
|
||||
} else {
|
||||
_deviceEngineAvailable = false;
|
||||
if (_engine != TtsEngine.server) {
|
||||
_available = false;
|
||||
_onError?.call(e.toString());
|
||||
_initialized = true;
|
||||
return _available;
|
||||
}
|
||||
try {
|
||||
await _tts.awaitSpeakCompletion(false);
|
||||
await _tts.setVolume(volume);
|
||||
await _tts.setSpeechRate(speechRate);
|
||||
await _tts.setPitch(pitch);
|
||||
} catch (_) {}
|
||||
}
|
||||
|
||||
_available = _engine == TtsEngine.server || _deviceEngineAvailable;
|
||||
_available = _computeAvailability();
|
||||
_initialized = true;
|
||||
return _available;
|
||||
}
|
||||
@@ -156,10 +224,23 @@ class TextToSpeechService {
|
||||
}
|
||||
|
||||
if (!_initialized) {
|
||||
await initialize(voice: _preferredVoice, engine: _engine);
|
||||
await initialize(
|
||||
deviceVoice: _preferredVoice,
|
||||
serverVoice: _serverPreferredVoice,
|
||||
engine: _engine,
|
||||
);
|
||||
}
|
||||
|
||||
if (_engine == TtsEngine.server && _api != null) {
|
||||
final bool useServer = _shouldUseServer();
|
||||
|
||||
if (useServer) {
|
||||
if (_api == null) {
|
||||
if (_deviceEngineAvailable) {
|
||||
await _speakOnDevice(text);
|
||||
return;
|
||||
}
|
||||
throw StateError('Server text-to-speech is unavailable');
|
||||
}
|
||||
// Server-backed TTS with sentence chunking & queued playback
|
||||
try {
|
||||
await _startServerChunkedPlayback(text);
|
||||
@@ -196,7 +277,7 @@ class TextToSpeechService {
|
||||
Future<void> pause() async {
|
||||
if (!_initialized) return;
|
||||
try {
|
||||
if (_engine == TtsEngine.server) {
|
||||
if (_shouldUseServer()) {
|
||||
await _player.pause();
|
||||
_handlePause();
|
||||
} else if (_deviceEngineAvailable) {
|
||||
@@ -210,7 +291,7 @@ class TextToSpeechService {
|
||||
Future<void> resume() async {
|
||||
if (!_initialized) return;
|
||||
try {
|
||||
if (_engine == TtsEngine.server) {
|
||||
if (_shouldUseServer()) {
|
||||
if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
|
||||
_waitingNext = false;
|
||||
await _playNextIfBuffered(_session);
|
||||
@@ -235,7 +316,7 @@ class TextToSpeechService {
|
||||
_expectedChunks = 0;
|
||||
_currentIndex = -1;
|
||||
_waitingNext = false;
|
||||
if (_engine == TtsEngine.server) {
|
||||
if (_shouldUseServer()) {
|
||||
await _player.stop();
|
||||
_handleCancel();
|
||||
} else {
|
||||
@@ -254,17 +335,23 @@ class TextToSpeechService {
|
||||
/// Update TTS settings on-the-fly
|
||||
Future<void> updateSettings({
|
||||
Object? voice = const _VoiceNotProvided(),
|
||||
Object? serverVoice = const _VoiceNotProvided(),
|
||||
double? speechRate,
|
||||
double? pitch,
|
||||
double? volume,
|
||||
TtsEngine? engine,
|
||||
}) async {
|
||||
final voiceProvided = voice is! _VoiceNotProvided;
|
||||
final serverVoiceProvided = serverVoice is! _VoiceNotProvided;
|
||||
final voiceValue = voiceProvided ? voice as String? : null;
|
||||
final serverVoiceValue = serverVoiceProvided
|
||||
? serverVoice as String?
|
||||
: null;
|
||||
if (!_initialized || !_available) {
|
||||
// Allow engine and voice to update before init
|
||||
if (engine != null) _engine = engine;
|
||||
if (voiceProvided) _preferredVoice = voiceValue;
|
||||
if (serverVoiceProvided) _serverPreferredVoice = serverVoiceValue;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -275,6 +362,9 @@ class TextToSpeechService {
|
||||
if (voiceProvided) {
|
||||
_preferredVoice = voiceValue;
|
||||
}
|
||||
if (serverVoiceProvided) {
|
||||
_serverPreferredVoice = serverVoiceValue;
|
||||
}
|
||||
if (volume != null) {
|
||||
await _tts.setVolume(volume);
|
||||
}
|
||||
@@ -284,13 +374,15 @@ class TextToSpeechService {
|
||||
if (pitch != null) {
|
||||
await _tts.setPitch(pitch);
|
||||
}
|
||||
// Set specific voice by name on device engine
|
||||
if (_engine == TtsEngine.device && voiceProvided) {
|
||||
// Set specific voice by name on device-capable engines
|
||||
if (_engine != TtsEngine.server && voiceProvided) {
|
||||
await _setVoiceByName(_preferredVoice);
|
||||
}
|
||||
} catch (e) {
|
||||
_onError?.call(e.toString());
|
||||
}
|
||||
|
||||
_available = _computeAvailability();
|
||||
}
|
||||
|
||||
/// Set voice by name, or use system default if null
|
||||
@@ -343,7 +435,11 @@ class TextToSpeechService {
|
||||
/// Get available voices from the TTS engine
|
||||
Future<List<Map<String, dynamic>>> getAvailableVoices() async {
|
||||
if (!_initialized) {
|
||||
await initialize(voice: _preferredVoice, engine: _engine);
|
||||
await initialize(
|
||||
deviceVoice: _preferredVoice,
|
||||
serverVoice: _serverPreferredVoice,
|
||||
engine: _engine,
|
||||
);
|
||||
}
|
||||
|
||||
if (_engine == TtsEngine.server && _api != null) {
|
||||
@@ -425,6 +521,10 @@ class TextToSpeechService {
|
||||
}
|
||||
|
||||
Future<String?> _resolveServerVoice() async {
|
||||
final serverSelected = _serverPreferredVoice?.trim();
|
||||
if (serverSelected != null && serverSelected.isNotEmpty) {
|
||||
return serverSelected;
|
||||
}
|
||||
final selected = _preferredVoice?.trim();
|
||||
if (selected != null && selected.isNotEmpty) {
|
||||
return selected;
|
||||
|
||||
@@ -132,9 +132,8 @@ class VoiceCallService {
|
||||
// Initialize TTS with current app settings (engine/voice/rate/pitch/volume)
|
||||
final settings = _ref.read(appSettingsProvider);
|
||||
await _tts.initialize(
|
||||
voice: settings.ttsEngine == TtsEngine.server
|
||||
? settings.ttsServerVoiceId
|
||||
: settings.ttsVoice,
|
||||
deviceVoice: settings.ttsVoice,
|
||||
serverVoice: settings.ttsServerVoiceId,
|
||||
speechRate: settings.ttsSpeechRate,
|
||||
pitch: settings.ttsPitch,
|
||||
volume: settings.ttsVolume,
|
||||
@@ -587,11 +586,9 @@ VoiceCallService voiceCallService(Ref ref) {
|
||||
// Keep TTS settings in sync with app settings during a call
|
||||
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
|
||||
// Update voice/engine and runtime parameters
|
||||
final selectedVoice = next.ttsEngine == TtsEngine.server
|
||||
? next.ttsServerVoiceId
|
||||
: next.ttsVoice;
|
||||
service._tts.updateSettings(
|
||||
voice: selectedVoice,
|
||||
voice: next.ttsVoice,
|
||||
serverVoice: next.ttsServerVoiceId,
|
||||
speechRate: next.ttsSpeechRate,
|
||||
pitch: next.ttsPitch,
|
||||
volume: next.ttsVolume,
|
||||
|
||||
Reference in New Issue
Block a user