feat(tts): add auto mode for text-to-speech engine selection

This commit is contained in:
cogwheel0
2025-11-02 21:31:13 +05:30
parent da249eaa31
commit cfadeffd24
19 changed files with 579 additions and 154 deletions

View File

@@ -107,11 +107,9 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
// Listen to settings changes and update TTS when initialized
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
if (_service.isInitialized && _service.isAvailable) {
final selectedVoice = next.ttsEngine == TtsEngine.server
? next.ttsServerVoiceId
: next.ttsVoice;
_service.updateSettings(
voice: selectedVoice,
voice: next.ttsVoice,
serverVoice: next.ttsServerVoiceId,
speechRate: next.ttsSpeechRate,
pitch: next.ttsPitch,
volume: next.ttsVolume,
@@ -137,9 +135,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
final settings = ref.read(appSettingsProvider);
final future = _service
.initialize(
voice: settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId
: settings.ttsVoice,
deviceVoice: settings.ttsVoice,
serverVoice: settings.ttsServerVoiceId,
speechRate: settings.ttsSpeechRate,
pitch: settings.ttsPitch,
volume: settings.ttsVolume,

View File

@@ -16,8 +16,9 @@ class TextToSpeechService {
final FlutterTts _tts = FlutterTts();
final AudioPlayer _player = AudioPlayer();
final ApiService? _api;
TtsEngine _engine = TtsEngine.device;
TtsEngine _engine = TtsEngine.auto;
String? _preferredVoice;
String? _serverPreferredVoice;
bool _initialized = false;
bool _available = false;
bool _voiceConfigured = false;
@@ -41,6 +42,8 @@ class TextToSpeechService {
bool get isInitialized => _initialized;
bool get isAvailable => _available;
bool get deviceEngineAvailable => _deviceEngineAvailable;
bool get serverEngineAvailable => _api != null;
TextToSpeechService({ApiService? api}) : _api = api {
// Wire minimal player events to callbacks
@@ -59,6 +62,69 @@ class TextToSpeechService {
});
}
/// Configures the on-device TTS engine with the given playback parameters.
///
/// Applies [volume], [speechRate] and [pitch], sets up the iOS audio
/// category when running on iOS, and selects [voice] unless the server
/// engine is explicitly selected.
///
/// [_deviceEngineAvailable] becomes `true` only when every step succeeds.
/// On failure, [_deviceEngineAvailable] and [_voiceConfigured] are reset to
/// `false` and the error is rethrown so the caller can decide how to react.
Future<void> _configureDeviceEngine({
  required String? voice,
  required double speechRate,
  required double pitch,
  required double volume,
}) async {
  // Treat the engine as unavailable until configuration has completed.
  _deviceEngineAvailable = false;
  try {
    await _tts.awaitSpeakCompletion(false);
    await _tts.setVolume(volume);
    await _tts.setSpeechRate(speechRate);
    await _tts.setPitch(pitch);

    if (!kIsWeb && Platform.isIOS) {
      await _tts.setSharedInstance(true);
      await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
        IosTextToSpeechAudioCategoryOptions.mixWithOthers,
        IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
        IosTextToSpeechAudioCategoryOptions.allowBluetooth,
        IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
      ]);
    }

    if (_engine != TtsEngine.server) {
      // Use the voice supplied by the caller rather than reading the field,
      // so the required parameter is actually honoured.
      await _setVoiceByName(voice);
    } else {
      // No device voice is applied while the server engine is forced.
      _voiceConfigured = false;
    }
    _deviceEngineAvailable = true;
  } catch (_) {
    _voiceConfigured = false;
    _deviceEngineAvailable = false;
    rethrow;
  }
}
/// Whether speech can be produced with the currently selected engine.
///
/// The device engine requires a successful device configuration, the server
/// engine requires an API client, and auto mode accepts either one.
bool _computeAvailability() {
  final hasServer = _api != null;
  return switch (_engine) {
    TtsEngine.device => _deviceEngineAvailable,
    TtsEngine.server => hasServer,
    TtsEngine.auto => _deviceEngineAvailable || hasServer,
  };
}
/// Whether playback should be routed through the server engine.
///
/// The server engine is used when it is explicitly selected and an API
/// client exists. The device engine never uses the server. Auto mode
/// prefers the device engine and only falls back to the server when the
/// device engine is unavailable and an API client exists.
bool _shouldUseServer() {
  final hasServer = _api != null;
  return switch (_engine) {
    TtsEngine.server => hasServer,
    TtsEngine.device => false,
    // Auto: device first, server only as a fallback.
    TtsEngine.auto => !_deviceEngineAvailable && hasServer,
  };
}
/// Register callbacks for TTS lifecycle events
void bindHandlers({
VoidCallback? onStart,
@@ -96,56 +162,58 @@ class TextToSpeechService {
/// Initialize text-to-speech lazily for the selected engine (device, server, or auto)
Future<bool> initialize({
String? voice,
String? deviceVoice,
String? serverVoice,
double speechRate = 0.5,
double pitch = 1.0,
double volume = 1.0,
TtsEngine engine = TtsEngine.device,
TtsEngine engine = TtsEngine.auto,
}) async {
if (_initialized) {
_engine = engine;
if (deviceVoice != null) {
_preferredVoice = deviceVoice;
_voiceConfigured = false;
}
if (serverVoice != null) {
_serverPreferredVoice = serverVoice;
}
_available = _computeAvailability();
return _available;
}
try {
_engine = engine;
_preferredVoice = voice;
await _tts.awaitSpeakCompletion(false);
_engine = engine;
_preferredVoice = deviceVoice;
_serverPreferredVoice = serverVoice;
_voiceConfigured = false;
// Set volume
await _tts.setVolume(volume);
// Set speech rate
await _tts.setSpeechRate(speechRate);
// Set pitch
await _tts.setPitch(pitch);
if (!kIsWeb && Platform.isIOS) {
await _tts.setSharedInstance(true);
await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
IosTextToSpeechAudioCategoryOptions.mixWithOthers,
IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
IosTextToSpeechAudioCategoryOptions.allowBluetooth,
IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
]);
if (_engine != TtsEngine.server || _api == null) {
try {
await _configureDeviceEngine(
voice: deviceVoice,
speechRate: speechRate,
pitch: pitch,
volume: volume,
);
} catch (e) {
if (_engine == TtsEngine.device) {
_available = false;
_onError?.call(e.toString());
_initialized = true;
return _available;
}
}
// Set the voice (specific or default) when using device engine
if (_engine == TtsEngine.device) {
await _setVoiceByName(voice);
}
_deviceEngineAvailable = true;
} catch (e) {
} else {
_deviceEngineAvailable = false;
if (_engine != TtsEngine.server) {
_available = false;
_onError?.call(e.toString());
_initialized = true;
return _available;
}
try {
await _tts.awaitSpeakCompletion(false);
await _tts.setVolume(volume);
await _tts.setSpeechRate(speechRate);
await _tts.setPitch(pitch);
} catch (_) {}
}
_available = _engine == TtsEngine.server || _deviceEngineAvailable;
_available = _computeAvailability();
_initialized = true;
return _available;
}
@@ -156,10 +224,23 @@ class TextToSpeechService {
}
if (!_initialized) {
await initialize(voice: _preferredVoice, engine: _engine);
await initialize(
deviceVoice: _preferredVoice,
serverVoice: _serverPreferredVoice,
engine: _engine,
);
}
if (_engine == TtsEngine.server && _api != null) {
final bool useServer = _shouldUseServer();
if (useServer) {
if (_api == null) {
if (_deviceEngineAvailable) {
await _speakOnDevice(text);
return;
}
throw StateError('Server text-to-speech is unavailable');
}
// Server-backed TTS with sentence chunking & queued playback
try {
await _startServerChunkedPlayback(text);
@@ -196,7 +277,7 @@ class TextToSpeechService {
Future<void> pause() async {
if (!_initialized) return;
try {
if (_engine == TtsEngine.server) {
if (_shouldUseServer()) {
await _player.pause();
_handlePause();
} else if (_deviceEngineAvailable) {
@@ -210,7 +291,7 @@ class TextToSpeechService {
Future<void> resume() async {
if (!_initialized) return;
try {
if (_engine == TtsEngine.server) {
if (_shouldUseServer()) {
if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
_waitingNext = false;
await _playNextIfBuffered(_session);
@@ -235,7 +316,7 @@ class TextToSpeechService {
_expectedChunks = 0;
_currentIndex = -1;
_waitingNext = false;
if (_engine == TtsEngine.server) {
if (_shouldUseServer()) {
await _player.stop();
_handleCancel();
} else {
@@ -254,17 +335,23 @@ class TextToSpeechService {
/// Update TTS settings on-the-fly
Future<void> updateSettings({
Object? voice = const _VoiceNotProvided(),
Object? serverVoice = const _VoiceNotProvided(),
double? speechRate,
double? pitch,
double? volume,
TtsEngine? engine,
}) async {
final voiceProvided = voice is! _VoiceNotProvided;
final serverVoiceProvided = serverVoice is! _VoiceNotProvided;
final voiceValue = voiceProvided ? voice as String? : null;
final serverVoiceValue = serverVoiceProvided
? serverVoice as String?
: null;
if (!_initialized || !_available) {
// Allow engine and voice to update before init
if (engine != null) _engine = engine;
if (voiceProvided) _preferredVoice = voiceValue;
if (serverVoiceProvided) _serverPreferredVoice = serverVoiceValue;
return;
}
@@ -275,6 +362,9 @@ class TextToSpeechService {
if (voiceProvided) {
_preferredVoice = voiceValue;
}
if (serverVoiceProvided) {
_serverPreferredVoice = serverVoiceValue;
}
if (volume != null) {
await _tts.setVolume(volume);
}
@@ -284,13 +374,15 @@ class TextToSpeechService {
if (pitch != null) {
await _tts.setPitch(pitch);
}
// Set specific voice by name on device engine
if (_engine == TtsEngine.device && voiceProvided) {
// Set specific voice by name on device-capable engines
if (_engine != TtsEngine.server && voiceProvided) {
await _setVoiceByName(_preferredVoice);
}
} catch (e) {
_onError?.call(e.toString());
}
_available = _computeAvailability();
}
/// Set voice by name, or use system default if null
@@ -343,7 +435,11 @@ class TextToSpeechService {
/// Get available voices from the TTS engine
Future<List<Map<String, dynamic>>> getAvailableVoices() async {
if (!_initialized) {
await initialize(voice: _preferredVoice, engine: _engine);
await initialize(
deviceVoice: _preferredVoice,
serverVoice: _serverPreferredVoice,
engine: _engine,
);
}
if (_engine == TtsEngine.server && _api != null) {
@@ -425,6 +521,10 @@ class TextToSpeechService {
}
Future<String?> _resolveServerVoice() async {
final serverSelected = _serverPreferredVoice?.trim();
if (serverSelected != null && serverSelected.isNotEmpty) {
return serverSelected;
}
final selected = _preferredVoice?.trim();
if (selected != null && selected.isNotEmpty) {
return selected;

View File

@@ -132,9 +132,8 @@ class VoiceCallService {
// Initialize TTS with current app settings (engine/voice/rate/pitch/volume)
final settings = _ref.read(appSettingsProvider);
await _tts.initialize(
voice: settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId
: settings.ttsVoice,
deviceVoice: settings.ttsVoice,
serverVoice: settings.ttsServerVoiceId,
speechRate: settings.ttsSpeechRate,
pitch: settings.ttsPitch,
volume: settings.ttsVolume,
@@ -587,11 +586,9 @@ VoiceCallService voiceCallService(Ref ref) {
// Keep TTS settings in sync with app settings during a call
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
// Update voice/engine and runtime parameters
final selectedVoice = next.ttsEngine == TtsEngine.server
? next.ttsServerVoiceId
: next.ttsVoice;
service._tts.updateSettings(
voice: selectedVoice,
voice: next.ttsVoice,
serverVoice: next.ttsServerVoiceId,
speechRate: next.ttsSpeechRate,
pitch: next.ttsPitch,
volume: next.ttsVolume,

View File

@@ -698,6 +698,35 @@ class AppCustomizationPage extends ConsumerWidget {
) {
final theme = context.conduitTheme;
final l10n = AppLocalizations.of(context)!;
final ttsService = ref.watch(textToSpeechServiceProvider);
final bool deviceAvailable =
ttsService.deviceEngineAvailable || !ttsService.isInitialized;
final bool serverAvailable = ttsService.serverEngineAvailable;
final bool autoSelectable = deviceAvailable || serverAvailable;
final bool deviceSelectable = deviceAvailable;
final bool serverSelectable = serverAvailable;
final ttsDescription = _ttsPreferenceDescription(l10n, settings);
final warnings = <String>[];
switch (settings.ttsEngine) {
case TtsEngine.auto:
if (!deviceAvailable) {
warnings.add(l10n.ttsDeviceUnavailableWarning);
}
if (!serverAvailable) {
warnings.add(l10n.ttsServerUnavailableWarning);
}
break;
case TtsEngine.device:
if (!deviceAvailable) {
warnings.add(l10n.ttsDeviceUnavailableWarning);
}
break;
case TtsEngine.server:
if (!serverAvailable) {
warnings.add(l10n.ttsServerUnavailableWarning);
}
break;
}
return Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: [
@@ -733,82 +762,160 @@ class AppCustomizationPage extends ConsumerWidget {
) ??
TextStyle(color: theme.sidebarForeground, fontSize: 14),
),
const Spacer(),
Wrap(
spacing: Spacing.sm,
children: [
ChoiceChip(
label: Text(l10n.ttsEngineDevice),
selected: settings.ttsEngine == TtsEngine.device,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: (v) {
if (v) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
notifier.setTtsEngine(TtsEngine.device);
// Keep previous voice (device voices)
],
),
const SizedBox(height: Spacing.sm),
Wrap(
spacing: Spacing.sm,
runSpacing: Spacing.sm,
children: [
ChoiceChip(
label: Text(l10n.ttsEngineAuto),
selected: settings.ttsEngine == TtsEngine.auto,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.auto
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(
alpha: autoSelectable ? 0.2 : 0.12,
),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.auto
? theme.buttonPrimaryText
: theme.textPrimary.withValues(
alpha: autoSelectable ? 1.0 : 0.45,
),
fontWeight: FontWeight.w600,
),
onSelected: autoSelectable
? (value) {
if (value) {
ref
.read(appSettingsProvider.notifier)
.setTtsEngine(TtsEngine.auto);
}
}
},
),
ChoiceChip(
label: Text(l10n.ttsEngineServer),
selected: settings.ttsEngine == TtsEngine.server,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: (v) {
if (v) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
// Clear device-specific voice so server can default
notifier.setTtsVoice(null);
notifier.setTtsEngine(TtsEngine.server);
: null,
),
ChoiceChip(
label: Text(l10n.ttsEngineDevice),
selected: settings.ttsEngine == TtsEngine.device,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(
alpha: deviceSelectable ? 0.2 : 0.12,
),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimaryText
: theme.textPrimary.withValues(
alpha: deviceSelectable ? 1.0 : 0.45,
),
fontWeight: FontWeight.w600,
),
onSelected: deviceSelectable
? (value) {
if (value) {
ref
.read(appSettingsProvider.notifier)
.setTtsEngine(TtsEngine.device);
}
}
},
),
],
: null,
),
ChoiceChip(
label: Text(l10n.ttsEngineServer),
selected: settings.ttsEngine == TtsEngine.server,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(
alpha: serverSelectable ? 0.2 : 0.12,
),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimaryText
: theme.textPrimary.withValues(
alpha: serverSelectable ? 1.0 : 0.45,
),
fontWeight: FontWeight.w600,
),
onSelected: serverSelectable
? (value) {
if (value) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
notifier.setTtsVoice(null);
notifier.setTtsEngine(TtsEngine.server);
}
}
: null,
),
],
),
const SizedBox(height: Spacing.sm),
AnimatedSwitcher(
duration: const Duration(milliseconds: 200),
child: Text(
ttsDescription,
key: ValueKey<String>(
'tts-desc-${settings.ttsEngine.name}',
),
style:
theme.bodyMedium?.copyWith(
color: theme.sidebarForeground.withValues(
alpha: 0.9,
),
) ??
TextStyle(
color: theme.sidebarForeground.withValues(
alpha: 0.9,
),
fontSize: 14,
),
),
),
if (warnings.isNotEmpty) ...[
const SizedBox(height: Spacing.sm),
...warnings.map(
(warning) => Padding(
padding: const EdgeInsets.only(top: Spacing.xs),
child: Text(
warning,
style:
theme.bodySmall?.copyWith(
color: theme.error,
fontWeight: FontWeight.w600,
) ??
TextStyle(
color: theme.error,
fontSize: 12,
fontWeight: FontWeight.w600,
),
),
),
),
],
],
),
),
const SizedBox(height: Spacing.sm),
_ExpandableCard(
title: l10n.ttsVoice,
subtitle: _getDisplayVoiceName(
settings.ttsEngine == TtsEngine.server
? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ??
'')
: (settings.ttsVoice ?? ''),
l10n.ttsSystemDefault,
),
subtitle: _ttsVoiceSubtitle(l10n, settings),
icon: UiUtils.platformIcon(
ios: CupertinoIcons.speaker_3,
android: Icons.record_voice_over,
@@ -827,14 +934,7 @@ class AppCustomizationPage extends ConsumerWidget {
color: theme.buttonPrimary,
),
title: l10n.ttsVoice,
subtitle: _getDisplayVoiceName(
settings.ttsEngine == TtsEngine.server
? ((settings.ttsServerVoiceName ??
settings.ttsServerVoiceId) ??
'')
: (settings.ttsVoice ?? ''),
l10n.ttsSystemDefault,
),
subtitle: _ttsVoiceSubtitle(l10n, settings),
onTap: () => _showVoicePickerSheet(context, ref, settings),
),
const SizedBox(height: Spacing.md),
@@ -928,6 +1028,39 @@ class AppCustomizationPage extends ConsumerWidget {
}
}
/// Returns the localized description for the currently selected TTS engine.
String _ttsPreferenceDescription(
  AppLocalizations l10n,
  AppSettings settings,
) {
  return switch (settings.ttsEngine) {
    TtsEngine.auto => l10n.ttsEngineAutoDescription,
    TtsEngine.device => l10n.ttsEngineDeviceDescription,
    TtsEngine.server => l10n.ttsEngineServerDescription,
  };
}
/// Builds the subtitle shown on the voice tile for the selected engine.
///
/// In auto mode both the device and the server voice are listed, each
/// prefixed with its engine label. Otherwise only the voice of the selected
/// engine is returned. The server voice uses its display name when present
/// and falls back to its id.
String _ttsVoiceSubtitle(AppLocalizations l10n, AppSettings settings) {
  final deviceName = _getDisplayVoiceName(
    settings.ttsVoice,
    l10n.ttsSystemDefault,
  );
  final serverVoice =
      (settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ?? '';
  final serverName = _getDisplayVoiceName(serverVoice, l10n.ttsSystemDefault);

  switch (settings.ttsEngine) {
    case TtsEngine.auto:
      // Delimit the two entries; without a separator the device voice name
      // runs straight into the server label.
      return '${l10n.ttsEngineDevice}: $deviceName • '
          '${l10n.ttsEngineServer}: $serverName';
    case TtsEngine.device:
      return deviceName;
    case TtsEngine.server:
      return serverName;
  }
}
Widget _buildSliderTile(
BuildContext context,
WidgetRef ref, {