feat(tts): server-backed TTS engine selection

Introduce server TTS support and engine selection while keeping
device TTS as the default.

- Add new persistence keys for storing TTS engine and selected
  server voice (ttsEngine, ttsServerVoiceId, ttsServerVoiceName).
- Extend TextToSpeechService to support two engines:
  TtsEngine.device (FlutterTts) and TtsEngine.server (remote audio).
- Wire in an AudioPlayer and an optional ApiService to fetch raw
  audio bytes from the server and play them, with event hooks
  mapped to existing lifecycle callbacks.
- Implement fallback to device TTS on server errors or empty
  responses, and ensure the player lifecycle (pause/stop/dispose)
  is handled when using the server engine.
- Allow engine and preferred voice to be configured before
  initialization and updated at runtime via updateSettings.

This enables selecting a server-side voice and using a remote
TTS provider while preserving compatibility with the existing
device TTS implementation.
This commit is contained in:
cogwheel0
2025-10-23 16:31:15 +05:30
parent 2337568baf
commit 561e7dd616
10 changed files with 404 additions and 36 deletions

View File

@@ -3,6 +3,7 @@ import 'dart:async';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../../core/services/settings_service.dart';
import '../../../core/providers/app_providers.dart';
import '../../../core/utils/markdown_to_text.dart';
import '../services/text_to_speech_service.dart';
@@ -79,11 +80,15 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
// Listen to settings changes and update TTS when initialized
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
if (_service.isInitialized && _service.isAvailable) {
final selectedVoice = next.ttsEngine == TtsEngine.server
? next.ttsServerVoiceId
: next.ttsVoice;
_service.updateSettings(
voice: next.ttsVoice,
voice: selectedVoice,
speechRate: next.ttsSpeechRate,
pitch: next.ttsPitch,
volume: next.ttsVolume,
engine: next.ttsEngine,
);
}
}, fireImmediately: false);
@@ -105,10 +110,13 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
final settings = ref.read(appSettingsProvider);
final future = _service
.initialize(
voice: settings.ttsVoice,
voice: settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId
: settings.ttsVoice,
speechRate: settings.ttsSpeechRate,
pitch: settings.ttsPitch,
volume: settings.ttsVolume,
engine: settings.ttsEngine,
)
.then((available) {
if (!ref.mounted) {
@@ -289,7 +297,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
}
final textToSpeechServiceProvider = Provider<TextToSpeechService>((ref) {
final service = TextToSpeechService();
final api = ref.watch(apiServiceProvider);
final service = TextToSpeechService(api: api);
ref.onDispose(() {
unawaited(service.dispose());
});

View File

@@ -1,13 +1,21 @@
import 'dart:async';
import 'dart:io' show Platform;
import 'package:audioplayers/audioplayers.dart';
import 'package:flutter/foundation.dart';
import 'package:flutter/widgets.dart';
import 'package:flutter_tts/flutter_tts.dart';
import '../../../core/services/api_service.dart';
import '../../../core/services/settings_service.dart';
/// Lightweight wrapper around FlutterTts to centralize configuration
class TextToSpeechService {
final FlutterTts _tts = FlutterTts();
final AudioPlayer _player = AudioPlayer();
final ApiService? _api;
TtsEngine _engine = TtsEngine.device;
String? _preferredVoice;
bool _initialized = false;
bool _available = false;
bool _voiceConfigured = false;
@@ -22,6 +30,14 @@ class TextToSpeechService {
bool get isInitialized => _initialized;
bool get isAvailable => _available;
/// Creates the service.
///
/// [api] is optional; it is stored for later use and this constructor does
/// nothing else with it.
TextToSpeechService({ApiService? api}) : _api = api {
  // Audio finished playing: invoke the completion handler.
  _player.onPlayerComplete.listen((_) {
    _handleComplete();
  });

  // Only the transition into `playing` is of interest; every other player
  // state change is ignored here.
  _player.onPlayerStateChanged.listen((state) {
    if (state != PlayerState.playing) {
      return;
    }
    _handleStart();
  });
}
/// Register callbacks for TTS lifecycle events
void bindHandlers({
VoidCallback? onStart,
@@ -52,12 +68,15 @@ class TextToSpeechService {
double speechRate = 0.5,
double pitch = 1.0,
double volume = 1.0,
TtsEngine engine = TtsEngine.device,
}) async {
if (_initialized) {
return _available;
}
try {
_engine = engine;
_preferredVoice = voice;
await _tts.awaitSpeakCompletion(false);
// Set volume
@@ -97,34 +116,61 @@ class TextToSpeechService {
}
if (!_initialized) {
await initialize();
await initialize(voice: _preferredVoice, engine: _engine);
}
if (_engine == TtsEngine.server && _api != null) {
// Server-backed TTS path
try {
final effectiveVoice =
(_preferredVoice == null || _preferredVoice!.trim().isEmpty)
? 'alloy'
: _preferredVoice!;
final bytes = await _api.generateSpeech(
text: text,
voice: effectiveVoice,
);
if (bytes.isEmpty) {
throw Exception('Empty audio response');
}
await _player.stop();
final data = Uint8List.fromList(bytes);
await _player.play(BytesSource(data));
} catch (e) {
_onError?.call(e.toString());
// Fallback to device TTS on failure
await _speakOnDevice(text);
}
return;
}
// Device TTS path
await _speakOnDevice(text);
}
/// Speaks [text] through the on-device FlutterTts engine.
///
/// Throws a [StateError] when device text-to-speech has been marked
/// unavailable. Any integer result from the engine other than `1` is
/// reported through the registered error callback rather than thrown.
Future<void> _speakOnDevice(String text) async {
  if (!_available) {
    throw StateError('Text-to-speech is unavailable on this device');
  }

  // Stop the device engine before issuing the new speak request.
  await _tts.stop();

  // Apply the preferred voice lazily, the first time it is needed.
  if (!_voiceConfigured) {
    await _configurePreferredVoice();
  }

  final outcome = await _tts.speak(text);

  // A null result fails the `is int` check, so it is silently accepted just
  // like a result of 1; only other integer codes are surfaced as errors.
  if (outcome is int && outcome != 1) {
    _onError?.call('Text-to-speech engine returned code $outcome');
  }
}
Future<void> pause() async {
if (!_initialized || !_available) {
return;
}
if (!_initialized) return;
try {
await _tts.pause();
if (_engine == TtsEngine.server) {
await _player.pause();
} else if (_available) {
await _tts.pause();
}
} catch (e) {
_onError?.call(e.toString());
}
@@ -136,7 +182,11 @@ class TextToSpeechService {
}
try {
await _tts.stop();
if (_engine == TtsEngine.server) {
await _player.stop();
} else {
await _tts.stop();
}
} catch (e) {
_onError?.call(e.toString());
}
@@ -144,6 +194,7 @@ class TextToSpeechService {
/// Stops any ongoing speech and releases the audio player.
///
/// After this call the internal [AudioPlayer] has been disposed.
Future<void> dispose() async {
  await stop();

  // With the server engine selected, stop() only halts the audio player.
  // A failed server request falls back to device speech, which would keep
  // talking after disposal unless the device engine is stopped as well.
  if (_engine == TtsEngine.server) {
    try {
      await _tts.stop();
    } catch (e) {
      // Best effort during teardown: report the failure, but still release
      // the player below.
      _onError?.call(e.toString());
    }
  }

  await _player.dispose();
}
/// Update TTS settings on-the-fly
@@ -152,12 +203,22 @@ class TextToSpeechService {
double? speechRate,
double? pitch,
double? volume,
TtsEngine? engine,
}) async {
if (!_initialized || !_available) {
// Allow engine and voice to update before init
if (engine != null) _engine = engine;
if (voice != null) _preferredVoice = voice;
return;
}
try {
if (engine != null) {
_engine = engine;
}
if (voice != null) {
_preferredVoice = voice;
}
if (volume != null) {
await _tts.setVolume(volume);
}
@@ -167,8 +228,10 @@ class TextToSpeechService {
if (pitch != null) {
await _tts.setPitch(pitch);
}
// Set specific voice by name
await _setVoiceByName(voice);
// Set specific voice by name on device engine
if (_engine == TtsEngine.device) {
await _setVoiceByName(_preferredVoice);
}
} catch (e) {
_onError?.call(e.toString());
}
@@ -224,7 +287,31 @@ class TextToSpeechService {
/// Get available voices from the TTS engine
Future<List<Map<String, dynamic>>> getAvailableVoices() async {
if (!_initialized) {
await initialize();
await initialize(voice: _preferredVoice, engine: _engine);
}
if (_engine == TtsEngine.server && _api != null) {
try {
final serverVoices = await _api.getAvailableServerVoices();
final mapped = serverVoices
.map(
(v) => {
'name': (v['name'] ?? v['id'] ?? '').toString(),
'locale': (v['locale'] ?? '').toString(),
},
)
.where((e) => (e['name'] as String).isNotEmpty)
.toList();
if (mapped.isEmpty) {
return [
{'name': 'alloy', 'locale': ''},
];
}
return mapped;
} catch (e) {
_onError?.call(e.toString());
// Fall back to device voices
}
}
if (!_available) {

View File

@@ -441,10 +441,97 @@ class AppCustomizationPage extends ConsumerWidget {
TextStyle(color: theme.sidebarForeground, fontSize: 18),
),
const SizedBox(height: Spacing.sm),
ConduitCard(
padding: const EdgeInsets.all(Spacing.md),
child: Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: [
Row(
children: [
_buildIconBadge(
context,
UiUtils.platformIcon(
ios: CupertinoIcons.settings,
android: Icons.settings_voice,
),
color: theme.buttonPrimary,
),
const SizedBox(width: Spacing.sm),
const Text('Engine'),
const Spacer(),
Wrap(
spacing: Spacing.sm,
children: [
ChoiceChip(
label: const Text('On Device'),
selected: settings.ttsEngine == TtsEngine.device,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: (v) {
if (v) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
notifier.setTtsEngine(TtsEngine.device);
// Keep previous voice (device voices)
}
},
),
ChoiceChip(
label: const Text('Server'),
selected: settings.ttsEngine == TtsEngine.server,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: (v) {
if (v) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
// Clear device-specific voice so server can default
notifier.setTtsVoice(null);
notifier.setTtsEngine(TtsEngine.server);
}
},
),
],
),
],
),
],
),
),
const SizedBox(height: Spacing.sm),
_ExpandableCard(
title: l10n.ttsVoice,
subtitle: _getDisplayVoiceName(
settings.ttsVoice,
settings.ttsEngine == TtsEngine.server
? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ??
'')
: (settings.ttsVoice ?? ''),
l10n.ttsSystemDefault,
),
icon: UiUtils.platformIcon(
@@ -466,7 +553,11 @@ class AppCustomizationPage extends ConsumerWidget {
),
title: l10n.ttsVoice,
subtitle: _getDisplayVoiceName(
settings.ttsVoice,
settings.ttsEngine == TtsEngine.server
? ((settings.ttsServerVoiceName ??
settings.ttsServerVoiceId) ??
'')
: (settings.ttsVoice ?? ''),
l10n.ttsSystemDefault,
),
onTap: () => _showVoicePickerSheet(context, ref, settings),
@@ -616,7 +707,10 @@ class AppCustomizationPage extends ConsumerWidget {
final theme = context.conduitTheme;
final ttsService = ref.read(textToSpeechServiceProvider);
// Fetch available voices
// Ensure the service uses the currently selected engine before fetching
await ttsService.updateSettings(engine: settings.ttsEngine);
// Fetch available voices from the active engine
final allVoices = await ttsService.getAvailableVoices();
if (!context.mounted) return;
@@ -729,17 +823,29 @@ class AppCustomizationPage extends ConsumerWidget {
style:
theme.bodyMedium?.copyWith(
color: theme.sidebarForeground,
fontWeight: settings.ttsVoice == null
fontWeight:
(settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId == null
: settings.ttsVoice == null)
? FontWeight.bold
: FontWeight.normal,
) ??
TextStyle(color: theme.sidebarForeground),
),
trailing: settings.ttsVoice == null
trailing:
(settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId == null
: settings.ttsVoice == null)
? Icon(Icons.check, color: theme.buttonPrimary)
: null,
onTap: () {
ref.read(appSettingsProvider.notifier).setTtsVoice(null);
final notifier = ref.read(appSettingsProvider.notifier);
if (settings.ttsEngine == TtsEngine.server) {
notifier.setTtsServerVoiceId(null);
notifier.setTtsServerVoiceName(null);
} else {
notifier.setTtsVoice(null);
}
Navigator.of(sheetContext).pop();
},
),
@@ -823,7 +929,9 @@ class AppCustomizationPage extends ConsumerWidget {
final voiceId = _getVoiceIdentifier(voice);
final displayName = _formatVoiceName(voice);
final subtitle = _getVoiceSubtitle(voice);
final isSelected = settings.ttsVoice == voiceId;
final isSelected = settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId == voiceId
: settings.ttsVoice == voiceId;
return ListTile(
leading: Icon(
@@ -865,9 +973,15 @@ class AppCustomizationPage extends ConsumerWidget {
? Icon(Icons.check, color: theme.buttonPrimary)
: null,
onTap: () {
ref
.read(appSettingsProvider.notifier)
.setTtsVoice(voiceId);
final notifier = ref.read(
appSettingsProvider.notifier,
);
if (settings.ttsEngine == TtsEngine.server) {
notifier.setTtsServerVoiceId(voiceId);
notifier.setTtsServerVoiceName(displayName);
} else {
notifier.setTtsVoice(voiceId);
}
Navigator.of(sheetContext).pop();
},
);