feat(tts): server-backed TTS engine selection
Introduce server TTS support and engine selection while keeping device TTS as the default.

- Add new persistence keys for storing the TTS engine and the selected server voice (ttsEngine, ttsServerVoiceId, ttsServerVoiceName).
- Extend TextToSpeechService to support two engines: TtsEngine.device (FlutterTts) and TtsEngine.server (remote audio).
- Wire in an AudioPlayer and an optional ApiService to fetch raw audio bytes from the server and play them, with player events mapped to the existing lifecycle callbacks.
- Implement fallback to device TTS on server errors or empty responses, and ensure the player lifecycle (pause/stop/dispose) is handled when using the server engine.
- Allow the engine and preferred voice to be configured before initialization and updated at runtime via updateSettings.

This enables selecting a server-side voice and using a remote TTS provider while preserving compatibility with the existing device TTS implementation.
This commit is contained in:
@@ -3,6 +3,7 @@ import 'dart:async';
|
||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||
|
||||
import '../../../core/services/settings_service.dart';
|
||||
import '../../../core/providers/app_providers.dart';
|
||||
import '../../../core/utils/markdown_to_text.dart';
|
||||
import '../services/text_to_speech_service.dart';
|
||||
|
||||
@@ -79,11 +80,15 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
||||
// Listen to settings changes and update TTS when initialized
|
||||
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
|
||||
if (_service.isInitialized && _service.isAvailable) {
|
||||
final selectedVoice = next.ttsEngine == TtsEngine.server
|
||||
? next.ttsServerVoiceId
|
||||
: next.ttsVoice;
|
||||
_service.updateSettings(
|
||||
voice: next.ttsVoice,
|
||||
voice: selectedVoice,
|
||||
speechRate: next.ttsSpeechRate,
|
||||
pitch: next.ttsPitch,
|
||||
volume: next.ttsVolume,
|
||||
engine: next.ttsEngine,
|
||||
);
|
||||
}
|
||||
}, fireImmediately: false);
|
||||
@@ -105,10 +110,13 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
||||
final settings = ref.read(appSettingsProvider);
|
||||
final future = _service
|
||||
.initialize(
|
||||
voice: settings.ttsVoice,
|
||||
voice: settings.ttsEngine == TtsEngine.server
|
||||
? settings.ttsServerVoiceId
|
||||
: settings.ttsVoice,
|
||||
speechRate: settings.ttsSpeechRate,
|
||||
pitch: settings.ttsPitch,
|
||||
volume: settings.ttsVolume,
|
||||
engine: settings.ttsEngine,
|
||||
)
|
||||
.then((available) {
|
||||
if (!ref.mounted) {
|
||||
@@ -289,7 +297,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
||||
}
|
||||
|
||||
final textToSpeechServiceProvider = Provider<TextToSpeechService>((ref) {
|
||||
final service = TextToSpeechService();
|
||||
final api = ref.watch(apiServiceProvider);
|
||||
final service = TextToSpeechService(api: api);
|
||||
ref.onDispose(() {
|
||||
unawaited(service.dispose());
|
||||
});
|
||||
|
||||
@@ -1,13 +1,21 @@
|
||||
import 'dart:async';
|
||||
import 'dart:io' show Platform;
|
||||
|
||||
import 'package:audioplayers/audioplayers.dart';
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:flutter/widgets.dart';
|
||||
import 'package:flutter_tts/flutter_tts.dart';
|
||||
|
||||
import '../../../core/services/api_service.dart';
|
||||
import '../../../core/services/settings_service.dart';
|
||||
|
||||
/// Lightweight wrapper around FlutterTts to centralize configuration
|
||||
class TextToSpeechService {
|
||||
final FlutterTts _tts = FlutterTts();
|
||||
final AudioPlayer _player = AudioPlayer();
|
||||
final ApiService? _api;
|
||||
TtsEngine _engine = TtsEngine.device;
|
||||
String? _preferredVoice;
|
||||
bool _initialized = false;
|
||||
bool _available = false;
|
||||
bool _voiceConfigured = false;
|
||||
@@ -22,6 +30,14 @@ class TextToSpeechService {
|
||||
bool get isInitialized => _initialized;
|
||||
bool get isAvailable => _available;
|
||||
|
||||
/// Creates the service, optionally backed by [api] for server-side speech.
///
/// The audio player's events are forwarded to the same internal lifecycle
/// handlers the rest of the class uses, so playback through the player
/// reports start and completion like any other utterance.
TextToSpeechService({ApiService? api}) : _api = api {
  // Completion of a clip ends the current utterance.
  _player.onPlayerComplete.listen((_) {
    _handleComplete();
  });

  // Only the transition into the playing state is treated as a start event;
  // every other player state is ignored here.
  _player.onPlayerStateChanged.listen((state) {
    if (state != PlayerState.playing) {
      return;
    }
    _handleStart();
  });
}
|
||||
|
||||
/// Register callbacks for TTS lifecycle events
|
||||
void bindHandlers({
|
||||
VoidCallback? onStart,
|
||||
@@ -52,12 +68,15 @@ class TextToSpeechService {
|
||||
double speechRate = 0.5,
|
||||
double pitch = 1.0,
|
||||
double volume = 1.0,
|
||||
TtsEngine engine = TtsEngine.device,
|
||||
}) async {
|
||||
if (_initialized) {
|
||||
return _available;
|
||||
}
|
||||
|
||||
try {
|
||||
_engine = engine;
|
||||
_preferredVoice = voice;
|
||||
await _tts.awaitSpeakCompletion(false);
|
||||
|
||||
// Set volume
|
||||
@@ -97,34 +116,61 @@ class TextToSpeechService {
|
||||
}
|
||||
|
||||
if (!_initialized) {
|
||||
await initialize();
|
||||
await initialize(voice: _preferredVoice, engine: _engine);
|
||||
}
|
||||
|
||||
if (_engine == TtsEngine.server && _api != null) {
|
||||
// Server-backed TTS path
|
||||
try {
|
||||
final effectiveVoice =
|
||||
(_preferredVoice == null || _preferredVoice!.trim().isEmpty)
|
||||
? 'alloy'
|
||||
: _preferredVoice!;
|
||||
|
||||
final bytes = await _api.generateSpeech(
|
||||
text: text,
|
||||
voice: effectiveVoice,
|
||||
);
|
||||
if (bytes.isEmpty) {
|
||||
throw Exception('Empty audio response');
|
||||
}
|
||||
await _player.stop();
|
||||
final data = Uint8List.fromList(bytes);
|
||||
await _player.play(BytesSource(data));
|
||||
} catch (e) {
|
||||
_onError?.call(e.toString());
|
||||
// Fallback to device TTS on failure
|
||||
await _speakOnDevice(text);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Device TTS path
|
||||
await _speakOnDevice(text);
|
||||
}
|
||||
|
||||
/// Speaks [text] through the on-device engine.
///
/// Any utterance already in progress on the device engine is stopped first,
/// and the preferred voice is configured if that has not happened yet.
///
/// Throws a [StateError] when the service has been marked unavailable.
/// An integer result other than `1` from the engine is reported through the
/// error callback rather than thrown.
Future<void> _speakOnDevice(String text) async {
  if (!_available) {
    throw StateError('Text-to-speech is unavailable on this device');
  }

  await _tts.stop();

  final needsVoiceSetup = !_voiceConfigured;
  if (needsVoiceSetup) {
    await _configurePreferredVoice();
  }

  final outcome = await _tts.speak(text);

  // A null or non-integer result carries no status code to inspect, and a
  // code of 1 is the only integer value not reported as an error.
  final reportedFailure = outcome is int && outcome != 1;
  if (reportedFailure) {
    _onError?.call('Text-to-speech engine returned code $outcome');
  }
}
|
||||
|
||||
Future<void> pause() async {
|
||||
if (!_initialized || !_available) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!_initialized) return;
|
||||
try {
|
||||
await _tts.pause();
|
||||
if (_engine == TtsEngine.server) {
|
||||
await _player.pause();
|
||||
} else if (_available) {
|
||||
await _tts.pause();
|
||||
}
|
||||
} catch (e) {
|
||||
_onError?.call(e.toString());
|
||||
}
|
||||
@@ -136,7 +182,11 @@ class TextToSpeechService {
|
||||
}
|
||||
|
||||
try {
|
||||
await _tts.stop();
|
||||
if (_engine == TtsEngine.server) {
|
||||
await _player.stop();
|
||||
} else {
|
||||
await _tts.stop();
|
||||
}
|
||||
} catch (e) {
|
||||
_onError?.call(e.toString());
|
||||
}
|
||||
@@ -144,6 +194,7 @@ class TextToSpeechService {
|
||||
|
||||
/// Stops any speech in progress and releases the audio player.
///
/// Playback is halted through [stop] first. When the server engine is
/// selected, the device engine is stopped as well: the server speak path in
/// this class falls back to device speech on failure, and the engine-aware
/// stop only halts the audio player in that mode, so a fallback utterance
/// could otherwise keep playing after disposal.
///
/// Failures while stopping are reported through the error callback, and the
/// player is disposed regardless of whether stopping succeeded.
Future<void> dispose() async {
  try {
    await stop();
    if (_engine == TtsEngine.server) {
      // Covers device speech started as a fallback from the server path.
      await _tts.stop();
    }
  } catch (e) {
    _onError?.call(e.toString());
  } finally {
    // Always release the native player, even if stopping failed above.
    await _player.dispose();
  }
}
|
||||
|
||||
/// Update TTS settings on-the-fly
|
||||
@@ -152,12 +203,22 @@ class TextToSpeechService {
|
||||
double? speechRate,
|
||||
double? pitch,
|
||||
double? volume,
|
||||
TtsEngine? engine,
|
||||
}) async {
|
||||
if (!_initialized || !_available) {
|
||||
// Allow engine and voice to update before init
|
||||
if (engine != null) _engine = engine;
|
||||
if (voice != null) _preferredVoice = voice;
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
if (engine != null) {
|
||||
_engine = engine;
|
||||
}
|
||||
if (voice != null) {
|
||||
_preferredVoice = voice;
|
||||
}
|
||||
if (volume != null) {
|
||||
await _tts.setVolume(volume);
|
||||
}
|
||||
@@ -167,8 +228,10 @@ class TextToSpeechService {
|
||||
if (pitch != null) {
|
||||
await _tts.setPitch(pitch);
|
||||
}
|
||||
// Set specific voice by name
|
||||
await _setVoiceByName(voice);
|
||||
// Set specific voice by name on device engine
|
||||
if (_engine == TtsEngine.device) {
|
||||
await _setVoiceByName(_preferredVoice);
|
||||
}
|
||||
} catch (e) {
|
||||
_onError?.call(e.toString());
|
||||
}
|
||||
@@ -224,7 +287,31 @@ class TextToSpeechService {
|
||||
/// Get available voices from the TTS engine
|
||||
Future<List<Map<String, dynamic>>> getAvailableVoices() async {
|
||||
if (!_initialized) {
|
||||
await initialize();
|
||||
await initialize(voice: _preferredVoice, engine: _engine);
|
||||
}
|
||||
|
||||
if (_engine == TtsEngine.server && _api != null) {
|
||||
try {
|
||||
final serverVoices = await _api.getAvailableServerVoices();
|
||||
final mapped = serverVoices
|
||||
.map(
|
||||
(v) => {
|
||||
'name': (v['name'] ?? v['id'] ?? '').toString(),
|
||||
'locale': (v['locale'] ?? '').toString(),
|
||||
},
|
||||
)
|
||||
.where((e) => (e['name'] as String).isNotEmpty)
|
||||
.toList();
|
||||
if (mapped.isEmpty) {
|
||||
return [
|
||||
{'name': 'alloy', 'locale': ''},
|
||||
];
|
||||
}
|
||||
return mapped;
|
||||
} catch (e) {
|
||||
_onError?.call(e.toString());
|
||||
// Fall back to device voices
|
||||
}
|
||||
}
|
||||
|
||||
if (!_available) {
|
||||
|
||||
@@ -441,10 +441,97 @@ class AppCustomizationPage extends ConsumerWidget {
|
||||
TextStyle(color: theme.sidebarForeground, fontSize: 18),
|
||||
),
|
||||
const SizedBox(height: Spacing.sm),
|
||||
ConduitCard(
|
||||
padding: const EdgeInsets.all(Spacing.md),
|
||||
child: Column(
|
||||
crossAxisAlignment: CrossAxisAlignment.start,
|
||||
children: [
|
||||
Row(
|
||||
children: [
|
||||
_buildIconBadge(
|
||||
context,
|
||||
UiUtils.platformIcon(
|
||||
ios: CupertinoIcons.settings,
|
||||
android: Icons.settings_voice,
|
||||
),
|
||||
color: theme.buttonPrimary,
|
||||
),
|
||||
const SizedBox(width: Spacing.sm),
|
||||
const Text('Engine'),
|
||||
const Spacer(),
|
||||
Wrap(
|
||||
spacing: Spacing.sm,
|
||||
children: [
|
||||
ChoiceChip(
|
||||
label: const Text('On Device'),
|
||||
selected: settings.ttsEngine == TtsEngine.device,
|
||||
showCheckmark: false,
|
||||
selectedColor: theme.buttonPrimary,
|
||||
backgroundColor: theme.cardBackground,
|
||||
side: BorderSide(
|
||||
color: settings.ttsEngine == TtsEngine.device
|
||||
? theme.buttonPrimary.withValues(alpha: 0.6)
|
||||
: theme.textPrimary.withValues(alpha: 0.2),
|
||||
),
|
||||
labelStyle: TextStyle(
|
||||
color: settings.ttsEngine == TtsEngine.device
|
||||
? theme.buttonPrimaryText
|
||||
: theme.textPrimary,
|
||||
fontWeight: FontWeight.w600,
|
||||
),
|
||||
onSelected: (v) {
|
||||
if (v) {
|
||||
final notifier = ref.read(
|
||||
appSettingsProvider.notifier,
|
||||
);
|
||||
notifier.setTtsEngine(TtsEngine.device);
|
||||
// Keep previous voice (device voices)
|
||||
}
|
||||
},
|
||||
),
|
||||
ChoiceChip(
|
||||
label: const Text('Server'),
|
||||
selected: settings.ttsEngine == TtsEngine.server,
|
||||
showCheckmark: false,
|
||||
selectedColor: theme.buttonPrimary,
|
||||
backgroundColor: theme.cardBackground,
|
||||
side: BorderSide(
|
||||
color: settings.ttsEngine == TtsEngine.server
|
||||
? theme.buttonPrimary.withValues(alpha: 0.6)
|
||||
: theme.textPrimary.withValues(alpha: 0.2),
|
||||
),
|
||||
labelStyle: TextStyle(
|
||||
color: settings.ttsEngine == TtsEngine.server
|
||||
? theme.buttonPrimaryText
|
||||
: theme.textPrimary,
|
||||
fontWeight: FontWeight.w600,
|
||||
),
|
||||
onSelected: (v) {
|
||||
if (v) {
|
||||
final notifier = ref.read(
|
||||
appSettingsProvider.notifier,
|
||||
);
|
||||
// Clear device-specific voice so server can default
|
||||
notifier.setTtsVoice(null);
|
||||
notifier.setTtsEngine(TtsEngine.server);
|
||||
}
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
],
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
const SizedBox(height: Spacing.sm),
|
||||
_ExpandableCard(
|
||||
title: l10n.ttsVoice,
|
||||
subtitle: _getDisplayVoiceName(
|
||||
settings.ttsVoice,
|
||||
settings.ttsEngine == TtsEngine.server
|
||||
? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ??
|
||||
'')
|
||||
: (settings.ttsVoice ?? ''),
|
||||
l10n.ttsSystemDefault,
|
||||
),
|
||||
icon: UiUtils.platformIcon(
|
||||
@@ -466,7 +553,11 @@ class AppCustomizationPage extends ConsumerWidget {
|
||||
),
|
||||
title: l10n.ttsVoice,
|
||||
subtitle: _getDisplayVoiceName(
|
||||
settings.ttsVoice,
|
||||
settings.ttsEngine == TtsEngine.server
|
||||
? ((settings.ttsServerVoiceName ??
|
||||
settings.ttsServerVoiceId) ??
|
||||
'')
|
||||
: (settings.ttsVoice ?? ''),
|
||||
l10n.ttsSystemDefault,
|
||||
),
|
||||
onTap: () => _showVoicePickerSheet(context, ref, settings),
|
||||
@@ -616,7 +707,10 @@ class AppCustomizationPage extends ConsumerWidget {
|
||||
final theme = context.conduitTheme;
|
||||
final ttsService = ref.read(textToSpeechServiceProvider);
|
||||
|
||||
// Fetch available voices
|
||||
// Ensure the service uses the currently selected engine before fetching
|
||||
await ttsService.updateSettings(engine: settings.ttsEngine);
|
||||
|
||||
// Fetch available voices from the active engine
|
||||
final allVoices = await ttsService.getAvailableVoices();
|
||||
|
||||
if (!context.mounted) return;
|
||||
@@ -729,17 +823,29 @@ class AppCustomizationPage extends ConsumerWidget {
|
||||
style:
|
||||
theme.bodyMedium?.copyWith(
|
||||
color: theme.sidebarForeground,
|
||||
fontWeight: settings.ttsVoice == null
|
||||
fontWeight:
|
||||
(settings.ttsEngine == TtsEngine.server
|
||||
? settings.ttsServerVoiceId == null
|
||||
: settings.ttsVoice == null)
|
||||
? FontWeight.bold
|
||||
: FontWeight.normal,
|
||||
) ??
|
||||
TextStyle(color: theme.sidebarForeground),
|
||||
),
|
||||
trailing: settings.ttsVoice == null
|
||||
trailing:
|
||||
(settings.ttsEngine == TtsEngine.server
|
||||
? settings.ttsServerVoiceId == null
|
||||
: settings.ttsVoice == null)
|
||||
? Icon(Icons.check, color: theme.buttonPrimary)
|
||||
: null,
|
||||
onTap: () {
|
||||
ref.read(appSettingsProvider.notifier).setTtsVoice(null);
|
||||
final notifier = ref.read(appSettingsProvider.notifier);
|
||||
if (settings.ttsEngine == TtsEngine.server) {
|
||||
notifier.setTtsServerVoiceId(null);
|
||||
notifier.setTtsServerVoiceName(null);
|
||||
} else {
|
||||
notifier.setTtsVoice(null);
|
||||
}
|
||||
Navigator.of(sheetContext).pop();
|
||||
},
|
||||
),
|
||||
@@ -823,7 +929,9 @@ class AppCustomizationPage extends ConsumerWidget {
|
||||
final voiceId = _getVoiceIdentifier(voice);
|
||||
final displayName = _formatVoiceName(voice);
|
||||
final subtitle = _getVoiceSubtitle(voice);
|
||||
final isSelected = settings.ttsVoice == voiceId;
|
||||
final isSelected = settings.ttsEngine == TtsEngine.server
|
||||
? settings.ttsServerVoiceId == voiceId
|
||||
: settings.ttsVoice == voiceId;
|
||||
|
||||
return ListTile(
|
||||
leading: Icon(
|
||||
@@ -865,9 +973,15 @@ class AppCustomizationPage extends ConsumerWidget {
|
||||
? Icon(Icons.check, color: theme.buttonPrimary)
|
||||
: null,
|
||||
onTap: () {
|
||||
ref
|
||||
.read(appSettingsProvider.notifier)
|
||||
.setTtsVoice(voiceId);
|
||||
final notifier = ref.read(
|
||||
appSettingsProvider.notifier,
|
||||
);
|
||||
if (settings.ttsEngine == TtsEngine.server) {
|
||||
notifier.setTtsServerVoiceId(voiceId);
|
||||
notifier.setTtsServerVoiceName(displayName);
|
||||
} else {
|
||||
notifier.setTtsVoice(voiceId);
|
||||
}
|
||||
Navigator.of(sheetContext).pop();
|
||||
},
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user