feat(tts): server-backed TTS engine selection
Introduce server TTS support and engine selection while keeping device TTS as the default.

- Add new persistence keys for storing the TTS engine and the selected server voice (ttsEngine, ttsServerVoiceId, ttsServerVoiceName).
- Extend TextToSpeechService to support two engines: TtsEngine.device (FlutterTts) and TtsEngine.server (remote audio).
- Wire in an AudioPlayer and an optional ApiService to fetch raw audio bytes from the server and play them, with event hooks mapped to the existing lifecycle callbacks.
- Implement fallback to device TTS on server errors or empty responses, and ensure the player lifecycle (pause/stop/dispose) is handled when using the server engine.
- Allow the engine and preferred voice to be configured before initialization and updated at runtime via updateSettings.

This enables selecting a server-side voice and using a remote TTS provider while preserving compatibility with the existing device TTS implementation.
This commit is contained in:
@@ -25,6 +25,9 @@ final class PreferenceKeys {
|
||||
static const String ttsSpeechRate = 'tts_speech_rate';
|
||||
static const String ttsPitch = 'tts_pitch';
|
||||
static const String ttsVolume = 'tts_volume';
|
||||
static const String ttsEngine = 'tts_engine'; // 'device' | 'server'
|
||||
static const String ttsServerVoiceId = 'tts_server_voice_id';
|
||||
static const String ttsServerVoiceName = 'tts_server_voice_name';
|
||||
}
|
||||
|
||||
final class LegacyPreferenceKeys {
|
||||
|
||||
@@ -1830,7 +1830,11 @@ Future<List<String>> availableVoices(Ref ref) async {
|
||||
if (api == null) return [];
|
||||
|
||||
try {
|
||||
return await api.getAvailableVoices();
|
||||
final voices = await api.getAvailableServerVoices();
|
||||
return voices
|
||||
.map((v) => (v['name'] ?? v['id'] ?? '').toString())
|
||||
.where((s) => s.isNotEmpty)
|
||||
.toList();
|
||||
} catch (e) {
|
||||
DebugLogger.error('voices-failed', scope: 'voices', error: e);
|
||||
return [];
|
||||
|
||||
@@ -2261,12 +2261,24 @@ class ApiService {
|
||||
}
|
||||
|
||||
// Audio
|
||||
Future<List<String>> getAvailableVoices() async {
|
||||
_traceApi('Fetching available voices');
|
||||
Future<List<Map<String, dynamic>>> getAvailableServerVoices() async {
|
||||
_traceApi('Fetching server TTS voices');
|
||||
final response = await _dio.get('/api/v1/audio/voices');
|
||||
final data = response.data;
|
||||
if (data is Map<String, dynamic>) {
|
||||
final voices = data['voices'];
|
||||
if (voices is List) {
|
||||
return voices
|
||||
.whereType<Map>()
|
||||
.map((e) => e.cast<String, dynamic>())
|
||||
.toList();
|
||||
}
|
||||
}
|
||||
if (data is List) {
|
||||
return data.cast<String>();
|
||||
// Fallback: plain list of ids
|
||||
return data
|
||||
.map((e) => {'id': e.toString(), 'name': e.toString()})
|
||||
.toList();
|
||||
}
|
||||
return [];
|
||||
}
|
||||
@@ -2279,13 +2291,15 @@ class ApiService {
|
||||
_traceApi('Generating speech for text: $textPreview...');
|
||||
final response = await _dio.post(
|
||||
'/api/v1/audio/speech',
|
||||
data: {'text': text, if (voice != null) 'voice': voice},
|
||||
data: {'input': text, if (voice != null) 'voice': voice},
|
||||
options: Options(responseType: ResponseType.bytes),
|
||||
);
|
||||
|
||||
// Return audio data as bytes
|
||||
if (response.data is List) {
|
||||
return (response.data as List).cast<int>();
|
||||
}
|
||||
final data = response.data;
|
||||
if (data is List<int>) return data;
|
||||
if (data is Uint8List) return data.toList();
|
||||
if (data is List) return (data).cast<int>();
|
||||
return [];
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,9 @@ import 'animation_service.dart';
|
||||
|
||||
part 'settings_service.g.dart';
|
||||
|
||||
/// TTS engine selection.
///
/// [device] selects the on-device engine (FlutterTts per the commit notes);
/// [server] selects remote audio fetched from the server.
|
||||
enum TtsEngine { device, server }
|
||||
|
||||
/// Service for managing app-wide settings including accessibility preferences
|
||||
class SettingsService {
|
||||
static const String _reduceMotionKey = PreferenceKeys.reduceMotion;
|
||||
@@ -142,6 +145,12 @@ class SettingsService {
|
||||
ttsPitch: (box.get(PreferenceKeys.ttsPitch) as num?)?.toDouble() ?? 1.0,
|
||||
ttsVolume:
|
||||
(box.get(PreferenceKeys.ttsVolume) as num?)?.toDouble() ?? 1.0,
|
||||
ttsEngine: _parseTtsEngine(
|
||||
box.get(PreferenceKeys.ttsEngine) as String?,
|
||||
),
|
||||
ttsServerVoiceId: box.get(PreferenceKeys.ttsServerVoiceId) as String?,
|
||||
ttsServerVoiceName:
|
||||
box.get(PreferenceKeys.ttsServerVoiceName) as String?,
|
||||
),
|
||||
);
|
||||
}
|
||||
@@ -164,6 +173,7 @@ class SettingsService {
|
||||
PreferenceKeys.ttsSpeechRate: settings.ttsSpeechRate,
|
||||
PreferenceKeys.ttsPitch: settings.ttsPitch,
|
||||
PreferenceKeys.ttsVolume: settings.ttsVolume,
|
||||
PreferenceKeys.ttsEngine: settings.ttsEngine.name,
|
||||
};
|
||||
|
||||
await box.putAll(updates);
|
||||
@@ -185,6 +195,33 @@ class SettingsService {
|
||||
} else {
|
||||
await box.delete(PreferenceKeys.ttsVoice);
|
||||
}
|
||||
|
||||
// Server-specific voice id and friendly name
|
||||
if (settings.ttsServerVoiceId != null &&
|
||||
settings.ttsServerVoiceId!.isNotEmpty) {
|
||||
await box.put(PreferenceKeys.ttsServerVoiceId, settings.ttsServerVoiceId);
|
||||
} else {
|
||||
await box.delete(PreferenceKeys.ttsServerVoiceId);
|
||||
}
|
||||
if (settings.ttsServerVoiceName != null &&
|
||||
settings.ttsServerVoiceName!.isNotEmpty) {
|
||||
await box.put(
|
||||
PreferenceKeys.ttsServerVoiceName,
|
||||
settings.ttsServerVoiceName,
|
||||
);
|
||||
} else {
|
||||
await box.delete(PreferenceKeys.ttsServerVoiceName);
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the persisted engine string into a [TtsEngine].
///
/// Matching is case-insensitive. Only `'server'` yields [TtsEngine.server];
/// `null`, `'device'`, and any other value yield [TtsEngine.device].
static TtsEngine _parseTtsEngine(String? raw) {
|
||||
// A null value is treated as the empty string, which hits `default`.
switch ((raw ?? '').toLowerCase()) {
|
||||
case 'server':
|
||||
return TtsEngine.server;
|
||||
// 'device' falls through to the default so both share one return.
case 'device':
|
||||
default:
|
||||
return TtsEngine.device;
|
||||
}
|
||||
}
|
||||
|
||||
// Voice input specific settings
|
||||
@@ -314,6 +351,9 @@ class AppSettings {
|
||||
final double ttsSpeechRate;
|
||||
final double ttsPitch;
|
||||
final double ttsVolume;
|
||||
final TtsEngine ttsEngine;
|
||||
final String? ttsServerVoiceId;
|
||||
final String? ttsServerVoiceName;
|
||||
const AppSettings({
|
||||
this.reduceMotion = false,
|
||||
this.animationSpeed = 1.0,
|
||||
@@ -332,6 +372,9 @@ class AppSettings {
|
||||
this.ttsSpeechRate = 0.5,
|
||||
this.ttsPitch = 1.0,
|
||||
this.ttsVolume = 1.0,
|
||||
this.ttsEngine = TtsEngine.device,
|
||||
this.ttsServerVoiceId,
|
||||
this.ttsServerVoiceName,
|
||||
});
|
||||
|
||||
AppSettings copyWith({
|
||||
@@ -352,6 +395,9 @@ class AppSettings {
|
||||
double? ttsSpeechRate,
|
||||
double? ttsPitch,
|
||||
double? ttsVolume,
|
||||
TtsEngine? ttsEngine,
|
||||
Object? ttsServerVoiceId = const _DefaultValue(),
|
||||
Object? ttsServerVoiceName = const _DefaultValue(),
|
||||
}) {
|
||||
return AppSettings(
|
||||
reduceMotion: reduceMotion ?? this.reduceMotion,
|
||||
@@ -375,6 +421,13 @@ class AppSettings {
|
||||
ttsSpeechRate: ttsSpeechRate ?? this.ttsSpeechRate,
|
||||
ttsPitch: ttsPitch ?? this.ttsPitch,
|
||||
ttsVolume: ttsVolume ?? this.ttsVolume,
|
||||
ttsEngine: ttsEngine ?? this.ttsEngine,
|
||||
ttsServerVoiceId: ttsServerVoiceId is _DefaultValue
|
||||
? this.ttsServerVoiceId
|
||||
: ttsServerVoiceId as String?,
|
||||
ttsServerVoiceName: ttsServerVoiceName is _DefaultValue
|
||||
? this.ttsServerVoiceName
|
||||
: ttsServerVoiceName as String?,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -397,6 +450,9 @@ class AppSettings {
|
||||
other.ttsSpeechRate == ttsSpeechRate &&
|
||||
other.ttsPitch == ttsPitch &&
|
||||
other.ttsVolume == ttsVolume &&
|
||||
other.ttsEngine == ttsEngine &&
|
||||
other.ttsServerVoiceId == ttsServerVoiceId &&
|
||||
other.ttsServerVoiceName == ttsServerVoiceName &&
|
||||
_listEquals(other.quickPills, quickPills);
|
||||
// socketTransportMode intentionally not included in == to avoid frequent rebuilds
|
||||
}
|
||||
@@ -420,6 +476,9 @@ class AppSettings {
|
||||
ttsSpeechRate,
|
||||
ttsPitch,
|
||||
ttsVolume,
|
||||
ttsEngine,
|
||||
ttsServerVoiceId,
|
||||
ttsServerVoiceName,
|
||||
Object.hashAllUnordered(quickPills),
|
||||
);
|
||||
}
|
||||
@@ -543,6 +602,21 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
|
||||
await SettingsService.saveSettings(state);
|
||||
}
|
||||
|
||||
/// Sets the TTS engine in the current state and then persists the whole
/// settings object through [SettingsService.saveSettings].
Future<void> setTtsEngine(TtsEngine engine) async {
|
||||
// State is replaced first; the save below is awaited afterwards.
state = state.copyWith(ttsEngine: engine);
|
||||
await SettingsService.saveSettings(state);
|
||||
}
|
||||
|
||||
/// Sets the friendly name of the selected server voice and persists the
/// settings through [SettingsService.saveSettings].
///
/// Passing `null` clears the name: `copyWith` keeps the old value only when
/// the argument is omitted, and `saveSettings` deletes the stored key when
/// the name is null or empty.
Future<void> setTtsServerVoiceName(String? name) async {
|
||||
state = state.copyWith(ttsServerVoiceName: name);
|
||||
await SettingsService.saveSettings(state);
|
||||
}
|
||||
|
||||
/// Sets the id of the selected server voice and persists the settings
/// through [SettingsService.saveSettings].
///
/// Passing `null` clears the id: `copyWith` keeps the old value only when
/// the argument is omitted, and `saveSettings` deletes the stored key when
/// the id is null or empty.
Future<void> setTtsServerVoiceId(String? id) async {
|
||||
state = state.copyWith(ttsServerVoiceId: id);
|
||||
await SettingsService.saveSettings(state);
|
||||
}
|
||||
|
||||
Future<void> resetToDefaults() async {
|
||||
const defaultSettings = AppSettings();
|
||||
await SettingsService.saveSettings(defaultSettings);
|
||||
|
||||
Reference in New Issue
Block a user