feat(tts): server-backed TTS engine selection

Introduce server TTS support and engine selection while keeping
device TTS as the default.

- Add new persistence keys for storing TTS engine and selected
  server voice (ttsEngine, ttsServerVoiceId, ttsServerVoiceName).
- Extend TextToSpeechService to support two engines:
  TtsEngine.device (FlutterTts) and TtsEngine.server (remote audio).
- Wire in an AudioPlayer and an optional ApiService to fetch raw
  audio bytes from the server and play them, with event hooks
  mapped to the existing lifecycle callbacks.
- Implement fallback to device TTS on server errors or empty
  responses, and ensure the player lifecycle (pause/stop/dispose)
  is handled when using the server engine.
- Allow engine and preferred voice to be configured before
  initialization and updated at runtime via updateSettings.

This enables selecting a server-side voice and using a remote
TTS provider while preserving compatibility with the existing
device TTS implementation.
This commit is contained in:
cogwheel0
2025-10-23 16:31:15 +05:30
parent 2337568baf
commit 561e7dd616
10 changed files with 404 additions and 36 deletions

View File

@@ -2261,12 +2261,24 @@ class ApiService {
}
// Audio
Future<List<String>> getAvailableVoices() async {
_traceApi('Fetching available voices');
/// Fetches the TTS voices advertised by the server.
///
/// Issues a GET to `/api/v1/audio/voices` and normalizes the response into
/// a list of maps:
/// - a map payload whose `voices` entry is a list yields that list's
///   map-shaped entries (non-map entries are dropped);
/// - a bare list payload is intended to yield one `{'id', 'name'}` map per
///   element, using the element's string form for both keys (see the
///   review note inside that branch);
/// - any other payload yields an empty list.
Future<List<Map<String, dynamic>>> getAvailableServerVoices() async {
_traceApi('Fetching server TTS voices');
final response = await _dio.get('/api/v1/audio/voices');
final data = response.data;
if (data is Map<String, dynamic>) {
final voices = data['voices'];
if (voices is List) {
// Keep only map-shaped entries and view them as String-keyed maps.
return voices
.whereType<Map>()
.map((e) => e.cast<String, dynamic>())
.toList();
}
}
if (data is List) {
// NOTE(review): the return on the next line looks like the pre-change
// statement carried over from the diff view; as written it precedes the
// fallback below and does not match the declared return type — confirm
// it is meant to be removed.
return data.cast<String>();
// Fallback: plain list of ids
return data
.map((e) => {'id': e.toString(), 'name': e.toString()})
.toList();
}
// Unrecognized payload shape: report no voices.
return [];
}
@@ -2279,13 +2291,15 @@ class ApiService {
_traceApi('Generating speech for text: $textPreview...');
final response = await _dio.post(
'/api/v1/audio/speech',
data: {'text': text, if (voice != null) 'voice': voice},
data: {'input': text, if (voice != null) 'voice': voice},
options: Options(responseType: ResponseType.bytes),
);
// Return audio data as bytes
if (response.data is List) {
return (response.data as List).cast<int>();
}
final data = response.data;
if (data is List<int>) return data;
if (data is Uint8List) return data.toList();
if (data is List) return (data).cast<int>();
return [];
}

View File

@@ -8,6 +8,9 @@ import 'animation_service.dart';
part 'settings_service.g.dart';
/// Which engine produces text-to-speech output.
enum TtsEngine {
  /// Speech is synthesized on the device.
  device,

  /// Speech audio is fetched from the server.
  server,
}
/// Service for managing app-wide settings including accessibility preferences
class SettingsService {
static const String _reduceMotionKey = PreferenceKeys.reduceMotion;
@@ -142,6 +145,12 @@ class SettingsService {
ttsPitch: (box.get(PreferenceKeys.ttsPitch) as num?)?.toDouble() ?? 1.0,
ttsVolume:
(box.get(PreferenceKeys.ttsVolume) as num?)?.toDouble() ?? 1.0,
ttsEngine: _parseTtsEngine(
box.get(PreferenceKeys.ttsEngine) as String?,
),
ttsServerVoiceId: box.get(PreferenceKeys.ttsServerVoiceId) as String?,
ttsServerVoiceName:
box.get(PreferenceKeys.ttsServerVoiceName) as String?,
),
);
}
@@ -164,6 +173,7 @@ class SettingsService {
PreferenceKeys.ttsSpeechRate: settings.ttsSpeechRate,
PreferenceKeys.ttsPitch: settings.ttsPitch,
PreferenceKeys.ttsVolume: settings.ttsVolume,
PreferenceKeys.ttsEngine: settings.ttsEngine.name,
};
await box.putAll(updates);
@@ -185,6 +195,33 @@ class SettingsService {
} else {
await box.delete(PreferenceKeys.ttsVoice);
}
// Server-specific voice id and friendly name
if (settings.ttsServerVoiceId != null &&
settings.ttsServerVoiceId!.isNotEmpty) {
await box.put(PreferenceKeys.ttsServerVoiceId, settings.ttsServerVoiceId);
} else {
await box.delete(PreferenceKeys.ttsServerVoiceId);
}
if (settings.ttsServerVoiceName != null &&
settings.ttsServerVoiceName!.isNotEmpty) {
await box.put(
PreferenceKeys.ttsServerVoiceName,
settings.ttsServerVoiceName,
);
} else {
await box.delete(PreferenceKeys.ttsServerVoiceName);
}
}
/// Converts a stored engine name into a [TtsEngine].
///
/// Matching is case-insensitive. Only `server` selects
/// [TtsEngine.server]; every other value, including `null`, resolves to
/// [TtsEngine.device].
static TtsEngine _parseTtsEngine(String? raw) {
  final normalized = raw?.toLowerCase() ?? '';
  if (normalized == 'server') {
    return TtsEngine.server;
  }
  // 'device' and anything unrecognized share the same result.
  return TtsEngine.device;
}
// Voice input specific settings
@@ -314,6 +351,9 @@ class AppSettings {
final double ttsSpeechRate;
final double ttsPitch;
final double ttsVolume;
final TtsEngine ttsEngine;
final String? ttsServerVoiceId;
final String? ttsServerVoiceName;
const AppSettings({
this.reduceMotion = false,
this.animationSpeed = 1.0,
@@ -332,6 +372,9 @@ class AppSettings {
this.ttsSpeechRate = 0.5,
this.ttsPitch = 1.0,
this.ttsVolume = 1.0,
this.ttsEngine = TtsEngine.device,
this.ttsServerVoiceId,
this.ttsServerVoiceName,
});
AppSettings copyWith({
@@ -352,6 +395,9 @@ class AppSettings {
double? ttsSpeechRate,
double? ttsPitch,
double? ttsVolume,
TtsEngine? ttsEngine,
Object? ttsServerVoiceId = const _DefaultValue(),
Object? ttsServerVoiceName = const _DefaultValue(),
}) {
return AppSettings(
reduceMotion: reduceMotion ?? this.reduceMotion,
@@ -375,6 +421,13 @@ class AppSettings {
ttsSpeechRate: ttsSpeechRate ?? this.ttsSpeechRate,
ttsPitch: ttsPitch ?? this.ttsPitch,
ttsVolume: ttsVolume ?? this.ttsVolume,
ttsEngine: ttsEngine ?? this.ttsEngine,
ttsServerVoiceId: ttsServerVoiceId is _DefaultValue
? this.ttsServerVoiceId
: ttsServerVoiceId as String?,
ttsServerVoiceName: ttsServerVoiceName is _DefaultValue
? this.ttsServerVoiceName
: ttsServerVoiceName as String?,
);
}
@@ -397,6 +450,9 @@ class AppSettings {
other.ttsSpeechRate == ttsSpeechRate &&
other.ttsPitch == ttsPitch &&
other.ttsVolume == ttsVolume &&
other.ttsEngine == ttsEngine &&
other.ttsServerVoiceId == ttsServerVoiceId &&
other.ttsServerVoiceName == ttsServerVoiceName &&
_listEquals(other.quickPills, quickPills);
// socketTransportMode intentionally not included in == to avoid frequent rebuilds
}
@@ -420,6 +476,9 @@ class AppSettings {
ttsSpeechRate,
ttsPitch,
ttsVolume,
ttsEngine,
ttsServerVoiceId,
ttsServerVoiceName,
Object.hashAllUnordered(quickPills),
);
}
@@ -543,6 +602,21 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
await SettingsService.saveSettings(state);
}
/// Switches the active TTS engine to [engine] and saves the resulting
/// settings through [SettingsService.saveSettings].
Future<void> setTtsEngine(TtsEngine engine) async {
  final updated = state.copyWith(ttsEngine: engine);
  state = updated;
  await SettingsService.saveSettings(updated);
}
/// Stores [name] as the server voice name in the current settings and
/// saves the result through [SettingsService.saveSettings].
///
/// [name] is forwarded to `copyWith` as given, including `null`.
Future<void> setTtsServerVoiceName(String? name) async {
  final updated = state.copyWith(ttsServerVoiceName: name);
  state = updated;
  await SettingsService.saveSettings(updated);
}
/// Stores [id] as the server voice id in the current settings and saves
/// the result through [SettingsService.saveSettings].
///
/// [id] is forwarded to `copyWith` as given, including `null`.
Future<void> setTtsServerVoiceId(String? id) async {
  final updated = state.copyWith(ttsServerVoiceId: id);
  state = updated;
  await SettingsService.saveSettings(updated);
}
Future<void> resetToDefaults() async {
const defaultSettings = AppSettings();
await SettingsService.saveSettings(defaultSettings);