Merge pull request #114 from cogwheel0/feat-tts-server-default-voice
feat(tts): add server default voice retrieval
This commit is contained in:
@@ -2361,6 +2361,22 @@ class ApiService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Audio
|
// Audio
|
||||||
|
/// Fetches the default TTS voice configured on the server.
///
/// Requests `/api/v1/audio/config` and reads the voice from the `tts` section
/// of the response, checking the `VOICE` key first and then `voice`.
///
/// Returns the trimmed voice name, or `null` when the response is not a map,
/// has no `tts` map, or neither key holds a non-blank string. Errors raised by
/// the HTTP request are not caught here and propagate to the caller.
Future<String?> getDefaultServerVoice() async {
  _traceApi('Fetching default server TTS voice');
  final response = await _dio.get('/api/v1/audio/config');

  final data = response.data;
  if (data is! Map) return null;

  final ttsConfig = data['tts'];
  if (ttsConfig is! Map) return null;

  // Try each key in turn so that a blank or non-string `VOICE` entry does not
  // hide a usable `voice` entry (a plain `??` only falls back on null).
  for (final key in const ['VOICE', 'voice']) {
    final candidate = ttsConfig[key];
    if (candidate is String) {
      final trimmed = candidate.trim();
      if (trimmed.isNotEmpty) return trimmed;
    }
  }
  return null;
}
|
||||||
|
|
||||||
Future<List<Map<String, dynamic>>> getAvailableServerVoices() async {
|
Future<List<Map<String, dynamic>>> getAvailableServerVoices() async {
|
||||||
_traceApi('Fetching server TTS voices');
|
_traceApi('Fetching server TTS voices');
|
||||||
final response = await _dio.get('/api/v1/audio/voices');
|
final response = await _dio.get('/api/v1/audio/voices');
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ class TextToSpeechService {
|
|||||||
int _expectedChunks = 0;
|
int _expectedChunks = 0;
|
||||||
int _currentIndex = -1;
|
int _currentIndex = -1;
|
||||||
bool _waitingNext = false;
|
bool _waitingNext = false;
|
||||||
|
String? _serverDefaultVoice;
|
||||||
|
Future<String?>? _serverDefaultVoiceFuture;
|
||||||
|
|
||||||
VoidCallback? _onStart;
|
VoidCallback? _onStart;
|
||||||
VoidCallback? _onComplete;
|
VoidCallback? _onComplete;
|
||||||
@@ -209,16 +211,18 @@ class TextToSpeechService {
|
|||||||
|
|
||||||
/// Update TTS settings on-the-fly
|
/// Update TTS settings on-the-fly
|
||||||
Future<void> updateSettings({
|
Future<void> updateSettings({
|
||||||
String? voice,
|
Object? voice = const _VoiceNotProvided(),
|
||||||
double? speechRate,
|
double? speechRate,
|
||||||
double? pitch,
|
double? pitch,
|
||||||
double? volume,
|
double? volume,
|
||||||
TtsEngine? engine,
|
TtsEngine? engine,
|
||||||
}) async {
|
}) async {
|
||||||
|
final voiceProvided = voice is! _VoiceNotProvided;
|
||||||
|
final voiceValue = voiceProvided ? voice as String? : null;
|
||||||
if (!_initialized || !_available) {
|
if (!_initialized || !_available) {
|
||||||
// Allow engine and voice to update before init
|
// Allow engine and voice to update before init
|
||||||
if (engine != null) _engine = engine;
|
if (engine != null) _engine = engine;
|
||||||
if (voice != null) _preferredVoice = voice;
|
if (voiceProvided) _preferredVoice = voiceValue;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -226,8 +230,8 @@ class TextToSpeechService {
|
|||||||
if (engine != null) {
|
if (engine != null) {
|
||||||
_engine = engine;
|
_engine = engine;
|
||||||
}
|
}
|
||||||
if (voice != null) {
|
if (voiceProvided) {
|
||||||
_preferredVoice = voice;
|
_preferredVoice = voiceValue;
|
||||||
}
|
}
|
||||||
if (volume != null) {
|
if (volume != null) {
|
||||||
await _tts.setVolume(volume);
|
await _tts.setVolume(volume);
|
||||||
@@ -239,7 +243,7 @@ class TextToSpeechService {
|
|||||||
await _tts.setPitch(pitch);
|
await _tts.setPitch(pitch);
|
||||||
}
|
}
|
||||||
// Set specific voice by name on device engine
|
// Set specific voice by name on device engine
|
||||||
if (_engine == TtsEngine.device) {
|
if (_engine == TtsEngine.device && voiceProvided) {
|
||||||
await _setVoiceByName(_preferredVoice);
|
await _setVoiceByName(_preferredVoice);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -304,18 +308,45 @@ class TextToSpeechService {
|
|||||||
try {
|
try {
|
||||||
final serverVoices = await _api.getAvailableServerVoices();
|
final serverVoices = await _api.getAvailableServerVoices();
|
||||||
final mapped = serverVoices
|
final mapped = serverVoices
|
||||||
.map(
|
.map((v) {
|
||||||
(v) => {
|
final id = (v['id'] ?? v['name'] ?? '').toString();
|
||||||
'name': (v['name'] ?? v['id'] ?? '').toString(),
|
final name = (v['name'] ?? v['id'] ?? '').toString();
|
||||||
'locale': (v['locale'] ?? '').toString(),
|
final localeValue = (v['locale'] ?? v['language'] ?? '')
|
||||||
},
|
.toString();
|
||||||
)
|
return {'id': id, 'name': name, 'locale': localeValue};
|
||||||
.where((e) => (e['name'] as String).isNotEmpty)
|
})
|
||||||
|
.where((entry) {
|
||||||
|
final name = entry['name'];
|
||||||
|
return name is String && name.trim().isNotEmpty;
|
||||||
|
})
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
|
final defaultVoice = await _getServerDefaultVoice();
|
||||||
|
if (defaultVoice != null && defaultVoice.isNotEmpty) {
|
||||||
|
final normalized = defaultVoice.toLowerCase();
|
||||||
|
final exists = mapped.any((voice) {
|
||||||
|
final name = voice['name'];
|
||||||
|
final id = voice['id'];
|
||||||
|
final lowerName = name is String ? name.toLowerCase() : '';
|
||||||
|
final lowerId = id is String ? id.toLowerCase() : '';
|
||||||
|
return lowerName == normalized || lowerId == normalized;
|
||||||
|
});
|
||||||
|
if (!exists) {
|
||||||
|
mapped.insert(0, {
|
||||||
|
'id': defaultVoice,
|
||||||
|
'name': defaultVoice,
|
||||||
|
'locale': '',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (mapped.isEmpty) {
|
if (mapped.isEmpty) {
|
||||||
return [
|
if (defaultVoice != null && defaultVoice.isNotEmpty) {
|
||||||
{'name': 'alloy', 'locale': ''},
|
return [
|
||||||
];
|
{'id': defaultVoice, 'name': defaultVoice, 'locale': ''},
|
||||||
|
];
|
||||||
|
}
|
||||||
|
return const [];
|
||||||
}
|
}
|
||||||
return mapped;
|
return mapped;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@@ -351,13 +382,54 @@ class TextToSpeechService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Picks the voice to use for server-side speech generation.
///
/// The trimmed [_preferredVoice] wins when it is non-blank; otherwise the
/// server's default voice is used. Returns `null` when neither yields a
/// non-empty name.
Future<String?> _resolveServerVoice() async {
  final preferred = _preferredVoice?.trim() ?? '';
  if (preferred.isNotEmpty) return preferred;

  // No usable user selection: fall back to whatever the server advertises.
  final fallback = (await _getServerDefaultVoice()) ?? '';
  return fallback.isEmpty ? null : fallback;
}
|
||||||
|
|
||||||
|
/// Returns the server's default TTS voice, sharing one request between
/// concurrent callers.
///
/// Resolution order:
/// 1. `null` when no API client is set.
/// 2. The cached [_serverDefaultVoice], once a request has produced a
///    non-blank voice.
/// 3. The request that is already in flight, if any.
/// 4. A new request via `getDefaultServerVoice()`.
///
/// A failed request is reported through `_onError` and resolves to `null` for
/// every caller awaiting it. A `null` or blank result is not cached, so a
/// later call issues a new request.
Future<String?> _getServerDefaultVoice() async {
  final api = _api;
  if (api == null) return null;

  final cached = _serverDefaultVoice;
  if (cached != null) return cached;

  // Cache the *guarded* future rather than the raw API future. Otherwise a
  // caller that joins an in-flight request bypasses the error handling below
  // and sees an exception where the first caller sees `null`.
  return _serverDefaultVoiceFuture ??= () async {
    try {
      final trimmed = (await api.getDefaultServerVoice())?.trim();
      if (trimmed == null || trimmed.isEmpty) return null;
      _serverDefaultVoice = trimmed;
      return trimmed;
    } catch (e) {
      _onError?.call(e.toString());
      return null;
    } finally {
      // Reached only after the `await` above has suspended, i.e. after the
      // `??=` assignment completed, so this clears this request's own entry.
      _serverDefaultVoiceFuture = null;
    }
  }();
}
|
||||||
|
|
||||||
// ===== Server chunked playback =====
|
// ===== Server chunked playback =====
|
||||||
|
|
||||||
Future<void> _startServerChunkedPlayback(String text) async {
|
Future<void> _startServerChunkedPlayback(String text) async {
|
||||||
final effectiveVoice =
|
final resolvedVoice = await _resolveServerVoice();
|
||||||
(_preferredVoice == null || _preferredVoice!.trim().isEmpty)
|
final effectiveVoice = resolvedVoice;
|
||||||
? 'alloy'
|
|
||||||
: _preferredVoice!;
|
|
||||||
|
|
||||||
// Reset queue and create a new session
|
// Reset queue and create a new session
|
||||||
_session++;
|
_session++;
|
||||||
@@ -398,7 +470,7 @@ class TextToSpeechService {
|
|||||||
|
|
||||||
Future<void> _prefetchRemainingChunks(
|
Future<void> _prefetchRemainingChunks(
|
||||||
List<String> remaining,
|
List<String> remaining,
|
||||||
String voice,
|
String? voice,
|
||||||
int session,
|
int session,
|
||||||
) async {
|
) async {
|
||||||
for (final chunk in remaining) {
|
for (final chunk in remaining) {
|
||||||
@@ -423,7 +495,7 @@ class TextToSpeechService {
|
|||||||
|
|
||||||
Future<List<int>> _fetchServerAudio(
|
Future<List<int>> _fetchServerAudio(
|
||||||
String text,
|
String text,
|
||||||
String voice,
|
String? voice,
|
||||||
int session,
|
int session,
|
||||||
) async {
|
) async {
|
||||||
return await _api!.generateSpeech(text: text, voice: voice);
|
return await _api!.generateSpeech(text: text, voice: voice);
|
||||||
@@ -829,3 +901,7 @@ class TextToSpeechService {
|
|||||||
_onError?.call(safeMessage);
|
_onError?.call(safeMessage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sentinel marking "no `voice` argument was passed".
///
/// Used as the default value of the `voice` parameter of `updateSettings`, so
/// that an omitted argument can be told apart from an explicit `null`.
class _VoiceNotProvided {
  /// Const so the sentinel can appear as a parameter default value.
  const _VoiceNotProvided();
}
|
||||||
|
|||||||
Reference in New Issue
Block a user