From 44149d5f81739f06d714e41f2077729f6430b5ca Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Thu, 30 Oct 2025 16:10:20 +0530 Subject: [PATCH] feat(tts): add server default voice retrieval and integrate it into --- lib/core/services/api_service.dart | 16 +++ .../chat/services/text_to_speech_service.dart | 118 ++++++++++++++---- 2 files changed, 113 insertions(+), 21 deletions(-) diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart index eca3729..a92a254 100644 --- a/lib/core/services/api_service.dart +++ b/lib/core/services/api_service.dart @@ -2361,6 +2361,22 @@ class ApiService { } // Audio + Future getDefaultServerVoice() async { + _traceApi('Fetching default server TTS voice'); + final response = await _dio.get('/api/v1/audio/config'); + final data = response.data; + if (data is Map) { + final ttsConfig = data['tts']; + if (ttsConfig is Map) { + final voice = ttsConfig['VOICE'] ?? ttsConfig['voice']; + if (voice is String && voice.trim().isNotEmpty) { + return voice.trim(); + } + } + } + return null; + } + Future>> getAvailableServerVoices() async { _traceApi('Fetching server TTS voices'); final response = await _dio.get('/api/v1/audio/voices'); diff --git a/lib/features/chat/services/text_to_speech_service.dart b/lib/features/chat/services/text_to_speech_service.dart index 9f01ebb..2d4277d 100644 --- a/lib/features/chat/services/text_to_speech_service.dart +++ b/lib/features/chat/services/text_to_speech_service.dart @@ -24,6 +24,8 @@ class TextToSpeechService { int _expectedChunks = 0; int _currentIndex = -1; bool _waitingNext = false; + String? _serverDefaultVoice; + Future? _serverDefaultVoiceFuture; VoidCallback? _onStart; VoidCallback? _onComplete; @@ -209,16 +211,18 @@ class TextToSpeechService { /// Update TTS settings on-the-fly Future updateSettings({ - String? voice, + Object? voice = const _VoiceNotProvided(), double? speechRate, double? pitch, double? volume, TtsEngine? engine, }) async { + final voiceProvided = voice is! _VoiceNotProvided; + final voiceValue = voiceProvided ? voice as String? : null; if (!_initialized || !_available) { // Allow engine and voice to update before init if (engine != null) _engine = engine; - if (voice != null) _preferredVoice = voice; + if (voiceProvided) _preferredVoice = voiceValue; return; } @@ -226,8 +230,8 @@ class TextToSpeechService { if (engine != null) { _engine = engine; } - if (voice != null) { - _preferredVoice = voice; + if (voiceProvided) { + _preferredVoice = voiceValue; } if (volume != null) { await _tts.setVolume(volume); @@ -239,7 +243,7 @@ class TextToSpeechService { await _tts.setPitch(pitch); } // Set specific voice by name on device engine - if (_engine == TtsEngine.device) { + if (_engine == TtsEngine.device && voiceProvided) { await _setVoiceByName(_preferredVoice); } } catch (e) { @@ -304,18 +308,45 @@ class TextToSpeechService { try { final serverVoices = await _api.getAvailableServerVoices(); final mapped = serverVoices - .map( - (v) => { - 'name': (v['name'] ?? v['id'] ?? '').toString(), - 'locale': (v['locale'] ?? '').toString(), - }, - ) - .where((e) => (e['name'] as String).isNotEmpty) + .map((v) { + final id = (v['id'] ?? v['name'] ?? '').toString(); + final name = (v['name'] ?? v['id'] ?? '').toString(); + final localeValue = (v['locale'] ?? v['language'] ?? '') + .toString(); + return {'id': id, 'name': name, 'locale': localeValue}; + }) + .where((entry) { + final name = entry['name']; + return name is String && name.trim().isNotEmpty; + }) .toList(); + + final defaultVoice = await _getServerDefaultVoice(); + if (defaultVoice != null && defaultVoice.isNotEmpty) { + final normalized = defaultVoice.toLowerCase(); + final exists = mapped.any((voice) { + final name = voice['name']; + final id = voice['id']; + final lowerName = name is String ? name.toLowerCase() : ''; + final lowerId = id is String ? id.toLowerCase() : ''; + return lowerName == normalized || lowerId == normalized; + }); + if (!exists) { + mapped.insert(0, { + 'id': defaultVoice, + 'name': defaultVoice, + 'locale': '', + }); + } + } + if (mapped.isEmpty) { - return [ - {'name': 'alloy', 'locale': ''}, - ]; + if (defaultVoice != null && defaultVoice.isNotEmpty) { + return [ + {'id': defaultVoice, 'name': defaultVoice, 'locale': ''}, + ]; + } + return const []; } return mapped; } catch (e) { @@ -351,13 +382,54 @@ class TextToSpeechService { } } + Future _resolveServerVoice() async { + final selected = _preferredVoice?.trim(); + if (selected != null && selected.isNotEmpty) { + return selected; + } + final configVoice = await _getServerDefaultVoice(); + if (configVoice != null && configVoice.isNotEmpty) { + return configVoice; + } + return null; + } + + Future _getServerDefaultVoice() async { + if (_api == null) { + return null; + } + if (_serverDefaultVoice != null) { + return _serverDefaultVoice; + } + final pending = _serverDefaultVoiceFuture; + if (pending != null) { + return pending; + } + + final future = _api.getDefaultServerVoice(); + _serverDefaultVoiceFuture = future; + + try { + final voice = await future; + final trimmed = voice?.trim(); + if (trimmed != null && trimmed.isNotEmpty) { + _serverDefaultVoice = trimmed; + return _serverDefaultVoice; + } + return null; + } catch (e) { + _onError?.call(e.toString()); + return null; + } finally { + _serverDefaultVoiceFuture = null; + } + } + // ===== Server chunked playback ===== Future _startServerChunkedPlayback(String text) async { - final effectiveVoice = - (_preferredVoice == null || _preferredVoice!.trim().isEmpty) - ? 'alloy' - : _preferredVoice!; + final resolvedVoice = await _resolveServerVoice(); + final effectiveVoice = resolvedVoice; // Reset queue and create a new session _session++; @@ -398,7 +470,7 @@ class TextToSpeechService { Future _prefetchRemainingChunks( List remaining, - String voice, + String? voice, int session, ) async { for (final chunk in remaining) { @@ -423,7 +495,7 @@ class TextToSpeechService { Future> _fetchServerAudio( String text, - String voice, + String? voice, int session, ) async { return await _api!.generateSpeech(text: text, voice: voice); @@ -829,3 +901,7 @@ class TextToSpeechService { _onError?.call(safeMessage); } } + +class _VoiceNotProvided { + const _VoiceNotProvided(); +}