diff --git a/ios/Podfile.lock b/ios/Podfile.lock index c990258..9a90a3c 100644 --- a/ios/Podfile.lock +++ b/ios/Podfile.lock @@ -1,4 +1,6 @@ PODS: + - audioplayers_darwin (0.0.1): + - Flutter - connectivity_plus (0.0.1): - Flutter - DKImagePickerController/Core (4.3.9): @@ -84,6 +86,7 @@ PODS: - FlutterMacOS DEPENDENCIES: + - audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/ios`) - connectivity_plus (from `.symlinks/plugins/connectivity_plus/ios`) - file_picker (from `.symlinks/plugins/file_picker/ios`) - Flutter (from `Flutter`) @@ -113,6 +116,8 @@ SPEC REPOS: - SwiftyGif EXTERNAL SOURCES: + audioplayers_darwin: + :path: ".symlinks/plugins/audioplayers_darwin/ios" connectivity_plus: :path: ".symlinks/plugins/connectivity_plus/ios" file_picker: @@ -155,6 +160,7 @@ EXTERNAL SOURCES: :path: ".symlinks/plugins/webview_flutter_wkwebview/darwin" SPEC CHECKSUMS: + audioplayers_darwin: ccf9c770ee768abb07e26d90af093f7bab1c12ab connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60 diff --git a/lib/core/persistence/persistence_keys.dart b/lib/core/persistence/persistence_keys.dart index 80e58b7..6afce43 100644 --- a/lib/core/persistence/persistence_keys.dart +++ b/lib/core/persistence/persistence_keys.dart @@ -25,6 +25,9 @@ final class PreferenceKeys { static const String ttsSpeechRate = 'tts_speech_rate'; static const String ttsPitch = 'tts_pitch'; static const String ttsVolume = 'tts_volume'; + static const String ttsEngine = 'tts_engine'; // 'device' | 'server' + static const String ttsServerVoiceId = 'tts_server_voice_id'; + static const String ttsServerVoiceName = 'tts_server_voice_name'; } final class LegacyPreferenceKeys { diff --git a/lib/core/providers/app_providers.dart b/lib/core/providers/app_providers.dart index 9257d86..8ba3794 100644 --- a/lib/core/providers/app_providers.dart +++ b/lib/core/providers/app_providers.dart @@ -1830,7 +1830,11 @@ Future> availableVoices(Ref ref) async { if (api == null) return []; try { - return await api.getAvailableVoices(); + final voices = await api.getAvailableServerVoices(); + return voices + .map((v) => (v['name'] ?? v['id'] ?? '').toString()) + .where((s) => s.isNotEmpty) + .toList(); } catch (e) { DebugLogger.error('voices-failed', scope: 'voices', error: e); return []; diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart index acc7a77..46044aa 100644 --- a/lib/core/services/api_service.dart +++ b/lib/core/services/api_service.dart @@ -2261,12 +2261,24 @@ class ApiService { } // Audio - Future> getAvailableVoices() async { - _traceApi('Fetching available voices'); + Future>> getAvailableServerVoices() async { + _traceApi('Fetching server TTS voices'); final response = await _dio.get('/api/v1/audio/voices'); final data = response.data; + if (data is Map) { + final voices = data['voices']; + if (voices is List) { + return voices + .whereType() + .map((e) => e.cast()) + .toList(); + } + } if (data is List) { - return data.cast(); + // Fallback: plain list of ids + return data + .map((e) => {'id': e.toString(), 'name': e.toString()}) + .toList(); } return []; } @@ -2279,13 +2291,15 @@ class ApiService { _traceApi('Generating speech for text: $textPreview...'); final response = await _dio.post( '/api/v1/audio/speech', - data: {'text': text, if (voice != null) 'voice': voice}, + data: {'input': text, if (voice != null) 'voice': voice}, + options: Options(responseType: ResponseType.bytes), ); // Return audio data as bytes - if (response.data is List) { - return (response.data as List).cast(); - } + final data = response.data; + if (data is List) return data; + if (data is Uint8List) return data.toList(); + if (data is List) return (data).cast(); return []; } diff --git a/lib/core/services/settings_service.dart b/lib/core/services/settings_service.dart index b21a168..b40b533 100644 --- a/lib/core/services/settings_service.dart +++ b/lib/core/services/settings_service.dart @@ -8,6 +8,9 @@ import 'animation_service.dart'; part 'settings_service.g.dart'; +/// TTS engine selection +enum TtsEngine { device, server } + /// Service for managing app-wide settings including accessibility preferences class SettingsService { static const String _reduceMotionKey = PreferenceKeys.reduceMotion; @@ -142,6 +145,12 @@ class SettingsService { ttsPitch: (box.get(PreferenceKeys.ttsPitch) as num?)?.toDouble() ?? 1.0, ttsVolume: (box.get(PreferenceKeys.ttsVolume) as num?)?.toDouble() ?? 1.0, + ttsEngine: _parseTtsEngine( + box.get(PreferenceKeys.ttsEngine) as String?, + ), + ttsServerVoiceId: box.get(PreferenceKeys.ttsServerVoiceId) as String?, + ttsServerVoiceName: + box.get(PreferenceKeys.ttsServerVoiceName) as String?, ), ); } @@ -164,6 +173,7 @@ class SettingsService { PreferenceKeys.ttsSpeechRate: settings.ttsSpeechRate, PreferenceKeys.ttsPitch: settings.ttsPitch, PreferenceKeys.ttsVolume: settings.ttsVolume, + PreferenceKeys.ttsEngine: settings.ttsEngine.name, }; await box.putAll(updates); @@ -185,6 +195,33 @@ class SettingsService { } else { await box.delete(PreferenceKeys.ttsVoice); } + + // Server-specific voice id and friendly name + if (settings.ttsServerVoiceId != null && + settings.ttsServerVoiceId!.isNotEmpty) { + await box.put(PreferenceKeys.ttsServerVoiceId, settings.ttsServerVoiceId); + } else { + await box.delete(PreferenceKeys.ttsServerVoiceId); + } + if (settings.ttsServerVoiceName != null && + settings.ttsServerVoiceName!.isNotEmpty) { + await box.put( + PreferenceKeys.ttsServerVoiceName, + settings.ttsServerVoiceName, + ); + } else { + await box.delete(PreferenceKeys.ttsServerVoiceName); + } + } + + static TtsEngine _parseTtsEngine(String? raw) { + switch ((raw ?? '').toLowerCase()) { + case 'server': + return TtsEngine.server; + case 'device': + default: + return TtsEngine.device; + } } // Voice input specific settings @@ -314,6 +351,9 @@ class AppSettings { final double ttsSpeechRate; final double ttsPitch; final double ttsVolume; + final TtsEngine ttsEngine; + final String? ttsServerVoiceId; + final String? ttsServerVoiceName; const AppSettings({ this.reduceMotion = false, this.animationSpeed = 1.0, @@ -332,6 +372,9 @@ class AppSettings { this.ttsSpeechRate = 0.5, this.ttsPitch = 1.0, this.ttsVolume = 1.0, + this.ttsEngine = TtsEngine.device, + this.ttsServerVoiceId, + this.ttsServerVoiceName, }); AppSettings copyWith({ @@ -352,6 +395,9 @@ class AppSettings { double? ttsSpeechRate, double? ttsPitch, double? ttsVolume, + TtsEngine? ttsEngine, + Object? ttsServerVoiceId = const _DefaultValue(), + Object? ttsServerVoiceName = const _DefaultValue(), }) { return AppSettings( reduceMotion: reduceMotion ?? this.reduceMotion, @@ -375,6 +421,13 @@ class AppSettings { ttsSpeechRate: ttsSpeechRate ?? this.ttsSpeechRate, ttsPitch: ttsPitch ?? this.ttsPitch, ttsVolume: ttsVolume ?? this.ttsVolume, + ttsEngine: ttsEngine ?? this.ttsEngine, + ttsServerVoiceId: ttsServerVoiceId is _DefaultValue + ? this.ttsServerVoiceId + : ttsServerVoiceId as String?, + ttsServerVoiceName: ttsServerVoiceName is _DefaultValue + ? this.ttsServerVoiceName + : ttsServerVoiceName as String?, ); } @@ -397,6 +450,9 @@ class AppSettings { other.ttsSpeechRate == ttsSpeechRate && other.ttsPitch == ttsPitch && other.ttsVolume == ttsVolume && + other.ttsEngine == ttsEngine && + other.ttsServerVoiceId == ttsServerVoiceId && + other.ttsServerVoiceName == ttsServerVoiceName && _listEquals(other.quickPills, quickPills); // socketTransportMode intentionally not included in == to avoid frequent rebuilds } @@ -420,6 +476,9 @@ class AppSettings { ttsSpeechRate, ttsPitch, ttsVolume, + ttsEngine, + ttsServerVoiceId, + ttsServerVoiceName, Object.hashAllUnordered(quickPills), ); } @@ -543,6 +602,21 @@ class AppSettingsNotifier extends _$AppSettingsNotifier { await SettingsService.saveSettings(state); } + Future setTtsEngine(TtsEngine engine) async { + state = state.copyWith(ttsEngine: engine); + await SettingsService.saveSettings(state); + } + + Future setTtsServerVoiceName(String? name) async { + state = state.copyWith(ttsServerVoiceName: name); + await SettingsService.saveSettings(state); + } + + Future setTtsServerVoiceId(String? id) async { + state = state.copyWith(ttsServerVoiceId: id); + await SettingsService.saveSettings(state); + } + Future resetToDefaults() async { const defaultSettings = AppSettings(); await SettingsService.saveSettings(defaultSettings); diff --git a/lib/features/chat/providers/text_to_speech_provider.dart b/lib/features/chat/providers/text_to_speech_provider.dart index b25e341..a68aff4 100644 --- a/lib/features/chat/providers/text_to_speech_provider.dart +++ b/lib/features/chat/providers/text_to_speech_provider.dart @@ -3,6 +3,7 @@ import 'dart:async'; import 'package:flutter_riverpod/flutter_riverpod.dart'; import '../../../core/services/settings_service.dart'; +import '../../../core/providers/app_providers.dart'; import '../../../core/utils/markdown_to_text.dart'; import '../services/text_to_speech_service.dart'; @@ -14,6 +15,11 @@ class TextToSpeechState { final TtsPlaybackStatus status; final String? activeMessageId; final String? errorMessage; + final List sentences; + final List sentenceOffsets; // start indices in full text + final int activeSentenceIndex; // -1 when none + final int? wordStartInSentence; // nullable; only for on-device + final int? wordEndInSentence; // nullable; only for on-device const TextToSpeechState({ this.initialized = false, @@ -21,6 +27,11 @@ class TextToSpeechState { this.status = TtsPlaybackStatus.idle, this.activeMessageId, this.errorMessage, + this.sentences = const [], + this.sentenceOffsets = const [], + this.activeSentenceIndex = -1, + this.wordStartInSentence, + this.wordEndInSentence, }); bool get isSpeaking => status == TtsPlaybackStatus.speaking; @@ -36,6 +47,12 @@ class TextToSpeechState { bool clearActiveMessageId = false, String? errorMessage, bool clearErrorMessage = false, + List? sentences, + List? sentenceOffsets, + int? activeSentenceIndex, + bool clearWord = false, + int? wordStartInSentence, + int? wordEndInSentence, }) { return TextToSpeechState( initialized: initialized ?? this.initialized, @@ -47,6 +64,15 @@ class TextToSpeechState { errorMessage: clearErrorMessage ? null : errorMessage ?? this.errorMessage, + sentences: sentences ?? this.sentences, + sentenceOffsets: sentenceOffsets ?? this.sentenceOffsets, + activeSentenceIndex: activeSentenceIndex ?? this.activeSentenceIndex, + wordStartInSentence: clearWord + ? null + : (wordStartInSentence ?? this.wordStartInSentence), + wordEndInSentence: clearWord + ? null + : (wordEndInSentence ?? this.wordEndInSentence), ); } } @@ -69,6 +95,8 @@ class TextToSpeechController extends Notifier { onPause: _handlePause, onContinue: _handleContinue, onError: _handleError, + onSentenceIndex: _handleSentenceIndex, + onDeviceWordProgress: _handleDeviceWordProgress, ); ref.onDispose(() { @@ -79,11 +107,15 @@ class TextToSpeechController extends Notifier { // Listen to settings changes and update TTS when initialized ref.listen(appSettingsProvider, (previous, next) { if (_service.isInitialized && _service.isAvailable) { + final selectedVoice = next.ttsEngine == TtsEngine.server + ? next.ttsServerVoiceId + : next.ttsVoice; _service.updateSettings( - voice: next.ttsVoice, + voice: selectedVoice, speechRate: next.ttsSpeechRate, pitch: next.ttsPitch, volume: next.ttsVolume, + engine: next.ttsEngine, ); } }, fireImmediately: false); @@ -105,10 +137,13 @@ class TextToSpeechController extends Notifier { final settings = ref.read(appSettingsProvider); final future = _service .initialize( - voice: settings.ttsVoice, + voice: settings.ttsEngine == TtsEngine.server + ? settings.ttsServerVoiceId + : settings.ttsVoice, speechRate: settings.ttsSpeechRate, pitch: settings.ttsPitch, volume: settings.ttsVolume, + engine: settings.ttsEngine, ) .then((available) { if (!ref.mounted) { @@ -176,15 +211,23 @@ class TextToSpeechController extends Notifier { return; } + // Prepare sentence split for highlighting + final cleanText = MarkdownToText.convert(text); + final sentences = _splitForTts(cleanText); + final offsets = _computeOffsets(sentences); + state = state.copyWith( status: TtsPlaybackStatus.loading, activeMessageId: messageId, clearErrorMessage: true, + sentences: sentences, + sentenceOffsets: offsets, + activeSentenceIndex: sentences.isEmpty ? -1 : 0, + clearWord: true, ); try { // Convert markdown to clean text for TTS - final cleanText = MarkdownToText.convert(text); if (cleanText.isEmpty) { // No speakable content if (!ref.mounted) { @@ -216,6 +259,34 @@ class TextToSpeechController extends Notifier { } } + List _splitForTts(String text) { + final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim(); + if (normalized.isEmpty) return const []; + final parts = []; + final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)"); + int index = 0; + for (final match in sentenceRegex.allMatches('$normalized ')) { + final s = match.group(1) ?? ''; + if (s.trim().isNotEmpty) parts.add(s.trim()); + index = match.end; + } + if (index < normalized.length) { + final tail = normalized.substring(index).trim(); + if (tail.isNotEmpty) parts.add(tail); + } + return parts; + } + + List _computeOffsets(List sentences) { + final offsets = []; + int acc = 0; + for (final s in sentences) { + offsets.add(acc); + acc += s.length + 1; // assume a space or punctuation between + } + return offsets; + } + Future pause() async { if (!state.initialized || !state.available) { return; @@ -286,10 +357,46 @@ class TextToSpeechController extends Notifier { clearActiveMessageId: true, ); } + + void _handleSentenceIndex(int index) { + if (!ref.mounted) return; + final clamped = index.clamp( + -1, + state.sentences.isEmpty ? -1 : state.sentences.length - 1, + ); + state = state.copyWith( + activeSentenceIndex: clamped, + // clear per-word highlight when sentence switches (server or device) + clearWord: true, + ); + } + + void _handleDeviceWordProgress(int start, int end) { + if (!ref.mounted) return; + // Map global offsets to sentence index + final offsets = state.sentenceOffsets; + if (offsets.isEmpty) return; + int idx = 0; + for (var i = 0; i < offsets.length; i++) { + final sStart = offsets[i]; + final sEnd = i + 1 < offsets.length ? offsets[i + 1] : 1 << 30; + if (start >= sStart && start < sEnd) { + idx = i; + break; + } + } + final sentenceStart = offsets[idx]; + state = state.copyWith( + activeSentenceIndex: idx, + wordStartInSentence: (start - sentenceStart).clamp(0, 1 << 20), + wordEndInSentence: (end - sentenceStart).clamp(0, 1 << 20), + ); + } } final textToSpeechServiceProvider = Provider((ref) { - final service = TextToSpeechService(); + final api = ref.watch(apiServiceProvider); + final service = TextToSpeechService(api: api); ref.onDispose(() { unawaited(service.dispose()); }); diff --git a/lib/features/chat/services/text_to_speech_service.dart b/lib/features/chat/services/text_to_speech_service.dart index 6591f41..9f01ebb 100644 --- a/lib/features/chat/services/text_to_speech_service.dart +++ b/lib/features/chat/services/text_to_speech_service.dart @@ -1,16 +1,29 @@ import 'dart:async'; import 'dart:io' show Platform; +import 'package:audioplayers/audioplayers.dart'; import 'package:flutter/foundation.dart'; import 'package:flutter/widgets.dart'; import 'package:flutter_tts/flutter_tts.dart'; +import '../../../core/services/api_service.dart'; +import '../../../core/services/settings_service.dart'; + /// Lightweight wrapper around FlutterTts to centralize configuration class TextToSpeechService { final FlutterTts _tts = FlutterTts(); + final AudioPlayer _player = AudioPlayer(); + final ApiService? _api; + TtsEngine _engine = TtsEngine.device; + String? _preferredVoice; bool _initialized = false; bool _available = false; bool _voiceConfigured = false; + int _session = 0; // increments to cancel in-flight work + final List _buffered = []; // server chunks + int _expectedChunks = 0; + int _currentIndex = -1; + bool _waitingNext = false; VoidCallback? _onStart; VoidCallback? _onComplete; @@ -18,10 +31,20 @@ class TextToSpeechService { VoidCallback? _onPause; VoidCallback? _onContinue; void Function(String message)? _onError; + void Function(int sentenceIndex)? _onSentenceIndex; + void Function(int start, int end)? _onDeviceWordProgress; bool get isInitialized => _initialized; bool get isAvailable => _available; + TextToSpeechService({ApiService? api}) : _api = api { + // Wire minimal player events to callbacks + _player.onPlayerComplete.listen((_) => _onAudioComplete()); + _player.onPlayerStateChanged.listen((s) { + if (s == PlayerState.playing) _handleStart(); + }); + } + /// Register callbacks for TTS lifecycle events void bindHandlers({ VoidCallback? onStart, @@ -30,6 +53,8 @@ class TextToSpeechService { VoidCallback? onPause, VoidCallback? onContinue, void Function(String message)? onError, + void Function(int sentenceIndex)? onSentenceIndex, + void Function(int start, int end)? onDeviceWordProgress, }) { _onStart = onStart; _onComplete = onComplete; @@ -37,6 +62,8 @@ class TextToSpeechService { _onPause = onPause; _onContinue = onContinue; _onError = onError; + _onSentenceIndex = onSentenceIndex; + _onDeviceWordProgress = onDeviceWordProgress; _tts.setStartHandler(_handleStart); _tts.setCompletionHandler(_handleComplete); @@ -44,6 +71,13 @@ class TextToSpeechService { _tts.setPauseHandler(_handlePause); _tts.setContinueHandler(_handleContinue); _tts.setErrorHandler(_handleError); + try { + _tts.setProgressHandler((String text, int start, int end, String word) { + _onDeviceWordProgress?.call(start, end); + }); + } catch (_) { + // Some platforms may not support progress handler + } } /// Initialize the native TTS engine lazily @@ -52,12 +86,15 @@ class TextToSpeechService { double speechRate = 0.5, double pitch = 1.0, double volume = 1.0, + TtsEngine engine = TtsEngine.device, }) async { if (_initialized) { return _available; } try { + _engine = engine; + _preferredVoice = voice; await _tts.awaitSpeakCompletion(false); // Set volume @@ -97,34 +134,47 @@ class TextToSpeechService { } if (!_initialized) { - await initialize(); + await initialize(voice: _preferredVoice, engine: _engine); } + if (_engine == TtsEngine.server && _api != null) { + // Server-backed TTS with sentence chunking & queued playback + try { + await _startServerChunkedPlayback(text); + } catch (e) { + _onError?.call(e.toString()); + await _speakOnDevice(text); + } + return; + } + + // Device TTS path + await _speakOnDevice(text); + } + + Future _speakOnDevice(String text) async { if (!_available) { throw StateError('Text-to-speech is unavailable on this device'); } - await _tts.stop(); if (!_voiceConfigured) { await _configurePreferredVoice(); } final result = await _tts.speak(text); - if (result == null) { - return; - } - if (result is int && result != 1) { _onError?.call('Text-to-speech engine returned code $result'); } + _onSentenceIndex?.call(0); } Future pause() async { - if (!_initialized || !_available) { - return; - } - + if (!_initialized) return; try { - await _tts.pause(); + if (_engine == TtsEngine.server) { + await _player.pause(); + } else if (_available) { + await _tts.pause(); + } } catch (e) { _onError?.call(e.toString()); } @@ -136,7 +186,17 @@ class TextToSpeechService { } try { - await _tts.stop(); + // Cancel any in-flight server work + _session++; + _buffered.clear(); + _expectedChunks = 0; + _currentIndex = -1; + _waitingNext = false; + if (_engine == TtsEngine.server) { + await _player.stop(); + } else { + await _tts.stop(); + } } catch (e) { _onError?.call(e.toString()); } @@ -144,6 +204,7 @@ class TextToSpeechService { Future dispose() async { await stop(); + await _player.dispose(); } /// Update TTS settings on-the-fly @@ -152,12 +213,22 @@ class TextToSpeechService { double? speechRate, double? pitch, double? volume, + TtsEngine? engine, }) async { if (!_initialized || !_available) { + // Allow engine and voice to update before init + if (engine != null) _engine = engine; + if (voice != null) _preferredVoice = voice; return; } try { + if (engine != null) { + _engine = engine; + } + if (voice != null) { + _preferredVoice = voice; + } if (volume != null) { await _tts.setVolume(volume); } @@ -167,8 +238,10 @@ class TextToSpeechService { if (pitch != null) { await _tts.setPitch(pitch); } - // Set specific voice by name - await _setVoiceByName(voice); + // Set specific voice by name on device engine + if (_engine == TtsEngine.device) { + await _setVoiceByName(_preferredVoice); + } } catch (e) { _onError?.call(e.toString()); } @@ -224,7 +297,31 @@ class TextToSpeechService { /// Get available voices from the TTS engine Future>> getAvailableVoices() async { if (!_initialized) { - await initialize(); + await initialize(voice: _preferredVoice, engine: _engine); + } + + if (_engine == TtsEngine.server && _api != null) { + try { + final serverVoices = await _api.getAvailableServerVoices(); + final mapped = serverVoices + .map( + (v) => { + 'name': (v['name'] ?? v['id'] ?? '').toString(), + 'locale': (v['locale'] ?? '').toString(), + }, + ) + .where((e) => (e['name'] as String).isNotEmpty) + .toList(); + if (mapped.isEmpty) { + return [ + {'name': 'alloy', 'locale': ''}, + ]; + } + return mapped; + } catch (e) { + _onError?.call(e.toString()); + // Fall back to device voices + } } if (!_available) { @@ -254,6 +351,151 @@ class TextToSpeechService { } } + // ===== Server chunked playback ===== + + Future _startServerChunkedPlayback(String text) async { + final effectiveVoice = + (_preferredVoice == null || _preferredVoice!.trim().isEmpty) + ? 'alloy' + : _preferredVoice!; + + // Reset queue and create a new session + _session++; + final session = _session; + _buffered.clear(); + _expectedChunks = 0; + _currentIndex = -1; + _waitingNext = false; + + final chunks = _splitForTts(text); + if (chunks.isEmpty) return; + _expectedChunks = chunks.length; + + // Fetch first chunk to start playback quickly + final firstBytes = await _fetchServerAudio( + chunks.first, + effectiveVoice, + session, + ); + if (session != _session) return; // canceled + if (firstBytes.isEmpty) throw Exception('Empty audio response'); + + await _player.stop(); + _buffered.add(Uint8List.fromList(firstBytes)); + _currentIndex = 0; + await _player.play(BytesSource(_buffered.first)); + _onSentenceIndex?.call(0); + + // Prefetch the rest in background + unawaited( + _prefetchRemainingChunks( + chunks.skip(1).toList(), + effectiveVoice, + session, + ), + ); + } + + Future _prefetchRemainingChunks( + List remaining, + String voice, + int session, + ) async { + for (final chunk in remaining) { + if (session != _session) return; // canceled + try { + final audio = await _fetchServerAudio(chunk, voice, session); + if (session != _session) return; + if (audio.isNotEmpty) { + _buffered.add(Uint8List.fromList(audio)); + // If the player finished the previous chunk and is waiting, start now + if (_waitingNext && (_currentIndex + 1) < _buffered.length) { + _waitingNext = false; + await _playNextIfBuffered(session); + } + } + } catch (e) { + _onError?.call(e.toString()); + // continue with other chunks + } + } + } + + Future> _fetchServerAudio( + String text, + String voice, + int session, + ) async { + return await _api!.generateSpeech(text: text, voice: voice); + } + + Future _onAudioComplete() async { + final session = _session; + // If there are more expected chunks + if ((_currentIndex + 1) < _expectedChunks) { + // If next chunk is already buffered, play it + if ((_currentIndex + 1) < _buffered.length) { + await _playNextIfBuffered(session); + } else { + // Wait for prefetch to provide it + _waitingNext = true; + } + return; + } + // No more chunks – this is the real completion + _handleComplete(); + } + + Future _playNextIfBuffered(int session) async { + if (session != _session) return; + final nextIndex = _currentIndex + 1; + if (nextIndex < 0 || nextIndex >= _buffered.length) return; + _currentIndex = nextIndex; + final bytes = _buffered[nextIndex]; + await _player.play(BytesSource(bytes)); + _onSentenceIndex?.call(_currentIndex); + } + + List _splitForTts(String text) { + // Normalize whitespace + final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim(); + if (normalized.isEmpty) return const []; + + // Split on sentence-ending punctuation while keeping the delimiter + final parts = []; + final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)"); + int index = 0; + for (final match in sentenceRegex.allMatches('$normalized ')) { + final s = match.group(1) ?? ''; + if (s.trim().isNotEmpty) parts.add(s.trim()); + index = match.end; + } + if (index < normalized.length) { + final tail = normalized.substring(index).trim(); + if (tail.isNotEmpty) parts.add(tail); + } + + // Fallback to length-based splits for very long segments + const maxLen = 300; + final chunks = []; + for (final p in parts.isEmpty ? [normalized] : parts) { + if (p.length <= maxLen) { + chunks.add(p); + } else { + // Try splitting on commas/spaces + var remaining = p; + while (remaining.length > maxLen) { + int cut = remaining.lastIndexOf(RegExp(r",\s|\s"), maxLen); + cut = cut <= 0 ? maxLen : cut; + chunks.add(remaining.substring(0, cut).trim()); + remaining = remaining.substring(cut).trim(); + } + if (remaining.isNotEmpty) chunks.add(remaining); + } + } + return chunks; + } + Future _configurePreferredVoice() async { if (_voiceConfigured) { return; diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart index 370ee77..5d6a42b 100644 --- a/lib/features/chat/widgets/assistant_message_widget.dart +++ b/lib/features/chat/widgets/assistant_message_widget.dart @@ -18,6 +18,7 @@ import 'package:conduit/l10n/app_localizations.dart'; import 'enhanced_attachment.dart'; import 'package:conduit/shared/widgets/chat_action_button.dart'; import '../../../shared/widgets/model_avatar.dart'; +import '../../../shared/widgets/conduit_components.dart'; import 'package:url_launcher/url_launcher_string.dart'; import '../providers/chat_providers.dart' show sendMessageWithContainer; import '../../../core/utils/debug_logger.dart'; @@ -457,12 +458,72 @@ class _AssistantMessageWidgetState extends ConsumerState } if (children.isEmpty) return const SizedBox.shrink(); + // Append TTS karaoke bar if this is the active message + final ttsState = ref.watch(textToSpeechControllerProvider); + final isActive = + ttsState.activeMessageId == _messageId && + (ttsState.status == TtsPlaybackStatus.speaking || + ttsState.status == TtsPlaybackStatus.paused || + ttsState.status == TtsPlaybackStatus.loading); + if (isActive && ttsState.activeSentenceIndex >= 0) { + children.add(const SizedBox(height: Spacing.sm)); + children.add(_buildKaraokeBar(ttsState)); + } + return Column( crossAxisAlignment: CrossAxisAlignment.start, children: children, ); } + Widget _buildKaraokeBar(TextToSpeechState ttsState) { + final theme = context.conduitTheme; + final idx = ttsState.activeSentenceIndex; + if (idx < 0 || idx >= ttsState.sentences.length) { + return const SizedBox.shrink(); + } + final sentence = ttsState.sentences[idx]; + final ws = ttsState.wordStartInSentence; + final we = ttsState.wordEndInSentence; + + final baseStyle = TextStyle( + color: theme.textPrimary, + height: 1.2, + fontSize: 14, + ); + final highlightStyle = baseStyle.copyWith( + backgroundColor: theme.buttonPrimary.withValues(alpha: 0.25), + color: theme.textPrimary, + fontWeight: FontWeight.w600, + ); + + InlineSpan buildSpans() { + if (ws == null || + we == null || + ws < 0 || + we <= ws || + ws >= sentence.length) { + return TextSpan(text: sentence, style: baseStyle); + } + final safeEnd = we.clamp(0, sentence.length); + final before = sentence.substring(0, ws); + final word = sentence.substring(ws, safeEnd); + final after = sentence.substring(safeEnd); + return TextSpan( + children: [ + if (before.isNotEmpty) TextSpan(text: before, style: baseStyle), + TextSpan(text: word, style: highlightStyle), + if (after.isNotEmpty) TextSpan(text: after, style: baseStyle), + ], + ); + } + + return ConduitCard( + padding: const EdgeInsets.all(Spacing.sm), + child: RichText(text: buildSpans()), + ); + } + bool get _shouldShowTypingIndicator => widget.isStreaming && _isAssistantResponseEmpty; diff --git a/lib/features/profile/views/app_customization_page.dart b/lib/features/profile/views/app_customization_page.dart index ba2c4ac..bc5e01d 100644 --- a/lib/features/profile/views/app_customization_page.dart +++ b/lib/features/profile/views/app_customization_page.dart @@ -441,10 +441,97 @@ class AppCustomizationPage extends ConsumerWidget { TextStyle(color: theme.sidebarForeground, fontSize: 18), ), const SizedBox(height: Spacing.sm), + ConduitCard( + padding: const EdgeInsets.all(Spacing.md), + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Row( + children: [ + _buildIconBadge( + context, + UiUtils.platformIcon( + ios: CupertinoIcons.settings, + android: Icons.settings_voice, + ), + color: theme.buttonPrimary, + ), + const SizedBox(width: Spacing.sm), + const Text('Engine'), + const Spacer(), + Wrap( + spacing: Spacing.sm, + children: [ + ChoiceChip( + label: const Text('On Device'), + selected: settings.ttsEngine == TtsEngine.device, + showCheckmark: false, + selectedColor: theme.buttonPrimary, + backgroundColor: theme.cardBackground, + side: BorderSide( + color: settings.ttsEngine == TtsEngine.device + ? theme.buttonPrimary.withValues(alpha: 0.6) + : theme.textPrimary.withValues(alpha: 0.2), + ), + labelStyle: TextStyle( + color: settings.ttsEngine == TtsEngine.device + ? theme.buttonPrimaryText + : theme.textPrimary, + fontWeight: FontWeight.w600, + ), + onSelected: (v) { + if (v) { + final notifier = ref.read( + appSettingsProvider.notifier, + ); + notifier.setTtsEngine(TtsEngine.device); + // Keep previous voice (device voices) + } + }, + ), + ChoiceChip( + label: const Text('Server'), + selected: settings.ttsEngine == TtsEngine.server, + showCheckmark: false, + selectedColor: theme.buttonPrimary, + backgroundColor: theme.cardBackground, + side: BorderSide( + color: settings.ttsEngine == TtsEngine.server + ? theme.buttonPrimary.withValues(alpha: 0.6) + : theme.textPrimary.withValues(alpha: 0.2), + ), + labelStyle: TextStyle( + color: settings.ttsEngine == TtsEngine.server + ? theme.buttonPrimaryText + : theme.textPrimary, + fontWeight: FontWeight.w600, + ), + onSelected: (v) { + if (v) { + final notifier = ref.read( + appSettingsProvider.notifier, + ); + // Clear device-specific voice so server can default + notifier.setTtsVoice(null); + notifier.setTtsEngine(TtsEngine.server); + } + }, + ), + ], + ), + ], + ), + ], + ), + ), + const SizedBox(height: Spacing.sm), _ExpandableCard( title: l10n.ttsVoice, subtitle: _getDisplayVoiceName( - settings.ttsVoice, + settings.ttsEngine == TtsEngine.server + ? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ?? + '') + : (settings.ttsVoice ?? ''), l10n.ttsSystemDefault, ), icon: UiUtils.platformIcon( @@ -466,7 +553,11 @@ class AppCustomizationPage extends ConsumerWidget { ), title: l10n.ttsVoice, subtitle: _getDisplayVoiceName( - settings.ttsVoice, + settings.ttsEngine == TtsEngine.server + ? ((settings.ttsServerVoiceName ?? + settings.ttsServerVoiceId) ?? + '') + : (settings.ttsVoice ?? ''), l10n.ttsSystemDefault, ), onTap: () => _showVoicePickerSheet(context, ref, settings), @@ -616,7 +707,10 @@ class AppCustomizationPage extends ConsumerWidget { final theme = context.conduitTheme; final ttsService = ref.read(textToSpeechServiceProvider); - // Fetch available voices + // Ensure the service uses the currently selected engine before fetching + await ttsService.updateSettings(engine: settings.ttsEngine); + + // Fetch available voices from the active engine final allVoices = await ttsService.getAvailableVoices(); if (!context.mounted) return; @@ -729,17 +823,29 @@ class AppCustomizationPage extends ConsumerWidget { style: theme.bodyMedium?.copyWith( color: theme.sidebarForeground, - fontWeight: settings.ttsVoice == null + fontWeight: + (settings.ttsEngine == TtsEngine.server + ? settings.ttsServerVoiceId == null + : settings.ttsVoice == null) ? FontWeight.bold : FontWeight.normal, ) ?? TextStyle(color: theme.sidebarForeground), ), - trailing: settings.ttsVoice == null + trailing: + (settings.ttsEngine == TtsEngine.server + ? settings.ttsServerVoiceId == null + : settings.ttsVoice == null) ? Icon(Icons.check, color: theme.buttonPrimary) : null, onTap: () { - ref.read(appSettingsProvider.notifier).setTtsVoice(null); + final notifier = ref.read(appSettingsProvider.notifier); + if (settings.ttsEngine == TtsEngine.server) { + notifier.setTtsServerVoiceId(null); + notifier.setTtsServerVoiceName(null); + } else { + notifier.setTtsVoice(null); + } Navigator.of(sheetContext).pop(); }, ), @@ -823,7 +929,9 @@ class AppCustomizationPage extends ConsumerWidget { final voiceId = _getVoiceIdentifier(voice); final displayName = _formatVoiceName(voice); final subtitle = _getVoiceSubtitle(voice); - final isSelected = settings.ttsVoice == voiceId; + final isSelected = settings.ttsEngine == TtsEngine.server + ? settings.ttsServerVoiceId == voiceId + : settings.ttsVoice == voiceId; return ListTile( leading: Icon( @@ -865,9 +973,15 @@ class AppCustomizationPage extends ConsumerWidget { ? Icon(Icons.check, color: theme.buttonPrimary) : null, onTap: () { - ref - .read(appSettingsProvider.notifier) - .setTtsVoice(voiceId); + final notifier = ref.read( + appSettingsProvider.notifier, + ); + if (settings.ttsEngine == TtsEngine.server) { + notifier.setTtsServerVoiceId(voiceId); + notifier.setTtsServerVoiceName(displayName); + } else { + notifier.setTtsVoice(voiceId); + } Navigator.of(sheetContext).pop(); }, ); diff --git a/pubspec.lock b/pubspec.lock index 9b612c1..a57444b 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -65,6 +65,62 @@ packages: url: "https://pub.dev" source: hosted version: "2.13.0" + audioplayers: + dependency: "direct main" + description: + name: audioplayers + sha256: c05c6147124cd63e725e861335a8b4d57300b80e6e92cea7c145c739223bbaef + url: "https://pub.dev" + source: hosted + version: "5.2.1" + audioplayers_android: + dependency: transitive + description: + name: audioplayers_android + sha256: b00e1a0e11365d88576320ec2d8c192bc21f1afb6c0e5995d1c57ae63156acb5 + url: "https://pub.dev" + source: hosted + version: "4.0.3" + audioplayers_darwin: + dependency: transitive + description: + name: audioplayers_darwin + sha256: "3034e99a6df8d101da0f5082dcca0a2a99db62ab1d4ddb3277bed3f6f81afe08" + url: "https://pub.dev" + source: hosted + version: "5.0.2" + audioplayers_linux: + dependency: transitive + description: + name: audioplayers_linux + sha256: "60787e73fefc4d2e0b9c02c69885402177e818e4e27ef087074cf27c02246c9e" + url: "https://pub.dev" + source: hosted + version: "3.1.0" + audioplayers_platform_interface: + dependency: transitive + description: + name: audioplayers_platform_interface + sha256: "365c547f1bb9e77d94dd1687903a668d8f7ac3409e48e6e6a3668a1ac2982adb" + url: "https://pub.dev" + source: hosted + version: "6.1.0" + audioplayers_web: + dependency: transitive + description: + name: audioplayers_web + sha256: "22cd0173e54d92bd9b2c80b1204eb1eb159ece87475ab58c9788a70ec43c2a62" + url: "https://pub.dev" + source: hosted + version: "4.1.0" + audioplayers_windows: + dependency: transitive + description: + name: audioplayers_windows + sha256: "9536812c9103563644ada2ef45ae523806b0745f7a78e89d1b5fb1951de90e1a" + url: "https://pub.dev" + source: hosted + version: "3.1.0" boolean_selector: dependency: transitive description: diff --git a/pubspec.yaml b/pubspec.yaml index 4aa6980..8e2d10a 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -47,6 +47,7 @@ dependencies: record: ^6.1.1 stts: ^1.2.5 flutter_tts: ^4.2.3 + audioplayers: ^5.2.1 image_picker: ^1.2.0 file_picker: ^10.3.3 path_provider: ^2.1.4