diff --git a/lib/features/chat/providers/text_to_speech_provider.dart b/lib/features/chat/providers/text_to_speech_provider.dart
index dc234d9..a68aff4 100644
--- a/lib/features/chat/providers/text_to_speech_provider.dart
+++ b/lib/features/chat/providers/text_to_speech_provider.dart
@@ -15,6 +15,11 @@ class TextToSpeechState {
   final TtsPlaybackStatus status;
   final String? activeMessageId;
   final String? errorMessage;
+  final List<String> sentences;
+  final List<int> sentenceOffsets; // start indices in full text
+  final int activeSentenceIndex; // -1 when none
+  final int? wordStartInSentence; // nullable; only for on-device
+  final int? wordEndInSentence; // nullable; only for on-device
 
   const TextToSpeechState({
     this.initialized = false,
@@ -22,6 +27,11 @@ class TextToSpeechState {
     this.status = TtsPlaybackStatus.idle,
     this.activeMessageId,
     this.errorMessage,
+    this.sentences = const [],
+    this.sentenceOffsets = const [],
+    this.activeSentenceIndex = -1,
+    this.wordStartInSentence,
+    this.wordEndInSentence,
   });
 
   bool get isSpeaking => status == TtsPlaybackStatus.speaking;
@@ -37,6 +47,12 @@ class TextToSpeechState {
     bool clearActiveMessageId = false,
     String? errorMessage,
     bool clearErrorMessage = false,
+    List<String>? sentences,
+    List<int>? sentenceOffsets,
+    int? activeSentenceIndex,
+    bool clearWord = false,
+    int? wordStartInSentence,
+    int? wordEndInSentence,
   }) {
     return TextToSpeechState(
       initialized: initialized ?? this.initialized,
@@ -48,6 +64,15 @@ class TextToSpeechState {
       errorMessage: clearErrorMessage
           ? null
           : errorMessage ?? this.errorMessage,
+      sentences: sentences ?? this.sentences,
+      sentenceOffsets: sentenceOffsets ?? this.sentenceOffsets,
+      activeSentenceIndex: activeSentenceIndex ?? this.activeSentenceIndex,
+      wordStartInSentence: clearWord
+          ? null
+          : (wordStartInSentence ?? this.wordStartInSentence),
+      wordEndInSentence: clearWord
+          ? null
+          : (wordEndInSentence ?? this.wordEndInSentence),
     );
   }
 }
@@ -70,6 +95,8 @@ class TextToSpeechController extends Notifier {
       onPause: _handlePause,
       onContinue: _handleContinue,
       onError: _handleError,
+      onSentenceIndex: _handleSentenceIndex,
+      onDeviceWordProgress: _handleDeviceWordProgress,
     );
 
     ref.onDispose(() {
@@ -184,15 +211,23 @@ class TextToSpeechController extends Notifier {
       return;
     }
 
+    // Prepare sentence split for highlighting
+    final cleanText = MarkdownToText.convert(text);
+    final sentences = _splitForTts(cleanText);
+    final offsets = _computeOffsets(sentences);
+
     state = state.copyWith(
       status: TtsPlaybackStatus.loading,
       activeMessageId: messageId,
       clearErrorMessage: true,
+      sentences: sentences,
+      sentenceOffsets: offsets,
+      activeSentenceIndex: sentences.isEmpty ? -1 : 0,
+      clearWord: true,
     );
 
     try {
       // Convert markdown to clean text for TTS
-      final cleanText = MarkdownToText.convert(text);
       if (cleanText.isEmpty) {
         // No speakable content
         if (!ref.mounted) {
@@ -224,6 +259,52 @@ class TextToSpeechController extends Notifier {
     }
   }
 
+  /// Splits [text] into sentence-sized chunks for karaoke highlighting.
+  ///
+  /// Whitespace runs are collapsed to single spaces first. A chunk ends at
+  /// one or more of `.`, `!` or `?` followed by whitespace (or a literal
+  /// `$`, see the note below); any unterminated remainder becomes the final
+  /// chunk. Returns an empty list when [text] is blank.
+  ///
+  /// NOTE(review): inside a raw string `\$` is a literal dollar sign, not an
+  /// end-of-input anchor; the trailing space appended to the input below is
+  /// what lets the last sentence match. Left unchanged because the service
+  /// presumably splits the same way - confirm before altering.
+  List<String> _splitForTts(String text) {
+    final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();
+    if (normalized.isEmpty) return const [];
+    final parts = <String>[];
+    final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)");
+    int index = 0;
+    for (final match in sentenceRegex.allMatches('$normalized ')) {
+      final s = match.group(1) ?? '';
+      if (s.trim().isNotEmpty) parts.add(s.trim());
+      index = match.end;
+    }
+    if (index < normalized.length) {
+      final tail = normalized.substring(index).trim();
+      if (tail.isNotEmpty) parts.add(tail);
+    }
+    return parts;
+  }
+
+  /// Returns the start offset of each entry in [sentences], assuming the
+  /// entries are laid out back to back with exactly one separator character
+  /// between them.
+  ///
+  /// NOTE(review): on-device progress offsets are mapped through this table;
+  /// if the engine receives text that is not normalized the same way, the
+  /// mapping will drift - confirm against the text passed to the engine.
+  List<int> _computeOffsets(List<String> sentences) {
+    final offsets = <int>[];
+    int acc = 0;
+    for (final s in sentences) {
+      offsets.add(acc);
+      acc += s.length + 1; // assume a space or punctuation between
+    }
+    return offsets;
+  }
+
   Future pause() async {
     if (!state.initialized || !state.available) {
       return;
@@ -294,6 +375,51 @@ class TextToSpeechController extends Notifier {
       clearActiveMessageId: true,
     );
   }
+
+  /// Records the sentence the service reports as currently playing.
+  ///
+  /// [index] is clamped to the known sentence range (`-1` when there are no
+  /// sentences) and any per-word highlight is cleared.
+  void _handleSentenceIndex(int index) {
+    if (!ref.mounted) return;
+    final clamped = index.clamp(
+      -1,
+      state.sentences.isEmpty ? -1 : state.sentences.length - 1,
+    );
+    state = state.copyWith(
+      activeSentenceIndex: clamped,
+      // clear per-word highlight when sentence switches (server or device)
+      clearWord: true,
+    );
+  }
+
+  /// Translates an on-device word range into a sentence-relative highlight.
+  ///
+  /// [start] and [end] are character offsets reported by the engine's
+  /// progress callback. The sentence whose offset window contains [start]
+  /// becomes active (falling back to the first sentence when none matches)
+  /// and the word range is stored relative to that sentence's start.
+  void _handleDeviceWordProgress(int start, int end) {
+    if (!ref.mounted) return;
+    // Map global offsets to sentence index
+    final offsets = state.sentenceOffsets;
+    if (offsets.isEmpty) return;
+    int idx = 0;
+    for (var i = 0; i < offsets.length; i++) {
+      final sStart = offsets[i];
+      final sEnd = i + 1 < offsets.length ? offsets[i + 1] : 1 << 30;
+      if (start >= sStart && start < sEnd) {
+        idx = i;
+        break;
+      }
+    }
+    final sentenceStart = offsets[idx];
+    state = state.copyWith(
+      activeSentenceIndex: idx,
+      wordStartInSentence: (start - sentenceStart).clamp(0, 1 << 20),
+      wordEndInSentence: (end - sentenceStart).clamp(0, 1 << 20),
+    );
+  }
 }
 
 final textToSpeechServiceProvider = Provider((ref) {
diff --git a/lib/features/chat/services/text_to_speech_service.dart b/lib/features/chat/services/text_to_speech_service.dart
index 5d344e6..9f01ebb 100644
--- a/lib/features/chat/services/text_to_speech_service.dart
+++ b/lib/features/chat/services/text_to_speech_service.dart
@@ -31,6 +31,8 @@ class TextToSpeechService {
   VoidCallback? _onPause;
   VoidCallback? _onContinue;
   void Function(String message)? _onError;
+  void Function(int sentenceIndex)? _onSentenceIndex;
+  void Function(int start, int end)? _onDeviceWordProgress;
 
   bool get isInitialized => _initialized;
   bool get isAvailable => _available;
@@ -51,6 +53,8 @@ class TextToSpeechService {
     VoidCallback? onPause,
     VoidCallback? onContinue,
     void Function(String message)? onError,
+    void Function(int sentenceIndex)? onSentenceIndex,
+    void Function(int start, int end)? onDeviceWordProgress,
   }) {
     _onStart = onStart;
     _onComplete = onComplete;
@@ -58,6 +62,8 @@ class TextToSpeechService {
     _onPause = onPause;
     _onContinue = onContinue;
     _onError = onError;
+    _onSentenceIndex = onSentenceIndex;
+    _onDeviceWordProgress = onDeviceWordProgress;
 
     _tts.setStartHandler(_handleStart);
     _tts.setCompletionHandler(_handleComplete);
@@ -65,6 +71,13 @@ class TextToSpeechService {
     _tts.setPauseHandler(_handlePause);
     _tts.setContinueHandler(_handleContinue);
     _tts.setErrorHandler(_handleError);
+    try {
+      _tts.setProgressHandler((String text, int start, int end, String word) {
+        _onDeviceWordProgress?.call(start, end);
+      });
+    } catch (_) {
+      // Some platforms may not support progress handler
+    }
   }
 
   /// Initialize the native TTS engine lazily
@@ -151,6 +164,7 @@ class TextToSpeechService {
     if (result is int && result != 1) {
       _onError?.call('Text-to-speech engine returned code $result');
     }
+    _onSentenceIndex?.call(0);
   }
 
   Future pause() async {
@@ -370,6 +384,7 @@ class TextToSpeechService {
     _buffered.add(Uint8List.fromList(firstBytes));
     _currentIndex = 0;
     await _player.play(BytesSource(_buffered.first));
+    _onSentenceIndex?.call(0);
 
     // Prefetch the rest in background
     unawaited(
@@ -438,6 +453,7 @@ class TextToSpeechService {
     _currentIndex = nextIndex;
     final bytes = _buffered[nextIndex];
     await _player.play(BytesSource(bytes));
+    _onSentenceIndex?.call(_currentIndex);
   }
 
   List _splitForTts(String text) {
diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart
index 370ee77..5d6a42b 100644
--- a/lib/features/chat/widgets/assistant_message_widget.dart
+++ b/lib/features/chat/widgets/assistant_message_widget.dart
@@ -18,6 +18,7 @@ import 'package:conduit/l10n/app_localizations.dart';
 import 'enhanced_attachment.dart';
 import 'package:conduit/shared/widgets/chat_action_button.dart';
 import '../../../shared/widgets/model_avatar.dart';
+import '../../../shared/widgets/conduit_components.dart';
 import 'package:url_launcher/url_launcher_string.dart';
 import '../providers/chat_providers.dart' show sendMessageWithContainer;
 import '../../../core/utils/debug_logger.dart';
@@ -457,12 +458,80 @@ class _AssistantMessageWidgetState extends ConsumerState
     }
 
     if (children.isEmpty) return const SizedBox.shrink();
+    // Append TTS karaoke bar if this is the active message
+    final ttsState = ref.watch(textToSpeechControllerProvider);
+    final isActive =
+        ttsState.activeMessageId == _messageId &&
+        (ttsState.status == TtsPlaybackStatus.speaking ||
+            ttsState.status == TtsPlaybackStatus.paused ||
+            ttsState.status == TtsPlaybackStatus.loading);
+    if (isActive && ttsState.activeSentenceIndex >= 0) {
+      children.add(const SizedBox(height: Spacing.sm));
+      children.add(_buildKaraokeBar(ttsState));
+    }
+
     return Column(
       crossAxisAlignment: CrossAxisAlignment.start,
       children: children,
     );
   }
 
+  /// Builds the card showing the sentence currently being spoken.
+  ///
+  /// When the state carries a valid word range for the active sentence,
+  /// that word is rendered with a highlight style; otherwise the sentence
+  /// is shown in the base style. Returns an empty box if the active index
+  /// is out of range.
+  Widget _buildKaraokeBar(TextToSpeechState ttsState) {
+    final theme = context.conduitTheme;
+    final idx = ttsState.activeSentenceIndex;
+    if (idx < 0 || idx >= ttsState.sentences.length) {
+      return const SizedBox.shrink();
+    }
+    final sentence = ttsState.sentences[idx];
+    final ws = ttsState.wordStartInSentence;
+    final we = ttsState.wordEndInSentence;
+
+    final baseStyle = TextStyle(
+      color: theme.textPrimary,
+      height: 1.2,
+      fontSize: 14,
+    );
+    final highlightStyle = baseStyle.copyWith(
+      backgroundColor: theme.buttonPrimary.withValues(alpha: 0.25),
+      color: theme.textPrimary,
+      fontWeight: FontWeight.w600,
+    );
+
+    InlineSpan buildSpans() {
+      if (ws == null ||
+          we == null ||
+          ws < 0 ||
+          we <= ws ||
+          ws >= sentence.length) {
+        return TextSpan(text: sentence, style: baseStyle);
+      }
+      final safeEnd = we.clamp(0, sentence.length);
+      final before = sentence.substring(0, ws);
+      final word = sentence.substring(ws, safeEnd);
+      final after = sentence.substring(safeEnd);
+      return TextSpan(
+        children: [
+          if (before.isNotEmpty) TextSpan(text: before, style: baseStyle),
+          TextSpan(text: word, style: highlightStyle),
+          if (after.isNotEmpty) TextSpan(text: after, style: baseStyle),
+        ],
+      );
+    }
+
+    return ConduitCard(
+      padding: const EdgeInsets.all(Spacing.sm),
+      // Text.rich (unlike a bare RichText) honors the ambient text scaling
+      // and DefaultTextStyle.
+      child: Text.rich(buildSpans()),
+    );
+  }
+
   bool get _shouldShowTypingIndicator =>
       widget.isStreaming && _isAssistantResponseEmpty;
 