feat: text to speech

This commit is contained in:
cogwheel0
2025-09-20 23:58:18 +05:30
parent 33fbc31672
commit c05644f731
16 changed files with 697 additions and 105 deletions

View File

@@ -0,0 +1,261 @@
import 'dart:async';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../services/text_to_speech_service.dart';
/// Playback phases published through [TextToSpeechState.status].
enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error }
/// Immutable snapshot of text-to-speech playback.
class TextToSpeechState {
  /// Creates a snapshot; with no arguments it describes an uninitialized,
  /// idle engine with no active message and no error.
  const TextToSpeechState({
    this.initialized = false,
    this.available = false,
    this.status = TtsPlaybackStatus.idle,
    this.activeMessageId,
    this.errorMessage,
  });

  /// Whether an initialization attempt has finished.
  final bool initialized;

  /// Whether the engine was reported as usable.
  final bool available;

  /// The current playback phase.
  final TtsPlaybackStatus status;

  /// Identifier of the message playback refers to, or `null` when none.
  final String? activeMessageId;

  /// Description of the most recent failure, or `null` when none is recorded.
  final String? errorMessage;

  /// Whether [status] is [TtsPlaybackStatus.speaking].
  bool get isSpeaking => TtsPlaybackStatus.speaking == status;

  /// Whether [status] is [TtsPlaybackStatus.loading] or
  /// [TtsPlaybackStatus.initializing].
  bool get isBusy {
    switch (status) {
      case TtsPlaybackStatus.loading:
      case TtsPlaybackStatus.initializing:
        return true;
      default:
        return false;
    }
  }

  /// Returns a copy with the given fields replaced.
  ///
  /// A `null` argument keeps the current value. Because `null` cannot be
  /// passed to erase a nullable field, [clearActiveMessageId] and
  /// [clearErrorMessage] reset [activeMessageId] and [errorMessage]; when a
  /// clear flag is set it wins over a value passed for the same field.
  TextToSpeechState copyWith({
    bool? initialized,
    bool? available,
    TtsPlaybackStatus? status,
    String? activeMessageId,
    bool clearActiveMessageId = false,
    String? errorMessage,
    bool clearErrorMessage = false,
  }) {
    String? nextMessageId;
    if (!clearActiveMessageId) {
      nextMessageId = activeMessageId ?? this.activeMessageId;
    }

    String? nextErrorMessage;
    if (!clearErrorMessage) {
      nextErrorMessage = errorMessage ?? this.errorMessage;
    }

    return TextToSpeechState(
      initialized: initialized ?? this.initialized,
      available: available ?? this.available,
      status: status ?? this.status,
      activeMessageId: nextMessageId,
      errorMessage: nextErrorMessage,
    );
  }
}
/// Coordinates text-to-speech playback for individual messages and publishes
/// progress as [TextToSpeechState].
///
/// The service's lifecycle callbacks are bound in the constructor, so the
/// `speaking`, `paused`, `idle` and `error` transitions follow what the
/// service reports.
class TextToSpeechController extends StateNotifier<TextToSpeechState> {
  /// Creates a controller around the given service and registers for its
  /// lifecycle callbacks.
  TextToSpeechController(this._service) : super(const TextToSpeechState()) {
    _service.bindHandlers(
      onStart: _handleStart,
      onComplete: _handleCompletion,
      onCancel: _handleCancellation,
      onPause: _handlePause,
      onContinue: _handleContinue,
      onError: _handleError,
    );
  }

  final TextToSpeechService _service;

  /// The in-flight initialization, shared so that concurrent callers await the
  /// same attempt. Reset to `null` once the attempt finishes.
  Future<bool>? _initializationFuture;

  /// Makes sure the service has been initialized and returns whether speech
  /// is available.
  ///
  /// Once a successful initialization is recorded in [state] this returns
  /// immediately and leaves [state] untouched. Re-running the attempt would
  /// overwrite the current status with `initializing` and then `idle`, which
  /// would make an active playback look inactive to [toggleForMessage].
  ///
  /// A previous attempt that ended unavailable or failed is retried.
  Future<bool> _ensureInitialized() {
    if (state.initialized && state.available) {
      return Future<bool>.value(true);
    }

    final existing = _initializationFuture;
    if (existing != null) {
      return existing;
    }

    state = state.copyWith(
      status: TtsPlaybackStatus.initializing,
      clearErrorMessage: true,
    );

    final future = _runInitialization();
    _initializationFuture = future;
    // Drop the cached future once it settles so a failed attempt can be
    // retried. `_runInitialization` never completes with an error.
    unawaited(
      future.whenComplete(() {
        if (identical(_initializationFuture, future)) {
          _initializationFuture = null;
        }
      }),
    );
    return future;
  }

  /// Runs one initialization attempt and records the outcome in [state].
  ///
  /// Never throws: a failure is stored as an error state and reported as
  /// `false`.
  Future<bool> _runInitialization() async {
    try {
      final available = await _service.initialize();
      if (mounted) {
        state = state.copyWith(
          initialized: true,
          available: available,
          status: TtsPlaybackStatus.idle,
        );
      }
      return available;
    } catch (error) {
      if (mounted) {
        state = state.copyWith(
          initialized: true,
          available: false,
          status: TtsPlaybackStatus.error,
          errorMessage: error.toString(),
          clearActiveMessageId: true,
        );
      }
      return false;
    }
  }

  /// Starts speaking [text] for [messageId], or stops playback when that
  /// message is already the active one.
  ///
  /// Does nothing when [text] is blank. When speech is unavailable, or the
  /// service throws while starting, [state] switches to
  /// [TtsPlaybackStatus.error] with a message and no active message.
  Future<void> toggleForMessage({
    required String messageId,
    required String text,
  }) async {
    if (text.trim().isEmpty) {
      return;
    }

    final available = await _ensureInitialized();
    // The controller may have been disposed while waiting; `state` must not
    // be touched after that.
    if (!mounted) {
      return;
    }
    if (!available) {
      state = state.copyWith(
        status: TtsPlaybackStatus.error,
        errorMessage: 'Text-to-speech unavailable',
        clearActiveMessageId: true,
      );
      return;
    }

    final isCurrentlyActive =
        state.activeMessageId == messageId &&
        state.status != TtsPlaybackStatus.idle;
    if (isCurrentlyActive) {
      await stop();
      return;
    }

    state = state.copyWith(
      status: TtsPlaybackStatus.loading,
      activeMessageId: messageId,
      clearErrorMessage: true,
    );

    try {
      await _service.speak(text);
      if (!mounted) {
        return;
      }
      // A lifecycle callback may already have moved the status on (for
      // example to `speaking` or `error`); only advance when still loading.
      if (state.status == TtsPlaybackStatus.loading) {
        state = state.copyWith(status: TtsPlaybackStatus.speaking);
      }
    } catch (e) {
      if (!mounted) {
        return;
      }
      state = state.copyWith(
        status: TtsPlaybackStatus.error,
        errorMessage: e.toString(),
        clearActiveMessageId: true,
      );
    }
  }

  /// Asks the service to pause; a no-op until initialization has succeeded.
  ///
  /// [state] is not changed here; the `paused` status is set by the pause
  /// callback.
  Future<void> pause() async {
    if (!state.initialized || !state.available) {
      return;
    }
    await _service.pause();
  }

  /// Stops playback and resets [state] to idle with no active message and no
  /// error.
  Future<void> stop() async {
    await _service.stop();
    if (!mounted) {
      return;
    }
    state = state.copyWith(
      status: TtsPlaybackStatus.idle,
      clearActiveMessageId: true,
      clearErrorMessage: true,
    );
  }

  void _handleStart() {
    if (!mounted) {
      return;
    }
    state = state.copyWith(status: TtsPlaybackStatus.speaking);
  }

  void _handleCompletion() {
    if (!mounted) {
      return;
    }
    state = state.copyWith(
      status: TtsPlaybackStatus.idle,
      clearActiveMessageId: true,
    );
  }

  void _handleCancellation() {
    if (!mounted) {
      return;
    }
    // The service stops any current utterance before it starts a new one.
    // NOTE(review): engines appear to report that stop as a cancel event -
    // confirm per platform. Such an event belongs to the utterance being
    // replaced, so it must not clear the message that is currently loading.
    if (state.status == TtsPlaybackStatus.loading) {
      return;
    }
    state = state.copyWith(
      status: TtsPlaybackStatus.idle,
      clearActiveMessageId: true,
    );
  }

  void _handlePause() {
    if (!mounted) {
      return;
    }
    state = state.copyWith(status: TtsPlaybackStatus.paused);
  }

  void _handleContinue() {
    if (!mounted) {
      return;
    }
    state = state.copyWith(status: TtsPlaybackStatus.speaking);
  }

  void _handleError(String message) {
    if (!mounted) {
      return;
    }
    state = state.copyWith(
      status: TtsPlaybackStatus.error,
      errorMessage: message,
      clearActiveMessageId: true,
    );
  }

  /// Stops playback without waiting for it, then disposes the notifier.
  @override
  void dispose() {
    unawaited(_service.stop());
    super.dispose();
  }
}
/// Provides the [TextToSpeechService] and disposes it together with the
/// provider.
final textToSpeechServiceProvider = Provider<TextToSpeechService>((ref) {
  final tts = TextToSpeechService();
  // Disposal is asynchronous; it is started here but not awaited.
  ref.onDispose(() => unawaited(tts.dispose()));
  return tts;
});
/// Provides the [TextToSpeechController] and its [TextToSpeechState], built
/// on the service from [textToSpeechServiceProvider].
final textToSpeechControllerProvider =
    StateNotifierProvider<TextToSpeechController, TextToSpeechState>(
      (ref) => TextToSpeechController(ref.watch(textToSpeechServiceProvider)),
    );

View File

@@ -0,0 +1,151 @@
import 'dart:async';
import 'dart:io' show Platform;
import 'package:flutter/foundation.dart';
import 'package:flutter_tts/flutter_tts.dart';
/// Thin facade over [FlutterTts] that keeps engine configuration and
/// lifecycle-callback wiring in one place.
class TextToSpeechService {
  final FlutterTts _tts = FlutterTts();

  // Outcome of the one-time setup performed by [initialize].
  bool _initialized = false;
  bool _available = false;

  // Listener callbacks installed through [bindHandlers]; any may be null.
  VoidCallback? _onStart;
  VoidCallback? _onComplete;
  VoidCallback? _onCancel;
  VoidCallback? _onPause;
  VoidCallback? _onContinue;
  void Function(String message)? _onError;

  /// Whether [initialize] has run to the end at least once.
  bool get isInitialized => _initialized;

  /// Whether the last [initialize] finished without an exception.
  bool get isAvailable => _available;

  /// Stores the given callbacks, replacing any previous ones, and hooks this
  /// service into the engine's lifecycle events.
  ///
  /// A callback that is omitted is stored as `null`, so the matching event is
  /// dropped.
  void bindHandlers({
    VoidCallback? onStart,
    VoidCallback? onComplete,
    VoidCallback? onCancel,
    VoidCallback? onPause,
    VoidCallback? onContinue,
    void Function(String message)? onError,
  }) {
    _onStart = onStart;
    _onComplete = onComplete;
    _onCancel = onCancel;
    _onPause = onPause;
    _onContinue = onContinue;
    _onError = onError;

    _tts
      ..setStartHandler(_emitStart)
      ..setCompletionHandler(_emitComplete)
      ..setCancelHandler(_emitCancel)
      ..setPauseHandler(_emitPause)
      ..setContinueHandler(_emitContinue)
      ..setErrorHandler(_emitError);
  }

  /// Configures the engine on first use and returns whether it is usable.
  ///
  /// Later calls return the cached outcome. A failure during setup is passed
  /// to the error callback and reported as `false` instead of being thrown.
  Future<bool> initialize() async {
    if (!_initialized) {
      try {
        await _tts.awaitSpeakCompletion(false);
        // `Platform` is only consulted off the web.
        final isIos = !kIsWeb && Platform.isIOS;
        if (isIos) {
          await _tts.setIosAudioCategory(
            IosTextToSpeechAudioCategory.playback,
            [
              IosTextToSpeechAudioCategoryOptions.mixWithOthers,
              IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
              IosTextToSpeechAudioCategoryOptions.allowBluetooth,
              IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
            ],
          );
        }
        _available = true;
      } catch (e) {
        _available = false;
        _onError?.call(e.toString());
      }
      _initialized = true;
    }
    return _available;
  }

  /// Stops any current utterance and asks the engine to speak [text].
  ///
  /// Initializes the engine first when that has not happened yet. An integer
  /// result other than `1` is passed to the error callback rather than
  /// thrown.
  ///
  /// Throws an [ArgumentError] when [text] is blank and a [StateError] when
  /// the engine is unavailable.
  Future<void> speak(String text) async {
    if (text.trim().isEmpty) {
      throw ArgumentError('Cannot speak empty text');
    }
    if (!_initialized) {
      await initialize();
    }
    if (!_available) {
      throw StateError('Text-to-speech is unavailable on this device');
    }

    await _tts.stop();
    final result = await _tts.speak(text);
    // A null result carries no status and is treated as success.
    if (result is int && result != 1) {
      _onError?.call('Text-to-speech engine returned code $result');
    }
  }

  /// Pauses playback; a no-op unless the engine is initialized and available.
  ///
  /// Engine failures go to the error callback instead of being thrown.
  Future<void> pause() async {
    final ready = _initialized && _available;
    if (!ready) {
      return;
    }
    try {
      await _tts.pause();
    } catch (e) {
      _onError?.call(e.toString());
    }
  }

  /// Stops playback; a no-op before the first [initialize].
  ///
  /// Engine failures go to the error callback instead of being thrown.
  Future<void> stop() async {
    if (_initialized) {
      try {
        await _tts.stop();
      } catch (e) {
        _onError?.call(e.toString());
      }
    }
  }

  /// Releases the service; currently this only stops playback.
  Future<void> dispose() => stop();

  // Forwarders from engine events to the currently bound callbacks.
  void _emitStart() => _onStart?.call();

  void _emitComplete() => _onComplete?.call();

  void _emitCancel() => _onCancel?.call();

  void _emitPause() => _onPause?.call();

  void _emitContinue() => _onContinue?.call();

  void _emitError(dynamic message) {
    // The engine payload is untyped; substitute a fixed text when it is null.
    final String safeMessage = message?.toString() ?? 'Unknown TTS error';
    _onError?.call(safeMessage);
  }
}

View File

@@ -10,6 +10,7 @@ import '../../../shared/widgets/markdown/streaming_markdown_widget.dart';
import '../../../core/utils/reasoning_parser.dart';
import '../../../core/utils/message_segments.dart';
import '../../../core/utils/tool_calls_parser.dart';
import '../providers/text_to_speech_provider.dart';
import 'enhanced_image_attachment.dart';
import 'package:conduit/l10n/app_localizations.dart';
import 'enhanced_attachment.dart';
@@ -54,6 +55,7 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
Widget? _cachedAvatar;
bool _allowTypingIndicator = false;
Timer? _typingGateTimer;
String _ttsPlainText = '';
// press state handled by shared ChatActionButton
@override
@@ -154,8 +156,12 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
}
}
final segments = out.isEmpty ? [MessageSegment.text(raw)] : out;
final speechText = _buildTtsPlainText(segments, raw);
setState(() {
_segments = out.isEmpty ? [MessageSegment.text(raw)] : out;
_segments = segments;
_ttsPlainText = speechText;
});
_updateTypingIndicatorGate();
}
@@ -179,6 +185,73 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
}
}
/// The message id as a string, or `''` when the id is null or reading it
/// throws.
String get _messageId {
  try {
    // Read as dynamic so a non-string id still works through `toString()`.
    final dynamic rawId = widget.message.id;
    return rawId == null ? '' : rawId.toString();
  } catch (_) {
    return '';
  }
}
/// Builds the text handed to text-to-speech from the text [segments].
///
/// Non-text segments are skipped. Each text segment is sanitized, and the
/// non-empty results are joined with a blank line between them. When that
/// yields nothing, the sanitized [fallback] is returned instead.
String _buildTtsPlainText(List<MessageSegment> segments, String fallback) {
  final spokenParts = <String>[
    for (final segment in segments)
      if (segment.isText) _sanitizeForSpeech(segment.text ?? ''),
  ].where((part) => part.isNotEmpty);

  final joined = spokenParts.join('\n\n').trim();
  return joined.isEmpty ? _sanitizeForSpeech(fallback) : joined;
}
/// Strips common Markdown and HTML-entity noise from [input] so that a speech
/// engine reads only the visible wording.
///
/// Removes code fences and backticks, replaces images and links with their
/// label text, drops leading list and block-quote markers, removes the
/// emphasis characters `*`, `_` and `~`, decodes a few HTML entities, and
/// collapses repeated spaces and blank lines. Returns the trimmed result,
/// which is `''` for empty input.
String _sanitizeForSpeech(String input) {
  if (input.isEmpty) {
    return '';
  }

  var text = input;

  // Code fences become a space so neighbouring words stay apart.
  text = text.replaceAll('```', ' ');
  text = text.replaceAll('`', '');

  // Keep only the label of images and links. `replaceAll` inserts its
  // replacement literally (a `$1` would be spoken as text), so the capture
  // group has to be read through `replaceAllMapped`. Images go first because
  // the link pattern would also match the tail of an image.
  String keepLabel(Match match) => match.group(1) ?? '';
  text = text.replaceAllMapped(RegExp(r'!\[(.*?)\]\((.*?)\)'), keepLabel);
  text = text.replaceAllMapped(RegExp(r'\[(.*?)\]\((.*?)\)'), keepLabel);

  // Line-leading markers are removed before the emphasis characters;
  // otherwise a `* ` bullet loses its star first and leaves a stray space.
  text = text.replaceAll(RegExp(r'^[-*+]\s+', multiLine: true), '');
  text = text.replaceAll(RegExp(r'^>\s?', multiLine: true), '');

  // Covers `**`, `__`, `*`, `_` and `~`.
  text = text.replaceAll(RegExp(r'[*_~]'), '');

  // `&amp;` is decoded last so that `&amp;lt;` ends up as the text `&lt;`
  // instead of being decoded a second time into `<`.
  text = text.replaceAll('&nbsp;', ' ');
  text = text.replaceAll('&lt;', '<');
  text = text.replaceAll('&gt;', '>');
  text = text.replaceAll('&amp;', '&');

  text = text.replaceAll(RegExp(r'[ \t]{2,}'), ' ');
  text = text.replaceAll(RegExp(r'\n{3,}'), '\n\n');

  return text.trim();
}
// No streaming-specific markdown fixes needed here; handled by Markdown widget
Widget _buildToolCallTile(ToolCallEntry tc) {
@@ -888,21 +961,65 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
}
Widget _buildActionButtons() {
final l10n = AppLocalizations.of(context)!;
final ttsState = ref.watch(textToSpeechControllerProvider);
final messageId = _messageId;
final hasSpeechText = _ttsPlainText.trim().isNotEmpty;
final isErrorMessage =
widget.message.content.contains('⚠️') ||
widget.message.content.contains('Error') ||
widget.message.content.contains('timeout') ||
widget.message.content.contains('retry options');
final isActiveMessage = ttsState.activeMessageId == messageId;
final isSpeaking =
isActiveMessage && ttsState.status == TtsPlaybackStatus.speaking;
final isPaused =
isActiveMessage && ttsState.status == TtsPlaybackStatus.paused;
final isBusy =
isActiveMessage &&
(ttsState.status == TtsPlaybackStatus.loading ||
ttsState.status == TtsPlaybackStatus.initializing);
final bool disableDueToStreaming = widget.isStreaming && !isActiveMessage;
final bool ttsAvailable = !ttsState.initialized || ttsState.available;
final bool showStopState =
isActiveMessage && (isSpeaking || isPaused || isBusy);
final bool shouldShowTtsButton = hasSpeechText && messageId.isNotEmpty;
final bool canStartTts =
shouldShowTtsButton && !disableDueToStreaming && ttsAvailable;
VoidCallback? ttsOnTap;
if (showStopState || canStartTts) {
ttsOnTap = () {
if (messageId.isEmpty) {
return;
}
ref
.read(textToSpeechControllerProvider.notifier)
.toggleForMessage(messageId: messageId, text: _ttsPlainText);
};
}
final IconData listenIcon = Platform.isIOS
? CupertinoIcons.speaker_2_fill
: Icons.volume_up;
final IconData stopIcon = Platform.isIOS
? CupertinoIcons.stop_fill
: Icons.stop;
final IconData ttsIcon = showStopState ? stopIcon : listenIcon;
final String ttsLabel = showStopState ? l10n.ttsStop : l10n.ttsListen;
return Wrap(
spacing: 8,
runSpacing: 8,
children: [
if (shouldShowTtsButton)
_buildActionButton(icon: ttsIcon, label: ttsLabel, onTap: ttsOnTap),
_buildActionButton(
icon: Platform.isIOS
? CupertinoIcons.doc_on_clipboard
: Icons.content_copy,
label: AppLocalizations.of(context)!.copy,
label: l10n.copy,
onTap: widget.onCopy,
),
if (isErrorMessage) ...[
@@ -910,13 +1027,13 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
icon: Platform.isIOS
? CupertinoIcons.arrow_clockwise
: Icons.refresh,
label: AppLocalizations.of(context)!.retry,
label: l10n.retry,
onTap: widget.onRegenerate,
),
] else ...[
_buildActionButton(
icon: Platform.isIOS ? CupertinoIcons.refresh : Icons.refresh,
label: AppLocalizations.of(context)!.regenerate,
label: l10n.regenerate,
onTap: widget.onRegenerate,
),
],

View File

@@ -217,6 +217,8 @@
"imageGeneration": "Bildgenerierung",
"imageGenerationDescription": "Bilder aus deinen Prompts erstellen.",
"copy": "Kopieren",
"ttsListen": "Anhören",
"ttsStop": "Stoppen",
"edit": "Bearbeiten",
"regenerate": "Neu generieren",
"noConversationsYet": "Noch keine Unterhaltungen"

View File

@@ -441,6 +441,14 @@
"@imageGenerationDescription": {"description": "Explains creating images via model prompts."},
"copy": "Copy",
"@copy": {"description": "Action to copy text to clipboard."},
"ttsListen": "Listen",
"@ttsListen": {
"description": "Action to play the assistant message using text to speech"
},
"ttsStop": "Stop",
"@ttsStop": {
"description": "Action to stop text to speech playback"
},
"edit": "Edit",
"@edit": {"description": "Action to edit an item/message."},
"regenerate": "Regenerate",

View File

@@ -217,6 +217,8 @@
"imageGeneration": "Génération d'images",
"imageGenerationDescription": "Créez des images à partir de vos prompts.",
"copy": "Copier",
"ttsListen": "Écouter",
"ttsStop": "Arrêter",
"edit": "Modifier",
"regenerate": "Régénérer",
"noConversationsYet": "Aucune conversation pour l'instant"

View File

@@ -217,6 +217,8 @@
"imageGeneration": "Generazione immagini",
"imageGenerationDescription": "Crea immagini dai tuoi prompt.",
"copy": "Copia",
"ttsListen": "Ascolta",
"ttsStop": "Interrompi",
"edit": "Modifica",
"regenerate": "Rigenera",
"noConversationsYet": "Ancora nessuna conversazione"

View File

@@ -1290,6 +1290,18 @@ abstract class AppLocalizations {
/// **'Copy'**
String get copy;
/// Action to play the assistant message using text to speech
///
/// In en, this message translates to:
/// **'Listen'**
String get ttsListen;
/// Action to stop text to speech playback
///
/// In en, this message translates to:
/// **'Stop'**
String get ttsStop;
/// Action to edit an item/message.
///
/// In en, this message translates to:

View File

@@ -658,6 +658,12 @@ class AppLocalizationsDe extends AppLocalizations {
@override
String get copy => 'Kopieren';
@override
String get ttsListen => 'Anhören';
@override
String get ttsStop => 'Stoppen';
@override
String get edit => 'Bearbeiten';

View File

@@ -653,6 +653,12 @@ class AppLocalizationsEn extends AppLocalizations {
@override
String get copy => 'Copy';
@override
String get ttsListen => 'Listen';
@override
String get ttsStop => 'Stop';
@override
String get edit => 'Edit';

View File

@@ -664,6 +664,12 @@ class AppLocalizationsFr extends AppLocalizations {
@override
String get copy => 'Copier';
@override
String get ttsListen => 'Écouter';
@override
String get ttsStop => 'Arrêter';
@override
String get edit => 'Modifier';

View File

@@ -655,6 +655,12 @@ class AppLocalizationsIt extends AppLocalizations {
@override
String get copy => 'Copia';
@override
String get ttsListen => 'Ascolta';
@override
String get ttsStop => 'Interrompi';
@override
String get edit => 'Modifica';