Merge pull request #96 from cogwheel0/server-side-tts

server-side-tts
This commit is contained in:
cogwheel
2025-10-23 17:08:34 +05:30
committed by GitHub
11 changed files with 719 additions and 37 deletions

View File

@@ -1,4 +1,6 @@
PODS:
- audioplayers_darwin (0.0.1):
- Flutter
- connectivity_plus (0.0.1):
- Flutter
- DKImagePickerController/Core (4.3.9):
@@ -84,6 +86,7 @@ PODS:
- FlutterMacOS
DEPENDENCIES:
- audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/ios`)
- connectivity_plus (from `.symlinks/plugins/connectivity_plus/ios`)
- file_picker (from `.symlinks/plugins/file_picker/ios`)
- Flutter (from `Flutter`)
@@ -113,6 +116,8 @@ SPEC REPOS:
- SwiftyGif
EXTERNAL SOURCES:
audioplayers_darwin:
:path: ".symlinks/plugins/audioplayers_darwin/ios"
connectivity_plus:
:path: ".symlinks/plugins/connectivity_plus/ios"
file_picker:
@@ -155,6 +160,7 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/webview_flutter_wkwebview/darwin"
SPEC CHECKSUMS:
audioplayers_darwin: ccf9c770ee768abb07e26d90af093f7bab1c12ab
connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd
DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60

View File

@@ -25,6 +25,9 @@ final class PreferenceKeys {
static const String ttsSpeechRate = 'tts_speech_rate';
static const String ttsPitch = 'tts_pitch';
static const String ttsVolume = 'tts_volume';
static const String ttsEngine = 'tts_engine'; // 'device' | 'server'
static const String ttsServerVoiceId = 'tts_server_voice_id';
static const String ttsServerVoiceName = 'tts_server_voice_name';
}
final class LegacyPreferenceKeys {

View File

@@ -1830,7 +1830,11 @@ Future<List<String>> availableVoices(Ref ref) async {
if (api == null) return [];
try {
return await api.getAvailableVoices();
final voices = await api.getAvailableServerVoices();
return voices
.map((v) => (v['name'] ?? v['id'] ?? '').toString())
.where((s) => s.isNotEmpty)
.toList();
} catch (e) {
DebugLogger.error('voices-failed', scope: 'voices', error: e);
return [];

View File

@@ -2261,12 +2261,24 @@ class ApiService {
}
// Audio
Future<List<String>> getAvailableVoices() async {
_traceApi('Fetching available voices');
Future<List<Map<String, dynamic>>> getAvailableServerVoices() async {
_traceApi('Fetching server TTS voices');
final response = await _dio.get('/api/v1/audio/voices');
final data = response.data;
if (data is Map<String, dynamic>) {
final voices = data['voices'];
if (voices is List) {
return voices
.whereType<Map>()
.map((e) => e.cast<String, dynamic>())
.toList();
}
}
if (data is List) {
return data.cast<String>();
// Fallback: plain list of ids
return data
.map((e) => {'id': e.toString(), 'name': e.toString()})
.toList();
}
return [];
}
@@ -2279,13 +2291,15 @@ class ApiService {
_traceApi('Generating speech for text: $textPreview...');
final response = await _dio.post(
'/api/v1/audio/speech',
data: {'text': text, if (voice != null) 'voice': voice},
data: {'input': text, if (voice != null) 'voice': voice},
options: Options(responseType: ResponseType.bytes),
);
// Return audio data as bytes
if (response.data is List) {
return (response.data as List).cast<int>();
}
final data = response.data;
if (data is List<int>) return data;
if (data is Uint8List) return data.toList();
if (data is List) return (data).cast<int>();
return [];
}

View File

@@ -8,6 +8,9 @@ import 'animation_service.dart';
part 'settings_service.g.dart';
/// TTS engine selection
enum TtsEngine { device, server }
/// Service for managing app-wide settings including accessibility preferences
class SettingsService {
static const String _reduceMotionKey = PreferenceKeys.reduceMotion;
@@ -142,6 +145,12 @@ class SettingsService {
ttsPitch: (box.get(PreferenceKeys.ttsPitch) as num?)?.toDouble() ?? 1.0,
ttsVolume:
(box.get(PreferenceKeys.ttsVolume) as num?)?.toDouble() ?? 1.0,
ttsEngine: _parseTtsEngine(
box.get(PreferenceKeys.ttsEngine) as String?,
),
ttsServerVoiceId: box.get(PreferenceKeys.ttsServerVoiceId) as String?,
ttsServerVoiceName:
box.get(PreferenceKeys.ttsServerVoiceName) as String?,
),
);
}
@@ -164,6 +173,7 @@ class SettingsService {
PreferenceKeys.ttsSpeechRate: settings.ttsSpeechRate,
PreferenceKeys.ttsPitch: settings.ttsPitch,
PreferenceKeys.ttsVolume: settings.ttsVolume,
PreferenceKeys.ttsEngine: settings.ttsEngine.name,
};
await box.putAll(updates);
@@ -185,6 +195,33 @@ class SettingsService {
} else {
await box.delete(PreferenceKeys.ttsVoice);
}
// Server-specific voice id and friendly name
if (settings.ttsServerVoiceId != null &&
settings.ttsServerVoiceId!.isNotEmpty) {
await box.put(PreferenceKeys.ttsServerVoiceId, settings.ttsServerVoiceId);
} else {
await box.delete(PreferenceKeys.ttsServerVoiceId);
}
if (settings.ttsServerVoiceName != null &&
settings.ttsServerVoiceName!.isNotEmpty) {
await box.put(
PreferenceKeys.ttsServerVoiceName,
settings.ttsServerVoiceName,
);
} else {
await box.delete(PreferenceKeys.ttsServerVoiceName);
}
}
/// Converts the persisted engine string into a [TtsEngine].
///
/// Matching is case-insensitive. Only `'server'` selects
/// [TtsEngine.server]; `null`, `'device'` and any unrecognised value
/// resolve to [TtsEngine.device].
static TtsEngine _parseTtsEngine(String? raw) {
  final normalized = raw?.toLowerCase() ?? '';
  return normalized == 'server' ? TtsEngine.server : TtsEngine.device;
}
// Voice input specific settings
@@ -314,6 +351,9 @@ class AppSettings {
final double ttsSpeechRate;
final double ttsPitch;
final double ttsVolume;
final TtsEngine ttsEngine;
final String? ttsServerVoiceId;
final String? ttsServerVoiceName;
const AppSettings({
this.reduceMotion = false,
this.animationSpeed = 1.0,
@@ -332,6 +372,9 @@ class AppSettings {
this.ttsSpeechRate = 0.5,
this.ttsPitch = 1.0,
this.ttsVolume = 1.0,
this.ttsEngine = TtsEngine.device,
this.ttsServerVoiceId,
this.ttsServerVoiceName,
});
AppSettings copyWith({
@@ -352,6 +395,9 @@ class AppSettings {
double? ttsSpeechRate,
double? ttsPitch,
double? ttsVolume,
TtsEngine? ttsEngine,
Object? ttsServerVoiceId = const _DefaultValue(),
Object? ttsServerVoiceName = const _DefaultValue(),
}) {
return AppSettings(
reduceMotion: reduceMotion ?? this.reduceMotion,
@@ -375,6 +421,13 @@ class AppSettings {
ttsSpeechRate: ttsSpeechRate ?? this.ttsSpeechRate,
ttsPitch: ttsPitch ?? this.ttsPitch,
ttsVolume: ttsVolume ?? this.ttsVolume,
ttsEngine: ttsEngine ?? this.ttsEngine,
ttsServerVoiceId: ttsServerVoiceId is _DefaultValue
? this.ttsServerVoiceId
: ttsServerVoiceId as String?,
ttsServerVoiceName: ttsServerVoiceName is _DefaultValue
? this.ttsServerVoiceName
: ttsServerVoiceName as String?,
);
}
@@ -397,6 +450,9 @@ class AppSettings {
other.ttsSpeechRate == ttsSpeechRate &&
other.ttsPitch == ttsPitch &&
other.ttsVolume == ttsVolume &&
other.ttsEngine == ttsEngine &&
other.ttsServerVoiceId == ttsServerVoiceId &&
other.ttsServerVoiceName == ttsServerVoiceName &&
_listEquals(other.quickPills, quickPills);
// socketTransportMode intentionally not included in == to avoid frequent rebuilds
}
@@ -420,6 +476,9 @@ class AppSettings {
ttsSpeechRate,
ttsPitch,
ttsVolume,
ttsEngine,
ttsServerVoiceId,
ttsServerVoiceName,
Object.hashAllUnordered(quickPills),
);
}
@@ -543,6 +602,21 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
await SettingsService.saveSettings(state);
}
/// Switches the active TTS [engine] and persists the updated settings.
Future<void> setTtsEngine(TtsEngine engine) async {
// Publish the new state first so the value handed to saveSettings is the
// one listeners have already observed.
state = state.copyWith(ttsEngine: engine);
await SettingsService.saveSettings(state);
}
/// Stores the display [name] of the selected server voice and persists it.
///
/// Passing `null` clears the value rather than keeping the previous one.
Future<void> setTtsServerVoiceName(String? name) async {
state = state.copyWith(ttsServerVoiceName: name);
await SettingsService.saveSettings(state);
}
/// Stores the [id] of the selected server voice and persists it.
///
/// Passing `null` clears the value rather than keeping the previous one.
Future<void> setTtsServerVoiceId(String? id) async {
state = state.copyWith(ttsServerVoiceId: id);
await SettingsService.saveSettings(state);
}
Future<void> resetToDefaults() async {
const defaultSettings = AppSettings();
await SettingsService.saveSettings(defaultSettings);

View File

@@ -3,6 +3,7 @@ import 'dart:async';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../../core/services/settings_service.dart';
import '../../../core/providers/app_providers.dart';
import '../../../core/utils/markdown_to_text.dart';
import '../services/text_to_speech_service.dart';
@@ -14,6 +15,11 @@ class TextToSpeechState {
final TtsPlaybackStatus status;
final String? activeMessageId;
final String? errorMessage;
final List<String> sentences;
final List<int> sentenceOffsets; // start indices in full text
final int activeSentenceIndex; // -1 when none
final int? wordStartInSentence; // nullable; only for on-device
final int? wordEndInSentence; // nullable; only for on-device
const TextToSpeechState({
this.initialized = false,
@@ -21,6 +27,11 @@ class TextToSpeechState {
this.status = TtsPlaybackStatus.idle,
this.activeMessageId,
this.errorMessage,
this.sentences = const [],
this.sentenceOffsets = const [],
this.activeSentenceIndex = -1,
this.wordStartInSentence,
this.wordEndInSentence,
});
bool get isSpeaking => status == TtsPlaybackStatus.speaking;
@@ -36,6 +47,12 @@ class TextToSpeechState {
bool clearActiveMessageId = false,
String? errorMessage,
bool clearErrorMessage = false,
List<String>? sentences,
List<int>? sentenceOffsets,
int? activeSentenceIndex,
bool clearWord = false,
int? wordStartInSentence,
int? wordEndInSentence,
}) {
return TextToSpeechState(
initialized: initialized ?? this.initialized,
@@ -47,6 +64,15 @@ class TextToSpeechState {
errorMessage: clearErrorMessage
? null
: errorMessage ?? this.errorMessage,
sentences: sentences ?? this.sentences,
sentenceOffsets: sentenceOffsets ?? this.sentenceOffsets,
activeSentenceIndex: activeSentenceIndex ?? this.activeSentenceIndex,
wordStartInSentence: clearWord
? null
: (wordStartInSentence ?? this.wordStartInSentence),
wordEndInSentence: clearWord
? null
: (wordEndInSentence ?? this.wordEndInSentence),
);
}
}
@@ -69,6 +95,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
onPause: _handlePause,
onContinue: _handleContinue,
onError: _handleError,
onSentenceIndex: _handleSentenceIndex,
onDeviceWordProgress: _handleDeviceWordProgress,
);
ref.onDispose(() {
@@ -79,11 +107,15 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
// Listen to settings changes and update TTS when initialized
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
if (_service.isInitialized && _service.isAvailable) {
final selectedVoice = next.ttsEngine == TtsEngine.server
? next.ttsServerVoiceId
: next.ttsVoice;
_service.updateSettings(
voice: next.ttsVoice,
voice: selectedVoice,
speechRate: next.ttsSpeechRate,
pitch: next.ttsPitch,
volume: next.ttsVolume,
engine: next.ttsEngine,
);
}
}, fireImmediately: false);
@@ -105,10 +137,13 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
final settings = ref.read(appSettingsProvider);
final future = _service
.initialize(
voice: settings.ttsVoice,
voice: settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId
: settings.ttsVoice,
speechRate: settings.ttsSpeechRate,
pitch: settings.ttsPitch,
volume: settings.ttsVolume,
engine: settings.ttsEngine,
)
.then((available) {
if (!ref.mounted) {
@@ -176,15 +211,23 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
return;
}
// Prepare sentence split for highlighting
final cleanText = MarkdownToText.convert(text);
final sentences = _splitForTts(cleanText);
final offsets = _computeOffsets(sentences);
state = state.copyWith(
status: TtsPlaybackStatus.loading,
activeMessageId: messageId,
clearErrorMessage: true,
sentences: sentences,
sentenceOffsets: offsets,
activeSentenceIndex: sentences.isEmpty ? -1 : 0,
clearWord: true,
);
try {
// Convert markdown to clean text for TTS
final cleanText = MarkdownToText.convert(text);
if (cleanText.isEmpty) {
// No speakable content
if (!ref.mounted) {
@@ -216,6 +259,34 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
}
}
/// Splits [text] into sentences used for highlight tracking.
///
/// Whitespace runs are collapsed to single spaces first. A sentence ends at
/// one or more of `.`, `!` or `?` followed by whitespace or the end of the
/// text; any trailing text without terminating punctuation becomes the last
/// entry. Returns an empty list for blank input.
List<String> _splitForTts(String text) {
  final normalized = text.replaceAll(RegExp(r'\s+'), ' ').trim();
  if (normalized.isEmpty) return const [];

  // `$` must stay unescaped: inside a raw string `\$` matches a literal
  // dollar sign, which split text such as "3.$5" and dropped the `$`.
  final sentenceRegex = RegExp(r'(.+?[.!?]+)(\s+|$)');

  final parts = <String>[];
  var consumed = 0;
  for (final match in sentenceRegex.allMatches(normalized)) {
    final sentence = (match.group(1) ?? '').trim();
    if (sentence.isNotEmpty) parts.add(sentence);
    consumed = match.end;
  }

  // Whatever follows the last terminator is still speakable content.
  if (consumed < normalized.length) {
    final tail = normalized.substring(consumed).trim();
    if (tail.isNotEmpty) parts.add(tail);
  }
  return parts;
}
/// Returns the start offset of each entry in [sentences].
///
/// Offsets are cumulative: each sentence is assumed to be followed by
/// exactly one separator character, so entry `i` starts at the sum of the
/// previous lengths plus `i`.
List<int> _computeOffsets(List<String> sentences) {
  final starts = List<int>.filled(sentences.length, 0, growable: true);
  for (var i = 1; i < sentences.length; i++) {
    // Previous start + previous length + one separator character.
    starts[i] = starts[i - 1] + sentences[i - 1].length + 1;
  }
  return starts;
}
Future<void> pause() async {
if (!state.initialized || !state.available) {
return;
@@ -286,10 +357,46 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
clearActiveMessageId: true,
);
}
/// Records the sentence the service reports as currently playing.
///
/// [index] is clamped to `-1 .. sentences.length - 1`, so it resolves to
/// `-1` when no sentences are stored. Any per-word highlight is cleared
/// because it belongs to the previous sentence.
void _handleSentenceIndex(int index) {
  if (!ref.mounted) return;

  // `length - 1` is already -1 for an empty list, which is the "none" value.
  final lastIndex = state.sentences.length - 1;
  state = state.copyWith(
    activeSentenceIndex: index.clamp(-1, lastIndex),
    clearWord: true,
  );
}
/// Maps a word range reported by the device engine onto the stored sentences.
///
/// [start] and [end] are offsets into the spoken text. The sentence whose
/// offset window contains [start] becomes active (the first sentence when
/// none matches), and the word range is stored relative to that sentence's
/// start, clamped to be non-negative.
///
/// NOTE(review): offsets are derived from whitespace-normalized sentences;
/// confirm they line up with the text actually handed to the engine.
void _handleDeviceWordProgress(int start, int end) {
  if (!ref.mounted) return;

  final offsets = state.sentenceOffsets;
  if (offsets.isEmpty) return;

  // Exclusive upper bound of sentence `i`; the last sentence is open-ended.
  int windowEnd(int i) => i + 1 < offsets.length ? offsets[i + 1] : 1 << 30;

  var sentenceIdx = 0;
  for (var i = 0; i < offsets.length; i++) {
    if (start >= offsets[i] && start < windowEnd(i)) {
      sentenceIdx = i;
      break;
    }
  }

  final base = offsets[sentenceIdx];
  state = state.copyWith(
    activeSentenceIndex: sentenceIdx,
    wordStartInSentence: (start - base).clamp(0, 1 << 20),
    wordEndInSentence: (end - base).clamp(0, 1 << 20),
  );
}
}
final textToSpeechServiceProvider = Provider<TextToSpeechService>((ref) {
final service = TextToSpeechService();
final api = ref.watch(apiServiceProvider);
final service = TextToSpeechService(api: api);
ref.onDispose(() {
unawaited(service.dispose());
});

View File

@@ -1,16 +1,29 @@
import 'dart:async';
import 'dart:io' show Platform;
import 'package:audioplayers/audioplayers.dart';
import 'package:flutter/foundation.dart';
import 'package:flutter/widgets.dart';
import 'package:flutter_tts/flutter_tts.dart';
import '../../../core/services/api_service.dart';
import '../../../core/services/settings_service.dart';
/// Lightweight wrapper around FlutterTts to centralize configuration
class TextToSpeechService {
final FlutterTts _tts = FlutterTts();
final AudioPlayer _player = AudioPlayer();
final ApiService? _api;
TtsEngine _engine = TtsEngine.device;
String? _preferredVoice;
bool _initialized = false;
bool _available = false;
bool _voiceConfigured = false;
int _session = 0; // increments to cancel in-flight work
final List<Uint8List> _buffered = <Uint8List>[]; // server chunks
int _expectedChunks = 0;
int _currentIndex = -1;
bool _waitingNext = false;
VoidCallback? _onStart;
VoidCallback? _onComplete;
@@ -18,10 +31,20 @@ class TextToSpeechService {
VoidCallback? _onPause;
VoidCallback? _onContinue;
void Function(String message)? _onError;
void Function(int sentenceIndex)? _onSentenceIndex;
void Function(int start, int end)? _onDeviceWordProgress;
bool get isInitialized => _initialized;
bool get isAvailable => _available;
/// Creates the service, optionally backed by [api] for server-side speech.
///
/// Subscribes to the audio player so that finishing a clip runs
/// [_onAudioComplete] and entering the playing state runs [_handleStart].
TextToSpeechService({ApiService? api}) : _api = api {
  _player.onPlayerComplete.listen((_) {
    _onAudioComplete();
  });
  _player.onPlayerStateChanged
      .where((playerState) => playerState == PlayerState.playing)
      .listen((_) => _handleStart());
}
/// Register callbacks for TTS lifecycle events
void bindHandlers({
VoidCallback? onStart,
@@ -30,6 +53,8 @@ class TextToSpeechService {
VoidCallback? onPause,
VoidCallback? onContinue,
void Function(String message)? onError,
void Function(int sentenceIndex)? onSentenceIndex,
void Function(int start, int end)? onDeviceWordProgress,
}) {
_onStart = onStart;
_onComplete = onComplete;
@@ -37,6 +62,8 @@ class TextToSpeechService {
_onPause = onPause;
_onContinue = onContinue;
_onError = onError;
_onSentenceIndex = onSentenceIndex;
_onDeviceWordProgress = onDeviceWordProgress;
_tts.setStartHandler(_handleStart);
_tts.setCompletionHandler(_handleComplete);
@@ -44,6 +71,13 @@ class TextToSpeechService {
_tts.setPauseHandler(_handlePause);
_tts.setContinueHandler(_handleContinue);
_tts.setErrorHandler(_handleError);
try {
_tts.setProgressHandler((String text, int start, int end, String word) {
_onDeviceWordProgress?.call(start, end);
});
} catch (_) {
// Some platforms may not support progress handler
}
}
/// Initialize the native TTS engine lazily
@@ -52,12 +86,15 @@ class TextToSpeechService {
double speechRate = 0.5,
double pitch = 1.0,
double volume = 1.0,
TtsEngine engine = TtsEngine.device,
}) async {
if (_initialized) {
return _available;
}
try {
_engine = engine;
_preferredVoice = voice;
await _tts.awaitSpeakCompletion(false);
// Set volume
@@ -97,34 +134,47 @@ class TextToSpeechService {
}
if (!_initialized) {
await initialize();
await initialize(voice: _preferredVoice, engine: _engine);
}
if (_engine == TtsEngine.server && _api != null) {
// Server-backed TTS with sentence chunking & queued playback
try {
await _startServerChunkedPlayback(text);
} catch (e) {
_onError?.call(e.toString());
await _speakOnDevice(text);
}
return;
}
// Device TTS path
await _speakOnDevice(text);
}
/// Speaks [text] with the on-device engine.
///
/// Stops any current device utterance first and configures the preferred
/// voice if that has not happened yet. A non-null integer result other
/// than `1` is reported through the error callback. Sentence index `0` is
/// reported whenever the engine returns a non-null result.
///
/// Throws a [StateError] when device TTS is unavailable.
Future<void> _speakOnDevice(String text) async {
  if (!_available) {
    throw StateError('Text-to-speech is unavailable on this device');
  }

  await _tts.stop();
  if (!_voiceConfigured) {
    await _configurePreferredVoice();
  }

  final outcome = await _tts.speak(text);
  if (outcome != null) {
    if (outcome is int && outcome != 1) {
      _onError?.call('Text-to-speech engine returned code $outcome');
    }
    _onSentenceIndex?.call(0);
  }
}
Future<void> pause() async {
if (!_initialized || !_available) {
return;
}
if (!_initialized) return;
try {
await _tts.pause();
if (_engine == TtsEngine.server) {
await _player.pause();
} else if (_available) {
await _tts.pause();
}
} catch (e) {
_onError?.call(e.toString());
}
@@ -136,7 +186,17 @@ class TextToSpeechService {
}
try {
await _tts.stop();
// Cancel any in-flight server work
_session++;
_buffered.clear();
_expectedChunks = 0;
_currentIndex = -1;
_waitingNext = false;
if (_engine == TtsEngine.server) {
await _player.stop();
} else {
await _tts.stop();
}
} catch (e) {
_onError?.call(e.toString());
}
@@ -144,6 +204,7 @@ class TextToSpeechService {
/// Stops any active playback and then disposes the audio player.
Future<void> dispose() async {
// Stop first so no playback is in progress when the player is disposed.
await stop();
await _player.dispose();
}
/// Update TTS settings on-the-fly
@@ -152,12 +213,22 @@ class TextToSpeechService {
double? speechRate,
double? pitch,
double? volume,
TtsEngine? engine,
}) async {
if (!_initialized || !_available) {
// Allow engine and voice to update before init
if (engine != null) _engine = engine;
if (voice != null) _preferredVoice = voice;
return;
}
try {
if (engine != null) {
_engine = engine;
}
if (voice != null) {
_preferredVoice = voice;
}
if (volume != null) {
await _tts.setVolume(volume);
}
@@ -167,8 +238,10 @@ class TextToSpeechService {
if (pitch != null) {
await _tts.setPitch(pitch);
}
// Set specific voice by name
await _setVoiceByName(voice);
// Set specific voice by name on device engine
if (_engine == TtsEngine.device) {
await _setVoiceByName(_preferredVoice);
}
} catch (e) {
_onError?.call(e.toString());
}
@@ -224,7 +297,31 @@ class TextToSpeechService {
/// Get available voices from the TTS engine
Future<List<Map<String, dynamic>>> getAvailableVoices() async {
if (!_initialized) {
await initialize();
await initialize(voice: _preferredVoice, engine: _engine);
}
if (_engine == TtsEngine.server && _api != null) {
try {
final serverVoices = await _api.getAvailableServerVoices();
final mapped = serverVoices
.map(
(v) => {
'name': (v['name'] ?? v['id'] ?? '').toString(),
'locale': (v['locale'] ?? '').toString(),
},
)
.where((e) => (e['name'] as String).isNotEmpty)
.toList();
if (mapped.isEmpty) {
return [
{'name': 'alloy', 'locale': ''},
];
}
return mapped;
} catch (e) {
_onError?.call(e.toString());
// Fall back to device voices
}
}
if (!_available) {
@@ -254,6 +351,151 @@ class TextToSpeechService {
}
}
// ===== Server chunked playback =====
/// Starts server-backed playback of [text], one chunk at a time.
///
/// The text is split into chunks, the first chunk is fetched and played
/// immediately, and the remaining chunks are prefetched in the background.
/// The voice falls back to `'alloy'` when no preferred voice is set.
///
/// Every call opens a new session. After each await the session is
/// re-checked, so a call superseded by `stop()` or by a newer call stops
/// touching the shared queue and the player.
///
/// Throws an [Exception] when the server returns empty audio for the first
/// chunk.
Future<void> _startServerChunkedPlayback(String text) async {
  final preferred = _preferredVoice;
  final effectiveVoice = (preferred == null || preferred.trim().isEmpty)
      ? 'alloy'
      : preferred;

  // Reset queue and create a new session.
  _session++;
  final session = _session;
  _buffered.clear();
  _expectedChunks = 0;
  _currentIndex = -1;
  _waitingNext = false;

  final chunks = _splitForTts(text);
  if (chunks.isEmpty) return;
  _expectedChunks = chunks.length;

  // Fetch the first chunk on its own so playback can start quickly.
  final firstBytes = await _fetchServerAudio(
    chunks.first,
    effectiveVoice,
    session,
  );
  if (session != _session) return; // canceled
  if (firstBytes.isEmpty) throw Exception('Empty audio response');

  await _player.stop();
  // The await above can interleave with stop() or a newer playback request;
  // without this check a stale call would write into the new session's
  // buffer and start playing outdated audio.
  if (session != _session) return;

  _buffered.add(Uint8List.fromList(firstBytes));
  _currentIndex = 0;
  await _player.play(BytesSource(_buffered.first));
  // Do not report progress or start prefetching for a canceled session.
  if (session != _session) return;
  _onSentenceIndex?.call(0);

  // Prefetch the rest in the background.
  unawaited(
    _prefetchRemainingChunks(
      chunks.skip(1).toList(),
      effectiveVoice,
      session,
    ),
  );
}
/// Fetches audio for the [remaining] chunks in order and queues it.
///
/// Stops as soon as [session] is no longer the current session. If the
/// player has already finished the previous clip and is waiting, the newly
/// buffered clip is started right away.
///
/// A chunk that fails to download or comes back empty is dropped. The
/// expected clip count is lowered to match, and completion is reported if
/// the player was only waiting for that chunk. Otherwise playback would
/// wait forever for audio that never arrives.
///
/// NOTE(review): sentence indices reported during playback are buffer
/// positions, so they drift from the original sentence order once a chunk
/// is dropped. This was already the case before this change.
Future<void> _prefetchRemainingChunks(
  List<String> remaining,
  String voice,
  int session,
) async {
  for (final chunk in remaining) {
    if (session != _session) return; // canceled

    var delivered = false;
    try {
      final audio = await _fetchServerAudio(chunk, voice, session);
      if (session != _session) return;
      if (audio.isNotEmpty) {
        _buffered.add(Uint8List.fromList(audio));
        delivered = true;
        // If the player finished the previous chunk and is waiting, start now.
        if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
          _waitingNext = false;
          await _playNextIfBuffered(session);
        }
      }
    } catch (e) {
      _onError?.call(e.toString());
      // Continue with the other chunks.
    }

    if (session != _session) return;
    if (!delivered) {
      // This chunk will never be buffered, so stop expecting it.
      _expectedChunks--;
      if (_waitingNext && (_currentIndex + 1) >= _expectedChunks) {
        // The player was idle waiting for audio that will not arrive.
        _waitingNext = false;
        _handleComplete();
      }
    }
  }
}
/// Requests synthesized audio bytes for [text] spoken with [voice].
///
/// [session] is accepted for symmetry with the callers but is not read
/// here. Requires the API client to be present.
Future<List<int>> _fetchServerAudio(
  String text,
  String voice,
  int session,
) async {
  final client = _api!;
  final audio = await client.generateSpeech(text: text, voice: voice);
  return audio;
}
/// Reacts to the player finishing the current clip.
///
/// Plays the next clip when it is already buffered, flags that playback is
/// waiting when it is still being fetched, and reports overall completion
/// once no further clips are expected.
Future<void> _onAudioComplete() async {
  final session = _session;
  final upcoming = _currentIndex + 1;

  // Nothing left to play: this is the real completion.
  if (upcoming >= _expectedChunks) {
    _handleComplete();
    return;
  }

  if (upcoming < _buffered.length) {
    await _playNextIfBuffered(session);
  } else {
    // The prefetch loop starts playback once the clip arrives.
    _waitingNext = true;
  }
}
/// Advances to the next buffered clip and plays it.
///
/// Does nothing when [session] is stale or the next clip is not buffered.
/// Reports the new index through the sentence-index callback after
/// playback has been started.
Future<void> _playNextIfBuffered(int session) async {
  final candidate = _currentIndex + 1;
  final isStale = session != _session;
  final isOutOfRange = candidate < 0 || candidate >= _buffered.length;
  if (isStale || isOutOfRange) return;

  _currentIndex = candidate;
  await _player.play(BytesSource(_buffered[candidate]));
  _onSentenceIndex?.call(_currentIndex);
}
/// Splits [text] into chunks suitable for individual speech requests.
///
/// Whitespace runs are collapsed to single spaces first. The text is then
/// split after runs of `.`, `!` or `?` that are followed by whitespace or
/// the end of the text, and any trailing text without a terminator is kept
/// as a final part. Parts longer than 300 characters are cut at the last
/// whitespace (or comma followed by whitespace) at or before the limit, or
/// hard-cut at the limit when there is none. Returns an empty list for
/// blank input.
List<String> _splitForTts(String text) {
  // Normalize whitespace.
  final normalized = text.replaceAll(RegExp(r'\s+'), ' ').trim();
  if (normalized.isEmpty) return const [];

  // `$` must stay unescaped: inside a raw string `\$` matches a literal
  // dollar sign, which split text such as "3.$5" and dropped the `$`.
  final sentenceRegex = RegExp(r'(.+?[.!?]+)(\s+|$)');

  // Split on sentence-ending punctuation while keeping the delimiter.
  final parts = <String>[];
  var consumed = 0;
  for (final match in sentenceRegex.allMatches(normalized)) {
    final sentence = (match.group(1) ?? '').trim();
    if (sentence.isNotEmpty) parts.add(sentence);
    consumed = match.end;
  }
  if (consumed < normalized.length) {
    final tail = normalized.substring(consumed).trim();
    if (tail.isNotEmpty) parts.add(tail);
  }

  // Fall back to length-based splits for very long segments.
  const maxLen = 300;
  final breakPattern = RegExp(r',\s|\s');
  final chunks = <String>[];
  for (final part in parts.isEmpty ? [normalized] : parts) {
    if (part.length <= maxLen) {
      chunks.add(part);
      continue;
    }
    var remaining = part;
    while (remaining.length > maxLen) {
      // Prefer a natural break; hard-cut when none exists before the limit.
      var cut = remaining.lastIndexOf(breakPattern, maxLen);
      if (cut <= 0) cut = maxLen;
      chunks.add(remaining.substring(0, cut).trim());
      remaining = remaining.substring(cut).trim();
    }
    if (remaining.isNotEmpty) chunks.add(remaining);
  }
  return chunks;
}
Future<void> _configurePreferredVoice() async {
if (_voiceConfigured) {
return;

View File

@@ -18,6 +18,7 @@ import 'package:conduit/l10n/app_localizations.dart';
import 'enhanced_attachment.dart';
import 'package:conduit/shared/widgets/chat_action_button.dart';
import '../../../shared/widgets/model_avatar.dart';
import '../../../shared/widgets/conduit_components.dart';
import 'package:url_launcher/url_launcher_string.dart';
import '../providers/chat_providers.dart' show sendMessageWithContainer;
import '../../../core/utils/debug_logger.dart';
@@ -457,12 +458,72 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
}
if (children.isEmpty) return const SizedBox.shrink();
// Append TTS karaoke bar if this is the active message
final ttsState = ref.watch(textToSpeechControllerProvider);
final isActive =
ttsState.activeMessageId == _messageId &&
(ttsState.status == TtsPlaybackStatus.speaking ||
ttsState.status == TtsPlaybackStatus.paused ||
ttsState.status == TtsPlaybackStatus.loading);
if (isActive && ttsState.activeSentenceIndex >= 0) {
children.add(const SizedBox(height: Spacing.sm));
children.add(_buildKaraokeBar(ttsState));
}
return Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: children,
);
}
/// Builds the card showing the sentence currently being spoken.
///
/// The active word is highlighted when [ttsState] carries a usable word
/// range for the sentence; otherwise the sentence is rendered plainly.
/// Returns an empty box when the active sentence index is out of range.
Widget _buildKaraokeBar(TextToSpeechState ttsState) {
  final theme = context.conduitTheme;
  final activeIndex = ttsState.activeSentenceIndex;
  final sentences = ttsState.sentences;
  if (activeIndex < 0 || activeIndex >= sentences.length) {
    return const SizedBox.shrink();
  }

  final sentence = sentences[activeIndex];
  final wordStart = ttsState.wordStartInSentence;
  final wordEnd = ttsState.wordEndInSentence;

  final baseStyle = TextStyle(
    color: theme.textPrimary,
    height: 1.2,
    fontSize: 14,
  );
  final highlightStyle = baseStyle.copyWith(
    backgroundColor: theme.buttonPrimary.withValues(alpha: 0.25),
    color: theme.textPrimary,
    fontWeight: FontWeight.w600,
  );

  final InlineSpan content;
  if (wordStart != null &&
      wordEnd != null &&
      wordStart >= 0 &&
      wordEnd > wordStart &&
      wordStart < sentence.length) {
    // The reported end may run past the sentence; keep it inside the string.
    final safeEnd = wordEnd.clamp(0, sentence.length);
    final before = sentence.substring(0, wordStart);
    final word = sentence.substring(wordStart, safeEnd);
    final after = sentence.substring(safeEnd);
    content = TextSpan(
      children: [
        if (before.isNotEmpty) TextSpan(text: before, style: baseStyle),
        TextSpan(text: word, style: highlightStyle),
        if (after.isNotEmpty) TextSpan(text: after, style: baseStyle),
      ],
    );
  } else {
    content = TextSpan(text: sentence, style: baseStyle);
  }

  return ConduitCard(
    padding: const EdgeInsets.all(Spacing.sm),
    child: RichText(text: content),
  );
}
bool get _shouldShowTypingIndicator =>
widget.isStreaming && _isAssistantResponseEmpty;

View File

@@ -441,10 +441,97 @@ class AppCustomizationPage extends ConsumerWidget {
TextStyle(color: theme.sidebarForeground, fontSize: 18),
),
const SizedBox(height: Spacing.sm),
ConduitCard(
padding: const EdgeInsets.all(Spacing.md),
child: Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: [
Row(
children: [
_buildIconBadge(
context,
UiUtils.platformIcon(
ios: CupertinoIcons.settings,
android: Icons.settings_voice,
),
color: theme.buttonPrimary,
),
const SizedBox(width: Spacing.sm),
const Text('Engine'),
const Spacer(),
Wrap(
spacing: Spacing.sm,
children: [
ChoiceChip(
label: const Text('On Device'),
selected: settings.ttsEngine == TtsEngine.device,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: (v) {
if (v) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
notifier.setTtsEngine(TtsEngine.device);
// Keep previous voice (device voices)
}
},
),
ChoiceChip(
label: const Text('Server'),
selected: settings.ttsEngine == TtsEngine.server,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: (v) {
if (v) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
// Clear device-specific voice so server can default
notifier.setTtsVoice(null);
notifier.setTtsEngine(TtsEngine.server);
}
},
),
],
),
],
),
],
),
),
const SizedBox(height: Spacing.sm),
_ExpandableCard(
title: l10n.ttsVoice,
subtitle: _getDisplayVoiceName(
settings.ttsVoice,
settings.ttsEngine == TtsEngine.server
? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ??
'')
: (settings.ttsVoice ?? ''),
l10n.ttsSystemDefault,
),
icon: UiUtils.platformIcon(
@@ -466,7 +553,11 @@ class AppCustomizationPage extends ConsumerWidget {
),
title: l10n.ttsVoice,
subtitle: _getDisplayVoiceName(
settings.ttsVoice,
settings.ttsEngine == TtsEngine.server
? ((settings.ttsServerVoiceName ??
settings.ttsServerVoiceId) ??
'')
: (settings.ttsVoice ?? ''),
l10n.ttsSystemDefault,
),
onTap: () => _showVoicePickerSheet(context, ref, settings),
@@ -616,7 +707,10 @@ class AppCustomizationPage extends ConsumerWidget {
final theme = context.conduitTheme;
final ttsService = ref.read(textToSpeechServiceProvider);
// Fetch available voices
// Ensure the service uses the currently selected engine before fetching
await ttsService.updateSettings(engine: settings.ttsEngine);
// Fetch available voices from the active engine
final allVoices = await ttsService.getAvailableVoices();
if (!context.mounted) return;
@@ -729,17 +823,29 @@ class AppCustomizationPage extends ConsumerWidget {
style:
theme.bodyMedium?.copyWith(
color: theme.sidebarForeground,
fontWeight: settings.ttsVoice == null
fontWeight:
(settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId == null
: settings.ttsVoice == null)
? FontWeight.bold
: FontWeight.normal,
) ??
TextStyle(color: theme.sidebarForeground),
),
trailing: settings.ttsVoice == null
trailing:
(settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId == null
: settings.ttsVoice == null)
? Icon(Icons.check, color: theme.buttonPrimary)
: null,
onTap: () {
ref.read(appSettingsProvider.notifier).setTtsVoice(null);
final notifier = ref.read(appSettingsProvider.notifier);
if (settings.ttsEngine == TtsEngine.server) {
notifier.setTtsServerVoiceId(null);
notifier.setTtsServerVoiceName(null);
} else {
notifier.setTtsVoice(null);
}
Navigator.of(sheetContext).pop();
},
),
@@ -823,7 +929,9 @@ class AppCustomizationPage extends ConsumerWidget {
final voiceId = _getVoiceIdentifier(voice);
final displayName = _formatVoiceName(voice);
final subtitle = _getVoiceSubtitle(voice);
final isSelected = settings.ttsVoice == voiceId;
final isSelected = settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId == voiceId
: settings.ttsVoice == voiceId;
return ListTile(
leading: Icon(
@@ -865,9 +973,15 @@ class AppCustomizationPage extends ConsumerWidget {
? Icon(Icons.check, color: theme.buttonPrimary)
: null,
onTap: () {
ref
.read(appSettingsProvider.notifier)
.setTtsVoice(voiceId);
final notifier = ref.read(
appSettingsProvider.notifier,
);
if (settings.ttsEngine == TtsEngine.server) {
notifier.setTtsServerVoiceId(voiceId);
notifier.setTtsServerVoiceName(displayName);
} else {
notifier.setTtsVoice(voiceId);
}
Navigator.of(sheetContext).pop();
},
);

View File

@@ -65,6 +65,62 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.13.0"
audioplayers:
dependency: "direct main"
description:
name: audioplayers
sha256: c05c6147124cd63e725e861335a8b4d57300b80e6e92cea7c145c739223bbaef
url: "https://pub.dev"
source: hosted
version: "5.2.1"
audioplayers_android:
dependency: transitive
description:
name: audioplayers_android
sha256: b00e1a0e11365d88576320ec2d8c192bc21f1afb6c0e5995d1c57ae63156acb5
url: "https://pub.dev"
source: hosted
version: "4.0.3"
audioplayers_darwin:
dependency: transitive
description:
name: audioplayers_darwin
sha256: "3034e99a6df8d101da0f5082dcca0a2a99db62ab1d4ddb3277bed3f6f81afe08"
url: "https://pub.dev"
source: hosted
version: "5.0.2"
audioplayers_linux:
dependency: transitive
description:
name: audioplayers_linux
sha256: "60787e73fefc4d2e0b9c02c69885402177e818e4e27ef087074cf27c02246c9e"
url: "https://pub.dev"
source: hosted
version: "3.1.0"
audioplayers_platform_interface:
dependency: transitive
description:
name: audioplayers_platform_interface
sha256: "365c547f1bb9e77d94dd1687903a668d8f7ac3409e48e6e6a3668a1ac2982adb"
url: "https://pub.dev"
source: hosted
version: "6.1.0"
audioplayers_web:
dependency: transitive
description:
name: audioplayers_web
sha256: "22cd0173e54d92bd9b2c80b1204eb1eb159ece87475ab58c9788a70ec43c2a62"
url: "https://pub.dev"
source: hosted
version: "4.1.0"
audioplayers_windows:
dependency: transitive
description:
name: audioplayers_windows
sha256: "9536812c9103563644ada2ef45ae523806b0745f7a78e89d1b5fb1951de90e1a"
url: "https://pub.dev"
source: hosted
version: "3.1.0"
boolean_selector:
dependency: transitive
description:

View File

@@ -47,6 +47,7 @@ dependencies:
record: ^6.1.1
stts: ^1.2.5
flutter_tts: ^4.2.3
audioplayers: ^5.2.1
image_picker: ^1.2.0
file_picker: ^10.3.3
path_provider: ^2.1.4