Merge pull request #129 from cogwheel0/add-server-speech-to-text-features

add-server-speech-to-text-features
This commit is contained in:
cogwheel
2025-11-05 01:00:48 +05:30
committed by GitHub
31 changed files with 2194 additions and 463 deletions

BIN
flutter_01.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 257 KiB

View File

@@ -49,13 +49,13 @@ PODS:
- Flutter
- image_picker_ios (0.0.1):
- Flutter
- mic_stream_recorder (0.0.1):
- Flutter
- package_info_plus (0.4.5):
- Flutter
- path_provider_foundation (0.0.1):
- Flutter
- FlutterMacOS
- record_ios (1.1.0):
- Flutter
- SDWebImage (5.21.1):
- SDWebImage/Core (= 5.21.1)
- SDWebImage/Core (5.21.1)
@@ -96,9 +96,9 @@ DEPENDENCIES:
- flutter_secure_storage (from `.symlinks/plugins/flutter_secure_storage/ios`)
- flutter_tts (from `.symlinks/plugins/flutter_tts/ios`)
- image_picker_ios (from `.symlinks/plugins/image_picker_ios/ios`)
- mic_stream_recorder (from `.symlinks/plugins/mic_stream_recorder/ios`)
- package_info_plus (from `.symlinks/plugins/package_info_plus/ios`)
- path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`)
- record_ios (from `.symlinks/plugins/record_ios/ios`)
- share_handler_ios (from `.symlinks/plugins/share_handler_ios/ios`)
- share_handler_ios_models (from `.symlinks/plugins/share_handler_ios/ios/Models`)
- share_plus (from `.symlinks/plugins/share_plus/ios`)
@@ -135,12 +135,12 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/flutter_tts/ios"
image_picker_ios:
:path: ".symlinks/plugins/image_picker_ios/ios"
mic_stream_recorder:
:path: ".symlinks/plugins/mic_stream_recorder/ios"
package_info_plus:
:path: ".symlinks/plugins/package_info_plus/ios"
path_provider_foundation:
:path: ".symlinks/plugins/path_provider_foundation/darwin"
record_ios:
:path: ".symlinks/plugins/record_ios/ios"
share_handler_ios:
:path: ".symlinks/plugins/share_handler_ios/ios"
share_handler_ios_models:
@@ -172,9 +172,9 @@ SPEC CHECKSUMS:
flutter_secure_storage: 1ed9476fba7e7a782b22888f956cce43e2c62f13
flutter_tts: b88dbc8655d3dc961bc4a796e4e16a4cc1795833
image_picker_ios: 7fe1ff8e34c1790d6fff70a32484959f563a928a
mic_stream_recorder: 27d2d1225563a3a28bf4019fc5cc198cffd7dad1
package_info_plus: af8e2ca6888548050f16fa2f1938db7b5a5df499
path_provider_foundation: 080d55be775b7414fd5a5ef3ac137b97b097e564
record_ios: f75fa1d57f840012775c0e93a38a7f3ceea1a374
SDWebImage: f29024626962457f3470184232766516dee8dfea
share_handler_ios: e2244e990f826b2c8eaa291ac3831569438ba0fb
share_handler_ios_models: fc638c9b4330dc7f082586c92aee9dfa0b87b871

View File

@@ -11,6 +11,7 @@ final class PreferenceKeys {
static const String voiceLocaleId = 'voice_locale_id';
static const String voiceHoldToTalk = 'voice_hold_to_talk';
static const String voiceAutoSendFinal = 'voice_auto_send_final';
static const String voiceSttPreference = 'voice_stt_preference';
static const String socketTransportMode = 'socket_transport_mode';
static const String quickPills = 'quick_pills';
static const String sendOnEnterKey = 'send_on_enter';
@@ -27,6 +28,7 @@ final class PreferenceKeys {
static const String ttsEngine = 'tts_engine'; // 'device' | 'server'
static const String ttsServerVoiceId = 'tts_server_voice_id';
static const String ttsServerVoiceName = 'tts_server_voice_name';
static const String voiceSilenceDuration = 'voice_silence_duration';
}
final class LegacyPreferenceKeys {

View File

@@ -90,6 +90,7 @@ class PersistenceMigrator {
copyString(PreferenceKeys.voiceLocaleId);
copyBool(PreferenceKeys.voiceHoldToTalk);
copyBool(PreferenceKeys.voiceAutoSendFinal);
copyString(PreferenceKeys.voiceSttPreference);
copyString(PreferenceKeys.socketTransportMode);
copyStringList(PreferenceKeys.quickPills);
copyBool(PreferenceKeys.sendOnEnterKey);
@@ -194,6 +195,7 @@ class PersistenceMigrator {
PreferenceKeys.voiceLocaleId,
PreferenceKeys.voiceHoldToTalk,
PreferenceKeys.voiceAutoSendFinal,
PreferenceKeys.voiceSttPreference,
PreferenceKeys.socketTransportMode,
PreferenceKeys.quickPills,
PreferenceKeys.sendOnEnterKey,

View File

@@ -4,7 +4,7 @@ import 'dart:io';
import 'package:dio/dio.dart';
import 'package:dio/io.dart';
import 'package:flutter/foundation.dart';
// import 'package:http_parser/http_parser.dart';
import 'package:http_parser/http_parser.dart';
// Removed legacy websocket/socket.io imports
import 'package:uuid/uuid.dart';
import '../models/backend_config.dart';
@@ -1607,15 +1607,69 @@ class ApiService {
return [];
}
/// Uploads [audioBytes] as a multipart form to the server transcription
/// endpoint (`/api/v1/audio/transcriptions`) and returns the decoded reply.
///
/// * [fileName] is trimmed; when null or blank, `audio.m4a` is used.
/// * [mimeType] is trimmed; when null or blank, it is inferred from the
///   resolved file name.
/// * [language] is only sent when it is non-blank after trimming.
///
/// A JSON object response is returned as-is; a plain string response is
/// wrapped as `{'text': <string>}`.
///
/// Throws an [ArgumentError] when [audioBytes] is empty and a [StateError]
/// when the response body is neither a JSON object nor a string.
Future<Map<String, dynamic>> transcribeSpeech({
  required Uint8List audioBytes,
  String? fileName,
  String? mimeType,
  String? language,
}) async {
  if (audioBytes.isEmpty) {
    throw ArgumentError('audioBytes cannot be empty for transcription');
  }

  // Normalise the optional inputs once; blank values fall back to defaults.
  final trimmedName = fileName?.trim() ?? '';
  final uploadName = trimmedName.isEmpty ? 'audio.m4a' : trimmedName;

  final trimmedMime = mimeType?.trim() ?? '';
  final uploadMime = trimmedMime.isEmpty
      ? _inferMimeTypeFromName(uploadName)
      : trimmedMime;

  _traceApi(
    'Uploading $uploadName (${audioBytes.length} bytes) for transcription',
  );

  final trimmedLanguage = language?.trim() ?? '';
  final response = await _dio.post(
    '/api/v1/audio/transcriptions',
    data: FormData.fromMap({
      'file': MultipartFile.fromBytes(
        audioBytes,
        filename: uploadName,
        contentType: _parseMediaType(uploadMime),
      ),
      if (trimmedLanguage.isNotEmpty) 'language': trimmedLanguage,
    }),
    options: Options(headers: const {'accept': 'application/json'}),
  );

  return switch (response.data) {
    final Map<String, dynamic> json => json,
    final String text => <String, dynamic>{'text': text},
    final other => throw StateError(
      'Unexpected transcription response type: ${other.runtimeType}',
    ),
  };
}
Future<({Uint8List bytes, String mimeType})> generateSpeech({
required String text,
String? voice,
double? speed,
}) async {
final textPreview = text.length > 50 ? text.substring(0, 50) : text;
_traceApi('Generating speech for text: $textPreview...');
final response = await _dio.post(
'/api/v1/audio/speech',
data: {'input': text, if (voice != null) 'voice': voice},
data: {
'input': text,
if (voice != null) 'voice': voice,
if (speed != null) 'speed': speed,
},
options: Options(responseType: ResponseType.bytes),
);
@@ -1690,7 +1744,43 @@ class ApiService {
return bytes.length >= 2 && bytes[0] == 0xFF && (bytes[1] & 0xE0) == 0xE0;
}
// Server audio transcription removed; rely on on-device STT in UI layer
/// Returns an audio MIME type guessed from the extension of [name].
///
/// The extension is the text after the last `.` and is matched
/// case-insensitively. Names without an extension, names ending in `.`, and
/// unrecognised extensions all resolve to `audio/mpeg`.
String _inferMimeTypeFromName(String name) {
  const fallback = 'audio/mpeg';

  final dotIndex = name.lastIndexOf('.');
  final hasExtension = dotIndex != -1 && dotIndex != name.length - 1;
  if (!hasExtension) {
    return fallback;
  }

  return switch (name.substring(dotIndex + 1).toLowerCase()) {
    'wav' => 'audio/wav',
    'ogg' => 'audio/ogg',
    'm4a' || 'mp4' => 'audio/mp4',
    'aac' => 'audio/aac',
    'webm' => 'audio/webm',
    'flac' => 'audio/flac',
    // 'mp3' and anything unknown share the same result.
    _ => fallback,
  };
}
/// Parses [value] into a [MediaType], or returns `null` when it cannot be
/// used.
///
/// `null` and empty strings yield `null`. A malformed media type string also
/// yields `null` so callers can fall back to not sending a content type.
MediaType? _parseMediaType(String? value) {
  if (value == null || value.isEmpty) {
    return null;
  }
  try {
    return MediaType.parse(value);
  } on FormatException {
    // Only the documented parse failure is treated as "no media type";
    // any other error indicates a real bug and should surface.
    return null;
  }
}
// Image Generation
Future<List<Map<String, dynamic>>> getImageModels() async {

View File

@@ -8,8 +8,11 @@ import 'animation_service.dart';
part 'settings_service.g.dart';
/// Speech-to-text preference selection.
enum SttPreference { auto, deviceOnly, serverOnly }
/// TTS engine selection
enum TtsEngine { device, server }
enum TtsEngine { auto, device, server }
/// Service for managing app-wide settings including accessibility preferences
class SettingsService {
@@ -32,6 +35,9 @@ class SettingsService {
.quickPills; // StringList of identifiers e.g. ['web','image','tools']
// Chat input behavior
static const String _sendOnEnterKey = PreferenceKeys.sendOnEnterKey;
// Voice silence duration for auto-stop (milliseconds)
static const String _voiceSilenceDurationKey =
PreferenceKeys.voiceSilenceDuration;
static Box<dynamic> _preferencesBox() =>
Hive.box<dynamic>(HiveBoxNames.preferences);
@@ -151,6 +157,11 @@ class SettingsService {
ttsServerVoiceId: box.get(PreferenceKeys.ttsServerVoiceId) as String?,
ttsServerVoiceName:
box.get(PreferenceKeys.ttsServerVoiceName) as String?,
sttPreference: _parseSttPreference(
box.get(PreferenceKeys.voiceSttPreference) as String?,
),
voiceSilenceDuration:
(box.get(_voiceSilenceDurationKey) as int? ?? 2000).clamp(300, 3000),
),
);
}
@@ -174,6 +185,8 @@ class SettingsService {
PreferenceKeys.ttsPitch: settings.ttsPitch,
PreferenceKeys.ttsVolume: settings.ttsVolume,
PreferenceKeys.ttsEngine: settings.ttsEngine.name,
PreferenceKeys.voiceSttPreference: settings.sttPreference.name,
_voiceSilenceDurationKey: settings.voiceSilenceDuration,
};
await box.putAll(updates);
@@ -216,11 +229,31 @@ class SettingsService {
static TtsEngine _parseTtsEngine(String? raw) {
switch ((raw ?? '').toLowerCase()) {
case 'auto':
case '':
return TtsEngine.auto;
case 'server':
return TtsEngine.server;
case 'device':
default:
return TtsEngine.device;
default:
return TtsEngine.auto;
}
}
/// Maps a persisted preference string to an [SttPreference].
///
/// Matching is case-insensitive and accepts the enum name as well as the
/// `snake_case` and short (`device` / `server`) spellings. `null`, empty, and
/// unrecognised values resolve to [SttPreference.auto].
static SttPreference _parseSttPreference(String? raw) {
  final normalized = raw?.toLowerCase() ?? '';
  return switch (normalized) {
    'deviceonly' || 'device_only' || 'device' => SttPreference.deviceOnly,
    'serveronly' || 'server_only' || 'server' => SttPreference.serverOnly,
    // Covers 'auto' plus every unknown value.
    _ => SttPreference.auto,
  };
}
@@ -304,6 +337,16 @@ class SettingsService {
return _preferencesBox().put(_sendOnEnterKey, value);
}
/// Returns the stored voice silence duration in milliseconds.
///
/// Falls back to 2000 when nothing is stored, and always clamps the result
/// to the 300–3000 range.
static Future<int> getVoiceSilenceDuration() {
  final stored = _preferencesBox().get(_voiceSilenceDurationKey) as int?;
  final effective = stored ?? 2000;
  return Future.value(effective.clamp(300, 3000));
}
/// Persists the voice silence duration, clamping [milliseconds] to the
/// 300–3000 range before writing it to the preferences box.
static Future<void> setVoiceSilenceDuration(int milliseconds) =>
    _preferencesBox().put(
      _voiceSilenceDurationKey,
      milliseconds.clamp(300, 3000),
    );
/// Get effective animation duration considering all settings
static Duration getEffectiveAnimationDuration(
BuildContext context,
@@ -359,6 +402,7 @@ class AppSettings {
final String socketTransportMode; // 'polling' or 'ws'
final List<String> quickPills; // e.g., ['web','image']
final bool sendOnEnter;
final SttPreference sttPreference;
final String? ttsVoice;
final double ttsSpeechRate;
final double ttsPitch;
@@ -366,6 +410,7 @@ class AppSettings {
final TtsEngine ttsEngine;
final String? ttsServerVoiceId;
final String? ttsServerVoiceName;
final int voiceSilenceDuration;
const AppSettings({
this.reduceMotion = false,
this.animationSpeed = 1.0,
@@ -380,13 +425,15 @@ class AppSettings {
this.socketTransportMode = 'ws',
this.quickPills = const [],
this.sendOnEnter = false,
this.sttPreference = SttPreference.auto,
this.ttsVoice,
this.ttsSpeechRate = 0.5,
this.ttsPitch = 1.0,
this.ttsVolume = 1.0,
this.ttsEngine = TtsEngine.device,
this.ttsEngine = TtsEngine.auto,
this.ttsServerVoiceId,
this.ttsServerVoiceName,
this.voiceSilenceDuration = 2000,
});
AppSettings copyWith({
@@ -403,6 +450,7 @@ class AppSettings {
String? socketTransportMode,
List<String>? quickPills,
bool? sendOnEnter,
SttPreference? sttPreference,
Object? ttsVoice = const _DefaultValue(),
double? ttsSpeechRate,
double? ttsPitch,
@@ -410,6 +458,7 @@ class AppSettings {
TtsEngine? ttsEngine,
Object? ttsServerVoiceId = const _DefaultValue(),
Object? ttsServerVoiceName = const _DefaultValue(),
int? voiceSilenceDuration,
}) {
return AppSettings(
reduceMotion: reduceMotion ?? this.reduceMotion,
@@ -429,6 +478,7 @@ class AppSettings {
socketTransportMode: socketTransportMode ?? this.socketTransportMode,
quickPills: quickPills ?? this.quickPills,
sendOnEnter: sendOnEnter ?? this.sendOnEnter,
sttPreference: sttPreference ?? this.sttPreference,
ttsVoice: ttsVoice is _DefaultValue ? this.ttsVoice : ttsVoice as String?,
ttsSpeechRate: ttsSpeechRate ?? this.ttsSpeechRate,
ttsPitch: ttsPitch ?? this.ttsPitch,
@@ -440,6 +490,7 @@ class AppSettings {
ttsServerVoiceName: ttsServerVoiceName is _DefaultValue
? this.ttsServerVoiceName
: ttsServerVoiceName as String?,
voiceSilenceDuration: voiceSilenceDuration ?? this.voiceSilenceDuration,
);
}
@@ -457,6 +508,7 @@ class AppSettings {
other.voiceLocaleId == voiceLocaleId &&
other.voiceHoldToTalk == voiceHoldToTalk &&
other.voiceAutoSendFinal == voiceAutoSendFinal &&
other.sttPreference == sttPreference &&
other.sendOnEnter == sendOnEnter &&
other.ttsVoice == ttsVoice &&
other.ttsSpeechRate == ttsSpeechRate &&
@@ -465,13 +517,14 @@ class AppSettings {
other.ttsEngine == ttsEngine &&
other.ttsServerVoiceId == ttsServerVoiceId &&
other.ttsServerVoiceName == ttsServerVoiceName &&
other.voiceSilenceDuration == voiceSilenceDuration &&
_listEquals(other.quickPills, quickPills);
// socketTransportMode intentionally not included in == to avoid frequent rebuilds
}
@override
int get hashCode {
return Object.hash(
return Object.hashAll([
reduceMotion,
animationSpeed,
hapticFeedback,
@@ -482,6 +535,7 @@ class AppSettings {
voiceLocaleId,
voiceHoldToTalk,
voiceAutoSendFinal,
sttPreference,
socketTransportMode,
sendOnEnter,
ttsVoice,
@@ -491,8 +545,9 @@ class AppSettings {
ttsEngine,
ttsServerVoiceId,
ttsServerVoiceName,
voiceSilenceDuration,
Object.hashAllUnordered(quickPills),
);
]);
}
}
@@ -603,6 +658,14 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
await SettingsService.setSendOnEnter(value);
}
/// Switches the speech-to-text preference and persists the full settings.
///
/// Does nothing when [preference] already matches the current state.
Future<void> setSttPreference(SttPreference preference) async {
  final changed = state.sttPreference != preference;
  if (changed) {
    state = state.copyWith(sttPreference: preference);
    await SettingsService.saveSettings(state);
  }
}
Future<void> setTtsVoice(String? voice) async {
state = state.copyWith(ttsVoice: voice);
await SettingsService.saveSettings(state);
@@ -638,6 +701,11 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
await SettingsService.saveSettings(state);
}
/// Updates the voice silence duration in state and persists it.
///
/// [milliseconds] is clamped to the 300–3000 range before the state is
/// updated, so the in-memory value always matches the value that
/// [SettingsService.setVoiceSilenceDuration] writes to storage.
Future<void> setVoiceSilenceDuration(int milliseconds) async {
  // Previously the raw value went into state while storage clamped it,
  // leaving the two out of sync until the next reload.
  final sanitized = milliseconds.clamp(300, 3000);
  state = state.copyWith(voiceSilenceDuration: sanitized);
  await SettingsService.setVoiceSilenceDuration(sanitized);
}
Future<void> resetToDefaults() async {
const defaultSettings = AppSettings();
await SettingsService.saveSettings(defaultSettings);

View File

@@ -277,6 +277,13 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
)..start();
}
Timer? imageCollectionDebounce;
String? pendingImageContent;
String? pendingImageMessageId;
String? pendingImageSignature;
String? lastProcessedImageSignature;
int imageCollectionRequestId = 0;
void disposeSocketSubscriptions() {
if (socketSubscriptions.isEmpty) {
return;
@@ -287,56 +294,119 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
} catch (_) {}
}
socketSubscriptions.clear();
imageCollectionDebounce?.cancel();
imageCollectionDebounce = null;
pendingImageContent = null;
pendingImageMessageId = null;
pendingImageSignature = null;
lastProcessedImageSignature = null;
imageCollectionRequestId = 0;
socketWatchdog?.stop();
}
bool isSearching = false;
// Runs the image-reference collection for the most recently queued
// content, if any, and merges newly found image URLs into the last
// assistant message's `files` list.
void runPendingImageCollection() {
// Any scheduled debounce is now obsolete: we are running immediately.
imageCollectionDebounce?.cancel();
imageCollectionDebounce = null;
// Snapshot the pending request before clearing it below.
final content = pendingImageContent;
final targetMessageId = pendingImageMessageId;
final signature = pendingImageSignature;
if (content == null || targetMessageId == null || signature == null) {
return;
}
pendingImageContent = null;
pendingImageMessageId = null;
pendingImageSignature = null;
// Bump the request counter; results from earlier requests are ignored
// when they complete (see the requestId check in the callback).
final requestId = ++imageCollectionRequestId;
unawaited(
workerManager
.schedule<String, List<Map<String, dynamic>>>(
_collectImageReferencesWorker,
content,
debugLabel: 'stream_collect_images',
)
.then((collected) {
// A newer request has been started since this one; drop the result.
if (requestId != imageCollectionRequestId) {
return;
}
final currentMessages = getMessages();
if (currentMessages.isEmpty) {
return;
}
// Only apply the result if the last message is still the assistant
// message this collection was started for.
final last = currentMessages.last;
if (last.id != targetMessageId || last.role != 'assistant') {
return;
}
// Mark this content as processed even when nothing was collected.
lastProcessedImageSignature = signature;
if (collected.isEmpty) {
return;
}
final existing = last.files ?? <Map<String, dynamic>>[];
// URLs already attached to the message; non-string URLs are mapped to
// '' and then removed.
final seen = <String>{
for (final f in existing)
if (f['url'] is String) (f['url'] as String) else '',
}..removeWhere((e) => e.isEmpty);
final merged = <Map<String, dynamic>>[...existing];
// Append each newly collected, non-empty URL exactly once.
for (final f in collected) {
final url = f['url'] as String?;
if (url != null && url.isNotEmpty && !seen.contains(url)) {
merged.add({'type': 'image', 'url': url});
seen.add(url);
}
}
// Only touch the message when at least one new image was added.
if (merged.length != existing.length) {
updateLastMessageWith((m) => m.copyWith(files: merged));
}
})
// Errors from the scheduled work or the callback are deliberately ignored.
.catchError((_) {}),
);
}
void updateImagesFromCurrentContent() {
try {
final msgs = getMessages();
if (msgs.isEmpty || msgs.last.role != 'assistant') return;
final content = msgs.last.content;
final last = msgs.last;
final content = last.content;
if (content.isEmpty) return;
final targetMessageId = msgs.last.id;
unawaited(
workerManager
.schedule<String, List<Map<String, dynamic>>>(
_collectImageReferencesWorker,
content,
debugLabel: 'stream_collect_images',
)
.then((collected) {
if (collected.isEmpty) return;
final currentMessages = getMessages();
if (currentMessages.isEmpty) return;
final last = currentMessages.last;
if (last.id != targetMessageId || last.role != 'assistant') {
return;
}
final targetMessageId = last.id;
final signature =
'$targetMessageId:${content.hashCode}:${content.length}';
final existing = last.files ?? <Map<String, dynamic>>[];
final seen = <String>{
for (final f in existing)
if (f['url'] is String) (f['url'] as String) else '',
}..removeWhere((e) => e.isEmpty);
if (signature == lastProcessedImageSignature &&
pendingImageSignature == null) {
return;
}
if (signature == pendingImageSignature) {
return;
}
final merged = <Map<String, dynamic>>[...existing];
for (final f in collected) {
final url = f['url'] as String?;
if (url != null && url.isNotEmpty && !seen.contains(url)) {
merged.add({'type': 'image', 'url': url});
seen.add(url);
}
}
pendingImageMessageId = targetMessageId;
pendingImageContent = content;
pendingImageSignature = signature;
if (merged.length != existing.length) {
updateLastMessageWith((m) => m.copyWith(files: merged));
}
})
.catchError((_) {}),
);
final shouldDelay = last.isStreaming;
imageCollectionDebounce?.cancel();
if (shouldDelay) {
imageCollectionDebounce = Timer(
const Duration(milliseconds: 200),
runPendingImageCollection,
);
} else {
runPendingImageCollection();
}
} catch (_) {}
}

View File

@@ -647,13 +647,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
return;
}
// Log content replacement for debugging
DebugLogger.log(
'Replacing message content: messageId=${lastMessage.id}, '
'oldLength=${lastMessage.content.length}, newLength=${content.length}',
scope: 'chat/providers',
);
_ensureFormatterForMessage(lastMessage);
// Defensive check: ensure the formatter is for the correct message

View File

@@ -107,11 +107,9 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
// Listen to settings changes and update TTS when initialized
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
if (_service.isInitialized && _service.isAvailable) {
final selectedVoice = next.ttsEngine == TtsEngine.server
? next.ttsServerVoiceId
: next.ttsVoice;
_service.updateSettings(
voice: selectedVoice,
voice: next.ttsVoice,
serverVoice: next.ttsServerVoiceId,
speechRate: next.ttsSpeechRate,
pitch: next.ttsPitch,
volume: next.ttsVolume,
@@ -137,9 +135,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
final settings = ref.read(appSettingsProvider);
final future = _service
.initialize(
voice: settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId
: settings.ttsVoice,
deviceVoice: settings.ttsVoice,
serverVoice: settings.ttsServerVoiceId,
speechRate: settings.ttsSpeechRate,
pitch: settings.ttsPitch,
volume: settings.ttsVolume,
@@ -222,8 +219,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
// Prepare sentence split for highlighting
final cleanText = MarkdownToText.convert(text);
final sentences = _splitForTts(cleanText);
final offsets = _computeOffsets(sentences);
final sentences = _service.splitTextForSpeech(cleanText);
final offsets = _computeOffsets(cleanText, sentences);
state = state.copyWith(
status: TtsPlaybackStatus.loading,
@@ -268,30 +265,24 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
}
}
List<String> _splitForTts(String text) {
final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();
if (normalized.isEmpty) return const [];
final parts = <String>[];
final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)");
int index = 0;
for (final match in sentenceRegex.allMatches('$normalized ')) {
final s = match.group(1) ?? '';
if (s.trim().isNotEmpty) parts.add(s.trim());
index = match.end;
}
if (index < normalized.length) {
final tail = normalized.substring(index).trim();
if (tail.isNotEmpty) parts.add(tail);
}
return parts;
}
List<int> _computeOffsets(List<String> sentences) {
List<int> _computeOffsets(String source, List<String> sentences) {
if (sentences.isEmpty) return const [];
final offsets = <int>[];
int acc = 0;
for (final s in sentences) {
offsets.add(acc);
acc += s.length + 1; // assume a space or punctuation between
var cursor = 0;
for (final sentence in sentences) {
final chunk = sentence.trim();
if (chunk.isEmpty) {
offsets.add(cursor);
continue;
}
final index = source.indexOf(chunk, cursor);
if (index == -1) {
offsets.add(cursor);
cursor += chunk.length;
} else {
offsets.add(index);
cursor = index + chunk.length;
}
}
return offsets;
}

View File

@@ -16,8 +16,10 @@ class TextToSpeechService {
final FlutterTts _tts = FlutterTts();
final AudioPlayer _player = AudioPlayer();
final ApiService? _api;
TtsEngine _engine = TtsEngine.device;
TtsEngine _engine = TtsEngine.auto;
String? _preferredVoice;
String? _serverPreferredVoice;
double _speechRate = 0.5;
bool _initialized = false;
bool _available = false;
bool _voiceConfigured = false;
@@ -41,6 +43,8 @@ class TextToSpeechService {
bool get isInitialized => _initialized;
bool get isAvailable => _available;
bool get deviceEngineAvailable => _deviceEngineAvailable;
bool get serverEngineAvailable => _api != null;
TextToSpeechService({ApiService? api}) : _api = api {
// Wire minimal player events to callbacks
@@ -59,6 +63,69 @@ class TextToSpeechService {
});
}
/// Applies volume, rate, pitch and voice to the on-device TTS engine.
///
/// [_deviceEngineAvailable] is cleared up front and only set to `true` once
/// every configuration call has succeeded. On iOS (non-web) the shared audio
/// instance and playback audio category are configured as well.
///
/// [voice] is the device voice name to select; it is applied only when the
/// current engine is not [TtsEngine.server]. When the engine is
/// [TtsEngine.server], the voice is left unconfigured.
///
/// Any error from the underlying engine resets the availability flags and
/// is rethrown to the caller.
Future<void> _configureDeviceEngine({
  required String? voice,
  required double speechRate,
  required double pitch,
  required double volume,
}) async {
  _deviceEngineAvailable = false;
  try {
    await _tts.awaitSpeakCompletion(false);
    await _tts.setVolume(volume);
    await _tts.setSpeechRate(speechRate);
    await _tts.setPitch(pitch);

    if (!kIsWeb && Platform.isIOS) {
      await _tts.setSharedInstance(true);
      await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
        IosTextToSpeechAudioCategoryOptions.mixWithOthers,
        IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
        IosTextToSpeechAudioCategoryOptions.allowBluetooth,
        IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
      ]);
    }

    if (_engine != TtsEngine.server) {
      // Honour the caller-supplied voice instead of silently reading
      // _preferredVoice, so the required parameter is not ignored.
      await _setVoiceByName(voice);
    } else {
      _voiceConfigured = false;
    }

    _deviceEngineAvailable = true;
  } catch (_) {
    _voiceConfigured = false;
    _deviceEngineAvailable = false;
    rethrow;
  }
}
/// Whether text-to-speech can be used with the currently selected engine.
///
/// The server engine counts as available whenever an API client is present;
/// `auto` is available when either the device or the server engine is.
bool _computeAvailability() => switch (_engine) {
  TtsEngine.device => _deviceEngineAvailable,
  TtsEngine.server => _api != null,
  TtsEngine.auto => _deviceEngineAvailable || _api != null,
};
/// Whether playback should go through the server engine.
///
/// * `server`: only when an API client is present.
/// * `device`: never.
/// * `auto`: the device engine is preferred; the server is used only when
///   the device engine is unavailable and an API client is present.
bool _shouldUseServer() => switch (_engine) {
  TtsEngine.server => _api != null,
  TtsEngine.device => false,
  TtsEngine.auto => !_deviceEngineAvailable && _api != null,
};
/// Register callbacks for TTS lifecycle events
void bindHandlers({
VoidCallback? onStart,
@@ -96,56 +163,60 @@ class TextToSpeechService {
/// Initialize the native TTS engine lazily
Future<bool> initialize({
String? voice,
String? deviceVoice,
String? serverVoice,
double speechRate = 0.5,
double pitch = 1.0,
double volume = 1.0,
TtsEngine engine = TtsEngine.device,
TtsEngine engine = TtsEngine.auto,
}) async {
if (_initialized) {
_engine = engine;
_speechRate = speechRate;
if (deviceVoice != null) {
_preferredVoice = deviceVoice;
_voiceConfigured = false;
}
if (serverVoice != null) {
_serverPreferredVoice = serverVoice;
}
_available = _computeAvailability();
return _available;
}
try {
_engine = engine;
_preferredVoice = voice;
await _tts.awaitSpeakCompletion(false);
_engine = engine;
_speechRate = speechRate;
_preferredVoice = deviceVoice;
_serverPreferredVoice = serverVoice;
_voiceConfigured = false;
// Set volume
await _tts.setVolume(volume);
// Set speech rate
await _tts.setSpeechRate(speechRate);
// Set pitch
await _tts.setPitch(pitch);
if (!kIsWeb && Platform.isIOS) {
await _tts.setSharedInstance(true);
await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
IosTextToSpeechAudioCategoryOptions.mixWithOthers,
IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
IosTextToSpeechAudioCategoryOptions.allowBluetooth,
IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
]);
if (_engine != TtsEngine.server || _api == null) {
try {
await _configureDeviceEngine(
voice: deviceVoice,
speechRate: speechRate,
pitch: pitch,
volume: volume,
);
} catch (e) {
if (_engine == TtsEngine.device) {
_available = false;
_onError?.call(e.toString());
_initialized = true;
return _available;
}
}
// Set the voice (specific or default) when using device engine
if (_engine == TtsEngine.device) {
await _setVoiceByName(voice);
}
_deviceEngineAvailable = true;
} catch (e) {
} else {
_deviceEngineAvailable = false;
if (_engine != TtsEngine.server) {
_available = false;
_onError?.call(e.toString());
_initialized = true;
return _available;
}
try {
await _tts.awaitSpeakCompletion(false);
await _tts.setVolume(volume);
await _tts.setSpeechRate(speechRate);
await _tts.setPitch(pitch);
} catch (_) {}
}
_available = _engine == TtsEngine.server || _deviceEngineAvailable;
_available = _computeAvailability();
_initialized = true;
return _available;
}
@@ -156,10 +227,23 @@ class TextToSpeechService {
}
if (!_initialized) {
await initialize(voice: _preferredVoice, engine: _engine);
await initialize(
deviceVoice: _preferredVoice,
serverVoice: _serverPreferredVoice,
engine: _engine,
);
}
if (_engine == TtsEngine.server && _api != null) {
final bool useServer = _shouldUseServer();
if (useServer) {
if (_api == null) {
if (_deviceEngineAvailable) {
await _speakOnDevice(text);
return;
}
throw StateError('Server text-to-speech is unavailable');
}
// Server-backed TTS with sentence chunking & queued playback
try {
await _startServerChunkedPlayback(text);
@@ -196,7 +280,7 @@ class TextToSpeechService {
Future<void> pause() async {
if (!_initialized) return;
try {
if (_engine == TtsEngine.server) {
if (_shouldUseServer()) {
await _player.pause();
_handlePause();
} else if (_deviceEngineAvailable) {
@@ -210,7 +294,7 @@ class TextToSpeechService {
Future<void> resume() async {
if (!_initialized) return;
try {
if (_engine == TtsEngine.server) {
if (_shouldUseServer()) {
if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
_waitingNext = false;
await _playNextIfBuffered(_session);
@@ -235,7 +319,7 @@ class TextToSpeechService {
_expectedChunks = 0;
_currentIndex = -1;
_waitingNext = false;
if (_engine == TtsEngine.server) {
if (_shouldUseServer()) {
await _player.stop();
_handleCancel();
} else {
@@ -254,17 +338,24 @@ class TextToSpeechService {
/// Update TTS settings on-the-fly
Future<void> updateSettings({
Object? voice = const _VoiceNotProvided(),
Object? serverVoice = const _VoiceNotProvided(),
double? speechRate,
double? pitch,
double? volume,
TtsEngine? engine,
}) async {
final voiceProvided = voice is! _VoiceNotProvided;
final serverVoiceProvided = serverVoice is! _VoiceNotProvided;
final voiceValue = voiceProvided ? voice as String? : null;
final serverVoiceValue = serverVoiceProvided
? serverVoice as String?
: null;
if (!_initialized || !_available) {
// Allow engine and voice to update before init
if (engine != null) _engine = engine;
if (voiceProvided) _preferredVoice = voiceValue;
if (serverVoiceProvided) _serverPreferredVoice = serverVoiceValue;
if (speechRate != null) _speechRate = speechRate;
return;
}
@@ -275,22 +366,28 @@ class TextToSpeechService {
if (voiceProvided) {
_preferredVoice = voiceValue;
}
if (serverVoiceProvided) {
_serverPreferredVoice = serverVoiceValue;
}
if (volume != null) {
await _tts.setVolume(volume);
}
if (speechRate != null) {
_speechRate = speechRate;
await _tts.setSpeechRate(speechRate);
}
if (pitch != null) {
await _tts.setPitch(pitch);
}
// Set specific voice by name on device engine
if (_engine == TtsEngine.device && voiceProvided) {
// Set specific voice by name on device-capable engines
if (_engine != TtsEngine.server && voiceProvided) {
await _setVoiceByName(_preferredVoice);
}
} catch (e) {
_onError?.call(e.toString());
}
_available = _computeAvailability();
}
/// Set voice by name, or use system default if null
@@ -343,7 +440,11 @@ class TextToSpeechService {
/// Get available voices from the TTS engine
Future<List<Map<String, dynamic>>> getAvailableVoices() async {
if (!_initialized) {
await initialize(voice: _preferredVoice, engine: _engine);
await initialize(
deviceVoice: _preferredVoice,
serverVoice: _serverPreferredVoice,
engine: _engine,
);
}
if (_engine == TtsEngine.server && _api != null) {
@@ -425,6 +526,10 @@ class TextToSpeechService {
}
Future<String?> _resolveServerVoice() async {
final serverSelected = _serverPreferredVoice?.trim();
if (serverSelected != null && serverSelected.isNotEmpty) {
return serverSelected;
}
final selected = _preferredVoice?.trim();
if (selected != null && selected.isNotEmpty) {
return selected;
@@ -545,9 +650,19 @@ class TextToSpeechService {
String? voice,
int session,
) async {
return await _api!.generateSpeech(text: text, voice: voice);
return await _api!.generateSpeech(
text: text,
voice: voice,
speed: _speechRate,
);
}
/// Splits [text] into the chunks used for playback sequencing.
///
/// This mirrors the server-side streaming behavior so UI consumers can stay
/// in sync with sentence indices reported during playback.
List<String> splitTextForSpeech(String text) => _splitForTts(text);
Future<void> _onAudioComplete() async {
final session = _session;
// If there are more expected chunks
@@ -580,43 +695,66 @@ class TextToSpeechService {
}
List<String> _splitForTts(String text) {
// Normalize whitespace
final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();
if (normalized.isEmpty) return const [];
// Mirrors OpenWebUI's extractSentencesForAudio implementation
// See: src/lib/utils/index.ts lines 953-970, 907-928
// Split on sentence-ending punctuation while keeping the delimiter
final parts = <String>[];
final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)");
int index = 0;
for (final match in sentenceRegex.allMatches('$normalized ')) {
final s = match.group(1) ?? '';
if (s.trim().isNotEmpty) parts.add(s.trim());
index = match.end;
}
if (index < normalized.length) {
final tail = normalized.substring(index).trim();
if (tail.isNotEmpty) parts.add(tail);
}
// 1. Preserve code blocks (replace with placeholders)
final codeBlocks = <String>[];
var processed = text;
var codeBlockIndex = 0;
// Fallback to length-based splits for very long segments
const maxLen = 300;
final chunks = <String>[];
for (final p in parts.isEmpty ? [normalized] : parts) {
if (p.length <= maxLen) {
chunks.add(p);
// Match triple backticks code blocks
final codeBlockRegex = RegExp(r'```[\s\S]*?```', multiLine: true);
processed = processed.replaceAllMapped(codeBlockRegex, (match) {
final placeholder = '\u0000$codeBlockIndex\u0000';
codeBlocks.add(match.group(0)!);
codeBlockIndex++;
return placeholder;
});
// 2. Split on sentence-ending punctuation: .!?
// OpenWebUI uses: /(?<=[.!?])\s+/
final sentences = processed
.split(RegExp(r'(?<=[.!?])\s+'))
.map((s) => s.trim())
.where((s) => s.isNotEmpty)
.toList();
// 3. Restore code blocks from placeholders
final restoredSentences = sentences
.map((sentence) {
return sentence.replaceAllMapped(RegExp(r'\u0000(\d+)\u0000'), (
match,
) {
final idx = int.parse(match.group(1)!);
return idx < codeBlocks.length ? codeBlocks[idx] : '';
});
})
.where((s) => s.isNotEmpty)
.toList();
// 4. Merge short sentences (< 4 words OR < 50 chars)
// OpenWebUI logic from extractSentencesForAudio
final mergedChunks = <String>[];
for (final sentence in restoredSentences) {
if (mergedChunks.isEmpty) {
mergedChunks.add(sentence);
} else {
// Try splitting on commas/spaces
var remaining = p;
while (remaining.length > maxLen) {
int cut = remaining.lastIndexOf(RegExp(r",\s|\s"), maxLen);
cut = cut <= 0 ? maxLen : cut;
chunks.add(remaining.substring(0, cut).trim());
remaining = remaining.substring(cut).trim();
final lastIndex = mergedChunks.length - 1;
final previousText = mergedChunks[lastIndex];
final wordCount = previousText.split(RegExp(r'\s+')).length;
final charCount = previousText.length;
// Merge if previous chunk is too short
if (wordCount < 4 || charCount < 50) {
mergedChunks[lastIndex] = '$previousText $sentence';
} else {
mergedChunks.add(sentence);
}
if (remaining.isNotEmpty) chunks.add(remaining);
}
}
return chunks;
return mergedChunks.isEmpty ? [text.trim()] : mergedChunks;
}
Future<void> _configurePreferredVoice() async {

View File

@@ -108,11 +108,18 @@ class VoiceCallService {
throw Exception('Voice input initialization failed');
}
// Check if local STT is available
// Check if preferred STT path is available
final hasLocalStt = _voiceInput.hasLocalStt;
if (!hasLocalStt) {
final hasServerStt = _voiceInput.hasServerStt;
final ready = switch (_voiceInput.preference) {
SttPreference.deviceOnly => hasLocalStt,
SttPreference.serverOnly => hasServerStt,
SttPreference.auto => hasLocalStt || hasServerStt,
};
if (!ready) {
_updateState(VoiceCallState.error);
throw Exception('Speech recognition not available on this device');
throw Exception('Preferred speech recognition engine is unavailable');
}
// Check microphone permissions
@@ -125,9 +132,8 @@ class VoiceCallService {
// Initialize TTS with current app settings (engine/voice/rate/pitch/volume)
final settings = _ref.read(appSettingsProvider);
await _tts.initialize(
voice: settings.ttsEngine == TtsEngine.server
? settings.ttsServerVoiceId
: settings.ttsVoice,
deviceVoice: settings.ttsVoice,
serverVoice: settings.ttsServerVoiceId,
speechRate: settings.ttsSpeechRate,
pitch: settings.ttsPitch,
volume: settings.ttsVolume,
@@ -202,10 +208,18 @@ class VoiceCallService {
_listeningPaused = false;
_accumulatedTranscript = '';
// Check if voice input is available
if (!_voiceInput.hasLocalStt) {
final hasLocalStt = _voiceInput.hasLocalStt;
final hasServerStt = _voiceInput.hasServerStt;
final pref = _voiceInput.preference;
final engineAvailable = switch (pref) {
SttPreference.deviceOnly => hasLocalStt,
SttPreference.serverOnly => hasServerStt,
SttPreference.auto => hasLocalStt || hasServerStt,
};
if (!engineAvailable) {
_updateState(VoiceCallState.error);
throw Exception('Voice input not available on this device');
throw Exception('Preferred speech recognition engine is unavailable');
}
_updateState(VoiceCallState.listening);
@@ -572,11 +586,9 @@ VoiceCallService voiceCallService(Ref ref) {
// Keep TTS settings in sync with app settings during a call
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
// Update voice/engine and runtime parameters
final selectedVoice = next.ttsEngine == TtsEngine.server
? next.ttsServerVoiceId
: next.ttsVoice;
service._tts.updateSettings(
voice: selectedVoice,
voice: next.ttsVoice,
serverVoice: next.ttsServerVoiceId,
speechRate: next.ttsSpeechRate,
pitch: next.ttsPitch,
volume: next.ttsVolume,

View File

@@ -1,14 +1,19 @@
import 'dart:async';
import 'dart:io' show Platform;
import 'dart:io' show File, Platform;
import 'package:flutter/widgets.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import 'package:riverpod_annotation/riverpod_annotation.dart';
import 'package:record/record.dart';
import 'package:mic_stream_recorder/mic_stream_recorder.dart';
import 'package:stts/stts.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';
import '../../../core/providers/app_providers.dart';
import '../../../core/services/api_service.dart';
import '../../../core/services/settings_service.dart';
part 'voice_input_service.g.dart';
// Removed path imports as server transcription fallback was removed
// Lightweight replacement for previous stt.LocaleName used across the UI
class LocaleName {
@@ -18,31 +23,50 @@ class LocaleName {
}
class VoiceInputService {
final AudioRecorder _recorder = AudioRecorder();
final MicStreamRecorder _recorder = MicStreamRecorder();
final Stt _speech = Stt();
final ApiService? _api;
final Ref? _ref;
bool _isInitialized = false;
bool _isListening = false;
bool _localSttAvailable = false;
SttPreference _preference = SttPreference.auto;
bool _usingServerStt = false;
String? _selectedLocaleId;
List<LocaleName> _locales = const [];
StreamController<String>? _textStreamController;
String _currentText = '';
// Public stream for UI waveform visualization (emits partial text length as proxy)
StreamController<int>? _intensityController;
Stream<int> get intensityStream =>
_intensityController?.stream ?? const Stream<int>.empty();
int _lastIntensity = 0;
Timer? _intensityDecayTimer;
Timer? _silenceTimer;
bool _hasDetectedSpeech = false;
int _amplitudeCallbackCount = 0;
Timer? _amplitudeFallbackTimer;
/// Public stream of partial/final transcript strings and special audio tokens.
Stream<String> get textStream =>
_textStreamController?.stream ?? const Stream<String>.empty();
Timer? _autoStopTimer;
StreamSubscription<Amplitude>? _ampSub;
StreamSubscription<double>? _ampSub;
StreamSubscription<SttRecognition>? _sttResultSub;
StreamSubscription<SttState>? _sttStateSub;
bool get isSupportedPlatform => Platform.isAndroid || Platform.isIOS;
bool get hasServerStt => _api != null;
SttPreference get preference => _preference;
bool get allowsServerFallback => _preference != SttPreference.deviceOnly;
bool get prefersServerOnly => _preference == SttPreference.serverOnly;
bool get prefersDeviceOnly => _preference == SttPreference.deviceOnly;
VoiceInputService({ApiService? api, Ref? ref})
: _api = api,
_ref = ref;
void updatePreference(SttPreference preference) {
_preference = preference;
}
Future<bool> initialize() async {
if (_isInitialized) return true;
@@ -87,17 +111,15 @@ class VoiceInputService {
Future<bool> checkPermissions() async {
try {
// Prefer stts permission check which will request microphone permission
final mic = await _speech.hasPermission();
if (mic) return true;
return await _recorder.hasPermission();
return await _speech.hasPermission();
} catch (_) {
return false;
}
}
bool get isListening => _isListening;
bool get isAvailable => _isInitialized; // service usable (local or fallback)
bool get isAvailable =>
_isInitialized && (_localSttAvailable || hasServerStt);
bool get hasLocalStt => _localSttAvailable;
// Add a method to check if on-device STT is properly supported
@@ -166,7 +188,7 @@ class VoiceInputService {
}
if (_isListening) {
stopListening();
unawaited(stopListening());
}
_textStreamController = StreamController<String>.broadcast();
@@ -174,82 +196,109 @@ class VoiceInputService {
_isListening = true;
_intensityController = StreamController<int>.broadcast();
_lastIntensity = 0;
_usingServerStt = false;
// Begin a gentle decay timer so the UI level bars fall when silent
_intensityDecayTimer?.cancel();
_intensityDecayTimer = Timer.periodic(const Duration(milliseconds: 120), (
t,
) {
if (!_isListening) return;
if (_lastIntensity <= 0) return;
_lastIntensity = (_lastIntensity - 1).clamp(0, 10);
try {
_intensityController?.add(_lastIntensity);
} catch (_) {}
});
_startIntensityDecayTimer();
final bool canUseLocal = _localSttAvailable;
final bool serverAvailable = hasServerStt;
final bool shouldUseLocal =
canUseLocal && _preference != SttPreference.serverOnly;
final bool shouldUseServer =
serverAvailable &&
(_preference == SttPreference.serverOnly || !shouldUseLocal);
if (shouldUseLocal) {
_autoStopTimer?.cancel();
_autoStopTimer = Timer(const Duration(seconds: 60), () {
if (_isListening) {
unawaited(_stopListening());
}
});
// Check if speech recognition is available before trying to use it
if (_localSttAvailable) {
// Schedule a check for speech recognition availability
Future.microtask(() async {
try {
final isStillAvailable = await _speech.isSupported();
if (!isStillAvailable && _isListening) {
// Speech recognition no longer available; stop listening
_localSttAvailable = false;
_stopListening();
return;
if (hasServerStt && allowsServerFallback) {
unawaited(_beginServerFallback());
} else {
unawaited(_stopListening());
}
}
} catch (e) {
} catch (_) {
// ignore availability check errors
}
});
// Local on-device STT path
_autoStopTimer?.cancel();
_autoStopTimer = Timer(const Duration(seconds: 60), () {
if (_isListening) {
_stopListening();
}
});
// Listen for results and state changes; keep subscriptions so we can cancel later
_sttResultSub = _speech.onResultChanged.listen((SttRecognition result) {
if (!_isListening) return;
final prevLen = _currentText.length;
_currentText = result.text;
_textStreamController?.add(_currentText);
// Map number of new characters to a rough 0..10 intensity
final delta = (_currentText.length - prevLen).clamp(0, 50);
final mapped = (delta / 5.0).ceil(); // 0 chars -> 0, 1-5 -> 1, ...
final mapped = (delta / 5.0).ceil();
_lastIntensity = mapped.clamp(0, 10);
try {
_intensityController?.add(_lastIntensity);
} catch (_) {}
if (result.isFinal) {
_stopListening();
unawaited(_stopListening());
}
}, onError: (_) {});
_sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {});
try {
if (_selectedLocaleId != null) {
_speech.setLanguage(_selectedLocaleId!).catchError((_) {});
}
// Start recognition (no await blocking the sync flow)
_speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) {
// On-device STT failed; stop listening entirely as server transcription is removed
Future(() async {
try {
if (_selectedLocaleId != null) {
await _speech.setLanguage(_selectedLocaleId!);
}
await _speech.start(SttRecognitionOptions(punctuation: true));
} catch (error) {
_localSttAvailable = false;
_stopListening();
});
} catch (e) {
_localSttAvailable = false;
_stopListening();
}
if (!_isListening) return;
if (hasServerStt && allowsServerFallback) {
await _beginServerFallback();
} else {
_textStreamController?.addError(error);
await _stopListening();
}
}
});
} else if (shouldUseServer) {
_usingServerStt = true;
_autoStopTimer?.cancel();
_autoStopTimer = Timer(const Duration(seconds: 90), () {
if (_isListening) {
unawaited(_stopListening());
}
});
Future(() async {
try {
await _startServerRecording();
} catch (error) {
if (!_isListening) return;
_textStreamController?.addError(error);
await _stopListening();
}
});
} else {
// No local STT available; stop immediately since server transcription is removed
_stopListening();
final Exception error;
if (prefersDeviceOnly) {
error = Exception(
'On-device speech recognition required but unavailable',
);
} else if (prefersServerOnly) {
error = Exception('Server speech-to-text is not configured');
} else {
error = Exception('Speech recognition not available on this device');
}
Future.microtask(() {
_textStreamController?.addError(error);
unawaited(_stopListening());
});
}
return _textStreamController!.stream;
@@ -258,14 +307,11 @@ class VoiceInputService {
/// Single entry point for starting a voice recognition session.
///
/// Runs [initialize] first, then verifies microphone access through
/// [checkPermissions]. Returns the transcript stream produced by
/// [startListening], or throws an [Exception] when microphone access has not
/// been granted.
Future<Stream<String>> beginListening() async {
  await initialize();

  final micGranted = await checkPermissions();
  if (micGranted) {
    return startListening();
  }
  throw Exception('Microphone permission not granted');
}
@@ -277,53 +323,349 @@ class VoiceInputService {
if (!_isListening) return;
_isListening = false;
if (_localSttAvailable) {
try {
await _speech.stop();
} catch (_) {}
// Cancel STT subscriptions
try {
_sttResultSub?.cancel();
} catch (_) {}
_sttResultSub = null;
try {
_sttStateSub?.cancel();
} catch (_) {}
_sttStateSub = null;
}
_autoStopTimer?.cancel();
_autoStopTimer = null;
_ampSub?.cancel();
_silenceTimer?.cancel();
_silenceTimer = null;
_amplitudeFallbackTimer?.cancel();
_amplitudeFallbackTimer = null;
if (_usingServerStt) {
await _finalizeServerRecording();
} else {
await _stopLocalStt();
}
await _ampSub?.cancel();
_ampSub = null;
_intensityDecayTimer?.cancel();
_intensityDecayTimer = null;
_lastIntensity = 0;
if (_currentText.isNotEmpty) {
if (!_usingServerStt && _currentText.isNotEmpty) {
_textStreamController?.add(_currentText);
}
_textStreamController?.close();
_textStreamController = null;
_intensityController?.close();
_intensityController = null;
await _closeControllers();
_usingServerStt = false;
_hasDetectedSpeech = false;
}
/// Tears down the on-device recognition session.
///
/// Cancels the result and state subscriptions when they exist, then asks the
/// recognizer to stop if local STT is flagged as available. A failure in any
/// single step is ignored so the remaining teardown still runs.
Future<void> _stopLocalStt() async {
  final resultSub = _sttResultSub;
  if (resultSub != null) {
    try {
      await resultSub.cancel();
    } catch (_) {}
    _sttResultSub = null;
  }

  final stateSub = _sttStateSub;
  if (stateSub != null) {
    try {
      await stateSub.cancel();
    } catch (_) {}
    _sttStateSub = null;
  }

  if (!_localSttAvailable) return;
  try {
    await _speech.stop();
  } catch (_) {}
}
/// Switches the active listening session from on-device recognition to
/// server-side transcription.
///
/// The local recognizer is shut down first, then microphone recording for the
/// server path is started with a 90-second auto-stop timer. Any reason the
/// fallback cannot proceed (disabled by preference, no server STT, recorder
/// start failure) is reported on the transcript stream and ends the session.
Future<void> _beginServerFallback() async {
  if (!allowsServerFallback) {
    _textStreamController?.addError(
      Exception('Server speech-to-text disabled in preferences'),
    );
    await _stopListening();
    return;
  }

  await _stopLocalStt();

  // The session may have been stopped while the local recognizer was shutting
  // down. Starting the recorder now would leave it running, because
  // _stopListening() and the auto-stop timer both do nothing once
  // _isListening is false.
  if (!_isListening) return;

  if (!hasServerStt) {
    _textStreamController?.addError(
      Exception('Server speech-to-text unavailable'),
    );
    await _stopListening();
    return;
  }

  _usingServerStt = true;
  _autoStopTimer?.cancel();
  _autoStopTimer = Timer(const Duration(seconds: 90), () {
    if (_isListening) {
      unawaited(_stopListening());
    }
  });

  try {
    await _startServerRecording();
  } catch (error) {
    // Same guard as the direct server path: a session that has already ended
    // has nothing left to report to or to stop.
    if (!_isListening) return;
    _textStreamController?.addError(error);
    await _stopListening();
  }
}
/// Starts recording microphone audio to a fresh temporary file for
/// server-side transcription and wires up amplitude monitoring.
///
/// Every amplitude sample refreshes the UI intensity level and is passed to
/// [_handleServerAmplitude]. If no sample has arrived one second after the
/// subscription is set up, a 15-second timer is armed that stops the session
/// on its own.
Future<void> _startServerRecording() async {
  final recordingPath = await _createRecordingPath();
  _hasDetectedSpeech = false;
  await _recorder.startRecording(recordingPath);

  await _ampSub?.cancel();
  _amplitudeFallbackTimer?.cancel();
  _amplitudeCallbackCount = 0;

  void onAmplitude(double level) {
    // Counted even when not listening so the fallback below can tell whether
    // the recorder delivers amplitude samples at all.
    _amplitudeCallbackCount++;
    if (!_isListening) return;
    _lastIntensity = _normalizedToIntensity(level);
    try {
      _intensityController?.add(_lastIntensity);
    } catch (_) {}
    _handleServerAmplitude(level);
  }

  void stopIfStillRecording() {
    if (_isListening && _usingServerStt) {
      unawaited(_stopListening());
    }
  }

  _ampSub = _recorder.amplitudeStream.listen(onAmplitude);

  _amplitudeFallbackTimer = Timer(const Duration(seconds: 1), () {
    if (_amplitudeCallbackCount != 0) return;
    _silenceTimer = Timer(const Duration(seconds: 15), stopIfStillRecording);
  });
}
/// Silence detection for the server recording path.
///
/// A sample above the speech threshold marks speech as detected and clears any
/// pending silence timer. Once speech has been detected, the first sample at
/// or below the threshold arms a timer that stops the session. The timer
/// length is read from the app settings and defaults to 2000 ms when no [Ref]
/// is available.
void _handleServerAmplitude(double amplitude) {
  const double speechThreshold = 0.55;

  final sessionActive = _usingServerStt && _isListening;
  if (!sessionActive) return;
  if (!amplitude.isFinite) return;

  if (amplitude > speechThreshold) {
    _hasDetectedSpeech = true;
    _silenceTimer?.cancel();
    _silenceTimer = null;
    return;
  }

  // Only arm one timer per silent stretch, and only after real speech.
  if (!_hasDetectedSpeech || _silenceTimer != null) return;

  final silenceMs =
      _ref?.read(appSettingsProvider).voiceSilenceDuration ?? 2000;
  _silenceTimer = Timer(Duration(milliseconds: silenceMs), () {
    if (_isListening && _usingServerStt) {
      unawaited(_stopListening());
    }
  });
}
/// Builds the path of a new `.m4a` recording file in the temporary directory.
///
/// The file name embeds the current time in milliseconds since the epoch.
Future<String> _createRecordingPath() async {
  final tempDir = await getTemporaryDirectory();
  final timestamp = DateTime.now().millisecondsSinceEpoch;
  return p.join(tempDir.path, 'conduit_voice_$timestamp.m4a');
}
/// Stops the recorder, uploads the captured audio for transcription and
/// publishes the transcript on the text stream.
///
/// Does nothing when no API client is configured or when the recorder yields
/// no file. Every failure (stopping the recorder, reading the file, the
/// transcription request, an empty transcript) is forwarded to the text stream
/// as an error instead of being thrown, so the caller can finish its own
/// teardown. The recording file is deleted afterwards in all cases.
Future<void> _finalizeServerRecording() async {
  final api = _api;
  if (api == null) return;

  String? path;
  try {
    path = await _recorder.stopRecording();
  } catch (error) {
    // A recorder failure must not escape: the caller still has to close the
    // stream controllers after this method returns.
    _textStreamController?.addError(error);
    return;
  }
  if (path == null || path.isEmpty) return;

  final file = File(path);
  try {
    if (!await file.exists()) return;
    final bytes = await file.readAsBytes();
    if (bytes.isEmpty) return;

    final response = await api.transcribeSpeech(
      audioBytes: bytes,
      fileName: p.basename(path),
      mimeType: 'audio/mp4',
      language: _languageForServer(),
    );

    final transcript = _extractTranscriptionText(response);
    if (transcript != null && transcript.trim().isNotEmpty) {
      _currentText = transcript.trim();
      _textStreamController?.add(_currentText);
    } else {
      // Routed through the catch below so listeners see it as a stream error.
      throw StateError('Empty transcription result');
    }
  } catch (error) {
    _textStreamController?.addError(error);
  } finally {
    unawaited(_cleanupRecordingFile(file));
  }
}
/// Best-effort removal of a finished recording; file system errors are
/// ignored.
Future<void> _cleanupRecordingFile(File file) async {
  try {
    final stillThere = await file.exists();
    if (!stillThere) return;
    await file.delete();
  } catch (_) {}
}
/// Picks the language code sent along with a server transcription request.
///
/// Uses the primary subtag of the selected locale id (the part before the
/// first `-` or `_`, lower-cased) when it has at least two characters.
/// Otherwise falls back to the platform locale's language code. Returns
/// `null` when neither yields a value.
String? _languageForServer() {
  final selected = _selectedLocaleId;
  if (selected != null && selected.isNotEmpty) {
    final code = selected.split(RegExp('[-_]')).first.toLowerCase();
    if (code.length >= 2) return code;
  }

  try {
    final deviceLocale = WidgetsBinding.instance.platformDispatcher.locale;
    final deviceCode = deviceLocale.languageCode.toLowerCase();
    if (deviceCode.isNotEmpty) return deviceCode;
  } catch (_) {}

  return null;
}
/// Pulls the transcript out of a transcription response.
///
/// Several response layouts are probed in a fixed order and the first
/// non-blank string wins:
///
/// 1. `text`
/// 2. `display_text`, or `DisplayText` when the former is absent
/// 3. `result.text`
/// 4. the first entry of `combinedRecognizedPhrases` (a string, or a map with
///    `display` / `Display` / `transcript` / `text`)
/// 5. `results.channels[0].alternatives[0]` (`transcript` or `text`)
/// 6. every entry of `segments` (a string, or a map with `text`), trimmed and
///    joined with single spaces
///
/// Values from steps 1-5 are returned untrimmed. Returns `null` when nothing
/// matches.
String? _extractTranscriptionText(Map<String, dynamic> data) {
  // Yields [value] unchanged when it is a string with non-whitespace content.
  String? nonBlank(Object? value) =>
      value is String && value.trim().isNotEmpty ? value : null;

  final topLevel =
      nonBlank(data['text']) ??
      nonBlank(data['display_text'] ?? data['DisplayText']);
  if (topLevel != null) return topLevel;

  final result = data['result'];
  if (result is Map<String, dynamic>) {
    final nested = nonBlank(result['text']);
    if (nested != null) return nested;
  }

  final phrases = data['combinedRecognizedPhrases'];
  if (phrases is List && phrases.isNotEmpty) {
    final head = phrases.first;
    final phrase = head is Map<String, dynamic>
        ? nonBlank(
            head['display'] ??
                head['Display'] ??
                head['transcript'] ??
                head['text'],
          )
        : nonBlank(head);
    if (phrase != null) return phrase;
  }

  final results = data['results'];
  final channels = results is Map<String, dynamic> ? results['channels'] : null;
  if (channels is List && channels.isNotEmpty) {
    final channel = channels.first;
    final alternatives = channel is Map<String, dynamic>
        ? channel['alternatives']
        : null;
    if (alternatives is List && alternatives.isNotEmpty) {
      final best = alternatives.first;
      if (best is Map<String, dynamic>) {
        final spoken = nonBlank(best['transcript'] ?? best['text']);
        if (spoken != null) return spoken;
      }
    }
  }

  final segments = data['segments'];
  if (segments is List && segments.isNotEmpty) {
    final pieces = <String>[];
    for (final segment in segments) {
      final piece = nonBlank(
        segment is Map<String, dynamic> ? segment['text'] : segment,
      );
      if (piece != null) pieces.add(piece.trim());
    }
    final joined = pieces.join(' ');
    if (joined.isNotEmpty) return joined;
  }

  return null;
}
/// Maps an amplitude onto the 0..10 scale used by the intensity stream.
///
/// The value is multiplied by ten, rounded and limited to the 0..10 range.
/// NaN and infinite inputs map to 0.
int _normalizedToIntensity(double value) {
  if (!value.isFinite) return 0;
  final scaled = (value * 10).round();
  if (scaled < 0) return 0;
  if (scaled > 10) return 10;
  return scaled;
}
/// Closes and forgets the transcript and intensity stream controllers.
///
/// Errors raised while closing are ignored. Each field is reset to `null`
/// once its close attempt has finished.
Future<void> _closeControllers() async {
  final textController = _textStreamController;
  if (textController != null) {
    try {
      await textController.close();
    } catch (_) {}
    _textStreamController = null;
  }

  final intensityController = _intensityController;
  if (intensityController != null) {
    try {
      await intensityController.close();
    } catch (_) {}
    _intensityController = null;
  }
}
/// (Re)starts the periodic timer that lets the reported intensity fall back
/// towards zero.
///
/// Every 120 ms, while listening and while the level is above zero, the level
/// drops by one step and the new value is pushed to the intensity stream.
void _startIntensityDecayTimer() {
  void decayOneStep(Timer _) {
    if (!_isListening || _lastIntensity <= 0) return;
    _lastIntensity = (_lastIntensity - 1).clamp(0, 10);
    try {
      _intensityController?.add(_lastIntensity);
    } catch (_) {}
  }

  _intensityDecayTimer?.cancel();
  _intensityDecayTimer = Timer.periodic(
    const Duration(milliseconds: 120),
    decayOneStep,
  );
}
/// Releases the service: requests a stop of any active session without
/// waiting for it, cancels the silence timer and disposes the speech
/// recognizer, ignoring errors from the latter.
void dispose() {
  unawaited(stopListening());
  _silenceTimer?.cancel();
  try {
    unawaited(_speech.dispose().catchError((_) {}));
  } catch (_) {}
}
// Recording fallback: when on-device STT is unavailable or disabled by preference, audio is recorded and sent to the server for transcription
// Native locales not used in server transcription mode
}
final voiceInputServiceProvider = Provider<VoiceInputService>((ref) {
return VoiceInputService();
final api = ref.watch(apiServiceProvider);
final service = VoiceInputService(api: api, ref: ref);
final currentSettings = ref.read(appSettingsProvider);
service.updatePreference(currentSettings.sttPreference);
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
if (previous?.sttPreference != next.sttPreference) {
service.updatePreference(next.sttPreference);
}
});
ref.onDispose(service.dispose);
return service;
});
@Riverpod(keepAlive: true)
@@ -332,8 +674,16 @@ Future<bool> voiceInputAvailable(Ref ref) async {
if (!service.isSupportedPlatform) return false;
final initialized = await service.initialize();
if (!initialized) return false;
// If local STT exists, we consider it available; otherwise ensure mic permission for fallback
if (service.hasLocalStt) return true;
switch (service.preference) {
case SttPreference.deviceOnly:
return service.hasLocalStt;
case SttPreference.serverOnly:
return service.hasServerStt;
case SttPreference.auto:
if (service.hasLocalStt) return true;
if (!service.hasServerStt) return false;
break;
}
final hasPermission = await service.checkPermissions();
if (!hasPermission) return false;
return service.isAvailable;
@@ -349,3 +699,18 @@ final voiceIntensityStreamProvider = StreamProvider<int>((ref) {
final service = ref.watch(voiceInputServiceProvider);
return service.intensityStream;
});
/// Whether on-device speech recognition can be used.
///
/// Resolves to `false` when the voice input service fails to initialize.
/// Otherwise it is `true` if the service already reports local STT, and else
/// defers to the service's on-device support check.
final localVoiceRecognitionAvailableProvider = FutureProvider<bool>((
  ref,
) async {
  final voiceInput = ref.watch(voiceInputServiceProvider);
  if (!await voiceInput.initialize()) return false;
  return voiceInput.hasLocalStt || await voiceInput.checkOnDeviceSupport();
});
/// Whether the voice input service reports server-side speech-to-text as
/// available.
final serverVoiceRecognitionAvailableProvider = Provider<bool>(
  (ref) => ref.watch(voiceInputServiceProvider).hasServerStt,
);

View File

@@ -2380,7 +2380,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
}
}
// Server transcription removed; only on-device STT is supported
// When on-device STT is unavailable we fall back to server transcription.
Future<void> _stopListening() async {
_intensitySub?.cancel();

View File

@@ -71,6 +71,11 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
bool _allowTypingIndicator = false;
Timer? _typingGateTimer;
String _ttsPlainText = '';
Timer? _ttsPlainTextDebounce;
Map<String, dynamic>? _pendingTtsPlainTextPayload;
String? _pendingTtsPlainTextSource;
String? _lastAppliedTtsPlainTextSource;
int _ttsPlainTextRequestId = 0;
// Active version index (-1 means current/live content)
int _activeVersionIndex = -1;
// press state handled by shared ChatActionButton
@@ -162,13 +167,11 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
final rSegs = ReasoningParser.segments(raw);
final out = <MessageSegment>[];
final textBuf = StringBuffer();
final textSegments = <String>[];
if (rSegs == null || rSegs.isEmpty) {
final tSegs = ToolCallsParser.segments(raw);
if (tSegs == null || tSegs.isEmpty) {
out.add(MessageSegment.text(raw));
textBuf.write(raw);
textSegments.add(raw);
} else {
for (final s in tSegs) {
@@ -176,7 +179,6 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
out.add(MessageSegment.tool(s.entry!));
} else if ((s.text ?? '').isNotEmpty) {
out.add(MessageSegment.text(s.text!));
textBuf.write(s.text);
textSegments.add(s.text!);
}
}
@@ -190,7 +192,6 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
final tSegs = ToolCallsParser.segments(t);
if (tSegs == null || tSegs.isEmpty) {
out.add(MessageSegment.text(t));
textBuf.write(t);
textSegments.add(t);
} else {
for (final s in tSegs) {
@@ -198,7 +199,6 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
out.add(MessageSegment.tool(s.entry!));
} else if ((s.text ?? '').isNotEmpty) {
out.add(MessageSegment.text(s.text!));
textBuf.write(s.text);
textSegments.add(s.text!);
}
}
@@ -208,23 +208,15 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
}
final segments = out.isEmpty ? [MessageSegment.text(raw)] : out;
String speechText;
try {
final worker = ref.read(workerManagerProvider);
speechText = await worker.schedule<Map<String, dynamic>, String>(
_buildTtsPlainTextWorker,
{'segments': textSegments, 'fallback': raw},
debugLabel: 'tts_plain_text',
);
} catch (_) {
speechText = _buildTtsPlainTextFallback(textSegments, raw);
}
if (!mounted) return;
setState(() {
_segments = segments;
_ttsPlainText = speechText;
});
_scheduleTtsPlainTextBuild(
List<String>.from(textSegments, growable: false),
raw,
);
_updateTypingIndicatorGate();
}
@@ -290,6 +282,96 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
return result;
}
/// Queues a rebuild of the plain text used for text-to-speech.
///
/// [segments] are the text parts of the message and [raw] is the full message
/// source. [raw] also serves as the identity of the request: nothing is queued
/// when the same source is already queued, or was the last one applied while
/// nothing else is queued. While the message is streaming the build is
/// debounced by 250 ms; otherwise it runs immediately.
///
/// When there is no content at all, the speech text is cleared and all queued
/// or running builds are invalidated.
void _scheduleTtsPlainTextBuild(List<String> segments, String raw) {
  final hasContent =
      segments.any((segment) => segment.trim().isNotEmpty) ||
      raw.trim().isNotEmpty;
  if (!hasContent) {
    // Drop the debounce and bump the request id so a worker build that is
    // still running cannot repopulate the text that is cleared below.
    _ttsPlainTextDebounce?.cancel();
    _ttsPlainTextDebounce = null;
    _ttsPlainTextRequestId++;
    _pendingTtsPlainTextPayload = null;
    _pendingTtsPlainTextSource = null;
    _lastAppliedTtsPlainTextSource = '';
    if (_ttsPlainText.isNotEmpty && mounted) {
      setState(() {
        _ttsPlainText = '';
      });
    }
    return;
  }

  // Already applied and nothing newer is waiting.
  if (_pendingTtsPlainTextPayload == null &&
      raw == _lastAppliedTtsPlainTextSource) {
    return;
  }
  // Already queued for exactly this source.
  if (raw == _pendingTtsPlainTextSource &&
      _pendingTtsPlainTextPayload != null) {
    return;
  }

  final pendingSegments = List<String>.from(segments, growable: false);
  _pendingTtsPlainTextPayload = {
    'segments': pendingSegments,
    'fallback': raw,
  };
  _pendingTtsPlainTextSource = raw;

  final delay = widget.isStreaming
      ? const Duration(milliseconds: 250)
      : Duration.zero;
  _ttsPlainTextDebounce?.cancel();
  if (delay == Duration.zero) {
    _runPendingTtsPlainTextBuild();
  } else {
    _ttsPlainTextDebounce = Timer(delay, _runPendingTtsPlainTextBuild);
  }
}
/// Starts the queued speech-text build, if there is one.
///
/// Clears the debounce timer, takes the queued payload and source out of the
/// queue and launches the build under a fresh request id, without awaiting it.
void _runPendingTtsPlainTextBuild() {
  _ttsPlainTextDebounce?.cancel();
  _ttsPlainTextDebounce = null;

  final queuedPayload = _pendingTtsPlainTextPayload;
  final queuedSource = _pendingTtsPlainTextSource;
  if (queuedPayload == null || queuedSource == null) return;

  _pendingTtsPlainTextPayload = null;
  _pendingTtsPlainTextSource = null;

  _ttsPlainTextRequestId += 1;
  unawaited(
    _executeTtsPlainTextBuild(
      queuedPayload,
      queuedSource,
      _ttsPlainTextRequestId,
    ),
  );
}
/// Builds the speech text for [payload] and applies it to the widget state.
///
/// The build is scheduled on the worker manager. If that fails for any
/// reason, the in-process fallback builder is used with the payload's segments
/// and [raw]. The result is discarded when the widget is no longer mounted or
/// when [requestId] is no longer the current request id.
Future<void> _executeTtsPlainTextBuild(
  Map<String, dynamic> payload,
  String raw,
  int requestId,
) async {
  final fallbackSegments = (payload['segments'] as List).cast<String>();

  String speechText;
  try {
    speechText = await ref
        .read(workerManagerProvider)
        .schedule<Map<String, dynamic>, String>(
          _buildTtsPlainTextWorker,
          payload,
          debugLabel: 'tts_plain_text',
        );
  } catch (_) {
    speechText = _buildTtsPlainTextFallback(fallbackSegments, raw);
  }

  final superseded = requestId != _ttsPlainTextRequestId;
  if (!mounted || superseded) return;

  _lastAppliedTtsPlainTextSource = raw;
  if (_ttsPlainText == speechText) return;
  setState(() {
    _ttsPlainText = speechText;
  });
}
// No streaming-specific markdown fixes needed here; handled by Markdown widget
Widget _buildToolCallTile(ToolCallEntry tc) {
@@ -622,6 +704,9 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
@override
void dispose() {
_typingGateTimer?.cancel();
_ttsPlainTextDebounce?.cancel();
_pendingTtsPlainTextPayload = null;
_pendingTtsPlainTextSource = null;
_fadeController.dispose();
_slideController.dispose();
super.dispose();

View File

@@ -2460,7 +2460,7 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
HapticFeedback.selectionClick();
}
// Server transcription removed; only on-device STT updates the input text
// When on-device STT is unavailable we rely on server transcription.
void _showVoiceUnavailable(String message) {
if (!mounted) return;

View File

@@ -14,6 +14,7 @@ import '../../../shared/utils/ui_utils.dart';
import '../../../core/providers/app_providers.dart';
import '../../../l10n/app_localizations.dart';
import '../../chat/providers/text_to_speech_provider.dart';
import '../../chat/services/voice_input_service.dart';
class AppCustomizationPage extends ConsumerWidget {
const AppCustomizationPage({super.key});
@@ -70,6 +71,8 @@ class AppCustomizationPage extends ConsumerWidget {
languageLabel,
),
const SizedBox(height: Spacing.xl),
_buildSttSection(context, ref, settings),
const SizedBox(height: Spacing.xl),
_buildTtsDropdownSection(context, ref, settings),
const SizedBox(height: Spacing.xl),
_buildChatSection(context, ref, settings),
@@ -468,6 +471,303 @@ class AppCustomizationPage extends ConsumerWidget {
);
}
Widget _buildSttSection(
BuildContext context,
WidgetRef ref,
AppSettings settings,
) {
final theme = context.conduitTheme;
final l10n = AppLocalizations.of(context)!;
final localSupport = ref.watch(localVoiceRecognitionAvailableProvider);
final bool localAvailable = localSupport.maybeWhen(
data: (value) => value,
orElse: () => false,
);
final bool localLoading = localSupport.isLoading;
final bool serverAvailable = ref.watch(
serverVoiceRecognitionAvailableProvider,
);
final notifier = ref.read(appSettingsProvider.notifier);
final description = _sttPreferenceDescription(l10n, settings.sttPreference);
final warnings = <String>[];
if (settings.sttPreference == SttPreference.deviceOnly &&
!localAvailable &&
!localLoading) {
warnings.add(l10n.sttDeviceUnavailableWarning);
}
if (settings.sttPreference == SttPreference.serverOnly &&
!serverAvailable) {
warnings.add(l10n.sttServerUnavailableWarning);
}
final bool autoSelectable =
localAvailable || serverAvailable || localLoading;
final bool deviceSelectable = localAvailable || localLoading;
final bool serverSelectable = serverAvailable;
return Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: [
Text(
l10n.sttSettings,
style:
theme.headingSmall?.copyWith(color: theme.sidebarForeground) ??
TextStyle(color: theme.sidebarForeground, fontSize: 18),
),
const SizedBox(height: Spacing.sm),
ConduitCard(
padding: const EdgeInsets.all(Spacing.md),
child: Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: [
Row(
children: [
_buildIconBadge(
context,
UiUtils.platformIcon(
ios: CupertinoIcons.mic,
android: Icons.mic,
),
color: theme.buttonPrimary,
),
const SizedBox(width: Spacing.md),
Expanded(
child: Text(
l10n.sttEngineLabel,
style:
theme.bodyMedium?.copyWith(
color: theme.sidebarForeground,
fontWeight: FontWeight.w600,
) ??
TextStyle(
color: theme.sidebarForeground,
fontSize: 14,
fontWeight: FontWeight.w600,
),
),
),
],
),
const SizedBox(height: Spacing.sm),
Wrap(
spacing: Spacing.sm,
runSpacing: Spacing.sm,
children: [
ChoiceChip(
label: Text(l10n.sttEngineAuto),
selected: settings.sttPreference == SttPreference.auto,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.sttPreference == SttPreference.auto
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.sttPreference == SttPreference.auto
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: autoSelectable
? (value) {
if (value) {
notifier.setSttPreference(SttPreference.auto);
}
}
: null,
),
ChoiceChip(
label: Text(l10n.sttEngineDevice),
selected:
settings.sttPreference == SttPreference.deviceOnly,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.sttPreference == SttPreference.deviceOnly
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.sttPreference == SttPreference.deviceOnly
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: deviceSelectable
? (value) {
if (value) {
notifier.setSttPreference(
SttPreference.deviceOnly,
);
}
}
: null,
),
ChoiceChip(
label: Text(l10n.sttEngineServer),
selected:
settings.sttPreference == SttPreference.serverOnly,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.sttPreference == SttPreference.serverOnly
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.sttPreference == SttPreference.serverOnly
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: serverSelectable
? (value) {
if (value) {
notifier.setSttPreference(
SttPreference.serverOnly,
);
}
}
: null,
),
],
),
if (localLoading) ...[
const SizedBox(height: Spacing.sm),
LinearProgressIndicator(
minHeight: 3,
color: theme.buttonPrimary,
backgroundColor: theme.cardBorder.withValues(alpha: 0.4),
),
],
const SizedBox(height: Spacing.sm),
AnimatedSwitcher(
duration: const Duration(milliseconds: 200),
child: Text(
description,
key: ValueKey<String>(
'stt-desc-${settings.sttPreference.name}',
),
style:
theme.bodyMedium?.copyWith(
color: theme.sidebarForeground.withValues(alpha: 0.9),
) ??
TextStyle(
color: theme.sidebarForeground.withValues(alpha: 0.9),
fontSize: 14,
),
),
),
if (warnings.isNotEmpty) ...[
const SizedBox(height: Spacing.sm),
...warnings.map(
(warning) => Padding(
padding: const EdgeInsets.only(top: Spacing.xs),
child: Text(
warning,
style:
theme.bodySmall?.copyWith(
color: theme.error,
fontWeight: FontWeight.w600,
) ??
TextStyle(
color: theme.error,
fontSize: 12,
fontWeight: FontWeight.w600,
),
),
),
),
],
if (settings.sttPreference == SttPreference.serverOnly ||
(settings.sttPreference == SttPreference.auto &&
serverAvailable)) ...[
const SizedBox(height: Spacing.md),
const Divider(),
const SizedBox(height: Spacing.md),
Row(
children: [
Expanded(
child: Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: [
Text(
l10n.sttSilenceDuration,
style: theme.bodyMedium?.copyWith(
color: theme.sidebarForeground,
fontWeight: FontWeight.w600,
) ??
TextStyle(
color: theme.sidebarForeground,
fontSize: 14,
fontWeight: FontWeight.w600,
),
),
const SizedBox(height: Spacing.xs),
Text(
'${settings.voiceSilenceDuration}ms',
style: theme.bodySmall?.copyWith(
color: theme.sidebarForeground
.withValues(alpha: 0.7),
) ??
TextStyle(
color: theme.sidebarForeground
.withValues(alpha: 0.7),
fontSize: 12,
),
),
],
),
),
Text(
'${(settings.voiceSilenceDuration / 1000).toStringAsFixed(1)}s',
style: theme.bodyMedium?.copyWith(
color: theme.buttonPrimary,
fontWeight: FontWeight.w600,
) ??
TextStyle(
color: theme.buttonPrimary,
fontSize: 14,
fontWeight: FontWeight.w600,
),
),
],
),
const SizedBox(height: Spacing.sm),
Slider(
value: settings.voiceSilenceDuration.toDouble(),
min: 300,
max: 3000,
divisions: 27,
activeColor: theme.buttonPrimary,
inactiveColor: theme.cardBorder.withValues(alpha: 0.4),
onChanged: (value) {
notifier.setVoiceSilenceDuration(value.round());
},
),
Text(
l10n.sttSilenceDurationDescription,
style: theme.bodySmall?.copyWith(
color: theme.sidebarForeground.withValues(alpha: 0.7),
) ??
TextStyle(
color: theme.sidebarForeground.withValues(alpha: 0.7),
fontSize: 12,
),
),
],
],
),
),
],
);
}
Widget _buildTtsDropdownSection(
BuildContext context,
WidgetRef ref,
@@ -475,6 +775,35 @@ class AppCustomizationPage extends ConsumerWidget {
) {
final theme = context.conduitTheme;
final l10n = AppLocalizations.of(context)!;
final ttsService = ref.watch(textToSpeechServiceProvider);
final bool deviceAvailable =
ttsService.deviceEngineAvailable || !ttsService.isInitialized;
final bool serverAvailable = ttsService.serverEngineAvailable;
final bool autoSelectable = deviceAvailable || serverAvailable;
final bool deviceSelectable = deviceAvailable;
final bool serverSelectable = serverAvailable;
final ttsDescription = _ttsPreferenceDescription(l10n, settings);
final warnings = <String>[];
switch (settings.ttsEngine) {
case TtsEngine.auto:
if (!deviceAvailable) {
warnings.add(l10n.ttsDeviceUnavailableWarning);
}
if (!serverAvailable) {
warnings.add(l10n.ttsServerUnavailableWarning);
}
break;
case TtsEngine.device:
if (!deviceAvailable) {
warnings.add(l10n.ttsDeviceUnavailableWarning);
}
break;
case TtsEngine.server:
if (!serverAvailable) {
warnings.add(l10n.ttsServerUnavailableWarning);
}
break;
}
return Column(
crossAxisAlignment: CrossAxisAlignment.start,
children: [
@@ -510,82 +839,154 @@ class AppCustomizationPage extends ConsumerWidget {
) ??
TextStyle(color: theme.sidebarForeground, fontSize: 14),
),
const Spacer(),
Wrap(
spacing: Spacing.sm,
children: [
ChoiceChip(
label: Text(l10n.ttsEngineDevice),
selected: settings.ttsEngine == TtsEngine.device,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: (v) {
if (v) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
notifier.setTtsEngine(TtsEngine.device);
// Keep previous voice (device voices)
],
),
const SizedBox(height: Spacing.sm),
Wrap(
spacing: Spacing.sm,
runSpacing: Spacing.sm,
children: [
ChoiceChip(
label: Text(l10n.ttsEngineAuto),
selected: settings.ttsEngine == TtsEngine.auto,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.auto
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(
alpha: autoSelectable ? 0.2 : 0.12,
),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.auto
? theme.buttonPrimaryText
: theme.textPrimary.withValues(
alpha: autoSelectable ? 1.0 : 0.45,
),
fontWeight: FontWeight.w600,
),
onSelected: autoSelectable
? (value) {
if (value) {
ref
.read(appSettingsProvider.notifier)
.setTtsEngine(TtsEngine.auto);
}
}
},
),
ChoiceChip(
label: Text(l10n.ttsEngineServer),
selected: settings.ttsEngine == TtsEngine.server,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: (v) {
if (v) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
// Clear device-specific voice so server can default
notifier.setTtsVoice(null);
notifier.setTtsEngine(TtsEngine.server);
: null,
),
ChoiceChip(
label: Text(l10n.ttsEngineDevice),
selected: settings.ttsEngine == TtsEngine.device,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(
alpha: deviceSelectable ? 0.2 : 0.12,
),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.device
? theme.buttonPrimaryText
: theme.textPrimary.withValues(
alpha: deviceSelectable ? 1.0 : 0.45,
),
fontWeight: FontWeight.w600,
),
onSelected: deviceSelectable
? (value) {
if (value) {
ref
.read(appSettingsProvider.notifier)
.setTtsEngine(TtsEngine.device);
}
}
},
),
],
: null,
),
ChoiceChip(
label: Text(l10n.ttsEngineServer),
selected: settings.ttsEngine == TtsEngine.server,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(
alpha: serverSelectable ? 0.2 : 0.12,
),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.server
? theme.buttonPrimaryText
: theme.textPrimary.withValues(
alpha: serverSelectable ? 1.0 : 0.45,
),
fontWeight: FontWeight.w600,
),
onSelected: serverSelectable
? (value) {
if (value) {
final notifier = ref.read(
appSettingsProvider.notifier,
);
notifier.setTtsVoice(null);
notifier.setTtsEngine(TtsEngine.server);
}
}
: null,
),
],
),
const SizedBox(height: Spacing.sm),
AnimatedSwitcher(
duration: const Duration(milliseconds: 200),
child: Text(
ttsDescription,
key: ValueKey<String>('tts-desc-${settings.ttsEngine.name}'),
style:
theme.bodyMedium?.copyWith(
color: theme.sidebarForeground.withValues(alpha: 0.9),
) ??
TextStyle(
color: theme.sidebarForeground.withValues(alpha: 0.9),
fontSize: 14,
),
),
),
if (warnings.isNotEmpty) ...[
const SizedBox(height: Spacing.sm),
...warnings.map(
(warning) => Padding(
padding: const EdgeInsets.only(top: Spacing.xs),
child: Text(
warning,
style:
theme.bodySmall?.copyWith(
color: theme.error,
fontWeight: FontWeight.w600,
) ??
TextStyle(
color: theme.error,
fontSize: 12,
fontWeight: FontWeight.w600,
),
),
),
),
],
],
),
),
const SizedBox(height: Spacing.sm),
_ExpandableCard(
title: l10n.ttsVoice,
subtitle: _getDisplayVoiceName(
settings.ttsEngine == TtsEngine.server
? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ??
'')
: (settings.ttsVoice ?? ''),
l10n.ttsSystemDefault,
),
subtitle: _ttsVoiceSubtitle(l10n, settings),
icon: UiUtils.platformIcon(
ios: CupertinoIcons.speaker_3,
android: Icons.record_voice_over,
@@ -604,14 +1005,7 @@ class AppCustomizationPage extends ConsumerWidget {
color: theme.buttonPrimary,
),
title: l10n.ttsVoice,
subtitle: _getDisplayVoiceName(
settings.ttsEngine == TtsEngine.server
? ((settings.ttsServerVoiceName ??
settings.ttsServerVoiceId) ??
'')
: (settings.ttsVoice ?? ''),
l10n.ttsSystemDefault,
),
subtitle: _ttsVoiceSubtitle(l10n, settings),
onTap: () => _showVoicePickerSheet(context, ref, settings),
),
const SizedBox(height: Spacing.md),
@@ -627,49 +1021,13 @@ class AppCustomizationPage extends ConsumerWidget {
value: settings.ttsSpeechRate,
min: 0.25,
max: 2.0,
divisions: 7,
divisions: 35,
label: '${(settings.ttsSpeechRate * 100).round()}%',
onChanged: (value) => ref
.read(appSettingsProvider.notifier)
.setTtsSpeechRate(value),
),
const SizedBox(height: Spacing.md),
// Pitch Slider
_buildSliderTile(
context,
ref,
icon: UiUtils.platformIcon(
ios: CupertinoIcons.waveform,
android: Icons.graphic_eq,
),
title: l10n.ttsPitch,
value: settings.ttsPitch,
min: 0.5,
max: 2.0,
divisions: 6,
label: settings.ttsPitch.toStringAsFixed(1),
onChanged: (value) =>
ref.read(appSettingsProvider.notifier).setTtsPitch(value),
),
const SizedBox(height: Spacing.md),
// Volume Slider
_buildSliderTile(
context,
ref,
icon: UiUtils.platformIcon(
ios: CupertinoIcons.volume_up,
android: Icons.volume_up,
),
title: l10n.ttsVolume,
value: settings.ttsVolume,
min: 0.0,
max: 1.0,
divisions: 10,
label: '${(settings.ttsVolume * 100).round()}%',
onChanged: (value) =>
ref.read(appSettingsProvider.notifier).setTtsVolume(value),
),
const SizedBox(height: Spacing.md),
// Preview Button
_CustomizationTile(
leading: _buildIconBadge(
@@ -691,6 +1049,53 @@ class AppCustomizationPage extends ConsumerWidget {
);
}
/// Returns the localized explanatory text for a speech-to-text [preference].
///
/// Every [SttPreference] value maps to its own description getter on [l10n].
String _sttPreferenceDescription(
  AppLocalizations l10n,
  SttPreference preference,
) {
  return switch (preference) {
    SttPreference.auto => l10n.sttEngineAutoDescription,
    SttPreference.deviceOnly => l10n.sttEngineDeviceDescription,
    SttPreference.serverOnly => l10n.sttEngineServerDescription,
  };
}
/// Returns the localized explanatory text for the text-to-speech engine
/// currently stored in [settings].
///
/// Every [TtsEngine] value maps to its own description getter on [l10n].
String _ttsPreferenceDescription(
  AppLocalizations l10n,
  AppSettings settings,
) {
  final engine = settings.ttsEngine;
  return switch (engine) {
    TtsEngine.auto => l10n.ttsEngineAutoDescription,
    TtsEngine.device => l10n.ttsEngineDeviceDescription,
    TtsEngine.server => l10n.ttsEngineServerDescription,
  };
}
/// Builds the subtitle shown on the voice tiles from the current [settings].
///
/// For [TtsEngine.device] and [TtsEngine.server] only that engine's display
/// name is returned. For [TtsEngine.auto] both names are listed, each prefixed
/// with its engine label and separated by ` • ` so the device voice name does
/// not run straight into the server label.
String _ttsVoiceSubtitle(AppLocalizations l10n, AppSettings settings) {
  final deviceName = _getDisplayVoiceName(
    settings.ttsVoice,
    l10n.ttsSystemDefault,
  );
  // Prefer the human-readable server voice name, then its id, then empty.
  final serverVoice =
      (settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ?? '';
  final serverName = _getDisplayVoiceName(serverVoice, l10n.ttsSystemDefault);

  return switch (settings.ttsEngine) {
    TtsEngine.auto =>
      '${l10n.ttsEngineDevice}: $deviceName'
          ' • '
          '${l10n.ttsEngineServer}: $serverName',
    TtsEngine.device => deviceName,
    TtsEngine.server => serverName,
  };
}
Widget _buildSliderTile(
BuildContext context,
WidgetRef ref, {

View File

@@ -307,7 +307,25 @@
"chatSettings": "Chat",
"sendOnEnter": "Mit Enter senden",
"sendOnEnterDescription": "Enter sendet (Soft-Tastatur). Cmd/Ctrl+Enter ebenfalls verfügbar",
"sttSettings": "Sprache zu Text",
"sttEngineLabel": "Erkennungs-Engine",
"sttEngineAuto": "Automatisch",
"sttEngineDevice": "Auf dem Gerät",
"sttEngineServer": "Server",
"sttEngineAutoDescription": "Verwendet die Erkennung auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.",
"sttEngineDeviceDescription": "Behält Audio auf diesem Gerät. Spracheingabe funktioniert nicht, wenn das Gerät keine Spracherkennung unterstützt.",
"sttEngineServerDescription": "Sendet Aufnahmen immer an deinen OpenWebUI-Server zur Transkription.",
"sttDeviceUnavailableWarning": "Auf diesem Gerät steht keine Spracherkennung zur Verfügung.",
"sttServerUnavailableWarning": "Verbinde dich mit einem Server mit aktivierter Transkription, um diese Option zu nutzen.",
"sttSilenceDuration": "Stille-Dauer",
"sttSilenceDurationDescription": "Zeit nach Stille warten, bevor die Aufnahme automatisch gestoppt wird",
"ttsSettings": "Text zu Sprache",
"ttsEngineAuto": "Automatisch",
"ttsEngineAutoDescription": "Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.",
"ttsEngineDeviceDescription": "Behält die Ausgabe auf diesem Gerät. Sprachausgabe funktioniert nicht, wenn das Gerät keine TTS-Unterstützung bietet.",
"ttsEngineServerDescription": "Sendet die Ausgabe immer an deinen OpenWebUI-Server.",
"ttsDeviceUnavailableWarning": "Sprachausgabe auf dem Gerät steht auf diesem Gerät nicht zur Verfügung.",
"ttsServerUnavailableWarning": "Verbinde dich mit einem Server mit aktivierter Sprachausgabe, um diese Option zu nutzen.",
"ttsVoice": "Stimme",
"ttsSpeechRate": "Sprechgeschwindigkeit",
"ttsPitch": "Tonhöhe",

View File

@@ -1219,10 +1219,62 @@
"@sendOnEnterDescription": {
"description": "Explanation of how the Send on Enter toggle behaves."
},
"sttSettings": "Speech to Text",
"@sttSettings": {
"description": "Section header for speech-to-text settings."
},
"sttEngineLabel": "Recognition engine",
"@sttEngineLabel": {
"description": "Label shown above the speech-to-text engine chips."
},
"sttEngineAuto": "Auto",
"@sttEngineAuto": {
"description": "Chip label for automatic speech-to-text selection."
},
"sttEngineDevice": "On device",
"@sttEngineDevice": {
"description": "Chip label for on-device speech recognition."
},
"sttEngineServer": "Server",
"@sttEngineServer": {
"description": "Chip label for server speech recognition."
},
"sttEngineAutoDescription": "Use on-device recognition when available and fall back to your server.",
"@sttEngineAutoDescription": {
"description": "Description shown when automatic speech-to-text preference is active."
},
"sttEngineDeviceDescription": "Keep audio on this device. Voice input stops working if on-device speech recognition isnt supported.",
"@sttEngineDeviceDescription": {
"description": "Description shown when on-device speech-to-text preference is active."
},
"sttEngineServerDescription": "Always send recordings to your OpenWebUI server for transcription.",
"@sttEngineServerDescription": {
"description": "Description shown when server speech-to-text preference is active."
},
"sttDeviceUnavailableWarning": "On-device speech recognition isnt available on this device.",
"@sttDeviceUnavailableWarning": {
"description": "Warning shown when the user selects on-device speech recognition but it is unavailable."
},
"sttServerUnavailableWarning": "Connect to a server with transcription enabled to use this option.",
"@sttServerUnavailableWarning": {
"description": "Warning shown when the user selects server speech recognition but no server is available."
},
"sttSilenceDuration": "Silence Duration",
"@sttSilenceDuration": {
"description": "Label for the silence duration setting in server speech-to-text."
},
"sttSilenceDurationDescription": "Time to wait after silence before auto-stopping recording",
"@sttSilenceDurationDescription": {
"description": "Description for the silence duration slider in server speech-to-text settings."
},
"ttsEngineLabel": "Engine",
"@ttsEngineLabel": {
"description": "Label for selecting the text-to-speech engine."
},
"ttsEngineAuto": "Auto",
"@ttsEngineAuto": {
"description": "Chip label for automatically selecting the text-to-speech engine."
},
"ttsEngineDevice": "On device",
"@ttsEngineDevice": {
"description": "Chip label for using on-device text-to-speech."
@@ -1231,6 +1283,26 @@
"@ttsEngineServer": {
"description": "Chip label for using server-side text-to-speech."
},
"ttsEngineAutoDescription": "Use on-device speech when available and fall back to your server.",
"@ttsEngineAutoDescription": {
"description": "Description shown when automatic text-to-speech preference is active."
},
"ttsEngineDeviceDescription": "Keep synthesis on this device. Voice playback stops working if on-device TTS isnt supported.",
"@ttsEngineDeviceDescription": {
"description": "Description shown when on-device text-to-speech preference is active."
},
"ttsEngineServerDescription": "Always request audio from your OpenWebUI server.",
"@ttsEngineServerDescription": {
"description": "Description shown when server text-to-speech preference is active."
},
"ttsDeviceUnavailableWarning": "On-device text-to-speech isnt available on this device.",
"@ttsDeviceUnavailableWarning": {
"description": "Warning shown when on-device text-to-speech is unavailable."
},
"ttsServerUnavailableWarning": "Connect to a server with text-to-speech enabled to use this option.",
"@ttsServerUnavailableWarning": {
"description": "Warning shown when server text-to-speech is unavailable."
},
"ttsSettings": "Text to Speech",
"@ttsSettings": {
"description": "Section header for TTS-related customization options."

View File

@@ -307,7 +307,25 @@
"chatSettings": "Conversación",
"sendOnEnter": "Enviar con Enter",
"sendOnEnterDescription": "Enter envía (teclado virtual). Cmd/Ctrl+Enter también disponible",
"sttSettings": "Voz a texto",
"sttEngineLabel": "Motor de reconocimiento",
"sttEngineAuto": "Automático",
"sttEngineDevice": "En el dispositivo",
"sttEngineServer": "Servidor",
"sttEngineAutoDescription": "Usa el reconocimiento en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.",
"sttEngineDeviceDescription": "Mantiene el audio en este dispositivo. La entrada de voz no funciona si el dispositivo no admite reconocimiento de voz.",
"sttEngineServerDescription": "Envía siempre las grabaciones a tu servidor OpenWebUI para la transcripción.",
"sttDeviceUnavailableWarning": "El reconocimiento de voz en el dispositivo no está disponible en este dispositivo.",
"sttServerUnavailableWarning": "Conéctate a un servidor con transcripción habilitada para usar esta opción.",
"sttSilenceDuration": "Duración del silencio",
"sttSilenceDurationDescription": "Tiempo de espera después del silencio antes de detener automáticamente la grabación",
"ttsSettings": "Texto a voz",
"ttsEngineAuto": "Automático",
"ttsEngineAutoDescription": "Usa la síntesis en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.",
"ttsEngineDeviceDescription": "Mantiene la síntesis en este dispositivo. La reproducción de voz no funciona si el dispositivo no admite TTS.",
"ttsEngineServerDescription": "Solicita siempre el audio a tu servidor OpenWebUI.",
"ttsDeviceUnavailableWarning": "La síntesis de voz en el dispositivo no está disponible en este dispositivo.",
"ttsServerUnavailableWarning": "Conéctate a un servidor con texto a voz habilitado para usar esta opción.",
"ttsVoice": "Voz",
"ttsSpeechRate": "Velocidad de voz",
"ttsPitch": "Tono",

View File

@@ -307,7 +307,25 @@
"chatSettings": "Discussion",
"sendOnEnter": "Envoyer avec Entrée",
"sendOnEnterDescription": "Entrée envoie (clavier logiciel). Cmd/Ctrl+Entrée aussi disponible",
"sttSettings": "Voix vers texte",
"sttEngineLabel": "Moteur de reconnaissance",
"sttEngineAuto": "Auto",
"sttEngineDevice": "Sur lappareil",
"sttEngineServer": "Serveur",
"sttEngineAutoDescription": "Utilise la reconnaissance sur lappareil quand cest possible, sinon bascule vers votre serveur.",
"sttEngineDeviceDescription": "Conserve laudio sur cet appareil. Lentrée vocale cesse de fonctionner si la reconnaissance vocale nest pas prise en charge.",
"sttEngineServerDescription": "Envoie toujours les enregistrements à votre serveur OpenWebUI pour transcription.",
"sttDeviceUnavailableWarning": "La reconnaissance vocale sur lappareil nest pas disponible sur cet appareil.",
"sttServerUnavailableWarning": "Connectez-vous à un serveur avec la transcription activée pour utiliser cette option.",
"sttSilenceDuration": "Durée du silence",
"sttSilenceDurationDescription": "Temps d'attente après le silence avant d'arrêter automatiquement l'enregistrement",
"ttsSettings": "Synthèse vocale",
"ttsEngineAuto": "Auto",
"ttsEngineAutoDescription": "Utilise la synthèse locale quand cest possible, sinon bascule vers votre serveur.",
"ttsEngineDeviceDescription": "Garde la synthèse sur cet appareil. La lecture vocale ne fonctionne plus si lappareil noffre pas la synthèse vocale.",
"ttsEngineServerDescription": "Demande toujours l'audio à votre serveur OpenWebUI.",
"ttsDeviceUnavailableWarning": "La synthèse vocale sur lappareil nest pas disponible sur cet appareil.",
"ttsServerUnavailableWarning": "Connectez-vous à un serveur avec la synthèse vocale activée pour utiliser cette option.",
"ttsVoice": "Voix",
"ttsSpeechRate": "Vitesse de parole",
"ttsPitch": "Hauteur",

View File

@@ -307,7 +307,25 @@
"chatSettings": "Chat",
"sendOnEnter": "Invia con Invio",
"sendOnEnterDescription": "Invio invia (tastiera software). Cmd/Ctrl+Invio disponibile",
"sttSettings": "Voce in testo",
"sttEngineLabel": "Motore di riconoscimento",
"sttEngineAuto": "Automatico",
"sttEngineDevice": "Sul dispositivo",
"sttEngineServer": "Server",
"sttEngineAutoDescription": "Usa il riconoscimento sul dispositivo quando disponibile e altrimenti passa al tuo server.",
"sttEngineDeviceDescription": "Mantiene laudio su questo dispositivo. Linput vocale non funziona se il dispositivo non supporta il riconoscimento vocale.",
"sttEngineServerDescription": "Invia sempre le registrazioni al tuo server OpenWebUI per la trascrizione.",
"sttDeviceUnavailableWarning": "Il riconoscimento vocale sul dispositivo non è disponibile su questo dispositivo.",
"sttServerUnavailableWarning": "Collegati a un server con la trascrizione abilitata per usare questa opzione.",
"sttSilenceDuration": "Durata del silenzio",
"sttSilenceDurationDescription": "Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione",
"ttsSettings": "Sintesi vocale",
"ttsEngineAuto": "Automatico",
"ttsEngineAutoDescription": "Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.",
"ttsEngineDeviceDescription": "Mantiene la sintesi su questo dispositivo. La riproduzione vocale non funziona se il dispositivo non supporta il TTS.",
"ttsEngineServerDescription": "Richiede sempre l'audio dal tuo server OpenWebUI.",
"ttsDeviceUnavailableWarning": "La sintesi vocale sul dispositivo non è disponibile su questo dispositivo.",
"ttsServerUnavailableWarning": "Collegati a un server con la sintesi vocale abilitata per usare questa opzione.",
"ttsVoice": "Voce",
"ttsSpeechRate": "Velocità di sintesi vocale",
"ttsPitch": "Tonalità",

View File

@@ -1784,12 +1784,90 @@ abstract class AppLocalizations {
/// **'Enter sends (soft keyboard). Cmd/Ctrl+Enter also available'**
String get sendOnEnterDescription;
/// Section header for speech-to-text settings.
///
/// In en, this message translates to:
/// **'Speech to Text'**
String get sttSettings;
/// Label shown above the speech-to-text engine chips.
///
/// In en, this message translates to:
/// **'Recognition engine'**
String get sttEngineLabel;
/// Chip label for automatic speech-to-text selection.
///
/// In en, this message translates to:
/// **'Auto'**
String get sttEngineAuto;
/// Chip label for on-device speech recognition.
///
/// In en, this message translates to:
/// **'On device'**
String get sttEngineDevice;
/// Chip label for server speech recognition.
///
/// In en, this message translates to:
/// **'Server'**
String get sttEngineServer;
/// Description shown when automatic speech-to-text preference is active.
///
/// In en, this message translates to:
/// **'Use on-device recognition when available and fall back to your server.'**
String get sttEngineAutoDescription;
/// Description shown when on-device speech-to-text preference is active.
///
/// In en, this message translates to:
/// **'Keep audio on this device. Voice input stops working if on-device speech recognition isn’t supported.'**
String get sttEngineDeviceDescription;
/// Description shown when server speech-to-text preference is active.
///
/// In en, this message translates to:
/// **'Always send recordings to your OpenWebUI server for transcription.'**
String get sttEngineServerDescription;
/// Warning shown when the user selects on-device speech recognition but it is unavailable.
///
/// In en, this message translates to:
/// **'On-device speech recognition isn’t available on this device.'**
String get sttDeviceUnavailableWarning;
/// Warning shown when the user selects server speech recognition but no server is available.
///
/// In en, this message translates to:
/// **'Connect to a server with transcription enabled to use this option.'**
String get sttServerUnavailableWarning;
/// Label for the silence duration setting in server speech-to-text.
///
/// In en, this message translates to:
/// **'Silence Duration'**
String get sttSilenceDuration;
/// Description for the silence duration slider in server speech-to-text settings.
///
/// In en, this message translates to:
/// **'Time to wait after silence before auto-stopping recording'**
String get sttSilenceDurationDescription;
/// Label for selecting the text-to-speech engine.
///
/// In en, this message translates to:
/// **'Engine'**
String get ttsEngineLabel;
/// Chip label for automatically selecting the text-to-speech engine.
///
/// In en, this message translates to:
/// **'Auto'**
String get ttsEngineAuto;
/// Chip label for using on-device text-to-speech.
///
/// In en, this message translates to:
@@ -1802,6 +1880,36 @@ abstract class AppLocalizations {
/// **'Server'**
String get ttsEngineServer;
/// Description shown when automatic text-to-speech preference is active.
///
/// In en, this message translates to:
/// **'Use on-device speech when available and fall back to your server.'**
String get ttsEngineAutoDescription;
/// Description shown when on-device text-to-speech preference is active.
///
/// In en, this message translates to:
/// **'Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.'**
String get ttsEngineDeviceDescription;
/// Description shown when server text-to-speech preference is active.
///
/// In en, this message translates to:
/// **'Always request audio from your OpenWebUI server.'**
String get ttsEngineServerDescription;
/// Warning shown when on-device text-to-speech is unavailable.
///
/// In en, this message translates to:
/// **'On-device text-to-speech isn’t available on this device.'**
String get ttsDeviceUnavailableWarning;
/// Warning shown when server text-to-speech is unavailable.
///
/// In en, this message translates to:
/// **'Connect to a server with text-to-speech enabled to use this option.'**
String get ttsServerUnavailableWarning;
/// Section header for TTS-related customization options.
///
/// In en, this message translates to:

View File

@@ -931,15 +931,80 @@ class AppLocalizationsDe extends AppLocalizations {
String get sendOnEnterDescription =>
'Enter sendet (Soft-Tastatur). Cmd/Ctrl+Enter ebenfalls verfügbar';
@override
String get sttSettings => 'Sprache zu Text';
@override
String get sttEngineLabel => 'Erkennungs-Engine';
@override
String get sttEngineAuto => 'Automatisch';
@override
String get sttEngineDevice => 'Auf dem Gerät';
@override
String get sttEngineServer => 'Server';
@override
String get sttEngineAutoDescription =>
'Verwendet die Erkennung auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.';
@override
String get sttEngineDeviceDescription =>
'Behält Audio auf diesem Gerät. Spracheingabe funktioniert nicht, wenn das Gerät keine Spracherkennung unterstützt.';
@override
String get sttEngineServerDescription =>
'Sendet Aufnahmen immer an deinen OpenWebUI-Server zur Transkription.';
@override
String get sttDeviceUnavailableWarning =>
'Auf diesem Gerät steht keine Spracherkennung zur Verfügung.';
@override
String get sttServerUnavailableWarning =>
'Verbinde dich mit einem Server mit aktivierter Transkription, um diese Option zu nutzen.';
@override
String get sttSilenceDuration => 'Stille-Dauer';
@override
String get sttSilenceDurationDescription =>
'Zeit nach Stille warten, bevor die Aufnahme automatisch gestoppt wird';
@override
String get ttsEngineLabel => 'Engine';
@override
String get ttsEngineAuto => 'Automatisch';
@override
String get ttsEngineDevice => 'Auf dem Gerät';
@override
String get ttsEngineServer => 'Server';
@override
String get ttsEngineAutoDescription =>
'Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.';
@override
String get ttsEngineDeviceDescription =>
'Behält die Ausgabe auf diesem Gerät. Sprachausgabe funktioniert nicht, wenn das Gerät keine TTS-Unterstützung bietet.';
@override
String get ttsEngineServerDescription =>
'Sendet die Ausgabe immer an deinen OpenWebUI-Server.';
@override
String get ttsDeviceUnavailableWarning =>
'Sprachausgabe auf dem Gerät steht auf diesem Gerät nicht zur Verfügung.';
@override
String get ttsServerUnavailableWarning =>
'Verbinde dich mit einem Server mit aktivierter Sprachausgabe, um diese Option zu nutzen.';
@override
String get ttsSettings => 'Text zu Sprache';

View File

@@ -925,15 +925,80 @@ class AppLocalizationsEn extends AppLocalizations {
String get sendOnEnterDescription =>
'Enter sends (soft keyboard). Cmd/Ctrl+Enter also available';
@override
String get sttSettings => 'Speech to Text';
@override
String get sttEngineLabel => 'Recognition engine';
@override
String get sttEngineAuto => 'Auto';
@override
String get sttEngineDevice => 'On device';
@override
String get sttEngineServer => 'Server';
@override
String get sttEngineAutoDescription =>
'Use on-device recognition when available and fall back to your server.';
/// English description for the on-device speech-to-text preference.
@override
String get sttEngineDeviceDescription =>
    'Keep audio on this device. Voice input stops working if on-device speech recognition isn’t supported.';
@override
String get sttEngineServerDescription =>
'Always send recordings to your OpenWebUI server for transcription.';
/// English warning for unavailable on-device speech recognition.
@override
String get sttDeviceUnavailableWarning =>
    'On-device speech recognition isn’t available on this device.';
@override
String get sttServerUnavailableWarning =>
'Connect to a server with transcription enabled to use this option.';
@override
String get sttSilenceDuration => 'Silence Duration';
@override
String get sttSilenceDurationDescription =>
'Time to wait after silence before auto-stopping recording';
@override
String get ttsEngineLabel => 'Engine';
@override
String get ttsEngineAuto => 'Auto';
@override
String get ttsEngineDevice => 'On device';
@override
String get ttsEngineServer => 'Server';
@override
String get ttsEngineAutoDescription =>
'Use on-device speech when available and fall back to your server.';
/// English description for the on-device text-to-speech preference.
@override
String get ttsEngineDeviceDescription =>
    'Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.';
@override
String get ttsEngineServerDescription =>
'Always request audio from your OpenWebUI server.';
/// English warning for unavailable on-device text-to-speech.
@override
String get ttsDeviceUnavailableWarning =>
    'On-device text-to-speech isn’t available on this device.';
@override
String get ttsServerUnavailableWarning =>
'Connect to a server with text-to-speech enabled to use this option.';
@override
String get ttsSettings => 'Text to Speech';

View File

@@ -940,15 +940,80 @@ class AppLocalizationsFr extends AppLocalizations {
String get sendOnEnterDescription =>
'Entrée envoie (clavier logiciel). Cmd/Ctrl+Entrée aussi disponible';
// --- Speech-to-text (STT) settings strings (French) ---
@override
String get sttSettings => 'Voix vers texte';
@override
String get sttEngineLabel => 'Moteur de reconnaissance';
@override
String get sttEngineAuto => 'Auto';
@override
String get sttEngineDevice => 'Sur l\'appareil';
@override
String get sttEngineServer => 'Serveur';
@override
String get sttEngineAutoDescription =>
    'Utilise la reconnaissance sur l\'appareil quand c\'est possible, sinon bascule vers votre serveur.';
@override
String get sttEngineDeviceDescription =>
    'Conserve l\'audio sur cet appareil. L\'entrée vocale cesse de fonctionner si la reconnaissance vocale n\'est pas prise en charge.';
@override
String get sttEngineServerDescription =>
    'Envoie toujours les enregistrements à votre serveur OpenWebUI pour transcription.';
@override
String get sttDeviceUnavailableWarning =>
    'La reconnaissance vocale sur l\'appareil n\'est pas disponible sur cet appareil.';
@override
String get sttServerUnavailableWarning =>
    'Connectez-vous à un serveur avec la transcription activée pour utiliser cette option.';
@override
String get sttSilenceDuration => 'Durée du silence';
@override
String get sttSilenceDurationDescription =>
    'Temps d\'attente après le silence avant d\'arrêter automatiquement l\'enregistrement';

// --- Text-to-speech (TTS) settings strings (French) ---
@override
String get ttsEngineLabel => 'Moteur';
@override
String get ttsEngineAuto => 'Auto';
@override
String get ttsEngineDevice => 'Sur l\'appareil';
@override
String get ttsEngineServer => 'Serveur';
@override
String get ttsEngineAutoDescription =>
    'Utilise la synthèse locale quand c\'est possible, sinon bascule vers votre serveur.';
@override
String get ttsEngineDeviceDescription =>
    'Garde la synthèse sur cet appareil. La lecture vocale ne fonctionne plus si l\'appareil n\'offre pas la synthèse vocale.';
@override
String get ttsEngineServerDescription =>
    'Demande toujours l\'audio à votre serveur OpenWebUI.';
@override
String get ttsDeviceUnavailableWarning =>
    'La synthèse vocale sur l\'appareil n\'est pas disponible sur cet appareil.';
@override
String get ttsServerUnavailableWarning =>
    'Connectez-vous à un serveur avec la synthèse vocale activée pour utiliser cette option.';
@override
String get ttsSettings => 'Synthèse vocale';

View File

@@ -929,15 +929,80 @@ class AppLocalizationsIt extends AppLocalizations {
String get sendOnEnterDescription =>
'Invio invia (tastiera software). Cmd/Ctrl+Invio disponibile';
// --- Speech-to-text (STT) settings strings (Italian) ---
@override
String get sttSettings => 'Voce in testo';
@override
String get sttEngineLabel => 'Motore di riconoscimento';
@override
String get sttEngineAuto => 'Automatico';
@override
String get sttEngineDevice => 'Sul dispositivo';
@override
String get sttEngineServer => 'Server';
@override
String get sttEngineAutoDescription =>
    'Usa il riconoscimento sul dispositivo quando disponibile e altrimenti passa al tuo server.';
@override
String get sttEngineDeviceDescription =>
    'Mantiene l\'audio su questo dispositivo. L\'input vocale non funziona se il dispositivo non supporta il riconoscimento vocale.';
@override
String get sttEngineServerDescription =>
    'Invia sempre le registrazioni al tuo server OpenWebUI per la trascrizione.';
@override
String get sttDeviceUnavailableWarning =>
    'Il riconoscimento vocale sul dispositivo non è disponibile su questo dispositivo.';
@override
String get sttServerUnavailableWarning =>
    'Collegati a un server con la trascrizione abilitata per usare questa opzione.';
@override
String get sttSilenceDuration => 'Durata del silenzio';
@override
String get sttSilenceDurationDescription =>
    'Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione';

// --- Text-to-speech (TTS) settings strings (Italian) ---
@override
String get ttsEngineLabel => 'Motore';
@override
String get ttsEngineAuto => 'Automatico';
@override
String get ttsEngineDevice => 'Sul dispositivo';
@override
String get ttsEngineServer => 'Server';
@override
String get ttsEngineAutoDescription =>
    'Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.';
@override
String get ttsEngineDeviceDescription =>
    'Mantiene la sintesi su questo dispositivo. La riproduzione vocale non funziona se il dispositivo non supporta il TTS.';
@override
String get ttsEngineServerDescription =>
    'Richiede sempre l\'audio dal tuo server OpenWebUI.';
@override
String get ttsDeviceUnavailableWarning =>
    'La sintesi vocale sul dispositivo non è disponibile su questo dispositivo.';
@override
String get ttsServerUnavailableWarning =>
    'Collegati a un server con la sintesi vocale abilitata per usare questa opzione.';
@override
String get ttsSettings => 'Sintesi vocale';

View File

@@ -307,7 +307,25 @@
"chatSettings": "Chat",
"sendOnEnter": "Verzenden met Enter",
"sendOnEnterDescription": "Enter verzendt (softtoetsenbord). Cmd/Ctrl+Enter ook beschikbaar",
"sttSettings": "Spraak naar tekst",
"sttEngineLabel": "Herkenningsengine",
"sttEngineAuto": "Automatisch",
"sttEngineDevice": "Op het apparaat",
"sttEngineServer": "Server",
"sttEngineAutoDescription": "Gebruikt spraakherkenning op het apparaat wanneer beschikbaar en valt anders terug op je server.",
"sttEngineDeviceDescription": "Houdt audio op dit apparaat. Spraakinput werkt niet als het apparaat geen spraakherkenning ondersteunt.",
"sttEngineServerDescription": "Stuurt opnames altijd naar je OpenWebUI-server voor transcriptie.",
"sttDeviceUnavailableWarning": "Spraakherkenning op het apparaat is niet beschikbaar op dit apparaat.",
"sttServerUnavailableWarning": "Verbind met een server met transcriptie ingeschakeld om deze optie te gebruiken.",
"sttSilenceDuration": "Stilteduur",
"sttSilenceDurationDescription": "Tijd om te wachten na stilte voordat de opname automatisch stopt",
"ttsSettings": "Tekst naar spraak",
"ttsEngineAuto": "Automatisch",
"ttsEngineAutoDescription": "Gebruikt spraaksynthese op het apparaat wanneer beschikbaar en valt anders terug op je server.",
"ttsEngineDeviceDescription": "Houdt de synthese op dit apparaat. Spraakweergave werkt niet als het apparaat geen TTS ondersteunt.",
"ttsEngineServerDescription": "Vraagt altijd audio op bij je OpenWebUI-server.",
"ttsDeviceUnavailableWarning": "Spraaksynthese op het apparaat is niet beschikbaar op dit apparaat.",
"ttsServerUnavailableWarning": "Verbind met een server met tekst-naar-spraak ingeschakeld om deze optie te gebruiken.",
"ttsVoice": "Stem",
"ttsSpeechRate": "Spraaksnelheid",
"ttsPitch": "Toonhoogte",

View File

@@ -307,7 +307,25 @@
"chatSettings": "Чат",
"sendOnEnter": "Отправка по Enter",
"sendOnEnterDescription": "Enter отправляет (программная клавиатура). Также доступно Cmd/Ctrl+Enter",
"sttSettings": "Речь в текст",
"sttEngineLabel": "Движок распознавания",
"sttEngineAuto": "Авто",
"sttEngineDevice": "На устройстве",
"sttEngineServer": "Сервер",
"sttEngineAutoDescription": "Использует распознавание на устройстве, когда это возможно, иначе переключается на ваш сервер.",
"sttEngineDeviceDescription": "Оставляет звук на этом устройстве. Голосовой ввод не работает, если устройство не поддерживает распознавание речи.",
"sttEngineServerDescription": "Всегда отправляет записи на сервер OpenWebUI для транскрибации.",
"sttDeviceUnavailableWarning": "Распознавание речи на устройстве недоступно на этом устройстве.",
"sttServerUnavailableWarning": "Подключитесь к серверу с включённой транскрибацией, чтобы использовать эту опцию.",
"sttSilenceDuration": "Длительность тишины",
"sttSilenceDurationDescription": "Время ожидания после тишины перед автоматической остановкой записи",
"ttsSettings": "Преобразование текста в речь",
"ttsEngineAuto": "Авто",
"ttsEngineAutoDescription": "Использует синтез речи на устройстве, когда это возможно, иначе переключается на ваш сервер.",
"ttsEngineDeviceDescription": "Оставляет синтез на этом устройстве. Воспроизведение голоса не работает, если устройство не поддерживает синтез речи.",
"ttsEngineServerDescription": "Всегда запрашивает аудио у вашего сервера OpenWebUI.",
"ttsDeviceUnavailableWarning": "Синтез речи на устройстве недоступен на этом устройстве.",
"ttsServerUnavailableWarning": "Подключитесь к серверу с включённым синтезом речи, чтобы использовать эту опцию.",
"ttsVoice": "Голос",
"ttsSpeechRate": "Скорость речи",
"ttsPitch": "Высота тона",

View File

@@ -307,7 +307,25 @@
"chatSettings": "对话",
"sendOnEnter": "回车发送",
"sendOnEnterDescription": "回车发送(软键盘)。Cmd/Ctrl+Enter 也可用",
"sttSettings": "语音转文字",
"sttEngineLabel": "识别引擎",
"sttEngineAuto": "自动",
"sttEngineDevice": "本机",
"sttEngineServer": "服务器",
"sttEngineAutoDescription": "在可用时使用本机识别,否则切换到你的服务器。",
"sttEngineDeviceDescription": "音频会保留在此设备上。如果设备不支持语音识别,语音输入将不可用。",
"sttEngineServerDescription": "始终将录音发送到你的 OpenWebUI 服务器进行转写。",
"sttDeviceUnavailableWarning": "此设备不支持本机语音识别。",
"sttServerUnavailableWarning": "连接到启用转写功能的服务器后才能使用此选项。",
"sttSilenceDuration": "静音持续时间",
"sttSilenceDurationDescription": "检测到静音后等待多久自动停止录音",
"ttsSettings": "文本转语音",
"ttsEngineAuto": "自动",
"ttsEngineAutoDescription": "在可用时使用本机合成,否则切换到你的服务器。",
"ttsEngineDeviceDescription": "在此设备上完成合成。如果设备不支持文本转语音,语音播放将不可用。",
"ttsEngineServerDescription": "始终向你的 OpenWebUI 服务器请求音频。",
"ttsDeviceUnavailableWarning": "此设备不支持本机文本转语音。",
"ttsServerUnavailableWarning": "连接到启用文本转语音的服务器后才能使用此选项。",
"ttsVoice": "语音",
"ttsSpeechRate": "语速",
"ttsPitch": "音调",

View File

@@ -965,6 +965,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.16.0"
mic_stream_recorder:
dependency: "direct main"
description:
name: mic_stream_recorder
sha256: "73965991ef5cc93d2b0c1e6d590cbd567a853b9ee7b2d52de43a73f185bb0d9c"
url: "https://pub.dev"
source: hosted
version: "1.1.2"
mime:
dependency: transitive
description:
@@ -1165,70 +1173,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.5.0"
record:
dependency: "direct main"
description:
name: record
sha256: "9dbc6ff3e784612f90a9b001373c45ff76b7a08abd2bd9fdf72c242320c8911c"
url: "https://pub.dev"
source: hosted
version: "6.1.1"
record_android:
dependency: transitive
description:
name: record_android
sha256: "854627cd78d8d66190377f98477eee06ca96ab7c9f2e662700daf33dbf7e6673"
url: "https://pub.dev"
source: hosted
version: "1.4.2"
record_ios:
dependency: transitive
description:
name: record_ios
sha256: "13e241ed9cbc220534a40ae6b66222e21288db364d96dd66fb762ebd3cb77c71"
url: "https://pub.dev"
source: hosted
version: "1.1.2"
record_linux:
dependency: transitive
description:
name: record_linux
sha256: "235b1f1fb84e810f8149cc0c2c731d7d697f8d1c333b32cb820c449bf7bb72d8"
url: "https://pub.dev"
source: hosted
version: "1.2.1"
record_macos:
dependency: transitive
description:
name: record_macos
sha256: "2849068bb59072f300ad63ed146e543d66afaef8263edba4de4834fc7c8d4d35"
url: "https://pub.dev"
source: hosted
version: "1.1.1"
record_platform_interface:
dependency: transitive
description:
name: record_platform_interface
sha256: b0065fdf1ec28f5a634d676724d388a77e43ce7646fb049949f58c69f3fcb4ed
url: "https://pub.dev"
source: hosted
version: "1.4.0"
record_web:
dependency: transitive
description:
name: record_web
sha256: "4f0adf20c9ccafcc02d71111fd91fba1ca7b17a7453902593e5a9b25b74a5c56"
url: "https://pub.dev"
source: hosted
version: "1.2.0"
record_windows:
dependency: transitive
description:
name: record_windows
sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78"
url: "https://pub.dev"
source: hosted
version: "1.0.7"
riverpod:
dependency: transitive
description:

View File

@@ -44,7 +44,7 @@ dependencies:
flutter_animate: ^4.5.0
# Platform Features
record: ^6.1.1
mic_stream_recorder: ^1.1.2
stts: ^1.2.5
flutter_tts: ^4.2.3
audioplayers: ^6.5.1