diff --git a/flutter_01.png b/flutter_01.png new file mode 100644 index 0000000..8ed4c34 Binary files /dev/null and b/flutter_01.png differ diff --git a/ios/Podfile.lock b/ios/Podfile.lock index afb4210..42cde87 100644 --- a/ios/Podfile.lock +++ b/ios/Podfile.lock @@ -49,13 +49,13 @@ PODS: - Flutter - image_picker_ios (0.0.1): - Flutter + - mic_stream_recorder (0.0.1): + - Flutter - package_info_plus (0.4.5): - Flutter - path_provider_foundation (0.0.1): - Flutter - FlutterMacOS - - record_ios (1.1.0): - - Flutter - SDWebImage (5.21.1): - SDWebImage/Core (= 5.21.1) - SDWebImage/Core (5.21.1) @@ -96,9 +96,9 @@ DEPENDENCIES: - flutter_secure_storage (from `.symlinks/plugins/flutter_secure_storage/ios`) - flutter_tts (from `.symlinks/plugins/flutter_tts/ios`) - image_picker_ios (from `.symlinks/plugins/image_picker_ios/ios`) + - mic_stream_recorder (from `.symlinks/plugins/mic_stream_recorder/ios`) - package_info_plus (from `.symlinks/plugins/package_info_plus/ios`) - path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`) - - record_ios (from `.symlinks/plugins/record_ios/ios`) - share_handler_ios (from `.symlinks/plugins/share_handler_ios/ios`) - share_handler_ios_models (from `.symlinks/plugins/share_handler_ios/ios/Models`) - share_plus (from `.symlinks/plugins/share_plus/ios`) @@ -135,12 +135,12 @@ EXTERNAL SOURCES: :path: ".symlinks/plugins/flutter_tts/ios" image_picker_ios: :path: ".symlinks/plugins/image_picker_ios/ios" + mic_stream_recorder: + :path: ".symlinks/plugins/mic_stream_recorder/ios" package_info_plus: :path: ".symlinks/plugins/package_info_plus/ios" path_provider_foundation: :path: ".symlinks/plugins/path_provider_foundation/darwin" - record_ios: - :path: ".symlinks/plugins/record_ios/ios" share_handler_ios: :path: ".symlinks/plugins/share_handler_ios/ios" share_handler_ios_models: @@ -172,9 +172,9 @@ SPEC CHECKSUMS: flutter_secure_storage: 1ed9476fba7e7a782b22888f956cce43e2c62f13 flutter_tts: b88dbc8655d3dc961bc4a796e4e16a4cc1795833 image_picker_ios: 7fe1ff8e34c1790d6fff70a32484959f563a928a + mic_stream_recorder: 27d2d1225563a3a28bf4019fc5cc198cffd7dad1 package_info_plus: af8e2ca6888548050f16fa2f1938db7b5a5df499 path_provider_foundation: 080d55be775b7414fd5a5ef3ac137b97b097e564 - record_ios: f75fa1d57f840012775c0e93a38a7f3ceea1a374 SDWebImage: f29024626962457f3470184232766516dee8dfea share_handler_ios: e2244e990f826b2c8eaa291ac3831569438ba0fb share_handler_ios_models: fc638c9b4330dc7f082586c92aee9dfa0b87b871 diff --git a/lib/core/persistence/persistence_keys.dart b/lib/core/persistence/persistence_keys.dart index d0078c5..c74a616 100644 --- a/lib/core/persistence/persistence_keys.dart +++ b/lib/core/persistence/persistence_keys.dart @@ -11,6 +11,7 @@ final class PreferenceKeys { static const String voiceLocaleId = 'voice_locale_id'; static const String voiceHoldToTalk = 'voice_hold_to_talk'; static const String voiceAutoSendFinal = 'voice_auto_send_final'; + static const String voiceSttPreference = 'voice_stt_preference'; static const String socketTransportMode = 'socket_transport_mode'; static const String quickPills = 'quick_pills'; static const String sendOnEnterKey = 'send_on_enter'; @@ -27,6 +28,7 @@ final class PreferenceKeys { static const String ttsEngine = 'tts_engine'; // 'device' | 'server' static const String ttsServerVoiceId = 'tts_server_voice_id'; static const String ttsServerVoiceName = 'tts_server_voice_name'; + static const String voiceSilenceDuration = 'voice_silence_duration'; } final class LegacyPreferenceKeys { diff --git a/lib/core/persistence/persistence_migrator.dart b/lib/core/persistence/persistence_migrator.dart index 8a7350f..d6a278c 100644 --- a/lib/core/persistence/persistence_migrator.dart +++ b/lib/core/persistence/persistence_migrator.dart @@ -90,6 +90,7 @@ class PersistenceMigrator { copyString(PreferenceKeys.voiceLocaleId); copyBool(PreferenceKeys.voiceHoldToTalk); copyBool(PreferenceKeys.voiceAutoSendFinal); + copyString(PreferenceKeys.voiceSttPreference); copyString(PreferenceKeys.socketTransportMode); copyStringList(PreferenceKeys.quickPills); copyBool(PreferenceKeys.sendOnEnterKey); @@ -194,6 +195,7 @@ class PersistenceMigrator { PreferenceKeys.voiceLocaleId, PreferenceKeys.voiceHoldToTalk, PreferenceKeys.voiceAutoSendFinal, + PreferenceKeys.voiceSttPreference, PreferenceKeys.socketTransportMode, PreferenceKeys.quickPills, PreferenceKeys.sendOnEnterKey, diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart index efafb45..f88cd49 100644 --- a/lib/core/services/api_service.dart +++ b/lib/core/services/api_service.dart @@ -4,7 +4,7 @@ import 'dart:io'; import 'package:dio/dio.dart'; import 'package:dio/io.dart'; import 'package:flutter/foundation.dart'; -// import 'package:http_parser/http_parser.dart'; +import 'package:http_parser/http_parser.dart'; // Removed legacy websocket/socket.io imports import 'package:uuid/uuid.dart'; import '../models/backend_config.dart'; @@ -1607,15 +1607,69 @@ class ApiService { return []; } + Future> transcribeSpeech({ + required Uint8List audioBytes, + String? fileName, + String? mimeType, + String? language, + }) async { + if (audioBytes.isEmpty) { + throw ArgumentError('audioBytes cannot be empty for transcription'); + } + + final sanitizedFileName = (fileName != null && fileName.trim().isNotEmpty + ? fileName.trim() + : 'audio.m4a'); + final resolvedMimeType = (mimeType != null && mimeType.trim().isNotEmpty) + ? mimeType.trim() + : _inferMimeTypeFromName(sanitizedFileName); + + _traceApi( + 'Uploading $sanitizedFileName (${audioBytes.length} bytes) for transcription', + ); + + final formData = FormData.fromMap({ + 'file': MultipartFile.fromBytes( + audioBytes, + filename: sanitizedFileName, + contentType: _parseMediaType(resolvedMimeType), + ), + if (language != null && language.trim().isNotEmpty) + 'language': language.trim(), + }); + + final response = await _dio.post( + '/api/v1/audio/transcriptions', + data: formData, + options: Options(headers: const {'accept': 'application/json'}), + ); + + final data = response.data; + if (data is Map) { + return data; + } + if (data is String) { + return {'text': data}; + } + throw StateError( + 'Unexpected transcription response type: ${data.runtimeType}', + ); + } + Future<({Uint8List bytes, String mimeType})> generateSpeech({ required String text, String? voice, + double? speed, }) async { final textPreview = text.length > 50 ? text.substring(0, 50) : text; _traceApi('Generating speech for text: $textPreview...'); final response = await _dio.post( '/api/v1/audio/speech', - data: {'input': text, if (voice != null) 'voice': voice}, + data: { + 'input': text, + if (voice != null) 'voice': voice, + if (speed != null) 'speed': speed, + }, options: Options(responseType: ResponseType.bytes), ); @@ -1690,7 +1744,43 @@ class ApiService { return bytes.length >= 2 && bytes[0] == 0xFF && (bytes[1] & 0xE0) == 0xE0; } - // Server audio transcription removed; rely on on-device STT in UI layer + String _inferMimeTypeFromName(String name) { + final dotIndex = name.lastIndexOf('.'); + if (dotIndex == -1 || dotIndex == name.length - 1) { + return 'audio/mpeg'; + } + final ext = name.substring(dotIndex + 1).toLowerCase(); + switch (ext) { + case 'wav': + return 'audio/wav'; + case 'ogg': + return 'audio/ogg'; + case 'm4a': + case 'mp4': + return 'audio/mp4'; + case 'aac': + return 'audio/aac'; + case 'webm': + return 'audio/webm'; + case 'flac': + return 'audio/flac'; + case 'mp3': + return 'audio/mpeg'; + default: + return 'audio/mpeg'; + } + } + + MediaType? _parseMediaType(String? value) { + if (value == null || value.isEmpty) { + return null; + } + try { + return MediaType.parse(value); + } catch (_) { + return null; + } + } // Image Generation Future>> getImageModels() async { diff --git a/lib/core/services/settings_service.dart b/lib/core/services/settings_service.dart index 19ba497..e04f210 100644 --- a/lib/core/services/settings_service.dart +++ b/lib/core/services/settings_service.dart @@ -8,8 +8,11 @@ import 'animation_service.dart'; part 'settings_service.g.dart'; +/// Speech-to-text preference selection. +enum SttPreference { auto, deviceOnly, serverOnly } + /// TTS engine selection -enum TtsEngine { device, server } +enum TtsEngine { auto, device, server } /// Service for managing app-wide settings including accessibility preferences class SettingsService { @@ -32,6 +35,9 @@ class SettingsService { .quickPills; // StringList of identifiers e.g. ['web','image','tools'] // Chat input behavior static const String _sendOnEnterKey = PreferenceKeys.sendOnEnterKey; + // Voice silence duration for auto-stop (milliseconds) + static const String _voiceSilenceDurationKey = + PreferenceKeys.voiceSilenceDuration; static Box _preferencesBox() => Hive.box(HiveBoxNames.preferences); @@ -151,6 +157,11 @@ class SettingsService { ttsServerVoiceId: box.get(PreferenceKeys.ttsServerVoiceId) as String?, ttsServerVoiceName: box.get(PreferenceKeys.ttsServerVoiceName) as String?, + sttPreference: _parseSttPreference( + box.get(PreferenceKeys.voiceSttPreference) as String?, + ), + voiceSilenceDuration: + (box.get(_voiceSilenceDurationKey) as int? ?? 2000).clamp(300, 3000), ), ); } @@ -174,6 +185,8 @@ class SettingsService { PreferenceKeys.ttsPitch: settings.ttsPitch, PreferenceKeys.ttsVolume: settings.ttsVolume, PreferenceKeys.ttsEngine: settings.ttsEngine.name, + PreferenceKeys.voiceSttPreference: settings.sttPreference.name, + _voiceSilenceDurationKey: settings.voiceSilenceDuration, }; await box.putAll(updates); @@ -216,11 +229,31 @@ class SettingsService { static TtsEngine _parseTtsEngine(String? raw) { switch ((raw ?? '').toLowerCase()) { + case 'auto': + case '': + return TtsEngine.auto; case 'server': return TtsEngine.server; case 'device': - default: return TtsEngine.device; + default: + return TtsEngine.auto; + } + } + + static SttPreference _parseSttPreference(String? raw) { + switch ((raw ?? '').toLowerCase()) { + case 'deviceonly': + case 'device_only': + case 'device': + return SttPreference.deviceOnly; + case 'serveronly': + case 'server_only': + case 'server': + return SttPreference.serverOnly; + case 'auto': + default: + return SttPreference.auto; } } @@ -304,6 +337,16 @@ class SettingsService { return _preferencesBox().put(_sendOnEnterKey, value); } + static Future getVoiceSilenceDuration() { + final value = _preferencesBox().get(_voiceSilenceDurationKey) as int?; + return Future.value((value ?? 2000).clamp(300, 3000)); + } + + static Future setVoiceSilenceDuration(int milliseconds) { + final sanitized = milliseconds.clamp(300, 3000); + return _preferencesBox().put(_voiceSilenceDurationKey, sanitized); + } + /// Get effective animation duration considering all settings static Duration getEffectiveAnimationDuration( BuildContext context, @@ -359,6 +402,7 @@ class AppSettings { final String socketTransportMode; // 'polling' or 'ws' final List quickPills; // e.g., ['web','image'] final bool sendOnEnter; + final SttPreference sttPreference; final String? ttsVoice; final double ttsSpeechRate; final double ttsPitch; @@ -366,6 +410,7 @@ class AppSettings { final TtsEngine ttsEngine; final String? ttsServerVoiceId; final String? ttsServerVoiceName; + final int voiceSilenceDuration; const AppSettings({ this.reduceMotion = false, this.animationSpeed = 1.0, @@ -380,13 +425,15 @@ class AppSettings { this.socketTransportMode = 'ws', this.quickPills = const [], this.sendOnEnter = false, + this.sttPreference = SttPreference.auto, this.ttsVoice, this.ttsSpeechRate = 0.5, this.ttsPitch = 1.0, this.ttsVolume = 1.0, - this.ttsEngine = TtsEngine.device, + this.ttsEngine = TtsEngine.auto, this.ttsServerVoiceId, this.ttsServerVoiceName, + this.voiceSilenceDuration = 2000, }); AppSettings copyWith({ @@ -403,6 +450,7 @@ class AppSettings { String? socketTransportMode, List? quickPills, bool? sendOnEnter, + SttPreference? sttPreference, Object? ttsVoice = const _DefaultValue(), double? ttsSpeechRate, double? ttsPitch, @@ -410,6 +458,7 @@ class AppSettings { TtsEngine? ttsEngine, Object? ttsServerVoiceId = const _DefaultValue(), Object? ttsServerVoiceName = const _DefaultValue(), + int? voiceSilenceDuration, }) { return AppSettings( reduceMotion: reduceMotion ?? this.reduceMotion, @@ -429,6 +478,7 @@ class AppSettings { socketTransportMode: socketTransportMode ?? this.socketTransportMode, quickPills: quickPills ?? this.quickPills, sendOnEnter: sendOnEnter ?? this.sendOnEnter, + sttPreference: sttPreference ?? this.sttPreference, ttsVoice: ttsVoice is _DefaultValue ? this.ttsVoice : ttsVoice as String?, ttsSpeechRate: ttsSpeechRate ?? this.ttsSpeechRate, ttsPitch: ttsPitch ?? this.ttsPitch, @@ -440,6 +490,7 @@ class AppSettings { ttsServerVoiceName: ttsServerVoiceName is _DefaultValue ? this.ttsServerVoiceName : ttsServerVoiceName as String?, + voiceSilenceDuration: voiceSilenceDuration ?? this.voiceSilenceDuration, ); } @@ -457,6 +508,7 @@ class AppSettings { other.voiceLocaleId == voiceLocaleId && other.voiceHoldToTalk == voiceHoldToTalk && other.voiceAutoSendFinal == voiceAutoSendFinal && + other.sttPreference == sttPreference && other.sendOnEnter == sendOnEnter && other.ttsVoice == ttsVoice && other.ttsSpeechRate == ttsSpeechRate && @@ -465,13 +517,14 @@ class AppSettings { other.ttsEngine == ttsEngine && other.ttsServerVoiceId == ttsServerVoiceId && other.ttsServerVoiceName == ttsServerVoiceName && + other.voiceSilenceDuration == voiceSilenceDuration && _listEquals(other.quickPills, quickPills); // socketTransportMode intentionally not included in == to avoid frequent rebuilds } @override int get hashCode { - return Object.hash( + return Object.hashAll([ reduceMotion, animationSpeed, hapticFeedback, @@ -482,6 +535,7 @@ class AppSettings { voiceLocaleId, voiceHoldToTalk, voiceAutoSendFinal, + sttPreference, socketTransportMode, sendOnEnter, ttsVoice, @@ -491,8 +545,9 @@ class AppSettings { ttsEngine, ttsServerVoiceId, ttsServerVoiceName, + voiceSilenceDuration, Object.hashAllUnordered(quickPills), - ); + ]); } } @@ -603,6 +658,14 @@ class AppSettingsNotifier extends _$AppSettingsNotifier { await SettingsService.setSendOnEnter(value); } + Future setSttPreference(SttPreference preference) async { + if (state.sttPreference == preference) { + return; + } + state = state.copyWith(sttPreference: preference); + await SettingsService.saveSettings(state); + } + Future setTtsVoice(String? voice) async { state = state.copyWith(ttsVoice: voice); await SettingsService.saveSettings(state); @@ -638,6 +701,11 @@ class AppSettingsNotifier extends _$AppSettingsNotifier { await SettingsService.saveSettings(state); } + Future setVoiceSilenceDuration(int milliseconds) async { + state = state.copyWith(voiceSilenceDuration: milliseconds); + await SettingsService.setVoiceSilenceDuration(milliseconds); + } + Future resetToDefaults() async { const defaultSettings = AppSettings(); await SettingsService.saveSettings(defaultSettings); diff --git a/lib/core/services/streaming_helper.dart b/lib/core/services/streaming_helper.dart index 682c11e..c546181 100644 --- a/lib/core/services/streaming_helper.dart +++ b/lib/core/services/streaming_helper.dart @@ -277,6 +277,13 @@ ActiveSocketStream attachUnifiedChunkedStreaming({ )..start(); } + Timer? imageCollectionDebounce; + String? pendingImageContent; + String? pendingImageMessageId; + String? pendingImageSignature; + String? lastProcessedImageSignature; + int imageCollectionRequestId = 0; + void disposeSocketSubscriptions() { if (socketSubscriptions.isEmpty) { return; @@ -287,56 +294,119 @@ ActiveSocketStream attachUnifiedChunkedStreaming({ } catch (_) {} } socketSubscriptions.clear(); + imageCollectionDebounce?.cancel(); + imageCollectionDebounce = null; + pendingImageContent = null; + pendingImageMessageId = null; + pendingImageSignature = null; + lastProcessedImageSignature = null; + imageCollectionRequestId = 0; socketWatchdog?.stop(); } bool isSearching = false; + void runPendingImageCollection() { + imageCollectionDebounce?.cancel(); + imageCollectionDebounce = null; + + final content = pendingImageContent; + final targetMessageId = pendingImageMessageId; + final signature = pendingImageSignature; + if (content == null || targetMessageId == null || signature == null) { + return; + } + + pendingImageContent = null; + pendingImageMessageId = null; + pendingImageSignature = null; + + final requestId = ++imageCollectionRequestId; + unawaited( + workerManager + .schedule>>( + _collectImageReferencesWorker, + content, + debugLabel: 'stream_collect_images', + ) + .then((collected) { + if (requestId != imageCollectionRequestId) { + return; + } + + final currentMessages = getMessages(); + if (currentMessages.isEmpty) { + return; + } + final last = currentMessages.last; + if (last.id != targetMessageId || last.role != 'assistant') { + return; + } + + lastProcessedImageSignature = signature; + + if (collected.isEmpty) { + return; + } + + final existing = last.files ?? >[]; + final seen = { + for (final f in existing) + if (f['url'] is String) (f['url'] as String) else '', + }..removeWhere((e) => e.isEmpty); + + final merged = >[...existing]; + for (final f in collected) { + final url = f['url'] as String?; + if (url != null && url.isNotEmpty && !seen.contains(url)) { + merged.add({'type': 'image', 'url': url}); + seen.add(url); + } + } + + if (merged.length != existing.length) { + updateLastMessageWith((m) => m.copyWith(files: merged)); + } + }) + .catchError((_) {}), + ); + } + void updateImagesFromCurrentContent() { try { final msgs = getMessages(); if (msgs.isEmpty || msgs.last.role != 'assistant') return; - final content = msgs.last.content; + final last = msgs.last; + final content = last.content; if (content.isEmpty) return; - final targetMessageId = msgs.last.id; - unawaited( - workerManager - .schedule>>( - _collectImageReferencesWorker, - content, - debugLabel: 'stream_collect_images', - ) - .then((collected) { - if (collected.isEmpty) return; - final currentMessages = getMessages(); - if (currentMessages.isEmpty) return; - final last = currentMessages.last; - if (last.id != targetMessageId || last.role != 'assistant') { - return; - } + final targetMessageId = last.id; + final signature = + '$targetMessageId:${content.hashCode}:${content.length}'; - final existing = last.files ?? >[]; - final seen = { - for (final f in existing) - if (f['url'] is String) (f['url'] as String) else '', - }..removeWhere((e) => e.isEmpty); + if (signature == lastProcessedImageSignature && + pendingImageSignature == null) { + return; + } + if (signature == pendingImageSignature) { + return; + } - final merged = >[...existing]; - for (final f in collected) { - final url = f['url'] as String?; - if (url != null && url.isNotEmpty && !seen.contains(url)) { - merged.add({'type': 'image', 'url': url}); - seen.add(url); - } - } + pendingImageMessageId = targetMessageId; + pendingImageContent = content; + pendingImageSignature = signature; - if (merged.length != existing.length) { - updateLastMessageWith((m) => m.copyWith(files: merged)); - } - }) - .catchError((_) {}), - ); + final shouldDelay = last.isStreaming; + + imageCollectionDebounce?.cancel(); + if (shouldDelay) { + imageCollectionDebounce = Timer( + const Duration(milliseconds: 200), + runPendingImageCollection, + ); + } else { + runPendingImageCollection(); + } } catch (_) {} } diff --git a/lib/features/chat/providers/chat_providers.dart b/lib/features/chat/providers/chat_providers.dart index cea62f7..c9f8307 100644 --- a/lib/features/chat/providers/chat_providers.dart +++ b/lib/features/chat/providers/chat_providers.dart @@ -647,13 +647,6 @@ class ChatMessagesNotifier extends Notifier> { return; } - // Log content replacement for debugging - DebugLogger.log( - 'Replacing message content: messageId=${lastMessage.id}, ' - 'oldLength=${lastMessage.content.length}, newLength=${content.length}', - scope: 'chat/providers', - ); - _ensureFormatterForMessage(lastMessage); // Defensive check: ensure the formatter is for the correct message diff --git a/lib/features/chat/providers/text_to_speech_provider.dart b/lib/features/chat/providers/text_to_speech_provider.dart index 2f53bd6..7992596 100644 --- a/lib/features/chat/providers/text_to_speech_provider.dart +++ b/lib/features/chat/providers/text_to_speech_provider.dart @@ -107,11 +107,9 @@ class TextToSpeechController extends Notifier { // Listen to settings changes and update TTS when initialized ref.listen(appSettingsProvider, (previous, next) { if (_service.isInitialized && _service.isAvailable) { - final selectedVoice = next.ttsEngine == TtsEngine.server - ? next.ttsServerVoiceId - : next.ttsVoice; _service.updateSettings( - voice: selectedVoice, + voice: next.ttsVoice, + serverVoice: next.ttsServerVoiceId, speechRate: next.ttsSpeechRate, pitch: next.ttsPitch, volume: next.ttsVolume, @@ -137,9 +135,8 @@ class TextToSpeechController extends Notifier { final settings = ref.read(appSettingsProvider); final future = _service .initialize( - voice: settings.ttsEngine == TtsEngine.server - ? settings.ttsServerVoiceId - : settings.ttsVoice, + deviceVoice: settings.ttsVoice, + serverVoice: settings.ttsServerVoiceId, speechRate: settings.ttsSpeechRate, pitch: settings.ttsPitch, volume: settings.ttsVolume, @@ -222,8 +219,8 @@ class TextToSpeechController extends Notifier { // Prepare sentence split for highlighting final cleanText = MarkdownToText.convert(text); - final sentences = _splitForTts(cleanText); - final offsets = _computeOffsets(sentences); + final sentences = _service.splitTextForSpeech(cleanText); + final offsets = _computeOffsets(cleanText, sentences); state = state.copyWith( status: TtsPlaybackStatus.loading, @@ -268,30 +265,24 @@ class TextToSpeechController extends Notifier { } } - List _splitForTts(String text) { - final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim(); - if (normalized.isEmpty) return const []; - final parts = []; - final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)"); - int index = 0; - for (final match in sentenceRegex.allMatches('$normalized ')) { - final s = match.group(1) ?? ''; - if (s.trim().isNotEmpty) parts.add(s.trim()); - index = match.end; - } - if (index < normalized.length) { - final tail = normalized.substring(index).trim(); - if (tail.isNotEmpty) parts.add(tail); - } - return parts; - } - - List _computeOffsets(List sentences) { + List _computeOffsets(String source, List sentences) { + if (sentences.isEmpty) return const []; final offsets = []; - int acc = 0; - for (final s in sentences) { - offsets.add(acc); - acc += s.length + 1; // assume a space or punctuation between + var cursor = 0; + for (final sentence in sentences) { + final chunk = sentence.trim(); + if (chunk.isEmpty) { + offsets.add(cursor); + continue; + } + final index = source.indexOf(chunk, cursor); + if (index == -1) { + offsets.add(cursor); + cursor += chunk.length; + } else { + offsets.add(index); + cursor = index + chunk.length; + } } return offsets; } diff --git a/lib/features/chat/services/text_to_speech_service.dart b/lib/features/chat/services/text_to_speech_service.dart index 56de3b5..7c8f4b4 100644 --- a/lib/features/chat/services/text_to_speech_service.dart +++ b/lib/features/chat/services/text_to_speech_service.dart @@ -16,8 +16,10 @@ class TextToSpeechService { final FlutterTts _tts = FlutterTts(); final AudioPlayer _player = AudioPlayer(); final ApiService? _api; - TtsEngine _engine = TtsEngine.device; + TtsEngine _engine = TtsEngine.auto; String? _preferredVoice; + String? _serverPreferredVoice; + double _speechRate = 0.5; bool _initialized = false; bool _available = false; bool _voiceConfigured = false; @@ -41,6 +43,8 @@ class TextToSpeechService { bool get isInitialized => _initialized; bool get isAvailable => _available; + bool get deviceEngineAvailable => _deviceEngineAvailable; + bool get serverEngineAvailable => _api != null; TextToSpeechService({ApiService? api}) : _api = api { // Wire minimal player events to callbacks @@ -59,6 +63,69 @@ class TextToSpeechService { }); } + Future _configureDeviceEngine({ + required String? voice, + required double speechRate, + required double pitch, + required double volume, + }) async { + _deviceEngineAvailable = false; + try { + await _tts.awaitSpeakCompletion(false); + await _tts.setVolume(volume); + await _tts.setSpeechRate(speechRate); + await _tts.setPitch(pitch); + + if (!kIsWeb && Platform.isIOS) { + await _tts.setSharedInstance(true); + await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [ + IosTextToSpeechAudioCategoryOptions.mixWithOthers, + IosTextToSpeechAudioCategoryOptions.defaultToSpeaker, + IosTextToSpeechAudioCategoryOptions.allowBluetooth, + IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP, + ]); + } + + if (_engine != TtsEngine.server) { + await _setVoiceByName(_preferredVoice); + } else { + _voiceConfigured = false; + } + + _deviceEngineAvailable = true; + } catch (e) { + _voiceConfigured = false; + _deviceEngineAvailable = false; + rethrow; + } + } + + bool _computeAvailability() { + final serverAvailable = _api != null; + switch (_engine) { + case TtsEngine.device: + return _deviceEngineAvailable; + case TtsEngine.server: + return serverAvailable; + case TtsEngine.auto: + return _deviceEngineAvailable || serverAvailable; + } + } + + bool _shouldUseServer() { + if (_engine == TtsEngine.server) { + return _api != null; + } + if (_engine == TtsEngine.device) { + return false; + } + // Auto: prefer device when available, otherwise fall back to server + if (_deviceEngineAvailable) { + return false; + } + return _api != null; + } + /// Register callbacks for TTS lifecycle events void bindHandlers({ VoidCallback? onStart, @@ -96,56 +163,60 @@ class TextToSpeechService { /// Initialize the native TTS engine lazily Future initialize({ - String? voice, + String? deviceVoice, + String? serverVoice, double speechRate = 0.5, double pitch = 1.0, double volume = 1.0, - TtsEngine engine = TtsEngine.device, + TtsEngine engine = TtsEngine.auto, }) async { if (_initialized) { + _engine = engine; + _speechRate = speechRate; + if (deviceVoice != null) { + _preferredVoice = deviceVoice; + _voiceConfigured = false; + } + if (serverVoice != null) { + _serverPreferredVoice = serverVoice; + } + _available = _computeAvailability(); return _available; } - try { - _engine = engine; - _preferredVoice = voice; - await _tts.awaitSpeakCompletion(false); + _engine = engine; + _speechRate = speechRate; + _preferredVoice = deviceVoice; + _serverPreferredVoice = serverVoice; + _voiceConfigured = false; - // Set volume - await _tts.setVolume(volume); - - // Set speech rate - await _tts.setSpeechRate(speechRate); - - // Set pitch - await _tts.setPitch(pitch); - - if (!kIsWeb && Platform.isIOS) { - await _tts.setSharedInstance(true); - await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [ - IosTextToSpeechAudioCategoryOptions.mixWithOthers, - IosTextToSpeechAudioCategoryOptions.defaultToSpeaker, - IosTextToSpeechAudioCategoryOptions.allowBluetooth, - IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP, - ]); + if (_engine != TtsEngine.server || _api == null) { + try { + await _configureDeviceEngine( + voice: deviceVoice, + speechRate: speechRate, + pitch: pitch, + volume: volume, + ); + } catch (e) { + if (_engine == TtsEngine.device) { + _available = false; + _onError?.call(e.toString()); + _initialized = true; + return _available; + } } - - // Set the voice (specific or default) when using device engine - if (_engine == TtsEngine.device) { - await _setVoiceByName(voice); - } - _deviceEngineAvailable = true; - } catch (e) { + } else { _deviceEngineAvailable = false; - if (_engine != TtsEngine.server) { - _available = false; - _onError?.call(e.toString()); - _initialized = true; - return _available; - } + try { + await _tts.awaitSpeakCompletion(false); + await _tts.setVolume(volume); + await _tts.setSpeechRate(speechRate); + await _tts.setPitch(pitch); + } catch (_) {} } - _available = _engine == TtsEngine.server || _deviceEngineAvailable; + _available = _computeAvailability(); _initialized = true; return _available; } @@ -156,10 +227,23 @@ class TextToSpeechService { } if (!_initialized) { - await initialize(voice: _preferredVoice, engine: _engine); + await initialize( + deviceVoice: _preferredVoice, + serverVoice: _serverPreferredVoice, + engine: _engine, + ); } - if (_engine == TtsEngine.server && _api != null) { + final bool useServer = _shouldUseServer(); + + if (useServer) { + if (_api == null) { + if (_deviceEngineAvailable) { + await _speakOnDevice(text); + return; + } + throw StateError('Server text-to-speech is unavailable'); + } // Server-backed TTS with sentence chunking & queued playback try { await _startServerChunkedPlayback(text); @@ -196,7 +280,7 @@ class TextToSpeechService { Future pause() async { if (!_initialized) return; try { - if (_engine == TtsEngine.server) { + if (_shouldUseServer()) { await _player.pause(); _handlePause(); } else if (_deviceEngineAvailable) { @@ -210,7 +294,7 @@ class TextToSpeechService { Future resume() async { if (!_initialized) return; try { - if (_engine == TtsEngine.server) { + if (_shouldUseServer()) { if (_waitingNext && (_currentIndex + 1) < _buffered.length) { _waitingNext = false; await _playNextIfBuffered(_session); @@ -235,7 +319,7 @@ class TextToSpeechService { _expectedChunks = 0; _currentIndex = -1; _waitingNext = false; - if (_engine == TtsEngine.server) { + if (_shouldUseServer()) { await _player.stop(); _handleCancel(); } else { @@ -254,17 +338,24 @@ class TextToSpeechService { /// Update TTS settings on-the-fly Future updateSettings({ Object? voice = const _VoiceNotProvided(), + Object? serverVoice = const _VoiceNotProvided(), double? speechRate, double? pitch, double? volume, TtsEngine? engine, }) async { final voiceProvided = voice is! _VoiceNotProvided; + final serverVoiceProvided = serverVoice is! _VoiceNotProvided; final voiceValue = voiceProvided ? voice as String? : null; + final serverVoiceValue = serverVoiceProvided + ? serverVoice as String? + : null; if (!_initialized || !_available) { // Allow engine and voice to update before init if (engine != null) _engine = engine; if (voiceProvided) _preferredVoice = voiceValue; + if (serverVoiceProvided) _serverPreferredVoice = serverVoiceValue; + if (speechRate != null) _speechRate = speechRate; return; } @@ -275,22 +366,28 @@ class TextToSpeechService { if (voiceProvided) { _preferredVoice = voiceValue; } + if (serverVoiceProvided) { + _serverPreferredVoice = serverVoiceValue; + } if (volume != null) { await _tts.setVolume(volume); } if (speechRate != null) { + _speechRate = speechRate; await _tts.setSpeechRate(speechRate); } if (pitch != null) { await _tts.setPitch(pitch); } - // Set specific voice by name on device engine - if (_engine == TtsEngine.device && voiceProvided) { + // Set specific voice by name on device-capable engines + if (_engine != TtsEngine.server && voiceProvided) { await _setVoiceByName(_preferredVoice); } } catch (e) { _onError?.call(e.toString()); } + + _available = _computeAvailability(); } /// Set voice by name, or use system default if null @@ -343,7 +440,11 @@ class TextToSpeechService { /// Get available voices from the TTS engine Future>> getAvailableVoices() async { if (!_initialized) { - await initialize(voice: _preferredVoice, engine: _engine); + await initialize( + deviceVoice: _preferredVoice, + serverVoice: _serverPreferredVoice, + engine: _engine, + ); } if (_engine == TtsEngine.server && _api != null) { @@ -425,6 +526,10 @@ class TextToSpeechService { } Future _resolveServerVoice() async { + final serverSelected = _serverPreferredVoice?.trim(); + if (serverSelected != null && serverSelected.isNotEmpty) { + return serverSelected; + } final selected = _preferredVoice?.trim(); if (selected != null && selected.isNotEmpty) { return selected; @@ -545,9 +650,19 @@ class TextToSpeechService { String? voice, int session, ) async { - return await _api!.generateSpeech(text: text, voice: voice); + return await _api!.generateSpeech( + text: text, + voice: voice, + speed: _speechRate, + ); } + /// Splits [text] into the chunks used for playback sequencing. + /// + /// This mirrors the server-side streaming behavior so UI consumers can stay + /// in sync with sentence indices reported during playback. + List splitTextForSpeech(String text) => _splitForTts(text); + Future _onAudioComplete() async { final session = _session; // If there are more expected chunks @@ -580,43 +695,66 @@ class TextToSpeechService { } List _splitForTts(String text) { - // Normalize whitespace - final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim(); - if (normalized.isEmpty) return const []; + // Mirrors OpenWebUI's extractSentencesForAudio implementation + // See: src/lib/utils/index.ts lines 953-970, 907-928 - // Split on sentence-ending punctuation while keeping the delimiter - final parts = []; - final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)"); - int index = 0; - for (final match in sentenceRegex.allMatches('$normalized ')) { - final s = match.group(1) ?? ''; - if (s.trim().isNotEmpty) parts.add(s.trim()); - index = match.end; - } - if (index < normalized.length) { - final tail = normalized.substring(index).trim(); - if (tail.isNotEmpty) parts.add(tail); - } + // 1. Preserve code blocks (replace with placeholders) + final codeBlocks = []; + var processed = text; + var codeBlockIndex = 0; - // Fallback to length-based splits for very long segments - const maxLen = 300; - final chunks = []; - for (final p in parts.isEmpty ? [normalized] : parts) { - if (p.length <= maxLen) { - chunks.add(p); + // Match triple backticks code blocks + final codeBlockRegex = RegExp(r'```[\s\S]*?```', multiLine: true); + processed = processed.replaceAllMapped(codeBlockRegex, (match) { + final placeholder = '\u0000$codeBlockIndex\u0000'; + codeBlocks.add(match.group(0)!); + codeBlockIndex++; + return placeholder; + }); + + // 2. Split on sentence-ending punctuation: .!? + // OpenWebUI uses: /(?<=[.!?])\s+/ + final sentences = processed + .split(RegExp(r'(?<=[.!?])\s+')) + .map((s) => s.trim()) + .where((s) => s.isNotEmpty) + .toList(); + + // 3. Restore code blocks from placeholders + final restoredSentences = sentences + .map((sentence) { + return sentence.replaceAllMapped(RegExp(r'\u0000(\d+)\u0000'), ( + match, + ) { + final idx = int.parse(match.group(1)!); + return idx < codeBlocks.length ? codeBlocks[idx] : ''; + }); + }) + .where((s) => s.isNotEmpty) + .toList(); + + // 4. Merge short sentences (< 4 words OR < 50 chars) + // OpenWebUI logic from extractSentencesForAudio + final mergedChunks = []; + for (final sentence in restoredSentences) { + if (mergedChunks.isEmpty) { + mergedChunks.add(sentence); } else { - // Try splitting on commas/spaces - var remaining = p; - while (remaining.length > maxLen) { - int cut = remaining.lastIndexOf(RegExp(r",\s|\s"), maxLen); - cut = cut <= 0 ? maxLen : cut; - chunks.add(remaining.substring(0, cut).trim()); - remaining = remaining.substring(cut).trim(); + final lastIndex = mergedChunks.length - 1; + final previousText = mergedChunks[lastIndex]; + final wordCount = previousText.split(RegExp(r'\s+')).length; + final charCount = previousText.length; + + // Merge if previous chunk is too short + if (wordCount < 4 || charCount < 50) { + mergedChunks[lastIndex] = '$previousText $sentence'; + } else { + mergedChunks.add(sentence); } - if (remaining.isNotEmpty) chunks.add(remaining); } } - return chunks; + + return mergedChunks.isEmpty ? [text.trim()] : mergedChunks; } Future _configurePreferredVoice() async { diff --git a/lib/features/chat/services/voice_call_service.dart b/lib/features/chat/services/voice_call_service.dart index 1037b38..1fb4123 100644 --- a/lib/features/chat/services/voice_call_service.dart +++ b/lib/features/chat/services/voice_call_service.dart @@ -108,11 +108,18 @@ class VoiceCallService { throw Exception('Voice input initialization failed'); } - // Check if local STT is available + // Check if preferred STT path is available final hasLocalStt = _voiceInput.hasLocalStt; - if (!hasLocalStt) { + final hasServerStt = _voiceInput.hasServerStt; + final ready = switch (_voiceInput.preference) { + SttPreference.deviceOnly => hasLocalStt, + SttPreference.serverOnly => hasServerStt, + SttPreference.auto => hasLocalStt || hasServerStt, + }; + + if (!ready) { _updateState(VoiceCallState.error); - throw Exception('Speech recognition not available on this device'); + throw Exception('Preferred speech recognition engine is unavailable'); } // Check microphone permissions @@ -125,9 +132,8 @@ class VoiceCallService { // Initialize TTS with current app settings (engine/voice/rate/pitch/volume) final settings = _ref.read(appSettingsProvider); await _tts.initialize( - voice: settings.ttsEngine == TtsEngine.server - ? settings.ttsServerVoiceId - : settings.ttsVoice, + deviceVoice: settings.ttsVoice, + serverVoice: settings.ttsServerVoiceId, speechRate: settings.ttsSpeechRate, pitch: settings.ttsPitch, volume: settings.ttsVolume, @@ -202,10 +208,18 @@ class VoiceCallService { _listeningPaused = false; _accumulatedTranscript = ''; - // Check if voice input is available - if (!_voiceInput.hasLocalStt) { + final hasLocalStt = _voiceInput.hasLocalStt; + final hasServerStt = _voiceInput.hasServerStt; + final pref = _voiceInput.preference; + final engineAvailable = switch (pref) { + SttPreference.deviceOnly => hasLocalStt, + SttPreference.serverOnly => hasServerStt, + SttPreference.auto => hasLocalStt || hasServerStt, + }; + + if (!engineAvailable) { _updateState(VoiceCallState.error); - throw Exception('Voice input not available on this device'); + throw Exception('Preferred speech recognition engine is unavailable'); } _updateState(VoiceCallState.listening); @@ -572,11 +586,9 @@ VoiceCallService voiceCallService(Ref ref) { // Keep TTS settings in sync with app settings during a call ref.listen(appSettingsProvider, (previous, next) { // Update voice/engine and runtime parameters - final selectedVoice = next.ttsEngine == TtsEngine.server - ? next.ttsServerVoiceId - : next.ttsVoice; service._tts.updateSettings( - voice: selectedVoice, + voice: next.ttsVoice, + serverVoice: next.ttsServerVoiceId, speechRate: next.ttsSpeechRate, pitch: next.ttsPitch, volume: next.ttsVolume, diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart index 1c30c63..c1990aa 100644 --- a/lib/features/chat/services/voice_input_service.dart +++ b/lib/features/chat/services/voice_input_service.dart @@ -1,14 +1,19 @@ import 'dart:async'; -import 'dart:io' show Platform; +import 'dart:io' show File, Platform; import 'package:flutter/widgets.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:riverpod_annotation/riverpod_annotation.dart'; -import 'package:record/record.dart'; +import 'package:mic_stream_recorder/mic_stream_recorder.dart'; import 'package:stts/stts.dart'; +import 'package:path/path.dart' as p; +import 'package:path_provider/path_provider.dart'; + +import '../../../core/providers/app_providers.dart'; +import '../../../core/services/api_service.dart'; +import '../../../core/services/settings_service.dart'; part 'voice_input_service.g.dart'; -// Removed path imports as server transcription fallback was removed // Lightweight replacement for previous stt.LocaleName used across the UI class LocaleName { @@ -18,31 +23,50 @@ class LocaleName { } class VoiceInputService { - final AudioRecorder _recorder = AudioRecorder(); + final MicStreamRecorder _recorder = MicStreamRecorder(); final Stt _speech = Stt(); + final ApiService? _api; + final Ref? _ref; bool _isInitialized = false; bool _isListening = false; bool _localSttAvailable = false; + SttPreference _preference = SttPreference.auto; + bool _usingServerStt = false; String? _selectedLocaleId; List _locales = const []; StreamController? _textStreamController; String _currentText = ''; - // Public stream for UI waveform visualization (emits partial text length as proxy) StreamController? _intensityController; Stream get intensityStream => _intensityController?.stream ?? const Stream.empty(); int _lastIntensity = 0; Timer? _intensityDecayTimer; + Timer? _silenceTimer; + bool _hasDetectedSpeech = false; + int _amplitudeCallbackCount = 0; + Timer? _amplitudeFallbackTimer; - /// Public stream of partial/final transcript strings and special audio tokens. Stream get textStream => _textStreamController?.stream ?? const Stream.empty(); Timer? _autoStopTimer; - StreamSubscription? _ampSub; + StreamSubscription? _ampSub; StreamSubscription? _sttResultSub; StreamSubscription? _sttStateSub; bool get isSupportedPlatform => Platform.isAndroid || Platform.isIOS; + bool get hasServerStt => _api != null; + SttPreference get preference => _preference; + bool get allowsServerFallback => _preference != SttPreference.deviceOnly; + bool get prefersServerOnly => _preference == SttPreference.serverOnly; + bool get prefersDeviceOnly => _preference == SttPreference.deviceOnly; + + VoiceInputService({ApiService? api, Ref? ref}) + : _api = api, + _ref = ref; + + void updatePreference(SttPreference preference) { + _preference = preference; + } Future initialize() async { if (_isInitialized) return true; @@ -87,17 +111,15 @@ class VoiceInputService { Future checkPermissions() async { try { - // Prefer stts permission check which will request microphone permission - final mic = await _speech.hasPermission(); - if (mic) return true; - return await _recorder.hasPermission(); + return await _speech.hasPermission(); } catch (_) { return false; } } bool get isListening => _isListening; - bool get isAvailable => _isInitialized; // service usable (local or fallback) + bool get isAvailable => + _isInitialized && (_localSttAvailable || hasServerStt); bool get hasLocalStt => _localSttAvailable; // Add a method to check if on-device STT is properly supported @@ -166,7 +188,7 @@ class VoiceInputService { } if (_isListening) { - stopListening(); + unawaited(stopListening()); } _textStreamController = StreamController.broadcast(); @@ -174,82 +196,109 @@ class VoiceInputService { _isListening = true; _intensityController = StreamController.broadcast(); _lastIntensity = 0; + _usingServerStt = false; - // Begin a gentle decay timer so the UI level bars fall when silent - _intensityDecayTimer?.cancel(); - _intensityDecayTimer = Timer.periodic(const Duration(milliseconds: 120), ( - t, - ) { - if (!_isListening) return; - if (_lastIntensity <= 0) return; - _lastIntensity = (_lastIntensity - 1).clamp(0, 10); - try { - _intensityController?.add(_lastIntensity); - } catch (_) {} - }); + _startIntensityDecayTimer(); + + final bool canUseLocal = _localSttAvailable; + final bool serverAvailable = hasServerStt; + final bool shouldUseLocal = + canUseLocal && _preference != SttPreference.serverOnly; + final bool shouldUseServer = + serverAvailable && + (_preference == SttPreference.serverOnly || !shouldUseLocal); + + if (shouldUseLocal) { + _autoStopTimer?.cancel(); + _autoStopTimer = Timer(const Duration(seconds: 60), () { + if (_isListening) { + unawaited(_stopListening()); + } + }); - // Check if speech recognition is available before trying to use it - if (_localSttAvailable) { - // Schedule a check for speech recognition availability Future.microtask(() async { try { final isStillAvailable = await _speech.isSupported(); if (!isStillAvailable && _isListening) { - // Speech recognition no longer available; stop listening _localSttAvailable = false; - _stopListening(); - return; + if (hasServerStt && allowsServerFallback) { + unawaited(_beginServerFallback()); + } else { + unawaited(_stopListening()); + } } - } catch (e) { + } catch (_) { // ignore availability check errors } }); - // Local on-device STT path - _autoStopTimer?.cancel(); - _autoStopTimer = Timer(const Duration(seconds: 60), () { - if (_isListening) { - _stopListening(); - } - }); - - // Listen for results and state changes; keep subscriptions so we can cancel later _sttResultSub = _speech.onResultChanged.listen((SttRecognition result) { if (!_isListening) return; final prevLen = _currentText.length; _currentText = result.text; _textStreamController?.add(_currentText); - // Map number of new characters to a rough 0..10 intensity final delta = (_currentText.length - prevLen).clamp(0, 50); - final mapped = (delta / 5.0).ceil(); // 0 chars -> 0, 1-5 -> 1, ... + final mapped = (delta / 5.0).ceil(); _lastIntensity = mapped.clamp(0, 10); try { _intensityController?.add(_lastIntensity); } catch (_) {} if (result.isFinal) { - _stopListening(); + unawaited(_stopListening()); } }, onError: (_) {}); _sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {}); - try { - if (_selectedLocaleId != null) { - _speech.setLanguage(_selectedLocaleId!).catchError((_) {}); - } - // Start recognition (no await blocking the sync flow) - _speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) { - // On-device STT failed; stop listening entirely as server transcription is removed + Future(() async { + try { + if (_selectedLocaleId != null) { + await _speech.setLanguage(_selectedLocaleId!); + } + await _speech.start(SttRecognitionOptions(punctuation: true)); + } catch (error) { _localSttAvailable = false; - _stopListening(); - }); - } catch (e) { - _localSttAvailable = false; - _stopListening(); - } + if (!_isListening) return; + if (hasServerStt && allowsServerFallback) { + await _beginServerFallback(); + } else { + _textStreamController?.addError(error); + await _stopListening(); + } + } + }); + } else if (shouldUseServer) { + _usingServerStt = true; + _autoStopTimer?.cancel(); + _autoStopTimer = Timer(const Duration(seconds: 90), () { + if (_isListening) { + unawaited(_stopListening()); + } + }); + Future(() async { + try { + await _startServerRecording(); + } catch (error) { + if (!_isListening) return; + _textStreamController?.addError(error); + await _stopListening(); + } + }); } else { - // No local STT available; stop immediately since server transcription is removed - _stopListening(); + final Exception error; + if (prefersDeviceOnly) { + error = Exception( + 'On-device speech recognition required but unavailable', + ); + } else if (prefersServerOnly) { + error = Exception('Server speech-to-text is not configured'); + } else { + error = Exception('Speech recognition not available on this device'); + } + Future.microtask(() { + _textStreamController?.addError(error); + unawaited(_stopListening()); + }); } return _textStreamController!.stream; @@ -258,14 +307,11 @@ class VoiceInputService { /// Centralized entry point to begin voice recognition. /// Ensures initialization and microphone permission before starting. Future> beginListening() async { - // Ensure service is ready await initialize(); - // Ensure microphone permission (triggers OS prompt if needed) final hasMic = await checkPermissions(); if (!hasMic) { throw Exception('Microphone permission not granted'); } - // Start listening and return the transcript stream return startListening(); } @@ -277,53 +323,349 @@ class VoiceInputService { if (!_isListening) return; _isListening = false; - if (_localSttAvailable) { - try { - await _speech.stop(); - } catch (_) {} - // Cancel STT subscriptions - try { - _sttResultSub?.cancel(); - } catch (_) {} - _sttResultSub = null; - try { - _sttStateSub?.cancel(); - } catch (_) {} - _sttStateSub = null; - } _autoStopTimer?.cancel(); _autoStopTimer = null; - _ampSub?.cancel(); + + _silenceTimer?.cancel(); + _silenceTimer = null; + + _amplitudeFallbackTimer?.cancel(); + _amplitudeFallbackTimer = null; + + if (_usingServerStt) { + await _finalizeServerRecording(); + } else { + await _stopLocalStt(); + } + + await _ampSub?.cancel(); _ampSub = null; + _intensityDecayTimer?.cancel(); _intensityDecayTimer = null; _lastIntensity = 0; - if (_currentText.isNotEmpty) { + if (!_usingServerStt && _currentText.isNotEmpty) { _textStreamController?.add(_currentText); } - _textStreamController?.close(); - _textStreamController = null; - _intensityController?.close(); - _intensityController = null; + await _closeControllers(); + + _usingServerStt = false; + _hasDetectedSpeech = false; + } + + Future _stopLocalStt() async { + if (_sttResultSub != null) { + try { + await _sttResultSub?.cancel(); + } catch (_) {} + _sttResultSub = null; + } + if (_sttStateSub != null) { + try { + await _sttStateSub?.cancel(); + } catch (_) {} + _sttStateSub = null; + } + + if (_localSttAvailable) { + try { + await _speech.stop(); + } catch (_) {} + } + } + + Future _beginServerFallback() async { + if (!allowsServerFallback) { + _textStreamController?.addError( + Exception('Server speech-to-text disabled in preferences'), + ); + await _stopListening(); + return; + } + await _stopLocalStt(); + if (!hasServerStt) { + _textStreamController?.addError( + Exception('Server speech-to-text unavailable'), + ); + await _stopListening(); + return; + } + + _usingServerStt = true; + _autoStopTimer?.cancel(); + _autoStopTimer = Timer(const Duration(seconds: 90), () { + if (_isListening) { + unawaited(_stopListening()); + } + }); + + try { + await _startServerRecording(); + } catch (error) { + _textStreamController?.addError(error); + await _stopListening(); + } + } + + Future _startServerRecording() async { + final path = await _createRecordingPath(); + _hasDetectedSpeech = false; + + await _recorder.startRecording(path); + + await _ampSub?.cancel(); + _amplitudeFallbackTimer?.cancel(); + _amplitudeCallbackCount = 0; + + _ampSub = _recorder.amplitudeStream.listen((amplitude) { + _amplitudeCallbackCount++; + if (!_isListening) return; + + _lastIntensity = _normalizedToIntensity(amplitude); + try { + _intensityController?.add(_lastIntensity); + } catch (_) {} + + _handleServerAmplitude(amplitude); + }); + + _amplitudeFallbackTimer = Timer(const Duration(seconds: 1), () { + if (_amplitudeCallbackCount == 0) { + _silenceTimer = Timer(const Duration(seconds: 15), () { + if (_isListening && _usingServerStt) { + unawaited(_stopListening()); + } + }); + } + }); + } + + void _handleServerAmplitude(double amplitude) { + if (!_usingServerStt || !_isListening) return; + + const double speechThreshold = 0.55; + if (amplitude.isNaN || amplitude.isInfinite) return; + + if (amplitude > speechThreshold) { + _hasDetectedSpeech = true; + _silenceTimer?.cancel(); + _silenceTimer = null; + } else if (_hasDetectedSpeech && _silenceTimer == null) { + final silenceDuration = _ref?.read(appSettingsProvider).voiceSilenceDuration ?? 2000; + _silenceTimer = Timer(Duration(milliseconds: silenceDuration), () { + if (_isListening && _usingServerStt) { + unawaited(_stopListening()); + } + }); + } + } + + Future _createRecordingPath() async { + final directory = await getTemporaryDirectory(); + final timestamp = DateTime.now().millisecondsSinceEpoch; + final fileName = 'conduit_voice_$timestamp.m4a'; + return p.join(directory.path, fileName); + } + + Future _finalizeServerRecording() async { + final api = _api; + if (api == null) return; + + final path = await _recorder.stopRecording(); + if (path == null || path.isEmpty) return; + + final file = File(path); + try { + if (!await file.exists()) return; + final bytes = await file.readAsBytes(); + if (bytes.isEmpty) return; + + final response = await api.transcribeSpeech( + audioBytes: bytes, + fileName: p.basename(path), + mimeType: 'audio/mp4', + language: _languageForServer(), + ); + + final transcript = _extractTranscriptionText(response); + if (transcript != null && transcript.trim().isNotEmpty) { + _currentText = transcript.trim(); + _textStreamController?.add(_currentText); + } else { + throw StateError('Empty transcription result'); + } + } catch (error) { + _textStreamController?.addError(error); + } finally { + unawaited(_cleanupRecordingFile(file)); + } + } + + Future _cleanupRecordingFile(File file) async { + try { + if (await file.exists()) { + await file.delete(); + } + } catch (_) {} + } + + String? _languageForServer() { + final locale = _selectedLocaleId; + if (locale != null && locale.isNotEmpty) { + final primary = locale.split(RegExp('[-_]')).first.toLowerCase(); + if (primary.length >= 2) { + return primary; + } + } + try { + final fallback = WidgetsBinding.instance.platformDispatcher.locale; + final primary = fallback.languageCode.toLowerCase(); + if (primary.isNotEmpty) { + return primary; + } + } catch (_) {} + return null; + } + + String? _extractTranscriptionText(Map data) { + final direct = data['text']; + if (direct is String && direct.trim().isNotEmpty) { + return direct; + } + + final display = data['display_text'] ?? data['DisplayText']; + if (display is String && display.trim().isNotEmpty) { + return display; + } + + final result = data['result']; + if (result is Map) { + final resultText = result['text']; + if (resultText is String && resultText.trim().isNotEmpty) { + return resultText; + } + } + + final combined = data['combinedRecognizedPhrases']; + if (combined is List && combined.isNotEmpty) { + final first = combined.first; + if (first is Map) { + final candidate = + first['display'] ?? + first['Display'] ?? + first['transcript'] ?? + first['text']; + if (candidate is String && candidate.trim().isNotEmpty) { + return candidate; + } + } else if (first is String && first.trim().isNotEmpty) { + return first; + } + } + + final results = data['results']; + if (results is Map) { + final channels = results['channels']; + if (channels is List && channels.isNotEmpty) { + final channel = channels.first; + if (channel is Map) { + final alternatives = channel['alternatives']; + if (alternatives is List && alternatives.isNotEmpty) { + final alternative = alternatives.first; + if (alternative is Map) { + final transcript = + alternative['transcript'] ?? alternative['text']; + if (transcript is String && transcript.trim().isNotEmpty) { + return transcript; + } + } + } + } + } + } + + final segments = data['segments']; + if (segments is List && segments.isNotEmpty) { + final buffer = StringBuffer(); + for (final segment in segments) { + if (segment is Map) { + final text = segment['text']; + if (text is String && text.trim().isNotEmpty) { + buffer.write(text.trim()); + buffer.write(' '); + } + } else if (segment is String && segment.trim().isNotEmpty) { + buffer.write(segment.trim()); + buffer.write(' '); + } + } + final combinedText = buffer.toString().trim(); + if (combinedText.isNotEmpty) { + return combinedText; + } + } + + return null; + } + + int _normalizedToIntensity(double value) { + if (value.isNaN || value.isInfinite) return 0; + return (value * 10).round().clamp(0, 10); + } + + Future _closeControllers() async { + if (_textStreamController != null) { + try { + await _textStreamController?.close(); + } catch (_) {} + _textStreamController = null; + } + if (_intensityController != null) { + try { + await _intensityController?.close(); + } catch (_) {} + _intensityController = null; + } + } + + void _startIntensityDecayTimer() { + _intensityDecayTimer?.cancel(); + _intensityDecayTimer = Timer.periodic(const Duration(milliseconds: 120), ( + _, + ) { + if (!_isListening) return; + if (_lastIntensity <= 0) return; + _lastIntensity = (_lastIntensity - 1).clamp(0, 10); + try { + _intensityController?.add(_lastIntensity); + } catch (_) {} + }); } void dispose() { stopListening(); + _silenceTimer?.cancel(); try { _speech.dispose().catchError((_) {}); } catch (_) {} } - - // Recording fallback removed; only on-device STT is supported now - - // Native locales not used in server transcription mode } final voiceInputServiceProvider = Provider((ref) { - return VoiceInputService(); + final api = ref.watch(apiServiceProvider); + final service = VoiceInputService(api: api, ref: ref); + final currentSettings = ref.read(appSettingsProvider); + service.updatePreference(currentSettings.sttPreference); + ref.listen(appSettingsProvider, (previous, next) { + if (previous?.sttPreference != next.sttPreference) { + service.updatePreference(next.sttPreference); + } + }); + ref.onDispose(service.dispose); + return service; }); @Riverpod(keepAlive: true) @@ -332,8 +674,16 @@ Future voiceInputAvailable(Ref ref) async { if (!service.isSupportedPlatform) return false; final initialized = await service.initialize(); if (!initialized) return false; - // If local STT exists, we consider it available; otherwise ensure mic permission for fallback - if (service.hasLocalStt) return true; + switch (service.preference) { + case SttPreference.deviceOnly: + return service.hasLocalStt; + case SttPreference.serverOnly: + return service.hasServerStt; + case SttPreference.auto: + if (service.hasLocalStt) return true; + if (!service.hasServerStt) return false; + break; + } final hasPermission = await service.checkPermissions(); if (!hasPermission) return false; return service.isAvailable; @@ -349,3 +699,18 @@ final voiceIntensityStreamProvider = StreamProvider((ref) { final service = ref.watch(voiceInputServiceProvider); return service.intensityStream; }); + +final localVoiceRecognitionAvailableProvider = FutureProvider(( + ref, +) async { + final service = ref.watch(voiceInputServiceProvider); + final initialized = await service.initialize(); + if (!initialized) return false; + if (service.hasLocalStt) return true; + return service.checkOnDeviceSupport(); +}); + +final serverVoiceRecognitionAvailableProvider = Provider((ref) { + final service = ref.watch(voiceInputServiceProvider); + return service.hasServerStt; +}); diff --git a/lib/features/chat/views/chat_page.dart b/lib/features/chat/views/chat_page.dart index 7a28e67..2e49411 100644 --- a/lib/features/chat/views/chat_page.dart +++ b/lib/features/chat/views/chat_page.dart @@ -2380,7 +2380,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> { } } - // Server transcription removed; only on-device STT is supported + // When on-device STT is unavailable we fall back to server transcription. Future _stopListening() async { _intensitySub?.cancel(); diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart index bfd348c..570ee3f 100644 --- a/lib/features/chat/widgets/assistant_message_widget.dart +++ b/lib/features/chat/widgets/assistant_message_widget.dart @@ -71,6 +71,11 @@ class _AssistantMessageWidgetState extends ConsumerState bool _allowTypingIndicator = false; Timer? _typingGateTimer; String _ttsPlainText = ''; + Timer? _ttsPlainTextDebounce; + Map? _pendingTtsPlainTextPayload; + String? _pendingTtsPlainTextSource; + String? _lastAppliedTtsPlainTextSource; + int _ttsPlainTextRequestId = 0; // Active version index (-1 means current/live content) int _activeVersionIndex = -1; // press state handled by shared ChatActionButton @@ -162,13 +167,11 @@ class _AssistantMessageWidgetState extends ConsumerState final rSegs = ReasoningParser.segments(raw); final out = []; - final textBuf = StringBuffer(); final textSegments = []; if (rSegs == null || rSegs.isEmpty) { final tSegs = ToolCallsParser.segments(raw); if (tSegs == null || tSegs.isEmpty) { out.add(MessageSegment.text(raw)); - textBuf.write(raw); textSegments.add(raw); } else { for (final s in tSegs) { @@ -176,7 +179,6 @@ class _AssistantMessageWidgetState extends ConsumerState out.add(MessageSegment.tool(s.entry!)); } else if ((s.text ?? '').isNotEmpty) { out.add(MessageSegment.text(s.text!)); - textBuf.write(s.text); textSegments.add(s.text!); } } @@ -190,7 +192,6 @@ class _AssistantMessageWidgetState extends ConsumerState final tSegs = ToolCallsParser.segments(t); if (tSegs == null || tSegs.isEmpty) { out.add(MessageSegment.text(t)); - textBuf.write(t); textSegments.add(t); } else { for (final s in tSegs) { @@ -198,7 +199,6 @@ class _AssistantMessageWidgetState extends ConsumerState out.add(MessageSegment.tool(s.entry!)); } else if ((s.text ?? '').isNotEmpty) { out.add(MessageSegment.text(s.text!)); - textBuf.write(s.text); textSegments.add(s.text!); } } @@ -208,23 +208,15 @@ class _AssistantMessageWidgetState extends ConsumerState } final segments = out.isEmpty ? [MessageSegment.text(raw)] : out; - String speechText; - try { - final worker = ref.read(workerManagerProvider); - speechText = await worker.schedule, String>( - _buildTtsPlainTextWorker, - {'segments': textSegments, 'fallback': raw}, - debugLabel: 'tts_plain_text', - ); - } catch (_) { - speechText = _buildTtsPlainTextFallback(textSegments, raw); - } if (!mounted) return; setState(() { _segments = segments; - _ttsPlainText = speechText; }); + _scheduleTtsPlainTextBuild( + List.from(textSegments, growable: false), + raw, + ); _updateTypingIndicatorGate(); } @@ -290,6 +282,96 @@ class _AssistantMessageWidgetState extends ConsumerState return result; } + void _scheduleTtsPlainTextBuild(List segments, String raw) { + final hasContent = + segments.any((segment) => segment.trim().isNotEmpty) || + raw.trim().isNotEmpty; + if (!hasContent) { + _pendingTtsPlainTextPayload = null; + _pendingTtsPlainTextSource = null; + _lastAppliedTtsPlainTextSource = ''; + if (_ttsPlainText.isNotEmpty && mounted) { + setState(() { + _ttsPlainText = ''; + }); + } + return; + } + + if (_pendingTtsPlainTextPayload == null && + raw == _lastAppliedTtsPlainTextSource) { + return; + } + if (raw == _pendingTtsPlainTextSource && + _pendingTtsPlainTextPayload != null) { + return; + } + + final pendingSegments = List.from(segments, growable: false); + _pendingTtsPlainTextPayload = { + 'segments': pendingSegments, + 'fallback': raw, + }; + _pendingTtsPlainTextSource = raw; + + final delay = widget.isStreaming + ? const Duration(milliseconds: 250) + : Duration.zero; + + _ttsPlainTextDebounce?.cancel(); + if (delay == Duration.zero) { + _runPendingTtsPlainTextBuild(); + } else { + _ttsPlainTextDebounce = Timer(delay, _runPendingTtsPlainTextBuild); + } + } + + void _runPendingTtsPlainTextBuild() { + _ttsPlainTextDebounce?.cancel(); + _ttsPlainTextDebounce = null; + + final payload = _pendingTtsPlainTextPayload; + final source = _pendingTtsPlainTextSource; + if (payload == null || source == null) { + return; + } + + _pendingTtsPlainTextPayload = null; + _pendingTtsPlainTextSource = null; + final requestId = ++_ttsPlainTextRequestId; + unawaited(_executeTtsPlainTextBuild(payload, source, requestId)); + } + + Future _executeTtsPlainTextBuild( + Map payload, + String raw, + int requestId, + ) async { + final segments = (payload['segments'] as List).cast(); + String speechText; + try { + final worker = ref.read(workerManagerProvider); + speechText = await worker.schedule, String>( + _buildTtsPlainTextWorker, + payload, + debugLabel: 'tts_plain_text', + ); + } catch (_) { + speechText = _buildTtsPlainTextFallback(segments, raw); + } + + if (!mounted || requestId != _ttsPlainTextRequestId) { + return; + } + + _lastAppliedTtsPlainTextSource = raw; + if (_ttsPlainText != speechText) { + setState(() { + _ttsPlainText = speechText; + }); + } + } + // No streaming-specific markdown fixes needed here; handled by Markdown widget Widget _buildToolCallTile(ToolCallEntry tc) { @@ -622,6 +704,9 @@ class _AssistantMessageWidgetState extends ConsumerState @override void dispose() { _typingGateTimer?.cancel(); + _ttsPlainTextDebounce?.cancel(); + _pendingTtsPlainTextPayload = null; + _pendingTtsPlainTextSource = null; _fadeController.dispose(); _slideController.dispose(); super.dispose(); diff --git a/lib/features/chat/widgets/modern_chat_input.dart b/lib/features/chat/widgets/modern_chat_input.dart index 6fb4e39..d3daf19 100644 --- a/lib/features/chat/widgets/modern_chat_input.dart +++ b/lib/features/chat/widgets/modern_chat_input.dart @@ -2460,7 +2460,7 @@ class _ModernChatInputState extends ConsumerState HapticFeedback.selectionClick(); } - // Server transcription removed; only on-device STT updates the input text + // When on-device STT is unavailable we rely on server transcription. void _showVoiceUnavailable(String message) { if (!mounted) return; diff --git a/lib/features/profile/views/app_customization_page.dart b/lib/features/profile/views/app_customization_page.dart index c8adde3..86c81bf 100644 --- a/lib/features/profile/views/app_customization_page.dart +++ b/lib/features/profile/views/app_customization_page.dart @@ -14,6 +14,7 @@ import '../../../shared/utils/ui_utils.dart'; import '../../../core/providers/app_providers.dart'; import '../../../l10n/app_localizations.dart'; import '../../chat/providers/text_to_speech_provider.dart'; +import '../../chat/services/voice_input_service.dart'; class AppCustomizationPage extends ConsumerWidget { const AppCustomizationPage({super.key}); @@ -70,6 +71,8 @@ class AppCustomizationPage extends ConsumerWidget { languageLabel, ), const SizedBox(height: Spacing.xl), + _buildSttSection(context, ref, settings), + const SizedBox(height: Spacing.xl), _buildTtsDropdownSection(context, ref, settings), const SizedBox(height: Spacing.xl), _buildChatSection(context, ref, settings), @@ -468,6 +471,303 @@ class AppCustomizationPage extends ConsumerWidget { ); } + Widget _buildSttSection( + BuildContext context, + WidgetRef ref, + AppSettings settings, + ) { + final theme = context.conduitTheme; + final l10n = AppLocalizations.of(context)!; + final localSupport = ref.watch(localVoiceRecognitionAvailableProvider); + final bool localAvailable = localSupport.maybeWhen( + data: (value) => value, + orElse: () => false, + ); + final bool localLoading = localSupport.isLoading; + final bool serverAvailable = ref.watch( + serverVoiceRecognitionAvailableProvider, + ); + final notifier = ref.read(appSettingsProvider.notifier); + final description = _sttPreferenceDescription(l10n, settings.sttPreference); + + final warnings = []; + if (settings.sttPreference == SttPreference.deviceOnly && + !localAvailable && + !localLoading) { + warnings.add(l10n.sttDeviceUnavailableWarning); + } + if (settings.sttPreference == SttPreference.serverOnly && + !serverAvailable) { + warnings.add(l10n.sttServerUnavailableWarning); + } + + final bool autoSelectable = + localAvailable || serverAvailable || localLoading; + final bool deviceSelectable = localAvailable || localLoading; + final bool serverSelectable = serverAvailable; + + return Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Text( + l10n.sttSettings, + style: + theme.headingSmall?.copyWith(color: theme.sidebarForeground) ?? + TextStyle(color: theme.sidebarForeground, fontSize: 18), + ), + const SizedBox(height: Spacing.sm), + ConduitCard( + padding: const EdgeInsets.all(Spacing.md), + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Row( + children: [ + _buildIconBadge( + context, + UiUtils.platformIcon( + ios: CupertinoIcons.mic, + android: Icons.mic, + ), + color: theme.buttonPrimary, + ), + const SizedBox(width: Spacing.md), + Expanded( + child: Text( + l10n.sttEngineLabel, + style: + theme.bodyMedium?.copyWith( + color: theme.sidebarForeground, + fontWeight: FontWeight.w600, + ) ?? + TextStyle( + color: theme.sidebarForeground, + fontSize: 14, + fontWeight: FontWeight.w600, + ), + ), + ), + ], + ), + const SizedBox(height: Spacing.sm), + Wrap( + spacing: Spacing.sm, + runSpacing: Spacing.sm, + children: [ + ChoiceChip( + label: Text(l10n.sttEngineAuto), + selected: settings.sttPreference == SttPreference.auto, + showCheckmark: false, + selectedColor: theme.buttonPrimary, + backgroundColor: theme.cardBackground, + side: BorderSide( + color: settings.sttPreference == SttPreference.auto + ? theme.buttonPrimary.withValues(alpha: 0.6) + : theme.textPrimary.withValues(alpha: 0.2), + ), + labelStyle: TextStyle( + color: settings.sttPreference == SttPreference.auto + ? theme.buttonPrimaryText + : theme.textPrimary, + fontWeight: FontWeight.w600, + ), + onSelected: autoSelectable + ? (value) { + if (value) { + notifier.setSttPreference(SttPreference.auto); + } + } + : null, + ), + ChoiceChip( + label: Text(l10n.sttEngineDevice), + selected: + settings.sttPreference == SttPreference.deviceOnly, + showCheckmark: false, + selectedColor: theme.buttonPrimary, + backgroundColor: theme.cardBackground, + side: BorderSide( + color: settings.sttPreference == SttPreference.deviceOnly + ? theme.buttonPrimary.withValues(alpha: 0.6) + : theme.textPrimary.withValues(alpha: 0.2), + ), + labelStyle: TextStyle( + color: settings.sttPreference == SttPreference.deviceOnly + ? theme.buttonPrimaryText + : theme.textPrimary, + fontWeight: FontWeight.w600, + ), + onSelected: deviceSelectable + ? (value) { + if (value) { + notifier.setSttPreference( + SttPreference.deviceOnly, + ); + } + } + : null, + ), + ChoiceChip( + label: Text(l10n.sttEngineServer), + selected: + settings.sttPreference == SttPreference.serverOnly, + showCheckmark: false, + selectedColor: theme.buttonPrimary, + backgroundColor: theme.cardBackground, + side: BorderSide( + color: settings.sttPreference == SttPreference.serverOnly + ? theme.buttonPrimary.withValues(alpha: 0.6) + : theme.textPrimary.withValues(alpha: 0.2), + ), + labelStyle: TextStyle( + color: settings.sttPreference == SttPreference.serverOnly + ? theme.buttonPrimaryText + : theme.textPrimary, + fontWeight: FontWeight.w600, + ), + onSelected: serverSelectable + ? (value) { + if (value) { + notifier.setSttPreference( + SttPreference.serverOnly, + ); + } + } + : null, + ), + ], + ), + if (localLoading) ...[ + const SizedBox(height: Spacing.sm), + LinearProgressIndicator( + minHeight: 3, + color: theme.buttonPrimary, + backgroundColor: theme.cardBorder.withValues(alpha: 0.4), + ), + ], + const SizedBox(height: Spacing.sm), + AnimatedSwitcher( + duration: const Duration(milliseconds: 200), + child: Text( + description, + key: ValueKey( + 'stt-desc-${settings.sttPreference.name}', + ), + style: + theme.bodyMedium?.copyWith( + color: theme.sidebarForeground.withValues(alpha: 0.9), + ) ?? + TextStyle( + color: theme.sidebarForeground.withValues(alpha: 0.9), + fontSize: 14, + ), + ), + ), + if (warnings.isNotEmpty) ...[ + const SizedBox(height: Spacing.sm), + ...warnings.map( + (warning) => Padding( + padding: const EdgeInsets.only(top: Spacing.xs), + child: Text( + warning, + style: + theme.bodySmall?.copyWith( + color: theme.error, + fontWeight: FontWeight.w600, + ) ?? + TextStyle( + color: theme.error, + fontSize: 12, + fontWeight: FontWeight.w600, + ), + ), + ), + ), + ], + if (settings.sttPreference == SttPreference.serverOnly || + (settings.sttPreference == SttPreference.auto && + serverAvailable)) ...[ + const SizedBox(height: Spacing.md), + const Divider(), + const SizedBox(height: Spacing.md), + Row( + children: [ + Expanded( + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Text( + l10n.sttSilenceDuration, + style: theme.bodyMedium?.copyWith( + color: theme.sidebarForeground, + fontWeight: FontWeight.w600, + ) ?? + TextStyle( + color: theme.sidebarForeground, + fontSize: 14, + fontWeight: FontWeight.w600, + ), + ), + const SizedBox(height: Spacing.xs), + Text( + '${settings.voiceSilenceDuration}ms', + style: theme.bodySmall?.copyWith( + color: theme.sidebarForeground + .withValues(alpha: 0.7), + ) ?? + TextStyle( + color: theme.sidebarForeground + .withValues(alpha: 0.7), + fontSize: 12, + ), + ), + ], + ), + ), + Text( + '${(settings.voiceSilenceDuration / 1000).toStringAsFixed(1)}s', + style: theme.bodyMedium?.copyWith( + color: theme.buttonPrimary, + fontWeight: FontWeight.w600, + ) ?? + TextStyle( + color: theme.buttonPrimary, + fontSize: 14, + fontWeight: FontWeight.w600, + ), + ), + ], + ), + const SizedBox(height: Spacing.sm), + Slider( + value: settings.voiceSilenceDuration.toDouble(), + min: 300, + max: 3000, + divisions: 27, + activeColor: theme.buttonPrimary, + inactiveColor: theme.cardBorder.withValues(alpha: 0.4), + onChanged: (value) { + notifier.setVoiceSilenceDuration(value.round()); + }, + ), + Text( + l10n.sttSilenceDurationDescription, + style: theme.bodySmall?.copyWith( + color: theme.sidebarForeground.withValues(alpha: 0.7), + ) ?? + TextStyle( + color: theme.sidebarForeground.withValues(alpha: 0.7), + fontSize: 12, + ), + ), + ], + ], + ), + ), + ], + ); + } + Widget _buildTtsDropdownSection( BuildContext context, WidgetRef ref, @@ -475,6 +775,35 @@ class AppCustomizationPage extends ConsumerWidget { ) { final theme = context.conduitTheme; final l10n = AppLocalizations.of(context)!; + final ttsService = ref.watch(textToSpeechServiceProvider); + final bool deviceAvailable = + ttsService.deviceEngineAvailable || !ttsService.isInitialized; + final bool serverAvailable = ttsService.serverEngineAvailable; + final bool autoSelectable = deviceAvailable || serverAvailable; + final bool deviceSelectable = deviceAvailable; + final bool serverSelectable = serverAvailable; + final ttsDescription = _ttsPreferenceDescription(l10n, settings); + final warnings = []; + switch (settings.ttsEngine) { + case TtsEngine.auto: + if (!deviceAvailable) { + warnings.add(l10n.ttsDeviceUnavailableWarning); + } + if (!serverAvailable) { + warnings.add(l10n.ttsServerUnavailableWarning); + } + break; + case TtsEngine.device: + if (!deviceAvailable) { + warnings.add(l10n.ttsDeviceUnavailableWarning); + } + break; + case TtsEngine.server: + if (!serverAvailable) { + warnings.add(l10n.ttsServerUnavailableWarning); + } + break; + } return Column( crossAxisAlignment: CrossAxisAlignment.start, children: [ @@ -510,82 +839,154 @@ class AppCustomizationPage extends ConsumerWidget { ) ?? TextStyle(color: theme.sidebarForeground, fontSize: 14), ), - const Spacer(), - Wrap( - spacing: Spacing.sm, - children: [ - ChoiceChip( - label: Text(l10n.ttsEngineDevice), - selected: settings.ttsEngine == TtsEngine.device, - showCheckmark: false, - selectedColor: theme.buttonPrimary, - backgroundColor: theme.cardBackground, - side: BorderSide( - color: settings.ttsEngine == TtsEngine.device - ? theme.buttonPrimary.withValues(alpha: 0.6) - : theme.textPrimary.withValues(alpha: 0.2), - ), - labelStyle: TextStyle( - color: settings.ttsEngine == TtsEngine.device - ? theme.buttonPrimaryText - : theme.textPrimary, - fontWeight: FontWeight.w600, - ), - onSelected: (v) { - if (v) { - final notifier = ref.read( - appSettingsProvider.notifier, - ); - notifier.setTtsEngine(TtsEngine.device); - // Keep previous voice (device voices) + ], + ), + const SizedBox(height: Spacing.sm), + Wrap( + spacing: Spacing.sm, + runSpacing: Spacing.sm, + children: [ + ChoiceChip( + label: Text(l10n.ttsEngineAuto), + selected: settings.ttsEngine == TtsEngine.auto, + showCheckmark: false, + selectedColor: theme.buttonPrimary, + backgroundColor: theme.cardBackground, + side: BorderSide( + color: settings.ttsEngine == TtsEngine.auto + ? theme.buttonPrimary.withValues(alpha: 0.6) + : theme.textPrimary.withValues( + alpha: autoSelectable ? 0.2 : 0.12, + ), + ), + labelStyle: TextStyle( + color: settings.ttsEngine == TtsEngine.auto + ? theme.buttonPrimaryText + : theme.textPrimary.withValues( + alpha: autoSelectable ? 1.0 : 0.45, + ), + fontWeight: FontWeight.w600, + ), + onSelected: autoSelectable + ? (value) { + if (value) { + ref + .read(appSettingsProvider.notifier) + .setTtsEngine(TtsEngine.auto); + } } - }, - ), - ChoiceChip( - label: Text(l10n.ttsEngineServer), - selected: settings.ttsEngine == TtsEngine.server, - showCheckmark: false, - selectedColor: theme.buttonPrimary, - backgroundColor: theme.cardBackground, - side: BorderSide( - color: settings.ttsEngine == TtsEngine.server - ? theme.buttonPrimary.withValues(alpha: 0.6) - : theme.textPrimary.withValues(alpha: 0.2), - ), - labelStyle: TextStyle( - color: settings.ttsEngine == TtsEngine.server - ? theme.buttonPrimaryText - : theme.textPrimary, - fontWeight: FontWeight.w600, - ), - onSelected: (v) { - if (v) { - final notifier = ref.read( - appSettingsProvider.notifier, - ); - // Clear device-specific voice so server can default - notifier.setTtsVoice(null); - notifier.setTtsEngine(TtsEngine.server); + : null, + ), + ChoiceChip( + label: Text(l10n.ttsEngineDevice), + selected: settings.ttsEngine == TtsEngine.device, + showCheckmark: false, + selectedColor: theme.buttonPrimary, + backgroundColor: theme.cardBackground, + side: BorderSide( + color: settings.ttsEngine == TtsEngine.device + ? theme.buttonPrimary.withValues(alpha: 0.6) + : theme.textPrimary.withValues( + alpha: deviceSelectable ? 0.2 : 0.12, + ), + ), + labelStyle: TextStyle( + color: settings.ttsEngine == TtsEngine.device + ? theme.buttonPrimaryText + : theme.textPrimary.withValues( + alpha: deviceSelectable ? 1.0 : 0.45, + ), + fontWeight: FontWeight.w600, + ), + onSelected: deviceSelectable + ? (value) { + if (value) { + ref + .read(appSettingsProvider.notifier) + .setTtsEngine(TtsEngine.device); + } } - }, - ), - ], + : null, + ), + ChoiceChip( + label: Text(l10n.ttsEngineServer), + selected: settings.ttsEngine == TtsEngine.server, + showCheckmark: false, + selectedColor: theme.buttonPrimary, + backgroundColor: theme.cardBackground, + side: BorderSide( + color: settings.ttsEngine == TtsEngine.server + ? theme.buttonPrimary.withValues(alpha: 0.6) + : theme.textPrimary.withValues( + alpha: serverSelectable ? 0.2 : 0.12, + ), + ), + labelStyle: TextStyle( + color: settings.ttsEngine == TtsEngine.server + ? theme.buttonPrimaryText + : theme.textPrimary.withValues( + alpha: serverSelectable ? 1.0 : 0.45, + ), + fontWeight: FontWeight.w600, + ), + onSelected: serverSelectable + ? (value) { + if (value) { + final notifier = ref.read( + appSettingsProvider.notifier, + ); + notifier.setTtsVoice(null); + notifier.setTtsEngine(TtsEngine.server); + } + } + : null, ), ], ), + const SizedBox(height: Spacing.sm), + AnimatedSwitcher( + duration: const Duration(milliseconds: 200), + child: Text( + ttsDescription, + key: ValueKey('tts-desc-${settings.ttsEngine.name}'), + style: + theme.bodyMedium?.copyWith( + color: theme.sidebarForeground.withValues(alpha: 0.9), + ) ?? + TextStyle( + color: theme.sidebarForeground.withValues(alpha: 0.9), + fontSize: 14, + ), + ), + ), + if (warnings.isNotEmpty) ...[ + const SizedBox(height: Spacing.sm), + ...warnings.map( + (warning) => Padding( + padding: const EdgeInsets.only(top: Spacing.xs), + child: Text( + warning, + style: + theme.bodySmall?.copyWith( + color: theme.error, + fontWeight: FontWeight.w600, + ) ?? + TextStyle( + color: theme.error, + fontSize: 12, + fontWeight: FontWeight.w600, + ), + ), + ), + ), + ], ], ), ), const SizedBox(height: Spacing.sm), _ExpandableCard( title: l10n.ttsVoice, - subtitle: _getDisplayVoiceName( - settings.ttsEngine == TtsEngine.server - ? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ?? - '') - : (settings.ttsVoice ?? ''), - l10n.ttsSystemDefault, - ), + subtitle: _ttsVoiceSubtitle(l10n, settings), icon: UiUtils.platformIcon( ios: CupertinoIcons.speaker_3, android: Icons.record_voice_over, @@ -604,14 +1005,7 @@ class AppCustomizationPage extends ConsumerWidget { color: theme.buttonPrimary, ), title: l10n.ttsVoice, - subtitle: _getDisplayVoiceName( - settings.ttsEngine == TtsEngine.server - ? ((settings.ttsServerVoiceName ?? - settings.ttsServerVoiceId) ?? - '') - : (settings.ttsVoice ?? ''), - l10n.ttsSystemDefault, - ), + subtitle: _ttsVoiceSubtitle(l10n, settings), onTap: () => _showVoicePickerSheet(context, ref, settings), ), const SizedBox(height: Spacing.md), @@ -627,49 +1021,13 @@ class AppCustomizationPage extends ConsumerWidget { value: settings.ttsSpeechRate, min: 0.25, max: 2.0, - divisions: 7, + divisions: 35, label: '${(settings.ttsSpeechRate * 100).round()}%', onChanged: (value) => ref .read(appSettingsProvider.notifier) .setTtsSpeechRate(value), ), const SizedBox(height: Spacing.md), - // Pitch Slider - _buildSliderTile( - context, - ref, - icon: UiUtils.platformIcon( - ios: CupertinoIcons.waveform, - android: Icons.graphic_eq, - ), - title: l10n.ttsPitch, - value: settings.ttsPitch, - min: 0.5, - max: 2.0, - divisions: 6, - label: settings.ttsPitch.toStringAsFixed(1), - onChanged: (value) => - ref.read(appSettingsProvider.notifier).setTtsPitch(value), - ), - const SizedBox(height: Spacing.md), - // Volume Slider - _buildSliderTile( - context, - ref, - icon: UiUtils.platformIcon( - ios: CupertinoIcons.volume_up, - android: Icons.volume_up, - ), - title: l10n.ttsVolume, - value: settings.ttsVolume, - min: 0.0, - max: 1.0, - divisions: 10, - label: '${(settings.ttsVolume * 100).round()}%', - onChanged: (value) => - ref.read(appSettingsProvider.notifier).setTtsVolume(value), - ), - const SizedBox(height: Spacing.md), // Preview Button _CustomizationTile( leading: _buildIconBadge( @@ -691,6 +1049,53 @@ class AppCustomizationPage extends ConsumerWidget { ); } + String _sttPreferenceDescription( + AppLocalizations l10n, + SttPreference preference, + ) { + switch (preference) { + case SttPreference.auto: + return l10n.sttEngineAutoDescription; + case SttPreference.deviceOnly: + return l10n.sttEngineDeviceDescription; + case SttPreference.serverOnly: + return l10n.sttEngineServerDescription; + } + } + + String _ttsPreferenceDescription( + AppLocalizations l10n, + AppSettings settings, + ) { + switch (settings.ttsEngine) { + case TtsEngine.auto: + return l10n.ttsEngineAutoDescription; + case TtsEngine.device: + return l10n.ttsEngineDeviceDescription; + case TtsEngine.server: + return l10n.ttsEngineServerDescription; + } + } + + String _ttsVoiceSubtitle(AppLocalizations l10n, AppSettings settings) { + final deviceName = _getDisplayVoiceName( + settings.ttsVoice, + l10n.ttsSystemDefault, + ); + final serverVoice = + (settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ?? ''; + final serverName = _getDisplayVoiceName(serverVoice, l10n.ttsSystemDefault); + + switch (settings.ttsEngine) { + case TtsEngine.auto: + return '${l10n.ttsEngineDevice}: $deviceName • ${l10n.ttsEngineServer}: $serverName'; + case TtsEngine.device: + return deviceName; + case TtsEngine.server: + return serverName; + } + } + Widget _buildSliderTile( BuildContext context, WidgetRef ref, { diff --git a/lib/l10n/app_de.arb b/lib/l10n/app_de.arb index c99b5fa..17381a0 100644 --- a/lib/l10n/app_de.arb +++ b/lib/l10n/app_de.arb @@ -307,7 +307,25 @@ "chatSettings": "Chat", "sendOnEnter": "Mit Enter senden", "sendOnEnterDescription": "Enter sendet (Soft-Tastatur). Cmd/Ctrl+Enter ebenfalls verfügbar", + "sttSettings": "Sprache zu Text", + "sttEngineLabel": "Erkennungs-Engine", + "sttEngineAuto": "Automatisch", + "sttEngineDevice": "Auf dem Gerät", + "sttEngineServer": "Server", + "sttEngineAutoDescription": "Verwendet die Erkennung auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.", + "sttEngineDeviceDescription": "Behält Audio auf diesem Gerät. Spracheingabe funktioniert nicht, wenn das Gerät keine Spracherkennung unterstützt.", + "sttEngineServerDescription": "Sendet Aufnahmen immer an deinen OpenWebUI-Server zur Transkription.", + "sttDeviceUnavailableWarning": "Auf diesem Gerät steht keine Spracherkennung zur Verfügung.", + "sttServerUnavailableWarning": "Verbinde dich mit einem Server mit aktivierter Transkription, um diese Option zu nutzen.", + "sttSilenceDuration": "Stille-Dauer", + "sttSilenceDurationDescription": "Zeit nach Stille warten, bevor die Aufnahme automatisch gestoppt wird", "ttsSettings": "Text zu Sprache", + "ttsEngineAuto": "Automatisch", + "ttsEngineAutoDescription": "Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.", + "ttsEngineDeviceDescription": "Behält die Ausgabe auf diesem Gerät. Sprachausgabe funktioniert nicht, wenn das Gerät keine TTS-Unterstützung bietet.", + "ttsEngineServerDescription": "Sendet die Ausgabe immer an deinen OpenWebUI-Server.", + "ttsDeviceUnavailableWarning": "Sprachausgabe auf dem Gerät steht auf diesem Gerät nicht zur Verfügung.", + "ttsServerUnavailableWarning": "Verbinde dich mit einem Server mit aktivierter Sprachausgabe, um diese Option zu nutzen.", "ttsVoice": "Stimme", "ttsSpeechRate": "Sprechgeschwindigkeit", "ttsPitch": "Tonhöhe", diff --git a/lib/l10n/app_en.arb b/lib/l10n/app_en.arb index d45abb4..cf6a18c 100644 --- a/lib/l10n/app_en.arb +++ b/lib/l10n/app_en.arb @@ -1219,10 +1219,62 @@ "@sendOnEnterDescription": { "description": "Explanation of how the Send on Enter toggle behaves." }, + "sttSettings": "Speech to Text", + "@sttSettings": { + "description": "Section header for speech-to-text settings." + }, + "sttEngineLabel": "Recognition engine", + "@sttEngineLabel": { + "description": "Label shown above the speech-to-text engine chips." + }, + "sttEngineAuto": "Auto", + "@sttEngineAuto": { + "description": "Chip label for automatic speech-to-text selection." + }, + "sttEngineDevice": "On device", + "@sttEngineDevice": { + "description": "Chip label for on-device speech recognition." + }, + "sttEngineServer": "Server", + "@sttEngineServer": { + "description": "Chip label for server speech recognition." + }, + "sttEngineAutoDescription": "Use on-device recognition when available and fall back to your server.", + "@sttEngineAutoDescription": { + "description": "Description shown when automatic speech-to-text preference is active." + }, + "sttEngineDeviceDescription": "Keep audio on this device. Voice input stops working if on-device speech recognition isn’t supported.", + "@sttEngineDeviceDescription": { + "description": "Description shown when on-device speech-to-text preference is active." + }, + "sttEngineServerDescription": "Always send recordings to your OpenWebUI server for transcription.", + "@sttEngineServerDescription": { + "description": "Description shown when server speech-to-text preference is active." + }, + "sttDeviceUnavailableWarning": "On-device speech recognition isn’t available on this device.", + "@sttDeviceUnavailableWarning": { + "description": "Warning shown when the user selects on-device speech recognition but it is unavailable." + }, + "sttServerUnavailableWarning": "Connect to a server with transcription enabled to use this option.", + "@sttServerUnavailableWarning": { + "description": "Warning shown when the user selects server speech recognition but no server is available." + }, + "sttSilenceDuration": "Silence Duration", + "@sttSilenceDuration": { + "description": "Label for the silence duration setting in server speech-to-text." + }, + "sttSilenceDurationDescription": "Time to wait after silence before auto-stopping recording", + "@sttSilenceDurationDescription": { + "description": "Description for the silence duration slider in server speech-to-text settings." + }, "ttsEngineLabel": "Engine", "@ttsEngineLabel": { "description": "Label for selecting the text-to-speech engine." }, + "ttsEngineAuto": "Auto", + "@ttsEngineAuto": { + "description": "Chip label for automatically selecting the text-to-speech engine." + }, "ttsEngineDevice": "On device", "@ttsEngineDevice": { "description": "Chip label for using on-device text-to-speech." @@ -1231,6 +1283,26 @@ "@ttsEngineServer": { "description": "Chip label for using server-side text-to-speech." }, + "ttsEngineAutoDescription": "Use on-device speech when available and fall back to your server.", + "@ttsEngineAutoDescription": { + "description": "Description shown when automatic text-to-speech preference is active." + }, + "ttsEngineDeviceDescription": "Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.", + "@ttsEngineDeviceDescription": { + "description": "Description shown when on-device text-to-speech preference is active." + }, + "ttsEngineServerDescription": "Always request audio from your OpenWebUI server.", + "@ttsEngineServerDescription": { + "description": "Description shown when server text-to-speech preference is active." + }, + "ttsDeviceUnavailableWarning": "On-device text-to-speech isn’t available on this device.", + "@ttsDeviceUnavailableWarning": { + "description": "Warning shown when on-device text-to-speech is unavailable." + }, + "ttsServerUnavailableWarning": "Connect to a server with text-to-speech enabled to use this option.", + "@ttsServerUnavailableWarning": { + "description": "Warning shown when server text-to-speech is unavailable." + }, "ttsSettings": "Text to Speech", "@ttsSettings": { "description": "Section header for TTS-related customization options." diff --git a/lib/l10n/app_es.arb b/lib/l10n/app_es.arb index a8d8ddc..73c1b35 100644 --- a/lib/l10n/app_es.arb +++ b/lib/l10n/app_es.arb @@ -307,7 +307,25 @@ "chatSettings": "Conversación", "sendOnEnter": "Enviar con Enter", "sendOnEnterDescription": "Enter envía (teclado virtual). Cmd/Ctrl+Enter también disponible", + "sttSettings": "Voz a texto", + "sttEngineLabel": "Motor de reconocimiento", + "sttEngineAuto": "Automático", + "sttEngineDevice": "En el dispositivo", + "sttEngineServer": "Servidor", + "sttEngineAutoDescription": "Usa el reconocimiento en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.", + "sttEngineDeviceDescription": "Mantiene el audio en este dispositivo. La entrada de voz no funciona si el dispositivo no admite reconocimiento de voz.", + "sttEngineServerDescription": "Envía siempre las grabaciones a tu servidor OpenWebUI para la transcripción.", + "sttDeviceUnavailableWarning": "El reconocimiento de voz en el dispositivo no está disponible en este dispositivo.", + "sttServerUnavailableWarning": "Conéctate a un servidor con transcripción habilitada para usar esta opción.", + "sttSilenceDuration": "Duración del silencio", + "sttSilenceDurationDescription": "Tiempo de espera después del silencio antes de detener automáticamente la grabación", "ttsSettings": "Texto a voz", + "ttsEngineAuto": "Automático", + "ttsEngineAutoDescription": "Usa la síntesis en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.", + "ttsEngineDeviceDescription": "Mantiene la síntesis en este dispositivo. La reproducción de voz no funciona si el dispositivo no admite TTS.", + "ttsEngineServerDescription": "Solicita siempre el audio a tu servidor OpenWebUI.", + "ttsDeviceUnavailableWarning": "La síntesis de voz en el dispositivo no está disponible en este dispositivo.", + "ttsServerUnavailableWarning": "Conéctate a un servidor con texto a voz habilitado para usar esta opción.", "ttsVoice": "Voz", "ttsSpeechRate": "Velocidad de voz", "ttsPitch": "Tono", diff --git a/lib/l10n/app_fr.arb b/lib/l10n/app_fr.arb index d50ae5e..18955b9 100644 --- a/lib/l10n/app_fr.arb +++ b/lib/l10n/app_fr.arb @@ -307,7 +307,25 @@ "chatSettings": "Discussion", "sendOnEnter": "Envoyer avec Entrée", "sendOnEnterDescription": "Entrée envoie (clavier logiciel). Cmd/Ctrl+Entrée aussi disponible", + "sttSettings": "Voix vers texte", + "sttEngineLabel": "Moteur de reconnaissance", + "sttEngineAuto": "Auto", + "sttEngineDevice": "Sur l’appareil", + "sttEngineServer": "Serveur", + "sttEngineAutoDescription": "Utilise la reconnaissance sur l’appareil quand c’est possible, sinon bascule vers votre serveur.", + "sttEngineDeviceDescription": "Conserve l’audio sur cet appareil. L’entrée vocale cesse de fonctionner si la reconnaissance vocale n’est pas prise en charge.", + "sttEngineServerDescription": "Envoie toujours les enregistrements à votre serveur OpenWebUI pour transcription.", + "sttDeviceUnavailableWarning": "La reconnaissance vocale sur l’appareil n’est pas disponible sur cet appareil.", + "sttServerUnavailableWarning": "Connectez-vous à un serveur avec la transcription activée pour utiliser cette option.", + "sttSilenceDuration": "Durée du silence", + "sttSilenceDurationDescription": "Temps d'attente après le silence avant d'arrêter automatiquement l'enregistrement", "ttsSettings": "Synthèse vocale", + "ttsEngineAuto": "Auto", + "ttsEngineAutoDescription": "Utilise la synthèse locale quand c’est possible, sinon bascule vers votre serveur.", + "ttsEngineDeviceDescription": "Garde la synthèse sur cet appareil. La lecture vocale ne fonctionne plus si l’appareil n’offre pas la synthèse vocale.", + "ttsEngineServerDescription": "Demande toujours l'audio à votre serveur OpenWebUI.", + "ttsDeviceUnavailableWarning": "La synthèse vocale sur l’appareil n’est pas disponible sur cet appareil.", + "ttsServerUnavailableWarning": "Connectez-vous à un serveur avec la synthèse vocale activée pour utiliser cette option.", "ttsVoice": "Voix", "ttsSpeechRate": "Vitesse de parole", "ttsPitch": "Hauteur", diff --git a/lib/l10n/app_it.arb b/lib/l10n/app_it.arb index cc2e0f0..fe7948e 100644 --- a/lib/l10n/app_it.arb +++ b/lib/l10n/app_it.arb @@ -307,7 +307,25 @@ "chatSettings": "Chat", "sendOnEnter": "Invia con Invio", "sendOnEnterDescription": "Invio invia (tastiera software). Cmd/Ctrl+Invio disponibile", + "sttSettings": "Voce in testo", + "sttEngineLabel": "Motore di riconoscimento", + "sttEngineAuto": "Automatico", + "sttEngineDevice": "Sul dispositivo", + "sttEngineServer": "Server", + "sttEngineAutoDescription": "Usa il riconoscimento sul dispositivo quando disponibile e altrimenti passa al tuo server.", + "sttEngineDeviceDescription": "Mantiene l’audio su questo dispositivo. L’input vocale non funziona se il dispositivo non supporta il riconoscimento vocale.", + "sttEngineServerDescription": "Invia sempre le registrazioni al tuo server OpenWebUI per la trascrizione.", + "sttDeviceUnavailableWarning": "Il riconoscimento vocale sul dispositivo non è disponibile su questo dispositivo.", + "sttServerUnavailableWarning": "Collegati a un server con la trascrizione abilitata per usare questa opzione.", + "sttSilenceDuration": "Durata del silenzio", + "sttSilenceDurationDescription": "Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione", "ttsSettings": "Sintesi vocale", + "ttsEngineAuto": "Automatico", + "ttsEngineAutoDescription": "Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.", + "ttsEngineDeviceDescription": "Mantiene la sintesi su questo dispositivo. La riproduzione vocale non funziona se il dispositivo non supporta il TTS.", + "ttsEngineServerDescription": "Richiede sempre l'audio dal tuo server OpenWebUI.", + "ttsDeviceUnavailableWarning": "La sintesi vocale sul dispositivo non è disponibile su questo dispositivo.", + "ttsServerUnavailableWarning": "Collegati a un server con la sintesi vocale abilitata per usare questa opzione.", "ttsVoice": "Voce", "ttsSpeechRate": "Velocità di sintesi vocale", "ttsPitch": "Tonalità", diff --git a/lib/l10n/app_localizations.dart b/lib/l10n/app_localizations.dart index 91f7949..a2c974f 100644 --- a/lib/l10n/app_localizations.dart +++ b/lib/l10n/app_localizations.dart @@ -1784,12 +1784,90 @@ abstract class AppLocalizations { /// **'Enter sends (soft keyboard). Cmd/Ctrl+Enter also available'** String get sendOnEnterDescription; + /// Section header for speech-to-text settings. + /// + /// In en, this message translates to: + /// **'Speech to Text'** + String get sttSettings; + + /// Label shown above the speech-to-text engine chips. + /// + /// In en, this message translates to: + /// **'Recognition engine'** + String get sttEngineLabel; + + /// Chip label for automatic speech-to-text selection. + /// + /// In en, this message translates to: + /// **'Auto'** + String get sttEngineAuto; + + /// Chip label for on-device speech recognition. + /// + /// In en, this message translates to: + /// **'On device'** + String get sttEngineDevice; + + /// Chip label for server speech recognition. + /// + /// In en, this message translates to: + /// **'Server'** + String get sttEngineServer; + + /// Description shown when automatic speech-to-text preference is active. + /// + /// In en, this message translates to: + /// **'Use on-device recognition when available and fall back to your server.'** + String get sttEngineAutoDescription; + + /// Description shown when on-device speech-to-text preference is active. + /// + /// In en, this message translates to: + /// **'Keep audio on this device. Voice input stops working if on-device speech recognition isn’t supported.'** + String get sttEngineDeviceDescription; + + /// Description shown when server speech-to-text preference is active. + /// + /// In en, this message translates to: + /// **'Always send recordings to your OpenWebUI server for transcription.'** + String get sttEngineServerDescription; + + /// Warning shown when the user selects on-device speech recognition but it is unavailable. + /// + /// In en, this message translates to: + /// **'On-device speech recognition isn’t available on this device.'** + String get sttDeviceUnavailableWarning; + + /// Warning shown when the user selects server speech recognition but no server is available. + /// + /// In en, this message translates to: + /// **'Connect to a server with transcription enabled to use this option.'** + String get sttServerUnavailableWarning; + + /// Label for the silence duration setting in server speech-to-text. + /// + /// In en, this message translates to: + /// **'Silence Duration'** + String get sttSilenceDuration; + + /// Description for the silence duration slider in server speech-to-text settings. + /// + /// In en, this message translates to: + /// **'Time to wait after silence before auto-stopping recording'** + String get sttSilenceDurationDescription; + /// Label for selecting the text-to-speech engine. /// /// In en, this message translates to: /// **'Engine'** String get ttsEngineLabel; + /// Chip label for automatically selecting the text-to-speech engine. + /// + /// In en, this message translates to: + /// **'Auto'** + String get ttsEngineAuto; + /// Chip label for using on-device text-to-speech. /// /// In en, this message translates to: @@ -1802,6 +1880,36 @@ abstract class AppLocalizations { /// **'Server'** String get ttsEngineServer; + /// Description shown when automatic text-to-speech preference is active. + /// + /// In en, this message translates to: + /// **'Use on-device speech when available and fall back to your server.'** + String get ttsEngineAutoDescription; + + /// Description shown when on-device text-to-speech preference is active. + /// + /// In en, this message translates to: + /// **'Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.'** + String get ttsEngineDeviceDescription; + + /// Description shown when server text-to-speech preference is active. + /// + /// In en, this message translates to: + /// **'Always request audio from your OpenWebUI server.'** + String get ttsEngineServerDescription; + + /// Warning shown when on-device text-to-speech is unavailable. + /// + /// In en, this message translates to: + /// **'On-device text-to-speech isn’t available on this device.'** + String get ttsDeviceUnavailableWarning; + + /// Warning shown when server text-to-speech is unavailable. + /// + /// In en, this message translates to: + /// **'Connect to a server with text-to-speech enabled to use this option.'** + String get ttsServerUnavailableWarning; + /// Section header for TTS-related customization options. /// /// In en, this message translates to: diff --git a/lib/l10n/app_localizations_de.dart b/lib/l10n/app_localizations_de.dart index cbb3f39..58fb7f9 100644 --- a/lib/l10n/app_localizations_de.dart +++ b/lib/l10n/app_localizations_de.dart @@ -931,15 +931,80 @@ class AppLocalizationsDe extends AppLocalizations { String get sendOnEnterDescription => 'Enter sendet (Soft-Tastatur). Cmd/Ctrl+Enter ebenfalls verfügbar'; + @override + String get sttSettings => 'Sprache zu Text'; + + @override + String get sttEngineLabel => 'Erkennungs-Engine'; + + @override + String get sttEngineAuto => 'Automatisch'; + + @override + String get sttEngineDevice => 'Auf dem Gerät'; + + @override + String get sttEngineServer => 'Server'; + + @override + String get sttEngineAutoDescription => + 'Verwendet die Erkennung auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.'; + + @override + String get sttEngineDeviceDescription => + 'Behält Audio auf diesem Gerät. Spracheingabe funktioniert nicht, wenn das Gerät keine Spracherkennung unterstützt.'; + + @override + String get sttEngineServerDescription => + 'Sendet Aufnahmen immer an deinen OpenWebUI-Server zur Transkription.'; + + @override + String get sttDeviceUnavailableWarning => + 'Auf diesem Gerät steht keine Spracherkennung zur Verfügung.'; + + @override + String get sttServerUnavailableWarning => + 'Verbinde dich mit einem Server mit aktivierter Transkription, um diese Option zu nutzen.'; + + @override + String get sttSilenceDuration => 'Stille-Dauer'; + + @override + String get sttSilenceDurationDescription => + 'Zeit nach Stille warten, bevor die Aufnahme automatisch gestoppt wird'; + @override String get ttsEngineLabel => 'Engine'; + @override + String get ttsEngineAuto => 'Automatisch'; + @override String get ttsEngineDevice => 'Auf dem Gerät'; @override String get ttsEngineServer => 'Server'; + @override + String get ttsEngineAutoDescription => + 'Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.'; + + @override + String get ttsEngineDeviceDescription => + 'Behält die Ausgabe auf diesem Gerät. Sprachausgabe funktioniert nicht, wenn das Gerät keine TTS-Unterstützung bietet.'; + + @override + String get ttsEngineServerDescription => + 'Sendet die Ausgabe immer an deinen OpenWebUI-Server.'; + + @override + String get ttsDeviceUnavailableWarning => + 'Sprachausgabe auf dem Gerät steht auf diesem Gerät nicht zur Verfügung.'; + + @override + String get ttsServerUnavailableWarning => + 'Verbinde dich mit einem Server mit aktivierter Sprachausgabe, um diese Option zu nutzen.'; + @override String get ttsSettings => 'Text zu Sprache'; diff --git a/lib/l10n/app_localizations_en.dart b/lib/l10n/app_localizations_en.dart index 72eb92f..7f2baba 100644 --- a/lib/l10n/app_localizations_en.dart +++ b/lib/l10n/app_localizations_en.dart @@ -925,15 +925,80 @@ class AppLocalizationsEn extends AppLocalizations { String get sendOnEnterDescription => 'Enter sends (soft keyboard). Cmd/Ctrl+Enter also available'; + @override + String get sttSettings => 'Speech to Text'; + + @override + String get sttEngineLabel => 'Recognition engine'; + + @override + String get sttEngineAuto => 'Auto'; + + @override + String get sttEngineDevice => 'On device'; + + @override + String get sttEngineServer => 'Server'; + + @override + String get sttEngineAutoDescription => + 'Use on-device recognition when available and fall back to your server.'; + + @override + String get sttEngineDeviceDescription => + 'Keep audio on this device. Voice input stops working if on-device speech recognition isn’t supported.'; + + @override + String get sttEngineServerDescription => + 'Always send recordings to your OpenWebUI server for transcription.'; + + @override + String get sttDeviceUnavailableWarning => + 'On-device speech recognition isn’t available on this device.'; + + @override + String get sttServerUnavailableWarning => + 'Connect to a server with transcription enabled to use this option.'; + + @override + String get sttSilenceDuration => 'Silence Duration'; + + @override + String get sttSilenceDurationDescription => + 'Time to wait after silence before auto-stopping recording'; + @override String get ttsEngineLabel => 'Engine'; + @override + String get ttsEngineAuto => 'Auto'; + @override String get ttsEngineDevice => 'On device'; @override String get ttsEngineServer => 'Server'; + @override + String get ttsEngineAutoDescription => + 'Use on-device speech when available and fall back to your server.'; + + @override + String get ttsEngineDeviceDescription => + 'Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.'; + + @override + String get ttsEngineServerDescription => + 'Always request audio from your OpenWebUI server.'; + + @override + String get ttsDeviceUnavailableWarning => + 'On-device text-to-speech isn’t available on this device.'; + + @override + String get ttsServerUnavailableWarning => + 'Connect to a server with text-to-speech enabled to use this option.'; + @override String get ttsSettings => 'Text to Speech'; diff --git a/lib/l10n/app_localizations_fr.dart b/lib/l10n/app_localizations_fr.dart index e312b8e..97abbd8 100644 --- a/lib/l10n/app_localizations_fr.dart +++ b/lib/l10n/app_localizations_fr.dart @@ -940,15 +940,80 @@ class AppLocalizationsFr extends AppLocalizations { String get sendOnEnterDescription => 'Entrée envoie (clavier logiciel). Cmd/Ctrl+Entrée aussi disponible'; + @override + String get sttSettings => 'Voix vers texte'; + + @override + String get sttEngineLabel => 'Moteur de reconnaissance'; + + @override + String get sttEngineAuto => 'Auto'; + + @override + String get sttEngineDevice => 'Sur l’appareil'; + + @override + String get sttEngineServer => 'Serveur'; + + @override + String get sttEngineAutoDescription => + 'Utilise la reconnaissance sur l’appareil quand c’est possible, sinon bascule vers votre serveur.'; + + @override + String get sttEngineDeviceDescription => + 'Conserve l’audio sur cet appareil. L’entrée vocale cesse de fonctionner si la reconnaissance vocale n’est pas prise en charge.'; + + @override + String get sttEngineServerDescription => + 'Envoie toujours les enregistrements à votre serveur OpenWebUI pour transcription.'; + + @override + String get sttDeviceUnavailableWarning => + 'La reconnaissance vocale sur l’appareil n’est pas disponible sur cet appareil.'; + + @override + String get sttServerUnavailableWarning => + 'Connectez-vous à un serveur avec la transcription activée pour utiliser cette option.'; + + @override + String get sttSilenceDuration => 'Durée du silence'; + + @override + String get sttSilenceDurationDescription => + 'Temps d\'attente après le silence avant d\'arrêter automatiquement l\'enregistrement'; + @override String get ttsEngineLabel => 'Moteur'; + @override + String get ttsEngineAuto => 'Auto'; + @override String get ttsEngineDevice => 'Sur l\'appareil'; @override String get ttsEngineServer => 'Serveur'; + @override + String get ttsEngineAutoDescription => + 'Utilise la synthèse locale quand c’est possible, sinon bascule vers votre serveur.'; + + @override + String get ttsEngineDeviceDescription => + 'Garde la synthèse sur cet appareil. La lecture vocale ne fonctionne plus si l’appareil n’offre pas la synthèse vocale.'; + + @override + String get ttsEngineServerDescription => + 'Demande toujours l\'audio à votre serveur OpenWebUI.'; + + @override + String get ttsDeviceUnavailableWarning => + 'La synthèse vocale sur l’appareil n’est pas disponible sur cet appareil.'; + + @override + String get ttsServerUnavailableWarning => + 'Connectez-vous à un serveur avec la synthèse vocale activée pour utiliser cette option.'; + @override String get ttsSettings => 'Synthèse vocale'; diff --git a/lib/l10n/app_localizations_it.dart b/lib/l10n/app_localizations_it.dart index 0af2b1b..f47fe4d 100644 --- a/lib/l10n/app_localizations_it.dart +++ b/lib/l10n/app_localizations_it.dart @@ -929,15 +929,80 @@ class AppLocalizationsIt extends AppLocalizations { String get sendOnEnterDescription => 'Invio invia (tastiera software). Cmd/Ctrl+Invio disponibile'; + @override + String get sttSettings => 'Voce in testo'; + + @override + String get sttEngineLabel => 'Motore di riconoscimento'; + + @override + String get sttEngineAuto => 'Automatico'; + + @override + String get sttEngineDevice => 'Sul dispositivo'; + + @override + String get sttEngineServer => 'Server'; + + @override + String get sttEngineAutoDescription => + 'Usa il riconoscimento sul dispositivo quando disponibile e altrimenti passa al tuo server.'; + + @override + String get sttEngineDeviceDescription => + 'Mantiene l’audio su questo dispositivo. L’input vocale non funziona se il dispositivo non supporta il riconoscimento vocale.'; + + @override + String get sttEngineServerDescription => + 'Invia sempre le registrazioni al tuo server OpenWebUI per la trascrizione.'; + + @override + String get sttDeviceUnavailableWarning => + 'Il riconoscimento vocale sul dispositivo non è disponibile su questo dispositivo.'; + + @override + String get sttServerUnavailableWarning => + 'Collegati a un server con la trascrizione abilitata per usare questa opzione.'; + + @override + String get sttSilenceDuration => 'Durata del silenzio'; + + @override + String get sttSilenceDurationDescription => + 'Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione'; + @override String get ttsEngineLabel => 'Motore'; + @override + String get ttsEngineAuto => 'Automatico'; + @override String get ttsEngineDevice => 'Sul dispositivo'; @override String get ttsEngineServer => 'Server'; + @override + String get ttsEngineAutoDescription => + 'Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.'; + + @override + String get ttsEngineDeviceDescription => + 'Mantiene la sintesi su questo dispositivo. La riproduzione vocale non funziona se il dispositivo non supporta il TTS.'; + + @override + String get ttsEngineServerDescription => + 'Richiede sempre l\'audio dal tuo server OpenWebUI.'; + + @override + String get ttsDeviceUnavailableWarning => + 'La sintesi vocale sul dispositivo non è disponibile su questo dispositivo.'; + + @override + String get ttsServerUnavailableWarning => + 'Collegati a un server con la sintesi vocale abilitata per usare questa opzione.'; + @override String get ttsSettings => 'Sintesi vocale'; diff --git a/lib/l10n/app_nl.arb b/lib/l10n/app_nl.arb index d6133d2..0b5f8aa 100644 --- a/lib/l10n/app_nl.arb +++ b/lib/l10n/app_nl.arb @@ -307,7 +307,25 @@ "chatSettings": "Chat", "sendOnEnter": "Verzenden met Enter", "sendOnEnterDescription": "Enter verzendt (softtoetsenbord). Cmd/Ctrl+Enter ook beschikbaar", + "sttSettings": "Spraak naar tekst", + "sttEngineLabel": "Herkenningsengine", + "sttEngineAuto": "Automatisch", + "sttEngineDevice": "Op het apparaat", + "sttEngineServer": "Server", + "sttEngineAutoDescription": "Gebruikt spraakherkenning op het apparaat wanneer beschikbaar en valt anders terug op je server.", + "sttEngineDeviceDescription": "Houdt audio op dit apparaat. Spraakinput werkt niet als het apparaat geen spraakherkenning ondersteunt.", + "sttEngineServerDescription": "Stuurt opnames altijd naar je OpenWebUI-server voor transcriptie.", + "sttDeviceUnavailableWarning": "Spraakherkenning op het apparaat is niet beschikbaar op dit apparaat.", + "sttServerUnavailableWarning": "Verbind met een server met transcriptie ingeschakeld om deze optie te gebruiken.", + "sttSilenceDuration": "Stilteduur", + "sttSilenceDurationDescription": "Tijd om te wachten na stilte voordat de opname automatisch stopt", "ttsSettings": "Tekst naar spraak", + "ttsEngineAuto": "Automatisch", + "ttsEngineAutoDescription": "Gebruikt spraaksynthese op het apparaat wanneer beschikbaar en valt anders terug op je server.", + "ttsEngineDeviceDescription": "Houdt de synthese op dit apparaat. Spraakweergave werkt niet als het apparaat geen TTS ondersteunt.", + "ttsEngineServerDescription": "Vraagt altijd audio op bij je OpenWebUI-server.", + "ttsDeviceUnavailableWarning": "Spraaksynthese op het apparaat is niet beschikbaar op dit apparaat.", + "ttsServerUnavailableWarning": "Verbind met een server met tekst-naar-spraak ingeschakeld om deze optie te gebruiken.", "ttsVoice": "Stem", "ttsSpeechRate": "Spraaksnelheid", "ttsPitch": "Toonhoogte", diff --git a/lib/l10n/app_ru.arb b/lib/l10n/app_ru.arb index d438815..3383c6c 100644 --- a/lib/l10n/app_ru.arb +++ b/lib/l10n/app_ru.arb @@ -307,7 +307,25 @@ "chatSettings": "Чат", "sendOnEnter": "Отправка по Enter", "sendOnEnterDescription": "Enter отправляет (программная клавиатура). Также доступно Cmd/Ctrl+Enter", + "sttSettings": "Речь в текст", + "sttEngineLabel": "Движок распознавания", + "sttEngineAuto": "Авто", + "sttEngineDevice": "На устройстве", + "sttEngineServer": "Сервер", + "sttEngineAutoDescription": "Использует распознавание на устройстве, когда это возможно, иначе переключается на ваш сервер.", + "sttEngineDeviceDescription": "Оставляет звук на этом устройстве. Голосовой ввод не работает, если устройство не поддерживает распознавание речи.", + "sttEngineServerDescription": "Всегда отправляет записи на сервер OpenWebUI для транскрибации.", + "sttDeviceUnavailableWarning": "Распознавание речи на устройстве недоступно на этом устройстве.", + "sttServerUnavailableWarning": "Подключитесь к серверу с включённой транскрибацией, чтобы использовать эту опцию.", + "sttSilenceDuration": "Длительность тишины", + "sttSilenceDurationDescription": "Время ожидания после тишины перед автоматической остановкой записи", "ttsSettings": "Преобразование текста в речь", + "ttsEngineAuto": "Авто", + "ttsEngineAutoDescription": "Использует синтез речи на устройстве, когда это возможно, иначе переключается на ваш сервер.", + "ttsEngineDeviceDescription": "Оставляет синтез на этом устройстве. Воспроизведение голоса не работает, если устройство не поддерживает синтез речи.", + "ttsEngineServerDescription": "Всегда запрашивает аудио у вашего сервера OpenWebUI.", + "ttsDeviceUnavailableWarning": "Синтез речи на устройстве недоступен на этом устройстве.", + "ttsServerUnavailableWarning": "Подключитесь к серверу с включённым синтезом речи, чтобы использовать эту опцию.", "ttsVoice": "Голос", "ttsSpeechRate": "Скорость речи", "ttsPitch": "Высота тона", diff --git a/lib/l10n/app_zh.arb b/lib/l10n/app_zh.arb index b8b41f9..0ac5761 100644 --- a/lib/l10n/app_zh.arb +++ b/lib/l10n/app_zh.arb @@ -307,7 +307,25 @@ "chatSettings": "对话", "sendOnEnter": "回车发送", "sendOnEnterDescription": "回车发送(软键盘)。Cmd/Ctrl+Enter 也可用", + "sttSettings": "语音转文字", + "sttEngineLabel": "识别引擎", + "sttEngineAuto": "自动", + "sttEngineDevice": "本机", + "sttEngineServer": "服务器", + "sttEngineAutoDescription": "在可用时使用本机识别,否则切换到你的服务器。", + "sttEngineDeviceDescription": "音频会保留在此设备上。如果设备不支持语音识别,语音输入将不可用。", + "sttEngineServerDescription": "始终将录音发送到你的 OpenWebUI 服务器进行转写。", + "sttDeviceUnavailableWarning": "此设备不支持本机语音识别。", + "sttServerUnavailableWarning": "连接到启用转写功能的服务器后才能使用此选项。", + "sttSilenceDuration": "静音持续时间", + "sttSilenceDurationDescription": "检测到静音后等待多久自动停止录音", "ttsSettings": "文本转语音", + "ttsEngineAuto": "自动", + "ttsEngineAutoDescription": "在可用时使用本机合成,否则切换到你的服务器。", + "ttsEngineDeviceDescription": "在此设备上完成合成。如果设备不支持文本转语音,语音播放将不可用。", + "ttsEngineServerDescription": "始终向你的 OpenWebUI 服务器请求音频。", + "ttsDeviceUnavailableWarning": "此设备不支持本机文本转语音。", + "ttsServerUnavailableWarning": "连接到启用文本转语音的服务器后才能使用此选项。", "ttsVoice": "语音", "ttsSpeechRate": "语速", "ttsPitch": "音调", diff --git a/pubspec.lock b/pubspec.lock index 405533d..b662398 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -965,6 +965,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.16.0" + mic_stream_recorder: + dependency: "direct main" + description: + name: mic_stream_recorder + sha256: "73965991ef5cc93d2b0c1e6d590cbd567a853b9ee7b2d52de43a73f185bb0d9c" + url: "https://pub.dev" + source: hosted + version: "1.1.2" mime: dependency: transitive description: @@ -1165,70 +1173,6 @@ packages: url: "https://pub.dev" source: hosted version: "1.5.0" - record: - dependency: "direct main" - description: - name: record - sha256: "9dbc6ff3e784612f90a9b001373c45ff76b7a08abd2bd9fdf72c242320c8911c" - url: "https://pub.dev" - source: hosted - version: "6.1.1" - record_android: - dependency: transitive - description: - name: record_android - sha256: "854627cd78d8d66190377f98477eee06ca96ab7c9f2e662700daf33dbf7e6673" - url: "https://pub.dev" - source: hosted - version: "1.4.2" - record_ios: - dependency: transitive - description: - name: record_ios - sha256: "13e241ed9cbc220534a40ae6b66222e21288db364d96dd66fb762ebd3cb77c71" - url: "https://pub.dev" - source: hosted - version: "1.1.2" - record_linux: - dependency: transitive - description: - name: record_linux - sha256: "235b1f1fb84e810f8149cc0c2c731d7d697f8d1c333b32cb820c449bf7bb72d8" - url: "https://pub.dev" - source: hosted - version: "1.2.1" - record_macos: - dependency: transitive - description: - name: record_macos - sha256: "2849068bb59072f300ad63ed146e543d66afaef8263edba4de4834fc7c8d4d35" - url: "https://pub.dev" - source: hosted - version: "1.1.1" - record_platform_interface: - dependency: transitive - description: - name: record_platform_interface - sha256: b0065fdf1ec28f5a634d676724d388a77e43ce7646fb049949f58c69f3fcb4ed - url: "https://pub.dev" - source: hosted - version: "1.4.0" - record_web: - dependency: transitive - description: - name: record_web - sha256: "4f0adf20c9ccafcc02d71111fd91fba1ca7b17a7453902593e5a9b25b74a5c56" - url: "https://pub.dev" - source: hosted - version: "1.2.0" - record_windows: - dependency: transitive - description: - name: record_windows - sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78" - url: "https://pub.dev" - source: hosted - version: "1.0.7" riverpod: dependency: transitive description: diff --git a/pubspec.yaml b/pubspec.yaml index ee02fd1..10af0a9 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -44,7 +44,7 @@ dependencies: flutter_animate: ^4.5.0 # Platform Features - record: ^6.1.1 + mic_stream_recorder: ^1.1.2 stts: ^1.2.5 flutter_tts: ^4.2.3 audioplayers: ^6.5.1