From 1bb2cbae25fb6c7ac262b935996f1a0682cdcec8 Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Wed, 5 Nov 2025 00:33:17 +0530 Subject: [PATCH] feat(voice): add voice silence duration configuration --- ios/Podfile.lock | 12 +-- lib/core/persistence/persistence_keys.dart | 1 + lib/core/services/settings_service.dart | 27 +++++++ .../chat/services/voice_input_service.dart | 10 ++- .../profile/views/app_customization_page.dart | 77 +++++++++++++++++++ lib/l10n/app_de.arb | 2 + lib/l10n/app_en.arb | 8 ++ lib/l10n/app_es.arb | 2 + lib/l10n/app_fr.arb | 4 +- lib/l10n/app_it.arb | 4 +- lib/l10n/app_nl.arb | 4 +- lib/l10n/app_ru.arb | 4 +- lib/l10n/app_zh.arb | 4 +- 13 files changed, 145 insertions(+), 14 deletions(-) diff --git a/ios/Podfile.lock b/ios/Podfile.lock index afb4210..42cde87 100644 --- a/ios/Podfile.lock +++ b/ios/Podfile.lock @@ -49,13 +49,13 @@ PODS: - Flutter - image_picker_ios (0.0.1): - Flutter + - mic_stream_recorder (0.0.1): + - Flutter - package_info_plus (0.4.5): - Flutter - path_provider_foundation (0.0.1): - Flutter - FlutterMacOS - - record_ios (1.1.0): - - Flutter - SDWebImage (5.21.1): - SDWebImage/Core (= 5.21.1) - SDWebImage/Core (5.21.1) @@ -96,9 +96,9 @@ DEPENDENCIES: - flutter_secure_storage (from `.symlinks/plugins/flutter_secure_storage/ios`) - flutter_tts (from `.symlinks/plugins/flutter_tts/ios`) - image_picker_ios (from `.symlinks/plugins/image_picker_ios/ios`) + - mic_stream_recorder (from `.symlinks/plugins/mic_stream_recorder/ios`) - package_info_plus (from `.symlinks/plugins/package_info_plus/ios`) - path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`) - - record_ios (from `.symlinks/plugins/record_ios/ios`) - share_handler_ios (from `.symlinks/plugins/share_handler_ios/ios`) - share_handler_ios_models (from `.symlinks/plugins/share_handler_ios/ios/Models`) - share_plus (from `.symlinks/plugins/share_plus/ios`) @@ -135,12 +135,12 @@ EXTERNAL SOURCES: :path: 
".symlinks/plugins/flutter_tts/ios" image_picker_ios: :path: ".symlinks/plugins/image_picker_ios/ios" + mic_stream_recorder: + :path: ".symlinks/plugins/mic_stream_recorder/ios" package_info_plus: :path: ".symlinks/plugins/package_info_plus/ios" path_provider_foundation: :path: ".symlinks/plugins/path_provider_foundation/darwin" - record_ios: - :path: ".symlinks/plugins/record_ios/ios" share_handler_ios: :path: ".symlinks/plugins/share_handler_ios/ios" share_handler_ios_models: @@ -172,9 +172,9 @@ SPEC CHECKSUMS: flutter_secure_storage: 1ed9476fba7e7a782b22888f956cce43e2c62f13 flutter_tts: b88dbc8655d3dc961bc4a796e4e16a4cc1795833 image_picker_ios: 7fe1ff8e34c1790d6fff70a32484959f563a928a + mic_stream_recorder: 27d2d1225563a3a28bf4019fc5cc198cffd7dad1 package_info_plus: af8e2ca6888548050f16fa2f1938db7b5a5df499 path_provider_foundation: 080d55be775b7414fd5a5ef3ac137b97b097e564 - record_ios: f75fa1d57f840012775c0e93a38a7f3ceea1a374 SDWebImage: f29024626962457f3470184232766516dee8dfea share_handler_ios: e2244e990f826b2c8eaa291ac3831569438ba0fb share_handler_ios_models: fc638c9b4330dc7f082586c92aee9dfa0b87b871 diff --git a/lib/core/persistence/persistence_keys.dart b/lib/core/persistence/persistence_keys.dart index 177fedc..c74a616 100644 --- a/lib/core/persistence/persistence_keys.dart +++ b/lib/core/persistence/persistence_keys.dart @@ -28,6 +28,7 @@ final class PreferenceKeys { static const String ttsEngine = 'tts_engine'; // 'device' | 'server' static const String ttsServerVoiceId = 'tts_server_voice_id'; static const String ttsServerVoiceName = 'tts_server_voice_name'; + static const String voiceSilenceDuration = 'voice_silence_duration'; } final class LegacyPreferenceKeys { diff --git a/lib/core/services/settings_service.dart b/lib/core/services/settings_service.dart index 6d95faa..e04f210 100644 --- a/lib/core/services/settings_service.dart +++ b/lib/core/services/settings_service.dart @@ -35,6 +35,9 @@ class SettingsService { .quickPills; // StringList of 
identifiers e.g. ['web','image','tools'] // Chat input behavior static const String _sendOnEnterKey = PreferenceKeys.sendOnEnterKey; + // Voice silence duration for auto-stop (milliseconds) + static const String _voiceSilenceDurationKey = + PreferenceKeys.voiceSilenceDuration; static Box _preferencesBox() => Hive.box(HiveBoxNames.preferences); @@ -157,6 +160,8 @@ class SettingsService { sttPreference: _parseSttPreference( box.get(PreferenceKeys.voiceSttPreference) as String?, ), + voiceSilenceDuration: + (box.get(_voiceSilenceDurationKey) as int? ?? 2000).clamp(300, 3000), ), ); } @@ -181,6 +186,7 @@ class SettingsService { PreferenceKeys.ttsVolume: settings.ttsVolume, PreferenceKeys.ttsEngine: settings.ttsEngine.name, PreferenceKeys.voiceSttPreference: settings.sttPreference.name, + _voiceSilenceDurationKey: settings.voiceSilenceDuration, }; await box.putAll(updates); @@ -331,6 +337,16 @@ class SettingsService { return _preferencesBox().put(_sendOnEnterKey, value); } + static Future getVoiceSilenceDuration() { + final value = _preferencesBox().get(_voiceSilenceDurationKey) as int?; + return Future.value((value ?? 2000).clamp(300, 3000)); + } + + static Future setVoiceSilenceDuration(int milliseconds) { + final sanitized = milliseconds.clamp(300, 3000); + return _preferencesBox().put(_voiceSilenceDurationKey, sanitized); + } + /// Get effective animation duration considering all settings static Duration getEffectiveAnimationDuration( BuildContext context, @@ -394,6 +410,7 @@ class AppSettings { final TtsEngine ttsEngine; final String? ttsServerVoiceId; final String? ttsServerVoiceName; + final int voiceSilenceDuration; const AppSettings({ this.reduceMotion = false, this.animationSpeed = 1.0, @@ -416,6 +433,7 @@ class AppSettings { this.ttsEngine = TtsEngine.auto, this.ttsServerVoiceId, this.ttsServerVoiceName, + this.voiceSilenceDuration = 2000, }); AppSettings copyWith({ @@ -440,6 +458,7 @@ class AppSettings { TtsEngine? ttsEngine, Object? 
ttsServerVoiceId = const _DefaultValue(), Object? ttsServerVoiceName = const _DefaultValue(), + int? voiceSilenceDuration, }) { return AppSettings( reduceMotion: reduceMotion ?? this.reduceMotion, @@ -471,6 +490,7 @@ class AppSettings { ttsServerVoiceName: ttsServerVoiceName is _DefaultValue ? this.ttsServerVoiceName : ttsServerVoiceName as String?, + voiceSilenceDuration: voiceSilenceDuration ?? this.voiceSilenceDuration, ); } @@ -497,6 +517,7 @@ class AppSettings { other.ttsEngine == ttsEngine && other.ttsServerVoiceId == ttsServerVoiceId && other.ttsServerVoiceName == ttsServerVoiceName && + other.voiceSilenceDuration == voiceSilenceDuration && _listEquals(other.quickPills, quickPills); // socketTransportMode intentionally not included in == to avoid frequent rebuilds } @@ -524,6 +545,7 @@ class AppSettings { ttsEngine, ttsServerVoiceId, ttsServerVoiceName, + voiceSilenceDuration, Object.hashAllUnordered(quickPills), ]); } @@ -679,6 +701,11 @@ class AppSettingsNotifier extends _$AppSettingsNotifier { await SettingsService.saveSettings(state); } + Future setVoiceSilenceDuration(int milliseconds) async { + state = state.copyWith(voiceSilenceDuration: milliseconds); + await SettingsService.setVoiceSilenceDuration(milliseconds); + } + Future resetToDefaults() async { const defaultSettings = AppSettings(); await SettingsService.saveSettings(defaultSettings); diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart index 805cdac..c1990aa 100644 --- a/lib/features/chat/services/voice_input_service.dart +++ b/lib/features/chat/services/voice_input_service.dart @@ -26,6 +26,7 @@ class VoiceInputService { final MicStreamRecorder _recorder = MicStreamRecorder(); final Stt _speech = Stt(); final ApiService? _api; + final Ref? 
_ref; bool _isInitialized = false; bool _isListening = false; bool _localSttAvailable = false; @@ -59,7 +60,9 @@ class VoiceInputService { bool get prefersServerOnly => _preference == SttPreference.serverOnly; bool get prefersDeviceOnly => _preference == SttPreference.deviceOnly; - VoiceInputService({ApiService? api}) : _api = api; + VoiceInputService({ApiService? api, Ref? ref}) + : _api = api, + _ref = ref; void updatePreference(SttPreference preference) { _preference = preference; @@ -451,7 +454,8 @@ class VoiceInputService { _silenceTimer?.cancel(); _silenceTimer = null; } else if (_hasDetectedSpeech && _silenceTimer == null) { - _silenceTimer = Timer(const Duration(milliseconds: 800), () { + final silenceDuration = _ref?.read(appSettingsProvider).voiceSilenceDuration ?? 2000; + _silenceTimer = Timer(Duration(milliseconds: silenceDuration), () { if (_isListening && _usingServerStt) { unawaited(_stopListening()); } @@ -652,7 +656,7 @@ class VoiceInputService { final voiceInputServiceProvider = Provider((ref) { final api = ref.watch(apiServiceProvider); - final service = VoiceInputService(api: api); + final service = VoiceInputService(api: api, ref: ref); final currentSettings = ref.read(appSettingsProvider); service.updatePreference(currentSettings.sttPreference); ref.listen(appSettingsProvider, (previous, next) { diff --git a/lib/features/profile/views/app_customization_page.dart b/lib/features/profile/views/app_customization_page.dart index c4f989a..86c81bf 100644 --- a/lib/features/profile/views/app_customization_page.dart +++ b/lib/features/profile/views/app_customization_page.dart @@ -684,6 +684,83 @@ class AppCustomizationPage extends ConsumerWidget { ), ), ], + if (settings.sttPreference == SttPreference.serverOnly || + (settings.sttPreference == SttPreference.auto && + serverAvailable)) ...[ + const SizedBox(height: Spacing.md), + const Divider(), + const SizedBox(height: Spacing.md), + Row( + children: [ + Expanded( + child: Column( + 
crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Text( + l10n.sttSilenceDuration, + style: theme.bodyMedium?.copyWith( + color: theme.sidebarForeground, + fontWeight: FontWeight.w600, + ) ?? + TextStyle( + color: theme.sidebarForeground, + fontSize: 14, + fontWeight: FontWeight.w600, + ), + ), + const SizedBox(height: Spacing.xs), + Text( + '${settings.voiceSilenceDuration}ms', + style: theme.bodySmall?.copyWith( + color: theme.sidebarForeground + .withValues(alpha: 0.7), + ) ?? + TextStyle( + color: theme.sidebarForeground + .withValues(alpha: 0.7), + fontSize: 12, + ), + ), + ], + ), + ), + Text( + '${(settings.voiceSilenceDuration / 1000).toStringAsFixed(1)}s', + style: theme.bodyMedium?.copyWith( + color: theme.buttonPrimary, + fontWeight: FontWeight.w600, + ) ?? + TextStyle( + color: theme.buttonPrimary, + fontSize: 14, + fontWeight: FontWeight.w600, + ), + ), + ], + ), + const SizedBox(height: Spacing.sm), + Slider( + value: settings.voiceSilenceDuration.toDouble(), + min: 300, + max: 3000, + divisions: 27, + activeColor: theme.buttonPrimary, + inactiveColor: theme.cardBorder.withValues(alpha: 0.4), + onChanged: (value) { + notifier.setVoiceSilenceDuration(value.round()); + }, + ), + Text( + l10n.sttSilenceDurationDescription, + style: theme.bodySmall?.copyWith( + color: theme.sidebarForeground.withValues(alpha: 0.7), + ) ?? 
+ TextStyle( + color: theme.sidebarForeground.withValues(alpha: 0.7), + fontSize: 12, + ), + ), + ], ], ), ), diff --git a/lib/l10n/app_de.arb b/lib/l10n/app_de.arb index 19b8782..17381a0 100644 --- a/lib/l10n/app_de.arb +++ b/lib/l10n/app_de.arb @@ -317,6 +317,8 @@ "sttEngineServerDescription": "Sendet Aufnahmen immer an deinen OpenWebUI-Server zur Transkription.", "sttDeviceUnavailableWarning": "Auf diesem Gerät steht keine Spracherkennung zur Verfügung.", "sttServerUnavailableWarning": "Verbinde dich mit einem Server mit aktivierter Transkription, um diese Option zu nutzen.", + "sttSilenceDuration": "Stille-Dauer", + "sttSilenceDurationDescription": "Wartezeit nach Stille, bevor die Aufnahme automatisch gestoppt wird", "ttsSettings": "Text zu Sprache", "ttsEngineAuto": "Automatisch", "ttsEngineAutoDescription": "Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.", diff --git a/lib/l10n/app_en.arb b/lib/l10n/app_en.arb index 9fd39b5..cf6a18c 100644 --- a/lib/l10n/app_en.arb +++ b/lib/l10n/app_en.arb @@ -1259,6 +1259,14 @@ "@sttServerUnavailableWarning": { "description": "Warning shown when the user selects server speech recognition but no server is available." }, + "sttSilenceDuration": "Silence Duration", + "@sttSilenceDuration": { + "description": "Label for the silence duration setting in server speech-to-text." + }, + "sttSilenceDurationDescription": "Time to wait after silence before auto-stopping recording", + "@sttSilenceDurationDescription": { + "description": "Description for the silence duration slider in server speech-to-text settings." + }, "ttsEngineLabel": "Engine", "@ttsEngineLabel": { "description": "Label for selecting the text-to-speech engine." 
diff --git a/lib/l10n/app_es.arb b/lib/l10n/app_es.arb index b2329c1..73c1b35 100644 --- a/lib/l10n/app_es.arb +++ b/lib/l10n/app_es.arb @@ -317,6 +317,8 @@ "sttEngineServerDescription": "Envía siempre las grabaciones a tu servidor OpenWebUI para la transcripción.", "sttDeviceUnavailableWarning": "El reconocimiento de voz en el dispositivo no está disponible en este dispositivo.", "sttServerUnavailableWarning": "Conéctate a un servidor con transcripción habilitada para usar esta opción.", + "sttSilenceDuration": "Duración del silencio", + "sttSilenceDurationDescription": "Tiempo de espera después del silencio antes de detener automáticamente la grabación", "ttsSettings": "Texto a voz", "ttsEngineAuto": "Automático", "ttsEngineAutoDescription": "Usa la síntesis en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.", diff --git a/lib/l10n/app_fr.arb b/lib/l10n/app_fr.arb index d6a7dbb..18955b9 100644 --- a/lib/l10n/app_fr.arb +++ b/lib/l10n/app_fr.arb @@ -317,7 +317,9 @@ "sttEngineServerDescription": "Envoie toujours les enregistrements à votre serveur OpenWebUI pour transcription.", "sttDeviceUnavailableWarning": "La reconnaissance vocale sur l’appareil n’est pas disponible sur cet appareil.", "sttServerUnavailableWarning": "Connectez-vous à un serveur avec la transcription activée pour utiliser cette option.", -"ttsSettings": "Synthèse vocale", + "sttSilenceDuration": "Durée du silence", + "sttSilenceDurationDescription": "Temps d'attente après le silence avant d'arrêter automatiquement l'enregistrement", + "ttsSettings": "Synthèse vocale", "ttsEngineAuto": "Auto", "ttsEngineAutoDescription": "Utilise la synthèse locale quand c’est possible, sinon bascule vers votre serveur.", "ttsEngineDeviceDescription": "Garde la synthèse sur cet appareil. 
La lecture vocale ne fonctionne plus si l’appareil n’offre pas la synthèse vocale.", diff --git a/lib/l10n/app_it.arb b/lib/l10n/app_it.arb index 2c997cd..fe7948e 100644 --- a/lib/l10n/app_it.arb +++ b/lib/l10n/app_it.arb @@ -317,7 +317,9 @@ "sttEngineServerDescription": "Invia sempre le registrazioni al tuo server OpenWebUI per la trascrizione.", "sttDeviceUnavailableWarning": "Il riconoscimento vocale sul dispositivo non è disponibile su questo dispositivo.", "sttServerUnavailableWarning": "Collegati a un server con la trascrizione abilitata per usare questa opzione.", -"ttsSettings": "Sintesi vocale", + "sttSilenceDuration": "Durata del silenzio", + "sttSilenceDurationDescription": "Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione", + "ttsSettings": "Sintesi vocale", "ttsEngineAuto": "Automatico", "ttsEngineAutoDescription": "Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.", "ttsEngineDeviceDescription": "Mantiene la sintesi su questo dispositivo. 
La riproduzione vocale non funziona se il dispositivo non supporta il TTS.", diff --git a/lib/l10n/app_nl.arb b/lib/l10n/app_nl.arb index 424861f..0b5f8aa 100644 --- a/lib/l10n/app_nl.arb +++ b/lib/l10n/app_nl.arb @@ -317,7 +317,9 @@ "sttEngineServerDescription": "Stuurt opnames altijd naar je OpenWebUI-server voor transcriptie.", "sttDeviceUnavailableWarning": "Spraakherkenning op het apparaat is niet beschikbaar op dit apparaat.", "sttServerUnavailableWarning": "Verbind met een server met transcriptie ingeschakeld om deze optie te gebruiken.", -"ttsSettings": "Tekst naar spraak", + "sttSilenceDuration": "Stilteduur", + "sttSilenceDurationDescription": "Tijd om te wachten na stilte voordat de opname automatisch stopt", + "ttsSettings": "Tekst naar spraak", "ttsEngineAuto": "Automatisch", "ttsEngineAutoDescription": "Gebruikt spraaksynthese op het apparaat wanneer beschikbaar en valt anders terug op je server.", "ttsEngineDeviceDescription": "Houdt de synthese op dit apparaat. Spraakweergave werkt niet als het apparaat geen TTS ondersteunt.", diff --git a/lib/l10n/app_ru.arb b/lib/l10n/app_ru.arb index 6187548..3383c6c 100644 --- a/lib/l10n/app_ru.arb +++ b/lib/l10n/app_ru.arb @@ -317,7 +317,9 @@ "sttEngineServerDescription": "Всегда отправляет записи на сервер OpenWebUI для транскрибации.", "sttDeviceUnavailableWarning": "Распознавание речи на устройстве недоступно на этом устройстве.", "sttServerUnavailableWarning": "Подключитесь к серверу с включённой транскрибацией, чтобы использовать эту опцию.", -"ttsSettings": "Преобразование текста в речь", + "sttSilenceDuration": "Длительность тишины", + "sttSilenceDurationDescription": "Время ожидания после тишины перед автоматической остановкой записи", + "ttsSettings": "Преобразование текста в речь", "ttsEngineAuto": "Авто", "ttsEngineAutoDescription": "Использует синтез речи на устройстве, когда это возможно, иначе переключается на ваш сервер.", "ttsEngineDeviceDescription": "Оставляет синтез на этом устройстве. 
Воспроизведение голоса не работает, если устройство не поддерживает синтез речи.", diff --git a/lib/l10n/app_zh.arb b/lib/l10n/app_zh.arb index abc7460..0ac5761 100644 --- a/lib/l10n/app_zh.arb +++ b/lib/l10n/app_zh.arb @@ -317,7 +317,9 @@ "sttEngineServerDescription": "始终将录音发送到你的 OpenWebUI 服务器进行转写。", "sttDeviceUnavailableWarning": "此设备不支持本机语音识别。", "sttServerUnavailableWarning": "连接到启用转写功能的服务器后才能使用此选项。", -"ttsSettings": "文本转语音", + "sttSilenceDuration": "静音持续时间", + "sttSilenceDurationDescription": "检测到静音后等待多久自动停止录音", + "ttsSettings": "文本转语音", "ttsEngineAuto": "自动", "ttsEngineAutoDescription": "在可用时使用本机合成,否则切换到你的服务器。", "ttsEngineDeviceDescription": "在此设备上完成合成。如果设备不支持文本转语音,语音播放将不可用。",