feat(voice): add voice silence duration configuration
This commit is contained in:
@@ -49,13 +49,13 @@ PODS:
|
||||
- Flutter
|
||||
- image_picker_ios (0.0.1):
|
||||
- Flutter
|
||||
- mic_stream_recorder (0.0.1):
|
||||
- Flutter
|
||||
- package_info_plus (0.4.5):
|
||||
- Flutter
|
||||
- path_provider_foundation (0.0.1):
|
||||
- Flutter
|
||||
- FlutterMacOS
|
||||
- record_ios (1.1.0):
|
||||
- Flutter
|
||||
- SDWebImage (5.21.1):
|
||||
- SDWebImage/Core (= 5.21.1)
|
||||
- SDWebImage/Core (5.21.1)
|
||||
@@ -96,9 +96,9 @@ DEPENDENCIES:
|
||||
- flutter_secure_storage (from `.symlinks/plugins/flutter_secure_storage/ios`)
|
||||
- flutter_tts (from `.symlinks/plugins/flutter_tts/ios`)
|
||||
- image_picker_ios (from `.symlinks/plugins/image_picker_ios/ios`)
|
||||
- mic_stream_recorder (from `.symlinks/plugins/mic_stream_recorder/ios`)
|
||||
- package_info_plus (from `.symlinks/plugins/package_info_plus/ios`)
|
||||
- path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`)
|
||||
- record_ios (from `.symlinks/plugins/record_ios/ios`)
|
||||
- share_handler_ios (from `.symlinks/plugins/share_handler_ios/ios`)
|
||||
- share_handler_ios_models (from `.symlinks/plugins/share_handler_ios/ios/Models`)
|
||||
- share_plus (from `.symlinks/plugins/share_plus/ios`)
|
||||
@@ -135,12 +135,12 @@ EXTERNAL SOURCES:
|
||||
:path: ".symlinks/plugins/flutter_tts/ios"
|
||||
image_picker_ios:
|
||||
:path: ".symlinks/plugins/image_picker_ios/ios"
|
||||
mic_stream_recorder:
|
||||
:path: ".symlinks/plugins/mic_stream_recorder/ios"
|
||||
package_info_plus:
|
||||
:path: ".symlinks/plugins/package_info_plus/ios"
|
||||
path_provider_foundation:
|
||||
:path: ".symlinks/plugins/path_provider_foundation/darwin"
|
||||
record_ios:
|
||||
:path: ".symlinks/plugins/record_ios/ios"
|
||||
share_handler_ios:
|
||||
:path: ".symlinks/plugins/share_handler_ios/ios"
|
||||
share_handler_ios_models:
|
||||
@@ -172,9 +172,9 @@ SPEC CHECKSUMS:
|
||||
flutter_secure_storage: 1ed9476fba7e7a782b22888f956cce43e2c62f13
|
||||
flutter_tts: b88dbc8655d3dc961bc4a796e4e16a4cc1795833
|
||||
image_picker_ios: 7fe1ff8e34c1790d6fff70a32484959f563a928a
|
||||
mic_stream_recorder: 27d2d1225563a3a28bf4019fc5cc198cffd7dad1
|
||||
package_info_plus: af8e2ca6888548050f16fa2f1938db7b5a5df499
|
||||
path_provider_foundation: 080d55be775b7414fd5a5ef3ac137b97b097e564
|
||||
record_ios: f75fa1d57f840012775c0e93a38a7f3ceea1a374
|
||||
SDWebImage: f29024626962457f3470184232766516dee8dfea
|
||||
share_handler_ios: e2244e990f826b2c8eaa291ac3831569438ba0fb
|
||||
share_handler_ios_models: fc638c9b4330dc7f082586c92aee9dfa0b87b871
|
||||
|
||||
@@ -28,6 +28,7 @@ final class PreferenceKeys {
|
||||
static const String ttsEngine = 'tts_engine'; // 'device' | 'server'
|
||||
static const String ttsServerVoiceId = 'tts_server_voice_id';
|
||||
static const String ttsServerVoiceName = 'tts_server_voice_name';
|
||||
static const String voiceSilenceDuration = 'voice_silence_duration';
|
||||
}
|
||||
|
||||
final class LegacyPreferenceKeys {
|
||||
|
||||
@@ -35,6 +35,9 @@ class SettingsService {
|
||||
.quickPills; // StringList of identifiers e.g. ['web','image','tools']
|
||||
// Chat input behavior
|
||||
static const String _sendOnEnterKey = PreferenceKeys.sendOnEnterKey;
|
||||
// Voice silence duration for auto-stop (milliseconds)
|
||||
static const String _voiceSilenceDurationKey =
|
||||
PreferenceKeys.voiceSilenceDuration;
|
||||
static Box<dynamic> _preferencesBox() =>
|
||||
Hive.box<dynamic>(HiveBoxNames.preferences);
|
||||
|
||||
@@ -157,6 +160,8 @@ class SettingsService {
|
||||
sttPreference: _parseSttPreference(
|
||||
box.get(PreferenceKeys.voiceSttPreference) as String?,
|
||||
),
|
||||
voiceSilenceDuration:
|
||||
(box.get(_voiceSilenceDurationKey) as int? ?? 2000).clamp(300, 3000),
|
||||
),
|
||||
);
|
||||
}
|
||||
@@ -181,6 +186,7 @@ class SettingsService {
|
||||
PreferenceKeys.ttsVolume: settings.ttsVolume,
|
||||
PreferenceKeys.ttsEngine: settings.ttsEngine.name,
|
||||
PreferenceKeys.voiceSttPreference: settings.sttPreference.name,
|
||||
_voiceSilenceDurationKey: settings.voiceSilenceDuration,
|
||||
};
|
||||
|
||||
await box.putAll(updates);
|
||||
@@ -331,6 +337,16 @@ class SettingsService {
|
||||
return _preferencesBox().put(_sendOnEnterKey, value);
|
||||
}
|
||||
|
||||
/// Returns the voice auto-stop silence timeout, in milliseconds.
///
/// Reads the value stored under [_voiceSilenceDurationKey] in the preferences
/// box, substitutes 2000 when nothing is stored, and clamps the result to the
/// 300–3000 range before wrapping it with [Future.value].
static Future<int> getVoiceSilenceDuration() {
  final stored = _preferencesBox().get(_voiceSilenceDurationKey) as int?;
  final effective = (stored ?? 2000).clamp(300, 3000);
  return Future.value(effective);
}
|
||||
|
||||
/// Stores the voice auto-stop silence timeout.
///
/// [milliseconds] is clamped to the 300–3000 range and the clamped value is
/// written to the preferences box under [_voiceSilenceDurationKey].
static Future<void> setVoiceSilenceDuration(int milliseconds) =>
    _preferencesBox().put(
      _voiceSilenceDurationKey,
      milliseconds.clamp(300, 3000),
    );
|
||||
|
||||
/// Get effective animation duration considering all settings
|
||||
static Duration getEffectiveAnimationDuration(
|
||||
BuildContext context,
|
||||
@@ -394,6 +410,7 @@ class AppSettings {
|
||||
final TtsEngine ttsEngine;
|
||||
final String? ttsServerVoiceId;
|
||||
final String? ttsServerVoiceName;
|
||||
final int voiceSilenceDuration;
|
||||
const AppSettings({
|
||||
this.reduceMotion = false,
|
||||
this.animationSpeed = 1.0,
|
||||
@@ -416,6 +433,7 @@ class AppSettings {
|
||||
this.ttsEngine = TtsEngine.auto,
|
||||
this.ttsServerVoiceId,
|
||||
this.ttsServerVoiceName,
|
||||
this.voiceSilenceDuration = 2000,
|
||||
});
|
||||
|
||||
AppSettings copyWith({
|
||||
@@ -440,6 +458,7 @@ class AppSettings {
|
||||
TtsEngine? ttsEngine,
|
||||
Object? ttsServerVoiceId = const _DefaultValue(),
|
||||
Object? ttsServerVoiceName = const _DefaultValue(),
|
||||
int? voiceSilenceDuration,
|
||||
}) {
|
||||
return AppSettings(
|
||||
reduceMotion: reduceMotion ?? this.reduceMotion,
|
||||
@@ -471,6 +490,7 @@ class AppSettings {
|
||||
ttsServerVoiceName: ttsServerVoiceName is _DefaultValue
|
||||
? this.ttsServerVoiceName
|
||||
: ttsServerVoiceName as String?,
|
||||
voiceSilenceDuration: voiceSilenceDuration ?? this.voiceSilenceDuration,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -497,6 +517,7 @@ class AppSettings {
|
||||
other.ttsEngine == ttsEngine &&
|
||||
other.ttsServerVoiceId == ttsServerVoiceId &&
|
||||
other.ttsServerVoiceName == ttsServerVoiceName &&
|
||||
other.voiceSilenceDuration == voiceSilenceDuration &&
|
||||
_listEquals(other.quickPills, quickPills);
|
||||
// socketTransportMode intentionally not included in == to avoid frequent rebuilds
|
||||
}
|
||||
@@ -524,6 +545,7 @@ class AppSettings {
|
||||
ttsEngine,
|
||||
ttsServerVoiceId,
|
||||
ttsServerVoiceName,
|
||||
voiceSilenceDuration,
|
||||
Object.hashAllUnordered(quickPills),
|
||||
]);
|
||||
}
|
||||
@@ -679,6 +701,11 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
|
||||
await SettingsService.saveSettings(state);
|
||||
}
|
||||
|
||||
/// Updates the voice auto-stop silence timeout and persists it.
///
/// [milliseconds] is clamped to the 300–3000 range before it is published to
/// [state], so the in-memory settings always hold the same value that
/// [SettingsService.setVoiceSilenceDuration] writes to storage.
Future<void> setVoiceSilenceDuration(int milliseconds) async {
  // Clamp once up front: the service clamps what it stores, so publishing the
  // raw argument could leave state out of step with the persisted value.
  final sanitized = milliseconds.clamp(300, 3000);
  state = state.copyWith(voiceSilenceDuration: sanitized);
  await SettingsService.setVoiceSilenceDuration(sanitized);
}
|
||||
|
||||
Future<void> resetToDefaults() async {
|
||||
const defaultSettings = AppSettings();
|
||||
await SettingsService.saveSettings(defaultSettings);
|
||||
|
||||
@@ -26,6 +26,7 @@ class VoiceInputService {
|
||||
final MicStreamRecorder _recorder = MicStreamRecorder();
|
||||
final Stt _speech = Stt();
|
||||
final ApiService? _api;
|
||||
final Ref? _ref;
|
||||
bool _isInitialized = false;
|
||||
bool _isListening = false;
|
||||
bool _localSttAvailable = false;
|
||||
@@ -59,7 +60,9 @@ class VoiceInputService {
|
||||
bool get prefersServerOnly => _preference == SttPreference.serverOnly;
|
||||
bool get prefersDeviceOnly => _preference == SttPreference.deviceOnly;
|
||||
|
||||
VoiceInputService({ApiService? api}) : _api = api;
|
||||
VoiceInputService({ApiService? api, Ref? ref})
|
||||
: _api = api,
|
||||
_ref = ref;
|
||||
|
||||
void updatePreference(SttPreference preference) {
|
||||
_preference = preference;
|
||||
@@ -451,7 +454,8 @@ class VoiceInputService {
|
||||
_silenceTimer?.cancel();
|
||||
_silenceTimer = null;
|
||||
} else if (_hasDetectedSpeech && _silenceTimer == null) {
|
||||
_silenceTimer = Timer(const Duration(milliseconds: 800), () {
|
||||
final silenceDuration = _ref?.read(appSettingsProvider).voiceSilenceDuration ?? 2000;
|
||||
_silenceTimer = Timer(Duration(milliseconds: silenceDuration), () {
|
||||
if (_isListening && _usingServerStt) {
|
||||
unawaited(_stopListening());
|
||||
}
|
||||
@@ -652,7 +656,7 @@ class VoiceInputService {
|
||||
|
||||
final voiceInputServiceProvider = Provider<VoiceInputService>((ref) {
|
||||
final api = ref.watch(apiServiceProvider);
|
||||
final service = VoiceInputService(api: api);
|
||||
final service = VoiceInputService(api: api, ref: ref);
|
||||
final currentSettings = ref.read(appSettingsProvider);
|
||||
service.updatePreference(currentSettings.sttPreference);
|
||||
ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
|
||||
|
||||
@@ -684,6 +684,83 @@ class AppCustomizationPage extends ConsumerWidget {
|
||||
),
|
||||
),
|
||||
],
|
||||
if (settings.sttPreference == SttPreference.serverOnly ||
|
||||
(settings.sttPreference == SttPreference.auto &&
|
||||
serverAvailable)) ...[
|
||||
const SizedBox(height: Spacing.md),
|
||||
const Divider(),
|
||||
const SizedBox(height: Spacing.md),
|
||||
Row(
|
||||
children: [
|
||||
Expanded(
|
||||
child: Column(
|
||||
crossAxisAlignment: CrossAxisAlignment.start,
|
||||
children: [
|
||||
Text(
|
||||
l10n.sttSilenceDuration,
|
||||
style: theme.bodyMedium?.copyWith(
|
||||
color: theme.sidebarForeground,
|
||||
fontWeight: FontWeight.w600,
|
||||
) ??
|
||||
TextStyle(
|
||||
color: theme.sidebarForeground,
|
||||
fontSize: 14,
|
||||
fontWeight: FontWeight.w600,
|
||||
),
|
||||
),
|
||||
const SizedBox(height: Spacing.xs),
|
||||
Text(
|
||||
'${settings.voiceSilenceDuration}ms',
|
||||
style: theme.bodySmall?.copyWith(
|
||||
color: theme.sidebarForeground
|
||||
.withValues(alpha: 0.7),
|
||||
) ??
|
||||
TextStyle(
|
||||
color: theme.sidebarForeground
|
||||
.withValues(alpha: 0.7),
|
||||
fontSize: 12,
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
Text(
|
||||
'${(settings.voiceSilenceDuration / 1000).toStringAsFixed(1)}s',
|
||||
style: theme.bodyMedium?.copyWith(
|
||||
color: theme.buttonPrimary,
|
||||
fontWeight: FontWeight.w600,
|
||||
) ??
|
||||
TextStyle(
|
||||
color: theme.buttonPrimary,
|
||||
fontSize: 14,
|
||||
fontWeight: FontWeight.w600,
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
const SizedBox(height: Spacing.sm),
|
||||
Slider(
|
||||
value: settings.voiceSilenceDuration.toDouble(),
|
||||
min: 300,
|
||||
max: 3000,
|
||||
divisions: 27,
|
||||
activeColor: theme.buttonPrimary,
|
||||
inactiveColor: theme.cardBorder.withValues(alpha: 0.4),
|
||||
onChanged: (value) {
|
||||
notifier.setVoiceSilenceDuration(value.round());
|
||||
},
|
||||
),
|
||||
Text(
|
||||
l10n.sttSilenceDurationDescription,
|
||||
style: theme.bodySmall?.copyWith(
|
||||
color: theme.sidebarForeground.withValues(alpha: 0.7),
|
||||
) ??
|
||||
TextStyle(
|
||||
color: theme.sidebarForeground.withValues(alpha: 0.7),
|
||||
fontSize: 12,
|
||||
),
|
||||
),
|
||||
],
|
||||
],
|
||||
),
|
||||
),
|
||||
|
||||
@@ -317,6 +317,8 @@
|
||||
"sttEngineServerDescription": "Sendet Aufnahmen immer an deinen OpenWebUI-Server zur Transkription.",
|
||||
"sttDeviceUnavailableWarning": "Auf diesem Gerät steht keine Spracherkennung zur Verfügung.",
|
||||
"sttServerUnavailableWarning": "Verbinde dich mit einem Server mit aktivierter Transkription, um diese Option zu nutzen.",
|
||||
"sttSilenceDuration": "Stille-Dauer",
|
||||
"sttSilenceDurationDescription": "Zeit nach Stille warten, bevor die Aufnahme automatisch gestoppt wird",
|
||||
"ttsSettings": "Text zu Sprache",
|
||||
"ttsEngineAuto": "Automatisch",
|
||||
"ttsEngineAutoDescription": "Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.",
|
||||
|
||||
@@ -1259,6 +1259,14 @@
|
||||
"@sttServerUnavailableWarning": {
|
||||
"description": "Warning shown when the user selects server speech recognition but no server is available."
|
||||
},
|
||||
"sttSilenceDuration": "Silence Duration",
|
||||
"@sttSilenceDuration": {
|
||||
"description": "Label for the silence duration setting in server speech-to-text."
|
||||
},
|
||||
"sttSilenceDurationDescription": "Time to wait after silence before auto-stopping recording",
|
||||
"@sttSilenceDurationDescription": {
|
||||
"description": "Description for the silence duration slider in server speech-to-text settings."
|
||||
},
|
||||
"ttsEngineLabel": "Engine",
|
||||
"@ttsEngineLabel": {
|
||||
"description": "Label for selecting the text-to-speech engine."
|
||||
|
||||
@@ -317,6 +317,8 @@
|
||||
"sttEngineServerDescription": "Envía siempre las grabaciones a tu servidor OpenWebUI para la transcripción.",
|
||||
"sttDeviceUnavailableWarning": "El reconocimiento de voz en el dispositivo no está disponible en este dispositivo.",
|
||||
"sttServerUnavailableWarning": "Conéctate a un servidor con transcripción habilitada para usar esta opción.",
|
||||
"sttSilenceDuration": "Duración del silencio",
|
||||
"sttSilenceDurationDescription": "Tiempo de espera después del silencio antes de detener automáticamente la grabación",
|
||||
"ttsSettings": "Texto a voz",
|
||||
"ttsEngineAuto": "Automático",
|
||||
"ttsEngineAutoDescription": "Usa la síntesis en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.",
|
||||
|
||||
@@ -317,7 +317,9 @@
|
||||
"sttEngineServerDescription": "Envoie toujours les enregistrements à votre serveur OpenWebUI pour transcription.",
|
||||
"sttDeviceUnavailableWarning": "La reconnaissance vocale sur l’appareil n’est pas disponible sur cet appareil.",
|
||||
"sttServerUnavailableWarning": "Connectez-vous à un serveur avec la transcription activée pour utiliser cette option.",
|
||||
"ttsSettings": "Synthèse vocale",
|
||||
"sttSilenceDuration": "Durée du silence",
|
||||
"sttSilenceDurationDescription": "Temps d'attente après le silence avant d'arrêter automatiquement l'enregistrement",
|
||||
"ttsSettings": "Synthèse vocale",
|
||||
"ttsEngineAuto": "Auto",
|
||||
"ttsEngineAutoDescription": "Utilise la synthèse locale quand c’est possible, sinon bascule vers votre serveur.",
|
||||
"ttsEngineDeviceDescription": "Garde la synthèse sur cet appareil. La lecture vocale ne fonctionne plus si l’appareil n’offre pas la synthèse vocale.",
|
||||
|
||||
@@ -317,7 +317,9 @@
|
||||
"sttEngineServerDescription": "Invia sempre le registrazioni al tuo server OpenWebUI per la trascrizione.",
|
||||
"sttDeviceUnavailableWarning": "Il riconoscimento vocale sul dispositivo non è disponibile su questo dispositivo.",
|
||||
"sttServerUnavailableWarning": "Collegati a un server con la trascrizione abilitata per usare questa opzione.",
|
||||
"ttsSettings": "Sintesi vocale",
|
||||
"sttSilenceDuration": "Durata del silenzio",
|
||||
"sttSilenceDurationDescription": "Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione",
|
||||
"ttsSettings": "Sintesi vocale",
|
||||
"ttsEngineAuto": "Automatico",
|
||||
"ttsEngineAutoDescription": "Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.",
|
||||
"ttsEngineDeviceDescription": "Mantiene la sintesi su questo dispositivo. La riproduzione vocale non funziona se il dispositivo non supporta il TTS.",
|
||||
|
||||
@@ -317,7 +317,9 @@
|
||||
"sttEngineServerDescription": "Stuurt opnames altijd naar je OpenWebUI-server voor transcriptie.",
|
||||
"sttDeviceUnavailableWarning": "Spraakherkenning op het apparaat is niet beschikbaar op dit apparaat.",
|
||||
"sttServerUnavailableWarning": "Verbind met een server met transcriptie ingeschakeld om deze optie te gebruiken.",
|
||||
"ttsSettings": "Tekst naar spraak",
|
||||
"sttSilenceDuration": "Stilteduur",
|
||||
"sttSilenceDurationDescription": "Tijd om te wachten na stilte voordat de opname automatisch stopt",
|
||||
"ttsSettings": "Tekst naar spraak",
|
||||
"ttsEngineAuto": "Automatisch",
|
||||
"ttsEngineAutoDescription": "Gebruikt spraaksynthese op het apparaat wanneer beschikbaar en valt anders terug op je server.",
|
||||
"ttsEngineDeviceDescription": "Houdt de synthese op dit apparaat. Spraakweergave werkt niet als het apparaat geen TTS ondersteunt.",
|
||||
|
||||
@@ -317,7 +317,9 @@
|
||||
"sttEngineServerDescription": "Всегда отправляет записи на сервер OpenWebUI для транскрибации.",
|
||||
"sttDeviceUnavailableWarning": "Распознавание речи на устройстве недоступно на этом устройстве.",
|
||||
"sttServerUnavailableWarning": "Подключитесь к серверу с включённой транскрибацией, чтобы использовать эту опцию.",
|
||||
"ttsSettings": "Преобразование текста в речь",
|
||||
"sttSilenceDuration": "Длительность тишины",
|
||||
"sttSilenceDurationDescription": "Время ожидания после тишины перед автоматической остановкой записи",
|
||||
"ttsSettings": "Преобразование текста в речь",
|
||||
"ttsEngineAuto": "Авто",
|
||||
"ttsEngineAutoDescription": "Использует синтез речи на устройстве, когда это возможно, иначе переключается на ваш сервер.",
|
||||
"ttsEngineDeviceDescription": "Оставляет синтез на этом устройстве. Воспроизведение голоса не работает, если устройство не поддерживает синтез речи.",
|
||||
|
||||
@@ -317,7 +317,9 @@
|
||||
"sttEngineServerDescription": "始终将录音发送到你的 OpenWebUI 服务器进行转写。",
|
||||
"sttDeviceUnavailableWarning": "此设备不支持本机语音识别。",
|
||||
"sttServerUnavailableWarning": "连接到启用转写功能的服务器后才能使用此选项。",
|
||||
"ttsSettings": "文本转语音",
|
||||
"sttSilenceDuration": "静音持续时间",
|
||||
"sttSilenceDurationDescription": "检测到静音后等待多久自动停止录音",
|
||||
"ttsSettings": "文本转语音",
|
||||
"ttsEngineAuto": "自动",
|
||||
"ttsEngineAutoDescription": "在可用时使用本机合成,否则切换到你的服务器。",
|
||||
"ttsEngineDeviceDescription": "在此设备上完成合成。如果设备不支持文本转语音,语音播放将不可用。",
|
||||
|
||||
Reference in New Issue
Block a user