feat(tts): Remove auto engine and fix iOS STT

This commit is contained in:
cogwheel0
2025-11-21 13:15:20 +05:30
parent 74807a2f5f
commit 84af6bbe86
15 changed files with 52 additions and 190 deletions

View File

@@ -9,10 +9,10 @@ import 'animation_service.dart';
part 'settings_service.g.dart';
/// Speech-to-text preference selection.
enum SttPreference { auto, deviceOnly, serverOnly }
enum SttPreference { deviceOnly, serverOnly }
/// TTS engine selection
enum TtsEngine { auto, device, server }
enum TtsEngine { device, server }
/// Service for managing app-wide settings including accessibility preferences
class SettingsService {
@@ -232,15 +232,12 @@ class SettingsService {
static TtsEngine _parseTtsEngine(String? raw) {
switch ((raw ?? '').toLowerCase()) {
case 'auto':
case '':
return TtsEngine.auto;
case 'server':
return TtsEngine.server;
case 'device':
return TtsEngine.device;
default:
return TtsEngine.auto;
return TtsEngine.device;
}
}
@@ -254,9 +251,8 @@ class SettingsService {
case 'server_only':
case 'server':
return SttPreference.serverOnly;
case 'auto':
default:
return SttPreference.auto;
return SttPreference.deviceOnly;
}
}
@@ -428,12 +424,12 @@ class AppSettings {
this.socketTransportMode = 'ws',
this.quickPills = const [],
this.sendOnEnter = false,
this.sttPreference = SttPreference.auto,
this.sttPreference = SttPreference.deviceOnly,
this.ttsVoice,
this.ttsSpeechRate = 0.5,
this.ttsPitch = 1.0,
this.ttsVolume = 1.0,
this.ttsEngine = TtsEngine.auto,
this.ttsEngine = TtsEngine.device,
this.ttsServerVoiceId,
this.ttsServerVoiceName,
this.voiceSilenceDuration = 2000,

View File

@@ -23,7 +23,7 @@ class TextToSpeechService {
final FlutterTts _tts = FlutterTts();
final AudioPlayer _player = AudioPlayer();
final ApiService? _api;
TtsEngine _engine = TtsEngine.auto;
TtsEngine _engine = TtsEngine.device;
String? _preferredVoice;
String? _serverPreferredVoice;
double _speechRate = 0.5;
@@ -127,11 +127,9 @@ class TextToSpeechService {
final serverAvailable = _api != null;
switch (_engine) {
case TtsEngine.device:
return _deviceEngineAvailable;
return _deviceEngineAvailable || serverAvailable;
case TtsEngine.server:
return serverAvailable;
case TtsEngine.auto:
return _deviceEngineAvailable || serverAvailable;
}
}
@@ -139,10 +137,7 @@ class TextToSpeechService {
if (_engine == TtsEngine.server) {
return _api != null;
}
if (_engine == TtsEngine.device) {
return false;
}
// Auto: prefer device when available, otherwise fall back to server
// Device preference with graceful fallback to server if available.
if (_deviceEngineAvailable) {
return false;
}
@@ -191,7 +186,7 @@ class TextToSpeechService {
double speechRate = 0.5,
double pitch = 1.0,
double volume = 1.0,
TtsEngine engine = TtsEngine.auto,
TtsEngine engine = TtsEngine.device,
}) async {
if (_initialized) {
_engine = engine;

View File

@@ -133,9 +133,8 @@ class VoiceCallService {
final hasLocalStt = _voiceInput.hasLocalStt;
final hasServerStt = _voiceInput.hasServerStt;
final ready = switch (_voiceInput.preference) {
SttPreference.deviceOnly => hasLocalStt,
SttPreference.deviceOnly => hasLocalStt || hasServerStt,
SttPreference.serverOnly => hasServerStt,
SttPreference.auto => hasLocalStt || hasServerStt,
};
if (!ready) {
@@ -240,9 +239,8 @@ class VoiceCallService {
final hasServerStt = _voiceInput.hasServerStt;
final pref = _voiceInput.preference;
final engineAvailable = switch (pref) {
SttPreference.deviceOnly => hasLocalStt,
SttPreference.deviceOnly => hasLocalStt || hasServerStt,
SttPreference.serverOnly => hasServerStt,
SttPreference.auto => hasLocalStt || hasServerStt,
};
if (!engineAvailable) {

View File

@@ -36,7 +36,7 @@ class VoiceInputService {
bool _isInitialized = false;
bool _isListening = false;
bool _localSttAvailable = false;
SttPreference _preference = SttPreference.auto;
SttPreference _preference = SttPreference.deviceOnly;
bool _usingServerStt = false;
String? _selectedLocaleId;
List<LocaleName> _locales = const [];
@@ -63,7 +63,6 @@ class VoiceInputService {
bool get isSupportedPlatform => Platform.isAndroid || Platform.isIOS;
bool get hasServerStt => _api != null;
SttPreference get preference => _preference;
bool get allowsServerFallback => _preference != SttPreference.deviceOnly;
bool get prefersServerOnly => _preference == SttPreference.serverOnly;
bool get prefersDeviceOnly => _preference == SttPreference.deviceOnly;
@@ -101,15 +100,9 @@ class VoiceInputService {
try {
final sttGranted = await _speech.hasPermission();
if (!sttGranted) {
if (prefersDeviceOnly) {
return false;
}
_localSttAvailable = false;
}
} catch (_) {
if (prefersDeviceOnly) {
return false;
}
_localSttAvailable = false;
}
}
@@ -248,11 +241,6 @@ class VoiceInputService {
? 'Speech recognition failed'
: message,
);
if (hasServerStt && allowsServerFallback) {
_textStreamController?.addError(exception);
unawaited(_beginServerFallback());
return;
}
_textStreamController?.addError(exception);
unawaited(_stopListening());
}
@@ -265,6 +253,35 @@ class VoiceInputService {
}
}
/// Starts on-device speech recognition, preferring offline mode.
///
/// Applies [_selectedLocaleId] to the recognizer first when one is set, then
/// starts recognition with `offline: true`.
///
/// If that first start throws and the platform is iOS with
/// [allowOnlineFallback] enabled, a second start is attempted with
/// `offline: false`. When the second attempt also throws, an [Exception]
/// whose message contains both failures is thrown. In every other failure
/// case the original error is rethrown unchanged.
Future<void> _startLocalRecognition({
  required bool allowOnlineFallback,
}) async {
  // Copy the nullable field to a local so it promotes without `!`.
  final localeId = _selectedLocaleId;
  if (localeId != null) {
    await _speech.setLanguage(localeId);
  }

  // Single place that builds the recognition options for either mode.
  Future<void> startWith({required bool offline}) {
    return _speech.start(
      SttRecognitionOptions(punctuation: true, offline: offline),
    );
  }

  try {
    await startWith(offline: true);
  } catch (offlineError) {
    final mayRetryOnline = Platform.isIOS && allowOnlineFallback;
    if (!mayRetryOnline) rethrow;

    try {
      await startWith(offline: false);
    } catch (onlineError) {
      // Surface both failures so the caller sees why each attempt failed.
      throw Exception(
        'On-device speech failed ($offlineError); '
        'online fallback failed ($onlineError).',
      );
    }
  }
}
Stream<String> startListening() {
if (!_isInitialized) {
throw Exception('Voice input not initialized');
@@ -304,11 +321,10 @@ class VoiceInputService {
final isStillAvailable = await _speech.isSupported();
if (!isStillAvailable && _isListening) {
_localSttAvailable = false;
if (hasServerStt && allowsServerFallback) {
unawaited(_beginServerFallback());
} else {
unawaited(_stopListening());
}
_textStreamController?.addError(
Exception('On-device speech recognition unavailable'),
);
unawaited(_stopListening());
}
} catch (_) {
// ignore availability check errors
@@ -338,19 +354,12 @@ class VoiceInputService {
Future(() async {
try {
if (_selectedLocaleId != null) {
await _speech.setLanguage(_selectedLocaleId!);
}
await _speech.start(SttRecognitionOptions(punctuation: true));
await _startLocalRecognition(allowOnlineFallback: !prefersDeviceOnly);
} catch (error) {
_localSttAvailable = false;
if (!_isListening) return;
if (hasServerStt && allowsServerFallback) {
await _beginServerFallback();
} else {
_textStreamController?.addError(error);
await _stopListening();
}
_textStreamController?.addError(error);
await _stopListening();
}
});
} else if (shouldUseServer) {
@@ -457,39 +466,6 @@ class VoiceInputService {
}
}
Future<void> _beginServerFallback() async {
if (!allowsServerFallback) {
_textStreamController?.addError(
Exception('Server speech-to-text disabled in preferences'),
);
await _stopListening();
return;
}
await _stopLocalStt();
if (!hasServerStt) {
_textStreamController?.addError(
Exception('Server speech-to-text unavailable'),
);
await _stopListening();
return;
}
_usingServerStt = true;
_autoStopTimer?.cancel();
_autoStopTimer = Timer(const Duration(seconds: 90), () {
if (_isListening) {
unawaited(_stopListening());
}
});
try {
await _startServerRecording();
} catch (error) {
_textStreamController?.addError(error);
await _stopListening();
}
}
Future<void> _startServerRecording() async {
await _setupVadStreams();
final settings = _ref?.read(appSettingsProvider);
@@ -823,13 +799,11 @@ Future<bool> voiceInputAvailable(Ref ref) async {
if (!initialized) return false;
switch (service.preference) {
case SttPreference.deviceOnly:
return service.hasLocalStt;
case SttPreference.serverOnly:
return service.hasServerStt;
case SttPreference.auto:
if (service.hasLocalStt) return true;
if (!service.hasServerStt) return false;
break;
case SttPreference.serverOnly:
return service.hasServerStt;
}
final hasPermission = await service.checkPermissions();
if (!hasPermission) return false;

View File

@@ -501,8 +501,6 @@ class AppCustomizationPage extends ConsumerWidget {
warnings.add(l10n.sttServerUnavailableWarning);
}
final bool autoSelectable =
localAvailable || serverAvailable || localLoading;
final bool deviceSelectable = localAvailable || localLoading;
final bool serverSelectable = serverAvailable;
@@ -554,31 +552,6 @@ class AppCustomizationPage extends ConsumerWidget {
spacing: Spacing.sm,
runSpacing: Spacing.sm,
children: [
ChoiceChip(
label: Text(l10n.sttEngineAuto),
selected: settings.sttPreference == SttPreference.auto,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.sttPreference == SttPreference.auto
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(alpha: 0.2),
),
labelStyle: TextStyle(
color: settings.sttPreference == SttPreference.auto
? theme.buttonPrimaryText
: theme.textPrimary,
fontWeight: FontWeight.w600,
),
onSelected: autoSelectable
? (value) {
if (value) {
notifier.setSttPreference(SttPreference.auto);
}
}
: null,
),
ChoiceChip(
label: Text(l10n.sttEngineDevice),
selected:
@@ -684,9 +657,7 @@ class AppCustomizationPage extends ConsumerWidget {
),
),
],
if (settings.sttPreference == SttPreference.serverOnly ||
(settings.sttPreference == SttPreference.auto &&
serverAvailable)) ...[
if (settings.sttPreference == SttPreference.serverOnly) ...[
const SizedBox(height: Spacing.md),
const Divider(),
const SizedBox(height: Spacing.md),
@@ -785,20 +756,11 @@ class AppCustomizationPage extends ConsumerWidget {
final bool deviceAvailable =
ttsService.deviceEngineAvailable || !ttsService.isInitialized;
final bool serverAvailable = ttsService.serverEngineAvailable;
final bool autoSelectable = deviceAvailable || serverAvailable;
final bool deviceSelectable = deviceAvailable;
final bool serverSelectable = serverAvailable;
final ttsDescription = _ttsPreferenceDescription(l10n, settings);
final warnings = <String>[];
switch (settings.ttsEngine) {
case TtsEngine.auto:
if (!deviceAvailable) {
warnings.add(l10n.ttsDeviceUnavailableWarning);
}
if (!serverAvailable) {
warnings.add(l10n.ttsServerUnavailableWarning);
}
break;
case TtsEngine.device:
if (!deviceAvailable) {
warnings.add(l10n.ttsDeviceUnavailableWarning);
@@ -852,37 +814,6 @@ class AppCustomizationPage extends ConsumerWidget {
spacing: Spacing.sm,
runSpacing: Spacing.sm,
children: [
ChoiceChip(
label: Text(l10n.ttsEngineAuto),
selected: settings.ttsEngine == TtsEngine.auto,
showCheckmark: false,
selectedColor: theme.buttonPrimary,
backgroundColor: theme.cardBackground,
side: BorderSide(
color: settings.ttsEngine == TtsEngine.auto
? theme.buttonPrimary.withValues(alpha: 0.6)
: theme.textPrimary.withValues(
alpha: autoSelectable ? 0.2 : 0.12,
),
),
labelStyle: TextStyle(
color: settings.ttsEngine == TtsEngine.auto
? theme.buttonPrimaryText
: theme.textPrimary.withValues(
alpha: autoSelectable ? 1.0 : 0.45,
),
fontWeight: FontWeight.w600,
),
onSelected: autoSelectable
? (value) {
if (value) {
ref
.read(appSettingsProvider.notifier)
.setTtsEngine(TtsEngine.auto);
}
}
: null,
),
ChoiceChip(
label: Text(l10n.ttsEngineDevice),
selected: settings.ttsEngine == TtsEngine.device,
@@ -1060,8 +991,6 @@ class AppCustomizationPage extends ConsumerWidget {
SttPreference preference,
) {
switch (preference) {
case SttPreference.auto:
return l10n.sttEngineAutoDescription;
case SttPreference.deviceOnly:
return l10n.sttEngineDeviceDescription;
case SttPreference.serverOnly:
@@ -1074,8 +1003,6 @@ class AppCustomizationPage extends ConsumerWidget {
AppSettings settings,
) {
switch (settings.ttsEngine) {
case TtsEngine.auto:
return l10n.ttsEngineAutoDescription;
case TtsEngine.device:
return l10n.ttsEngineDeviceDescription;
case TtsEngine.server:
@@ -1093,8 +1020,6 @@ class AppCustomizationPage extends ConsumerWidget {
final serverName = _getDisplayVoiceName(serverVoice, l10n.ttsSystemDefault);
switch (settings.ttsEngine) {
case TtsEngine.auto:
return '${l10n.ttsEngineDevice}: $deviceName${l10n.ttsEngineServer}: $serverName';
case TtsEngine.device:
return deviceName;
case TtsEngine.server:

View File

@@ -321,8 +321,6 @@
"sttSilenceDuration": "Stille-Dauer",
"sttSilenceDurationDescription": "Zeit nach Stille warten, bevor die Aufnahme automatisch gestoppt wird",
"ttsSettings": "Text zu Sprache",
"ttsEngineAuto": "Automatisch",
"ttsEngineAutoDescription": "Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.",
"ttsEngineDeviceDescription": "Behält die Ausgabe auf diesem Gerät. Sprachausgabe funktioniert nicht, wenn das Gerät keine TTS-Unterstützung bietet.",
"ttsEngineServerDescription": "Sendet die Ausgabe immer an deinen OpenWebUI-Server.",
"ttsDeviceUnavailableWarning": "Sprachausgabe auf dem Gerät steht auf diesem Gerät nicht zur Verfügung.",

View File

@@ -1275,10 +1275,6 @@
"@ttsEngineLabel": {
"description": "Label for selecting the text-to-speech engine."
},
"ttsEngineAuto": "Auto",
"@ttsEngineAuto": {
"description": "Chip label for automatically selecting the text-to-speech engine."
},
"ttsEngineDevice": "On device",
"@ttsEngineDevice": {
"description": "Chip label for using on-device text-to-speech."
@@ -1287,10 +1283,6 @@
"@ttsEngineServer": {
"description": "Chip label for using server-side text-to-speech."
},
"ttsEngineAutoDescription": "Use on-device speech when available and fall back to your server.",
"@ttsEngineAutoDescription": {
"description": "Description shown when automatic text-to-speech preference is active."
},
"ttsEngineDeviceDescription": "Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.",
"@ttsEngineDeviceDescription": {
"description": "Description shown when on-device text-to-speech preference is active."

View File

@@ -321,8 +321,6 @@
"sttSilenceDuration": "Duración del silencio",
"sttSilenceDurationDescription": "Tiempo de espera después del silencio antes de detener automáticamente la grabación",
"ttsSettings": "Texto a voz",
"ttsEngineAuto": "Automático",
"ttsEngineAutoDescription": "Usa la síntesis en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.",
"ttsEngineDeviceDescription": "Mantiene la síntesis en este dispositivo. La reproducción de voz no funciona si el dispositivo no admite TTS.",
"ttsEngineServerDescription": "Solicita siempre el audio a tu servidor OpenWebUI.",
"ttsDeviceUnavailableWarning": "La síntesis de voz en el dispositivo no está disponible en este dispositivo.",

View File

@@ -321,8 +321,6 @@
"sttSilenceDuration": "Durée du silence",
"sttSilenceDurationDescription": "Temps d'attente après le silence avant d'arrêter automatiquement l'enregistrement",
"ttsSettings": "Synthèse vocale",
"ttsEngineAuto": "Auto",
"ttsEngineAutoDescription": "Utilise la synthèse locale quand c’est possible, sinon bascule vers votre serveur.",
"ttsEngineDeviceDescription": "Garde la synthèse sur cet appareil. La lecture vocale ne fonctionne plus si l’appareil n’offre pas la synthèse vocale.",
"ttsEngineServerDescription": "Demande toujours l'audio à votre serveur OpenWebUI.",
"ttsDeviceUnavailableWarning": "La synthèse vocale sur l’appareil n’est pas disponible sur cet appareil.",

View File

@@ -321,8 +321,6 @@
"sttSilenceDuration": "Durata del silenzio",
"sttSilenceDurationDescription": "Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione",
"ttsSettings": "Sintesi vocale",
"ttsEngineAuto": "Automatico",
"ttsEngineAutoDescription": "Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.",
"ttsEngineDeviceDescription": "Mantiene la sintesi su questo dispositivo. La riproduzione vocale non funziona se il dispositivo non supporta il TTS.",
"ttsEngineServerDescription": "Richiede sempre l'audio dal tuo server OpenWebUI.",
"ttsDeviceUnavailableWarning": "La sintesi vocale sul dispositivo non è disponibile su questo dispositivo.",

View File

@@ -447,10 +447,8 @@
"sttSilenceDuration": "침묵 지속 시간",
"sttSilenceDurationDescription": "침묵 후 자동 녹음 중지 전 대기 시간",
"ttsEngineLabel": "엔진",
"ttsEngineAuto": "자동",
"ttsEngineDevice": "기기에서",
"ttsEngineServer": "서버",
"ttsEngineAutoDescription": "사용 가능한 경우 기기에서 음성을 사용하고 서버로 폴백합니다.",
"ttsEngineDeviceDescription": "합성을 이 기기에서 유지합니다. 기기에서 TTS가 지원되지 않으면 음성 재생이 작동하지 않습니다.",
"ttsEngineServerDescription": "항상 OpenWebUI 서버에서 오디오를 요청합니다.",
"ttsDeviceUnavailableWarning": "이 기기에서 기기 텍스트 음성 변환을 사용할 수 없습니다.",

View File

@@ -321,8 +321,6 @@
"sttSilenceDuration": "Stilteduur",
"sttSilenceDurationDescription": "Tijd om te wachten na stilte voordat de opname automatisch stopt",
"ttsSettings": "Tekst naar spraak",
"ttsEngineAuto": "Automatisch",
"ttsEngineAutoDescription": "Gebruikt spraaksynthese op het apparaat wanneer beschikbaar en valt anders terug op je server.",
"ttsEngineDeviceDescription": "Houdt de synthese op dit apparaat. Spraakweergave werkt niet als het apparaat geen TTS ondersteunt.",
"ttsEngineServerDescription": "Vraagt altijd audio op bij je OpenWebUI-server.",
"ttsDeviceUnavailableWarning": "Spraaksynthese op het apparaat is niet beschikbaar op dit apparaat.",

View File

@@ -321,8 +321,6 @@
"sttSilenceDuration": "Длительность тишины",
"sttSilenceDurationDescription": "Время ожидания после тишины перед автоматической остановкой записи",
"ttsSettings": "Преобразование текста в речь",
"ttsEngineAuto": "Авто",
"ttsEngineAutoDescription": "Использует синтез речи на устройстве, когда это возможно, иначе переключается на ваш сервер.",
"ttsEngineDeviceDescription": "Оставляет синтез на этом устройстве. Воспроизведение голоса не работает, если устройство не поддерживает синтез речи.",
"ttsEngineServerDescription": "Всегда запрашивает аудио у вашего сервера OpenWebUI.",
"ttsDeviceUnavailableWarning": "Синтез речи на устройстве недоступен на этом устройстве.",

View File

@@ -321,8 +321,6 @@
"sttSilenceDuration": "静音持续时间",
"sttSilenceDurationDescription": "检测到静音后等待多久自动停止录音",
"ttsSettings": "文本转语音",
"ttsEngineAuto": "自动",
"ttsEngineAutoDescription": "在可用时使用本机合成,否则切换到你的服务器。",
"ttsEngineDeviceDescription": "在此设备上完成合成。如果设备不支持文本转语音,语音播放将不可用。",
"ttsEngineServerDescription": "始终向你的 OpenWebUI 服务器请求音频。",
"ttsDeviceUnavailableWarning": "此设备不支持本机文本转语音。",

View File

@@ -321,8 +321,6 @@
"sttSilenceDuration": "靜音持續時間",
"sttSilenceDurationDescription": "檢測到靜音後等待多久自動停止錄音",
"ttsSettings": "文本轉語音",
"ttsEngineAuto": "自動",
"ttsEngineAutoDescription": "在可用時使用本機合成,否則切換到你的服務器。",
"ttsEngineDeviceDescription": "在此設備上完成合成。如果設備不支持文本轉語音,語音播放將不可用。",
"ttsEngineServerDescription": "始終向你的 OpenWebUI 服務器請求音頻。",
"ttsDeviceUnavailableWarning": "此設備不支持本機文本轉語音。",