From f885513a89aa16f4ed7a9ec8ee110e9e77ef9c86 Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Thu, 13 Nov 2025 12:21:59 +0530 Subject: [PATCH] feat(voice): Improve voice input service with locale handling and permission checks --- .../chat/services/voice_input_service.dart | 146 ++++++++++++++---- pubspec.lock | 2 +- pubspec.yaml | 1 + 3 files changed, 118 insertions(+), 31 deletions(-) diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart index df7e2fe..34812e7 100644 --- a/lib/features/chat/services/voice_input_service.dart +++ b/lib/features/chat/services/voice_input_service.dart @@ -5,6 +5,7 @@ import 'dart:typed_data'; import 'package:flutter/widgets.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; +import 'package:record/record.dart'; import 'package:riverpod_annotation/riverpod_annotation.dart'; import 'package:stts/stts.dart'; import 'package:vad/vad.dart'; @@ -25,9 +26,11 @@ class LocaleName { class VoiceInputService { static const int _vadSampleRate = 16000; static const int _vadFrameSamples = 1536; + static const Duration _localeFetchTimeout = Duration(seconds: 2); final VadHandler _vadHandler = VadHandler.create(); final Stt _speech = Stt(); + final AudioRecorder _microphonePermissionProbe = AudioRecorder(); final ApiService? _api; final Ref? _ref; bool _isInitialized = false; @@ -37,6 +40,7 @@ class VoiceInputService { bool _usingServerStt = false; String? _selectedLocaleId; List _locales = const []; + bool _usingFallbackLocales = false; StreamController? _textStreamController; String _currentText = ''; StreamController? _intensityController; @@ -77,31 +81,9 @@ class VoiceInputService { // Check permission and supported status _localSttAvailable = await _speech.isSupported(); if (_localSttAvailable) { - try { - final langs = await _speech.getLanguages(); - _locales = langs.map((l) => LocaleName(l, l)).toList(); - final deviceTag = WidgetsBinding.instance.platformDispatcher.locale - .toLanguageTag(); - final match = _locales.firstWhere( - (l) => l.localeId.toLowerCase() == deviceTag.toLowerCase(), - orElse: () { - final primary = deviceTag - .split(RegExp('[-_]')) - .first - .toLowerCase(); - return _locales.firstWhere( - (l) => l.localeId.toLowerCase().startsWith('$primary-'), - orElse: () => _locales.isNotEmpty - ? _locales.first - : LocaleName('en_US', 'en_US'), - ); - }, - ); - _selectedLocaleId = match.localeId; - } catch (e) { - // ignore locale load errors - _selectedLocaleId = null; - } + final deviceTag = WidgetsBinding.instance.platformDispatcher.locale + .toLanguageTag(); + await _loadLocales(deviceTag); } } catch (_) { _localSttAvailable = false; @@ -111,17 +93,34 @@ class VoiceInputService { } Future checkPermissions() async { - try { - return await _speech.hasPermission(); - } catch (_) { + final micGranted = await _ensureMicrophonePermission(); + if (!micGranted) { return false; } + if (_localSttAvailable && _preference != SttPreference.serverOnly) { + try { + final sttGranted = await _speech.hasPermission(); + if (!sttGranted) { + if (prefersDeviceOnly) { + return false; + } + _localSttAvailable = false; + } + } catch (_) { + if (prefersDeviceOnly) { + return false; + } + _localSttAvailable = false; + } + } + return true; } bool get isListening => _isListening; bool get isAvailable => _isInitialized && (_localSttAvailable || hasServerStt); bool get hasLocalStt => _localSttAvailable; + bool get localeMetadataIncomplete => _usingFallbackLocales; // Add a method to check if on-device STT is properly supported Future checkOnDeviceSupport() async { @@ -183,6 +182,89 @@ class VoiceInputService { _selectedLocaleId = localeId; } + Future _loadLocales(String deviceTag) async { + _ensureFallbackLocale(deviceTag); + List langs = const []; + try { + langs = await _speech.getLanguages().timeout( + _localeFetchTimeout, + onTimeout: () => const [], + ); + } catch (_) { + // Engines such as Whisper Voice may not support this call. + langs = const []; + } + if (langs.isEmpty) { + return; + } + _locales = langs.map((locale) => LocaleName(locale, locale)).toList(); + _usingFallbackLocales = false; + final match = _matchLocale(deviceTag); + _selectedLocaleId = match.localeId; + } + + void _ensureFallbackLocale(String deviceTag) { + if (_locales.isNotEmpty && _selectedLocaleId != null) { + return; + } + _usingFallbackLocales = true; + if (deviceTag.isEmpty) { + _locales = const [LocaleName('en_US', 'en_US')]; + _selectedLocaleId = 'en_US'; + return; + } + _locales = [LocaleName(deviceTag, deviceTag)]; + _selectedLocaleId = deviceTag; + } + + LocaleName _matchLocale(String deviceTag) { + if (_locales.isEmpty) { + return const LocaleName('en_US', 'en_US'); + } + final normalizedDevice = deviceTag.toLowerCase(); + for (final locale in _locales) { + if (locale.localeId.toLowerCase() == normalizedDevice) { + return locale; + } + } + final parts = normalizedDevice.split(RegExp('[-_]')); + final primary = parts.isNotEmpty ? parts.first : normalizedDevice; + for (final locale in _locales) { + if (locale.localeId.toLowerCase().startsWith('$primary-')) { + return locale; + } + } + return _locales.first; + } + + void _handleLocalRecognizerError(Object? error) { + if (!_isListening) { + return; + } + _localSttAvailable = false; + final message = error?.toString().trim(); + final exception = Exception( + (message == null || message.isEmpty) + ? 'Speech recognition failed' + : message, + ); + if (hasServerStt && allowsServerFallback) { + _textStreamController?.addError(exception); + unawaited(_beginServerFallback()); + return; + } + _textStreamController?.addError(exception); + unawaited(_stopListening()); + } + + Future _ensureMicrophonePermission() async { + try { + return await _microphonePermissionProbe.hasPermission(); + } catch (_) { + return false; + } + } + Stream startListening() { if (!_isInitialized) { throw Exception('Voice input not initialized'); @@ -247,9 +329,12 @@ class VoiceInputService { if (result.isFinal) { unawaited(_stopListening()); } - }, onError: (_) {}); + }, onError: _handleLocalRecognizerError); - _sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {}); + _sttStateSub = _speech.onStateChanged.listen( + (_) {}, + onError: _handleLocalRecognizerError, + ); Future(() async { try { @@ -709,6 +794,7 @@ class VoiceInputService { void dispose() { stopListening(); unawaited(_vadHandler.dispose()); + unawaited(_microphonePermissionProbe.dispose()); try { _speech.dispose().catchError((_) {}); } catch (_) {} diff --git a/pubspec.lock b/pubspec.lock index f12e607..8cfd515 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -1166,7 +1166,7 @@ packages: source: hosted version: "1.5.0" record: - dependency: transitive + dependency: "direct main" description: name: record sha256: "6bad72fb3ea6708d724cf8b6c97c4e236cf9f43a52259b654efeb6fd9b737f1f" diff --git a/pubspec.yaml b/pubspec.yaml index 5e85654..1df56ef 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -46,6 +46,7 @@ dependencies: # Platform Features vad: ^0.0.7+1 stts: ^1.2.5 + record: ^6.1.2 flutter_tts: ^4.2.3 audioplayers: ^6.5.1 image_picker: ^1.2.0