feat(voice): Improve voice input service with locale handling and permission checks

This commit is contained in:
cogwheel0
2025-11-13 12:21:59 +05:30
parent 926b5f1cdd
commit f885513a89
3 changed files with 118 additions and 31 deletions

View File

@@ -5,6 +5,7 @@ import 'dart:typed_data';
import 'package:flutter/widgets.dart'; import 'package:flutter/widgets.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart';
import 'package:record/record.dart';
import 'package:riverpod_annotation/riverpod_annotation.dart'; import 'package:riverpod_annotation/riverpod_annotation.dart';
import 'package:stts/stts.dart'; import 'package:stts/stts.dart';
import 'package:vad/vad.dart'; import 'package:vad/vad.dart';
@@ -25,9 +26,11 @@ class LocaleName {
class VoiceInputService { class VoiceInputService {
static const int _vadSampleRate = 16000; static const int _vadSampleRate = 16000;
static const int _vadFrameSamples = 1536; static const int _vadFrameSamples = 1536;
static const Duration _localeFetchTimeout = Duration(seconds: 2);
final VadHandler _vadHandler = VadHandler.create(); final VadHandler _vadHandler = VadHandler.create();
final Stt _speech = Stt(); final Stt _speech = Stt();
final AudioRecorder _microphonePermissionProbe = AudioRecorder();
final ApiService? _api; final ApiService? _api;
final Ref? _ref; final Ref? _ref;
bool _isInitialized = false; bool _isInitialized = false;
@@ -37,6 +40,7 @@ class VoiceInputService {
bool _usingServerStt = false; bool _usingServerStt = false;
String? _selectedLocaleId; String? _selectedLocaleId;
List<LocaleName> _locales = const []; List<LocaleName> _locales = const [];
bool _usingFallbackLocales = false;
StreamController<String>? _textStreamController; StreamController<String>? _textStreamController;
String _currentText = ''; String _currentText = '';
StreamController<int>? _intensityController; StreamController<int>? _intensityController;
@@ -77,31 +81,9 @@ class VoiceInputService {
// Check permission and supported status // Check permission and supported status
_localSttAvailable = await _speech.isSupported(); _localSttAvailable = await _speech.isSupported();
if (_localSttAvailable) { if (_localSttAvailable) {
try { final deviceTag = WidgetsBinding.instance.platformDispatcher.locale
final langs = await _speech.getLanguages(); .toLanguageTag();
_locales = langs.map((l) => LocaleName(l, l)).toList(); await _loadLocales(deviceTag);
final deviceTag = WidgetsBinding.instance.platformDispatcher.locale
.toLanguageTag();
final match = _locales.firstWhere(
(l) => l.localeId.toLowerCase() == deviceTag.toLowerCase(),
orElse: () {
final primary = deviceTag
.split(RegExp('[-_]'))
.first
.toLowerCase();
return _locales.firstWhere(
(l) => l.localeId.toLowerCase().startsWith('$primary-'),
orElse: () => _locales.isNotEmpty
? _locales.first
: LocaleName('en_US', 'en_US'),
);
},
);
_selectedLocaleId = match.localeId;
} catch (e) {
// ignore locale load errors
_selectedLocaleId = null;
}
} }
} catch (_) { } catch (_) {
_localSttAvailable = false; _localSttAvailable = false;
@@ -111,17 +93,34 @@ class VoiceInputService {
} }
Future<bool> checkPermissions() async { Future<bool> checkPermissions() async {
try { final micGranted = await _ensureMicrophonePermission();
return await _speech.hasPermission(); if (!micGranted) {
} catch (_) {
return false; return false;
} }
if (_localSttAvailable && _preference != SttPreference.serverOnly) {
try {
final sttGranted = await _speech.hasPermission();
if (!sttGranted) {
if (prefersDeviceOnly) {
return false;
}
_localSttAvailable = false;
}
} catch (_) {
if (prefersDeviceOnly) {
return false;
}
_localSttAvailable = false;
}
}
return true;
} }
bool get isListening => _isListening; bool get isListening => _isListening;
bool get isAvailable => bool get isAvailable =>
_isInitialized && (_localSttAvailable || hasServerStt); _isInitialized && (_localSttAvailable || hasServerStt);
bool get hasLocalStt => _localSttAvailable; bool get hasLocalStt => _localSttAvailable;
bool get localeMetadataIncomplete => _usingFallbackLocales;
// Add a method to check if on-device STT is properly supported // Add a method to check if on-device STT is properly supported
Future<bool> checkOnDeviceSupport() async { Future<bool> checkOnDeviceSupport() async {
@@ -183,6 +182,89 @@ class VoiceInputService {
_selectedLocaleId = localeId; _selectedLocaleId = localeId;
} }
/// Populates [_locales] and [_selectedLocaleId] for the on-device recognizer.
///
/// A placeholder locale is seeded first via [_ensureFallbackLocale], so a
/// selection exists even when the recognizer reports nothing. The recognizer
/// is then asked for its languages, bounded by [_localeFetchTimeout]; a
/// timeout, an error, or an empty answer all leave the placeholder in place.
/// Otherwise the reported languages replace the placeholder and the entry
/// chosen by [_matchLocale] for [deviceTag] becomes the selection.
Future<void> _loadLocales(String deviceTag) async {
  _ensureFallbackLocale(deviceTag);

  var reported = const <String>[];
  try {
    reported = await _speech.getLanguages().timeout(
      _localeFetchTimeout,
      onTimeout: () => const [],
    );
  } catch (_) {
    // Engines such as Whisper Voice may not support this call.
    reported = const [];
  }

  if (reported.isNotEmpty) {
    _locales = [for (final id in reported) LocaleName(id, id)];
    _usingFallbackLocales = false;
    _selectedLocaleId = _matchLocale(deviceTag).localeId;
  }
}
/// Seeds a single-entry placeholder locale list when nothing usable is set.
///
/// Does nothing if [_locales] already has entries and a locale is selected.
/// Otherwise marks the locale data as fallback ([_usingFallbackLocales]) and
/// installs one entry built from [deviceTag], or `en_US` when [deviceTag] is
/// empty, selecting that entry.
void _ensureFallbackLocale(String deviceTag) {
  final needsFallback = _locales.isEmpty || _selectedLocaleId == null;
  if (!needsFallback) {
    return;
  }

  _usingFallbackLocales = true;
  if (deviceTag.isNotEmpty) {
    _locales = [LocaleName(deviceTag, deviceTag)];
    _selectedLocaleId = deviceTag;
  } else {
    _locales = const [LocaleName('en_US', 'en_US')];
    _selectedLocaleId = 'en_US';
  }
}
/// Returns the entry of [_locales] that best matches [deviceTag].
///
/// Comparison is case-insensitive and treats `-` and `_` as the same
/// separator, so a device tag of `en-US` matches a recognizer locale reported
/// as `en_US`. Resolution order:
///
/// 1. an entry equal to the full tag;
/// 2. an entry sharing the primary language subtag (bare `en` or any `en-*`);
/// 3. the first entry of [_locales].
///
/// Returns an `en_US` placeholder when [_locales] is empty.
LocaleName _matchLocale(String deviceTag) {
  if (_locales.isEmpty) {
    return const LocaleName('en_US', 'en_US');
  }

  // Canonical form used only for comparison; returned ids are left untouched.
  String normalize(String tag) => tag.toLowerCase().replaceAll('_', '-');

  final normalizedDevice = normalize(deviceTag);
  for (final locale in _locales) {
    if (normalize(locale.localeId) == normalizedDevice) {
      return locale;
    }
  }

  // `split` always yields at least one element, so `first` is safe.
  final primary = normalizedDevice.split('-').first;
  if (primary.isNotEmpty) {
    for (final locale in _locales) {
      final candidate = normalize(locale.localeId);
      if (candidate == primary || candidate.startsWith('$primary-')) {
        return locale;
      }
    }
  }

  return _locales.first;
}
/// Reacts to an error reported by the on-device recognizer streams.
///
/// Ignored when no listening session is active. Otherwise on-device STT is
/// marked unavailable, the error is forwarded to [_textStreamController]
/// (wrapped in an [Exception], with a generic message when [error] has no
/// text), and the session either switches to server transcription (when
/// server STT exists and fallback is allowed) or is stopped.
void _handleLocalRecognizerError(Object? error) {
  if (_isListening) {
    _localSttAvailable = false;

    final detail = error?.toString().trim();
    final hasDetail = detail != null && detail.isNotEmpty;
    final failure = Exception(hasDetail ? detail : 'Speech recognition failed');

    // Decide before emitting, matching the original evaluation order.
    final fallBackToServer = hasServerStt && allowsServerFallback;
    _textStreamController?.addError(failure);
    if (fallBackToServer) {
      unawaited(_beginServerFallback());
    } else {
      unawaited(_stopListening());
    }
  }
}
/// Whether the microphone permission probe reports permission as granted.
///
/// Any exception thrown by [_microphonePermissionProbe] is treated as
/// "not granted" rather than propagated.
Future<bool> _ensureMicrophonePermission() async {
  var granted = false;
  try {
    granted = await _microphonePermissionProbe.hasPermission();
  } catch (_) {
    granted = false;
  }
  return granted;
}
Stream<String> startListening() { Stream<String> startListening() {
if (!_isInitialized) { if (!_isInitialized) {
throw Exception('Voice input not initialized'); throw Exception('Voice input not initialized');
@@ -247,9 +329,12 @@ class VoiceInputService {
if (result.isFinal) { if (result.isFinal) {
unawaited(_stopListening()); unawaited(_stopListening());
} }
}, onError: (_) {}); }, onError: _handleLocalRecognizerError);
_sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {}); _sttStateSub = _speech.onStateChanged.listen(
(_) {},
onError: _handleLocalRecognizerError,
);
Future(() async { Future(() async {
try { try {
@@ -709,6 +794,7 @@ class VoiceInputService {
void dispose() { void dispose() {
stopListening(); stopListening();
unawaited(_vadHandler.dispose()); unawaited(_vadHandler.dispose());
unawaited(_microphonePermissionProbe.dispose());
try { try {
_speech.dispose().catchError((_) {}); _speech.dispose().catchError((_) {});
} catch (_) {} } catch (_) {}

View File

@@ -1166,7 +1166,7 @@ packages:
source: hosted source: hosted
version: "1.5.0" version: "1.5.0"
record: record:
dependency: transitive dependency: "direct main"
description: description:
name: record name: record
sha256: "6bad72fb3ea6708d724cf8b6c97c4e236cf9f43a52259b654efeb6fd9b737f1f" sha256: "6bad72fb3ea6708d724cf8b6c97c4e236cf9f43a52259b654efeb6fd9b737f1f"

View File

@@ -46,6 +46,7 @@ dependencies:
# Platform Features # Platform Features
vad: ^0.0.7+1 vad: ^0.0.7+1
stts: ^1.2.5 stts: ^1.2.5
record: ^6.1.2
flutter_tts: ^4.2.3 flutter_tts: ^4.2.3
audioplayers: ^6.5.1 audioplayers: ^6.5.1
image_picker: ^1.2.0 image_picker: ^1.2.0