feat(voice): Improve voice input service with locale handling and permission checks
This commit is contained in:
@@ -5,6 +5,7 @@ import 'dart:typed_data';
|
|||||||
|
|
||||||
import 'package:flutter/widgets.dart';
|
import 'package:flutter/widgets.dart';
|
||||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||||
|
import 'package:record/record.dart';
|
||||||
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
||||||
import 'package:stts/stts.dart';
|
import 'package:stts/stts.dart';
|
||||||
import 'package:vad/vad.dart';
|
import 'package:vad/vad.dart';
|
||||||
@@ -25,9 +26,11 @@ class LocaleName {
|
|||||||
class VoiceInputService {
|
class VoiceInputService {
|
||||||
static const int _vadSampleRate = 16000;
|
static const int _vadSampleRate = 16000;
|
||||||
static const int _vadFrameSamples = 1536;
|
static const int _vadFrameSamples = 1536;
|
||||||
|
static const Duration _localeFetchTimeout = Duration(seconds: 2);
|
||||||
|
|
||||||
final VadHandler _vadHandler = VadHandler.create();
|
final VadHandler _vadHandler = VadHandler.create();
|
||||||
final Stt _speech = Stt();
|
final Stt _speech = Stt();
|
||||||
|
final AudioRecorder _microphonePermissionProbe = AudioRecorder();
|
||||||
final ApiService? _api;
|
final ApiService? _api;
|
||||||
final Ref? _ref;
|
final Ref? _ref;
|
||||||
bool _isInitialized = false;
|
bool _isInitialized = false;
|
||||||
@@ -37,6 +40,7 @@ class VoiceInputService {
|
|||||||
bool _usingServerStt = false;
|
bool _usingServerStt = false;
|
||||||
String? _selectedLocaleId;
|
String? _selectedLocaleId;
|
||||||
List<LocaleName> _locales = const [];
|
List<LocaleName> _locales = const [];
|
||||||
|
bool _usingFallbackLocales = false;
|
||||||
StreamController<String>? _textStreamController;
|
StreamController<String>? _textStreamController;
|
||||||
String _currentText = '';
|
String _currentText = '';
|
||||||
StreamController<int>? _intensityController;
|
StreamController<int>? _intensityController;
|
||||||
@@ -77,31 +81,9 @@ class VoiceInputService {
|
|||||||
// Check permission and supported status
|
// Check permission and supported status
|
||||||
_localSttAvailable = await _speech.isSupported();
|
_localSttAvailable = await _speech.isSupported();
|
||||||
if (_localSttAvailable) {
|
if (_localSttAvailable) {
|
||||||
try {
|
final deviceTag = WidgetsBinding.instance.platformDispatcher.locale
|
||||||
final langs = await _speech.getLanguages();
|
.toLanguageTag();
|
||||||
_locales = langs.map((l) => LocaleName(l, l)).toList();
|
await _loadLocales(deviceTag);
|
||||||
final deviceTag = WidgetsBinding.instance.platformDispatcher.locale
|
|
||||||
.toLanguageTag();
|
|
||||||
final match = _locales.firstWhere(
|
|
||||||
(l) => l.localeId.toLowerCase() == deviceTag.toLowerCase(),
|
|
||||||
orElse: () {
|
|
||||||
final primary = deviceTag
|
|
||||||
.split(RegExp('[-_]'))
|
|
||||||
.first
|
|
||||||
.toLowerCase();
|
|
||||||
return _locales.firstWhere(
|
|
||||||
(l) => l.localeId.toLowerCase().startsWith('$primary-'),
|
|
||||||
orElse: () => _locales.isNotEmpty
|
|
||||||
? _locales.first
|
|
||||||
: LocaleName('en_US', 'en_US'),
|
|
||||||
);
|
|
||||||
},
|
|
||||||
);
|
|
||||||
_selectedLocaleId = match.localeId;
|
|
||||||
} catch (e) {
|
|
||||||
// ignore locale load errors
|
|
||||||
_selectedLocaleId = null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (_) {
|
} catch (_) {
|
||||||
_localSttAvailable = false;
|
_localSttAvailable = false;
|
||||||
@@ -111,17 +93,34 @@ class VoiceInputService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Future<bool> checkPermissions() async {
|
Future<bool> checkPermissions() async {
|
||||||
try {
|
final micGranted = await _ensureMicrophonePermission();
|
||||||
return await _speech.hasPermission();
|
if (!micGranted) {
|
||||||
} catch (_) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (_localSttAvailable && _preference != SttPreference.serverOnly) {
|
||||||
|
try {
|
||||||
|
final sttGranted = await _speech.hasPermission();
|
||||||
|
if (!sttGranted) {
|
||||||
|
if (prefersDeviceOnly) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
_localSttAvailable = false;
|
||||||
|
}
|
||||||
|
} catch (_) {
|
||||||
|
if (prefersDeviceOnly) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
_localSttAvailable = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool get isListening => _isListening;
|
bool get isListening => _isListening;
|
||||||
bool get isAvailable =>
|
bool get isAvailable =>
|
||||||
_isInitialized && (_localSttAvailable || hasServerStt);
|
_isInitialized && (_localSttAvailable || hasServerStt);
|
||||||
bool get hasLocalStt => _localSttAvailable;
|
bool get hasLocalStt => _localSttAvailable;
|
||||||
|
bool get localeMetadataIncomplete => _usingFallbackLocales;
|
||||||
|
|
||||||
// Add a method to check if on-device STT is properly supported
|
// Add a method to check if on-device STT is properly supported
|
||||||
Future<bool> checkOnDeviceSupport() async {
|
Future<bool> checkOnDeviceSupport() async {
|
||||||
@@ -183,6 +182,89 @@ class VoiceInputService {
|
|||||||
_selectedLocaleId = localeId;
|
_selectedLocaleId = localeId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Loads the recognizer's language list and selects the best match for
/// [deviceTag].
///
/// A fallback locale is installed first through [_ensureFallbackLocale].
/// The engine is then queried with a [_localeFetchTimeout] limit. If the
/// query times out, throws, or returns nothing, the method returns early
/// and leaves the current locale state untouched. Otherwise [_locales] is
/// replaced, [_usingFallbackLocales] is cleared, and [_selectedLocaleId]
/// is set from [_matchLocale].
Future<void> _loadLocales(String deviceTag) async {
  _ensureFallbackLocale(deviceTag);

  var supported = const <String>[];
  try {
    supported = await _speech.getLanguages().timeout(
      _localeFetchTimeout,
      onTimeout: () => const [],
    );
  } catch (_) {
    // Engines such as Whisper Voice may not support this call.
    supported = const [];
  }

  // Nothing usable came back: keep the current locale state.
  if (supported.isEmpty) {
    return;
  }

  _locales = [for (final id in supported) LocaleName(id, id)];
  _usingFallbackLocales = false;
  _selectedLocaleId = _matchLocale(deviceTag).localeId;
}
|
||||||
|
|
||||||
|
/// Guarantees that a locale list and a selected locale exist.
///
/// Does nothing when [_locales] is non-empty and [_selectedLocaleId] is
/// already set. Otherwise marks the locale data as fallback-only
/// ([_usingFallbackLocales]) and installs a single-entry list built from
/// [deviceTag], or from `en_US` when [deviceTag] is empty.
void _ensureFallbackLocale(String deviceTag) {
  final alreadyConfigured = _locales.isNotEmpty && _selectedLocaleId != null;
  if (alreadyConfigured) {
    return;
  }

  _usingFallbackLocales = true;

  if (deviceTag.isNotEmpty) {
    _locales = [LocaleName(deviceTag, deviceTag)];
    _selectedLocaleId = deviceTag;
  } else {
    // No device tag available: default to US English.
    _locales = const [LocaleName('en_US', 'en_US')];
    _selectedLocaleId = 'en_US';
  }
}
|
||||||
|
|
||||||
|
/// Returns the entry of [_locales] that best matches [deviceTag].
///
/// Matching is case-insensitive and tried in this order:
///
/// 1. an id equal to [deviceTag] as written;
/// 2. an id equal to [deviceTag] once `_` and `-` are treated as the same
///    separator (so `en-US` matches `en_US`);
/// 3. an id with the same primary language subtag, either bare (`en`) or
///    followed by a separator (`en-GB`, `en_GB`);
/// 4. the first entry of [_locales].
///
/// Returns `en_US` when [_locales] is empty.
LocaleName _matchLocale(String deviceTag) {
  if (_locales.isEmpty) {
    return const LocaleName('en_US', 'en_US');
  }

  // Canonical comparison form: lower-case with '-' as the only separator.
  String canonical(String tag) => tag.toLowerCase().replaceAll('_', '-');

  final loweredDevice = deviceTag.toLowerCase();
  for (final locale in _locales) {
    if (locale.localeId.toLowerCase() == loweredDevice) {
      return locale;
    }
  }

  final canonicalDevice = canonical(deviceTag);
  for (final locale in _locales) {
    if (canonical(locale.localeId) == canonicalDevice) {
      return locale;
    }
  }

  // `split` always yields at least one element, so `first` is safe.
  final primary = canonicalDevice.split('-').first;
  if (primary.isNotEmpty) {
    for (final locale in _locales) {
      final candidate = canonical(locale.localeId);
      if (candidate == primary || candidate.startsWith('$primary-')) {
        return locale;
      }
    }
  }

  return _locales.first;
}
|
||||||
|
|
||||||
|
/// Error callback for the on-device recognizer streams.
///
/// Ignored when the service is not listening. Otherwise marks local STT as
/// unavailable and forwards the failure to [_textStreamController] as an
/// [Exception], using `'Speech recognition failed'` when [error] has no
/// usable text. It then starts the server fallback when that is both
/// available and allowed, or stops listening if not.
void _handleLocalRecognizerError(Object? error) {
  if (!_isListening) {
    return;
  }

  _localSttAvailable = false;

  final detail = error?.toString().trim() ?? '';
  final exception = Exception(
    detail.isEmpty ? 'Speech recognition failed' : detail,
  );

  // Decide before emitting the error so the choice cannot be affected by
  // anything a stream listener does in response.
  final canFallBack = hasServerStt && allowsServerFallback;

  _textStreamController?.addError(exception);

  if (canFallBack) {
    unawaited(_beginServerFallback());
  } else {
    unawaited(_stopListening());
  }
}
|
||||||
|
|
||||||
|
/// Reports whether microphone access is granted, according to
/// [_microphonePermissionProbe].
///
/// Any error raised by the probe is treated as "not granted".
// NOTE(review): depending on the recorder plugin version, `hasPermission()`
// may also prompt the user. Confirm against the plugin documentation.
Future<bool> _ensureMicrophonePermission() async {
  var granted = false;
  try {
    granted = await _microphonePermissionProbe.hasPermission();
  } catch (_) {
    granted = false;
  }
  return granted;
}
|
||||||
|
|
||||||
Stream<String> startListening() {
|
Stream<String> startListening() {
|
||||||
if (!_isInitialized) {
|
if (!_isInitialized) {
|
||||||
throw Exception('Voice input not initialized');
|
throw Exception('Voice input not initialized');
|
||||||
@@ -247,9 +329,12 @@ class VoiceInputService {
|
|||||||
if (result.isFinal) {
|
if (result.isFinal) {
|
||||||
unawaited(_stopListening());
|
unawaited(_stopListening());
|
||||||
}
|
}
|
||||||
}, onError: (_) {});
|
}, onError: _handleLocalRecognizerError);
|
||||||
|
|
||||||
_sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {});
|
_sttStateSub = _speech.onStateChanged.listen(
|
||||||
|
(_) {},
|
||||||
|
onError: _handleLocalRecognizerError,
|
||||||
|
);
|
||||||
|
|
||||||
Future(() async {
|
Future(() async {
|
||||||
try {
|
try {
|
||||||
@@ -709,6 +794,7 @@ class VoiceInputService {
|
|||||||
void dispose() {
|
void dispose() {
|
||||||
stopListening();
|
stopListening();
|
||||||
unawaited(_vadHandler.dispose());
|
unawaited(_vadHandler.dispose());
|
||||||
|
unawaited(_microphonePermissionProbe.dispose());
|
||||||
try {
|
try {
|
||||||
_speech.dispose().catchError((_) {});
|
_speech.dispose().catchError((_) {});
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
|
|||||||
@@ -1166,7 +1166,7 @@ packages:
|
|||||||
source: hosted
|
source: hosted
|
||||||
version: "1.5.0"
|
version: "1.5.0"
|
||||||
record:
|
record:
|
||||||
dependency: transitive
|
dependency: "direct main"
|
||||||
description:
|
description:
|
||||||
name: record
|
name: record
|
||||||
sha256: "6bad72fb3ea6708d724cf8b6c97c4e236cf9f43a52259b654efeb6fd9b737f1f"
|
sha256: "6bad72fb3ea6708d724cf8b6c97c4e236cf9f43a52259b654efeb6fd9b737f1f"
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ dependencies:
|
|||||||
# Platform Features
|
# Platform Features
|
||||||
vad: ^0.0.7+1
|
vad: ^0.0.7+1
|
||||||
stts: ^1.2.5
|
stts: ^1.2.5
|
||||||
|
record: ^6.1.2
|
||||||
flutter_tts: ^4.2.3
|
flutter_tts: ^4.2.3
|
||||||
audioplayers: ^6.5.1
|
audioplayers: ^6.5.1
|
||||||
image_picker: ^1.2.0
|
image_picker: ^1.2.0
|
||||||
|
|||||||
Reference in New Issue
Block a user