fix(audio): optimize audio configuration for iOS and Android platforms
This commit is contained in:
@@ -5,6 +5,9 @@ PODS:
|
||||
- connectivity_plus (0.0.1):
|
||||
- Flutter
|
||||
- CryptoSwift (1.8.4)
|
||||
- CwlCatchException (2.2.1):
|
||||
- CwlCatchExceptionSupport (~> 2.2.1)
|
||||
- CwlCatchExceptionSupport (2.2.1)
|
||||
- DKImagePickerController/Core (4.3.9):
|
||||
- DKImagePickerController/ImageDataManager
|
||||
- DKImagePickerController/Resource
|
||||
@@ -85,11 +88,13 @@ PODS:
|
||||
- shared_preferences_foundation (0.0.1):
|
||||
- Flutter
|
||||
- FlutterMacOS
|
||||
- speech_to_text (7.2.0):
|
||||
- CwlCatchException
|
||||
- Flutter
|
||||
- FlutterMacOS
|
||||
- sqflite_darwin (0.0.4):
|
||||
- Flutter
|
||||
- FlutterMacOS
|
||||
- stts (1.0.0):
|
||||
- Flutter
|
||||
- SwiftyGif (5.4.5)
|
||||
- url_launcher_ios (0.0.1):
|
||||
- Flutter
|
||||
@@ -122,8 +127,8 @@ DEPENDENCIES:
|
||||
- share_handler_ios_models (from `.symlinks/plugins/share_handler_ios/ios/Models`)
|
||||
- share_plus (from `.symlinks/plugins/share_plus/ios`)
|
||||
- shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`)
|
||||
- speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`)
|
||||
- sqflite_darwin (from `.symlinks/plugins/sqflite_darwin/darwin`)
|
||||
- stts (from `.symlinks/plugins/stts/ios`)
|
||||
- url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`)
|
||||
- vad (from `.symlinks/plugins/vad/ios`)
|
||||
- wakelock_plus (from `.symlinks/plugins/wakelock_plus/ios`)
|
||||
@@ -132,6 +137,8 @@ DEPENDENCIES:
|
||||
SPEC REPOS:
|
||||
trunk:
|
||||
- CryptoSwift
|
||||
- CwlCatchException
|
||||
- CwlCatchExceptionSupport
|
||||
- DKImagePickerController
|
||||
- DKPhotoGallery
|
||||
- onnxruntime-c
|
||||
@@ -178,10 +185,10 @@ EXTERNAL SOURCES:
|
||||
:path: ".symlinks/plugins/share_plus/ios"
|
||||
shared_preferences_foundation:
|
||||
:path: ".symlinks/plugins/shared_preferences_foundation/darwin"
|
||||
speech_to_text:
|
||||
:path: ".symlinks/plugins/speech_to_text/darwin"
|
||||
sqflite_darwin:
|
||||
:path: ".symlinks/plugins/sqflite_darwin/darwin"
|
||||
stts:
|
||||
:path: ".symlinks/plugins/stts/ios"
|
||||
url_launcher_ios:
|
||||
:path: ".symlinks/plugins/url_launcher_ios/ios"
|
||||
vad:
|
||||
@@ -195,6 +202,8 @@ SPEC CHECKSUMS:
|
||||
audioplayers_darwin: 4f9ca89d92d3d21cec7ec580e78ca888e5fb68bd
|
||||
connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd
|
||||
CryptoSwift: e64e11850ede528a02a0f3e768cec8e9d92ecb90
|
||||
CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a
|
||||
CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc
|
||||
DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
|
||||
DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60
|
||||
file_picker: a0560bc09d61de87f12d246fc47d2119e6ef37be
|
||||
@@ -217,8 +226,8 @@ SPEC CHECKSUMS:
|
||||
share_handler_ios_models: fc638c9b4330dc7f082586c92aee9dfa0b87b871
|
||||
share_plus: 50da8cb520a8f0f65671c6c6a99b3617ed10a58a
|
||||
shared_preferences_foundation: 7036424c3d8ec98dfe75ff1667cb0cd531ec82bb
|
||||
speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19
|
||||
sqflite_darwin: 20b2a3a3b70e43edae938624ce550a3cbf66a3d0
|
||||
stts: 1a48df645bb516e86e4121d5253b582749a1d3a6
|
||||
SwiftyGif: 706c60cf65fa2bc5ee0313beece843c8eb8194d4
|
||||
url_launcher_ios: 7a95fa5b60cc718a708b8f2966718e93db0cef1b
|
||||
vad: 7934867589afe53567f492df66fb1615f2185822
|
||||
|
||||
@@ -130,6 +130,29 @@ class CallKitService {
|
||||
return <Map<String, dynamic>>[];
|
||||
}
|
||||
|
||||
/// Ends all CallKit calls that are currently reported as active.
///
/// Returns immediately when `_shouldUseCallKit('check active calls')` is
/// false. Otherwise awaits [activeCalls] and, when the result is non-empty,
/// logs the number of calls and awaits [endAllCalls].
///
/// Every reported call is ended; this method itself does not compare the
/// calls against anything the app is tracking.
///
/// Errors raised while querying or ending calls are logged under the
/// `callkit` log name and are not rethrown.
|
||||
Future<void> checkAndCleanActiveCalls() async {
|
||||
if (!_shouldUseCallKit('check active calls')) return;
|
||||
|
||||
try {
|
||||
final calls = await activeCalls();
|
||||
if (calls.isNotEmpty) {
|
||||
developer.log(
|
||||
'Found ${calls.length} active CallKit calls on startup. Cleaning up.',
|
||||
name: 'callkit',
|
||||
);
|
||||
await endAllCalls();
|
||||
}
|
||||
} catch (error, stackTrace) {
// Best-effort cleanup: record the failure and carry on.
developer.log(
|
||||
'Failed to clean up active calls: $error',
|
||||
name: 'callkit',
|
||||
error: error,
|
||||
stackTrace: stackTrace,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Stream of CallKit events from the native layer.
|
||||
Stream<CallEvent> get events {
|
||||
if (!_callKitAllowed) {
|
||||
@@ -182,7 +205,7 @@ class CallKitService {
|
||||
ios: const IOSParams(
|
||||
handleType: 'generic',
|
||||
supportsVideo: false,
|
||||
audioSessionMode: 'default',
|
||||
audioSessionMode: 'voiceChat',
|
||||
audioSessionActive: true,
|
||||
audioSessionPreferredSampleRate: 44100.0,
|
||||
audioSessionPreferredIOBufferDuration: 0.005,
|
||||
|
||||
@@ -70,20 +70,12 @@ class TextToSpeechService {
|
||||
}
|
||||
});
|
||||
|
||||
if (!kIsWeb && Platform.isIOS) {
|
||||
final context = AudioContext(
|
||||
iOS: AudioContextIOS(
|
||||
category: AVAudioSessionCategory.playAndRecord,
|
||||
options: const {
|
||||
AVAudioSessionOptions.defaultToSpeaker,
|
||||
AVAudioSessionOptions.mixWithOthers,
|
||||
AVAudioSessionOptions.allowBluetooth,
|
||||
AVAudioSessionOptions.allowBluetoothA2DP,
|
||||
},
|
||||
if (!kIsWeb && Platform.isAndroid) {
|
||||
_player.setAudioContext(
|
||||
AudioContext(
|
||||
android: const AudioContextAndroid(),
|
||||
),
|
||||
android: const AudioContextAndroid(),
|
||||
);
|
||||
_player.setAudioContext(context);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,13 +95,8 @@ class TextToSpeechService {
|
||||
|
||||
if (!kIsWeb && Platform.isIOS) {
|
||||
await _tts.setSharedInstance(true);
|
||||
await _tts
|
||||
.setIosAudioCategory(IosTextToSpeechAudioCategory.playAndRecord, [
|
||||
IosTextToSpeechAudioCategoryOptions.mixWithOthers,
|
||||
IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
|
||||
IosTextToSpeechAudioCategoryOptions.allowBluetooth,
|
||||
IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
|
||||
]);
|
||||
// Rely on the native VoiceBackgroundAudioManager for iOS
|
||||
// audio session configuration to avoid routing conflicts.
|
||||
}
|
||||
|
||||
if (_engine != TtsEngine.server) {
|
||||
|
||||
@@ -123,6 +123,11 @@ class VoiceCallService {
|
||||
_pauseReasons.clear();
|
||||
_listeningPaused = false;
|
||||
|
||||
// Clean up any zombie calls from previous sessions
|
||||
if (_callKitEnabled) {
|
||||
unawaited(_callKitService.checkAndCleanActiveCalls());
|
||||
}
|
||||
|
||||
// Initialize notification service
|
||||
await _notificationService.initialize();
|
||||
|
||||
@@ -312,9 +317,17 @@ class VoiceCallService {
|
||||
throw Exception('Failed to establish socket connection');
|
||||
}
|
||||
|
||||
// Initialize voice input first so we know which STT mode will be used
|
||||
await _voiceInput.initialize();
|
||||
|
||||
// Only activate VoiceBackgroundAudioManager for server STT
|
||||
// For local STT, speech_to_text handles its own iOS audio session
|
||||
final useServerMic =
|
||||
(_voiceInput.prefersServerOnly && _voiceInput.hasServerStt) ||
|
||||
(!_voiceInput.hasLocalStt && _voiceInput.hasServerStt);
|
||||
await BackgroundStreamingHandler.instance.startBackgroundExecution(const [
|
||||
_voiceCallStreamId,
|
||||
], requiresMicrophone: true);
|
||||
], requiresMicrophone: useServerMic);
|
||||
|
||||
// Set up periodic keep-alive to refresh wake lock (every 5 minutes)
|
||||
_keepAliveTimer?.cancel();
|
||||
@@ -385,10 +398,11 @@ class VoiceCallService {
|
||||
throw Exception('Preferred speech recognition engine is unavailable');
|
||||
}
|
||||
|
||||
_updateState(VoiceCallState.listening);
|
||||
|
||||
final stream = await _voiceInput.beginListening();
|
||||
|
||||
// Only mark as listening after STT has successfully started.
|
||||
_updateState(VoiceCallState.listening);
|
||||
|
||||
_transcriptSubscription = stream.listen(
|
||||
(text) {
|
||||
if (_isDisposed) return;
|
||||
@@ -401,13 +415,27 @@ class VoiceCallService {
|
||||
},
|
||||
onDone: () async {
|
||||
if (_isDisposed) return;
|
||||
|
||||
final trimmed = _accumulatedTranscript.trim();
|
||||
// User stopped speaking, send message to assistant
|
||||
if (_accumulatedTranscript.trim().isNotEmpty) {
|
||||
await _sendMessageToAssistant(_accumulatedTranscript);
|
||||
} else {
|
||||
// No input, restart listening
|
||||
await _startListening();
|
||||
if (trimmed.isNotEmpty) {
|
||||
await _sendMessageToAssistant(trimmed);
|
||||
return;
|
||||
}
|
||||
|
||||
// No input – avoid a tight restart loop and only restart
|
||||
// while the call is still active and not paused.
|
||||
await Future.delayed(const Duration(milliseconds: 250));
|
||||
if (_isDisposed) return;
|
||||
if (_state == VoiceCallState.disconnected ||
|
||||
_state == VoiceCallState.error) {
|
||||
return;
|
||||
}
|
||||
if (_pauseReasons.isNotEmpty) {
|
||||
// Respect paused state; resumeListening() will restart if needed.
|
||||
return;
|
||||
}
|
||||
await _startListening();
|
||||
},
|
||||
);
|
||||
|
||||
|
||||
@@ -5,10 +5,10 @@ import 'dart:typed_data';
|
||||
|
||||
import 'package:flutter/widgets.dart';
|
||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||
import 'package:record/record.dart'
|
||||
hide IosAudioCategory, IosAudioCategoryOptions;
|
||||
import 'package:record/record.dart';
|
||||
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
||||
import 'package:stts/stts.dart';
|
||||
import 'package:speech_to_text/speech_recognition_result.dart';
|
||||
import 'package:speech_to_text/speech_to_text.dart';
|
||||
import 'package:vad/vad.dart';
|
||||
|
||||
import '../../../core/providers/app_providers.dart';
|
||||
@@ -18,7 +18,7 @@ import '../../../core/services/settings_service.dart';
|
||||
|
||||
part 'voice_input_service.g.dart';
|
||||
|
||||
// Lightweight replacement for previous stt.LocaleName used across the UI
|
||||
/// Lightweight locale representation used across the UI.
|
||||
class LocaleName {
|
||||
final String localeId;
|
||||
final String name;
|
||||
@@ -37,7 +37,7 @@ class VoiceInputService {
|
||||
static const String _backgroundSttStreamId = 'voice-input-stt';
|
||||
|
||||
final VadHandler _vadHandler = VadHandler.create();
|
||||
final Stt _speech = Stt();
|
||||
final SpeechToText _speech = SpeechToText();
|
||||
final AudioRecorder _microphonePermissionProbe = AudioRecorder();
|
||||
final ApiService? _api;
|
||||
final Ref? _ref;
|
||||
@@ -64,8 +64,6 @@ class VoiceInputService {
|
||||
Stream<String> get textStream =>
|
||||
_textStreamController?.stream ?? const Stream<String>.empty();
|
||||
Timer? _autoStopTimer;
|
||||
StreamSubscription<SttRecognition>? _sttResultSub;
|
||||
StreamSubscription<SttState>? _sttStateSub;
|
||||
StreamSubscription<List<double>>? _vadSpeechEndSub;
|
||||
StreamSubscription<({double isSpeech, double notSpeech, List<double> frame})>?
|
||||
_vadFrameSub;
|
||||
@@ -100,8 +98,11 @@ class VoiceInputService {
|
||||
}
|
||||
// Prepare local speech recognizer
|
||||
try {
|
||||
// Check permission and supported status
|
||||
_localSttAvailable = await _speech.isSupported();
|
||||
// Initialize speech_to_text and check availability
|
||||
_localSttAvailable = await _speech.initialize(
|
||||
onStatus: _handleSttStatus,
|
||||
onError: _handleSttError,
|
||||
);
|
||||
if (_localSttAvailable) {
|
||||
await _loadLocales(deviceTag);
|
||||
}
|
||||
@@ -112,21 +113,56 @@ class VoiceInputService {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Handles a status string reported by the local speech recognizer.
///
/// * `'listening'` sets `_localSttActive` to true.
/// * `'notListening'` or `'done'` clears `_localSttActive`. If the flag was
///   previously set while `_isListening` is true and server STT is not in
///   use, `_stopListening()` is started without being awaited.
///
/// Any other status is only written to the debug log.
void _handleSttStatus(String status) {
|
||||
debugPrint('Local STT Status: $status');
|
||||
if (status == 'listening') {
|
||||
_localSttActive = true;
|
||||
} else if (status == 'notListening' || status == 'done') {
|
||||
// Capture the previous value before clearing it so the check below can
// tell whether the recognizer had actually been running.
final wasActive = _localSttActive;
|
||||
_localSttActive = false;
|
||||
// If we were actively listening and the platform stopped us,
|
||||
// properly close the stream so voice call service can restart
|
||||
if (wasActive && _isListening && !_usingServerStt) {
|
||||
debugPrint('Platform stopped listening, closing stream');
|
||||
unawaited(_stopListening());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles an error reported by the local speech recognizer.
///
/// The error is logged, converted to a lower-cased string and checked for
/// the substrings `error_no_match`, `error_speech_timeout` and `error_busy`.
/// When any of them is present the method returns without further action;
/// every other error is forwarded to `_handleLocalRecognizerError`.
void _handleSttError(dynamic error) {
|
||||
debugPrint('Local STT Error: $error');
|
||||
final errorStr = error.toString().toLowerCase();
|
||||
|
||||
// These errors are non-fatal - they just mean no speech was detected
|
||||
// or the session timed out. The status handler will close the stream
|
||||
// and voice call service will restart listening.
|
||||
final nonFatalErrors = [
|
||||
'error_no_match',
|
||||
'error_speech_timeout',
|
||||
'error_busy', // Temporary, can retry
|
||||
];
|
||||
|
||||
// Substring match, so the codes are recognized anywhere in the message.
final isNonFatal = nonFatalErrors.any((e) => errorStr.contains(e));
|
||||
if (isNonFatal) {
|
||||
debugPrint('Non-fatal STT error, allowing normal stream close');
|
||||
// Let the status handler / auto-stop timer close the stream.
|
||||
// We do not treat this as a fatal failure for the current session.
|
||||
return;
|
||||
}
|
||||
|
||||
// Fatal errors - hand off to the shared local-recognizer error handler.
// NOTE(review): whether that handler still marks local STT as unavailable
// is not visible from here - confirm.
|
||||
_handleLocalRecognizerError(error);
|
||||
}
|
||||
|
||||
Future<bool> checkPermissions() async {
|
||||
final micGranted = await _ensureMicrophonePermission();
|
||||
if (!micGranted) {
|
||||
return false;
|
||||
}
|
||||
if (_localSttAvailable && _preference != SttPreference.serverOnly) {
|
||||
try {
|
||||
final sttGranted = await _speech.hasPermission();
|
||||
if (!sttGranted) {
|
||||
_localSttAvailable = false;
|
||||
}
|
||||
} catch (_) {
|
||||
_localSttAvailable = false;
|
||||
}
|
||||
}
|
||||
// Note: Don't disable _localSttAvailable based on hasPermission check
|
||||
// The permission might be granted lazily when listen() is called on iOS,
|
||||
// and the check can be unreliable. Let speech_to_text handle permissions
|
||||
// during the actual listen() call.
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -136,23 +172,21 @@ class VoiceInputService {
|
||||
bool get hasLocalStt => _localSttAvailable;
|
||||
bool get localeMetadataIncomplete => _usingFallbackLocales;
|
||||
|
||||
// Add a method to check if on-device STT is properly supported
|
||||
/// Checks if on-device STT is properly supported.
|
||||
Future<bool> checkOnDeviceSupport() async {
|
||||
if (!isSupportedPlatform || !_isInitialized) return false;
|
||||
try {
|
||||
final supported = await _speech.isSupported();
|
||||
return supported;
|
||||
// speech_to_text isAvailable is set after initialize()
|
||||
return _speech.isAvailable;
|
||||
} catch (e) {
|
||||
// ignore errors checking on-device support
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Test method to verify on-device STT functionality
|
||||
/// Test method to verify on-device STT functionality.
|
||||
Future<String> testOnDeviceStt() async {
|
||||
try {
|
||||
// starting on-device STT test
|
||||
|
||||
// First ensure we're initialized
|
||||
await initialize();
|
||||
|
||||
@@ -167,24 +201,19 @@ class VoiceInputService {
|
||||
}
|
||||
|
||||
// Test if speech recognition is available
|
||||
final supported = await _speech.isSupported();
|
||||
if (!supported) {
|
||||
if (!_speech.isAvailable) {
|
||||
return 'Speech recognition service is not available on this device';
|
||||
}
|
||||
|
||||
// Set language if available, then start and stop quickly
|
||||
if (_selectedLocaleId != null) {
|
||||
try {
|
||||
await _speech.setLanguage(_selectedLocaleId!);
|
||||
} catch (_) {}
|
||||
}
|
||||
await _speech.start(SttRecognitionOptions(punctuation: true));
|
||||
// Start and stop quickly to test
|
||||
await _speech.listen(onResult: (_) {}, localeId: _selectedLocaleId);
|
||||
await Future.delayed(const Duration(milliseconds: 100));
|
||||
await _speech.stop();
|
||||
|
||||
return 'On-device STT test completed successfully. Local STT available: $_localSttAvailable, Selected locale: $_selectedLocaleId';
|
||||
return 'On-device STT test completed successfully. '
|
||||
'Local STT available: $_localSttAvailable, '
|
||||
'Selected locale: $_selectedLocaleId';
|
||||
} catch (e) {
|
||||
// on-device STT test failed
|
||||
return 'On-device STT test failed: $e';
|
||||
}
|
||||
}
|
||||
@@ -198,23 +227,23 @@ class VoiceInputService {
|
||||
|
||||
Future<void> _loadLocales(String deviceTag) async {
|
||||
_ensureFallbackLocale(deviceTag);
|
||||
List<String> langs = const [];
|
||||
try {
|
||||
langs = await _speech.getLanguages().timeout(
|
||||
_localeFetchTimeout,
|
||||
onTimeout: () => const [],
|
||||
);
|
||||
final sttLocales = await Future.value(
|
||||
_speech.locales(),
|
||||
).timeout(_localeFetchTimeout, onTimeout: () => const []);
|
||||
if (sttLocales.isEmpty) {
|
||||
return;
|
||||
}
|
||||
// Map speech_to_text LocaleName to our own LocaleName class
|
||||
_locales = sttLocales
|
||||
.map((loc) => LocaleName(loc.localeId, loc.name))
|
||||
.toList();
|
||||
_usingFallbackLocales = false;
|
||||
final match = _matchLocale(deviceTag);
|
||||
_selectedLocaleId = match.localeId;
|
||||
} catch (_) {
|
||||
// Engines such as Whisper Voice may not support this call.
|
||||
langs = const [];
|
||||
// Some engines may not support locale listing
|
||||
}
|
||||
if (langs.isEmpty) {
|
||||
return;
|
||||
}
|
||||
_locales = langs.map((locale) => LocaleName(locale, locale)).toList();
|
||||
_usingFallbackLocales = false;
|
||||
final match = _matchLocale(deviceTag);
|
||||
_selectedLocaleId = match.localeId;
|
||||
}
|
||||
|
||||
void _ensureFallbackLocale(String deviceTag) {
|
||||
@@ -255,7 +284,8 @@ class VoiceInputService {
|
||||
if (!_isListening) {
|
||||
return;
|
||||
}
|
||||
_localSttAvailable = false;
|
||||
// Don't permanently disable _localSttAvailable on transient errors
|
||||
// The next session should still try local STT
|
||||
final message = error?.toString().trim();
|
||||
final exception = Exception(
|
||||
(message == null || message.isEmpty)
|
||||
@@ -284,37 +314,39 @@ class VoiceInputService {
|
||||
_startingLocalStt = completer.future;
|
||||
_localSttActive = false;
|
||||
|
||||
await _ensureLocalSttReset();
|
||||
await _configureIosAudioSession();
|
||||
|
||||
if (_selectedLocaleId != null) {
|
||||
await _speech.setLanguage(_selectedLocaleId!);
|
||||
// Only reset if there's an active session to avoid startup delay
|
||||
if (_speech.isListening) {
|
||||
await _ensureLocalSttReset();
|
||||
// Give the platform a moment to fully release the audio session
|
||||
await Future.delayed(const Duration(milliseconds: 100));
|
||||
}
|
||||
|
||||
Future<void> attempt(bool offline) async {
|
||||
await _speech.start(
|
||||
SttRecognitionOptions(punctuation: true, offline: offline),
|
||||
);
|
||||
_localSttActive = true;
|
||||
}
|
||||
// Use user's configured silence duration for pause detection
|
||||
final settings = _ref?.read(appSettingsProvider);
|
||||
final pauseDuration = Duration(
|
||||
milliseconds: settings?.voiceSilenceDuration ?? 2000,
|
||||
);
|
||||
|
||||
try {
|
||||
await attempt(true);
|
||||
await _speech.listen(
|
||||
onResult: _handleSttResult,
|
||||
localeId: _selectedLocaleId,
|
||||
// Extended duration for voice calls - listen up to 60 seconds
|
||||
listenFor: const Duration(seconds: 60),
|
||||
// Use user's silence duration setting for pause detection
|
||||
pauseFor: pauseDuration,
|
||||
listenOptions: SpeechListenOptions(
|
||||
listenMode: ListenMode.dictation,
|
||||
cancelOnError: false,
|
||||
partialResults: true,
|
||||
autoPunctuation: true,
|
||||
enableHapticFeedback: false,
|
||||
),
|
||||
);
|
||||
_localSttActive = true;
|
||||
} catch (error) {
|
||||
_localSttActive = false;
|
||||
await _ensureLocalSttReset();
|
||||
if (Platform.isIOS && allowOnlineFallback) {
|
||||
try {
|
||||
await attempt(false);
|
||||
return;
|
||||
} catch (secondary) {
|
||||
await _ensureLocalSttReset();
|
||||
throw Exception(
|
||||
'On-device speech failed ($error); '
|
||||
'online fallback failed ($secondary).',
|
||||
);
|
||||
}
|
||||
}
|
||||
rethrow;
|
||||
} finally {
|
||||
completer.complete();
|
||||
@@ -322,6 +354,22 @@ class VoiceInputService {
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles a recognition result from the local speech recognizer.
///
/// Does nothing when `_isListening` is false. Otherwise stores
/// `result.recognizedWords` in `_currentText`, adds it to
/// `_textStreamController`, and publishes an intensity value derived from
/// how much the text grew. When `result.finalResult` is true,
/// `_stopListening()` is started without being awaited.
void _handleSttResult(SpeechRecognitionResult result) {
|
||||
if (!_isListening) return;
|
||||
final prevLen = _currentText.length;
|
||||
_currentText = result.recognizedWords;
|
||||
_textStreamController?.add(_currentText);
|
||||
// Intensity is based on text growth: the length increase is clamped to
// 0..50, divided by 5 and rounded up, then clamped to 0..10.
final delta = (_currentText.length - prevLen).clamp(0, 50);
|
||||
final mapped = (delta / 5.0).ceil();
|
||||
_lastIntensity = mapped.clamp(0, 10);
|
||||
try {
|
||||
_intensityController?.add(_lastIntensity);
|
||||
} catch (_) {}
|
||||
if (result.finalResult) {
|
||||
unawaited(_stopListening());
|
||||
}
|
||||
}
|
||||
|
||||
Future<Stream<String>> startListening() async {
|
||||
if (!_isInitialized) {
|
||||
throw Exception('Voice input not initialized');
|
||||
@@ -356,7 +404,6 @@ class VoiceInputService {
|
||||
(!shouldUseLocal && _preference != SttPreference.deviceOnly));
|
||||
|
||||
if (shouldUseLocal) {
|
||||
await _pinBackgroundMicrophone();
|
||||
_autoStopTimer?.cancel();
|
||||
_autoStopTimer = Timer(const Duration(seconds: 60), () {
|
||||
if (_isListening) {
|
||||
@@ -364,9 +411,7 @@ class VoiceInputService {
|
||||
}
|
||||
});
|
||||
try {
|
||||
final isStillAvailable = await _speech.isSupported();
|
||||
if (!isStillAvailable && _isListening) {
|
||||
_localSttAvailable = false;
|
||||
if (!_speech.isAvailable && _isListening) {
|
||||
_textStreamController?.addError(
|
||||
Exception('On-device speech recognition unavailable'),
|
||||
);
|
||||
@@ -377,50 +422,12 @@ class VoiceInputService {
|
||||
// ignore availability check errors
|
||||
}
|
||||
|
||||
_sttResultSub = _speech.onResultChanged.listen(
|
||||
(SttRecognition result) {
|
||||
if (!_isListening) return;
|
||||
final prevLen = _currentText.length;
|
||||
_currentText = result.text;
|
||||
_textStreamController?.add(_currentText);
|
||||
final delta = (_currentText.length - prevLen).clamp(0, 50);
|
||||
final mapped = (delta / 5.0).ceil();
|
||||
_lastIntensity = mapped.clamp(0, 10);
|
||||
try {
|
||||
_intensityController?.add(_lastIntensity);
|
||||
} catch (_) {}
|
||||
if (result.isFinal) {
|
||||
unawaited(_stopListening());
|
||||
}
|
||||
},
|
||||
onError: (error) {
|
||||
debugPrint('Local STT Error: $error');
|
||||
_handleLocalRecognizerError(error);
|
||||
},
|
||||
);
|
||||
|
||||
_sttStateSub = _speech.onStateChanged.listen(
|
||||
(state) {
|
||||
debugPrint('Local STT State: $state');
|
||||
if (state == SttState.start) {
|
||||
_localSttActive = true;
|
||||
} else if (state == SttState.stop) {
|
||||
_localSttActive = false;
|
||||
}
|
||||
},
|
||||
onError: (error) {
|
||||
debugPrint('Local STT State Error: $error');
|
||||
_handleLocalRecognizerError(error);
|
||||
},
|
||||
);
|
||||
|
||||
try {
|
||||
debugPrint('Starting local recognition...');
|
||||
await _startLocalRecognition(allowOnlineFallback: !prefersDeviceOnly);
|
||||
debugPrint('Local recognition started');
|
||||
} catch (error) {
|
||||
debugPrint('Failed to start local recognition: $error');
|
||||
_localSttAvailable = false;
|
||||
if (!_isListening) {
|
||||
return _textStreamController!.stream;
|
||||
}
|
||||
@@ -518,18 +525,6 @@ class VoiceInputService {
|
||||
await pendingStart;
|
||||
} catch (_) {}
|
||||
}
|
||||
if (_sttResultSub != null) {
|
||||
try {
|
||||
await _sttResultSub?.cancel();
|
||||
} catch (_) {}
|
||||
_sttResultSub = null;
|
||||
}
|
||||
if (_sttStateSub != null) {
|
||||
try {
|
||||
await _sttStateSub?.cancel();
|
||||
} catch (_) {}
|
||||
_sttStateSub = null;
|
||||
}
|
||||
|
||||
final shouldStopStt = _localSttActive && _localSttAvailable;
|
||||
_localSttActive = false;
|
||||
@@ -538,21 +533,6 @@ class VoiceInputService {
|
||||
await _speech.stop();
|
||||
} catch (_) {}
|
||||
}
|
||||
if (Platform.isIOS) {
|
||||
try {
|
||||
await _speech.ios?.setAudioSessionActive(false);
|
||||
} catch (_) {}
|
||||
}
|
||||
}
|
||||
|
||||
Future<void> _pinBackgroundMicrophone() async {
|
||||
if (!Platform.isIOS || _backgroundMicPinned) return;
|
||||
try {
|
||||
await BackgroundStreamingHandler.instance.startBackgroundExecution(const [
|
||||
_backgroundSttStreamId,
|
||||
], requiresMicrophone: true);
|
||||
_backgroundMicPinned = true;
|
||||
} catch (_) {}
|
||||
}
|
||||
|
||||
Future<void> _releaseBackgroundMicrophone() async {
|
||||
@@ -567,29 +547,7 @@ class VoiceInputService {
|
||||
|
||||
Future<void> _ensureLocalSttReset() async {
|
||||
try {
|
||||
await _speech.stop();
|
||||
} catch (_) {}
|
||||
if (Platform.isIOS) {
|
||||
try {
|
||||
await _speech.ios?.setAudioSessionActive(false);
|
||||
} catch (_) {}
|
||||
}
|
||||
}
|
||||
|
||||
Future<void> _configureIosAudioSession() async {
|
||||
if (!Platform.isIOS) return;
|
||||
final ios = _speech.ios;
|
||||
if (ios == null) return;
|
||||
try {
|
||||
await ios.setAudioSessionCategory(
|
||||
category: IosAudioCategory.playAndRecord,
|
||||
options: [
|
||||
IosAudioCategoryOptions.allowBluetooth,
|
||||
IosAudioCategoryOptions.defaultToSpeaker,
|
||||
IosAudioCategoryOptions.duckOthers,
|
||||
],
|
||||
);
|
||||
await ios.setAudioSessionActive(true);
|
||||
await _speech.cancel();
|
||||
} catch (_) {}
|
||||
}
|
||||
|
||||
@@ -628,13 +586,6 @@ class VoiceInputService {
|
||||
manageBluetooth: true,
|
||||
useLegacy: false,
|
||||
),
|
||||
iosConfig: IosRecordConfig(
|
||||
categoryOptions: [
|
||||
IosAudioCategoryOption.allowBluetooth,
|
||||
IosAudioCategoryOption.defaultToSpeaker,
|
||||
IosAudioCategoryOption.duckOthers,
|
||||
],
|
||||
),
|
||||
),
|
||||
);
|
||||
} catch (error) {
|
||||
@@ -737,46 +688,49 @@ class VoiceInputService {
|
||||
if (samples.isEmpty) {
|
||||
return Uint8List(0);
|
||||
}
|
||||
final Int16List pcm = Int16List(samples.length);
|
||||
for (var i = 0; i < samples.length; i++) {
|
||||
final clamped = samples[i].clamp(-1.0, 1.0);
|
||||
final scaled = (clamped * 32767).round().clamp(-32768, 32767);
|
||||
pcm[i] = scaled;
|
||||
}
|
||||
|
||||
final dataLength = pcm.lengthInBytes;
|
||||
final dataLength = samples.length * 2; // 2 bytes per sample (16-bit)
|
||||
final bytesPerSample = 2;
|
||||
final numChannels = 1;
|
||||
final byteRate = _vadSampleRate * numChannels * bytesPerSample;
|
||||
final blockAlign = numChannels * bytesPerSample;
|
||||
const headerSize = 44;
|
||||
|
||||
final builder = BytesBuilder();
|
||||
builder.add(ascii.encode('RIFF'));
|
||||
builder.add(_int32Le(36 + dataLength));
|
||||
builder.add(ascii.encode('WAVE'));
|
||||
builder.add(ascii.encode('fmt '));
|
||||
builder.add(_int32Le(16));
|
||||
builder.add(_int16Le(1));
|
||||
builder.add(_int16Le(numChannels));
|
||||
builder.add(_int32Le(_vadSampleRate));
|
||||
builder.add(_int32Le(byteRate));
|
||||
builder.add(_int16Le(blockAlign));
|
||||
builder.add(_int16Le(16));
|
||||
builder.add(ascii.encode('data'));
|
||||
builder.add(_int32Le(dataLength));
|
||||
builder.add(Uint8List.view(pcm.buffer));
|
||||
return builder.toBytes();
|
||||
final totalSize = headerSize + dataLength;
|
||||
final buffer = Uint8List(totalSize);
|
||||
final view = ByteData.view(buffer.buffer);
|
||||
|
||||
// RIFF chunk
|
||||
buffer.setRange(0, 4, ascii.encode('RIFF'));
|
||||
view.setUint32(4, 36 + dataLength, Endian.little);
|
||||
buffer.setRange(8, 12, ascii.encode('WAVE'));
|
||||
|
||||
// fmt chunk
|
||||
buffer.setRange(12, 16, ascii.encode('fmt '));
|
||||
view.setUint32(16, 16, Endian.little); // PCM chunk size
|
||||
view.setUint16(20, 1, Endian.little); // AudioFormat (1 = PCM)
|
||||
view.setUint16(22, numChannels, Endian.little);
|
||||
view.setUint32(24, _vadSampleRate, Endian.little);
|
||||
view.setUint32(28, byteRate, Endian.little);
|
||||
view.setUint16(32, blockAlign, Endian.little);
|
||||
view.setUint16(34, 16, Endian.little); // BitsPerSample
|
||||
|
||||
// data chunk
|
||||
buffer.setRange(36, 40, ascii.encode('data'));
|
||||
view.setUint32(40, dataLength, Endian.little);
|
||||
|
||||
// Write samples
|
||||
var offset = 44;
|
||||
for (var i = 0; i < samples.length; i++) {
|
||||
final clamped = samples[i].clamp(-1.0, 1.0);
|
||||
// Convert float to 16-bit PCM
|
||||
final pcm = (clamped * 32767).round().clamp(-32768, 32767);
|
||||
view.setInt16(offset, pcm, Endian.little);
|
||||
offset += 2;
|
||||
}
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
List<int> _int16Le(int value) => [value & 0xff, (value >> 8) & 0xff];
|
||||
|
||||
List<int> _int32Le(int value) => [
|
||||
value & 0xff,
|
||||
(value >> 8) & 0xff,
|
||||
(value >> 16) & 0xff,
|
||||
(value >> 24) & 0xff,
|
||||
];
|
||||
|
||||
String? _languageForServer() {
|
||||
final locale = _selectedLocaleId;
|
||||
if (locale != null && locale.isNotEmpty) {
|
||||
@@ -910,7 +864,7 @@ class VoiceInputService {
|
||||
unawaited(_vadHandler.dispose());
|
||||
unawaited(_microphonePermissionProbe.dispose());
|
||||
try {
|
||||
_speech.dispose().catchError((_) {});
|
||||
_speech.stop();
|
||||
} catch (_) {}
|
||||
}
|
||||
}
|
||||
|
||||
56
pubspec.lock
56
pubspec.lock
@@ -1109,6 +1109,14 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.3.0"
|
||||
pedantic:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: pedantic
|
||||
sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.11.1"
|
||||
petitparser:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -1514,6 +1522,30 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.10.1"
|
||||
speech_to_text:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: speech_to_text
|
||||
sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "7.3.0"
|
||||
speech_to_text_platform_interface:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: speech_to_text_platform_interface
|
||||
sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.3.0"
|
||||
speech_to_text_windows:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: speech_to_text_windows
|
||||
sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.0.0+beta.8"
|
||||
sqflite:
|
||||
dependency: transitive
|
||||
description:
|
||||
@@ -1594,30 +1626,6 @@ packages:
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.4.1"
|
||||
stts:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: stts
|
||||
sha256: "166ca37d241652cdefb9c31e18a7be93ff4eb847382ae32c5563e07bf44bf1d8"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.2.6"
|
||||
stts_platform_interface:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: stts_platform_interface
|
||||
sha256: "6b82268d59d608e9b5accdadf0e7ccaea7928e8fce68ca393111fa7193d1bf10"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.2.0"
|
||||
stts_web:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: stts_web
|
||||
sha256: "62625c3b4d86076820d687dc468845a0f54c7dd4ead155b58f1e5864488c7f1c"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.1.0"
|
||||
synchronized:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
||||
@@ -45,7 +45,7 @@ dependencies:
|
||||
|
||||
# Platform Features
|
||||
vad: ^0.0.7+1
|
||||
stts: ^1.2.5
|
||||
speech_to_text: ^7.3.0
|
||||
record: ^6.1.2
|
||||
flutter_tts: ^4.2.3
|
||||
audioplayers: ^6.5.1
|
||||
|
||||
Reference in New Issue
Block a user