From d092bb2e44cfa90ef56141ee79e45a1b2479999e Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Thu, 27 Nov 2025 18:41:41 +0530 Subject: [PATCH] fix(audio): optimize audio configuration for iOS and Android platforms --- ios/Podfile.lock | 21 +- lib/core/services/callkit_service.dart | 25 +- .../chat/services/text_to_speech_service.dart | 25 +- .../chat/services/voice_call_service.dart | 44 ++- .../chat/services/voice_input_service.dart | 370 ++++++++---------- pubspec.lock | 56 +-- pubspec.yaml | 2 +- 7 files changed, 276 insertions(+), 267 deletions(-) diff --git a/ios/Podfile.lock b/ios/Podfile.lock index 1365e15..e21ec0d 100644 --- a/ios/Podfile.lock +++ b/ios/Podfile.lock @@ -5,6 +5,9 @@ PODS: - connectivity_plus (0.0.1): - Flutter - CryptoSwift (1.8.4) + - CwlCatchException (2.2.1): + - CwlCatchExceptionSupport (~> 2.2.1) + - CwlCatchExceptionSupport (2.2.1) - DKImagePickerController/Core (4.3.9): - DKImagePickerController/ImageDataManager - DKImagePickerController/Resource @@ -85,11 +88,13 @@ PODS: - shared_preferences_foundation (0.0.1): - Flutter - FlutterMacOS + - speech_to_text (7.2.0): + - CwlCatchException + - Flutter + - FlutterMacOS - sqflite_darwin (0.0.4): - Flutter - FlutterMacOS - - stts (1.0.0): - - Flutter - SwiftyGif (5.4.5) - url_launcher_ios (0.0.1): - Flutter @@ -122,8 +127,8 @@ DEPENDENCIES: - share_handler_ios_models (from `.symlinks/plugins/share_handler_ios/ios/Models`) - share_plus (from `.symlinks/plugins/share_plus/ios`) - shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`) + - speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`) - sqflite_darwin (from `.symlinks/plugins/sqflite_darwin/darwin`) - - stts (from `.symlinks/plugins/stts/ios`) - url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`) - vad (from `.symlinks/plugins/vad/ios`) - wakelock_plus (from `.symlinks/plugins/wakelock_plus/ios`) @@ -132,6 +137,8 @@ DEPENDENCIES: SPEC REPOS: trunk: - CryptoSwift + - CwlCatchException + - CwlCatchExceptionSupport - DKImagePickerController - DKPhotoGallery - onnxruntime-c @@ -178,10 +185,10 @@ EXTERNAL SOURCES: :path: ".symlinks/plugins/share_plus/ios" shared_preferences_foundation: :path: ".symlinks/plugins/shared_preferences_foundation/darwin" + speech_to_text: + :path: ".symlinks/plugins/speech_to_text/darwin" sqflite_darwin: :path: ".symlinks/plugins/sqflite_darwin/darwin" - stts: - :path: ".symlinks/plugins/stts/ios" url_launcher_ios: :path: ".symlinks/plugins/url_launcher_ios/ios" vad: @@ -195,6 +202,8 @@ SPEC CHECKSUMS: audioplayers_darwin: 4f9ca89d92d3d21cec7ec580e78ca888e5fb68bd connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd CryptoSwift: e64e11850ede528a02a0f3e768cec8e9d92ecb90 + CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a + CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60 file_picker: a0560bc09d61de87f12d246fc47d2119e6ef37be @@ -217,8 +226,8 @@ SPEC CHECKSUMS: share_handler_ios_models: fc638c9b4330dc7f082586c92aee9dfa0b87b871 share_plus: 50da8cb520a8f0f65671c6c6a99b3617ed10a58a shared_preferences_foundation: 7036424c3d8ec98dfe75ff1667cb0cd531ec82bb + speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19 sqflite_darwin: 20b2a3a3b70e43edae938624ce550a3cbf66a3d0 - stts: 1a48df645bb516e86e4121d5253b582749a1d3a6 SwiftyGif: 706c60cf65fa2bc5ee0313beece843c8eb8194d4 url_launcher_ios: 7a95fa5b60cc718a708b8f2966718e93db0cef1b vad: 7934867589afe53567f492df66fb1615f2185822 diff --git a/lib/core/services/callkit_service.dart b/lib/core/services/callkit_service.dart index 61c452f..b27deb3 100644 --- a/lib/core/services/callkit_service.dart +++ b/lib/core/services/callkit_service.dart @@ -130,6 +130,29 @@ class CallKitService { return >[]; } + /// Checks for active calls and clears them if they are not tracked by the app. + Future checkAndCleanActiveCalls() async { + if (!_shouldUseCallKit('check active calls')) return; + + try { + final calls = await activeCalls(); + if (calls.isNotEmpty) { + developer.log( + 'Found ${calls.length} active CallKit calls on startup. Cleaning up.', + name: 'callkit', + ); + await endAllCalls(); + } + } catch (error, stackTrace) { + developer.log( + 'Failed to clean up active calls: $error', + name: 'callkit', + error: error, + stackTrace: stackTrace, + ); + } + } + /// Stream of CallKit events from the native layer. Stream get events { if (!_callKitAllowed) { @@ -182,7 +205,7 @@ class CallKitService { ios: const IOSParams( handleType: 'generic', supportsVideo: false, - audioSessionMode: 'default', + audioSessionMode: 'voiceChat', audioSessionActive: true, audioSessionPreferredSampleRate: 44100.0, audioSessionPreferredIOBufferDuration: 0.005, diff --git a/lib/features/chat/services/text_to_speech_service.dart b/lib/features/chat/services/text_to_speech_service.dart index 95f86d6..6225e55 100644 --- a/lib/features/chat/services/text_to_speech_service.dart +++ b/lib/features/chat/services/text_to_speech_service.dart @@ -70,20 +70,12 @@ class TextToSpeechService { } }); - if (!kIsWeb && Platform.isIOS) { - final context = AudioContext( - iOS: AudioContextIOS( - category: AVAudioSessionCategory.playAndRecord, - options: const { - AVAudioSessionOptions.defaultToSpeaker, - AVAudioSessionOptions.mixWithOthers, - AVAudioSessionOptions.allowBluetooth, - AVAudioSessionOptions.allowBluetoothA2DP, - }, + if (!kIsWeb && Platform.isAndroid) { + _player.setAudioContext( + AudioContext( + android: const AudioContextAndroid(), ), - android: const AudioContextAndroid(), ); - _player.setAudioContext(context); } } @@ -103,13 +95,8 @@ class TextToSpeechService { if (!kIsWeb && Platform.isIOS) { await _tts.setSharedInstance(true); - await _tts - .setIosAudioCategory(IosTextToSpeechAudioCategory.playAndRecord, [ - IosTextToSpeechAudioCategoryOptions.mixWithOthers, - IosTextToSpeechAudioCategoryOptions.defaultToSpeaker, - IosTextToSpeechAudioCategoryOptions.allowBluetooth, - IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP, - ]); + // Rely on the native VoiceBackgroundAudioManager for iOS + // audio session configuration to avoid routing conflicts. } if (_engine != TtsEngine.server) { diff --git a/lib/features/chat/services/voice_call_service.dart b/lib/features/chat/services/voice_call_service.dart index d53912c..7c2df2f 100644 --- a/lib/features/chat/services/voice_call_service.dart +++ b/lib/features/chat/services/voice_call_service.dart @@ -123,6 +123,11 @@ class VoiceCallService { _pauseReasons.clear(); _listeningPaused = false; + // Clean up any zombie calls from previous sessions + if (_callKitEnabled) { + unawaited(_callKitService.checkAndCleanActiveCalls()); + } + // Initialize notification service await _notificationService.initialize(); @@ -312,9 +317,17 @@ class VoiceCallService { throw Exception('Failed to establish socket connection'); } + // Initialize voice input first so we know which STT mode will be used + await _voiceInput.initialize(); + + // Only activate VoiceBackgroundAudioManager for server STT + // For local STT, speech_to_text handles its own iOS audio session + final useServerMic = + (_voiceInput.prefersServerOnly && _voiceInput.hasServerStt) || + (!_voiceInput.hasLocalStt && _voiceInput.hasServerStt); await BackgroundStreamingHandler.instance.startBackgroundExecution(const [ _voiceCallStreamId, - ], requiresMicrophone: true); + ], requiresMicrophone: useServerMic); // Set up periodic keep-alive to refresh wake lock (every 5 minutes) _keepAliveTimer?.cancel(); @@ -385,10 +398,11 @@ class VoiceCallService { throw Exception('Preferred speech recognition engine is unavailable'); } - _updateState(VoiceCallState.listening); - final stream = await _voiceInput.beginListening(); + // Only mark as listening after STT has successfully started. + _updateState(VoiceCallState.listening); + _transcriptSubscription = stream.listen( (text) { if (_isDisposed) return; @@ -401,13 +415,27 @@ class VoiceCallService { }, onDone: () async { if (_isDisposed) return; + + final trimmed = _accumulatedTranscript.trim(); // User stopped speaking, send message to assistant - if (_accumulatedTranscript.trim().isNotEmpty) { - await _sendMessageToAssistant(_accumulatedTranscript); - } else { - // No input, restart listening - await _startListening(); + if (trimmed.isNotEmpty) { + await _sendMessageToAssistant(trimmed); + return; } + + // No input – avoid a tight restart loop and only restart + // while the call is still active and not paused. + await Future.delayed(const Duration(milliseconds: 250)); + if (_isDisposed) return; + if (_state == VoiceCallState.disconnected || + _state == VoiceCallState.error) { + return; + } + if (_pauseReasons.isNotEmpty) { + // Respect paused state; resumeListening() will restart if needed. + return; + } + await _startListening(); }, ); diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart index 5f2a60f..34eed69 100644 --- a/lib/features/chat/services/voice_input_service.dart +++ b/lib/features/chat/services/voice_input_service.dart @@ -5,10 +5,10 @@ import 'dart:typed_data'; import 'package:flutter/widgets.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; -import 'package:record/record.dart' - hide IosAudioCategory, IosAudioCategoryOptions; +import 'package:record/record.dart'; import 'package:riverpod_annotation/riverpod_annotation.dart'; -import 'package:stts/stts.dart'; +import 'package:speech_to_text/speech_recognition_result.dart'; +import 'package:speech_to_text/speech_to_text.dart'; import 'package:vad/vad.dart'; import '../../../core/providers/app_providers.dart'; @@ -18,7 +18,7 @@ import '../../../core/services/settings_service.dart'; part 'voice_input_service.g.dart'; -// Lightweight replacement for previous stt.LocaleName used across the UI +/// Lightweight locale representation used across the UI. class LocaleName { final String localeId; final String name; @@ -37,7 +37,7 @@ class VoiceInputService { static const String _backgroundSttStreamId = 'voice-input-stt'; final VadHandler _vadHandler = VadHandler.create(); - final Stt _speech = Stt(); + final SpeechToText _speech = SpeechToText(); final AudioRecorder _microphonePermissionProbe = AudioRecorder(); final ApiService? _api; final Ref? _ref; @@ -64,8 +64,6 @@ class VoiceInputService { Stream get textStream => _textStreamController?.stream ?? const Stream.empty(); Timer? _autoStopTimer; - StreamSubscription? _sttResultSub; - StreamSubscription? _sttStateSub; StreamSubscription>? _vadSpeechEndSub; StreamSubscription<({double isSpeech, double notSpeech, List frame})>? _vadFrameSub; @@ -100,8 +98,11 @@ class VoiceInputService { } // Prepare local speech recognizer try { - // Check permission and supported status - _localSttAvailable = await _speech.isSupported(); + // Initialize speech_to_text and check availability + _localSttAvailable = await _speech.initialize( + onStatus: _handleSttStatus, + onError: _handleSttError, + ); if (_localSttAvailable) { await _loadLocales(deviceTag); } @@ -112,21 +113,56 @@ class VoiceInputService { return true; } + void _handleSttStatus(String status) { + debugPrint('Local STT Status: $status'); + if (status == 'listening') { + _localSttActive = true; + } else if (status == 'notListening' || status == 'done') { + final wasActive = _localSttActive; + _localSttActive = false; + // If we were actively listening and the platform stopped us, + // properly close the stream so voice call service can restart + if (wasActive && _isListening && !_usingServerStt) { + debugPrint('Platform stopped listening, closing stream'); + unawaited(_stopListening()); + } + } + } + + void _handleSttError(dynamic error) { + debugPrint('Local STT Error: $error'); + final errorStr = error.toString().toLowerCase(); + + // These errors are non-fatal - they just mean no speech was detected + // or the session timed out. The status handler will close the stream + // and voice call service will restart listening. + final nonFatalErrors = [ + 'error_no_match', + 'error_speech_timeout', + 'error_busy', // Temporary, can retry + ]; + + final isNonFatal = nonFatalErrors.any((e) => errorStr.contains(e)); + if (isNonFatal) { + debugPrint('Non-fatal STT error, allowing normal stream close'); + // Let the status handler / auto-stop timer close the stream. + // We do not treat this as a fatal failure for the current session. + return; + } + + // Fatal errors - mark STT as unavailable + _handleLocalRecognizerError(error); + } + Future checkPermissions() async { final micGranted = await _ensureMicrophonePermission(); if (!micGranted) { return false; } - if (_localSttAvailable && _preference != SttPreference.serverOnly) { - try { - final sttGranted = await _speech.hasPermission(); - if (!sttGranted) { - _localSttAvailable = false; - } - } catch (_) { - _localSttAvailable = false; - } - } + // Note: Don't disable _localSttAvailable based on hasPermission check + // The permission might be granted lazily when listen() is called on iOS, + // and the check can be unreliable. Let speech_to_text handle permissions + // during the actual listen() call. return true; } @@ -136,23 +172,21 @@ class VoiceInputService { bool get hasLocalStt => _localSttAvailable; bool get localeMetadataIncomplete => _usingFallbackLocales; - // Add a method to check if on-device STT is properly supported + /// Checks if on-device STT is properly supported. Future checkOnDeviceSupport() async { if (!isSupportedPlatform || !_isInitialized) return false; try { - final supported = await _speech.isSupported(); - return supported; + // speech_to_text isAvailable is set after initialize() + return _speech.isAvailable; } catch (e) { // ignore errors checking on-device support return false; } } - // Test method to verify on-device STT functionality + /// Test method to verify on-device STT functionality. Future testOnDeviceStt() async { try { - // starting on-device STT test - // First ensure we're initialized await initialize(); @@ -167,24 +201,19 @@ class VoiceInputService { } // Test if speech recognition is available - final supported = await _speech.isSupported(); - if (!supported) { + if (!_speech.isAvailable) { return 'Speech recognition service is not available on this device'; } - // Set language if available, then start and stop quickly - if (_selectedLocaleId != null) { - try { - await _speech.setLanguage(_selectedLocaleId!); - } catch (_) {} - } - await _speech.start(SttRecognitionOptions(punctuation: true)); + // Start and stop quickly to test + await _speech.listen(onResult: (_) {}, localeId: _selectedLocaleId); await Future.delayed(const Duration(milliseconds: 100)); await _speech.stop(); - return 'On-device STT test completed successfully. Local STT available: $_localSttAvailable, Selected locale: $_selectedLocaleId'; + return 'On-device STT test completed successfully. ' + 'Local STT available: $_localSttAvailable, ' + 'Selected locale: $_selectedLocaleId'; } catch (e) { - // on-device STT test failed return 'On-device STT test failed: $e'; } } @@ -198,23 +227,23 @@ class VoiceInputService { Future _loadLocales(String deviceTag) async { _ensureFallbackLocale(deviceTag); - List langs = const []; try { - langs = await _speech.getLanguages().timeout( - _localeFetchTimeout, - onTimeout: () => const [], - ); + final sttLocales = await Future.value( + _speech.locales(), + ).timeout(_localeFetchTimeout, onTimeout: () => const []); + if (sttLocales.isEmpty) { + return; + } + // Map speech_to_text LocaleName to our own LocaleName class + _locales = sttLocales + .map((loc) => LocaleName(loc.localeId, loc.name)) + .toList(); + _usingFallbackLocales = false; + final match = _matchLocale(deviceTag); + _selectedLocaleId = match.localeId; } catch (_) { - // Engines such as Whisper Voice may not support this call. - langs = const []; + // Some engines may not support locale listing } - if (langs.isEmpty) { - return; - } - _locales = langs.map((locale) => LocaleName(locale, locale)).toList(); - _usingFallbackLocales = false; - final match = _matchLocale(deviceTag); - _selectedLocaleId = match.localeId; } void _ensureFallbackLocale(String deviceTag) { @@ -255,7 +284,8 @@ class VoiceInputService { if (!_isListening) { return; } - _localSttAvailable = false; + // Don't permanently disable _localSttAvailable on transient errors + // The next session should still try local STT final message = error?.toString().trim(); final exception = Exception( (message == null || message.isEmpty) @@ -284,37 +314,39 @@ class VoiceInputService { _startingLocalStt = completer.future; _localSttActive = false; - await _ensureLocalSttReset(); - await _configureIosAudioSession(); - - if (_selectedLocaleId != null) { - await _speech.setLanguage(_selectedLocaleId!); + // Only reset if there's an active session to avoid startup delay + if (_speech.isListening) { + await _ensureLocalSttReset(); + // Give the platform a moment to fully release the audio session + await Future.delayed(const Duration(milliseconds: 100)); } - Future attempt(bool offline) async { - await _speech.start( - SttRecognitionOptions(punctuation: true, offline: offline), - ); - _localSttActive = true; - } + // Use user's configured silence duration for pause detection + final settings = _ref?.read(appSettingsProvider); + final pauseDuration = Duration( + milliseconds: settings?.voiceSilenceDuration ?? 2000, + ); try { - await attempt(true); + await _speech.listen( + onResult: _handleSttResult, + localeId: _selectedLocaleId, + // Extended duration for voice calls - listen up to 60 seconds + listenFor: const Duration(seconds: 60), + // Use user's silence duration setting for pause detection + pauseFor: pauseDuration, + listenOptions: SpeechListenOptions( + listenMode: ListenMode.dictation, + cancelOnError: false, + partialResults: true, + autoPunctuation: true, + enableHapticFeedback: false, + ), + ); + _localSttActive = true; } catch (error) { _localSttActive = false; await _ensureLocalSttReset(); - if (Platform.isIOS && allowOnlineFallback) { - try { - await attempt(false); - return; - } catch (secondary) { - await _ensureLocalSttReset(); - throw Exception( - 'On-device speech failed ($error); ' - 'online fallback failed ($secondary).', - ); - } - } rethrow; } finally { completer.complete(); @@ -322,6 +354,22 @@ class VoiceInputService { } } + void _handleSttResult(SpeechRecognitionResult result) { + if (!_isListening) return; + final prevLen = _currentText.length; + _currentText = result.recognizedWords; + _textStreamController?.add(_currentText); + final delta = (_currentText.length - prevLen).clamp(0, 50); + final mapped = (delta / 5.0).ceil(); + _lastIntensity = mapped.clamp(0, 10); + try { + _intensityController?.add(_lastIntensity); + } catch (_) {} + if (result.finalResult) { + unawaited(_stopListening()); + } + } + Future> startListening() async { if (!_isInitialized) { throw Exception('Voice input not initialized'); @@ -356,7 +404,6 @@ class VoiceInputService { (!shouldUseLocal && _preference != SttPreference.deviceOnly)); if (shouldUseLocal) { - await _pinBackgroundMicrophone(); _autoStopTimer?.cancel(); _autoStopTimer = Timer(const Duration(seconds: 60), () { if (_isListening) { @@ -364,9 +411,7 @@ class VoiceInputService { } }); try { - final isStillAvailable = await _speech.isSupported(); - if (!isStillAvailable && _isListening) { - _localSttAvailable = false; + if (!_speech.isAvailable && _isListening) { _textStreamController?.addError( Exception('On-device speech recognition unavailable'), ); @@ -377,50 +422,12 @@ class VoiceInputService { // ignore availability check errors } - _sttResultSub = _speech.onResultChanged.listen( - (SttRecognition result) { - if (!_isListening) return; - final prevLen = _currentText.length; - _currentText = result.text; - _textStreamController?.add(_currentText); - final delta = (_currentText.length - prevLen).clamp(0, 50); - final mapped = (delta / 5.0).ceil(); - _lastIntensity = mapped.clamp(0, 10); - try { - _intensityController?.add(_lastIntensity); - } catch (_) {} - if (result.isFinal) { - unawaited(_stopListening()); - } - }, - onError: (error) { - debugPrint('Local STT Error: $error'); - _handleLocalRecognizerError(error); - }, - ); - - _sttStateSub = _speech.onStateChanged.listen( - (state) { - debugPrint('Local STT State: $state'); - if (state == SttState.start) { - _localSttActive = true; - } else if (state == SttState.stop) { - _localSttActive = false; - } - }, - onError: (error) { - debugPrint('Local STT State Error: $error'); - _handleLocalRecognizerError(error); - }, - ); - try { debugPrint('Starting local recognition...'); await _startLocalRecognition(allowOnlineFallback: !prefersDeviceOnly); debugPrint('Local recognition started'); } catch (error) { debugPrint('Failed to start local recognition: $error'); - _localSttAvailable = false; if (!_isListening) { return _textStreamController!.stream; } @@ -518,18 +525,6 @@ class VoiceInputService { await pendingStart; } catch (_) {} } - if (_sttResultSub != null) { - try { - await _sttResultSub?.cancel(); - } catch (_) {} - _sttResultSub = null; - } - if (_sttStateSub != null) { - try { - await _sttStateSub?.cancel(); - } catch (_) {} - _sttStateSub = null; - } final shouldStopStt = _localSttActive && _localSttAvailable; _localSttActive = false; @@ -538,21 +533,6 @@ class VoiceInputService { await _speech.stop(); } catch (_) {} } - if (Platform.isIOS) { - try { - await _speech.ios?.setAudioSessionActive(false); - } catch (_) {} - } - } - - Future _pinBackgroundMicrophone() async { - if (!Platform.isIOS || _backgroundMicPinned) return; - try { - await BackgroundStreamingHandler.instance.startBackgroundExecution(const [ - _backgroundSttStreamId, - ], requiresMicrophone: true); - _backgroundMicPinned = true; - } catch (_) {} } Future _releaseBackgroundMicrophone() async { @@ -567,29 +547,7 @@ class VoiceInputService { Future _ensureLocalSttReset() async { try { - await _speech.stop(); - } catch (_) {} - if (Platform.isIOS) { - try { - await _speech.ios?.setAudioSessionActive(false); - } catch (_) {} - } - } - - Future _configureIosAudioSession() async { - if (!Platform.isIOS) return; - final ios = _speech.ios; - if (ios == null) return; - try { - await ios.setAudioSessionCategory( - category: IosAudioCategory.playAndRecord, - options: [ - IosAudioCategoryOptions.allowBluetooth, - IosAudioCategoryOptions.defaultToSpeaker, - IosAudioCategoryOptions.duckOthers, - ], - ); - await ios.setAudioSessionActive(true); + await _speech.cancel(); } catch (_) {} } @@ -628,13 +586,6 @@ class VoiceInputService { manageBluetooth: true, useLegacy: false, ), - iosConfig: IosRecordConfig( - categoryOptions: [ - IosAudioCategoryOption.allowBluetooth, - IosAudioCategoryOption.defaultToSpeaker, - IosAudioCategoryOption.duckOthers, - ], - ), ), ); } catch (error) { @@ -737,46 +688,49 @@ class VoiceInputService { if (samples.isEmpty) { return Uint8List(0); } - final Int16List pcm = Int16List(samples.length); - for (var i = 0; i < samples.length; i++) { - final clamped = samples[i].clamp(-1.0, 1.0); - final scaled = (clamped * 32767).round().clamp(-32768, 32767); - pcm[i] = scaled; - } - - final dataLength = pcm.lengthInBytes; + final dataLength = samples.length * 2; // 2 bytes per sample (16-bit) final bytesPerSample = 2; final numChannels = 1; final byteRate = _vadSampleRate * numChannels * bytesPerSample; final blockAlign = numChannels * bytesPerSample; + const headerSize = 44; - final builder = BytesBuilder(); - builder.add(ascii.encode('RIFF')); - builder.add(_int32Le(36 + dataLength)); - builder.add(ascii.encode('WAVE')); - builder.add(ascii.encode('fmt ')); - builder.add(_int32Le(16)); - builder.add(_int16Le(1)); - builder.add(_int16Le(numChannels)); - builder.add(_int32Le(_vadSampleRate)); - builder.add(_int32Le(byteRate)); - builder.add(_int16Le(blockAlign)); - builder.add(_int16Le(16)); - builder.add(ascii.encode('data')); - builder.add(_int32Le(dataLength)); - builder.add(Uint8List.view(pcm.buffer)); - return builder.toBytes(); + final totalSize = headerSize + dataLength; + final buffer = Uint8List(totalSize); + final view = ByteData.view(buffer.buffer); + + // RIFF chunk + buffer.setRange(0, 4, ascii.encode('RIFF')); + view.setUint32(4, 36 + dataLength, Endian.little); + buffer.setRange(8, 12, ascii.encode('WAVE')); + + // fmt chunk + buffer.setRange(12, 16, ascii.encode('fmt ')); + view.setUint32(16, 16, Endian.little); // PCM chunk size + view.setUint16(20, 1, Endian.little); // AudioFormat (1 = PCM) + view.setUint16(22, numChannels, Endian.little); + view.setUint32(24, _vadSampleRate, Endian.little); + view.setUint32(28, byteRate, Endian.little); + view.setUint16(32, blockAlign, Endian.little); + view.setUint16(34, 16, Endian.little); // BitsPerSample + + // data chunk + buffer.setRange(36, 40, ascii.encode('data')); + view.setUint32(40, dataLength, Endian.little); + + // Write samples + var offset = 44; + for (var i = 0; i < samples.length; i++) { + final clamped = samples[i].clamp(-1.0, 1.0); + // Convert float to 16-bit PCM + final pcm = (clamped * 32767).round().clamp(-32768, 32767); + view.setInt16(offset, pcm, Endian.little); + offset += 2; + } + + return buffer; } - List _int16Le(int value) => [value & 0xff, (value >> 8) & 0xff]; - - List _int32Le(int value) => [ - value & 0xff, - (value >> 8) & 0xff, - (value >> 16) & 0xff, - (value >> 24) & 0xff, - ]; - String? _languageForServer() { final locale = _selectedLocaleId; if (locale != null && locale.isNotEmpty) { @@ -910,7 +864,7 @@ class VoiceInputService { unawaited(_vadHandler.dispose()); unawaited(_microphonePermissionProbe.dispose()); try { - _speech.dispose().catchError((_) {}); + _speech.stop(); } catch (_) {} } } diff --git a/pubspec.lock b/pubspec.lock index 6e13dd2..3c31925 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -1109,6 +1109,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.3.0" + pedantic: + dependency: transitive + description: + name: pedantic + sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602" + url: "https://pub.dev" + source: hosted + version: "1.11.1" petitparser: dependency: transitive description: @@ -1514,6 +1522,30 @@ packages: url: "https://pub.dev" source: hosted version: "1.10.1" + speech_to_text: + dependency: "direct main" + description: + name: speech_to_text + sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04 + url: "https://pub.dev" + source: hosted + version: "7.3.0" + speech_to_text_platform_interface: + dependency: transitive + description: + name: speech_to_text_platform_interface + sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114 + url: "https://pub.dev" + source: hosted + version: "2.3.0" + speech_to_text_windows: + dependency: transitive + description: + name: speech_to_text_windows + sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072" + url: "https://pub.dev" + source: hosted + version: "1.0.0+beta.8" sqflite: dependency: transitive description: @@ -1594,30 +1626,6 @@ packages: url: "https://pub.dev" source: hosted version: "1.4.1" - stts: - dependency: "direct main" - description: - name: stts - sha256: "166ca37d241652cdefb9c31e18a7be93ff4eb847382ae32c5563e07bf44bf1d8" - url: "https://pub.dev" - source: hosted - version: "1.2.6" - stts_platform_interface: - dependency: transitive - description: - name: stts_platform_interface - sha256: "6b82268d59d608e9b5accdadf0e7ccaea7928e8fce68ca393111fa7193d1bf10" - url: "https://pub.dev" - source: hosted - version: "1.2.0" - stts_web: - dependency: transitive - description: - name: stts_web - sha256: "62625c3b4d86076820d687dc468845a0f54c7dd4ead155b58f1e5864488c7f1c" - url: "https://pub.dev" - source: hosted - version: "1.1.0" synchronized: dependency: transitive description: diff --git a/pubspec.yaml b/pubspec.yaml index b45f0f6..9c538ca 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -45,7 +45,7 @@ dependencies: # Platform Features vad: ^0.0.7+1 - stts: ^1.2.5 + speech_to_text: ^7.3.0 record: ^6.1.2 flutter_tts: ^4.2.3 audioplayers: ^6.5.1