From fa9fa8dd1b259ce945b437a1520ce9129b65e24a Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Mon, 25 Aug 2025 20:04:04 +0530 Subject: [PATCH] refactor: migrate from speech_to_text to stts for voice input functionality --- ios/Podfile.lock | 21 +- .../chat/services/voice_input_service.dart | 217 ++++++++---------- pubspec.lock | 56 ++--- pubspec.yaml | 2 +- 4 files changed, 122 insertions(+), 174 deletions(-) diff --git a/ios/Podfile.lock b/ios/Podfile.lock index 87bcc6e..cddd6e8 100644 --- a/ios/Podfile.lock +++ b/ios/Podfile.lock @@ -1,7 +1,4 @@ PODS: - - CwlCatchException (2.2.1): - - CwlCatchExceptionSupport (~> 2.2.1) - - CwlCatchExceptionSupport (2.2.1) - DKImagePickerController/Core (4.3.9): - DKImagePickerController/ImageDataManager - DKImagePickerController/Resource @@ -58,13 +55,11 @@ PODS: - shared_preferences_foundation (0.0.1): - Flutter - FlutterMacOS - - speech_to_text (7.2.0): - - CwlCatchException - - Flutter - - FlutterMacOS - sqflite_darwin (0.0.4): - Flutter - FlutterMacOS + - stts (1.0.0): + - Flutter - SwiftyGif (5.4.5) - url_launcher_ios (0.0.1): - Flutter @@ -82,15 +77,13 @@ DEPENDENCIES: - record_ios (from `.symlinks/plugins/record_ios/ios`) - share_plus (from `.symlinks/plugins/share_plus/ios`) - shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`) - - speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`) - sqflite_darwin (from `.symlinks/plugins/sqflite_darwin/darwin`) + - stts (from `.symlinks/plugins/stts/ios`) - url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`) - wakelock_plus (from `.symlinks/plugins/wakelock_plus/ios`) SPEC REPOS: trunk: - - CwlCatchException - - CwlCatchExceptionSupport - DKImagePickerController - DKPhotoGallery - SDWebImage @@ -117,18 +110,16 @@ EXTERNAL SOURCES: :path: ".symlinks/plugins/share_plus/ios" shared_preferences_foundation: :path: ".symlinks/plugins/shared_preferences_foundation/darwin" - speech_to_text: - :path: ".symlinks/plugins/speech_to_text/darwin" sqflite_darwin: :path: ".symlinks/plugins/sqflite_darwin/darwin" + stts: + :path: ".symlinks/plugins/stts/ios" url_launcher_ios: :path: ".symlinks/plugins/url_launcher_ios/ios" wakelock_plus: :path: ".symlinks/plugins/wakelock_plus/ios" SPEC CHECKSUMS: - CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a - CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60 file_picker: a0560bc09d61de87f12d246fc47d2119e6ef37be @@ -142,8 +133,8 @@ SPEC CHECKSUMS: SDWebImage: f29024626962457f3470184232766516dee8dfea share_plus: 50da8cb520a8f0f65671c6c6a99b3617ed10a58a shared_preferences_foundation: 9e1978ff2562383bd5676f64ec4e9aa8fa06a6f7 - speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19 sqflite_darwin: 20b2a3a3b70e43edae938624ce550a3cbf66a3d0 + stts: 1a48df645bb516e86e4121d5253b582749a1d3a6 SwiftyGif: 706c60cf65fa2bc5ee0313beece843c8eb8194d4 url_launcher_ios: 694010445543906933d732453a59da0a173ae33d wakelock_plus: e29112ab3ef0b318e58cfa5c32326458be66b556 diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart index a54e163..1c06d62 100644 --- a/lib/features/chat/services/voice_input_service.dart +++ b/lib/features/chat/services/voice_input_service.dart @@ -5,26 +5,37 @@ import 'dart:async'; import 'dart:io' show Platform; import 'package:path_provider/path_provider.dart'; import 'package:path/path.dart' as p; -import 'package:speech_to_text/speech_recognition_error.dart'; -import 'package:speech_to_text/speech_recognition_result.dart'; -import 'package:speech_to_text/speech_to_text.dart' as stt; +import 'package:stts/stts.dart'; + +// Lightweight replacement for previous stt.LocaleName used across the UI +class LocaleName { + final String localeId; + final String name; + const LocaleName(this.localeId, this.name); +} class VoiceInputService { final AudioRecorder _recorder = AudioRecorder(); - stt.SpeechToText? _speech; + final Stt _speech = Stt(); bool _isInitialized = false; bool _isListening = false; bool _localSttAvailable = false; String? _selectedLocaleId; - List _locales = const []; + List _locales = const []; StreamController? _textStreamController; String _currentText = ''; // Public stream for UI waveform visualization (emits partial text length as proxy) StreamController? _intensityController; Stream get intensityStream => _intensityController?.stream ?? const Stream.empty(); + + /// Public stream of partial/final transcript strings and special audio tokens. + Stream get textStream => + _textStreamController?.stream ?? const Stream.empty(); Timer? _autoStopTimer; StreamSubscription? _ampSub; + StreamSubscription? _sttResultSub; + StreamSubscription? _sttStateSub; bool get isSupportedPlatform => Platform.isAndroid || Platform.isIOS; @@ -33,40 +44,14 @@ class VoiceInputService { if (!isSupportedPlatform) return false; // Prepare local speech recognizer try { - _speech = stt.SpeechToText(); - debugPrint('DEBUG: Initializing speech_to_text...'); - _localSttAvailable = await _speech!.initialize( - onStatus: (status) { - debugPrint('DEBUG: SpeechToText status: $status'); - // When platform end-of-speech triggers, ensure we stop timer/streams - if (status.toLowerCase().contains('notListening') || - status.toLowerCase().contains('done')) { - // No-op: UI manages stopping; SpeechToText emits final result - } - }, - onError: (SpeechRecognitionError error) { - debugPrint('DEBUG: SpeechToText error: ${error.errorMsg}'); - debugPrint('DEBUG: SpeechToText error permanent: ${error.permanent}'); - // If error is permanent, mark local STT as unavailable - if (error.permanent) { - debugPrint('DEBUG: Permanent error detected, disabling local STT'); - _localSttAvailable = false; - } - // If any error, we keep fallback available; no throws here. - }, - ); - debugPrint( - 'DEBUG: SpeechToText initialization result: $_localSttAvailable', - ); + // Check permission and supported status + _localSttAvailable = await _speech.isSupported(); if (_localSttAvailable) { try { - _locales = await _speech!.locales(); - debugPrint( - 'DEBUG: Available locales: ${_locales.map((l) => l.localeId).join(', ')}', - ); + final langs = await _speech.getLanguages(); + _locales = langs.map((l) => LocaleName(l, l)).toList(); final deviceTag = WidgetsBinding.instance.platformDispatcher.locale .toLanguageTag(); - debugPrint('DEBUG: Device locale: $deviceTag'); final match = _locales.firstWhere( (l) => l.localeId.toLowerCase() == deviceTag.toLowerCase(), orElse: () { @@ -78,14 +63,13 @@ class VoiceInputService { (l) => l.localeId.toLowerCase().startsWith('$primary-'), orElse: () => _locales.isNotEmpty ? _locales.first - : stt.LocaleName('en_US', 'English (US)'), + : LocaleName('en_US', 'en_US'), ); }, ); _selectedLocaleId = match.localeId; - debugPrint('DEBUG: Selected locale: $_selectedLocaleId'); } catch (e) { - debugPrint('DEBUG: Error loading locales: $e'); + // ignore locale load errors _selectedLocaleId = null; } } @@ -98,6 +82,9 @@ class VoiceInputService { Future checkPermissions() async { try { + // Prefer stts permission check which will request microphone permission + final mic = await _speech.hasPermission(); + if (mic) return true; return await _recorder.hasPermission(); } catch (_) { return false; @@ -111,24 +98,11 @@ class VoiceInputService { // Add a method to check if on-device STT is properly supported Future checkOnDeviceSupport() async { if (!isSupportedPlatform || !_isInitialized) return false; - if (_speech == null) return false; - try { - // Check if the speech engine supports on-device recognition - final result = await _speech!.initialize(); - debugPrint('DEBUG: On-device support check - initialize result: $result'); - - if (result) { - // Note: getEngines() method is not available in speech_to_text 7.3.0 - // The package handles engine selection internally - debugPrint( - 'DEBUG: SpeechToText initialized successfully - engine selection handled internally', - ); - } - - return result; + final supported = await _speech.isSupported(); + return supported; } catch (e) { - debugPrint('DEBUG: Error checking on-device support: $e'); + // ignore errors checking on-device support return false; } } @@ -136,13 +110,13 @@ class VoiceInputService { // Test method to verify on-device STT functionality Future testOnDeviceStt() async { try { - debugPrint('DEBUG: Starting on-device STT test'); + // starting on-device STT test // First ensure we're initialized await initialize(); - if (!_localSttAvailable || _speech == null) { - return 'Local STT not available. Available: $_localSttAvailable, Speech: ${_speech != null}'; + if (!_localSttAvailable) { + return 'Local STT not available. Available: $_localSttAvailable'; } // Check microphone permission @@ -152,40 +126,29 @@ class VoiceInputService { } // Test if speech recognition is available - final isAvailable = await _speech!.isAvailable; - debugPrint('DEBUG: Speech recognition isAvailable: $isAvailable'); - - if (!isAvailable) { + final supported = await _speech.isSupported(); + if (!supported) return 'Speech recognition service is not available on this device'; + + // Set language if available, then start and stop quickly + if (_selectedLocaleId != null) { + try { + await _speech.setLanguage(_selectedLocaleId!); + } catch (_) {} } - - // Check if listening is already active - final isListening = await _speech!.isListening; - debugPrint('DEBUG: Speech recognition isListening: $isListening'); - - if (isListening) { - await _speech!.stop(); - await Future.delayed(const Duration(milliseconds: 500)); - } - - // Check if we can start listening - startListening(); - - // Wait a bit for initialization + await _speech.start(SttRecognitionOptions(punctuation: true)); await Future.delayed(const Duration(milliseconds: 100)); - - // Stop immediately after starting - await stopListening(); + await _speech.stop(); return 'On-device STT test completed successfully. Local STT available: $_localSttAvailable, Selected locale: $_selectedLocaleId'; } catch (e) { - debugPrint('DEBUG: On-device STT test failed: $e'); + // on-device STT test failed return 'On-device STT test failed: $e'; } } String? get selectedLocaleId => _selectedLocaleId; - List get locales => _locales; + List get locales => _locales; void setLocale(String? localeId) { _selectedLocaleId = localeId; @@ -206,15 +169,13 @@ class VoiceInputService { _intensityController = StreamController.broadcast(); // Check if speech recognition is available before trying to use it - if (_localSttAvailable && _speech != null) { + if (_localSttAvailable) { // Schedule a check for speech recognition availability Future.microtask(() async { try { - final isStillAvailable = await _speech!.isAvailable; + final isStillAvailable = await _speech.isSupported(); if (!isStillAvailable && _isListening) { - debugPrint( - 'DEBUG: Speech recognition no longer available, falling back to recording', - ); + // speech recognition no longer available, fallback to recording _localSttAvailable = false; // Restart with fallback method _startRecordingProxyIntensity(); @@ -227,52 +188,47 @@ class VoiceInputService { return; } } catch (e) { - debugPrint('DEBUG: Error checking speech availability: $e'); + // ignore availability check errors } }); // Local on-device STT path - debugPrint( - 'DEBUG: Starting on-device STT with locale: $_selectedLocaleId', - ); _autoStopTimer?.cancel(); - // SpeechToText has its own end-of-speech handling; we still cap at 60s _autoStopTimer = Timer(const Duration(seconds: 60), () { if (_isListening) { _stopListening(); } }); - _speech!.listen( - localeId: _selectedLocaleId, - listenFor: const Duration(seconds: 60), - pauseFor: const Duration(seconds: 3), - onResult: (SpeechRecognitionResult result) { - if (!_isListening) return; - debugPrint( - 'DEBUG: Speech result: "${result.recognizedWords}" (final: ${result.finalResult})', - ); - _currentText = result.recognizedWords; - _textStreamController?.add(_currentText); - if (result.finalResult) { - // Will be followed by notListening status; we proactively close - _stopListening(); - } - }, - onSoundLevelChange: (level) { - debugPrint('DEBUG: Sound level: $level'); - // level is roughly 0..1+; map to 0..10 - final scaled = (level * 10).clamp(0, 10).round(); - _intensityController?.add(scaled); - }, - partialResults: true, - cancelOnError: true, - listenMode: stt.ListenMode.dictation, - onDevice: true, - ); - debugPrint('DEBUG: SpeechToText.listen() called with onDevice: true'); + + // Listen for results and state changes; keep subscriptions so we can cancel later + _sttResultSub = _speech.onResultChanged.listen((SttRecognition result) { + if (!_isListening) return; + _currentText = result.text; + _textStreamController?.add(_currentText); + if (result.isFinal) { + _stopListening(); + } + }, onError: (_) {}); + + _sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {}); + + try { + if (_selectedLocaleId != null) { + _speech.setLanguage(_selectedLocaleId!).catchError((_) {}); + } + // Start recognition (no await blocking the sync flow) + _speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) { + // fallback to recording + _localSttAvailable = false; + _startRecordingProxyIntensity(); + }); + } catch (e) { + _localSttAvailable = false; + _startRecordingProxyIntensity(); + } } else { // Fallback: record audio and signal file path for server transcription - debugPrint('DEBUG: Local STT not available, falling back to recording'); + // Local STT not available, falling back to recording _startRecordingProxyIntensity(); _autoStopTimer?.cancel(); _autoStopTimer = Timer(const Duration(seconds: 30), () { @@ -293,10 +249,19 @@ class VoiceInputService { if (!_isListening) return; _isListening = false; - if (_localSttAvailable && _speech != null) { + if (_localSttAvailable) { try { - await _speech!.stop(); + await _speech.stop(); } catch (_) {} + // Cancel STT subscriptions + try { + _sttResultSub?.cancel(); + } catch (_) {} + _sttResultSub = null; + try { + _sttStateSub?.cancel(); + } catch (_) {} + _sttStateSub = null; } else { // Also stop recorder if active await _stopRecording(); @@ -321,7 +286,7 @@ class VoiceInputService { stopListening(); _stopRecording(force: true); try { - _speech?.cancel(); + _speech.dispose().catchError((_) {}); } catch (_) {} } @@ -418,12 +383,12 @@ final voiceInputAvailableProvider = FutureProvider((ref) async { }); final voiceInputStreamProvider = StreamProvider((ref) { - // Voice input stream would be initialized when needed - return const Stream.empty(); + final service = ref.watch(voiceInputServiceProvider); + return service.textStream; }); /// Stream of crude voice intensity for waveform visuals final voiceIntensityStreamProvider = StreamProvider((ref) { - // Connected at runtime by the UI after calling startListening - return const Stream.empty(); + final service = ref.watch(voiceInputServiceProvider); + return service.intensityStream; }); diff --git a/pubspec.lock b/pubspec.lock index 84aab48..026fcbb 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -837,14 +837,6 @@ packages: url: "https://pub.dev" source: hosted version: "2.3.0" - pedantic: - dependency: transitive - description: - name: pedantic - sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602" - url: "https://pub.dev" - source: hosted - version: "1.11.1" petitparser: dependency: transitive description: @@ -1106,30 +1098,6 @@ packages: url: "https://pub.dev" source: hosted version: "1.10.1" - speech_to_text: - dependency: "direct main" - description: - name: speech_to_text - sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04 - url: "https://pub.dev" - source: hosted - version: "7.3.0" - speech_to_text_platform_interface: - dependency: transitive - description: - name: speech_to_text_platform_interface - sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114 - url: "https://pub.dev" - source: hosted - version: "2.3.0" - speech_to_text_windows: - dependency: transitive - description: - name: speech_to_text_windows - sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072" - url: "https://pub.dev" - source: hosted - version: "1.0.0+beta.8" sprintf: dependency: transitive description: @@ -1218,6 +1186,30 @@ packages: url: "https://pub.dev" source: hosted version: "1.4.1" + stts: + dependency: "direct main" + description: + name: stts + sha256: "097aabf3600b3327651f6ae13de440d6e09e5d447dbb42bf35e36a02e5f611c2" + url: "https://pub.dev" + source: hosted + version: "1.2.5" + stts_platform_interface: + dependency: transitive + description: + name: stts_platform_interface + sha256: "6b82268d59d608e9b5accdadf0e7ccaea7928e8fce68ca393111fa7193d1bf10" + url: "https://pub.dev" + source: hosted + version: "1.2.0" + stts_web: + dependency: transitive + description: + name: stts_web + sha256: "62625c3b4d86076820d687dc468845a0f54c7dd4ead155b58f1e5864488c7f1c" + url: "https://pub.dev" + source: hosted + version: "1.1.0" synchronized: dependency: transitive description: diff --git a/pubspec.yaml b/pubspec.yaml index aca5bfe..71bd53a 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -36,7 +36,7 @@ dependencies: # Platform Features record: ^6.0.0 - speech_to_text: ^7.3.0 + stts: ^1.2.5 image_picker: ^1.1.2 file_picker: ^10.2.1 path_provider: ^2.1.4