From fa9fa8dd1b259ce945b437a1520ce9129b65e24a Mon Sep 17 00:00:00 2001
From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com>
Date: Mon, 25 Aug 2025 20:04:04 +0530
Subject: [PATCH] refactor: migrate from speech_to_text to stts for voice input
 functionality

---
 ios/Podfile.lock                              |  21 +-
 .../chat/services/voice_input_service.dart    | 217 ++++++++----------
 pubspec.lock                                  |  56 ++---
 pubspec.yaml                                  |   2 +-
 4 files changed, 122 insertions(+), 174 deletions(-)

diff --git a/ios/Podfile.lock b/ios/Podfile.lock
index 87bcc6e..cddd6e8 100644
--- a/ios/Podfile.lock
+++ b/ios/Podfile.lock
@@ -1,7 +1,4 @@
 PODS:
-  - CwlCatchException (2.2.1):
-    - CwlCatchExceptionSupport (~> 2.2.1)
-  - CwlCatchExceptionSupport (2.2.1)
   - DKImagePickerController/Core (4.3.9):
     - DKImagePickerController/ImageDataManager
     - DKImagePickerController/Resource
@@ -58,13 +55,11 @@ PODS:
   - shared_preferences_foundation (0.0.1):
     - Flutter
     - FlutterMacOS
-  - speech_to_text (7.2.0):
-    - CwlCatchException
-    - Flutter
-    - FlutterMacOS
   - sqflite_darwin (0.0.4):
     - Flutter
     - FlutterMacOS
+  - stts (1.0.0):
+    - Flutter
   - SwiftyGif (5.4.5)
   - url_launcher_ios (0.0.1):
     - Flutter
@@ -82,15 +77,13 @@ DEPENDENCIES:
   - record_ios (from `.symlinks/plugins/record_ios/ios`)
   - share_plus (from `.symlinks/plugins/share_plus/ios`)
   - shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`)
-  - speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`)
   - sqflite_darwin (from `.symlinks/plugins/sqflite_darwin/darwin`)
+  - stts (from `.symlinks/plugins/stts/ios`)
   - url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`)
   - wakelock_plus (from `.symlinks/plugins/wakelock_plus/ios`)
 
 SPEC REPOS:
   trunk:
-    - CwlCatchException
-    - CwlCatchExceptionSupport
     - DKImagePickerController
     - DKPhotoGallery
     - SDWebImage
@@ -117,18 +110,16 @@ EXTERNAL SOURCES:
     :path: ".symlinks/plugins/share_plus/ios"
   shared_preferences_foundation:
     :path: ".symlinks/plugins/shared_preferences_foundation/darwin"
-  speech_to_text:
-    :path: ".symlinks/plugins/speech_to_text/darwin"
   sqflite_darwin:
     :path: ".symlinks/plugins/sqflite_darwin/darwin"
+  stts:
+    :path: ".symlinks/plugins/stts/ios"
   url_launcher_ios:
     :path: ".symlinks/plugins/url_launcher_ios/ios"
   wakelock_plus:
     :path: ".symlinks/plugins/wakelock_plus/ios"
 
 SPEC CHECKSUMS:
-  CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a
-  CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc
   DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
   DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60
   file_picker: a0560bc09d61de87f12d246fc47d2119e6ef37be
@@ -142,8 +133,8 @@ SPEC CHECKSUMS:
   SDWebImage: f29024626962457f3470184232766516dee8dfea
   share_plus: 50da8cb520a8f0f65671c6c6a99b3617ed10a58a
   shared_preferences_foundation: 9e1978ff2562383bd5676f64ec4e9aa8fa06a6f7
-  speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19
   sqflite_darwin: 20b2a3a3b70e43edae938624ce550a3cbf66a3d0
+  stts: 1a48df645bb516e86e4121d5253b582749a1d3a6
   SwiftyGif: 706c60cf65fa2bc5ee0313beece843c8eb8194d4
   url_launcher_ios: 694010445543906933d732453a59da0a173ae33d
   wakelock_plus: e29112ab3ef0b318e58cfa5c32326458be66b556
diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart
index a54e163..1c06d62 100644
--- a/lib/features/chat/services/voice_input_service.dart
+++ b/lib/features/chat/services/voice_input_service.dart
@@ -5,26 +5,37 @@ import 'dart:async';
 import 'dart:io' show Platform;
 import 'package:path_provider/path_provider.dart';
 import 'package:path/path.dart' as p;
-import 'package:speech_to_text/speech_recognition_error.dart';
-import 'package:speech_to_text/speech_recognition_result.dart';
-import 'package:speech_to_text/speech_to_text.dart' as stt;
+import 'package:stts/stts.dart';
+
+/// Lightweight replacement for the previous `stt.LocaleName` used across the UI.
+class LocaleName {
+  final String localeId;
+  final String name;
+  const LocaleName(this.localeId, this.name);
+}
 
 class VoiceInputService {
   final AudioRecorder _recorder = AudioRecorder();
-  stt.SpeechToText? _speech;
+  final Stt _speech = Stt();
   bool _isInitialized = false;
   bool _isListening = false;
   bool _localSttAvailable = false;
   String? _selectedLocaleId;
-  List<stt.LocaleName> _locales = const [];
+  List<LocaleName> _locales = const [];
   StreamController<String>? _textStreamController;
   String _currentText = '';
   // Public stream for UI waveform visualization (emits partial text length as proxy)
   StreamController<int>? _intensityController;
   Stream<int> get intensityStream =>
       _intensityController?.stream ?? const Stream<int>.empty();
+
+  /// Public stream of partial/final transcript strings and special audio tokens.
+  Stream<String> get textStream =>
+      _textStreamController?.stream ?? const Stream<String>.empty();
   Timer? _autoStopTimer;
   StreamSubscription<Amplitude>? _ampSub;
+  StreamSubscription<SttRecognition>? _sttResultSub;
+  StreamSubscription<SttState>? _sttStateSub;
 
   bool get isSupportedPlatform => Platform.isAndroid || Platform.isIOS;
 
@@ -33,40 +44,14 @@ class VoiceInputService {
     if (!isSupportedPlatform) return false;
     // Prepare local speech recognizer
     try {
-      _speech = stt.SpeechToText();
-      debugPrint('DEBUG: Initializing speech_to_text...');
-      _localSttAvailable = await _speech!.initialize(
-        onStatus: (status) {
-          debugPrint('DEBUG: SpeechToText status: $status');
-          // When platform end-of-speech triggers, ensure we stop timer/streams
-          if (status.toLowerCase().contains('notListening') ||
-              status.toLowerCase().contains('done')) {
-            // No-op: UI manages stopping; SpeechToText emits final result
-          }
-        },
-        onError: (SpeechRecognitionError error) {
-          debugPrint('DEBUG: SpeechToText error: ${error.errorMsg}');
-          debugPrint('DEBUG: SpeechToText error permanent: ${error.permanent}');
-          // If error is permanent, mark local STT as unavailable
-          if (error.permanent) {
-            debugPrint('DEBUG: Permanent error detected, disabling local STT');
-            _localSttAvailable = false;
-          }
-          // If any error, we keep fallback available; no throws here.
-        },
-      );
-      debugPrint(
-        'DEBUG: SpeechToText initialization result: $_localSttAvailable',
-      );
+      // Check whether speech recognition is supported on this device
+      _localSttAvailable = await _speech.isSupported();
       if (_localSttAvailable) {
         try {
-          _locales = await _speech!.locales();
-          debugPrint(
-            'DEBUG: Available locales: ${_locales.map((l) => l.localeId).join(', ')}',
-          );
+          final langs = await _speech.getLanguages();
+          _locales = langs.map((l) => LocaleName(l, l)).toList();
           final deviceTag = WidgetsBinding.instance.platformDispatcher.locale
               .toLanguageTag();
-          debugPrint('DEBUG: Device locale: $deviceTag');
           final match = _locales.firstWhere(
             (l) => l.localeId.toLowerCase() == deviceTag.toLowerCase(),
             orElse: () {
@@ -78,14 +63,13 @@ class VoiceInputService {
                 (l) => l.localeId.toLowerCase().startsWith('$primary-'),
                 orElse: () => _locales.isNotEmpty
                     ? _locales.first
-                    : stt.LocaleName('en_US', 'English (US)'),
+                    : LocaleName('en_US', 'en_US'),
               );
             },
           );
           _selectedLocaleId = match.localeId;
-          debugPrint('DEBUG: Selected locale: $_selectedLocaleId');
         } catch (e) {
-          debugPrint('DEBUG: Error loading locales: $e');
+          // ignore locale load errors
           _selectedLocaleId = null;
         }
       }
@@ -98,6 +82,9 @@ class VoiceInputService {
 
   Future<bool> checkPermissions() async {
     try {
+      // Prefer the stts check, which also requests microphone permission
+      final mic = await _speech.hasPermission();
+      if (mic) return true;
       return await _recorder.hasPermission();
     } catch (_) {
       return false;
@@ -111,24 +98,11 @@ class VoiceInputService {
   // Add a method to check if on-device STT is properly supported
   Future<bool> checkOnDeviceSupport() async {
     if (!isSupportedPlatform || !_isInitialized) return false;
-    if (_speech == null) return false;
-
     try {
-      // Check if the speech engine supports on-device recognition
-      final result = await _speech!.initialize();
-      debugPrint('DEBUG: On-device support check - initialize result: $result');
-
-      if (result) {
-        // Note: getEngines() method is not available in speech_to_text 7.3.0
-        // The package handles engine selection internally
-        debugPrint(
-          'DEBUG: SpeechToText initialized successfully - engine selection handled internally',
-        );
-      }
-
-      return result;
+      final supported = await _speech.isSupported();
+      return supported;
     } catch (e) {
-      debugPrint('DEBUG: Error checking on-device support: $e');
+      // Treat any platform error as "not supported" instead of propagating it
       return false;
     }
   }
@@ -136,13 +110,13 @@ class VoiceInputService {
   // Test method to verify on-device STT functionality
   Future<String> testOnDeviceStt() async {
     try {
-      debugPrint('DEBUG: Starting on-device STT test');
+      // starting on-device STT test
 
       // First ensure we're initialized
       await initialize();
 
-      if (!_localSttAvailable || _speech == null) {
-        return 'Local STT not available. Available: $_localSttAvailable, Speech: ${_speech != null}';
+      if (!_localSttAvailable) {
+        return 'Local STT not available. Available: $_localSttAvailable';
       }
 
       // Check microphone permission
@@ -152,40 +126,29 @@ class VoiceInputService {
       }
 
       // Test if speech recognition is available
-      final isAvailable = await _speech!.isAvailable;
-      debugPrint('DEBUG: Speech recognition isAvailable: $isAvailable');
-
-      if (!isAvailable) {
+      final supported = await _speech.isSupported();
+      if (!supported)
         return 'Speech recognition service is not available on this device';
+
+      // Set language if available, then start and stop quickly
+      if (_selectedLocaleId != null) {
+        try {
+          await _speech.setLanguage(_selectedLocaleId!);
+        } catch (_) {}
       }
-
-      // Check if listening is already active
-      final isListening = await _speech!.isListening;
-      debugPrint('DEBUG: Speech recognition isListening: $isListening');
-
-      if (isListening) {
-        await _speech!.stop();
-        await Future.delayed(const Duration(milliseconds: 500));
-      }
-
-      // Check if we can start listening
-      startListening();
-
-      // Wait a bit for initialization
+      await _speech.start(SttRecognitionOptions(punctuation: true));
       await Future.delayed(const Duration(milliseconds: 100));
-
-      // Stop immediately after starting
-      await stopListening();
+      await _speech.stop();
 
       return 'On-device STT test completed successfully. Local STT available: $_localSttAvailable, Selected locale: $_selectedLocaleId';
     } catch (e) {
-      debugPrint('DEBUG: On-device STT test failed: $e');
+      // on-device STT test failed
       return 'On-device STT test failed: $e';
     }
   }
 
   String? get selectedLocaleId => _selectedLocaleId;
-  List<stt.LocaleName> get locales => _locales;
+  List<LocaleName> get locales => _locales;
 
   void setLocale(String? localeId) {
     _selectedLocaleId = localeId;
@@ -206,15 +169,13 @@ class VoiceInputService {
     _intensityController = StreamController<int>.broadcast();
 
     // Check if speech recognition is available before trying to use it
-    if (_localSttAvailable && _speech != null) {
+    if (_localSttAvailable) {
       // Schedule a check for speech recognition availability
       Future.microtask(() async {
         try {
-          final isStillAvailable = await _speech!.isAvailable;
+          final isStillAvailable = await _speech.isSupported();
           if (!isStillAvailable && _isListening) {
-            debugPrint(
-              'DEBUG: Speech recognition no longer available, falling back to recording',
-            );
+            // speech recognition no longer available, fallback to recording
             _localSttAvailable = false;
             // Restart with fallback method
             _startRecordingProxyIntensity();
@@ -227,52 +188,47 @@ class VoiceInputService {
             return;
           }
         } catch (e) {
-          debugPrint('DEBUG: Error checking speech availability: $e');
+          // ignore availability check errors
         }
       });
 
       // Local on-device STT path
-      debugPrint(
-        'DEBUG: Starting on-device STT with locale: $_selectedLocaleId',
-      );
       _autoStopTimer?.cancel();
-      // SpeechToText has its own end-of-speech handling; we still cap at 60s
       _autoStopTimer = Timer(const Duration(seconds: 60), () {
         if (_isListening) {
           _stopListening();
         }
       });
-      _speech!.listen(
-        localeId: _selectedLocaleId,
-        listenFor: const Duration(seconds: 60),
-        pauseFor: const Duration(seconds: 3),
-        onResult: (SpeechRecognitionResult result) {
-          if (!_isListening) return;
-          debugPrint(
-            'DEBUG: Speech result: "${result.recognizedWords}" (final: ${result.finalResult})',
-          );
-          _currentText = result.recognizedWords;
-          _textStreamController?.add(_currentText);
-          if (result.finalResult) {
-            // Will be followed by notListening status; we proactively close
-            _stopListening();
-          }
-        },
-        onSoundLevelChange: (level) {
-          debugPrint('DEBUG: Sound level: $level');
-          // level is roughly 0..1+; map to 0..10
-          final scaled = (level * 10).clamp(0, 10).round();
-          _intensityController?.add(scaled);
-        },
-        partialResults: true,
-        cancelOnError: true,
-        listenMode: stt.ListenMode.dictation,
-        onDevice: true,
-      );
-      debugPrint('DEBUG: SpeechToText.listen() called with onDevice: true');
+
+      // Listen for results and state changes; keep subscriptions so we can cancel later
+      _sttResultSub = _speech.onResultChanged.listen((SttRecognition result) {
+        if (!_isListening) return;
+        _currentText = result.text;
+        _textStreamController?.add(_currentText);
+        if (result.isFinal) {
+          _stopListening();
+        }
+      }, onError: (_) {});
+
+      _sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {});
+
+      try {
+        if (_selectedLocaleId != null) {
+          _speech.setLanguage(_selectedLocaleId!).catchError((_) {});
+        }
+        // Start recognition (no await blocking the sync flow)
+        _speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) {
+          // fallback to recording
+          _localSttAvailable = false;
+          _startRecordingProxyIntensity();
+        });
+      } catch (e) {
+        _localSttAvailable = false;
+        _startRecordingProxyIntensity();
+      }
     } else {
       // Fallback: record audio and signal file path for server transcription
-      debugPrint('DEBUG: Local STT not available, falling back to recording');
+      // Local STT not available, falling back to recording
       _startRecordingProxyIntensity();
       _autoStopTimer?.cancel();
       _autoStopTimer = Timer(const Duration(seconds: 30), () {
@@ -293,10 +249,19 @@ class VoiceInputService {
     if (!_isListening) return;
 
     _isListening = false;
-    if (_localSttAvailable && _speech != null) {
+    if (_localSttAvailable) {
       try {
-        await _speech!.stop();
+        await _speech.stop();
       } catch (_) {}
+      // Cancel STT subscriptions
+      try {
+        _sttResultSub?.cancel();
+      } catch (_) {}
+      _sttResultSub = null;
+      try {
+        _sttStateSub?.cancel();
+      } catch (_) {}
+      _sttStateSub = null;
     } else {
       // Also stop recorder if active
       await _stopRecording();
@@ -321,7 +286,7 @@ class VoiceInputService {
     stopListening();
     _stopRecording(force: true);
     try {
-      _speech?.cancel();
+      _speech.dispose().catchError((_) {});
     } catch (_) {}
   }
 
@@ -418,12 +383,12 @@ final voiceInputAvailableProvider = FutureProvider<bool>((ref) async {
 });
 
 final voiceInputStreamProvider = StreamProvider<String>((ref) {
-  // Voice input stream would be initialized when needed
-  return const Stream.empty();
+  // NOTE(review): textStream is empty until a controller exists - confirm timing
+  return ref.watch(voiceInputServiceProvider).textStream;
 });
 
 /// Stream of crude voice intensity for waveform visuals
 final voiceIntensityStreamProvider = StreamProvider<int>((ref) {
-  // Connected at runtime by the UI after calling startListening
-  return const Stream.empty();
+  // NOTE(review): intensityStream is empty until a controller exists - confirm timing
+  return ref.watch(voiceInputServiceProvider).intensityStream;
 });
diff --git a/pubspec.lock b/pubspec.lock
index 84aab48..026fcbb 100644
--- a/pubspec.lock
+++ b/pubspec.lock
@@ -837,14 +837,6 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "2.3.0"
-  pedantic:
-    dependency: transitive
-    description:
-      name: pedantic
-      sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.11.1"
   petitparser:
     dependency: transitive
     description:
@@ -1106,30 +1098,6 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "1.10.1"
-  speech_to_text:
-    dependency: "direct main"
-    description:
-      name: speech_to_text
-      sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04
-      url: "https://pub.dev"
-    source: hosted
-    version: "7.3.0"
-  speech_to_text_platform_interface:
-    dependency: transitive
-    description:
-      name: speech_to_text_platform_interface
-      sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114
-      url: "https://pub.dev"
-    source: hosted
-    version: "2.3.0"
-  speech_to_text_windows:
-    dependency: transitive
-    description:
-      name: speech_to_text_windows
-      sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.0.0+beta.8"
   sprintf:
     dependency: transitive
     description:
@@ -1218,6 +1186,30 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "1.4.1"
+  stts:
+    dependency: "direct main"
+    description:
+      name: stts
+      sha256: "097aabf3600b3327651f6ae13de440d6e09e5d447dbb42bf35e36a02e5f611c2"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.2.5"
+  stts_platform_interface:
+    dependency: transitive
+    description:
+      name: stts_platform_interface
+      sha256: "6b82268d59d608e9b5accdadf0e7ccaea7928e8fce68ca393111fa7193d1bf10"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.2.0"
+  stts_web:
+    dependency: transitive
+    description:
+      name: stts_web
+      sha256: "62625c3b4d86076820d687dc468845a0f54c7dd4ead155b58f1e5864488c7f1c"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.1.0"
   synchronized:
     dependency: transitive
     description:
diff --git a/pubspec.yaml b/pubspec.yaml
index aca5bfe..71bd53a 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -36,7 +36,7 @@ dependencies:
   
   # Platform Features
   record: ^6.0.0
-  speech_to_text: ^7.3.0
+  stts: ^1.2.5
   image_picker: ^1.1.2
   file_picker: ^10.2.1
   path_provider: ^2.1.4