refactor: migrate from speech_to_text to stts for voice input functionality

2025-08-25 20:04:04 +05:30
parent 265c7026af
commit fa9fa8dd1b
4 changed files with 122 additions and 174 deletions
@@ -1,7 +1,4 @@
 PODS:
-  - CwlCatchException (2.2.1):
-    - CwlCatchExceptionSupport (~> 2.2.1)
-  - CwlCatchExceptionSupport (2.2.1)
  - DKImagePickerController/Core (4.3.9):
    - DKImagePickerController/ImageDataManager
    - DKImagePickerController/Resource
@@ -58,13 +55,11 @@ PODS:
  - shared_preferences_foundation (0.0.1):
    - Flutter
    - FlutterMacOS
-  - speech_to_text (7.2.0):
-    - CwlCatchException
-    - Flutter
-    - FlutterMacOS
  - sqflite_darwin (0.0.4):
    - Flutter
    - FlutterMacOS
+  - stts (1.0.0):
+    - Flutter
  - SwiftyGif (5.4.5)
  - url_launcher_ios (0.0.1):
    - Flutter
@@ -82,15 +77,13 @@ DEPENDENCIES:
  - record_ios (from `.symlinks/plugins/record_ios/ios`)
  - share_plus (from `.symlinks/plugins/share_plus/ios`)
  - shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`)
-  - speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`)
  - sqflite_darwin (from `.symlinks/plugins/sqflite_darwin/darwin`)
+  - stts (from `.symlinks/plugins/stts/ios`)
  - url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`)
  - wakelock_plus (from `.symlinks/plugins/wakelock_plus/ios`)

 SPEC REPOS:
  trunk:
-    - CwlCatchException
-    - CwlCatchExceptionSupport
    - DKImagePickerController
    - DKPhotoGallery
    - SDWebImage
@@ -117,18 +110,16 @@ EXTERNAL SOURCES:
    :path: ".symlinks/plugins/share_plus/ios"
  shared_preferences_foundation:
    :path: ".symlinks/plugins/shared_preferences_foundation/darwin"
-  speech_to_text:
-    :path: ".symlinks/plugins/speech_to_text/darwin"
  sqflite_darwin:
    :path: ".symlinks/plugins/sqflite_darwin/darwin"
+  stts:
+    :path: ".symlinks/plugins/stts/ios"
  url_launcher_ios:
    :path: ".symlinks/plugins/url_launcher_ios/ios"
  wakelock_plus:
    :path: ".symlinks/plugins/wakelock_plus/ios"

 SPEC CHECKSUMS:
-  CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a
-  CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc
  DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
  DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60
  file_picker: a0560bc09d61de87f12d246fc47d2119e6ef37be
@@ -142,8 +133,8 @@ SPEC CHECKSUMS:
  SDWebImage: f29024626962457f3470184232766516dee8dfea
  share_plus: 50da8cb520a8f0f65671c6c6a99b3617ed10a58a
  shared_preferences_foundation: 9e1978ff2562383bd5676f64ec4e9aa8fa06a6f7
-  speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19
  sqflite_darwin: 20b2a3a3b70e43edae938624ce550a3cbf66a3d0
+  stts: 1a48df645bb516e86e4121d5253b582749a1d3a6
  SwiftyGif: 706c60cf65fa2bc5ee0313beece843c8eb8194d4
  url_launcher_ios: 694010445543906933d732453a59da0a173ae33d
  wakelock_plus: e29112ab3ef0b318e58cfa5c32326458be66b556
@@ -5,26 +5,37 @@ import 'dart:async';
 import 'dart:io' show Platform;
 import 'package:path_provider/path_provider.dart';
 import 'package:path/path.dart' as p;
-import 'package:speech_to_text/speech_recognition_error.dart';
-import 'package:speech_to_text/speech_recognition_result.dart';
-import 'package:speech_to_text/speech_to_text.dart' as stt;
+import 'package:stts/stts.dart';
+
+// Lightweight replacement for previous stt.LocaleName used across the UI
+class LocaleName {
+  final String localeId;
+  final String name;
+  const LocaleName(this.localeId, this.name);
+}

 class VoiceInputService {
  final AudioRecorder _recorder = AudioRecorder();
-  stt.SpeechToText? _speech;
+  final Stt _speech = Stt();
  bool _isInitialized = false;
  bool _isListening = false;
  bool _localSttAvailable = false;
  String? _selectedLocaleId;
-  List<stt.LocaleName> _locales = const [];
+  List<LocaleName> _locales = const [];
  StreamController<String>? _textStreamController;
  String _currentText = '';
  // Public stream for UI waveform visualization (emits partial text length as proxy)
  StreamController<int>? _intensityController;
  Stream<int> get intensityStream =>
      _intensityController?.stream ?? const Stream<int>.empty();
+
+  /// Public stream of partial/final transcript strings and special audio tokens.
+  Stream<String> get textStream =>
+      _textStreamController?.stream ?? const Stream<String>.empty();
  Timer? _autoStopTimer;
  StreamSubscription<Amplitude>? _ampSub;
+  StreamSubscription<SttRecognition>? _sttResultSub;
+  StreamSubscription<SttState>? _sttStateSub;

  bool get isSupportedPlatform => Platform.isAndroid || Platform.isIOS;

@@ -33,40 +44,14 @@ class VoiceInputService {
    if (!isSupportedPlatform) return false;
    // Prepare local speech recognizer
    try {
-      _speech = stt.SpeechToText();
-      debugPrint('DEBUG: Initializing speech_to_text...');
-      _localSttAvailable = await _speech!.initialize(
-        onStatus: (status) {
-          debugPrint('DEBUG: SpeechToText status: $status');
-          // When platform end-of-speech triggers, ensure we stop timer/streams
-          if (status.toLowerCase().contains('notListening') ||
-              status.toLowerCase().contains('done')) {
-            // No-op: UI manages stopping; SpeechToText emits final result
-          }
-        },
-        onError: (SpeechRecognitionError error) {
-          debugPrint('DEBUG: SpeechToText error: ${error.errorMsg}');
-          debugPrint('DEBUG: SpeechToText error permanent: ${error.permanent}');
-          // If error is permanent, mark local STT as unavailable
-          if (error.permanent) {
-            debugPrint('DEBUG: Permanent error detected, disabling local STT');
-            _localSttAvailable = false;
-          }
-          // If any error, we keep fallback available; no throws here.
-        },
-      );
-      debugPrint(
-        'DEBUG: SpeechToText initialization result: $_localSttAvailable',
-      );
+      // Check whether speech recognition is supported (permission is checked separately)
+      _localSttAvailable = await _speech.isSupported();
      if (_localSttAvailable) {
        try {
-          _locales = await _speech!.locales();
-          debugPrint(
-            'DEBUG: Available locales: ${_locales.map((l) => l.localeId).join(', ')}',
-          );
+          final langs = await _speech.getLanguages();
+          _locales = langs.map((l) => LocaleName(l, l)).toList();
          final deviceTag = WidgetsBinding.instance.platformDispatcher.locale
              .toLanguageTag();
-          debugPrint('DEBUG: Device locale: $deviceTag');
          final match = _locales.firstWhere(
            (l) => l.localeId.toLowerCase() == deviceTag.toLowerCase(),
            orElse: () {
@@ -78,14 +63,13 @@ class VoiceInputService {
                (l) => l.localeId.toLowerCase().startsWith('$primary-'),
                orElse: () => _locales.isNotEmpty
                    ? _locales.first
-                    : stt.LocaleName('en_US', 'English (US)'),
+                    : LocaleName('en_US', 'en_US'),
              );
            },
          );
          _selectedLocaleId = match.localeId;
-          debugPrint('DEBUG: Selected locale: $_selectedLocaleId');
        } catch (e) {
-          debugPrint('DEBUG: Error loading locales: $e');
+          // ignore locale load errors
          _selectedLocaleId = null;
        }
      }
@@ -98,6 +82,9 @@ class VoiceInputService {

  Future<bool> checkPermissions() async {
    try {
+      // Prefer stts permission check which will request microphone permission
+      final mic = await _speech.hasPermission();
+      if (mic) return true;
      return await _recorder.hasPermission();
    } catch (_) {
      return false;
@@ -111,24 +98,11 @@ class VoiceInputService {
  // Add a method to check if on-device STT is properly supported
  Future<bool> checkOnDeviceSupport() async {
    if (!isSupportedPlatform || !_isInitialized) return false;
-    if (_speech == null) return false;
-
    try {
-      // Check if the speech engine supports on-device recognition
-      final result = await _speech!.initialize();
-      debugPrint('DEBUG: On-device support check - initialize result: $result');
-
-      if (result) {
-        // Note: getEngines() method is not available in speech_to_text 7.3.0
-        // The package handles engine selection internally
-        debugPrint(
-          'DEBUG: SpeechToText initialized successfully - engine selection handled internally',
-        );
-      }
-
-      return result;
+      final supported = await _speech.isSupported();
+      return supported;
    } catch (e) {
-      debugPrint('DEBUG: Error checking on-device support: $e');
+      // ignore errors checking on-device support
      return false;
    }
  }
@@ -136,13 +110,13 @@ class VoiceInputService {
  // Test method to verify on-device STT functionality
  Future<String> testOnDeviceStt() async {
    try {
-      debugPrint('DEBUG: Starting on-device STT test');
+      // starting on-device STT test

      // First ensure we're initialized
      await initialize();

-      if (!_localSttAvailable || _speech == null) {
-        return 'Local STT not available. Available: $_localSttAvailable, Speech: ${_speech != null}';
+      if (!_localSttAvailable) {
+        return 'Local STT not available. Available: $_localSttAvailable';
      }

      // Check microphone permission
@@ -152,40 +126,29 @@ class VoiceInputService {
      }

      // Test if speech recognition is available
-      final isAvailable = await _speech!.isAvailable;
-      debugPrint('DEBUG: Speech recognition isAvailable: $isAvailable');
-
-      if (!isAvailable) {
+      final supported = await _speech.isSupported();
+      if (!supported)
        return 'Speech recognition service is not available on this device';
+
+      // Set language if available, then start and stop quickly
+      if (_selectedLocaleId != null) {
+        try {
+          await _speech.setLanguage(_selectedLocaleId!);
+        } catch (_) {}
      }
-
-      // Check if listening is already active
-      final isListening = await _speech!.isListening;
-      debugPrint('DEBUG: Speech recognition isListening: $isListening');
-
-      if (isListening) {
-        await _speech!.stop();
-        await Future.delayed(const Duration(milliseconds: 500));
-      }
-
-      // Check if we can start listening
-      startListening();
-
-      // Wait a bit for initialization
+      await _speech.start(SttRecognitionOptions(punctuation: true));
      await Future.delayed(const Duration(milliseconds: 100));
-
-      // Stop immediately after starting
-      await stopListening();
+      await _speech.stop();

      return 'On-device STT test completed successfully. Local STT available: $_localSttAvailable, Selected locale: $_selectedLocaleId';
    } catch (e) {
-      debugPrint('DEBUG: On-device STT test failed: $e');
+      // on-device STT test failed
      return 'On-device STT test failed: $e';
    }
  }

  String? get selectedLocaleId => _selectedLocaleId;
-  List<stt.LocaleName> get locales => _locales;
+  List<LocaleName> get locales => _locales;

  void setLocale(String? localeId) {
    _selectedLocaleId = localeId;
@@ -206,15 +169,13 @@ class VoiceInputService {
    _intensityController = StreamController<int>.broadcast();

    // Check if speech recognition is available before trying to use it
-    if (_localSttAvailable && _speech != null) {
+    if (_localSttAvailable) {
      // Schedule a check for speech recognition availability
      Future.microtask(() async {
        try {
-          final isStillAvailable = await _speech!.isAvailable;
+          final isStillAvailable = await _speech.isSupported();
          if (!isStillAvailable && _isListening) {
-            debugPrint(
-              'DEBUG: Speech recognition no longer available, falling back to recording',
-            );
+            // Speech recognition is no longer available; fall back to recording
            _localSttAvailable = false;
            // Restart with fallback method
            _startRecordingProxyIntensity();
@@ -227,52 +188,47 @@ class VoiceInputService {
            return;
          }
        } catch (e) {
-          debugPrint('DEBUG: Error checking speech availability: $e');
+          // ignore availability check errors
        }
      });

      // Local on-device STT path
-      debugPrint(
-        'DEBUG: Starting on-device STT with locale: $_selectedLocaleId',
-      );
      _autoStopTimer?.cancel();
-      // SpeechToText has its own end-of-speech handling; we still cap at 60s
      _autoStopTimer = Timer(const Duration(seconds: 60), () {
        if (_isListening) {
          _stopListening();
        }
      });
-      _speech!.listen(
-        localeId: _selectedLocaleId,
-        listenFor: const Duration(seconds: 60),
-        pauseFor: const Duration(seconds: 3),
-        onResult: (SpeechRecognitionResult result) {
+
+      // Listen for results and state changes; keep subscriptions so we can cancel later
+      _sttResultSub = _speech.onResultChanged.listen((SttRecognition result) {
        if (!_isListening) return;
-          debugPrint(
-            'DEBUG: Speech result: "${result.recognizedWords}" (final: ${result.finalResult})',
-          );
-          _currentText = result.recognizedWords;
+        _currentText = result.text;
        _textStreamController?.add(_currentText);
-          if (result.finalResult) {
-            // Will be followed by notListening status; we proactively close
+        if (result.isFinal) {
          _stopListening();
        }
-        },
-        onSoundLevelChange: (level) {
-          debugPrint('DEBUG: Sound level: $level');
-          // level is roughly 0..1+; map to 0..10
-          final scaled = (level * 10).clamp(0, 10).round();
-          _intensityController?.add(scaled);
-        },
-        partialResults: true,
-        cancelOnError: true,
-        listenMode: stt.ListenMode.dictation,
-        onDevice: true,
-      );
-      debugPrint('DEBUG: SpeechToText.listen() called with onDevice: true');
+      }, onError: (_) {});
+
+      _sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {});
+
+      try {
+        if (_selectedLocaleId != null) {
+          _speech.setLanguage(_selectedLocaleId!).catchError((_) {});
+        }
+        // Start recognition without awaiting it, so the synchronous flow is not blocked
+        _speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) {
+          // Fall back to recording
+          _localSttAvailable = false;
+          _startRecordingProxyIntensity();
+        });
+      } catch (e) {
+        _localSttAvailable = false;
+        _startRecordingProxyIntensity();
+      }
    } else {
      // Fallback: record audio and signal file path for server transcription
-      debugPrint('DEBUG: Local STT not available, falling back to recording');
+      // Local STT not available, falling back to recording
      _startRecordingProxyIntensity();
      _autoStopTimer?.cancel();
      _autoStopTimer = Timer(const Duration(seconds: 30), () {
@@ -293,10 +249,19 @@ class VoiceInputService {
    if (!_isListening) return;

    _isListening = false;
-    if (_localSttAvailable && _speech != null) {
+    if (_localSttAvailable) {
      try {
-        await _speech!.stop();
+        await _speech.stop();
      } catch (_) {}
+      // Cancel STT subscriptions
+      try {
+        _sttResultSub?.cancel();
+      } catch (_) {}
+      _sttResultSub = null;
+      try {
+        _sttStateSub?.cancel();
+      } catch (_) {}
+      _sttStateSub = null;
    } else {
      // Also stop recorder if active
      await _stopRecording();
@@ -321,7 +286,7 @@ class VoiceInputService {
    stopListening();
    _stopRecording(force: true);
    try {
-      _speech?.cancel();
+      _speech.dispose().catchError((_) {});
    } catch (_) {}
  }

@@ -418,12 +383,12 @@ final voiceInputAvailableProvider = FutureProvider<bool>((ref) async {
 });

 final voiceInputStreamProvider = StreamProvider<String>((ref) {
-  // Voice input stream would be initialized when needed
-  return const Stream.empty();
+  final service = ref.watch(voiceInputServiceProvider);
+  return service.textStream;
 });

 /// Stream of crude voice intensity for waveform visuals
 final voiceIntensityStreamProvider = StreamProvider<int>((ref) {
-  // Connected at runtime by the UI after calling startListening
-  return const Stream.empty();
+  final service = ref.watch(voiceInputServiceProvider);
+  return service.intensityStream;
 });
@@ -837,14 +837,6 @@ packages:
      url: "https://pub.dev"
    source: hosted
    version: "2.3.0"
-  pedantic:
-    dependency: transitive
-    description:
-      name: pedantic
-      sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.11.1"
  petitparser:
    dependency: transitive
    description:
@@ -1106,30 +1098,6 @@ packages:
      url: "https://pub.dev"
    source: hosted
    version: "1.10.1"
-  speech_to_text:
-    dependency: "direct main"
-    description:
-      name: speech_to_text
-      sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04
-      url: "https://pub.dev"
-    source: hosted
-    version: "7.3.0"
-  speech_to_text_platform_interface:
-    dependency: transitive
-    description:
-      name: speech_to_text_platform_interface
-      sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114
-      url: "https://pub.dev"
-    source: hosted
-    version: "2.3.0"
-  speech_to_text_windows:
-    dependency: transitive
-    description:
-      name: speech_to_text_windows
-      sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.0.0+beta.8"
  sprintf:
    dependency: transitive
    description:
@@ -1218,6 +1186,30 @@ packages:
      url: "https://pub.dev"
    source: hosted
    version: "1.4.1"
+  stts:
+    dependency: "direct main"
+    description:
+      name: stts
+      sha256: "097aabf3600b3327651f6ae13de440d6e09e5d447dbb42bf35e36a02e5f611c2"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.2.5"
+  stts_platform_interface:
+    dependency: transitive
+    description:
+      name: stts_platform_interface
+      sha256: "6b82268d59d608e9b5accdadf0e7ccaea7928e8fce68ca393111fa7193d1bf10"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.2.0"
+  stts_web:
+    dependency: transitive
+    description:
+      name: stts_web
+      sha256: "62625c3b4d86076820d687dc468845a0f54c7dd4ead155b58f1e5864488c7f1c"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.1.0"
  synchronized:
    dependency: transitive
    description:
@@ -36,7 +36,7 @@ dependencies:
  
  # Platform Features
  record: ^6.0.0
-  speech_to_text: ^7.3.0
+  stts: ^1.2.5
  image_picker: ^1.1.2
  file_picker: ^10.2.1
  path_provider: ^2.1.4