refactor: migrate from speech_to_text to stts for voice input functionality

This commit is contained in:
cogwheel0
2025-08-25 20:04:04 +05:30
parent 265c7026af
commit fa9fa8dd1b
4 changed files with 122 additions and 174 deletions

View File

@@ -1,7 +1,4 @@
PODS:
- CwlCatchException (2.2.1):
- CwlCatchExceptionSupport (~> 2.2.1)
- CwlCatchExceptionSupport (2.2.1)
- DKImagePickerController/Core (4.3.9):
- DKImagePickerController/ImageDataManager
- DKImagePickerController/Resource
@@ -58,13 +55,11 @@ PODS:
- shared_preferences_foundation (0.0.1):
- Flutter
- FlutterMacOS
- speech_to_text (7.2.0):
- CwlCatchException
- Flutter
- FlutterMacOS
- sqflite_darwin (0.0.4):
- Flutter
- FlutterMacOS
- stts (1.0.0):
- Flutter
- SwiftyGif (5.4.5)
- url_launcher_ios (0.0.1):
- Flutter
@@ -82,15 +77,13 @@ DEPENDENCIES:
- record_ios (from `.symlinks/plugins/record_ios/ios`)
- share_plus (from `.symlinks/plugins/share_plus/ios`)
- shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`)
- speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`)
- sqflite_darwin (from `.symlinks/plugins/sqflite_darwin/darwin`)
- stts (from `.symlinks/plugins/stts/ios`)
- url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`)
- wakelock_plus (from `.symlinks/plugins/wakelock_plus/ios`)
SPEC REPOS:
trunk:
- CwlCatchException
- CwlCatchExceptionSupport
- DKImagePickerController
- DKPhotoGallery
- SDWebImage
@@ -117,18 +110,16 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/share_plus/ios"
shared_preferences_foundation:
:path: ".symlinks/plugins/shared_preferences_foundation/darwin"
speech_to_text:
:path: ".symlinks/plugins/speech_to_text/darwin"
sqflite_darwin:
:path: ".symlinks/plugins/sqflite_darwin/darwin"
stts:
:path: ".symlinks/plugins/stts/ios"
url_launcher_ios:
:path: ".symlinks/plugins/url_launcher_ios/ios"
wakelock_plus:
:path: ".symlinks/plugins/wakelock_plus/ios"
SPEC CHECKSUMS:
CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a
CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc
DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60
file_picker: a0560bc09d61de87f12d246fc47d2119e6ef37be
@@ -142,8 +133,8 @@ SPEC CHECKSUMS:
SDWebImage: f29024626962457f3470184232766516dee8dfea
share_plus: 50da8cb520a8f0f65671c6c6a99b3617ed10a58a
shared_preferences_foundation: 9e1978ff2562383bd5676f64ec4e9aa8fa06a6f7
speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19
sqflite_darwin: 20b2a3a3b70e43edae938624ce550a3cbf66a3d0
stts: 1a48df645bb516e86e4121d5253b582749a1d3a6
SwiftyGif: 706c60cf65fa2bc5ee0313beece843c8eb8194d4
url_launcher_ios: 694010445543906933d732453a59da0a173ae33d
wakelock_plus: e29112ab3ef0b318e58cfa5c32326458be66b556

View File

@@ -5,26 +5,37 @@ import 'dart:async';
import 'dart:io' show Platform;
import 'package:path_provider/path_provider.dart';
import 'package:path/path.dart' as p;
import 'package:speech_to_text/speech_recognition_error.dart';
import 'package:speech_to_text/speech_recognition_result.dart';
import 'package:speech_to_text/speech_to_text.dart' as stt;
import 'package:stts/stts.dart';
/// A speech-recognition locale entry handed to the UI.
///
/// Lightweight replacement for the `stt.LocaleName` type that the UI used
/// previously, so existing readers of [localeId] and [name] keep working.
class LocaleName {
  /// Creates an entry from a locale identifier and its label.
  const LocaleName(this.localeId, this.name);

  /// Identifier of the locale, for example `en_US`.
  final String localeId;

  /// Label associated with the locale; callers may pass the identifier
  /// itself when no friendlier name is available.
  final String name;
}
class VoiceInputService {
final AudioRecorder _recorder = AudioRecorder();
stt.SpeechToText? _speech;
final Stt _speech = Stt();
bool _isInitialized = false;
bool _isListening = false;
bool _localSttAvailable = false;
String? _selectedLocaleId;
List<stt.LocaleName> _locales = const [];
List<LocaleName> _locales = const [];
StreamController<String>? _textStreamController;
String _currentText = '';
// Public stream for UI waveform visualization (emits partial text length as proxy)
StreamController<int>? _intensityController;
Stream<int> get intensityStream =>
_intensityController?.stream ?? const Stream<int>.empty();
/// Public stream of partial/final transcript strings and special audio tokens.
Stream<String> get textStream =>
_textStreamController?.stream ?? const Stream<String>.empty();
Timer? _autoStopTimer;
StreamSubscription<Amplitude>? _ampSub;
StreamSubscription<SttRecognition>? _sttResultSub;
StreamSubscription<SttState>? _sttStateSub;
bool get isSupportedPlatform => Platform.isAndroid || Platform.isIOS;
@@ -33,40 +44,14 @@ class VoiceInputService {
if (!isSupportedPlatform) return false;
// Prepare local speech recognizer
try {
_speech = stt.SpeechToText();
debugPrint('DEBUG: Initializing speech_to_text...');
_localSttAvailable = await _speech!.initialize(
onStatus: (status) {
debugPrint('DEBUG: SpeechToText status: $status');
// When platform end-of-speech triggers, ensure we stop timer/streams
if (status.toLowerCase().contains('notListening') ||
status.toLowerCase().contains('done')) {
// No-op: UI manages stopping; SpeechToText emits final result
}
},
onError: (SpeechRecognitionError error) {
debugPrint('DEBUG: SpeechToText error: ${error.errorMsg}');
debugPrint('DEBUG: SpeechToText error permanent: ${error.permanent}');
// If error is permanent, mark local STT as unavailable
if (error.permanent) {
debugPrint('DEBUG: Permanent error detected, disabling local STT');
_localSttAvailable = false;
}
// If any error, we keep fallback available; no throws here.
},
);
debugPrint(
'DEBUG: SpeechToText initialization result: $_localSttAvailable',
);
// Check permission and supported status
_localSttAvailable = await _speech.isSupported();
if (_localSttAvailable) {
try {
_locales = await _speech!.locales();
debugPrint(
'DEBUG: Available locales: ${_locales.map((l) => l.localeId).join(', ')}',
);
final langs = await _speech.getLanguages();
_locales = langs.map((l) => LocaleName(l, l)).toList();
final deviceTag = WidgetsBinding.instance.platformDispatcher.locale
.toLanguageTag();
debugPrint('DEBUG: Device locale: $deviceTag');
final match = _locales.firstWhere(
(l) => l.localeId.toLowerCase() == deviceTag.toLowerCase(),
orElse: () {
@@ -78,14 +63,13 @@ class VoiceInputService {
(l) => l.localeId.toLowerCase().startsWith('$primary-'),
orElse: () => _locales.isNotEmpty
? _locales.first
: stt.LocaleName('en_US', 'English (US)'),
: LocaleName('en_US', 'en_US'),
);
},
);
_selectedLocaleId = match.localeId;
debugPrint('DEBUG: Selected locale: $_selectedLocaleId');
} catch (e) {
debugPrint('DEBUG: Error loading locales: $e');
// ignore locale load errors
_selectedLocaleId = null;
}
}
@@ -98,6 +82,9 @@ class VoiceInputService {
Future<bool> checkPermissions() async {
try {
// Prefer stts permission check which will request microphone permission
final mic = await _speech.hasPermission();
if (mic) return true;
return await _recorder.hasPermission();
} catch (_) {
return false;
@@ -111,24 +98,11 @@ class VoiceInputService {
// Add a method to check if on-device STT is properly supported
Future<bool> checkOnDeviceSupport() async {
if (!isSupportedPlatform || !_isInitialized) return false;
if (_speech == null) return false;
try {
// Check if the speech engine supports on-device recognition
final result = await _speech!.initialize();
debugPrint('DEBUG: On-device support check - initialize result: $result');
if (result) {
// Note: getEngines() method is not available in speech_to_text 7.3.0
// The package handles engine selection internally
debugPrint(
'DEBUG: SpeechToText initialized successfully - engine selection handled internally',
);
}
return result;
final supported = await _speech.isSupported();
return supported;
} catch (e) {
debugPrint('DEBUG: Error checking on-device support: $e');
// ignore errors checking on-device support
return false;
}
}
@@ -136,13 +110,13 @@ class VoiceInputService {
// Test method to verify on-device STT functionality
Future<String> testOnDeviceStt() async {
try {
debugPrint('DEBUG: Starting on-device STT test');
// starting on-device STT test
// First ensure we're initialized
await initialize();
if (!_localSttAvailable || _speech == null) {
return 'Local STT not available. Available: $_localSttAvailable, Speech: ${_speech != null}';
if (!_localSttAvailable) {
return 'Local STT not available. Available: $_localSttAvailable';
}
// Check microphone permission
@@ -152,40 +126,29 @@ class VoiceInputService {
}
// Test if speech recognition is available
final isAvailable = await _speech!.isAvailable;
debugPrint('DEBUG: Speech recognition isAvailable: $isAvailable');
if (!isAvailable) {
final supported = await _speech.isSupported();
if (!supported)
return 'Speech recognition service is not available on this device';
// Set language if available, then start and stop quickly
if (_selectedLocaleId != null) {
try {
await _speech.setLanguage(_selectedLocaleId!);
} catch (_) {}
}
// Check if listening is already active
final isListening = await _speech!.isListening;
debugPrint('DEBUG: Speech recognition isListening: $isListening');
if (isListening) {
await _speech!.stop();
await Future.delayed(const Duration(milliseconds: 500));
}
// Check if we can start listening
startListening();
// Wait a bit for initialization
await _speech.start(SttRecognitionOptions(punctuation: true));
await Future.delayed(const Duration(milliseconds: 100));
// Stop immediately after starting
await stopListening();
await _speech.stop();
return 'On-device STT test completed successfully. Local STT available: $_localSttAvailable, Selected locale: $_selectedLocaleId';
} catch (e) {
debugPrint('DEBUG: On-device STT test failed: $e');
// on-device STT test failed
return 'On-device STT test failed: $e';
}
}
String? get selectedLocaleId => _selectedLocaleId;
List<stt.LocaleName> get locales => _locales;
List<LocaleName> get locales => _locales;
void setLocale(String? localeId) {
_selectedLocaleId = localeId;
@@ -206,15 +169,13 @@ class VoiceInputService {
_intensityController = StreamController<int>.broadcast();
// Check if speech recognition is available before trying to use it
if (_localSttAvailable && _speech != null) {
if (_localSttAvailable) {
// Schedule a check for speech recognition availability
Future.microtask(() async {
try {
final isStillAvailable = await _speech!.isAvailable;
final isStillAvailable = await _speech.isSupported();
if (!isStillAvailable && _isListening) {
debugPrint(
'DEBUG: Speech recognition no longer available, falling back to recording',
);
// speech recognition no longer available, fallback to recording
_localSttAvailable = false;
// Restart with fallback method
_startRecordingProxyIntensity();
@@ -227,52 +188,47 @@ class VoiceInputService {
return;
}
} catch (e) {
debugPrint('DEBUG: Error checking speech availability: $e');
// ignore availability check errors
}
});
// Local on-device STT path
debugPrint(
'DEBUG: Starting on-device STT with locale: $_selectedLocaleId',
);
_autoStopTimer?.cancel();
// SpeechToText has its own end-of-speech handling; we still cap at 60s
_autoStopTimer = Timer(const Duration(seconds: 60), () {
if (_isListening) {
_stopListening();
}
});
_speech!.listen(
localeId: _selectedLocaleId,
listenFor: const Duration(seconds: 60),
pauseFor: const Duration(seconds: 3),
onResult: (SpeechRecognitionResult result) {
// Listen for results and state changes; keep subscriptions so we can cancel later
_sttResultSub = _speech.onResultChanged.listen((SttRecognition result) {
if (!_isListening) return;
debugPrint(
'DEBUG: Speech result: "${result.recognizedWords}" (final: ${result.finalResult})',
);
_currentText = result.recognizedWords;
_currentText = result.text;
_textStreamController?.add(_currentText);
if (result.finalResult) {
// Will be followed by notListening status; we proactively close
if (result.isFinal) {
_stopListening();
}
},
onSoundLevelChange: (level) {
debugPrint('DEBUG: Sound level: $level');
// level is roughly 0..1+; map to 0..10
final scaled = (level * 10).clamp(0, 10).round();
_intensityController?.add(scaled);
},
partialResults: true,
cancelOnError: true,
listenMode: stt.ListenMode.dictation,
onDevice: true,
);
debugPrint('DEBUG: SpeechToText.listen() called with onDevice: true');
}, onError: (_) {});
_sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {});
try {
if (_selectedLocaleId != null) {
_speech.setLanguage(_selectedLocaleId!).catchError((_) {});
}
// Start recognition (no await blocking the sync flow)
_speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) {
// fallback to recording
_localSttAvailable = false;
_startRecordingProxyIntensity();
});
} catch (e) {
_localSttAvailable = false;
_startRecordingProxyIntensity();
}
} else {
// Fallback: record audio and signal file path for server transcription
debugPrint('DEBUG: Local STT not available, falling back to recording');
// Local STT not available, falling back to recording
_startRecordingProxyIntensity();
_autoStopTimer?.cancel();
_autoStopTimer = Timer(const Duration(seconds: 30), () {
@@ -293,10 +249,19 @@ class VoiceInputService {
if (!_isListening) return;
_isListening = false;
if (_localSttAvailable && _speech != null) {
if (_localSttAvailable) {
try {
await _speech!.stop();
await _speech.stop();
} catch (_) {}
// Cancel STT subscriptions
try {
_sttResultSub?.cancel();
} catch (_) {}
_sttResultSub = null;
try {
_sttStateSub?.cancel();
} catch (_) {}
_sttStateSub = null;
} else {
// Also stop recorder if active
await _stopRecording();
@@ -321,7 +286,7 @@ class VoiceInputService {
stopListening();
_stopRecording(force: true);
try {
_speech?.cancel();
_speech.dispose().catchError((_) {});
} catch (_) {}
}
@@ -418,12 +383,12 @@ final voiceInputAvailableProvider = FutureProvider<bool>((ref) async {
});
final voiceInputStreamProvider = StreamProvider<String>((ref) {
// Voice input stream would be initialized when needed
return const Stream.empty();
final service = ref.watch(voiceInputServiceProvider);
return service.textStream;
});
/// Stream of crude voice intensity for waveform visuals
final voiceIntensityStreamProvider = StreamProvider<int>((ref) {
// Connected at runtime by the UI after calling startListening
return const Stream.empty();
final service = ref.watch(voiceInputServiceProvider);
return service.intensityStream;
});

View File

@@ -837,14 +837,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.3.0"
pedantic:
dependency: transitive
description:
name: pedantic
sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602"
url: "https://pub.dev"
source: hosted
version: "1.11.1"
petitparser:
dependency: transitive
description:
@@ -1106,30 +1098,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.10.1"
speech_to_text:
dependency: "direct main"
description:
name: speech_to_text
sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04
url: "https://pub.dev"
source: hosted
version: "7.3.0"
speech_to_text_platform_interface:
dependency: transitive
description:
name: speech_to_text_platform_interface
sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114
url: "https://pub.dev"
source: hosted
version: "2.3.0"
speech_to_text_windows:
dependency: transitive
description:
name: speech_to_text_windows
sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072"
url: "https://pub.dev"
source: hosted
version: "1.0.0+beta.8"
sprintf:
dependency: transitive
description:
@@ -1218,6 +1186,30 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.4.1"
stts:
dependency: "direct main"
description:
name: stts
sha256: "097aabf3600b3327651f6ae13de440d6e09e5d447dbb42bf35e36a02e5f611c2"
url: "https://pub.dev"
source: hosted
version: "1.2.5"
stts_platform_interface:
dependency: transitive
description:
name: stts_platform_interface
sha256: "6b82268d59d608e9b5accdadf0e7ccaea7928e8fce68ca393111fa7193d1bf10"
url: "https://pub.dev"
source: hosted
version: "1.2.0"
stts_web:
dependency: transitive
description:
name: stts_web
sha256: "62625c3b4d86076820d687dc468845a0f54c7dd4ead155b58f1e5864488c7f1c"
url: "https://pub.dev"
source: hosted
version: "1.1.0"
synchronized:
dependency: transitive
description:

View File

@@ -36,7 +36,7 @@ dependencies:
# Platform Features
record: ^6.0.0
speech_to_text: ^7.3.0
stts: ^1.2.5
image_picker: ^1.1.2
file_picker: ^10.2.1
path_provider: ^2.1.4