2025-08-10 01:20:45 +05:30
|
|
|
import 'dart:async';
|
2025-11-10 01:57:28 +05:30
|
|
|
import 'dart:convert';
|
|
|
|
|
import 'dart:io' show Platform;
|
|
|
|
|
import 'dart:typed_data';
|
2025-09-30 14:58:53 +05:30
|
|
|
|
|
|
|
|
import 'package:flutter/widgets.dart';
|
|
|
|
|
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
|
|
|
|
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
2025-08-25 20:04:04 +05:30
|
|
|
import 'package:stts/stts.dart';
|
2025-11-10 01:57:28 +05:30
|
|
|
import 'package:vad/vad.dart';
|
2025-11-02 19:02:37 +05:30
|
|
|
|
|
|
|
|
import '../../../core/providers/app_providers.dart';
|
|
|
|
|
import '../../../core/services/api_service.dart';
|
|
|
|
|
import '../../../core/services/settings_service.dart';
|
2025-08-25 20:04:04 +05:30
|
|
|
|
2025-09-30 14:58:53 +05:30
|
|
|
part 'voice_input_service.g.dart';
|
|
|
|
|
|
2025-08-25 20:04:04 +05:30
|
|
|
// Lightweight replacement for previous stt.LocaleName used across the UI
|
|
|
|
|
/// Pairs a speech-recognition locale identifier with a display label.
class LocaleName {
  /// Creates an immutable entry from [localeId] and [name].
  const LocaleName(this.localeId, this.name);

  /// Identifier of the locale (this is the value stored as the selected
  /// locale and handed to the recognizer).
  final String localeId;

  /// Human-readable label for the locale.
  final String name;
}
|
2025-08-10 01:20:45 +05:30
|
|
|
|
|
|
|
|
class VoiceInputService {
|
2025-11-10 01:57:28 +05:30
|
|
|
/// Sample rate in Hz requested from the recorder and written to WAV headers.
static const int _vadSampleRate = 16000;

/// Samples per VAD frame; also used to convert durations into frame counts.
static const int _vadFrameSamples = 1536;

// Speech engines and injected collaborators.
final VadHandler _vadHandler = VadHandler.create();
final Stt _speech = Stt();
final ApiService? _api;
final Ref? _ref;

// Lifecycle and backend-selection state.
bool _isInitialized = false;
bool _isListening = false;
bool _localSttAvailable = false;
SttPreference _preference = SttPreference.auto;
bool _usingServerStt = false;

// Locale selection.
String? _selectedLocaleId;
List<LocaleName> _locales = const [];

// Per-session output: recognized text plus an intensity level in 0..10.
StreamController<String>? _textStreamController;
String _currentText = '';
StreamController<int>? _intensityController;
int _lastIntensity = 0;
Timer? _intensityDecayTimer;

/// Samples delivered by the last VAD speech-end event, awaiting transcription.
List<double>? _vadPendingSamples;

/// Stops the session once its maximum duration elapses.
Timer? _autoStopTimer;

// Subscriptions to the on-device recognizer.
StreamSubscription<SttRecognition>? _sttResultSub;
StreamSubscription<SttState>? _sttStateSub;

// Subscriptions to the VAD handler.
StreamSubscription<List<double>>? _vadSpeechEndSub;
StreamSubscription<({double isSpeech, double notSpeech, List<double> frame})>?
    _vadFrameSub;
StreamSubscription<String>? _vadErrorSub;

/// Intensity levels of the current session, or an empty stream when no
/// session controller exists.
Stream<int> get intensityStream {
  final controller = _intensityController;
  return controller == null ? const Stream<int>.empty() : controller.stream;
}

/// Recognized text of the current session, or an empty stream when no
/// session controller exists.
Stream<String> get textStream {
  final controller = _textStreamController;
  return controller == null ? const Stream<String>.empty() : controller.stream;
}
|
2025-08-10 01:20:45 +05:30
|
|
|
|
|
|
|
|
/// Creates the service.
///
/// A non-null [api] makes server transcription available; [ref], when given,
/// is read for application settings.
VoiceInputService({ApiService? api, Ref? ref}) : _api = api, _ref = ref;

/// Whether the current platform is Android or iOS.
bool get isSupportedPlatform {
  return Platform.isAndroid || Platform.isIOS;
}

/// Whether an API service was supplied for server transcription.
bool get hasServerStt {
  return _api != null;
}

/// The currently configured speech-to-text preference.
SttPreference get preference {
  return _preference;
}

/// Whether the preference permits using the server backend.
bool get allowsServerFallback {
  return !prefersDeviceOnly;
}

/// Whether the preference is [SttPreference.serverOnly].
bool get prefersServerOnly {
  return SttPreference.serverOnly == _preference;
}

/// Whether the preference is [SttPreference.deviceOnly].
bool get prefersDeviceOnly {
  return SttPreference.deviceOnly == _preference;
}
|
2025-11-02 19:02:37 +05:30
|
|
|
|
|
|
|
|
/// Replaces the stored speech-to-text preference with [preference].
///
/// Only the stored value changes; it is consulted the next time a backend
/// is chosen.
void updatePreference(SttPreference preference) => _preference = preference;
|
2025-08-10 01:20:45 +05:30
|
|
|
|
|
|
|
|
/// Probes the on-device recognizer and selects a default locale.
///
/// Returns `false` on unsupported platforms. Otherwise returns `true` once
/// the probe has run, even when on-device recognition turned out to be
/// unavailable (see [hasLocalStt]). Later calls return `true` immediately.
Future<bool> initialize() async {
  if (_isInitialized) return true;
  if (!isSupportedPlatform) return false;

  try {
    _localSttAvailable = await _speech.isSupported();
    if (_localSttAvailable) {
      try {
        final langs = await _speech.getLanguages();
        _locales = langs.map((l) => LocaleName(l, l)).toList();
        final deviceTag = WidgetsBinding.instance.platformDispatcher.locale
            .toLanguageTag();
        _selectedLocaleId = _pickDefaultLocale(deviceTag).localeId;
      } catch (_) {
        // Locale discovery is best-effort: recognition still works without
        // an explicit language selection.
        _selectedLocaleId = null;
      }
    }
  } catch (_) {
    _localSttAvailable = false;
  }

  _isInitialized = true;
  return true;
}

/// Picks the entry of [_locales] that best matches [deviceTag].
///
/// Preference order: exact tag match, then same primary language, then the
/// first known locale, then a hard-coded `en_US` entry. Tags are compared
/// case-insensitively with `_` and `-` treated as the same separator, so
/// `en_US` matches a device tag of `en-US`.
LocaleName _pickDefaultLocale(String deviceTag) {
  String normalize(String tag) => tag.replaceAll('_', '-').toLowerCase();

  final wanted = normalize(deviceTag);
  for (final candidate in _locales) {
    if (normalize(candidate.localeId) == wanted) return candidate;
  }

  final primary = wanted.split('-').first;
  for (final candidate in _locales) {
    final id = normalize(candidate.localeId);
    // Accept both a bare language ("en") and any regional variant ("en-gb").
    if (id == primary || id.startsWith('$primary-')) return candidate;
  }

  return _locales.isNotEmpty
      ? _locales.first
      : const LocaleName('en_US', 'en_US');
}
|
|
|
|
|
|
|
|
|
|
/// Asks the recognizer whether the required permission is granted.
///
/// Any error raised by the query is treated as "not granted".
Future<bool> checkPermissions() async {
  var granted = false;
  try {
    granted = await _speech.hasPermission();
  } catch (_) {
    granted = false;
  }
  return granted;
}
|
|
|
|
|
|
|
|
|
|
/// Whether a recognition session is currently active.
bool get isListening {
  return _isListening;
}

/// Whether the service is initialized and at least one backend (on-device
/// or server) can be used.
bool get isAvailable {
  if (!_isInitialized) return false;
  return _localSttAvailable || hasServerStt;
}

/// Whether on-device recognition was found to be available.
bool get hasLocalStt {
  return _localSttAvailable;
}
|
2025-08-25 10:35:48 +05:30
|
|
|
|
|
|
|
|
// Checks whether on-device STT is properly supported.
|
|
|
|
|
/// Re-queries the recognizer for on-device support.
///
/// Returns `false` without querying when the platform is unsupported or the
/// service has not been initialized, and also when the query itself fails.
Future<bool> checkOnDeviceSupport() async {
  final ready = isSupportedPlatform && _isInitialized;
  if (!ready) return false;

  try {
    return await _speech.isSupported();
  } catch (_) {
    // A failing query is reported as "not supported".
    return false;
  }
}
|
|
|
|
|
|
|
|
|
|
// Test method to verify on-device STT functionality
|
|
|
|
|
/// Runs a short start/stop cycle of the on-device recognizer.
///
/// Returns a human-readable description of the outcome; failures are
/// reported in the returned text rather than thrown.
Future<String> testOnDeviceStt() async {
  try {
    // Initialization populates the availability flag and default locale.
    await initialize();
    if (!_localSttAvailable) {
      return 'Local STT not available. Available: $_localSttAvailable';
    }

    final micGranted = await checkPermissions();
    if (!micGranted) {
      return 'Microphone permission not granted';
    }

    final recognizerReady = await _speech.isSupported();
    if (!recognizerReady) {
      return 'Speech recognition service is not available on this device';
    }

    // Applying the language is best-effort; the smoke test proceeds with
    // the recognizer's current language if it fails.
    final locale = _selectedLocaleId;
    if (locale != null) {
      try {
        await _speech.setLanguage(locale);
      } catch (_) {}
    }

    // Start, wait briefly, then stop.
    await _speech.start(SttRecognitionOptions(punctuation: true));
    await Future.delayed(const Duration(milliseconds: 100));
    await _speech.stop();

    return 'On-device STT test completed successfully. Local STT available: $_localSttAvailable, Selected locale: $_selectedLocaleId';
  } catch (e) {
    return 'On-device STT test failed: $e';
  }
}
|
|
|
|
|
|
2025-08-22 13:54:58 +05:30
|
|
|
/// Identifier of the locale currently selected for recognition, if any.
String? get selectedLocaleId {
  return _selectedLocaleId;
}

/// Locales reported by the on-device recognizer during initialization.
List<LocaleName> get locales {
  return _locales;
}

/// Selects [localeId] for subsequent sessions; `null` clears the selection.
void setLocale(String? localeId) => _selectedLocaleId = localeId;
|
2025-08-10 01:20:45 +05:30
|
|
|
|
|
|
|
|
/// Starts a recognition session and returns its text stream.
///
/// Throws an [Exception] when the service has not been initialized. If a
/// session is already active, a stop is requested (not awaited) before the
/// new session state is created. The backend is chosen from the preference
/// and what is available: on-device first unless the preference is
/// server-only, then server, otherwise an error is emitted on the stream.
Stream<String> startListening() {
  if (!_isInitialized) {
    throw Exception('Voice input not initialized');
  }

  if (_isListening) {
    unawaited(stopListening());
  }

  // Fresh per-session state.
  final textController = StreamController<String>.broadcast();
  _textStreamController = textController;
  _currentText = '';
  _isListening = true;
  _intensityController = StreamController<int>.broadcast();
  _lastIntensity = 0;
  _usingServerStt = false;

  _startIntensityDecayTimer();

  final useLocal =
      _localSttAvailable && _preference != SttPreference.serverOnly;
  final useServer =
      hasServerStt && (_preference == SttPreference.serverOnly || !useLocal);

  if (useLocal) {
    _runLocalSession();
  } else if (useServer) {
    _runServerSession();
  } else {
    _reportNoBackend();
  }

  return textController.stream;
}

/// (Re)arms the timer that stops a still-active session after [limit].
void _armAutoStop(Duration limit) {
  _autoStopTimer?.cancel();
  _autoStopTimer = Timer(limit, () {
    if (_isListening) {
      unawaited(_stopListening());
    }
  });
}

/// Wires up and starts the on-device recognizer, limited to 60 seconds.
void _runLocalSession() {
  _armAutoStop(const Duration(seconds: 60));

  // Re-check availability in the background; if the recognizer has gone
  // away, switch to the server when allowed, otherwise stop.
  Future.microtask(() async {
    try {
      final stillSupported = await _speech.isSupported();
      if (stillSupported || !_isListening) return;
      _localSttAvailable = false;
      if (hasServerStt && allowsServerFallback) {
        unawaited(_beginServerFallback());
      } else {
        unawaited(_stopListening());
      }
    } catch (_) {
      // Errors from the availability re-check are ignored.
    }
  });

  _sttResultSub = _speech.onResultChanged.listen(
    _onLocalResult,
    onError: (_) {},
  );

  _sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {});

  Future(() async {
    try {
      final locale = _selectedLocaleId;
      if (locale != null) {
        await _speech.setLanguage(locale);
      }
      await _speech.start(SttRecognitionOptions(punctuation: true));
    } catch (error) {
      _localSttAvailable = false;
      if (!_isListening) return;
      if (hasServerStt && allowsServerFallback) {
        await _beginServerFallback();
      } else {
        _textStreamController?.addError(error);
        await _stopListening();
      }
    }
  });
}

/// Publishes an on-device recognition [result] and derives an intensity
/// level from how much the text grew.
void _onLocalResult(SttRecognition result) {
  if (!_isListening) return;

  final previousLength = _currentText.length;
  _currentText = result.text;
  _textStreamController?.add(_currentText);

  // Map text growth (capped at 50 characters) onto the 0..10 scale.
  final growth = (_currentText.length - previousLength).clamp(0, 50);
  _lastIntensity = (growth / 5.0).ceil().clamp(0, 10);
  try {
    _intensityController?.add(_lastIntensity);
  } catch (_) {}

  if (result.isFinal) {
    unawaited(_stopListening());
  }
}

/// Starts server-backed recording, limited to 90 seconds.
void _runServerSession() {
  _usingServerStt = true;
  _armAutoStop(const Duration(seconds: 90));

  Future(() async {
    try {
      await _startServerRecording();
    } catch (error) {
      if (!_isListening) return;
      _textStreamController?.addError(error);
      await _stopListening();
    }
  });
}

/// Emits an error explaining why no backend can be used, then stops.
void _reportNoBackend() {
  final Exception error = prefersDeviceOnly
      ? Exception('On-device speech recognition required but unavailable')
      : prefersServerOnly
      ? Exception('Server speech-to-text is not configured')
      : Exception('Speech recognition not available on this device');

  // Deferred so the caller can subscribe to the returned stream first.
  Future.microtask(() {
    _textStreamController?.addError(error);
    unawaited(_stopListening());
  });
}
|
|
|
|
|
|
2025-08-28 19:48:35 +05:30
|
|
|
/// Single entry point for starting voice recognition.
///
/// Runs [initialize], verifies the permission via [checkPermissions] and then
/// delegates to [startListening]. Throws an [Exception] when the permission
/// is missing; errors from [startListening] propagate unchanged.
Future<Stream<String>> beginListening() async {
  await initialize();

  final micGranted = await checkPermissions();
  if (micGranted) {
    return startListening();
  }
  throw Exception('Microphone permission not granted');
}
|
|
|
|
|
|
2025-08-10 01:20:45 +05:30
|
|
|
/// Stops the active session, if any; completes once teardown has finished.
Future<void> stopListening() => _stopListening();
|
|
|
|
|
|
|
|
|
|
/// Tears down the active session: stops the backend, publishes the final
/// text and closes the session's streams. Does nothing when not listening.
///
/// The session's controllers and decay timer are captured before the first
/// `await`. [startListening] may create a new session while this method is
/// suspended (for example during the transcription upload); in that case
/// only the captured resources are released and the new session's timer,
/// controllers and backend flag are left untouched.
Future<void> _stopListening() async {
  if (!_isListening) return;

  _isListening = false;

  _autoStopTimer?.cancel();
  _autoStopTimer = null;

  // Snapshot what this session owns before yielding.
  final textController = _textStreamController;
  final intensityController = _intensityController;
  final decayTimer = _intensityDecayTimer;
  bool superseded() => !identical(_textStreamController, textController);

  if (_usingServerStt) {
    // Speech-end events are ignored once `_isListening` is false, so the
    // pending samples cannot change while the recorder shuts down.
    final samples = _vadPendingSamples;
    _vadPendingSamples = null;
    await _stopVadRecording();
    if (!superseded() && samples != null && samples.isNotEmpty) {
      await _processVadSamples(samples);
    }
  } else {
    await _stopLocalStt();
    if (!superseded() && _currentText.isNotEmpty) {
      textController?.add(_currentText);
    }
  }

  decayTimer?.cancel();

  if (superseded()) {
    // A newer session replaced the fields; close only our own controllers.
    for (final controller in [textController, intensityController]) {
      try {
        await controller?.close();
      } catch (_) {}
    }
    return;
  }

  _intensityDecayTimer = null;
  _lastIntensity = 0;

  await _closeControllers();

  _usingServerStt = false;
}
|
|
|
|
|
|
|
|
|
|
/// Cancels the on-device recognizer subscriptions and stops the recognizer.
///
/// The subscription fields are detached before any `await` so that
/// subscriptions installed by a session started in the meantime are neither
/// cancelled nor lost. All failures are ignored: teardown is best-effort.
Future<void> _stopLocalStt() async {
  final resultSub = _sttResultSub;
  final stateSub = _sttStateSub;
  _sttResultSub = null;
  _sttStateSub = null;

  if (resultSub != null) {
    try {
      await resultSub.cancel();
    } catch (_) {}
  }
  if (stateSub != null) {
    try {
      await stateSub.cancel();
    } catch (_) {}
  }

  // Only talk to the recognizer when it is believed to be available.
  if (_localSttAvailable) {
    try {
      await _speech.stop();
    } catch (_) {}
  }
}
|
|
|
|
|
|
|
|
|
|
/// Switches the active session from the on-device recognizer to server
/// transcription.
///
/// Emits an error and stops the session when the preference forbids the
/// server backend or no API service is configured. If the session is stopped
/// while the on-device recognizer is shutting down, the fallback is
/// abandoned so that recording is never started for a session nobody will
/// stop.
Future<void> _beginServerFallback() async {
  if (!allowsServerFallback) {
    _textStreamController?.addError(
      Exception('Server speech-to-text disabled in preferences'),
    );
    await _stopListening();
    return;
  }

  await _stopLocalStt();

  // The caller may have stopped the session during the await above.
  if (!_isListening) return;

  if (!hasServerStt) {
    _textStreamController?.addError(
      Exception('Server speech-to-text unavailable'),
    );
    await _stopListening();
    return;
  }

  _usingServerStt = true;

  // Server sessions are limited to 90 seconds.
  _autoStopTimer?.cancel();
  _autoStopTimer = Timer(const Duration(seconds: 90), () {
    if (_isListening) {
      unawaited(_stopListening());
    }
  });

  try {
    await _startServerRecording();
  } catch (error) {
    // Same guard as the other start paths: nothing to report once stopped.
    if (!_isListening) return;
    _textStreamController?.addError(error);
    await _stopListening();
  }
}
|
|
|
|
|
|
|
|
|
|
/// Subscribes to the VAD handler and starts recording for server
/// transcription.
///
/// Does nothing when the session is no longer active, either on entry or
/// after the subscriptions were set up; in the latter case the subscriptions
/// are released again. Errors from the VAD handler propagate to the caller,
/// which reports them on the text stream. They are deliberately not reported
/// here, so listeners receive each failure once.
Future<void> _startServerRecording() async {
  // The session may have been stopped before this deferred start ran.
  if (!_isListening) return;

  await _setupVadStreams();

  if (!_isListening) {
    await _stopVadRecording();
    return;
  }

  // The silence window comes from settings (2000 ms when unavailable).
  final settings = _ref?.read(appSettingsProvider);
  final silenceMs = settings?.voiceSilenceDuration ?? 2000;
  final redemptionFrames = _silenceDurationToFrames(silenceMs);
  // Pad the end of speech with roughly a quarter of the silence window.
  final endPadFrames = redemptionFrames > 4
      ? (redemptionFrames / 4).round().clamp(1, redemptionFrames)
      : 1;

  await _vadHandler.startListening(
    frameSamples: _vadFrameSamples,
    redemptionFrames: redemptionFrames,
    endSpeechPadFrames: endPadFrames,
    preSpeechPadFrames: 2,
    minSpeechFrames: 3,
    submitUserSpeechOnPause: true,
    recordConfig: const RecordConfig(
      encoder: AudioEncoder.pcm16bits,
      sampleRate: _vadSampleRate,
      numChannels: 1,
      bitRate: 16,
      echoCancel: true,
      autoGain: true,
      noiseSuppress: true,
      androidConfig: AndroidRecordConfig(
        audioSource: AndroidAudioSource.voiceCommunication,
        audioManagerMode: AudioManagerMode.modeInCommunication,
        speakerphone: true,
        manageBluetooth: true,
        useLegacy: false,
      ),
    ),
  );
}
|
2025-11-05 00:09:35 +05:30
|
|
|
|
2025-11-10 01:57:28 +05:30
|
|
|
/// Replaces the three VAD subscriptions (speech end, frame, error),
/// cancelling any previous one before subscribing again.
Future<void> _setupVadStreams() async {
  // Speech end: remember the samples and finish the session, which
  // transcribes them.
  await _vadSpeechEndSub?.cancel();
  _vadSpeechEndSub = _vadHandler.onSpeechEnd.listen((samples) {
    final serverSessionActive = _isListening && _usingServerStt;
    if (!serverSessionActive || samples.isEmpty) return;
    _vadPendingSamples = samples;
    unawaited(_stopListening());
  });

  // Processed frame: publish its intensity level.
  await _vadFrameSub?.cancel();
  _vadFrameSub = _vadHandler.onFrameProcessed.listen((frameData) {
    if (!_isListening) return;
    _lastIntensity = _intensityFromVadFrame(frameData.frame);
    try {
      _intensityController?.add(_lastIntensity);
    } catch (_) {}
  });

  // Error: forward it to listeners and end the session.
  await _vadErrorSub?.cancel();
  _vadErrorSub = _vadHandler.onError.listen((message) {
    _textStreamController?.addError(Exception(message));
    if (_isListening) {
      unawaited(_stopListening());
    }
  });
}
|
2025-08-10 01:20:45 +05:30
|
|
|
|
2025-11-10 01:57:28 +05:30
|
|
|
/// Stops VAD recording and cancels the VAD subscriptions.
///
/// The subscription fields are detached before any `await` so that
/// subscriptions installed by a session started in the meantime are not
/// cancelled or dropped. The subscriptions are cancelled only after the
/// handler has been asked to stop, and each cancel is isolated so one
/// failure does not leave the others active.
Future<void> _stopVadRecording() async {
  final speechEndSub = _vadSpeechEndSub;
  final frameSub = _vadFrameSub;
  final errorSub = _vadErrorSub;
  _vadSpeechEndSub = null;
  _vadFrameSub = null;
  _vadErrorSub = null;

  try {
    await _vadHandler.stopListening();
  } catch (_) {}

  for (final sub in [speechEndSub, frameSub, errorSub]) {
    try {
      await sub?.cancel();
    } catch (_) {}
  }
}
|
|
|
|
|
|
2025-11-10 01:57:28 +05:30
|
|
|
/// Uploads [samples] as a WAV file for transcription and publishes the
/// transcript, or an error, on the text stream.
///
/// Does nothing when no API service is configured. The text controller is
/// captured before the upload so the outcome is delivered to the session
/// that recorded the audio, even if a newer session replaces the controller
/// while the request is in flight.
Future<void> _processVadSamples(List<double> samples) async {
  final api = _api;
  if (api == null) return;

  final sink = _textStreamController;

  try {
    final wavBytes = _samplesToWav(samples);
    final fileName =
        'conduit_voice_${DateTime.now().millisecondsSinceEpoch}.wav';

    final response = await api.transcribeSpeech(
      audioBytes: wavBytes,
      fileName: fileName,
      mimeType: 'audio/wav',
      language: _languageForServer(),
    );

    final transcript = _extractTranscriptionText(response)?.trim();
    if (transcript == null || transcript.isEmpty) {
      // Routed through the catch below so listeners learn about it.
      throw StateError('Empty transcription result');
    }

    // Only update the shared text when the session was not replaced.
    if (identical(sink, _textStreamController)) {
      _currentText = transcript;
    }
    if (sink != null && !sink.isClosed) {
      sink.add(transcript);
    }
  } catch (error) {
    if (sink != null && !sink.isClosed) {
      sink.addError(error);
    }
  }
}
|
|
|
|
|
|
2025-11-10 01:57:28 +05:30
|
|
|
/// Converts a silence duration in [milliseconds] into a number of VAD
/// frames, limited to the range 4..50.
int _silenceDurationToFrames(int milliseconds) {
  // Duration of one frame at the configured frame size and sample rate.
  final msPerFrame = (_vadFrameSamples / _vadSampleRate) * 1000;
  final frameCount = (milliseconds / msPerFrame).round();
  if (frameCount < 4) return 4;
  if (frameCount > 50) return 50;
  return frameCount;
}
|
|
|
|
|
|
|
|
|
|
/// Maps the peak absolute sample of [frame] onto the 0..10 intensity scale.
///
/// An empty frame yields 0. The peak is multiplied by 12 before rounding,
/// so peaks of roughly 0.8 and above already reach the maximum level.
int _intensityFromVadFrame(List<double> frame) {
  if (frame.isEmpty) return 0;

  final peak = frame.fold<double>(0, (loudest, sample) {
    final magnitude = sample.abs();
    return magnitude > loudest ? magnitude : loudest;
  });

  return (peak * 12).round().clamp(0, 10);
}
|
|
|
|
|
|
|
|
|
|
/// Encodes [samples] as a mono, 16-bit PCM WAV file at [_vadSampleRate].
///
/// Each sample is clamped to -1.0..1.0 before being scaled to a 16-bit
/// integer. Returns an empty byte list (not a valid WAV file) when
/// [samples] is empty.
///
/// Sample data is written with an explicit little-endian byte order, as the
/// WAV format requires, instead of relying on the host's native byte order.
Uint8List _samplesToWav(List<double> samples) {
  if (samples.isEmpty) {
    return Uint8List(0);
  }

  const bytesPerSample = 2;
  const numChannels = 1;
  const byteRate = _vadSampleRate * numChannels * bytesPerSample;
  const blockAlign = numChannels * bytesPerSample;

  final pcm = ByteData(samples.length * bytesPerSample);
  for (var i = 0; i < samples.length; i++) {
    final clamped = samples[i].clamp(-1.0, 1.0);
    final scaled = (clamped * 32767).round().clamp(-32768, 32767);
    pcm.setInt16(i * bytesPerSample, scaled, Endian.little);
  }
  final dataLength = pcm.lengthInBytes;

  final builder = BytesBuilder()
    // RIFF container header; the size covers everything after this field.
    ..add(ascii.encode('RIFF'))
    ..add(_int32Le(36 + dataLength))
    ..add(ascii.encode('WAVE'))
    // "fmt " chunk: 16-byte body, format tag 1 (PCM).
    ..add(ascii.encode('fmt '))
    ..add(_int32Le(16))
    ..add(_int16Le(1))
    ..add(_int16Le(numChannels))
    ..add(_int32Le(_vadSampleRate))
    ..add(_int32Le(byteRate))
    ..add(_int16Le(blockAlign))
    ..add(_int16Le(bytesPerSample * 8))
    // "data" chunk with the PCM payload.
    ..add(ascii.encode('data'))
    ..add(_int32Le(dataLength))
    ..add(pcm.buffer.asUint8List());
  return builder.toBytes();
}
|
|
|
|
|
|
2025-11-10 01:57:28 +05:30
|
|
|
/// Returns the low 16 bits of [value] as two bytes, least significant first.
List<int> _int16Le(int value) {
  final low = value & 0xff;
  final high = (value >> 8) & 0xff;
  return [low, high];
}
|
|
|
|
|
|
|
|
|
|
/// Returns the low 32 bits of [value] as four bytes, least significant first.
List<int> _int32Le(int value) {
  return [
    for (var shift = 0; shift <= 24; shift += 8) (value >> shift) & 0xff,
  ];
}
|
|
|
|
|
|
2025-11-02 19:02:37 +05:30
|
|
|
/// Determines the language code sent with server transcription requests.
///
/// Uses the lower-cased primary subtag of the selected locale when it has at
/// least two characters, otherwise the platform locale's language code.
/// Returns `null` when neither yields a value.
String? _languageForServer() {
  final selected = _selectedLocaleId ?? '';
  if (selected.isNotEmpty) {
    final code = selected.split(RegExp('[-_]')).first.toLowerCase();
    if (code.length >= 2) return code;
  }

  try {
    final platformLocale = WidgetsBinding.instance.platformDispatcher.locale;
    final code = platformLocale.languageCode.toLowerCase();
    return code.isEmpty ? null : code;
  } catch (_) {
    // Any failure reading the platform locale means "no language hint".
    return null;
  }
}
|
|
|
|
|
|
|
|
|
|
/// Extracts the transcript from a transcription response.
///
/// The following response shapes are tried in order, and the first non-blank
/// string wins:
///  1. `text`, `display_text`, `DisplayText` at the top level;
///  2. `result.text`;
///  3. the first entry of `combinedRecognizedPhrases` (a string, or a map
///     with `display`, `Display`, `transcript` or `text`);
///  4. `results.channels[0].alternatives[0]` (`transcript` or `text`);
///  5. `segments`, whose texts are trimmed and joined with single spaces.
///
/// Within each shape, blank or non-string values are skipped instead of
/// hiding a usable value under a later key. Returns `null` when nothing
/// usable is found. Except for joined segments the text is returned
/// untrimmed.
String? _extractTranscriptionText(Map<String, dynamic> data) {
  // First candidate that is a string with non-whitespace content.
  String? firstText(Iterable<Object?> candidates) {
    for (final candidate in candidates) {
      if (candidate is String && candidate.trim().isNotEmpty) {
        return candidate;
      }
    }
    return null;
  }

  // First element of a non-empty list, otherwise null.
  Object? head(Object? value) =>
      value is List && value.isNotEmpty ? value.first : null;

  final flat = firstText([
    data['text'],
    data['display_text'],
    data['DisplayText'],
  ]);
  if (flat != null) return flat;

  final result = data['result'];
  if (result is Map) {
    final nested = firstText([result['text']]);
    if (nested != null) return nested;
  }

  final phrase = head(data['combinedRecognizedPhrases']);
  final phraseText = phrase is Map
      ? firstText([
          phrase['display'],
          phrase['Display'],
          phrase['transcript'],
          phrase['text'],
        ])
      : firstText([phrase]);
  if (phraseText != null) return phraseText;

  final results = data['results'];
  if (results is Map) {
    final channel = head(results['channels']);
    if (channel is Map) {
      final alternative = head(channel['alternatives']);
      if (alternative is Map) {
        final alternativeText = firstText([
          alternative['transcript'],
          alternative['text'],
        ]);
        if (alternativeText != null) return alternativeText;
      }
    }
  }

  final segments = data['segments'];
  if (segments is List) {
    final parts = <String>[];
    for (final segment in segments) {
      final text = firstText([segment is Map ? segment['text'] : segment]);
      if (text != null) parts.add(text.trim());
    }
    if (parts.isNotEmpty) return parts.join(' ');
  }

  return null;
}
|
|
|
|
|
|
|
|
|
|
/// Closes and forgets the text and intensity controllers.
///
/// The fields are cleared before awaiting the closes, so controllers
/// installed by a session that starts while a close is pending are not
/// nulled out afterwards. Close failures are ignored.
Future<void> _closeControllers() async {
  final textController = _textStreamController;
  final intensityController = _intensityController;
  _textStreamController = null;
  _intensityController = null;

  if (textController != null) {
    try {
      await textController.close();
    } catch (_) {}
  }
  if (intensityController != null) {
    try {
      await intensityController.close();
    } catch (_) {}
  }
}
|
|
|
|
|
|
|
|
|
|
/// Restarts the periodic timer that lowers the intensity level by one step
/// every 120 ms while a session is active, publishing each new level.
void _startIntensityDecayTimer() {
  _intensityDecayTimer?.cancel();
  _intensityDecayTimer = Timer.periodic(
    const Duration(milliseconds: 120),
    (_) {
      final shouldDecay = _isListening && _lastIntensity > 0;
      if (!shouldDecay) return;

      _lastIntensity = (_lastIntensity - 1).clamp(0, 10);
      try {
        _intensityController?.add(_lastIntensity);
      } catch (_) {}
    },
  );
}
|
|
|
|
|
|
|
|
|
|
/// Stops any active session and releases both speech engines.
///
/// Teardown is asynchronous and runs in the background: the session is
/// stopped first and the engines are disposed only afterwards, so the VAD
/// handler is never disposed while its stop request is still in flight.
/// Failures are ignored because nothing can act on them at this point.
void dispose() {
  unawaited(_disposeEngines());
}

/// Sequential, best-effort teardown used by [dispose].
Future<void> _disposeEngines() async {
  try {
    await stopListening();
  } catch (_) {}
  try {
    await _vadHandler.dispose();
  } catch (_) {}
  try {
    await _speech.dispose();
  } catch (_) {}
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Provides the [VoiceInputService].
///
/// The service is built with the watched API service, seeded with the
/// current speech-to-text preference, kept in sync with later preference
/// changes and disposed together with the provider.
final voiceInputServiceProvider = Provider<VoiceInputService>((ref) {
  final service = VoiceInputService(
    api: ref.watch(apiServiceProvider),
    ref: ref,
  )..updatePreference(ref.read(appSettingsProvider).sttPreference);

  ref
    ..listen<AppSettings>(appSettingsProvider, (previous, next) {
      final changed = previous?.sttPreference != next.sttPreference;
      if (changed) {
        service.updatePreference(next.sttPreference);
      }
    })
    ..onDispose(service.dispose);

  return service;
});
|
|
|
|
|
|
2025-10-01 18:32:16 +05:30
|
|
|
/// Whether voice input can be offered under the current preference.
///
/// Device-only and server-only preferences are answered from the matching
/// backend flag alone. With the automatic preference, on-device support is
/// enough; otherwise a server backend plus a granted permission is required.
@Riverpod(keepAlive: true)
Future<bool> voiceInputAvailable(Ref ref) async {
  final service = ref.watch(voiceInputServiceProvider);
  if (!service.isSupportedPlatform) return false;
  if (!await service.initialize()) return false;

  final preference = service.preference;
  if (preference == SttPreference.deviceOnly) return service.hasLocalStt;
  if (preference == SttPreference.serverOnly) return service.hasServerStt;
  if (preference == SttPreference.auto) {
    if (service.hasLocalStt) return true;
    if (!service.hasServerStt) return false;
  }

  // Remaining case: automatic preference backed by the server only.
  if (!await service.checkPermissions()) return false;
  return service.isAvailable;
}
|
2025-08-10 01:20:45 +05:30
|
|
|
|
|
|
|
|
/// Exposes the service's text stream as it is when the provider is built.
final voiceInputStreamProvider = StreamProvider<String>(
  (ref) => ref.watch(voiceInputServiceProvider).textStream,
);
|
|
|
|
|
|
|
|
|
|
/// Exposes the service's coarse voice-intensity stream, intended for
/// waveform visuals, as it is when the provider is built.
final voiceIntensityStreamProvider = StreamProvider<int>(
  (ref) => ref.watch(voiceInputServiceProvider).intensityStream,
);
|
2025-11-02 19:02:37 +05:30
|
|
|
|
|
|
|
|
/// Whether on-device recognition is available.
///
/// Initializes the service first; when initialization did not find on-device
/// support, the recognizer is queried once more.
final localVoiceRecognitionAvailableProvider = FutureProvider<bool>(
  (ref) async {
    final service = ref.watch(voiceInputServiceProvider);
    if (!await service.initialize()) return false;
    return service.hasLocalStt ? true : service.checkOnDeviceSupport();
  },
);
|
|
|
|
|
|
|
|
|
|
/// Whether the voice input service has a server transcription backend.
final serverVoiceRecognitionAvailableProvider = Provider<bool>(
  (ref) => ref.watch(voiceInputServiceProvider).hasServerStt,
);
|