feat(voice-input): improve server-side speech detection with silence auto-stop

This commit is contained in:
cogwheel0
2025-11-03 00:36:25 +05:30
parent a05837b985
commit 1a570f4a08

View File

@@ -44,6 +44,8 @@ class VoiceInputService {
_intensityController?.stream ?? const Stream<int>.empty();
int _lastIntensity = 0;
Timer? _intensityDecayTimer;
Timer? _silenceTimer;
bool _hasDetectedSpeech = false;
/// Public stream of partial/final transcript strings and special audio tokens.
Stream<String> get textStream =>
@@ -331,6 +333,9 @@ class VoiceInputService {
_autoStopTimer?.cancel();
_autoStopTimer = null;
_silenceTimer?.cancel();
_silenceTimer = null;
if (_usingServerStt) {
await _finalizeServerRecording();
} else {
@@ -354,6 +359,7 @@ class VoiceInputService {
_serverRecorderActive = false;
_serverRecordingPath = null;
_serverRecordingMimeType = null;
_hasDetectedSpeech = false;
}
Future<void> _stopLocalStt() async {
@@ -425,6 +431,7 @@ class VoiceInputService {
await _recorder.start(config, path: path);
_serverRecorderActive = true;
_hasDetectedSpeech = false;
await _ampSub?.cancel();
_ampSub = _recorder
@@ -435,9 +442,34 @@ class VoiceInputService {
try {
_intensityController?.add(_lastIntensity);
} catch (_) {}
// Detect silence and auto-stop for server-side STT
_handleServerAmplitude(amplitude.current);
}, onError: (_) {});
}
void _handleServerAmplitude(double? amplitude) {
if (!_usingServerStt || !_isListening) return;
// Threshold for detecting speech (in dB)
const double speechThreshold = -45.0;
final double currentDb = amplitude ?? -100.0;
// If we detect speech, mark it and reset silence timer
if (currentDb > speechThreshold) {
_hasDetectedSpeech = true;
_silenceTimer?.cancel();
_silenceTimer = null;
} else if (_hasDetectedSpeech && _silenceTimer == null) {
// Start silence timer only after we've detected speech at least once
_silenceTimer = Timer(const Duration(seconds: 2), () {
if (_isListening && _usingServerStt) {
unawaited(_stopListening());
}
});
}
}
Future<(String, String)> _createRecordingTarget() async {
final directory = await getTemporaryDirectory();
final timestamp = DateTime.now().millisecondsSinceEpoch;
@@ -657,6 +689,7 @@ class VoiceInputService {
void dispose() {
stopListening();
_silenceTimer?.cancel();
try {
_speech.dispose().catchError((_) {});
} catch (_) {}