feat(voice-input): improve server-side speech detection with silence auto-stop
This commit is contained in:
@@ -44,6 +44,8 @@ class VoiceInputService {
|
|||||||
_intensityController?.stream ?? const Stream<int>.empty();
|
_intensityController?.stream ?? const Stream<int>.empty();
|
||||||
int _lastIntensity = 0;
|
int _lastIntensity = 0;
|
||||||
Timer? _intensityDecayTimer;
|
Timer? _intensityDecayTimer;
|
||||||
|
Timer? _silenceTimer;
|
||||||
|
bool _hasDetectedSpeech = false;
|
||||||
|
|
||||||
/// Public stream of partial/final transcript strings and special audio tokens.
|
/// Public stream of partial/final transcript strings and special audio tokens.
|
||||||
Stream<String> get textStream =>
|
Stream<String> get textStream =>
|
||||||
@@ -331,6 +333,9 @@ class VoiceInputService {
|
|||||||
_autoStopTimer?.cancel();
|
_autoStopTimer?.cancel();
|
||||||
_autoStopTimer = null;
|
_autoStopTimer = null;
|
||||||
|
|
||||||
|
_silenceTimer?.cancel();
|
||||||
|
_silenceTimer = null;
|
||||||
|
|
||||||
if (_usingServerStt) {
|
if (_usingServerStt) {
|
||||||
await _finalizeServerRecording();
|
await _finalizeServerRecording();
|
||||||
} else {
|
} else {
|
||||||
@@ -354,6 +359,7 @@ class VoiceInputService {
|
|||||||
_serverRecorderActive = false;
|
_serverRecorderActive = false;
|
||||||
_serverRecordingPath = null;
|
_serverRecordingPath = null;
|
||||||
_serverRecordingMimeType = null;
|
_serverRecordingMimeType = null;
|
||||||
|
_hasDetectedSpeech = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<void> _stopLocalStt() async {
|
Future<void> _stopLocalStt() async {
|
||||||
@@ -425,6 +431,7 @@ class VoiceInputService {
|
|||||||
|
|
||||||
await _recorder.start(config, path: path);
|
await _recorder.start(config, path: path);
|
||||||
_serverRecorderActive = true;
|
_serverRecorderActive = true;
|
||||||
|
_hasDetectedSpeech = false;
|
||||||
|
|
||||||
await _ampSub?.cancel();
|
await _ampSub?.cancel();
|
||||||
_ampSub = _recorder
|
_ampSub = _recorder
|
||||||
@@ -435,9 +442,34 @@ class VoiceInputService {
|
|||||||
try {
|
try {
|
||||||
_intensityController?.add(_lastIntensity);
|
_intensityController?.add(_lastIntensity);
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
|
|
||||||
|
// Detect silence and auto-stop for server-side STT
|
||||||
|
_handleServerAmplitude(amplitude.current);
|
||||||
}, onError: (_) {});
|
}, onError: (_) {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Classifies one recorder amplitude sample as speech or silence and arms a
/// silence timer that ends the server-side listening session.
///
/// [amplitude] is the current level reported by the recorder; a `null`
/// reading is treated as -100 dB. Samples are ignored unless server-side STT
/// is in use and the service is currently listening.
void _handleServerAmplitude(double? amplitude) {
  // Nothing to track unless a server-side session is actively listening.
  final serverSessionActive = _usingServerStt && _isListening;
  if (!serverSessionActive) return;

  // Level (in dB) above which a sample counts as speech.
  const speechThreshold = -45.0;
  // A missing reading falls back to -100 dB, which is below the threshold.
  final level = amplitude ?? -100.0;
  final isSpeech = level > speechThreshold;

  if (isSpeech) {
    // Speech heard: remember it and drop any pending silence countdown.
    _hasDetectedSpeech = true;
    _silenceTimer?.cancel();
    _silenceTimer = null;
    return;
  }

  // Silence before any speech never arms the timer, and a countdown that is
  // already running is left untouched rather than restarted.
  if (!_hasDetectedSpeech || _silenceTimer != null) return;

  _silenceTimer = Timer(const Duration(seconds: 2), () {
    // Re-check state when the timer fires; the session may have ended.
    if (!_isListening || !_usingServerStt) return;
    unawaited(_stopListening());
  });
}
|
||||||
|
|
||||||
Future<(String, String)> _createRecordingTarget() async {
|
Future<(String, String)> _createRecordingTarget() async {
|
||||||
final directory = await getTemporaryDirectory();
|
final directory = await getTemporaryDirectory();
|
||||||
final timestamp = DateTime.now().millisecondsSinceEpoch;
|
final timestamp = DateTime.now().millisecondsSinceEpoch;
|
||||||
@@ -657,6 +689,7 @@ class VoiceInputService {
|
|||||||
|
|
||||||
void dispose() {
|
void dispose() {
|
||||||
stopListening();
|
stopListening();
|
||||||
|
_silenceTimer?.cancel();
|
||||||
try {
|
try {
|
||||||
_speech.dispose().catchError((_) {});
|
_speech.dispose().catchError((_) {});
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
|
|||||||
Reference in New Issue
Block a user