feat(voice-input): improve server-side speech detection with silence auto-stop

2025-11-03 00:36:25 +05:30
parent a05837b985
commit 1a570f4a08
1 changed files with 33 additions and 0 deletions
@@ -44,6 +44,8 @@ class VoiceInputService {
      _intensityController?.stream ?? const Stream<int>.empty();
  int _lastIntensity = 0;
  Timer? _intensityDecayTimer;
  Timer? _silenceTimer;
  bool _hasDetectedSpeech = false;
  /// Public stream of partial/final transcript strings and special audio tokens.
  Stream<String> get textStream =>
@@ -331,6 +333,9 @@ class VoiceInputService {
    _autoStopTimer?.cancel();
    _autoStopTimer = null;
    _silenceTimer?.cancel();
    _silenceTimer = null;
    if (_usingServerStt) {
      await _finalizeServerRecording();
    } else {
@@ -354,6 +359,7 @@ class VoiceInputService {
    _serverRecorderActive = false;
    _serverRecordingPath = null;
    _serverRecordingMimeType = null;
    _hasDetectedSpeech = false;
  }
  Future<void> _stopLocalStt() async {
@@ -425,6 +431,7 @@ class VoiceInputService {
    await _recorder.start(config, path: path);
    _serverRecorderActive = true;
    _hasDetectedSpeech = false;
    await _ampSub?.cancel();
    _ampSub = _recorder
@@ -435,9 +442,34 @@ class VoiceInputService {
          try {
            _intensityController?.add(_lastIntensity);
          } catch (_) {}
          // Detect silence and auto-stop for server-side STT
          _handleServerAmplitude(amplitude.current);
        }, onError: (_) {});
  }
  void _handleServerAmplitude(double? amplitude) {
    if (!_usingServerStt || !_isListening) return;
    // Threshold for detecting speech (in dB)
    const double speechThreshold = -45.0;
    final double currentDb = amplitude ?? -100.0;
    // If we detect speech, mark it and reset silence timer
    if (currentDb > speechThreshold) {
      _hasDetectedSpeech = true;
      _silenceTimer?.cancel();
      _silenceTimer = null;
    } else if (_hasDetectedSpeech && _silenceTimer == null) {
      // Start silence timer only after we've detected speech at least once
      _silenceTimer = Timer(const Duration(seconds: 2), () {
        if (_isListening && _usingServerStt) {
          unawaited(_stopListening());
        }
      });
    }
  }
  Future<(String, String)> _createRecordingTarget() async {
    final directory = await getTemporaryDirectory();
    final timestamp = DateTime.now().millisecondsSinceEpoch;
@@ -657,6 +689,7 @@ class VoiceInputService {
  void dispose() {
    stopListening();
    _silenceTimer?.cancel();
    try {
      _speech.dispose().catchError((_) {});
    } catch (_) {}