refactor: remove server audio transcription and related fallback logic, retaining only on-device speech-to-text functionality

2025-08-25 20:56:33 +05:30
parent fa9fa8dd1b
commit ac21ec6493
4 changed files with 56 additions and 355 deletions
@@ -3,8 +3,7 @@ import 'package:record/record.dart';
 import 'package:flutter/widgets.dart';
 import 'dart:async';
 import 'dart:io' show Platform;
-import 'package:path_provider/path_provider.dart';
-import 'package:path/path.dart' as p;
+// path_provider/path imports dropped along with the server transcription fallback
 import 'package:stts/stts.dart';

 // Lightweight replacement for previous stt.LocaleName used across the UI
@@ -175,16 +174,9 @@ class VoiceInputService {
        try {
          final isStillAvailable = await _speech.isSupported();
          if (!isStillAvailable && _isListening) {
-            // speech recognition no longer available, fallback to recording
+            // Speech recognition no longer available; stop listening
            _localSttAvailable = false;
-            // Restart with fallback method
-            _startRecordingProxyIntensity();
-            _autoStopTimer?.cancel();
-            _autoStopTimer = Timer(const Duration(seconds: 30), () {
-              if (_isListening) {
-                _stopListening();
-              }
-            });
+            _stopListening();
            return;
          }
        } catch (e) {
@@ -218,24 +210,17 @@ class VoiceInputService {
        }
        // Start recognition (no await blocking the sync flow)
        _speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) {
-          // fallback to recording
+          // On-device STT failed; stop listening entirely as server transcription is removed
          _localSttAvailable = false;
-          _startRecordingProxyIntensity();
+          _stopListening();
        });
      } catch (e) {
        _localSttAvailable = false;
-        _startRecordingProxyIntensity();
+        _stopListening();
      }
    } else {
-      // Fallback: record audio and signal file path for server transcription
-      // Local STT not available, falling back to recording
-      _startRecordingProxyIntensity();
-      _autoStopTimer?.cancel();
-      _autoStopTimer = Timer(const Duration(seconds: 30), () {
-        if (_isListening) {
-          _stopListening();
-        }
-      });
+      // No local STT available; stop immediately since server transcription is removed
+      _stopListening();
    }

    return _textStreamController!.stream;
@@ -262,9 +247,6 @@ class VoiceInputService {
        _sttStateSub?.cancel();
      } catch (_) {}
      _sttStateSub = null;
-    } else {
-      // Also stop recorder if active
-      await _stopRecording();
    }

    _autoStopTimer?.cancel();
@@ -284,84 +266,12 @@ class VoiceInputService {

  void dispose() {
    stopListening();
-    _stopRecording(force: true);
    try {
      _speech.dispose().catchError((_) {});
    } catch (_) {}
  }

-  // --- Recording and intensity proxy for server transcription path ---
-  Future<void> _startRecordingProxyIntensity() async {
-    try {
-      final hasMic = await _recorder.hasPermission();
-      if (!hasMic) {
-        _textStreamController?.addError('Microphone permission not granted');
-        _stopListening();
-        return;
-      }
-
-      // Start recording in a portable format (WAV/PCM) for best compatibility with server
-      final tmpDir = await getTemporaryDirectory();
-      final filePath = p.join(
-        tmpDir.path,
-        'conduit_voice_${DateTime.now().millisecondsSinceEpoch}.wav',
-      );
-      await _recorder.start(
-        const RecordConfig(
-          encoder: AudioEncoder.wav,
-          numChannels: 1,
-          sampleRate: 16000,
-          bitRate: 128000,
-        ),
-        path: filePath,
-      );
-      // recording started at filePath
-
-      // Drive intensity from amplitude stream and detect silence
-      // Consider amplitude less than threshold as silence; stop after ~3s of continuous silence
-      const silenceThresholdDb = -45.0; // dBFS threshold
-      const silenceWindow = Duration(seconds: 3);
-      DateTime lastNonSilent = DateTime.now();
-
-      _ampSub = _recorder
-          .onAmplitudeChanged(const Duration(milliseconds: 125))
-          .listen((amp) {
-            if (!_isListening) return;
-            // Normalize peak power (dBFS) into 0-10 bar scale
-            final db = amp.current;
-            // Map dB [-60..0] -> [0..10]
-            final clamped = db.clamp(-60.0, 0.0);
-            final norm = ((clamped + 60.0) / 60.0) * 10.0;
-            _intensityController?.add(norm.round().clamp(0, 10));
-
-            if (db > silenceThresholdDb) {
-              lastNonSilent = DateTime.now();
-            } else {
-              if (DateTime.now().difference(lastNonSilent) >= silenceWindow) {
-                _stopListening();
-              }
-            }
-          });
-    } catch (e) {
-      _textStreamController?.addError('Audio recording failed: $e');
-      _stopListening();
-    }
-  }
-
-  Future<void> _stopRecording({bool force = false}) async {
-    try {
-      if (!await _recorder.isRecording() && !force) return;
-      final path = await _recorder.stop();
-      if (path == null) {
-        _textStreamController?.addError('Recording failed: no file path');
-        return;
-      }
-      // Hand off recorded file path to listeners as a special token; UI layer will upload for transcription
-      _textStreamController?.add('[[AUDIO_FILE_PATH]]:$path');
-    } catch (e) {
-      _textStreamController?.addError('Stop recording error: $e');
-    }
-  }
+  // Recording fallback removed; only on-device STT is supported now

  // Native locales not used in server transcription mode
 }
@@ -7,7 +7,7 @@ import 'package:flutter/services.dart';
 import 'package:flutter/cupertino.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 import 'package:flutter_animate/flutter_animate.dart';
-import 'dart:io' show Platform, File;
+import 'dart:io' show Platform;
 import 'dart:async';
 import '../../../core/providers/app_providers.dart';
 import '../providers/chat_providers.dart';
@@ -1927,7 +1927,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
  StreamSubscription<String>? _textSub;
  int _elapsedSeconds = 0;
  Timer? _elapsedTimer;
-  bool _isTranscribing = false;
+  // Removed server transcription; keep only on-device listening state
  String _languageTag = 'en';
  bool _holdToTalk = false;
  bool _autoSendFinal = false;
@@ -2005,18 +2005,9 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
      });
      _textSub = stream.listen(
        (text) {
-          // If we receive a special token with recorded audio path, transcribe it via API (fallback)
-          if (text.startsWith('[[AUDIO_FILE_PATH]]:')) {
-            final filePath = text.split(':').skip(1).join(':');
-            debugPrint(
-              'DEBUG: VoiceInputSheet received audio file path: $filePath',
-            );
-            _transcribeRecordedFile(filePath);
-          } else {
-            setState(() {
-              _recognizedText = text;
-            });
-          }
+          setState(() {
+            _recognizedText = text;
+          });
        },
        onDone: () {
          debugPrint('DEBUG: VoiceInputSheet stream done');
@@ -2052,44 +2043,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
    }
  }

-  Future<void> _transcribeRecordedFile(String filePath) async {
-    try {
-      setState(() => _isTranscribing = true);
-      final api = ref.read(apiServiceProvider);
-      if (api == null) throw Exception('API service unavailable');
-      final file = File(filePath);
-      final bytes = await file.readAsBytes();
-      // Try to use device locale; fall back to en-US
-      String? language;
-      try {
-        language = WidgetsBinding.instance.platformDispatcher.locale
-            .toLanguageTag();
-      } catch (_) {
-        language = 'en-US';
-      }
-      final text = await api.transcribeAudio(
-        bytes.toList(),
-        language: language,
-      );
-      debugPrint(
-        'DEBUG: Transcription received: ${text.isEmpty ? '[empty]' : text}',
-      );
-      if (!mounted) return;
-      setState(() {
-        _recognizedText = text;
-      });
-      // Stop listening state if we have a result
-      setState(() => _isListening = false);
-      if (_autoSendFinal && _recognizedText.trim().isNotEmpty) {
-        _sendText();
-      }
-    } catch (e) {
-      if (!mounted) return;
-      setState(() => _isListening = false);
-    } finally {
-      if (mounted) setState(() => _isTranscribing = false);
-    }
-  }
+  // Server transcription removed; only on-device STT is supported

  Future<void> _stopListening() async {
    _intensitySub?.cancel();
@@ -2279,9 +2233,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
                  mainAxisAlignment: MainAxisAlignment.spaceBetween,
                  children: [
                    Text(
-                      _isTranscribing
-                          ? 'Transcribing…'
-                          : _isListening
+                      _isListening
                          ? (_voiceService.hasLocalStt
                                ? 'Listening…'
                                : 'Recording…')
@@ -2601,9 +2553,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
                                        tooltip: AppLocalizations.of(
                                          context,
                                        )!.clear,
-                                        onPressed:
-                                            _recognizedText.isNotEmpty &&
-                                                !_isTranscribing
+                                        onPressed: _recognizedText.isNotEmpty
                                            ? () {
                                                setState(
                                                  () => _recognizedText = '',
@@ -2614,68 +2564,35 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
                                    ],
                                  ),
                                  const SizedBox(height: Spacing.xs),
-                                  if (_isTranscribing)
-                                    Center(
-                                      child: Row(
-                                        mainAxisAlignment:
-                                            MainAxisAlignment.center,
-                                        children: [
-                                          ConduitLoadingIndicator(
-                                            size: isUltra
-                                                ? 14
-                                                : (isCompact ? 16 : 18),
-                                            isCompact: true,
-                                          ),
-                                          const SizedBox(width: Spacing.xs),
-                                          Text(
-                                            'Transcribing…',
-                                            style: TextStyle(
-                                              fontSize: isUltra
-                                                  ? AppTypography.bodySmall
-                                                  : (isCompact
-                                                        ? AppTypography
-                                                              .bodyMedium
-                                                        : AppTypography
-                                                              .bodyLarge),
-                                              color: context
-                                                  .conduitTheme
-                                                  .textSecondary,
-                                            ),
-                                          ),
-                                        ],
-                                      ),
-                                    )
-                                  else
-                                    Flexible(
-                                      child: SingleChildScrollView(
-                                        child: Text(
-                                          _recognizedText.isEmpty
-                                              ? (_isListening
-                                                    ? (_voiceService.hasLocalStt
-                                                          ? 'Speak now…'
-                                                          : 'Recording…')
-                                                    : 'Tap Start to begin')
-                                              : _recognizedText,
-                                          style: TextStyle(
-                                            fontSize: isUltra
-                                                ? AppTypography.bodySmall
-                                                : (isCompact
-                                                      ? AppTypography.bodyMedium
-                                                      : AppTypography
-                                                            .bodyLarge),
-                                            color: _recognizedText.isEmpty
-                                                ? context
-                                                      .conduitTheme
-                                                      .inputPlaceholder
-                                                : context
-                                                      .conduitTheme
-                                                      .textPrimary,
-                                            height: 1.4,
-                                          ),
-                                          textAlign: TextAlign.center,
+                                  Flexible(
+                                    child: SingleChildScrollView(
+                                      child: Text(
+                                        _recognizedText.isEmpty
+                                            ? (_isListening
+                                                  ? (_voiceService.hasLocalStt
+                                                        ? 'Speak now…'
+                                                        : 'Recording…')
+                                                  : 'Tap Start to begin')
+                                            : _recognizedText,
+                                        style: TextStyle(
+                                          fontSize: isUltra
+                                              ? AppTypography.bodySmall
+                                              : (isCompact
+                                                    ? AppTypography.bodyMedium
+                                                    : AppTypography.bodyLarge),
+                                          color: _recognizedText.isEmpty
+                                              ? context
+                                                    .conduitTheme
+                                                    .inputPlaceholder
+                                              : context
+                                                    .conduitTheme
+                                                    .textPrimary,
+                                          height: 1.4,
                                        ),
+                                        textAlign: TextAlign.center,
                                      ),
                                    ),
+                                  ),
                                ],
                              ),
                            ),
@@ -6,7 +6,7 @@ import '../../../shared/widgets/sheet_handle.dart';

 import 'package:flutter_riverpod/flutter_riverpod.dart';

-import 'dart:io' show Platform, File;
+import 'dart:io' show Platform;
 import 'dart:async';
 import '../providers/chat_providers.dart';
 import '../../tools/widgets/unified_tools_modal.dart';
@@ -991,20 +991,15 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
      _textSub?.cancel();
      _textSub = stream.listen(
        (text) async {
-          if (text.startsWith('[[AUDIO_FILE_PATH]]:')) {
-            final path = text.split(':').skip(1).join(':');
-            await _transcribeRecordedFile(path);
-          } else {
-            final updated =
-                (_baseTextAtStart.isEmpty
-                    ? ''
-                    : (_baseTextAtStart.trimRight() + ' ')) +
-                text;
-            _controller.value = TextEditingValue(
-              text: updated,
-              selection: TextSelection.collapsed(offset: updated.length),
-            );
-          }
+          final updated =
+              (_baseTextAtStart.isEmpty
+                  ? ''
+                  : (_baseTextAtStart.trimRight() + ' ')) +
+              text;
+          _controller.value = TextEditingValue(
+            text: updated,
+            selection: TextSelection.collapsed(offset: updated.length),
+          );
        },
        onDone: () {
          if (!mounted) return;
@@ -1039,39 +1034,7 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
    HapticFeedback.selectionClick();
  }

-  Future<void> _transcribeRecordedFile(String filePath) async {
-    try {
-      final api = ref.read(apiServiceProvider);
-      if (api == null) return;
-      final file = File(filePath);
-      final bytes = await file.readAsBytes();
-      String? language;
-      try {
-        language = WidgetsBinding.instance.platformDispatcher.locale
-            .toLanguageTag();
-      } catch (_) {
-        language = 'en-US';
-      }
-      final text = await api.transcribeAudio(
-        bytes.toList(),
-        language: language,
-      );
-      final updated =
-          (_baseTextAtStart.isEmpty
-              ? ''
-              : (_baseTextAtStart.trimRight() + ' ')) +
-          text;
-      if (!mounted) return;
-      _controller.value = TextEditingValue(
-        text: updated,
-        selection: TextSelection.collapsed(offset: updated.length),
-      );
-    } catch (_) {
-    } finally {
-      if (!mounted) return;
-      setState(() => _isRecording = false);
-    }
-  }
+  // Server transcription removed; only on-device STT updates the input text

  void _showVoiceUnavailable(String message) {
    if (!mounted) return;