From ac21ec649327338ea36fa5a541c6c30f7dd3afdb Mon Sep 17 00:00:00 2001
From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com>
Date: Mon, 25 Aug 2025 20:56:33 +0530
Subject: [PATCH] refactor: remove server audio transcription and related
 fallback logic, retaining only on-device speech-to-text functionality

---
 lib/core/services/api_service.dart            |  93 +----------
 .../chat/services/voice_input_service.dart    | 108 ++-----------
 lib/features/chat/views/chat_page.dart        | 151 ++++--------------
 .../chat/widgets/modern_chat_input.dart       |  59 ++-----
 4 files changed, 56 insertions(+), 355 deletions(-)
diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart
index 7da574f..0fdc5da 100644
--- a/lib/core/services/api_service.dart
+++ b/lib/core/services/api_service.dart
@@ -3,7 +3,7 @@ import 'dart:convert';
 import 'dart:io';
 import 'package:flutter/foundation.dart';
 import 'package:dio/dio.dart';
-import 'package:http_parser/http_parser.dart';
+// Removed http_parser import (MediaType was used by the removed transcribeAudio)
 // Removed legacy websocket/socket.io imports
 import 'package:uuid/uuid.dart';
 import '../models/server_config.dart';
@@ -1651,96 +1651,7 @@ class ApiService {
     return [];
   }
 
-  Future<String> transcribeAudio(
-    List<int> audioData, {
-    String? language,
-  }) async {
-    // Normalize language to primary ISO 639-1 (e.g., en-US -> en) per server accepted list
-    String? normalizedLang;
-    if (language != null && language.isNotEmpty) {
-      normalizedLang = language.split(RegExp('[-_]')).first.toLowerCase();
-    }
-
-    debugPrint(
-      'DEBUG: Transcribing audio data: bytes=${audioData.length}, language=${normalizedLang ?? 'null'}',
-    );
-
-    FormData buildForm(String? lang) {
-      final Map<String, dynamic> formMap = {
-        'file': MultipartFile.fromBytes(
-          audioData,
-          filename: 'audio.wav',
-          contentType: MediaType.parse('audio/wav'),
-        ),
-      };
-      if (lang != null && lang.isNotEmpty) {
-        formMap['language'] = lang;
-      }
-      return FormData.fromMap(formMap);
-    }
-
-    var formData = buildForm(normalizedLang);
-    try {
-      final response = await _dio.post(
-        '/api/v1/audio/transcriptions',
-        data: formData,
-        options: Options(headers: {'Accept': 'application/json'}),
-      );
-      final data = response.data;
-      debugPrint(
-        'DEBUG: Transcription response status: ${response.statusCode}',
-      );
-      DebugLogger.log('Transcription response received successfully');
-      if (data is String) return data;
-      if (data is Map<String, dynamic>) {
-        final text = data['text'] ?? data['transcription'] ?? data['result'];
-        if (text is String) return text;
-        if (data['data'] is Map && (data['data']['text'] is String)) {
-          return data['data']['text'] as String;
-        }
-      }
-      return '';
-    } catch (e) {
-      debugPrint('DEBUG: Transcription API error: $e');
-      // If server complains about invalid language code, retry without language
-      try {
-        if (e is DioException) {
-          final data = e.response?.data;
-          final msg = data is Map<String, dynamic>
-              ? data.toString()
-              : data?.toString() ?? '';
-          if (msg.contains("not a valid language code")) {
-            debugPrint('DEBUG: Retrying transcription without language');
-            final retryResponse = await _dio.post(
-              '/api/v1/audio/transcriptions',
-              data: buildForm(null),
-              options: Options(headers: {'Accept': 'application/json'}),
-            );
-            final rdata = retryResponse.data;
-            debugPrint(
-              'DEBUG: Transcription retry status: ${retryResponse.statusCode}',
-            );
-            DebugLogger.log(
-              'Transcription retry response received successfully',
-            );
-            if (rdata is String) return rdata;
-            if (rdata is Map<String, dynamic>) {
-              final text =
-                  rdata['text'] ?? rdata['transcription'] ?? rdata['result'];
-              if (text is String) return text;
-              if (rdata['data'] is Map && (rdata['data']['text'] is String)) {
-                return rdata['data']['text'] as String;
-              }
-            }
-            return '';
-          }
-        }
-      } catch (e2) {
-        debugPrint('DEBUG: Transcription retry error: $e2');
-      }
-      rethrow;
-    }
-  }
+  // Server audio transcription removed; rely on on-device STT in UI layer
 
   // Image Generation
   Future<List<Map<String, dynamic>>> getImageModels() async {
diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart
index 1c06d62..035fe89 100644
--- a/lib/features/chat/services/voice_input_service.dart
+++ b/lib/features/chat/services/voice_input_service.dart
@@ -3,8 +3,7 @@ import 'package:record/record.dart';
 import 'package:flutter/widgets.dart';
 import 'dart:async';
 import 'dart:io' show Platform;
-import 'package:path_provider/path_provider.dart';
-import 'package:path/path.dart' as p;
+// Removed path_provider/path imports; they were used by the removed recording fallback
 import 'package:stts/stts.dart';
 
 // Lightweight replacement for previous stt.LocaleName used across the UI
@@ -175,16 +174,9 @@ class VoiceInputService {
         try {
           final isStillAvailable = await _speech.isSupported();
           if (!isStillAvailable && _isListening) {
-            // speech recognition no longer available, fallback to recording
+            // Speech recognition no longer available; stop listening
             _localSttAvailable = false;
-            // Restart with fallback method
-            _startRecordingProxyIntensity();
-            _autoStopTimer?.cancel();
-            _autoStopTimer = Timer(const Duration(seconds: 30), () {
-              if (_isListening) {
-                _stopListening();
-              }
-            });
+            _stopListening();
             return;
           }
         } catch (e) {
@@ -218,24 +210,17 @@ class VoiceInputService {
         }
         // Start recognition (no await blocking the sync flow)
         _speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) {
-          // fallback to recording
+          // On-device STT failed; stop listening entirely as server transcription is removed
           _localSttAvailable = false;
-          _startRecordingProxyIntensity();
+          _stopListening();
         });
       } catch (e) {
         _localSttAvailable = false;
-        _startRecordingProxyIntensity();
+        _stopListening();
       }
     } else {
-      // Fallback: record audio and signal file path for server transcription
-      // Local STT not available, falling back to recording
-      _startRecordingProxyIntensity();
-      _autoStopTimer?.cancel();
-      _autoStopTimer = Timer(const Duration(seconds: 30), () {
-        if (_isListening) {
-          _stopListening();
-        }
-      });
+      // No local STT available; stop immediately since server transcription is removed
+      _stopListening();
     }
 
     return _textStreamController!.stream;
@@ -262,9 +247,6 @@ class VoiceInputService {
         _sttStateSub?.cancel();
       } catch (_) {}
       _sttStateSub = null;
-    } else {
-      // Also stop recorder if active
-      await _stopRecording();
     }
 
     _autoStopTimer?.cancel();
@@ -284,84 +266,12 @@ class VoiceInputService {
 
   void dispose() {
     stopListening();
-    _stopRecording(force: true);
     try {
       _speech.dispose().catchError((_) {});
     } catch (_) {}
   }
 
-  // --- Recording and intensity proxy for server transcription path ---
-  Future<void> _startRecordingProxyIntensity() async {
-    try {
-      final hasMic = await _recorder.hasPermission();
-      if (!hasMic) {
-        _textStreamController?.addError('Microphone permission not granted');
-        _stopListening();
-        return;
-      }
-
-      // Start recording in a portable format (WAV/PCM) for best compatibility with server
-      final tmpDir = await getTemporaryDirectory();
-      final filePath = p.join(
-        tmpDir.path,
-        'conduit_voice_${DateTime.now().millisecondsSinceEpoch}.wav',
-      );
-      await _recorder.start(
-        const RecordConfig(
-          encoder: AudioEncoder.wav,
-          numChannels: 1,
-          sampleRate: 16000,
-          bitRate: 128000,
-        ),
-        path: filePath,
-      );
-      // recording started at filePath
-
-      // Drive intensity from amplitude stream and detect silence
-      // Consider amplitude less than threshold as silence; stop after ~3s of continuous silence
-      const silenceThresholdDb = -45.0; // dBFS threshold
-      const silenceWindow = Duration(seconds: 3);
-      DateTime lastNonSilent = DateTime.now();
-
-      _ampSub = _recorder
-          .onAmplitudeChanged(const Duration(milliseconds: 125))
-          .listen((amp) {
-            if (!_isListening) return;
-            // Normalize peak power (dBFS) into 0-10 bar scale
-            final db = amp.current;
-            // Map dB [-60..0] -> [0..10]
-            final clamped = db.clamp(-60.0, 0.0);
-            final norm = ((clamped + 60.0) / 60.0) * 10.0;
-            _intensityController?.add(norm.round().clamp(0, 10));
-
-            if (db > silenceThresholdDb) {
-              lastNonSilent = DateTime.now();
-            } else {
-              if (DateTime.now().difference(lastNonSilent) >= silenceWindow) {
-                _stopListening();
-              }
-            }
-          });
-    } catch (e) {
-      _textStreamController?.addError('Audio recording failed: $e');
-      _stopListening();
-    }
-  }
-
-  Future<void> _stopRecording({bool force = false}) async {
-    try {
-      if (!await _recorder.isRecording() && !force) return;
-      final path = await _recorder.stop();
-      if (path == null) {
-        _textStreamController?.addError('Recording failed: no file path');
-        return;
-      }
-      // Hand off recorded file path to listeners as a special token; UI layer will upload for transcription
-      _textStreamController?.add('[[AUDIO_FILE_PATH]]:$path');
-    } catch (e) {
-      _textStreamController?.addError('Stop recording error: $e');
-    }
-  }
+  // Recording fallback removed; only on-device STT is supported now
 
   // Native locales not used in server transcription mode
 }
diff --git a/lib/features/chat/views/chat_page.dart b/lib/features/chat/views/chat_page.dart
index f03e132..a03ebfd 100644
--- a/lib/features/chat/views/chat_page.dart
+++ b/lib/features/chat/views/chat_page.dart
@@ -7,7 +7,7 @@ import 'package:flutter/services.dart';
 import 'package:flutter/cupertino.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 import 'package:flutter_animate/flutter_animate.dart';
-import 'dart:io' show Platform, File;
+import 'dart:io' show Platform;
 import 'dart:async';
 import '../../../core/providers/app_providers.dart';
 import '../providers/chat_providers.dart';
@@ -1927,7 +1927,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
   StreamSubscription<String>? _textSub;
   int _elapsedSeconds = 0;
   Timer? _elapsedTimer;
-  bool _isTranscribing = false;
+  // Removed server transcription; keep only on-device listening state
   String _languageTag = 'en';
   bool _holdToTalk = false;
   bool _autoSendFinal = false;
@@ -2005,18 +2005,9 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
       });
       _textSub = stream.listen(
         (text) {
-          // If we receive a special token with recorded audio path, transcribe it via API (fallback)
-          if (text.startsWith('[[AUDIO_FILE_PATH]]:')) {
-            final filePath = text.split(':').skip(1).join(':');
-            debugPrint(
-              'DEBUG: VoiceInputSheet received audio file path: $filePath',
-            );
-            _transcribeRecordedFile(filePath);
-          } else {
-            setState(() {
-              _recognizedText = text;
-            });
-          }
+          setState(() {
+            _recognizedText = text;
+          });
         },
         onDone: () {
           debugPrint('DEBUG: VoiceInputSheet stream done');
@@ -2052,44 +2043,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
     }
   }
 
-  Future<void> _transcribeRecordedFile(String filePath) async {
-    try {
-      setState(() => _isTranscribing = true);
-      final api = ref.read(apiServiceProvider);
-      if (api == null) throw Exception('API service unavailable');
-      final file = File(filePath);
-      final bytes = await file.readAsBytes();
-      // Try to use device locale; fall back to en-US
-      String? language;
-      try {
-        language = WidgetsBinding.instance.platformDispatcher.locale
-            .toLanguageTag();
-      } catch (_) {
-        language = 'en-US';
-      }
-      final text = await api.transcribeAudio(
-        bytes.toList(),
-        language: language,
-      );
-      debugPrint(
-        'DEBUG: Transcription received: ${text.isEmpty ? '[empty]' : text}',
-      );
-      if (!mounted) return;
-      setState(() {
-        _recognizedText = text;
-      });
-      // Stop listening state if we have a result
-      setState(() => _isListening = false);
-      if (_autoSendFinal && _recognizedText.trim().isNotEmpty) {
-        _sendText();
-      }
-    } catch (e) {
-      if (!mounted) return;
-      setState(() => _isListening = false);
-    } finally {
-      if (mounted) setState(() => _isTranscribing = false);
-    }
-  }
+  // Server transcription removed; only on-device STT is supported
 
   Future<void> _stopListening() async {
     _intensitySub?.cancel();
@@ -2279,9 +2233,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
                   mainAxisAlignment: MainAxisAlignment.spaceBetween,
                   children: [
                     Text(
-                      _isTranscribing
-                          ? 'Transcribing…'
-                          : _isListening
+                      _isListening
                           ? (_voiceService.hasLocalStt
                                 ? 'Listening…'
                                 : 'Recording…')
@@ -2601,9 +2553,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
                                         tooltip: AppLocalizations.of(
                                           context,
                                         )!.clear,
-                                        onPressed:
-                                            _recognizedText.isNotEmpty &&
-                                                !_isTranscribing
+                                        onPressed: _recognizedText.isNotEmpty
                                             ? () {
                                                 setState(
                                                   () => _recognizedText = '',
@@ -2614,68 +2564,35 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
                                     ],
                                   ),
                                   const SizedBox(height: Spacing.xs),
-                                  if (_isTranscribing)
-                                    Center(
-                                      child: Row(
-                                        mainAxisAlignment:
-                                            MainAxisAlignment.center,
-                                        children: [
-                                          ConduitLoadingIndicator(
-                                            size: isUltra
-                                                ? 14
-                                                : (isCompact ? 16 : 18),
-                                            isCompact: true,
-                                          ),
-                                          const SizedBox(width: Spacing.xs),
-                                          Text(
-                                            'Transcribing…',
-                                            style: TextStyle(
-                                              fontSize: isUltra
-                                                  ? AppTypography.bodySmall
-                                                  : (isCompact
-                                                        ? AppTypography
-                                                              .bodyMedium
-                                                        : AppTypography
-                                                              .bodyLarge),
-                                              color: context
-                                                  .conduitTheme
-                                                  .textSecondary,
-                                            ),
-                                          ),
-                                        ],
-                                      ),
-                                    )
-                                  else
-                                    Flexible(
-                                      child: SingleChildScrollView(
-                                        child: Text(
-                                          _recognizedText.isEmpty
-                                              ? (_isListening
-                                                    ? (_voiceService.hasLocalStt
-                                                          ? 'Speak now…'
-                                                          : 'Recording…')
-                                                    : 'Tap Start to begin')
-                                              : _recognizedText,
-                                          style: TextStyle(
-                                            fontSize: isUltra
-                                                ? AppTypography.bodySmall
-                                                : (isCompact
-                                                      ? AppTypography.bodyMedium
-                                                      : AppTypography
-                                                            .bodyLarge),
-                                            color: _recognizedText.isEmpty
-                                                ? context
-                                                      .conduitTheme
-                                                      .inputPlaceholder
-                                                : context
-                                                      .conduitTheme
-                                                      .textPrimary,
-                                            height: 1.4,
-                                          ),
-                                          textAlign: TextAlign.center,
+                                  Flexible(
+                                    child: SingleChildScrollView(
+                                      child: Text(
+                                        _recognizedText.isEmpty
+                                            ? (_isListening
+                                                  ? (_voiceService.hasLocalStt
+                                                        ? 'Speak now…'
+                                                        : 'Recording…')
+                                                  : 'Tap Start to begin')
+                                            : _recognizedText,
+                                        style: TextStyle(
+                                          fontSize: isUltra
+                                              ? AppTypography.bodySmall
+                                              : (isCompact
+                                                    ? AppTypography.bodyMedium
+                                                    : AppTypography.bodyLarge),
+                                          color: _recognizedText.isEmpty
+                                              ? context
+                                                    .conduitTheme
+                                                    .inputPlaceholder
+                                              : context
+                                                    .conduitTheme
+                                                    .textPrimary,
+                                          height: 1.4,
                                         ),
+                                        textAlign: TextAlign.center,
                                       ),
                                     ),
+                                  ),
                                 ],
                               ),
                             ),
diff --git a/lib/features/chat/widgets/modern_chat_input.dart b/lib/features/chat/widgets/modern_chat_input.dart
index 1332bf1..e190cb2 100644
--- a/lib/features/chat/widgets/modern_chat_input.dart
+++ b/lib/features/chat/widgets/modern_chat_input.dart
@@ -6,7 +6,7 @@ import '../../../shared/widgets/sheet_handle.dart';
 
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 
-import 'dart:io' show Platform, File;
+import 'dart:io' show Platform;
 import 'dart:async';
 import '../providers/chat_providers.dart';
 import '../../tools/widgets/unified_tools_modal.dart';
@@ -991,20 +991,15 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
       _textSub?.cancel();
       _textSub = stream.listen(
         (text) async {
-          if (text.startsWith('[[AUDIO_FILE_PATH]]:')) {
-            final path = text.split(':').skip(1).join(':');
-            await _transcribeRecordedFile(path);
-          } else {
-            final updated =
-                (_baseTextAtStart.isEmpty
-                    ? ''
-                    : (_baseTextAtStart.trimRight() + ' ')) +
-                text;
-            _controller.value = TextEditingValue(
-              text: updated,
-              selection: TextSelection.collapsed(offset: updated.length),
-            );
-          }
+          final updated =
+              (_baseTextAtStart.isEmpty
+                  ? ''
+                  : (_baseTextAtStart.trimRight() + ' ')) +
+              text;
+          _controller.value = TextEditingValue(
+            text: updated,
+            selection: TextSelection.collapsed(offset: updated.length),
+          );
         },
         onDone: () {
           if (!mounted) return;
@@ -1039,39 +1034,7 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
     HapticFeedback.selectionClick();
   }
 
-  Future<void> _transcribeRecordedFile(String filePath) async {
-    try {
-      final api = ref.read(apiServiceProvider);
-      if (api == null) return;
-      final file = File(filePath);
-      final bytes = await file.readAsBytes();
-      String? language;
-      try {
-        language = WidgetsBinding.instance.platformDispatcher.locale
-            .toLanguageTag();
-      } catch (_) {
-        language = 'en-US';
-      }
-      final text = await api.transcribeAudio(
-        bytes.toList(),
-        language: language,
-      );
-      final updated =
-          (_baseTextAtStart.isEmpty
-              ? ''
-              : (_baseTextAtStart.trimRight() + ' ')) +
-          text;
-      if (!mounted) return;
-      _controller.value = TextEditingValue(
-        text: updated,
-        selection: TextSelection.collapsed(offset: updated.length),
-      );
-    } catch (_) {
-    } finally {
-      if (!mounted) return;
-      setState(() => _isRecording = false);
-    }
-  }
+  // Server transcription removed; only on-device STT updates the input text
 
   void _showVoiceUnavailable(String message) {
     if (!mounted) return;