From ac21ec649327338ea36fa5a541c6c30f7dd3afdb Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Mon, 25 Aug 2025 20:56:33 +0530 Subject: [PATCH] refactor: remove server audio transcription and related fallback logic, retaining only on-device speech-to-text functionality --- lib/core/services/api_service.dart | 93 +---------- .../chat/services/voice_input_service.dart | 108 ++----------- lib/features/chat/views/chat_page.dart | 151 ++++-------------- .../chat/widgets/modern_chat_input.dart | 59 ++----- 4 files changed, 56 insertions(+), 355 deletions(-) diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart index 7da574f..0fdc5da 100644 --- a/lib/core/services/api_service.dart +++ b/lib/core/services/api_service.dart @@ -3,7 +3,7 @@ import 'dart:convert'; import 'dart:io'; import 'package:flutter/foundation.dart'; import 'package:dio/dio.dart'; -import 'package:http_parser/http_parser.dart'; +// import 'package:http_parser/http_parser.dart'; // Removed legacy websocket/socket.io imports import 'package:uuid/uuid.dart'; import '../models/server_config.dart'; @@ -1651,96 +1651,7 @@ class ApiService { return []; } - Future transcribeAudio( - List audioData, { - String? language, - }) async { - // Normalize language to primary ISO 639-1 (e.g., en-US -> en) per server accepted list - String? normalizedLang; - if (language != null && language.isNotEmpty) { - normalizedLang = language.split(RegExp('[-_]')).first.toLowerCase(); - } - - debugPrint( - 'DEBUG: Transcribing audio data: bytes=${audioData.length}, language=${normalizedLang ?? 'null'}', - ); - - FormData buildForm(String? lang) { - final Map formMap = { - 'file': MultipartFile.fromBytes( - audioData, - filename: 'audio.wav', - contentType: MediaType.parse('audio/wav'), - ), - }; - if (lang != null && lang.isNotEmpty) { - formMap['language'] = lang; - } - return FormData.fromMap(formMap); - } - - var formData = buildForm(normalizedLang); - try { - final response = await _dio.post( - '/api/v1/audio/transcriptions', - data: formData, - options: Options(headers: {'Accept': 'application/json'}), - ); - final data = response.data; - debugPrint( - 'DEBUG: Transcription response status: ${response.statusCode}', - ); - DebugLogger.log('Transcription response received successfully'); - if (data is String) return data; - if (data is Map) { - final text = data['text'] ?? data['transcription'] ?? data['result']; - if (text is String) return text; - if (data['data'] is Map && (data['data']['text'] is String)) { - return data['data']['text'] as String; - } - } - return ''; - } catch (e) { - debugPrint('DEBUG: Transcription API error: $e'); - // If server complains about invalid language code, retry without language - try { - if (e is DioException) { - final data = e.response?.data; - final msg = data is Map - ? data.toString() - : data?.toString() ?? ''; - if (msg.contains("not a valid language code")) { - debugPrint('DEBUG: Retrying transcription without language'); - final retryResponse = await _dio.post( - '/api/v1/audio/transcriptions', - data: buildForm(null), - options: Options(headers: {'Accept': 'application/json'}), - ); - final rdata = retryResponse.data; - debugPrint( - 'DEBUG: Transcription retry status: ${retryResponse.statusCode}', - ); - DebugLogger.log( - 'Transcription retry response received successfully', - ); - if (rdata is String) return rdata; - if (rdata is Map) { - final text = - rdata['text'] ?? rdata['transcription'] ?? rdata['result']; - if (text is String) return text; - if (rdata['data'] is Map && (rdata['data']['text'] is String)) { - return rdata['data']['text'] as String; - } - } - return ''; - } - } - } catch (e2) { - debugPrint('DEBUG: Transcription retry error: $e2'); - } - rethrow; - } - } + // Server audio transcription removed; rely on on-device STT in UI layer // Image Generation Future>> getImageModels() async { diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart index 1c06d62..035fe89 100644 --- a/lib/features/chat/services/voice_input_service.dart +++ b/lib/features/chat/services/voice_input_service.dart @@ -3,8 +3,7 @@ import 'package:record/record.dart'; import 'package:flutter/widgets.dart'; import 'dart:async'; import 'dart:io' show Platform; -import 'package:path_provider/path_provider.dart'; -import 'package:path/path.dart' as p; +// Removed path imports as server transcription fallback was removed import 'package:stts/stts.dart'; // Lightweight replacement for previous stt.LocaleName used across the UI @@ -175,16 +174,9 @@ class VoiceInputService { try { final isStillAvailable = await _speech.isSupported(); if (!isStillAvailable && _isListening) { - // speech recognition no longer available, fallback to recording + // Speech recognition no longer available; stop listening _localSttAvailable = false; - // Restart with fallback method - _startRecordingProxyIntensity(); - _autoStopTimer?.cancel(); - _autoStopTimer = Timer(const Duration(seconds: 30), () { - if (_isListening) { - _stopListening(); - } - }); + _stopListening(); return; } } catch (e) { @@ -218,24 +210,17 @@ class VoiceInputService { } // Start recognition (no await blocking the sync flow) _speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) { - // fallback to recording + // On-device STT failed; stop listening entirely as server transcription is removed _localSttAvailable = false; - _startRecordingProxyIntensity(); + _stopListening(); }); } catch (e) { _localSttAvailable = false; - _startRecordingProxyIntensity(); + _stopListening(); } } else { - // Fallback: record audio and signal file path for server transcription - // Local STT not available, falling back to recording - _startRecordingProxyIntensity(); - _autoStopTimer?.cancel(); - _autoStopTimer = Timer(const Duration(seconds: 30), () { - if (_isListening) { - _stopListening(); - } - }); + // No local STT available; stop immediately since server transcription is removed + _stopListening(); } return _textStreamController!.stream; @@ -262,9 +247,6 @@ class VoiceInputService { _sttStateSub?.cancel(); } catch (_) {} _sttStateSub = null; - } else { - // Also stop recorder if active - await _stopRecording(); } _autoStopTimer?.cancel(); @@ -284,84 +266,12 @@ class VoiceInputService { void dispose() { stopListening(); - _stopRecording(force: true); try { _speech.dispose().catchError((_) {}); } catch (_) {} } - // --- Recording and intensity proxy for server transcription path --- - Future _startRecordingProxyIntensity() async { - try { - final hasMic = await _recorder.hasPermission(); - if (!hasMic) { - _textStreamController?.addError('Microphone permission not granted'); - _stopListening(); - return; - } - - // Start recording in a portable format (WAV/PCM) for best compatibility with server - final tmpDir = await getTemporaryDirectory(); - final filePath = p.join( - tmpDir.path, - 'conduit_voice_${DateTime.now().millisecondsSinceEpoch}.wav', - ); - await _recorder.start( - const RecordConfig( - encoder: AudioEncoder.wav, - numChannels: 1, - sampleRate: 16000, - bitRate: 128000, - ), - path: filePath, - ); - // recording started at filePath - - // Drive intensity from amplitude stream and detect silence - // Consider amplitude less than threshold as silence; stop after ~3s of continuous silence - const silenceThresholdDb = -45.0; // dBFS threshold - const silenceWindow = Duration(seconds: 3); - DateTime lastNonSilent = DateTime.now(); - - _ampSub = _recorder - .onAmplitudeChanged(const Duration(milliseconds: 125)) - .listen((amp) { - if (!_isListening) return; - // Normalize peak power (dBFS) into 0-10 bar scale - final db = amp.current; - // Map dB [-60..0] -> [0..10] - final clamped = db.clamp(-60.0, 0.0); - final norm = ((clamped + 60.0) / 60.0) * 10.0; - _intensityController?.add(norm.round().clamp(0, 10)); - - if (db > silenceThresholdDb) { - lastNonSilent = DateTime.now(); - } else { - if (DateTime.now().difference(lastNonSilent) >= silenceWindow) { - _stopListening(); - } - } - }); - } catch (e) { - _textStreamController?.addError('Audio recording failed: $e'); - _stopListening(); - } - } - - Future _stopRecording({bool force = false}) async { - try { - if (!await _recorder.isRecording() && !force) return; - final path = await _recorder.stop(); - if (path == null) { - _textStreamController?.addError('Recording failed: no file path'); - return; - } - // Hand off recorded file path to listeners as a special token; UI layer will upload for transcription - _textStreamController?.add('[[AUDIO_FILE_PATH]]:$path'); - } catch (e) { - _textStreamController?.addError('Stop recording error: $e'); - } - } + // Recording fallback removed; only on-device STT is supported now // Native locales not used in server transcription mode } diff --git a/lib/features/chat/views/chat_page.dart b/lib/features/chat/views/chat_page.dart index f03e132..a03ebfd 100644 --- a/lib/features/chat/views/chat_page.dart +++ b/lib/features/chat/views/chat_page.dart @@ -7,7 +7,7 @@ import 'package:flutter/services.dart'; import 'package:flutter/cupertino.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:flutter_animate/flutter_animate.dart'; -import 'dart:io' show Platform, File; +import 'dart:io' show Platform; import 'dart:async'; import '../../../core/providers/app_providers.dart'; import '../providers/chat_providers.dart'; @@ -1927,7 +1927,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> { StreamSubscription? _textSub; int _elapsedSeconds = 0; Timer? _elapsedTimer; - bool _isTranscribing = false; + // Removed server transcription; keep only on-device listening state String _languageTag = 'en'; bool _holdToTalk = false; bool _autoSendFinal = false; @@ -2005,18 +2005,9 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> { }); _textSub = stream.listen( (text) { - // If we receive a special token with recorded audio path, transcribe it via API (fallback) - if (text.startsWith('[[AUDIO_FILE_PATH]]:')) { - final filePath = text.split(':').skip(1).join(':'); - debugPrint( - 'DEBUG: VoiceInputSheet received audio file path: $filePath', - ); - _transcribeRecordedFile(filePath); - } else { - setState(() { - _recognizedText = text; - }); - } + setState(() { + _recognizedText = text; + }); }, onDone: () { debugPrint('DEBUG: VoiceInputSheet stream done'); @@ -2052,44 +2043,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> { } } - Future _transcribeRecordedFile(String filePath) async { - try { - setState(() => _isTranscribing = true); - final api = ref.read(apiServiceProvider); - if (api == null) throw Exception('API service unavailable'); - final file = File(filePath); - final bytes = await file.readAsBytes(); - // Try to use device locale; fall back to en-US - String? language; - try { - language = WidgetsBinding.instance.platformDispatcher.locale - .toLanguageTag(); - } catch (_) { - language = 'en-US'; - } - final text = await api.transcribeAudio( - bytes.toList(), - language: language, - ); - debugPrint( - 'DEBUG: Transcription received: ${text.isEmpty ? '[empty]' : text}', - ); - if (!mounted) return; - setState(() { - _recognizedText = text; - }); - // Stop listening state if we have a result - setState(() => _isListening = false); - if (_autoSendFinal && _recognizedText.trim().isNotEmpty) { - _sendText(); - } - } catch (e) { - if (!mounted) return; - setState(() => _isListening = false); - } finally { - if (mounted) setState(() => _isTranscribing = false); - } - } + // Server transcription removed; only on-device STT is supported Future _stopListening() async { _intensitySub?.cancel(); @@ -2279,9 +2233,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> { mainAxisAlignment: MainAxisAlignment.spaceBetween, children: [ Text( - _isTranscribing - ? 'Transcribing…' - : _isListening + _isListening ? (_voiceService.hasLocalStt ? 'Listening…' : 'Recording…') @@ -2601,9 +2553,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> { tooltip: AppLocalizations.of( context, )!.clear, - onPressed: - _recognizedText.isNotEmpty && - !_isTranscribing + onPressed: _recognizedText.isNotEmpty ? () { setState( () => _recognizedText = '', @@ -2614,68 +2564,35 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> { ], ), const SizedBox(height: Spacing.xs), - if (_isTranscribing) - Center( - child: Row( - mainAxisAlignment: - MainAxisAlignment.center, - children: [ - ConduitLoadingIndicator( - size: isUltra - ? 14 - : (isCompact ? 16 : 18), - isCompact: true, - ), - const SizedBox(width: Spacing.xs), - Text( - 'Transcribing…', - style: TextStyle( - fontSize: isUltra - ? AppTypography.bodySmall - : (isCompact - ? AppTypography - .bodyMedium - : AppTypography - .bodyLarge), - color: context - .conduitTheme - .textSecondary, - ), - ), - ], - ), - ) - else - Flexible( - child: SingleChildScrollView( - child: Text( - _recognizedText.isEmpty - ? (_isListening - ? (_voiceService.hasLocalStt - ? 'Speak now…' - : 'Recording…') - : 'Tap Start to begin') - : _recognizedText, - style: TextStyle( - fontSize: isUltra - ? AppTypography.bodySmall - : (isCompact - ? AppTypography.bodyMedium - : AppTypography - .bodyLarge), - color: _recognizedText.isEmpty - ? context - .conduitTheme - .inputPlaceholder - : context - .conduitTheme - .textPrimary, - height: 1.4, - ), - textAlign: TextAlign.center, + Flexible( + child: SingleChildScrollView( + child: Text( + _recognizedText.isEmpty + ? (_isListening + ? (_voiceService.hasLocalStt + ? 'Speak now…' + : 'Recording…') + : 'Tap Start to begin') + : _recognizedText, + style: TextStyle( + fontSize: isUltra + ? AppTypography.bodySmall + : (isCompact + ? AppTypography.bodyMedium + : AppTypography.bodyLarge), + color: _recognizedText.isEmpty + ? context + .conduitTheme + .inputPlaceholder + : context + .conduitTheme + .textPrimary, + height: 1.4, ), + textAlign: TextAlign.center, ), ), + ), ], ), ), diff --git a/lib/features/chat/widgets/modern_chat_input.dart b/lib/features/chat/widgets/modern_chat_input.dart index 1332bf1..e190cb2 100644 --- a/lib/features/chat/widgets/modern_chat_input.dart +++ b/lib/features/chat/widgets/modern_chat_input.dart @@ -6,7 +6,7 @@ import '../../../shared/widgets/sheet_handle.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; -import 'dart:io' show Platform, File; +import 'dart:io' show Platform; import 'dart:async'; import '../providers/chat_providers.dart'; import '../../tools/widgets/unified_tools_modal.dart'; @@ -991,20 +991,15 @@ class _ModernChatInputState extends ConsumerState _textSub?.cancel(); _textSub = stream.listen( (text) async { - if (text.startsWith('[[AUDIO_FILE_PATH]]:')) { - final path = text.split(':').skip(1).join(':'); - await _transcribeRecordedFile(path); - } else { - final updated = - (_baseTextAtStart.isEmpty - ? '' - : (_baseTextAtStart.trimRight() + ' ')) + - text; - _controller.value = TextEditingValue( - text: updated, - selection: TextSelection.collapsed(offset: updated.length), - ); - } + final updated = + (_baseTextAtStart.isEmpty + ? '' + : (_baseTextAtStart.trimRight() + ' ')) + + text; + _controller.value = TextEditingValue( + text: updated, + selection: TextSelection.collapsed(offset: updated.length), + ); }, onDone: () { if (!mounted) return; @@ -1039,39 +1034,7 @@ class _ModernChatInputState extends ConsumerState HapticFeedback.selectionClick(); } - Future _transcribeRecordedFile(String filePath) async { - try { - final api = ref.read(apiServiceProvider); - if (api == null) return; - final file = File(filePath); - final bytes = await file.readAsBytes(); - String? language; - try { - language = WidgetsBinding.instance.platformDispatcher.locale - .toLanguageTag(); - } catch (_) { - language = 'en-US'; - } - final text = await api.transcribeAudio( - bytes.toList(), - language: language, - ); - final updated = - (_baseTextAtStart.isEmpty - ? '' - : (_baseTextAtStart.trimRight() + ' ')) + - text; - if (!mounted) return; - _controller.value = TextEditingValue( - text: updated, - selection: TextSelection.collapsed(offset: updated.length), - ); - } catch (_) { - } finally { - if (!mounted) return; - setState(() => _isRecording = false); - } - } + // Server transcription removed; only on-device STT updates the input text void _showVoiceUnavailable(String message) { if (!mounted) return;