refactor: remove server audio transcription and related fallback logic, retaining only on-device speech-to-text functionality
This commit is contained in:
@@ -3,8 +3,7 @@ import 'package:record/record.dart';
|
||||
import 'package:flutter/widgets.dart';
|
||||
import 'dart:async';
|
||||
import 'dart:io' show Platform;
|
||||
import 'package:path_provider/path_provider.dart';
|
||||
import 'package:path/path.dart' as p;
|
||||
// Removed path imports as server transcription fallback was removed
|
||||
import 'package:stts/stts.dart';
|
||||
|
||||
// Lightweight replacement for previous stt.LocaleName used across the UI
|
||||
@@ -175,16 +174,9 @@ class VoiceInputService {
|
||||
try {
|
||||
final isStillAvailable = await _speech.isSupported();
|
||||
if (!isStillAvailable && _isListening) {
|
||||
// speech recognition no longer available, fallback to recording
|
||||
// Speech recognition no longer available; stop listening
|
||||
_localSttAvailable = false;
|
||||
// Restart with fallback method
|
||||
_startRecordingProxyIntensity();
|
||||
_autoStopTimer?.cancel();
|
||||
_autoStopTimer = Timer(const Duration(seconds: 30), () {
|
||||
if (_isListening) {
|
||||
_stopListening();
|
||||
}
|
||||
});
|
||||
_stopListening();
|
||||
return;
|
||||
}
|
||||
} catch (e) {
|
||||
@@ -218,24 +210,17 @@ class VoiceInputService {
|
||||
}
|
||||
// Start recognition (no await blocking the sync flow)
|
||||
_speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) {
|
||||
// fallback to recording
|
||||
// On-device STT failed; stop listening entirely as server transcription is removed
|
||||
_localSttAvailable = false;
|
||||
_startRecordingProxyIntensity();
|
||||
_stopListening();
|
||||
});
|
||||
} catch (e) {
|
||||
_localSttAvailable = false;
|
||||
_startRecordingProxyIntensity();
|
||||
_stopListening();
|
||||
}
|
||||
} else {
|
||||
// Fallback: record audio and signal file path for server transcription
|
||||
// Local STT not available, falling back to recording
|
||||
_startRecordingProxyIntensity();
|
||||
_autoStopTimer?.cancel();
|
||||
_autoStopTimer = Timer(const Duration(seconds: 30), () {
|
||||
if (_isListening) {
|
||||
_stopListening();
|
||||
}
|
||||
});
|
||||
// No local STT available; stop immediately since server transcription is removed
|
||||
_stopListening();
|
||||
}
|
||||
|
||||
return _textStreamController!.stream;
|
||||
@@ -262,9 +247,6 @@ class VoiceInputService {
|
||||
_sttStateSub?.cancel();
|
||||
} catch (_) {}
|
||||
_sttStateSub = null;
|
||||
} else {
|
||||
// Also stop recorder if active
|
||||
await _stopRecording();
|
||||
}
|
||||
|
||||
_autoStopTimer?.cancel();
|
||||
@@ -284,84 +266,12 @@ class VoiceInputService {
|
||||
|
||||
/// Tears the service down.
///
/// Requests a stop of the current listening session, force-stops the
/// recorder (bypassing the "is it recording?" check in [_stopRecording]),
/// and disposes the speech engine. Everything is fire-and-forget: none of
/// the calls is awaited, and any failure while disposing the speech engine,
/// synchronous or asynchronous, is deliberately ignored.
void dispose() {
  stopListening();

  // `force: true` makes the recorder stop even when it reports idle.
  _stopRecording(force: true);

  try {
    // `catchError` absorbs an asynchronous failure; the surrounding `try`
    // absorbs a synchronous throw from the call itself.
    _speech.dispose().catchError((_) {});
  } catch (_) {}
}
|
||||
|
||||
// --- Recording and intensity proxy for server transcription path ---
|
||||
/// Starts microphone recording and drives the intensity meter from the
/// recorder's amplitude stream.
///
/// Steps:
///  1. Checks microphone permission; when it is missing, reports
///     `'Microphone permission not granted'` on the text stream and stops
///     listening.
///  2. Starts a 16 kHz mono WAV recording into a timestamped file inside the
///     temporary directory.
///  3. Every 125 ms maps the current amplitude from [-60, 0] dB onto a 0-10
///     scale for [_intensityController], and stops listening once the level
///     has stayed at or below -45 dB for 3 seconds.
///
/// Any exception is forwarded to the text stream as
/// `'Audio recording failed: …'` and listening is stopped.
Future<void> _startRecordingProxyIntensity() async {
  try {
    final hasMic = await _recorder.hasPermission();
    if (!hasMic) {
      _textStreamController?.addError('Microphone permission not granted');
      _stopListening();
      return;
    }

    // Record in a portable format (WAV/PCM).
    final tmpDir = await getTemporaryDirectory();
    final filePath = p.join(
      tmpDir.path,
      'conduit_voice_${DateTime.now().millisecondsSinceEpoch}.wav',
    );
    await _recorder.start(
      const RecordConfig(
        encoder: AudioEncoder.wav,
        numChannels: 1,
        sampleRate: 16000,
        bitRate: 128000,
      ),
      path: filePath,
    );

    // Levels at or below this threshold count as silence; listening stops
    // after a full window of continuous silence.
    const silenceThresholdDb = -45.0;
    const silenceWindow = Duration(seconds: 3);
    // Lower bound of the dB range that is mapped onto the 0-10 scale.
    const floorDb = -60.0;
    var lastNonSilent = DateTime.now();

    // This method can be entered from several fallback paths; cancel any
    // earlier amplitude subscription so it is not leaked when the field is
    // overwritten below.
    _ampSub?.cancel();
    _ampSub = _recorder
        .onAmplitudeChanged(const Duration(milliseconds: 125))
        .listen((amp) {
          if (!_isListening) return;

          // A NaN reading would survive `clamp` and make `round()` throw
          // inside this callback; treat it as the silence floor instead.
          final reading = amp.current;
          final db = reading.isNaN ? floorDb : reading;

          // Map dB [-60..0] -> [0..10].
          final clamped = db.clamp(floorDb, 0.0);
          final norm = ((clamped - floorDb) / -floorDb) * 10.0;
          _intensityController?.add(norm.round().clamp(0, 10));

          if (db > silenceThresholdDb) {
            lastNonSilent = DateTime.now();
          } else if (DateTime.now().difference(lastNonSilent) >=
              silenceWindow) {
            _stopListening();
          }
        });
  } catch (e) {
    _textStreamController?.addError('Audio recording failed: $e');
    _stopListening();
  }
}
|
||||
|
||||
/// Stops the recorder and hands the recorded file to stream listeners.
///
/// When the recorder is idle the call is a no-op unless [force] is true, in
/// which case `stop()` is still invoked so the recorder is released.
///
/// On success the path is emitted on the text stream as the token
/// `'[[AUDIO_FILE_PATH]]:<path>'`. If an active recording yields no path, or
/// stopping throws, an error is emitted on the text stream instead. Nothing
/// is emitted once the stream controller is gone or closed.
Future<void> _stopRecording({bool force = false}) async {
  try {
    final wasRecording = await _recorder.isRecording();
    if (!wasRecording && !force) return;

    final path = await _recorder.stop();

    // Re-read the controller after the awaits: it may have been closed in
    // the meantime, and adding to a closed controller throws.
    final controller = _textStreamController;
    if (controller == null || controller.isClosed) return;

    if (path == null) {
      // A forced stop with nothing recording legitimately produces no file;
      // only report the failure when a recording was actually in progress.
      if (wasRecording) {
        controller.addError('Recording failed: no file path');
      }
      return;
    }

    // Hand off the recorded file path to listeners as a special token.
    controller.add('[[AUDIO_FILE_PATH]]:$path');
  } catch (e) {
    // Guard here as well: throwing from this handler would surface as an
    // unhandled asynchronous error.
    final controller = _textStreamController;
    if (controller != null && !controller.isClosed) {
      controller.addError('Stop recording error: $e');
    }
  }
}
|
||||
// Recording fallback removed; only on-device STT is supported now
|
||||
|
||||
// Native locales not used in server transcription mode
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ import 'package:flutter/services.dart';
|
||||
import 'package:flutter/cupertino.dart';
|
||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||
import 'package:flutter_animate/flutter_animate.dart';
|
||||
import 'dart:io' show Platform, File;
|
||||
import 'dart:io' show Platform;
|
||||
import 'dart:async';
|
||||
import '../../../core/providers/app_providers.dart';
|
||||
import '../providers/chat_providers.dart';
|
||||
@@ -1927,7 +1927,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
|
||||
StreamSubscription<String>? _textSub;
|
||||
int _elapsedSeconds = 0;
|
||||
Timer? _elapsedTimer;
|
||||
bool _isTranscribing = false;
|
||||
// Removed server transcription; keep only on-device listening state
|
||||
String _languageTag = 'en';
|
||||
bool _holdToTalk = false;
|
||||
bool _autoSendFinal = false;
|
||||
@@ -2005,18 +2005,9 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
|
||||
});
|
||||
_textSub = stream.listen(
|
||||
(text) {
|
||||
// If we receive a special token with recorded audio path, transcribe it via API (fallback)
|
||||
if (text.startsWith('[[AUDIO_FILE_PATH]]:')) {
|
||||
final filePath = text.split(':').skip(1).join(':');
|
||||
debugPrint(
|
||||
'DEBUG: VoiceInputSheet received audio file path: $filePath',
|
||||
);
|
||||
_transcribeRecordedFile(filePath);
|
||||
} else {
|
||||
setState(() {
|
||||
_recognizedText = text;
|
||||
});
|
||||
}
|
||||
setState(() {
|
||||
_recognizedText = text;
|
||||
});
|
||||
},
|
||||
onDone: () {
|
||||
debugPrint('DEBUG: VoiceInputSheet stream done');
|
||||
@@ -2052,44 +2043,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sends the recorded audio at [filePath] to the API for transcription and
/// shows the result in the sheet.
///
/// While the request is in flight `_isTranscribing` is true. On success the
/// transcript replaces `_recognizedText`, listening is marked as finished,
/// and the text is sent right away when `_autoSendFinal` is set and the
/// transcript is not blank. On failure the error is logged and listening is
/// marked as finished. No state is touched once the widget is unmounted.
Future<void> _transcribeRecordedFile(String filePath) async {
  // The stream that triggers this call can outlive the widget.
  if (!mounted) return;

  try {
    setState(() => _isTranscribing = true);

    final api = ref.read(apiServiceProvider);
    if (api == null) throw Exception('API service unavailable');

    final bytes = await File(filePath).readAsBytes();

    // Try to use the device locale; fall back to en-US.
    String? language;
    try {
      language = WidgetsBinding.instance.platformDispatcher.locale
          .toLanguageTag();
    } catch (_) {
      language = 'en-US';
    }

    final text = await api.transcribeAudio(
      bytes.toList(),
      language: language,
    );
    debugPrint(
      'DEBUG: Transcription received: ${text.isEmpty ? '[empty]' : text}',
    );

    if (!mounted) return;
    // One rebuild for both the transcript and the end of the listening state.
    setState(() {
      _recognizedText = text;
      _isListening = false;
    });

    if (_autoSendFinal && _recognizedText.trim().isNotEmpty) {
      _sendText();
    }
  } catch (e) {
    // Surface the failure in the log instead of dropping it silently.
    debugPrint('DEBUG: Transcription failed: $e');
    if (!mounted) return;
    setState(() => _isListening = false);
  } finally {
    if (mounted) setState(() => _isTranscribing = false);
  }
}
|
||||
// Server transcription removed; only on-device STT is supported
|
||||
|
||||
Future<void> _stopListening() async {
|
||||
_intensitySub?.cancel();
|
||||
@@ -2279,9 +2233,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
|
||||
mainAxisAlignment: MainAxisAlignment.spaceBetween,
|
||||
children: [
|
||||
Text(
|
||||
_isTranscribing
|
||||
? 'Transcribing…'
|
||||
: _isListening
|
||||
_isListening
|
||||
? (_voiceService.hasLocalStt
|
||||
? 'Listening…'
|
||||
: 'Recording…')
|
||||
@@ -2601,9 +2553,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
|
||||
tooltip: AppLocalizations.of(
|
||||
context,
|
||||
)!.clear,
|
||||
onPressed:
|
||||
_recognizedText.isNotEmpty &&
|
||||
!_isTranscribing
|
||||
onPressed: _recognizedText.isNotEmpty
|
||||
? () {
|
||||
setState(
|
||||
() => _recognizedText = '',
|
||||
@@ -2614,68 +2564,35 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
|
||||
],
|
||||
),
|
||||
const SizedBox(height: Spacing.xs),
|
||||
if (_isTranscribing)
|
||||
Center(
|
||||
child: Row(
|
||||
mainAxisAlignment:
|
||||
MainAxisAlignment.center,
|
||||
children: [
|
||||
ConduitLoadingIndicator(
|
||||
size: isUltra
|
||||
? 14
|
||||
: (isCompact ? 16 : 18),
|
||||
isCompact: true,
|
||||
),
|
||||
const SizedBox(width: Spacing.xs),
|
||||
Text(
|
||||
'Transcribing…',
|
||||
style: TextStyle(
|
||||
fontSize: isUltra
|
||||
? AppTypography.bodySmall
|
||||
: (isCompact
|
||||
? AppTypography
|
||||
.bodyMedium
|
||||
: AppTypography
|
||||
.bodyLarge),
|
||||
color: context
|
||||
.conduitTheme
|
||||
.textSecondary,
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
)
|
||||
else
|
||||
Flexible(
|
||||
child: SingleChildScrollView(
|
||||
child: Text(
|
||||
_recognizedText.isEmpty
|
||||
? (_isListening
|
||||
? (_voiceService.hasLocalStt
|
||||
? 'Speak now…'
|
||||
: 'Recording…')
|
||||
: 'Tap Start to begin')
|
||||
: _recognizedText,
|
||||
style: TextStyle(
|
||||
fontSize: isUltra
|
||||
? AppTypography.bodySmall
|
||||
: (isCompact
|
||||
? AppTypography.bodyMedium
|
||||
: AppTypography
|
||||
.bodyLarge),
|
||||
color: _recognizedText.isEmpty
|
||||
? context
|
||||
.conduitTheme
|
||||
.inputPlaceholder
|
||||
: context
|
||||
.conduitTheme
|
||||
.textPrimary,
|
||||
height: 1.4,
|
||||
),
|
||||
textAlign: TextAlign.center,
|
||||
Flexible(
|
||||
child: SingleChildScrollView(
|
||||
child: Text(
|
||||
_recognizedText.isEmpty
|
||||
? (_isListening
|
||||
? (_voiceService.hasLocalStt
|
||||
? 'Speak now…'
|
||||
: 'Recording…')
|
||||
: 'Tap Start to begin')
|
||||
: _recognizedText,
|
||||
style: TextStyle(
|
||||
fontSize: isUltra
|
||||
? AppTypography.bodySmall
|
||||
: (isCompact
|
||||
? AppTypography.bodyMedium
|
||||
: AppTypography.bodyLarge),
|
||||
color: _recognizedText.isEmpty
|
||||
? context
|
||||
.conduitTheme
|
||||
.inputPlaceholder
|
||||
: context
|
||||
.conduitTheme
|
||||
.textPrimary,
|
||||
height: 1.4,
|
||||
),
|
||||
textAlign: TextAlign.center,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
|
||||
@@ -6,7 +6,7 @@ import '../../../shared/widgets/sheet_handle.dart';
|
||||
|
||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||
|
||||
import 'dart:io' show Platform, File;
|
||||
import 'dart:io' show Platform;
|
||||
import 'dart:async';
|
||||
import '../providers/chat_providers.dart';
|
||||
import '../../tools/widgets/unified_tools_modal.dart';
|
||||
@@ -991,20 +991,15 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
|
||||
_textSub?.cancel();
|
||||
_textSub = stream.listen(
|
||||
(text) async {
|
||||
if (text.startsWith('[[AUDIO_FILE_PATH]]:')) {
|
||||
final path = text.split(':').skip(1).join(':');
|
||||
await _transcribeRecordedFile(path);
|
||||
} else {
|
||||
final updated =
|
||||
(_baseTextAtStart.isEmpty
|
||||
? ''
|
||||
: (_baseTextAtStart.trimRight() + ' ')) +
|
||||
text;
|
||||
_controller.value = TextEditingValue(
|
||||
text: updated,
|
||||
selection: TextSelection.collapsed(offset: updated.length),
|
||||
);
|
||||
}
|
||||
final updated =
|
||||
(_baseTextAtStart.isEmpty
|
||||
? ''
|
||||
: (_baseTextAtStart.trimRight() + ' ')) +
|
||||
text;
|
||||
_controller.value = TextEditingValue(
|
||||
text: updated,
|
||||
selection: TextSelection.collapsed(offset: updated.length),
|
||||
);
|
||||
},
|
||||
onDone: () {
|
||||
if (!mounted) return;
|
||||
@@ -1039,39 +1034,7 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
|
||||
HapticFeedback.selectionClick();
|
||||
}
|
||||
|
||||
/// Sends the recorded audio at [filePath] to the API for transcription and
/// appends the transcript to the text that was in the input when recording
/// started (`_baseTextAtStart`), placing the caret at the end.
///
/// Does nothing when no API service is available. A failure is logged and
/// leaves the input text untouched. In every case `_isRecording` is reset to
/// false as long as the widget is still mounted.
Future<void> _transcribeRecordedFile(String filePath) async {
  try {
    final api = ref.read(apiServiceProvider);
    if (api == null) return;

    final bytes = await File(filePath).readAsBytes();

    // Prefer the device locale; fall back to en-US if it cannot be read.
    String? language;
    try {
      language = WidgetsBinding.instance.platformDispatcher.locale
          .toLanguageTag();
    } catch (_) {
      language = 'en-US';
    }

    final text = await api.transcribeAudio(
      bytes.toList(),
      language: language,
    );

    // Keep the pre-existing text and separate it from the transcript with a
    // single space.
    final prefix = _baseTextAtStart.isEmpty
        ? ''
        : '${_baseTextAtStart.trimRight()} ';
    final updated = '$prefix$text';

    if (!mounted) return;
    _controller.value = TextEditingValue(
      text: updated,
      selection: TextSelection.collapsed(offset: updated.length),
    );
  } catch (e) {
    // Best-effort: leave the input as it was, but do not hide the failure.
    debugPrint('Voice transcription failed: $e');
  } finally {
    // No `return` here: control flow inside `finally` can discard a pending
    // return or exception.
    if (mounted) {
      setState(() => _isRecording = false);
    }
  }
}
|
||||
// Server transcription removed; only on-device STT updates the input text
|
||||
|
||||
void _showVoiceUnavailable(String message) {
|
||||
if (!mounted) return;
|
||||
|
||||
Reference in New Issue
Block a user