From 7f30b728abe1abf9220b063e48bffdf3fed85468 Mon Sep 17 00:00:00 2001
From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com>
Date: Fri, 22 Aug 2025 13:54:58 +0530
Subject: [PATCH] feat: on device speech to text
---
ios/Podfile.lock | 15 +
ios/Runner/Info.plist | 2 +
lib/core/services/settings_service.dart | 80 ++-
.../chat/services/voice_input_service.dart | 147 +++-
lib/features/chat/views/chat_page.dart | 656 ++++++++++++------
pubspec.lock | 32 +
pubspec.yaml | 1 +
7 files changed, 697 insertions(+), 236 deletions(-)
diff --git a/ios/Podfile.lock b/ios/Podfile.lock
index ea11de5..87bcc6e 100644
--- a/ios/Podfile.lock
+++ b/ios/Podfile.lock
@@ -1,4 +1,7 @@
PODS:
+ - CwlCatchException (2.2.1):
+ - CwlCatchExceptionSupport (~> 2.2.1)
+ - CwlCatchExceptionSupport (2.2.1)
- DKImagePickerController/Core (4.3.9):
- DKImagePickerController/ImageDataManager
- DKImagePickerController/Resource
@@ -55,6 +58,10 @@ PODS:
- shared_preferences_foundation (0.0.1):
- Flutter
- FlutterMacOS
+ - speech_to_text (7.2.0):
+ - CwlCatchException
+ - Flutter
+ - FlutterMacOS
- sqflite_darwin (0.0.4):
- Flutter
- FlutterMacOS
@@ -75,12 +82,15 @@ DEPENDENCIES:
- record_ios (from `.symlinks/plugins/record_ios/ios`)
- share_plus (from `.symlinks/plugins/share_plus/ios`)
- shared_preferences_foundation (from `.symlinks/plugins/shared_preferences_foundation/darwin`)
+ - speech_to_text (from `.symlinks/plugins/speech_to_text/darwin`)
- sqflite_darwin (from `.symlinks/plugins/sqflite_darwin/darwin`)
- url_launcher_ios (from `.symlinks/plugins/url_launcher_ios/ios`)
- wakelock_plus (from `.symlinks/plugins/wakelock_plus/ios`)
SPEC REPOS:
trunk:
+ - CwlCatchException
+ - CwlCatchExceptionSupport
- DKImagePickerController
- DKPhotoGallery
- SDWebImage
@@ -107,6 +117,8 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/share_plus/ios"
shared_preferences_foundation:
:path: ".symlinks/plugins/shared_preferences_foundation/darwin"
+ speech_to_text:
+ :path: ".symlinks/plugins/speech_to_text/darwin"
sqflite_darwin:
:path: ".symlinks/plugins/sqflite_darwin/darwin"
url_launcher_ios:
@@ -115,6 +127,8 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/wakelock_plus/ios"
SPEC CHECKSUMS:
+ CwlCatchException: 7acc161b299a6de7f0a46a6ed741eae2c8b4d75a
+ CwlCatchExceptionSupport: 54ccab8d8c78907b57f99717fb19d4cc3bce02dc
DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60
file_picker: a0560bc09d61de87f12d246fc47d2119e6ef37be
@@ -128,6 +142,7 @@ SPEC CHECKSUMS:
SDWebImage: f29024626962457f3470184232766516dee8dfea
share_plus: 50da8cb520a8f0f65671c6c6a99b3617ed10a58a
shared_preferences_foundation: 9e1978ff2562383bd5676f64ec4e9aa8fa06a6f7
+ speech_to_text: 3b313d98516d3d0406cea424782ec25470c59d19
sqflite_darwin: 20b2a3a3b70e43edae938624ce550a3cbf66a3d0
SwiftyGif: 706c60cf65fa2bc5ee0313beece843c8eb8194d4
url_launcher_ios: 694010445543906933d732453a59da0a173ae33d
diff --git a/ios/Runner/Info.plist b/ios/Runner/Info.plist
index 665831b..89681d8 100644
--- a/ios/Runner/Info.plist
+++ b/ios/Runner/Info.plist
@@ -54,6 +54,8 @@
<key>NSMicrophoneUsageDescription</key>
<string>Conduit uses the microphone to record voice messages and enable voice-to-text in chats. For example, when you hold the mic button in a conversation, we capture your speech to send as an audio message or transcript.</string>
+ <key>NSSpeechRecognitionUsageDescription</key>
+ <string>Conduit uses on-device speech recognition so you can dictate messages hands‑free. Your speech is converted to text on your device when available.</string>
<key>NSCameraUsageDescription</key>
<string>Conduit uses the camera to take photos or videos you choose to share in chats. For example, you can snap a photo of a document and attach it to a message.</string>
<key>NSPhotoLibraryUsageDescription</key>
diff --git a/lib/core/services/settings_service.dart b/lib/core/services/settings_service.dart
index 7cd3850..e112991 100644
--- a/lib/core/services/settings_service.dart
+++ b/lib/core/services/settings_service.dart
@@ -12,6 +12,10 @@ class SettingsService {
static const String _largeTextKey = 'large_text';
static const String _darkModeKey = 'dark_mode';
static const String _defaultModelKey = 'default_model';
+ // Voice input settings
+ static const String _voiceLocaleKey = 'voice_locale_id';
+ static const String _voiceHoldToTalkKey = 'voice_hold_to_talk';
+ static const String _voiceAutoSendKey = 'voice_auto_send_final';
/// Get reduced motion preference
static Future getReduceMotion() async {
@@ -111,6 +115,9 @@ class SettingsService {
largeText: await getLargeText(),
darkMode: await getDarkMode(),
defaultModel: await getDefaultModel(),
+ voiceLocaleId: await getVoiceLocaleId(),
+ voiceHoldToTalk: await getVoiceHoldToTalk(),
+ voiceAutoSendFinal: await getVoiceAutoSendFinal(),
);
}
@@ -124,9 +131,47 @@ class SettingsService {
setLargeText(settings.largeText),
setDarkMode(settings.darkMode),
setDefaultModel(settings.defaultModel),
+ setVoiceLocaleId(settings.voiceLocaleId),
+ setVoiceHoldToTalk(settings.voiceHoldToTalk),
+ setVoiceAutoSendFinal(settings.voiceAutoSendFinal),
]);
}
+ // Voice input specific settings
+ static Future getVoiceLocaleId() async {
+ final prefs = await SharedPreferences.getInstance();
+ return prefs.getString(_voiceLocaleKey);
+ }
+
+ static Future setVoiceLocaleId(String? localeId) async {
+ final prefs = await SharedPreferences.getInstance();
+ if (localeId == null || localeId.isEmpty) {
+ await prefs.remove(_voiceLocaleKey);
+ } else {
+ await prefs.setString(_voiceLocaleKey, localeId);
+ }
+ }
+
+ static Future getVoiceHoldToTalk() async {
+ final prefs = await SharedPreferences.getInstance();
+ return prefs.getBool(_voiceHoldToTalkKey) ?? false;
+ }
+
+ static Future setVoiceHoldToTalk(bool value) async {
+ final prefs = await SharedPreferences.getInstance();
+ await prefs.setBool(_voiceHoldToTalkKey, value);
+ }
+
+ static Future getVoiceAutoSendFinal() async {
+ final prefs = await SharedPreferences.getInstance();
+ return prefs.getBool(_voiceAutoSendKey) ?? false;
+ }
+
+ static Future setVoiceAutoSendFinal(bool value) async {
+ final prefs = await SharedPreferences.getInstance();
+ await prefs.setBool(_voiceAutoSendKey, value);
+ }
+
/// Get effective animation duration considering all settings
static Duration getEffectiveAnimationDuration(
BuildContext context,
@@ -176,6 +221,9 @@ class AppSettings {
final bool largeText;
final bool darkMode;
final String? defaultModel;
+ final String? voiceLocaleId;
+ final bool voiceHoldToTalk;
+ final bool voiceAutoSendFinal;
const AppSettings({
this.reduceMotion = false,
@@ -185,6 +233,9 @@ class AppSettings {
this.largeText = false,
this.darkMode = true,
this.defaultModel,
+ this.voiceLocaleId,
+ this.voiceHoldToTalk = false,
+ this.voiceAutoSendFinal = false,
});
AppSettings copyWith({
@@ -195,6 +246,9 @@ class AppSettings {
bool? largeText,
bool? darkMode,
Object? defaultModel = const _DefaultValue(),
+ Object? voiceLocaleId = const _DefaultValue(),
+ bool? voiceHoldToTalk,
+ bool? voiceAutoSendFinal,
}) {
return AppSettings(
reduceMotion: reduceMotion ?? this.reduceMotion,
@@ -204,6 +258,9 @@ class AppSettings {
largeText: largeText ?? this.largeText,
darkMode: darkMode ?? this.darkMode,
defaultModel: defaultModel is _DefaultValue ? this.defaultModel : defaultModel as String?,
+ voiceLocaleId: voiceLocaleId is _DefaultValue ? this.voiceLocaleId : voiceLocaleId as String?,
+ voiceHoldToTalk: voiceHoldToTalk ?? this.voiceHoldToTalk,
+ voiceAutoSendFinal: voiceAutoSendFinal ?? this.voiceAutoSendFinal,
);
}
@@ -217,7 +274,10 @@ class AppSettings {
other.highContrast == highContrast &&
other.largeText == largeText &&
other.darkMode == darkMode &&
- other.defaultModel == defaultModel;
+ other.defaultModel == defaultModel &&
+ other.voiceLocaleId == voiceLocaleId &&
+ other.voiceHoldToTalk == voiceHoldToTalk &&
+ other.voiceAutoSendFinal == voiceAutoSendFinal;
}
@override
@@ -230,6 +290,9 @@ class AppSettings {
largeText,
darkMode,
defaultModel,
+ voiceLocaleId,
+ voiceHoldToTalk,
+ voiceAutoSendFinal,
);
}
}
@@ -285,6 +348,21 @@ class AppSettingsNotifier extends StateNotifier {
await SettingsService.setDefaultModel(modelId);
}
+ Future setVoiceLocaleId(String? localeId) async {
+ state = state.copyWith(voiceLocaleId: localeId);
+ await SettingsService.setVoiceLocaleId(localeId);
+ }
+
+ Future setVoiceHoldToTalk(bool value) async {
+ state = state.copyWith(voiceHoldToTalk: value);
+ await SettingsService.setVoiceHoldToTalk(value);
+ }
+
+ Future setVoiceAutoSendFinal(bool value) async {
+ state = state.copyWith(voiceAutoSendFinal: value);
+ await SettingsService.setVoiceAutoSendFinal(value);
+ }
+
Future resetToDefaults() async {
const defaultSettings = AppSettings();
await SettingsService.saveSettings(defaultSettings);
diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart
index 11f6441..1a6d5d4 100644
--- a/lib/features/chat/services/voice_input_service.dart
+++ b/lib/features/chat/services/voice_input_service.dart
@@ -1,14 +1,22 @@
import 'package:flutter_riverpod/flutter_riverpod.dart';
import 'package:record/record.dart';
+import 'package:flutter/widgets.dart';
import 'dart:async';
import 'dart:io' show Platform;
import 'package:path_provider/path_provider.dart';
import 'package:path/path.dart' as p;
+import 'package:speech_to_text/speech_recognition_error.dart';
+import 'package:speech_to_text/speech_recognition_result.dart';
+import 'package:speech_to_text/speech_to_text.dart' as stt;
class VoiceInputService {
final AudioRecorder _recorder = AudioRecorder();
+ stt.SpeechToText? _speech;
bool _isInitialized = false;
bool _isListening = false;
+ bool _localSttAvailable = false;
+ String? _selectedLocaleId;
+ List _locales = const [];
StreamController? _textStreamController;
String _currentText = '';
// Public stream for UI waveform visualization (emits partial text length as proxy)
@@ -23,16 +31,46 @@ class VoiceInputService {
Future initialize() async {
if (_isInitialized) return true;
if (!isSupportedPlatform) return false;
- // Log platform for diagnostics
- // ignore: avoid_print
- print(
- 'DEBUG: VoiceInputService initialize on platform: '
- '${Platform.isAndroid
- ? 'Android'
- : Platform.isIOS
- ? 'iOS'
- : 'Other'}',
- );
+ // Prepare local speech recognizer
+ try {
+ _speech = stt.SpeechToText();
+ _localSttAvailable = await _speech!.initialize(
+ onStatus: (status) {
+ // No-op: the UI manages stopping, and SpeechToText emits the final
+ // result itself once it reports 'notListening' or 'done'.
+ },
+ onError: (SpeechRecognitionError error) {
+ // If any error, we keep fallback available; no throws here.
+ },
+ );
+ if (_localSttAvailable) {
+ try {
+ _locales = await _speech!.locales();
+ // Locale ids may use '_' or '-' as separator depending on platform,
+ // so compare them in a normalized lower-case, hyphenated form.
+ String norm(String id) => id.toLowerCase().replaceAll('_', '-');
+ final deviceTag = norm(WidgetsBinding.instance.platformDispatcher.locale
+ .toLanguageTag());
+ final match = _locales.firstWhere(
+ (l) => norm(l.localeId) == deviceTag,
+ orElse: () {
+ final primary = deviceTag.split('-').first;
+ return _locales.firstWhere(
+ (l) => norm(l.localeId).split('-').first == primary,
+ orElse: () => _locales.isNotEmpty
+ ? _locales.first
+ : stt.LocaleName('en_US', 'English (US)'),
+ );
+ },
+ );
+ _selectedLocaleId = match.localeId;
+ } catch (_) {
+ _selectedLocaleId = null;
+ }
+ }
+ } catch (_) {
+ _localSttAvailable = false;
+ }
_isInitialized = true;
return true;
}
@@ -46,10 +84,16 @@ class VoiceInputService {
}
bool get isListening => _isListening;
- bool get isAvailable => _isInitialized;
+ bool get isAvailable => _isInitialized; // service usable (local or fallback)
+ bool get hasLocalStt => _localSttAvailable;
+ String? get selectedLocaleId => _selectedLocaleId;
+ List get locales => _locales;
+
+ void setLocale(String? localeId) {
+ _selectedLocaleId = localeId;
+ }
Stream startListening() {
- // Ensure initialized; we allow initialize to pass even if native STT unavailable
if (!_isInitialized) {
throw Exception('Voice input not initialized');
}
@@ -61,21 +105,52 @@ class VoiceInputService {
_textStreamController = StreamController.broadcast();
_currentText = '';
_isListening = true;
-
_intensityController = StreamController.broadcast();
- // Start recording raw audio; UI or auto-timer will stop and trigger transcription via API
- // ignore: avoid_print
- print('DEBUG: VoiceInputService startListening');
- _startRecordingProxyIntensity();
+ if (_localSttAvailable && _speech != null) {
+ // Local on-device STT path
+ _autoStopTimer?.cancel();
+ // SpeechToText has its own end-of-speech handling; we still cap at 60s
+ _autoStopTimer = Timer(const Duration(seconds: 60), () {
+ if (_isListening) {
+ _stopListening();
+ }
+ });
- // Auto-stop after 30 seconds similar to native STT behavior
- _autoStopTimer?.cancel();
- _autoStopTimer = Timer(const Duration(seconds: 30), () {
- if (_isListening) {
- _stopListening();
- }
- });
+ _speech!.listen(
+ localeId: _selectedLocaleId,
+ listenFor: const Duration(seconds: 60),
+ pauseFor: const Duration(seconds: 5),
+ onResult: (SpeechRecognitionResult result) {
+ if (!_isListening) return;
+ _currentText = result.recognizedWords;
+ _textStreamController?.add(_currentText);
+ if (result.finalResult) {
+ // Will be followed by notListening status; we proactively close
+ _stopListening();
+ }
+ },
+ onSoundLevelChange: (level) {
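+ // NOTE(review): assumes level is roughly 0..1+; speech_to_text documents the range as platform-specific — confirm before trusting this 0..10 mapping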
+ final scaled = (level * 10).clamp(0, 10).round();
+ _intensityController?.add(scaled);
+ },
+ listenOptions: stt.SpeechListenOptions(
+ partialResults: true,
+ cancelOnError: true,
+ listenMode: stt.ListenMode.confirmation,
+ ),
+ );
+ } else {
+ // Fallback: record audio and signal file path for server transcription
+ _startRecordingProxyIntensity();
+ _autoStopTimer?.cancel();
+ _autoStopTimer = Timer(const Duration(seconds: 30), () {
+ if (_isListening) {
+ _stopListening();
+ }
+ });
+ }
return _textStreamController!.stream;
}
@@ -88,10 +163,14 @@ class VoiceInputService {
if (!_isListening) return;
_isListening = false;
- // Also stop recorder if active
- await _stopRecording();
- // ignore: avoid_print
- print('DEBUG: VoiceInputService stopped listening');
+ if (_localSttAvailable && _speech != null) {
+ try {
+ await _speech!.stop();
+ } catch (_) {}
+ } else {
+ // Also stop recorder if active
+ await _stopRecording();
+ }
_autoStopTimer?.cancel();
_autoStopTimer = null;
@@ -111,6 +190,9 @@ class VoiceInputService {
void dispose() {
stopListening();
_stopRecording(force: true);
+ try {
+ _speech?.cancel();
+ } catch (_) {}
}
// --- Recording and intensity proxy for server transcription path ---
@@ -138,8 +220,7 @@ class VoiceInputService {
),
path: filePath,
);
- // ignore: avoid_print
- print('DEBUG: VoiceInputService recording started at: $filePath');
+ // recording started at filePath
// Drive intensity from amplitude stream and detect silence
// Consider amplitude less than threshold as silence; stop after ~3s of continuous silence
@@ -167,8 +248,6 @@ class VoiceInputService {
}
});
} catch (e) {
- // ignore: avoid_print
- print('DEBUG: VoiceInputService recording failed: $e');
_textStreamController?.addError('Audio recording failed: $e');
_stopListening();
}
@@ -182,8 +261,6 @@ class VoiceInputService {
_textStreamController?.addError('Recording failed: no file path');
return;
}
- // ignore: avoid_print
- print('DEBUG: VoiceInputService recording saved: $path');
// Hand off recorded file path to listeners as a special token; UI layer will upload for transcription
_textStreamController?.add('[[AUDIO_FILE_PATH]]:$path');
} catch (e) {
@@ -203,6 +280,8 @@ final voiceInputAvailableProvider = FutureProvider((ref) async {
if (!service.isSupportedPlatform) return false;
final initialized = await service.initialize();
if (!initialized) return false;
+ // If local STT exists, we consider it available; otherwise ensure mic permission for fallback
+ if (service.hasLocalStt) return true;
final hasPermission = await service.checkPermissions();
if (!hasPermission) return false;
return service.isAvailable;
diff --git a/lib/features/chat/views/chat_page.dart b/lib/features/chat/views/chat_page.dart
index 9dce29c..0e05440 100644
--- a/lib/features/chat/views/chat_page.dart
+++ b/lib/features/chat/views/chat_page.dart
@@ -29,6 +29,10 @@ import 'chat_page_helpers.dart';
import '../../../shared/widgets/themed_dialogs.dart';
import '../../onboarding/views/onboarding_sheet.dart';
import '../../../shared/widgets/sheet_handle.dart';
+import '../../../shared/widgets/conduit_components.dart';
+import '../../../core/services/settings_service.dart';
+// PlatformService is imported under the `ps` prefix (haptics, adaptive switch).
+import '../../../core/services/platform_service.dart' as ps;
class ChatPage extends ConsumerStatefulWidget {
const ChatPage({super.key});
@@ -1791,20 +1795,35 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
Timer? _elapsedTimer;
bool _isTranscribing = false;
String _languageTag = 'en';
+ bool _holdToTalk = false;
+ bool _autoSendFinal = false;
@override
void initState() {
super.initState();
_voiceService = ref.read(voiceInputServiceProvider);
try {
- _languageTag = WidgetsBinding.instance.platformDispatcher.locale
- .toLanguageTag()
- .split(RegExp('[-_]'))
- .first
- .toLowerCase();
+ final preset = _voiceService.selectedLocaleId;
+ if (preset != null && preset.isNotEmpty) {
+ _languageTag = preset.split(RegExp('[-_]')).first.toLowerCase();
+ } else {
+ _languageTag = WidgetsBinding.instance.platformDispatcher.locale
+ .toLanguageTag()
+ .split(RegExp('[-_]'))
+ .first
+ .toLowerCase();
+ }
} catch (_) {
_languageTag = 'en';
}
+ // Load voice settings from app settings
+ final settings = ref.read(appSettingsProvider);
+ _holdToTalk = settings.voiceHoldToTalk;
+ _autoSendFinal = settings.voiceAutoSendFinal;
+ if (settings.voiceLocaleId != null && settings.voiceLocaleId!.isNotEmpty) {
+ _voiceService.setLocale(settings.voiceLocaleId);
+ _languageTag = settings.voiceLocaleId!.split(RegExp('[-_]')).first.toLowerCase();
+ }
}
void _startListening() async {
@@ -1813,12 +1832,23 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
_recognizedText = '';
_elapsedSeconds = 0;
});
+ // Haptic: indicate start listening
+ final hapticEnabled = ref.read(hapticEnabledProvider);
+ ps.PlatformService.hapticFeedbackWithSettings(
+ type: ps.HapticType.medium,
+ hapticEnabled: hapticEnabled,
+ );
try {
- // Ensure service is initialized and has permission
+ // Ensure service is initialized (local STT will request permissions itself)
final ok = await _voiceService.initialize();
- if (!ok || !await _voiceService.checkPermissions()) {
- throw Exception('Microphone permission not granted');
+ if (!ok) {
+ throw Exception('Voice service unavailable');
+ }
+ // Only check mic permission when falling back to recording
+ if (!_voiceService.hasLocalStt) {
+ final mic = await _voiceService.checkPermissions();
+ if (!mic) throw Exception('Microphone permission not granted');
}
// Start elapsed timer for UX
@@ -1838,7 +1868,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
});
_textSub = stream.listen(
(text) {
- // If we receive a special token with recorded audio path, transcribe it via API
+ // If we receive a special token with recorded audio path, transcribe it via API (fallback)
if (text.startsWith('[[AUDIO_FILE_PATH]]:')) {
final filePath = text.split(':').skip(1).join(':');
debugPrint(
@@ -1857,6 +1887,10 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
_isListening = false;
});
_elapsedTimer?.cancel();
+ // Auto-send on final local result if enabled
+ if (_autoSendFinal && _recognizedText.trim().isNotEmpty) {
+ _sendText();
+ }
},
onError: (error) {
debugPrint('DEBUG: VoiceInputSheet stream error: $error');
@@ -1864,7 +1898,13 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
_isListening = false;
});
_elapsedTimer?.cancel();
- if (mounted) {}
+ if (mounted) {
+ final hapticEnabled = ref.read(hapticEnabledProvider);
+ ps.PlatformService.hapticFeedbackWithSettings(
+ type: ps.HapticType.warning,
+ hapticEnabled: hapticEnabled,
+ );
+ }
},
);
} catch (e) {
@@ -1903,6 +1943,9 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
});
// Stop listening state if we have a result
setState(() => _isListening = false);
+ if (_autoSendFinal && _recognizedText.trim().isNotEmpty) {
+ _sendText();
+ }
} catch (e) {
if (!mounted) return;
setState(() => _isListening = false);
@@ -1922,10 +1965,22 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
_isListening = false;
});
}
+ // Haptic: subtle stop confirmation
+ final hapticEnabled = ref.read(hapticEnabledProvider);
+ ps.PlatformService.hapticFeedbackWithSettings(
+ type: ps.HapticType.selection,
+ hapticEnabled: hapticEnabled,
+ );
}
void _sendText() {
if (_recognizedText.isNotEmpty) {
+ // Haptic: success send
+ final hapticEnabled = ref.read(hapticEnabledProvider);
+ ps.PlatformService.hapticFeedbackWithSettings(
+ type: ps.HapticType.success,
+ hapticEnabled: hapticEnabled,
+ );
widget.onTextReceived(_recognizedText);
Navigator.pop(context);
}
@@ -1937,9 +1992,103 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
return '$m:$s';
}
- void _cancel() {
- _stopListening();
- Navigator.pop(context);
+ void _pickLanguage() async {
+ // Language selection only applies to on-device STT, which exposes locales
+ if (!_voiceService.hasLocalStt) return;
+ final locales = _voiceService.locales;
+ if (locales.isEmpty) return;
+ if (!mounted) return;
+ final selected = await showModalBottomSheet<String>(
+ context: context,
+ backgroundColor: Colors.transparent,
+ builder: (context) {
+ return Container(
+ decoration: BoxDecoration(
+ color: context.conduitTheme.surfaceBackground,
+ borderRadius: const BorderRadius.vertical(
+ top: Radius.circular(AppBorderRadius.bottomSheet),
+ ),
+ border: Border.all(
+ color: context.conduitTheme.dividerColor,
+ width: BorderWidth.regular,
+ ),
+ boxShadow: ConduitShadows.modal,
+ ),
+ padding: const EdgeInsets.all(Spacing.bottomSheetPadding),
+ child: SafeArea(
+ top: false,
+ child: Column(
+ mainAxisSize: MainAxisSize.min,
+ children: [
+ const SheetHandle(),
+ const SizedBox(height: Spacing.md),
+ Text('Select Language',
+ style: TextStyle(
+ fontSize: AppTypography.headlineSmall,
+ color: context.conduitTheme.textPrimary,
+ fontWeight: FontWeight.w600,
+ )),
+ const SizedBox(height: Spacing.sm),
+ Flexible(
+ child: ListView.separated(
+ shrinkWrap: true,
+ itemCount: locales.length,
+ separatorBuilder: (_, sep) => Divider(
+ height: 1,
+ color: context.conduitTheme.dividerColor,
+ ),
+ itemBuilder: (ctx, i) {
+ final l = locales[i];
+ final isSelected = l.localeId == _voiceService.selectedLocaleId;
+ return ListTile(
+ title: Text(
+ l.name,
+ style: TextStyle(color: context.conduitTheme.textPrimary),
+ ),
+ subtitle: Text(
+ l.localeId,
+ style: TextStyle(color: context.conduitTheme.textSecondary),
+ ),
+ trailing: isSelected
+ ? Icon(Icons.check, color: context.conduitTheme.buttonPrimary)
+ : null,
+ onTap: () => Navigator.pop(ctx, l.localeId),
+ );
+ },
+ ),
+ ),
+ ],
+ ),
+ ),
+ );
+ },
+ );
+
+ if (selected != null && mounted) {
+ setState(() {
+ _voiceService.setLocale(selected);
+ _languageTag = selected.split(RegExp('[-_]')).first.toLowerCase();
+ });
+ // Persist preferred locale
+ await ref.read(appSettingsProvider.notifier).setVoiceLocaleId(selected);
+ if (_isListening) {
+ // Restart to apply the new language; the sheet may have closed while awaiting
+ await _voiceService.stopListening();
+ if (mounted) _startListening();
+ }
+ }
+ }
+
+ Widget _buildThemedSwitch({
+ required bool value,
+ required ValueChanged onChanged,
+ }) {
+ final theme = context.conduitTheme;
+ return ps.PlatformService.getPlatformSwitch(
+ value: value,
+ onChanged: onChanged,
+ activeColor: theme.buttonPrimary,
+ );
}
@override
@@ -1951,40 +2100,40 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
@override
Widget build(BuildContext context) {
+ final media = MediaQuery.of(context);
+ final isCompact = media.size.height < 680;
return Container(
- height: MediaQuery.of(context).size.height * 0.6,
+ height: media.size.height * (isCompact ? 0.45 : 0.6),
decoration: BoxDecoration(
color: context.conduitTheme.surfaceBackground,
borderRadius: const BorderRadius.vertical(
- top: Radius.circular(AppBorderRadius.lg),
+ top: Radius.circular(AppBorderRadius.bottomSheet),
),
border: Border.all(color: context.conduitTheme.dividerColor, width: 1),
+ boxShadow: ConduitShadows.modal,
),
- child: Column(
+ child: SafeArea(
+ top: false,
+ bottom: true,
+ child: Padding(
+ padding: const EdgeInsets.all(Spacing.bottomSheetPadding),
+ child: Column(
children: [
// Handle bar
- Container(
- margin: const EdgeInsets.only(top: Spacing.sm),
- width: 40,
- height: 4,
- decoration: BoxDecoration(
- color: context.conduitTheme.dividerColor,
- borderRadius: BorderRadius.circular(AppBorderRadius.xs),
- ),
- ),
+ const SheetHandle(),
// Header: Title + timer + language chip
- Padding(
- padding: const EdgeInsets.all(Spacing.lg),
- child: Row(
- mainAxisAlignment: MainAxisAlignment.spaceBetween,
- children: [
- Text(
- _isListening
- ? 'Listening\u2026'
- : _isTranscribing
- ? 'Transcribing\u2026'
- : 'Voice',
+ Padding(
+ padding: const EdgeInsets.only(top: Spacing.md, bottom: Spacing.md),
+ child: Row(
+ mainAxisAlignment: MainAxisAlignment.spaceBetween,
+ children: [
+ Text(
+ _isTranscribing
+ ? 'Transcribing…'
+ : _isListening
+ ? (_voiceService.hasLocalStt ? 'Listening…' : 'Recording…')
+ : 'Voice',
style: TextStyle(
fontSize: AppTypography.headlineMedium,
fontWeight: FontWeight.w600,
@@ -1994,28 +2143,43 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
Row(
children: [
// Language chip
- Container(
- padding: const EdgeInsets.symmetric(
- horizontal: Spacing.xs,
- vertical: 4,
- ),
- decoration: BoxDecoration(
- color: context.conduitTheme.surfaceBackground
- .withValues(alpha: 0.4),
- borderRadius: BorderRadius.circular(
- AppBorderRadius.badge,
+ GestureDetector(
+ onTap: _voiceService.hasLocalStt ? _pickLanguage : null,
+ child: Container(
+ padding: const EdgeInsets.symmetric(
+ horizontal: Spacing.xs,
+ vertical: 4,
),
- border: Border.all(
- color: context.conduitTheme.dividerColor,
- width: BorderWidth.thin,
+ decoration: BoxDecoration(
+ color: context.conduitTheme.surfaceBackground
+ .withValues(alpha: 0.4),
+ borderRadius: BorderRadius.circular(
+ AppBorderRadius.badge,
+ ),
+ border: Border.all(
+ color: context.conduitTheme.dividerColor,
+ width: BorderWidth.thin,
+ ),
),
- ),
- child: Text(
- _languageTag.toUpperCase(),
- style: TextStyle(
- fontSize: AppTypography.labelSmall,
- color: context.conduitTheme.textSecondary,
- fontWeight: FontWeight.w600,
+ child: Row(
+ children: [
+ Text(
+ _languageTag.toUpperCase(),
+ style: TextStyle(
+ fontSize: AppTypography.labelSmall,
+ color: context.conduitTheme.textSecondary,
+ fontWeight: FontWeight.w600,
+ ),
+ ),
+ if (_voiceService.hasLocalStt) ...[
+ const SizedBox(width: 4),
+ Icon(
+ Icons.arrow_drop_down,
+ size: 16,
+ color: context.conduitTheme.iconSecondary,
+ ),
+ ],
+ ],
),
),
),
@@ -2032,25 +2196,108 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
),
),
),
+ const SizedBox(width: Spacing.sm),
+ // Close sheet
+ ConduitIconButton(
+ icon: Platform.isIOS ? CupertinoIcons.xmark : Icons.close,
+ tooltip: 'Close',
+ isCompact: true,
+ onPressed: () => Navigator.of(context).pop(),
+ ),
],
),
],
),
),
- // Microphone animation and waveform
+ // Toggles row: Hold to talk, Auto-send
+ Padding(
+ padding: const EdgeInsets.only(bottom: Spacing.sm),
+ child: Row(
+ children: [
+ Expanded(
+ child: Row(
+ mainAxisSize: MainAxisSize.min,
+ children: [
+ _buildThemedSwitch(
+ value: _holdToTalk,
+ onChanged: (v) async {
+ setState(() => _holdToTalk = v);
+ await ref.read(appSettingsProvider.notifier).setVoiceHoldToTalk(v);
+ },
+ ),
+ const SizedBox(width: Spacing.xs),
+ Text(
+ 'Hold to talk',
+ style: TextStyle(color: context.conduitTheme.textSecondary),
+ ),
+ ],
+ ),
+ ),
+ Expanded(
+ child: Row(
+ mainAxisAlignment: MainAxisAlignment.end,
+ children: [
+ _buildThemedSwitch(
+ value: _autoSendFinal,
+ onChanged: (v) async {
+ setState(() => _autoSendFinal = v);
+ await ref.read(appSettingsProvider.notifier).setVoiceAutoSendFinal(v);
+ },
+ ),
+ const SizedBox(width: Spacing.xs),
+ Text(
+ 'Auto-send',
+ style: TextStyle(color: context.conduitTheme.textSecondary),
+ ),
+ ],
+ ),
+ ),
+ ],
+ ),
+ ),
+
+ // Microphone + waveform
Expanded(
- child: Center(
- child: Column(
- mainAxisAlignment: MainAxisAlignment.center,
- children: [
- // Microphone icon with animation (tap to toggle)
+ child: LayoutBuilder(
+ builder: (context, viewport) {
+ final isUltra = media.size.height < 560;
+ final double micSize = isUltra ? 64 : (isCompact ? 80 : 100);
+ final double micIconSize = isUltra ? 26 : (isCompact ? 32 : 40);
+ // Extra top padding so scale animation (up to 1.2x) never clips
+ final double topPaddingForScale = ((micSize * 1.2) - micSize) / 2 + 8;
+
+ final content = Center(
+ child: Column(
+ mainAxisSize: MainAxisSize.min,
+ mainAxisAlignment: MainAxisAlignment.center,
+ children: [
+ // Top spacer (baseline); additional padding handled by scroll view
+ SizedBox(height: isUltra ? Spacing.sm : Spacing.md),
+ // Microphone control
GestureDetector(
+ onTapDown: _holdToTalk
+ ? (_) {
+ if (!_isListening) _startListening();
+ }
+ : null,
+ onTapUp: _holdToTalk
+ ? (_) {
+ if (_isListening) _stopListening();
+ }
+ : null,
+ onTapCancel: _holdToTalk
+ ? () {
+ if (_isListening) _stopListening();
+ }
+ : null,
onTap: () =>
- _isListening ? _stopListening() : _startListening(),
+ _holdToTalk
+ ? null
+ : (_isListening ? _stopListening() : _startListening()),
child: Container(
- width: 100,
- height: 100,
+ width: micSize,
+ height: micSize,
decoration: BoxDecoration(
color: _isListening
? context.conduitTheme.error.withValues(
@@ -2076,7 +2323,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
: (Platform.isIOS
? CupertinoIcons.mic_off
: Icons.mic_off),
- size: 40,
+ size: micIconSize,
color: _isListening
? context.conduitTheme.error
: context.conduitTheme.iconSecondary,
@@ -2099,22 +2346,24 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
end: const Offset(1, 1),
),
- const SizedBox(height: Spacing.md),
+ SizedBox(height: isUltra ? Spacing.xs : (isCompact ? Spacing.sm : Spacing.md)),
// Simple animated bars waveform based on intensity proxy
SizedBox(
- height: 32,
+ height: isUltra ? 18 : (isCompact ? 24 : 32),
child: AnimatedSwitcher(
duration: const Duration(milliseconds: 150),
child: Row(
key: ValueKey(_intensity),
mainAxisAlignment: MainAxisAlignment.center,
- children: List.generate(12, (i) {
+ children: List.generate(isUltra ? 10 : 12, (i) {
final normalized = ((_intensity + i) % 10) / 10.0;
- final barHeight = 8 + (normalized * 24);
+ final base = isUltra ? 4 : (isCompact ? 6 : 8);
+ final range = isUltra ? 14 : (isCompact ? 18 : 24);
+ final barHeight = base + (normalized * range);
return Container(
- width: 4,
+ width: isUltra ? 2.5 : (isCompact ? 3 : 4),
height: barHeight,
- margin: const EdgeInsets.symmetric(horizontal: 2),
+ margin: EdgeInsets.symmetric(horizontal: isUltra ? 1 : (isCompact ? 1.5 : 2)),
decoration: BoxDecoration(
color: context.conduitTheme.buttonPrimary
.withValues(alpha: 0.7),
@@ -2125,151 +2374,156 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
),
),
),
- const SizedBox(height: Spacing.xl),
+ SizedBox(height: isUltra ? Spacing.sm : (isCompact ? Spacing.md : Spacing.xl)),
- // Recognized text / Transcribing state
- Container(
- margin: const EdgeInsets.symmetric(horizontal: 20),
- padding: const EdgeInsets.all(Spacing.md),
+ // Transcript card: shows the recognized text or a "Transcribing…" indicator, with a Clear action
+ ConstrainedBox(
constraints: BoxConstraints(
- maxHeight: MediaQuery.of(context).size.height * 0.2,
- minHeight: 80,
+ maxHeight: media.size.height * (isUltra ? 0.13 : (isCompact ? 0.16 : 0.2)),
+ minHeight: isUltra ? 56 : (isCompact ? 64 : 80),
),
- decoration: BoxDecoration(
- color: context.conduitTheme.inputBackground,
- borderRadius: BorderRadius.circular(AppBorderRadius.md),
- border: Border.all(
- color: context.conduitTheme.inputBorder,
- width: 1,
+ child: ConduitCard(
+ isCompact: isCompact,
+ padding: EdgeInsets.all(isCompact ? Spacing.md : Spacing.md),
+ child: Column(
+ mainAxisSize: MainAxisSize.min,
+ crossAxisAlignment: CrossAxisAlignment.stretch,
+ children: [
+ // Header row: 'Transcript' label with the clear action pushed to the end
+ Row(
+ children: [
+ Text(
+ 'Transcript',
+ style: TextStyle(
+ fontSize: AppTypography.labelSmall,
+ fontWeight: FontWeight.w600,
+ color: context.conduitTheme.textSecondary,
+ ),
+ ),
+ const Spacer(),
+ ConduitIconButton(
+ icon: Icons.close,
+ isCompact: true,
+ tooltip: 'Clear',
+ onPressed: _recognizedText.isNotEmpty && !_isTranscribing
+ ? () {
+ setState(() => _recognizedText = '');
+ }
+ : null,
+ ),
+ ],
+ ),
+ const SizedBox(height: Spacing.xs),
+ if (_isTranscribing)
+ Center(
+ child: Row(
+ mainAxisAlignment: MainAxisAlignment.center,
+ children: [
+ ConduitLoadingIndicator(
+ size: isUltra ? 14 : (isCompact ? 16 : 18),
+ isCompact: true,
+ ),
+ const SizedBox(width: Spacing.xs),
+ Text(
+ 'Transcribing…',
+ style: TextStyle(
+ fontSize: isUltra
+ ? AppTypography.bodySmall
+ : (isCompact
+ ? AppTypography.bodyMedium
+ : AppTypography.bodyLarge),
+ color: context.conduitTheme.textSecondary,
+ ),
+ ),
+ ],
+ ),
+ )
+ else
+ Flexible(
+ child: SingleChildScrollView(
+ child: Text(
+ _recognizedText.isEmpty
+ ? (_isListening
+ ? (_voiceService.hasLocalStt
+ ? 'Speak now…'
+ : 'Recording…')
+ : 'Tap Start to begin')
+ : _recognizedText,
+ style: TextStyle(
+ fontSize: isUltra
+ ? AppTypography.bodySmall
+ : (isCompact
+ ? AppTypography.bodyMedium
+ : AppTypography.bodyLarge),
+ color: _recognizedText.isEmpty
+ ? context.conduitTheme.inputPlaceholder
+ : context.conduitTheme.textPrimary,
+ height: 1.4,
+ ),
+ textAlign: TextAlign.center,
+ ),
+ ),
+ ),
+ ],
),
),
- child: _isTranscribing
- ? Center(
- child: Row(
- mainAxisAlignment: MainAxisAlignment.center,
- children: [
- SizedBox(
- width: 16,
- height: 16,
- child: CircularProgressIndicator(
- strokeWidth: 2,
- color: context.conduitTheme.buttonPrimary,
- ),
- ),
- const SizedBox(width: Spacing.xs),
- Text(
- 'Transcribing…',
- style: TextStyle(
- fontSize: AppTypography.bodyLarge,
- color: context.conduitTheme.textSecondary,
- ),
- ),
- ],
- ),
- )
- : SingleChildScrollView(
- child: Text(
- _recognizedText.isEmpty
- ? (_isListening
- ? 'Speak now…'
- : 'Tap Start to begin')
- : _recognizedText,
- style: TextStyle(
- fontSize: AppTypography.bodyLarge,
- color: _recognizedText.isEmpty
- ? context.conduitTheme.inputPlaceholder
- : context.conduitTheme.textPrimary,
- height: 1.5,
- ),
- textAlign: TextAlign.center,
- ),
- ),
),
- ],
- ),
+ ],
+ ),
+ );
+
+ // Make scrollable if content exceeds available height
+ return SingleChildScrollView(
+ physics: const ClampingScrollPhysics(),
+ padding: EdgeInsets.only(top: topPaddingForScale),
+ child: ConstrainedBox(
+ constraints: BoxConstraints(minHeight: viewport.maxHeight),
+ child: content,
+ ),
+ );
+ },
),
),
// Action buttons
- Padding(
- padding: const EdgeInsets.all(Spacing.lg),
- child: Row(
- children: [
- // Start/Stop toggle button
- Expanded(
- child: FilledButton.tonal(
- onPressed: _isListening ? _stopListening : _startListening,
- style: FilledButton.styleFrom(
- padding: const EdgeInsets.symmetric(vertical: Spacing.md),
- shape: RoundedRectangleBorder(
- borderRadius: BorderRadius.circular(AppBorderRadius.md),
+ Builder(builder: (context) {
+ final showStartStop = !_holdToTalk;
+ final showSend = !_autoSendFinal;
+ if (!showStartStop && !showSend) {
+ return const SizedBox.shrink();
+ }
+ return Padding(
+ padding: EdgeInsets.only(top: isCompact ? Spacing.sm : Spacing.md),
+ child: Row(
+ children: [
+ if (showStartStop) ...[
+ Expanded(
+ child: ConduitButton(
+ text: _isListening ? 'Stop' : 'Start',
+ isSecondary: true,
+ isCompact: isCompact,
+ onPressed: _isListening ? _stopListening : _startListening,
),
),
- child: Text(
- _isListening ? 'Stop' : 'Start',
- style: TextStyle(
- fontSize: AppTypography.bodyLarge,
- fontWeight: FontWeight.w600,
- color: context.conduitTheme.textPrimary,
+ ],
+ if (showStartStop && showSend) const SizedBox(width: Spacing.xs),
+ if (showSend) ...[
+ Expanded(
+ child: ConduitButton(
+ text: 'Send',
+ isCompact: isCompact,
+ onPressed: _recognizedText.isNotEmpty ? _sendText : null,
),
),
- ),
- ),
-
- const SizedBox(width: Spacing.xs),
- // Cancel button
- Expanded(
- child: TextButton(
- onPressed: _cancel,
- style: TextButton.styleFrom(
- padding: const EdgeInsets.symmetric(vertical: Spacing.md),
- shape: RoundedRectangleBorder(
- borderRadius: BorderRadius.circular(AppBorderRadius.md),
- side: BorderSide(
- color: context.conduitTheme.dividerColor,
- width: 1,
- ),
- ),
- ),
- child: Text(
- 'Cancel',
- style: TextStyle(
- color: context.conduitTheme.textPrimary,
- fontSize: AppTypography.bodyLarge,
- fontWeight: FontWeight.w500,
- ),
- ),
- ),
- ),
-
- const SizedBox(width: Spacing.xs),
-
- // Send button
- Expanded(
- child: FilledButton(
- onPressed: _recognizedText.isNotEmpty ? _sendText : null,
- style: FilledButton.styleFrom(
- backgroundColor: context.conduitTheme.buttonPrimary,
- foregroundColor: context.conduitTheme.buttonPrimaryText,
- padding: const EdgeInsets.symmetric(vertical: Spacing.md),
- shape: RoundedRectangleBorder(
- borderRadius: BorderRadius.circular(AppBorderRadius.md),
- ),
- ),
- child: Text(
- 'Send',
- style: TextStyle(
- fontSize: AppTypography.bodyLarge,
- fontWeight: FontWeight.w600,
- ),
- ),
- ),
- ),
- ],
- ),
- ),
+ ],
+ ],
+ ),
+ );
+ }),
],
),
+ ),
+ ),
);
}
}
diff --git a/pubspec.lock b/pubspec.lock
index 13a74e7..bb4ef95 100644
--- a/pubspec.lock
+++ b/pubspec.lock
@@ -824,6 +824,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.3.0"
+ pedantic:
+ dependency: transitive
+ description:
+ name: pedantic
+ sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602"
+ url: "https://pub.dev"
+ source: hosted
+ version: "1.11.1"
petitparser:
dependency: transitive
description:
@@ -1085,6 +1093,30 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.10.1"
+ speech_to_text:
+ dependency: "direct main"
+ description:
+ name: speech_to_text
+ sha256: c07557664974afa061f221d0d4186935bea4220728ea9446702825e8b988db04
+ url: "https://pub.dev"
+ source: hosted
+ version: "7.3.0"
+ speech_to_text_platform_interface:
+ dependency: transitive
+ description:
+ name: speech_to_text_platform_interface
+ sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114
+ url: "https://pub.dev"
+ source: hosted
+ version: "2.3.0"
+ speech_to_text_windows:
+ dependency: transitive
+ description:
+ name: speech_to_text_windows
+ sha256: "2c9846d18253c7bbe059a276297ef9f27e8a2745dead32192525beb208195072"
+ url: "https://pub.dev"
+ source: hosted
+ version: "1.0.0+beta.8"
sprintf:
dependency: transitive
description:
diff --git a/pubspec.yaml b/pubspec.yaml
index 65291cd..3ca0dfd 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -35,6 +35,7 @@ dependencies:
# Platform Features
record: ^6.0.0
+ speech_to_text: ^7.3.0
image_picker: ^1.1.2
file_picker: ^10.2.1
path_provider: ^2.1.4