diff --git a/flutter_01.png b/flutter_01.png
new file mode 100644
index 0000000..8ed4c34
Binary files /dev/null and b/flutter_01.png differ
diff --git a/ios/Podfile.lock b/ios/Podfile.lock
index afb4210..42cde87 100644
--- a/ios/Podfile.lock
+++ b/ios/Podfile.lock
@@ -49,13 +49,13 @@ PODS:
     - Flutter
   - image_picker_ios (0.0.1):
     - Flutter
+  - mic_stream_recorder (0.0.1):
+    - Flutter
   - package_info_plus (0.4.5):
     - Flutter
   - path_provider_foundation (0.0.1):
     - Flutter
     - FlutterMacOS
-  - record_ios (1.1.0):
-    - Flutter
   - SDWebImage (5.21.1):
     - SDWebImage/Core (= 5.21.1)
   - SDWebImage/Core (5.21.1)
@@ -96,9 +96,9 @@ DEPENDENCIES:
   - flutter_secure_storage (from `.symlinks/plugins/flutter_secure_storage/ios`)
   - flutter_tts (from `.symlinks/plugins/flutter_tts/ios`)
   - image_picker_ios (from `.symlinks/plugins/image_picker_ios/ios`)
+  - mic_stream_recorder (from `.symlinks/plugins/mic_stream_recorder/ios`)
   - package_info_plus (from `.symlinks/plugins/package_info_plus/ios`)
   - path_provider_foundation (from `.symlinks/plugins/path_provider_foundation/darwin`)
-  - record_ios (from `.symlinks/plugins/record_ios/ios`)
   - share_handler_ios (from `.symlinks/plugins/share_handler_ios/ios`)
   - share_handler_ios_models (from `.symlinks/plugins/share_handler_ios/ios/Models`)
   - share_plus (from `.symlinks/plugins/share_plus/ios`)
@@ -135,12 +135,12 @@ EXTERNAL SOURCES:
     :path: ".symlinks/plugins/flutter_tts/ios"
   image_picker_ios:
     :path: ".symlinks/plugins/image_picker_ios/ios"
+  mic_stream_recorder:
+    :path: ".symlinks/plugins/mic_stream_recorder/ios"
   package_info_plus:
     :path: ".symlinks/plugins/package_info_plus/ios"
   path_provider_foundation:
     :path: ".symlinks/plugins/path_provider_foundation/darwin"
-  record_ios:
-    :path: ".symlinks/plugins/record_ios/ios"
   share_handler_ios:
     :path: ".symlinks/plugins/share_handler_ios/ios"
   share_handler_ios_models:
@@ -172,9 +172,9 @@ SPEC CHECKSUMS:
   flutter_secure_storage: 1ed9476fba7e7a782b22888f956cce43e2c62f13
   flutter_tts: b88dbc8655d3dc961bc4a796e4e16a4cc1795833
   image_picker_ios: 7fe1ff8e34c1790d6fff70a32484959f563a928a
+  mic_stream_recorder: 27d2d1225563a3a28bf4019fc5cc198cffd7dad1
   package_info_plus: af8e2ca6888548050f16fa2f1938db7b5a5df499
   path_provider_foundation: 080d55be775b7414fd5a5ef3ac137b97b097e564
-  record_ios: f75fa1d57f840012775c0e93a38a7f3ceea1a374
   SDWebImage: f29024626962457f3470184232766516dee8dfea
   share_handler_ios: e2244e990f826b2c8eaa291ac3831569438ba0fb
   share_handler_ios_models: fc638c9b4330dc7f082586c92aee9dfa0b87b871
diff --git a/lib/core/persistence/persistence_keys.dart b/lib/core/persistence/persistence_keys.dart
index d0078c5..c74a616 100644
--- a/lib/core/persistence/persistence_keys.dart
+++ b/lib/core/persistence/persistence_keys.dart
@@ -11,6 +11,7 @@ final class PreferenceKeys {
   static const String voiceLocaleId = 'voice_locale_id';
   static const String voiceHoldToTalk = 'voice_hold_to_talk';
   static const String voiceAutoSendFinal = 'voice_auto_send_final';
+  static const String voiceSttPreference = 'voice_stt_preference';
   static const String socketTransportMode = 'socket_transport_mode';
   static const String quickPills = 'quick_pills';
   static const String sendOnEnterKey = 'send_on_enter';
@@ -27,6 +28,7 @@ final class PreferenceKeys {
   static const String ttsEngine = 'tts_engine'; // 'device' | 'server'
   static const String ttsServerVoiceId = 'tts_server_voice_id';
   static const String ttsServerVoiceName = 'tts_server_voice_name';
+  static const String voiceSilenceDuration = 'voice_silence_duration';
 }
 
 final class LegacyPreferenceKeys {
diff --git a/lib/core/persistence/persistence_migrator.dart b/lib/core/persistence/persistence_migrator.dart
index 8a7350f..d6a278c 100644
--- a/lib/core/persistence/persistence_migrator.dart
+++ b/lib/core/persistence/persistence_migrator.dart
@@ -90,6 +90,7 @@ class PersistenceMigrator {
     copyString(PreferenceKeys.voiceLocaleId);
     copyBool(PreferenceKeys.voiceHoldToTalk);
     copyBool(PreferenceKeys.voiceAutoSendFinal);
+    copyString(PreferenceKeys.voiceSttPreference);
     copyString(PreferenceKeys.socketTransportMode);
     copyStringList(PreferenceKeys.quickPills);
     copyBool(PreferenceKeys.sendOnEnterKey);
@@ -194,6 +195,7 @@ class PersistenceMigrator {
       PreferenceKeys.voiceLocaleId,
       PreferenceKeys.voiceHoldToTalk,
       PreferenceKeys.voiceAutoSendFinal,
+      PreferenceKeys.voiceSttPreference,
       PreferenceKeys.socketTransportMode,
       PreferenceKeys.quickPills,
       PreferenceKeys.sendOnEnterKey,
diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart
index efafb45..f88cd49 100644
--- a/lib/core/services/api_service.dart
+++ b/lib/core/services/api_service.dart
@@ -4,7 +4,7 @@ import 'dart:io';
 import 'package:dio/dio.dart';
 import 'package:dio/io.dart';
 import 'package:flutter/foundation.dart';
-// import 'package:http_parser/http_parser.dart';
+import 'package:http_parser/http_parser.dart';
 // Removed legacy websocket/socket.io imports
 import 'package:uuid/uuid.dart';
 import '../models/backend_config.dart';
@@ -1607,15 +1607,69 @@ class ApiService {
     return [];
   }
 
+  /// Uploads [audioBytes] to the server speech-to-text endpoint.
+  ///
+  /// Returns the JSON reply; a plain-string reply becomes `{'text': reply}`.
+  Future<Map<String, dynamic>> transcribeSpeech({
+    required Uint8List audioBytes,
+    String? fileName,
+    String? mimeType,
+    String? language,
+  }) async {
+    if (audioBytes.isEmpty) {
+      throw ArgumentError('audioBytes cannot be empty for transcription');
+    }
+
+    final name = fileName?.trim() ?? '';
+    final sanitizedFileName = name.isEmpty ? 'audio.m4a' : name;
+    final mime = mimeType?.trim() ?? '';
+    final resolvedMimeType = mime.isEmpty
+        ? _inferMimeTypeFromName(sanitizedFileName)
+        : mime;
+    final lang = language?.trim() ?? '';
+
+    _traceApi(
+      'Uploading $sanitizedFileName (${audioBytes.length} bytes) for transcription',
+    );
+
+    final formData = FormData.fromMap({
+      'file': MultipartFile.fromBytes(
+        audioBytes,
+        filename: sanitizedFileName,
+        contentType: _parseMediaType(resolvedMimeType),
+      ),
+      if (lang.isNotEmpty) 'language': lang,
+    });
+
+    final response = await _dio.post(
+      '/api/v1/audio/transcriptions',
+      data: formData,
+      options: Options(headers: const {'accept': 'application/json'}),
+    );
+
+    return switch (response.data) {
+      final Map<String, dynamic> json => json,
+      final String text => {'text': text},
+      final other => throw StateError(
+        'Unexpected transcription response type: ${other.runtimeType}',
+      ),
+    };
+  }
+
   Future<({Uint8List bytes, String mimeType})> generateSpeech({
     required String text,
     String? voice,
+    double? speed,
   }) async {
     final textPreview = text.length > 50 ? text.substring(0, 50) : text;
     _traceApi('Generating speech for text: $textPreview...');
     final response = await _dio.post(
       '/api/v1/audio/speech',
-      data: {'input': text, if (voice != null) 'voice': voice},
+      data: {
+        'input': text,
+        if (voice != null) 'voice': voice,
+        if (speed != null) 'speed': speed,
+      },
       options: Options(responseType: ResponseType.bytes),
     );
 
@@ -1690,7 +1744,38 @@ class ApiService {
     return bytes.length >= 2 && bytes[0] == 0xFF && (bytes[1] & 0xE0) == 0xE0;
   }
 
-  // Server audio transcription removed; rely on on-device STT in UI layer
+  /// Guesses an audio MIME type from the extension of [name].
+  ///
+  /// The extension is matched case-insensitively. Names without an extension
+  /// and unrecognised extensions both map to `audio/mpeg`.
+  String _inferMimeTypeFromName(String name) {
+    final dot = name.lastIndexOf('.');
+    final hasExtension = dot != -1 && dot != name.length - 1;
+    if (!hasExtension) {
+      return 'audio/mpeg';
+    }
+    return switch (name.substring(dot + 1).toLowerCase()) {
+      'wav' => 'audio/wav',
+      'ogg' => 'audio/ogg',
+      'm4a' || 'mp4' => 'audio/mp4',
+      'aac' => 'audio/aac',
+      'webm' => 'audio/webm',
+      'flac' => 'audio/flac',
+      _ => 'audio/mpeg', // includes 'mp3'
+    };
+  }
+
+  /// Parses [value] into a [MediaType].
+  ///
+  /// Returns `null` when [value] is null, empty, or cannot be parsed.
+  MediaType? _parseMediaType(String? value) {
+    if (value == null || value.isEmpty) return null;
+    try {
+      return MediaType.parse(value);
+    } catch (_) {
+      return null;
+    }
+  }
 
   // Image Generation
   Future<List<Map<String, dynamic>>> getImageModels() async {
diff --git a/lib/core/services/settings_service.dart b/lib/core/services/settings_service.dart
index 19ba497..e04f210 100644
--- a/lib/core/services/settings_service.dart
+++ b/lib/core/services/settings_service.dart
@@ -8,8 +8,11 @@ import 'animation_service.dart';
 
 part 'settings_service.g.dart';
 
+/// Speech-to-text preference selection.
+enum SttPreference { auto, deviceOnly, serverOnly }
+
 /// TTS engine selection
-enum TtsEngine { device, server }
+enum TtsEngine { auto, device, server }
 
 /// Service for managing app-wide settings including accessibility preferences
 class SettingsService {
@@ -32,6 +35,9 @@ class SettingsService {
       .quickPills; // StringList of identifiers e.g. ['web','image','tools']
   // Chat input behavior
   static const String _sendOnEnterKey = PreferenceKeys.sendOnEnterKey;
+  // Voice silence duration for auto-stop (milliseconds)
+  static const String _voiceSilenceDurationKey =
+      PreferenceKeys.voiceSilenceDuration;
   static Box<dynamic> _preferencesBox() =>
       Hive.box<dynamic>(HiveBoxNames.preferences);
 
@@ -151,6 +157,11 @@ class SettingsService {
         ttsServerVoiceId: box.get(PreferenceKeys.ttsServerVoiceId) as String?,
         ttsServerVoiceName:
             box.get(PreferenceKeys.ttsServerVoiceName) as String?,
+        sttPreference: _parseSttPreference(
+          box.get(PreferenceKeys.voiceSttPreference) as String?,
+        ),
+        voiceSilenceDuration:
+            (box.get(_voiceSilenceDurationKey) as int? ?? 2000).clamp(300, 3000),
       ),
     );
   }
@@ -174,6 +185,8 @@ class SettingsService {
       PreferenceKeys.ttsPitch: settings.ttsPitch,
       PreferenceKeys.ttsVolume: settings.ttsVolume,
       PreferenceKeys.ttsEngine: settings.ttsEngine.name,
+      PreferenceKeys.voiceSttPreference: settings.sttPreference.name,
+      _voiceSilenceDurationKey: settings.voiceSilenceDuration,
     };
 
     await box.putAll(updates);
@@ -216,11 +229,31 @@ class SettingsService {
 
   static TtsEngine _parseTtsEngine(String? raw) {
     switch ((raw ?? '').toLowerCase()) {
+      case 'auto':
+      case '':
+        return TtsEngine.auto;
       case 'server':
         return TtsEngine.server;
       case 'device':
-      default:
         return TtsEngine.device;
+      default:
+        return TtsEngine.auto;
+    }
+  }
+
+  static SttPreference _parseSttPreference(String? raw) {
+    switch ((raw ?? '').toLowerCase()) {
+      case 'deviceonly':
+      case 'device_only':
+      case 'device':
+        return SttPreference.deviceOnly;
+      case 'serveronly':
+      case 'server_only':
+      case 'server':
+        return SttPreference.serverOnly;
+      case 'auto':
+      default:
+        return SttPreference.auto;
     }
   }
 
@@ -304,6 +337,16 @@ class SettingsService {
     return _preferencesBox().put(_sendOnEnterKey, value);
   }
 
+  static Future<int> getVoiceSilenceDuration() {
+    final value = _preferencesBox().get(_voiceSilenceDurationKey) as int?;
+    return Future.value((value ?? 2000).clamp(300, 3000));
+  }
+
+  static Future<void> setVoiceSilenceDuration(int milliseconds) {
+    final sanitized = milliseconds.clamp(300, 3000);
+    return _preferencesBox().put(_voiceSilenceDurationKey, sanitized);
+  }
+
   /// Get effective animation duration considering all settings
   static Duration getEffectiveAnimationDuration(
     BuildContext context,
@@ -359,6 +402,7 @@ class AppSettings {
   final String socketTransportMode; // 'polling' or 'ws'
   final List<String> quickPills; // e.g., ['web','image']
   final bool sendOnEnter;
+  final SttPreference sttPreference;
   final String? ttsVoice;
   final double ttsSpeechRate;
   final double ttsPitch;
@@ -366,6 +410,7 @@ class AppSettings {
   final TtsEngine ttsEngine;
   final String? ttsServerVoiceId;
   final String? ttsServerVoiceName;
+  final int voiceSilenceDuration;
   const AppSettings({
     this.reduceMotion = false,
     this.animationSpeed = 1.0,
@@ -380,13 +425,15 @@ class AppSettings {
     this.socketTransportMode = 'ws',
     this.quickPills = const [],
     this.sendOnEnter = false,
+    this.sttPreference = SttPreference.auto,
     this.ttsVoice,
     this.ttsSpeechRate = 0.5,
     this.ttsPitch = 1.0,
     this.ttsVolume = 1.0,
-    this.ttsEngine = TtsEngine.device,
+    this.ttsEngine = TtsEngine.auto,
     this.ttsServerVoiceId,
     this.ttsServerVoiceName,
+    this.voiceSilenceDuration = 2000,
   });
 
   AppSettings copyWith({
@@ -403,6 +450,7 @@ class AppSettings {
     String? socketTransportMode,
     List<String>? quickPills,
     bool? sendOnEnter,
+    SttPreference? sttPreference,
     Object? ttsVoice = const _DefaultValue(),
     double? ttsSpeechRate,
     double? ttsPitch,
@@ -410,6 +458,7 @@ class AppSettings {
     TtsEngine? ttsEngine,
     Object? ttsServerVoiceId = const _DefaultValue(),
     Object? ttsServerVoiceName = const _DefaultValue(),
+    int? voiceSilenceDuration,
   }) {
     return AppSettings(
       reduceMotion: reduceMotion ?? this.reduceMotion,
@@ -429,6 +478,7 @@ class AppSettings {
       socketTransportMode: socketTransportMode ?? this.socketTransportMode,
       quickPills: quickPills ?? this.quickPills,
       sendOnEnter: sendOnEnter ?? this.sendOnEnter,
+      sttPreference: sttPreference ?? this.sttPreference,
       ttsVoice: ttsVoice is _DefaultValue ? this.ttsVoice : ttsVoice as String?,
       ttsSpeechRate: ttsSpeechRate ?? this.ttsSpeechRate,
       ttsPitch: ttsPitch ?? this.ttsPitch,
@@ -440,6 +490,7 @@ class AppSettings {
       ttsServerVoiceName: ttsServerVoiceName is _DefaultValue
           ? this.ttsServerVoiceName
           : ttsServerVoiceName as String?,
+      voiceSilenceDuration: voiceSilenceDuration ?? this.voiceSilenceDuration,
     );
   }
 
@@ -457,6 +508,7 @@ class AppSettings {
         other.voiceLocaleId == voiceLocaleId &&
         other.voiceHoldToTalk == voiceHoldToTalk &&
         other.voiceAutoSendFinal == voiceAutoSendFinal &&
+        other.sttPreference == sttPreference &&
         other.sendOnEnter == sendOnEnter &&
         other.ttsVoice == ttsVoice &&
         other.ttsSpeechRate == ttsSpeechRate &&
@@ -465,13 +517,14 @@ class AppSettings {
         other.ttsEngine == ttsEngine &&
         other.ttsServerVoiceId == ttsServerVoiceId &&
         other.ttsServerVoiceName == ttsServerVoiceName &&
+        other.voiceSilenceDuration == voiceSilenceDuration &&
         _listEquals(other.quickPills, quickPills);
     // socketTransportMode intentionally not included in == to avoid frequent rebuilds
   }
 
   @override
   int get hashCode {
-    return Object.hash(
+    return Object.hashAll([
       reduceMotion,
       animationSpeed,
       hapticFeedback,
@@ -482,6 +535,7 @@ class AppSettings {
       voiceLocaleId,
       voiceHoldToTalk,
       voiceAutoSendFinal,
+      sttPreference,
       socketTransportMode,
       sendOnEnter,
       ttsVoice,
@@ -491,8 +545,9 @@ class AppSettings {
       ttsEngine,
       ttsServerVoiceId,
       ttsServerVoiceName,
+      voiceSilenceDuration,
       Object.hashAllUnordered(quickPills),
-    );
+    ]);
   }
 }
 
@@ -603,6 +658,14 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
     await SettingsService.setSendOnEnter(value);
   }
 
+  Future<void> setSttPreference(SttPreference preference) async {
+    if (state.sttPreference == preference) {
+      return;
+    }
+    state = state.copyWith(sttPreference: preference);
+    await SettingsService.saveSettings(state);
+  }
+
   Future<void> setTtsVoice(String? voice) async {
     state = state.copyWith(ttsVoice: voice);
     await SettingsService.saveSettings(state);
@@ -638,6 +701,16 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
     await SettingsService.saveSettings(state);
   }
 
+  /// Updates the auto-stop silence window (milliseconds) and persists it.
+  ///
+  /// The value is clamped to 300–3000 ms before it is stored in [state] so
+  /// the in-memory setting matches what [SettingsService] writes to disk.
+  Future<void> setVoiceSilenceDuration(int milliseconds) async {
+    final sanitized = milliseconds.clamp(300, 3000);
+    state = state.copyWith(voiceSilenceDuration: sanitized);
+    await SettingsService.setVoiceSilenceDuration(sanitized);
+  }
+
   Future<void> resetToDefaults() async {
     const defaultSettings = AppSettings();
     await SettingsService.saveSettings(defaultSettings);
diff --git a/lib/core/services/streaming_helper.dart b/lib/core/services/streaming_helper.dart
index 682c11e..c546181 100644
--- a/lib/core/services/streaming_helper.dart
+++ b/lib/core/services/streaming_helper.dart
@@ -277,6 +277,13 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
     )..start();
   }
 
+  Timer? imageCollectionDebounce;
+  String? pendingImageContent;
+  String? pendingImageMessageId;
+  String? pendingImageSignature;
+  String? lastProcessedImageSignature;
+  int imageCollectionRequestId = 0;
+
   void disposeSocketSubscriptions() {
     if (socketSubscriptions.isEmpty) {
       return;
@@ -287,56 +294,119 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
       } catch (_) {}
     }
     socketSubscriptions.clear();
+    imageCollectionDebounce?.cancel();
+    imageCollectionDebounce = null;
+    pendingImageContent = null;
+    pendingImageMessageId = null;
+    pendingImageSignature = null;
+    lastProcessedImageSignature = null;
+    imageCollectionRequestId = 0;
     socketWatchdog?.stop();
   }
 
   bool isSearching = false;
 
+  /// Flushes the queued image scan for the streaming assistant message.
+  ///
+  /// Consumes the pending content/message id/signature, scans the content on
+  /// a worker, and merges any new image URLs into the last message's files.
+  /// A result is dropped when a newer scan has started since, or when the
+  /// last message is no longer the assistant message that was scanned.
+  void runPendingImageCollection() {
+    imageCollectionDebounce?.cancel();
+    imageCollectionDebounce = null;
+
+    final queuedContent = pendingImageContent;
+    final queuedMessageId = pendingImageMessageId;
+    final queuedSignature = pendingImageSignature;
+    if (queuedContent == null ||
+        queuedMessageId == null ||
+        queuedSignature == null) {
+      return;
+    }
+
+    pendingImageContent = null;
+    pendingImageMessageId = null;
+    pendingImageSignature = null;
+
+    final ticket = ++imageCollectionRequestId;
+    unawaited(
+      workerManager
+          .schedule<String, List<Map<String, dynamic>>>(
+            _collectImageReferencesWorker,
+            queuedContent,
+            debugLabel: 'stream_collect_images',
+          )
+          .then((found) {
+            // A newer scan superseded this one.
+            if (ticket != imageCollectionRequestId) return;
+
+            final messages = getMessages();
+            if (messages.isEmpty) return;
+            final tail = messages.last;
+            if (tail.id != queuedMessageId || tail.role != 'assistant') return;
+
+            // Remember the signature even when no images were found so that
+            // identical content is not rescanned.
+            lastProcessedImageSignature = queuedSignature;
+            if (found.isEmpty) return;
+
+            final current = tail.files ?? <Map<String, dynamic>>[];
+            final knownUrls = <String>{
+              for (final file in current)
+                if (file['url'] case final String url when url.isNotEmpty) url,
+            };
+
+            final combined = <Map<String, dynamic>>[...current];
+            for (final file in found) {
+              final url = file['url'] as String?;
+              if (url == null || url.isEmpty || !knownUrls.add(url)) continue;
+              combined.add({'type': 'image', 'url': url});
+            }
+
+            if (combined.length != current.length) {
+              updateLastMessageWith((m) => m.copyWith(files: combined));
+            }
+          })
+          .catchError((_) {}),
+    );
+  }
+
   void updateImagesFromCurrentContent() {
     try {
       final msgs = getMessages();
       if (msgs.isEmpty || msgs.last.role != 'assistant') return;
-      final content = msgs.last.content;
+      final last = msgs.last;
+      final content = last.content;
       if (content.isEmpty) return;
 
-      final targetMessageId = msgs.last.id;
-      unawaited(
-        workerManager
-            .schedule<String, List<Map<String, dynamic>>>(
-              _collectImageReferencesWorker,
-              content,
-              debugLabel: 'stream_collect_images',
-            )
-            .then((collected) {
-              if (collected.isEmpty) return;
-              final currentMessages = getMessages();
-              if (currentMessages.isEmpty) return;
-              final last = currentMessages.last;
-              if (last.id != targetMessageId || last.role != 'assistant') {
-                return;
-              }
+      final targetMessageId = last.id;
+      final signature =
+          '$targetMessageId:${content.hashCode}:${content.length}';
 
-              final existing = last.files ?? <Map<String, dynamic>>[];
-              final seen = <String>{
-                for (final f in existing)
-                  if (f['url'] is String) (f['url'] as String) else '',
-              }..removeWhere((e) => e.isEmpty);
+      if (signature == lastProcessedImageSignature &&
+          pendingImageSignature == null) {
+        return;
+      }
+      if (signature == pendingImageSignature) {
+        return;
+      }
 
-              final merged = <Map<String, dynamic>>[...existing];
-              for (final f in collected) {
-                final url = f['url'] as String?;
-                if (url != null && url.isNotEmpty && !seen.contains(url)) {
-                  merged.add({'type': 'image', 'url': url});
-                  seen.add(url);
-                }
-              }
+      pendingImageMessageId = targetMessageId;
+      pendingImageContent = content;
+      pendingImageSignature = signature;
 
-              if (merged.length != existing.length) {
-                updateLastMessageWith((m) => m.copyWith(files: merged));
-              }
-            })
-            .catchError((_) {}),
-      );
+      final shouldDelay = last.isStreaming;
+
+      imageCollectionDebounce?.cancel();
+      if (shouldDelay) {
+        imageCollectionDebounce = Timer(
+          const Duration(milliseconds: 200),
+          runPendingImageCollection,
+        );
+      } else {
+        runPendingImageCollection();
+      }
     } catch (_) {}
   }
 
diff --git a/lib/features/chat/providers/chat_providers.dart b/lib/features/chat/providers/chat_providers.dart
index cea62f7..c9f8307 100644
--- a/lib/features/chat/providers/chat_providers.dart
+++ b/lib/features/chat/providers/chat_providers.dart
@@ -647,13 +647,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
       return;
     }
 
-    // Log content replacement for debugging
-    DebugLogger.log(
-      'Replacing message content: messageId=${lastMessage.id}, '
-      'oldLength=${lastMessage.content.length}, newLength=${content.length}',
-      scope: 'chat/providers',
-    );
-
     _ensureFormatterForMessage(lastMessage);
 
     // Defensive check: ensure the formatter is for the correct message
diff --git a/lib/features/chat/providers/text_to_speech_provider.dart b/lib/features/chat/providers/text_to_speech_provider.dart
index 2f53bd6..7992596 100644
--- a/lib/features/chat/providers/text_to_speech_provider.dart
+++ b/lib/features/chat/providers/text_to_speech_provider.dart
@@ -107,11 +107,9 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
     // Listen to settings changes and update TTS when initialized
     ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
       if (_service.isInitialized && _service.isAvailable) {
-        final selectedVoice = next.ttsEngine == TtsEngine.server
-            ? next.ttsServerVoiceId
-            : next.ttsVoice;
         _service.updateSettings(
-          voice: selectedVoice,
+          voice: next.ttsVoice,
+          serverVoice: next.ttsServerVoiceId,
           speechRate: next.ttsSpeechRate,
           pitch: next.ttsPitch,
           volume: next.ttsVolume,
@@ -137,9 +135,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
     final settings = ref.read(appSettingsProvider);
     final future = _service
         .initialize(
-          voice: settings.ttsEngine == TtsEngine.server
-              ? settings.ttsServerVoiceId
-              : settings.ttsVoice,
+          deviceVoice: settings.ttsVoice,
+          serverVoice: settings.ttsServerVoiceId,
           speechRate: settings.ttsSpeechRate,
           pitch: settings.ttsPitch,
           volume: settings.ttsVolume,
@@ -222,8 +219,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
 
     // Prepare sentence split for highlighting
     final cleanText = MarkdownToText.convert(text);
-    final sentences = _splitForTts(cleanText);
-    final offsets = _computeOffsets(sentences);
+    final sentences = _service.splitTextForSpeech(cleanText);
+    final offsets = _computeOffsets(cleanText, sentences);
 
     state = state.copyWith(
       status: TtsPlaybackStatus.loading,
@@ -268,30 +265,33 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
     }
   }
 
-  List<String> _splitForTts(String text) {
-    final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();
-    if (normalized.isEmpty) return const [];
-    final parts = <String>[];
-    final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)");
-    int index = 0;
-    for (final match in sentenceRegex.allMatches('$normalized ')) {
-      final s = match.group(1) ?? '';
-      if (s.trim().isNotEmpty) parts.add(s.trim());
-      index = match.end;
-    }
-    if (index < normalized.length) {
-      final tail = normalized.substring(index).trim();
-      if (tail.isNotEmpty) parts.add(tail);
-    }
-    return parts;
-  }
-
-  List<int> _computeOffsets(List<String> sentences) {
+  /// Returns, for each entry of [sentences], its start offset within [source].
+  ///
+  /// Chunks are searched left to right from the end of the previous match. A
+  /// chunk that cannot be found is given the current cursor position, and the
+  /// cursor is advanced by the chunk length but never past the end of
+  /// [source], so later lookups cannot throw a [RangeError].
+  List<int> _computeOffsets(String source, List<String> sentences) {
+    if (sentences.isEmpty) return const [];
     final offsets = <int>[];
-    int acc = 0;
-    for (final s in sentences) {
-      offsets.add(acc);
-      acc += s.length + 1; // assume a space or punctuation between
+    var cursor = 0;
+    for (final sentence in sentences) {
+      final chunk = sentence.trim();
+      if (chunk.isEmpty) {
+        offsets.add(cursor);
+        continue;
+      }
+      final index = source.indexOf(chunk, cursor);
+      if (index == -1) {
+        offsets.add(cursor);
+        // String.indexOf rejects a start index beyond the string length, so
+        // keep the estimated cursor inside [source].
+        final estimate = cursor + chunk.length;
+        cursor = estimate > source.length ? source.length : estimate;
+      } else {
+        offsets.add(index);
+        cursor = index + chunk.length;
+      }
     }
     return offsets;
   }
diff --git a/lib/features/chat/services/text_to_speech_service.dart b/lib/features/chat/services/text_to_speech_service.dart
index 56de3b5..7c8f4b4 100644
--- a/lib/features/chat/services/text_to_speech_service.dart
+++ b/lib/features/chat/services/text_to_speech_service.dart
@@ -16,8 +16,10 @@ class TextToSpeechService {
   final FlutterTts _tts = FlutterTts();
   final AudioPlayer _player = AudioPlayer();
   final ApiService? _api;
-  TtsEngine _engine = TtsEngine.device;
+  TtsEngine _engine = TtsEngine.auto;
   String? _preferredVoice;
+  String? _serverPreferredVoice;
+  double _speechRate = 0.5;
   bool _initialized = false;
   bool _available = false;
   bool _voiceConfigured = false;
@@ -41,6 +43,8 @@ class TextToSpeechService {
 
   bool get isInitialized => _initialized;
   bool get isAvailable => _available;
+  bool get deviceEngineAvailable => _deviceEngineAvailable;
+  bool get serverEngineAvailable => _api != null;
 
   TextToSpeechService({ApiService? api}) : _api = api {
     // Wire minimal player events to callbacks
@@ -59,6 +63,69 @@ class TextToSpeechService {
     });
   }
 
+  Future<void> _configureDeviceEngine({
+    required String? voice,
+    required double speechRate,
+    required double pitch,
+    required double volume,
+  }) async {
+    _deviceEngineAvailable = false;
+    try {
+      await _tts.awaitSpeakCompletion(false);
+      await _tts.setVolume(volume);
+      await _tts.setSpeechRate(speechRate);
+      await _tts.setPitch(pitch);
+
+      if (!kIsWeb && Platform.isIOS) {
+        await _tts.setSharedInstance(true);
+        await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
+          IosTextToSpeechAudioCategoryOptions.mixWithOthers,
+          IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
+          IosTextToSpeechAudioCategoryOptions.allowBluetooth,
+          IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
+        ]);
+      }
+
+      if (_engine != TtsEngine.server) {
+        await _setVoiceByName(_preferredVoice);
+      } else {
+        _voiceConfigured = false;
+      }
+
+      _deviceEngineAvailable = true;
+    } catch (e) {
+      _voiceConfigured = false;
+      _deviceEngineAvailable = false;
+      rethrow;
+    }
+  }
+
+  /// Whether the currently selected [_engine] has a usable backend.
+  ///
+  /// The server backend counts as usable whenever an [ApiService] was given.
+  bool _computeAvailability() {
+    final hasServer = _api != null;
+    return switch (_engine) {
+      TtsEngine.device => _deviceEngineAvailable,
+      TtsEngine.server => hasServer,
+      TtsEngine.auto => _deviceEngineAvailable || hasServer,
+    };
+  }
+
+  /// Whether playback should be routed through the server engine.
+  ///
+  /// In auto mode the device engine is preferred when it is available.
+  /// The server is never chosen without an [ApiService].
+  bool _shouldUseServer() {
+    final hasServer = _api != null;
+    return switch (_engine) {
+      TtsEngine.server => hasServer,
+      TtsEngine.device => false,
+      // Auto: prefer device when available, otherwise fall back to server
+      TtsEngine.auto => !_deviceEngineAvailable && hasServer,
+    };
+  }
+
   /// Register callbacks for TTS lifecycle events
   void bindHandlers({
     VoidCallback? onStart,
@@ -96,56 +163,60 @@ class TextToSpeechService {
 
   /// Initialize the native TTS engine lazily
   Future<bool> initialize({
-    String? voice,
+    String? deviceVoice,
+    String? serverVoice,
     double speechRate = 0.5,
     double pitch = 1.0,
     double volume = 1.0,
-    TtsEngine engine = TtsEngine.device,
+    TtsEngine engine = TtsEngine.auto,
   }) async {
     if (_initialized) {
+      _engine = engine;
+      _speechRate = speechRate;
+      if (deviceVoice != null) {
+        _preferredVoice = deviceVoice;
+        _voiceConfigured = false;
+      }
+      if (serverVoice != null) {
+        _serverPreferredVoice = serverVoice;
+      }
+      _available = _computeAvailability();
       return _available;
     }
 
-    try {
-      _engine = engine;
-      _preferredVoice = voice;
-      await _tts.awaitSpeakCompletion(false);
+    _engine = engine;
+    _speechRate = speechRate;
+    _preferredVoice = deviceVoice;
+    _serverPreferredVoice = serverVoice;
+    _voiceConfigured = false;
 
-      // Set volume
-      await _tts.setVolume(volume);
-
-      // Set speech rate
-      await _tts.setSpeechRate(speechRate);
-
-      // Set pitch
-      await _tts.setPitch(pitch);
-
-      if (!kIsWeb && Platform.isIOS) {
-        await _tts.setSharedInstance(true);
-        await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
-          IosTextToSpeechAudioCategoryOptions.mixWithOthers,
-          IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
-          IosTextToSpeechAudioCategoryOptions.allowBluetooth,
-          IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
-        ]);
+    if (_engine != TtsEngine.server || _api == null) {
+      try {
+        await _configureDeviceEngine(
+          voice: deviceVoice,
+          speechRate: speechRate,
+          pitch: pitch,
+          volume: volume,
+        );
+      } catch (e) {
+        if (_engine == TtsEngine.device) {
+          _available = false;
+          _onError?.call(e.toString());
+          _initialized = true;
+          return _available;
+        }
       }
-
-      // Set the voice (specific or default) when using device engine
-      if (_engine == TtsEngine.device) {
-        await _setVoiceByName(voice);
-      }
-      _deviceEngineAvailable = true;
-    } catch (e) {
+    } else {
       _deviceEngineAvailable = false;
-      if (_engine != TtsEngine.server) {
-        _available = false;
-        _onError?.call(e.toString());
-        _initialized = true;
-        return _available;
-      }
+      try {
+        await _tts.awaitSpeakCompletion(false);
+        await _tts.setVolume(volume);
+        await _tts.setSpeechRate(speechRate);
+        await _tts.setPitch(pitch);
+      } catch (_) {}
     }
 
-    _available = _engine == TtsEngine.server || _deviceEngineAvailable;
+    _available = _computeAvailability();
     _initialized = true;
     return _available;
   }
@@ -156,10 +227,23 @@ class TextToSpeechService {
     }
 
     if (!_initialized) {
-      await initialize(voice: _preferredVoice, engine: _engine);
+      await initialize(
+        deviceVoice: _preferredVoice,
+        serverVoice: _serverPreferredVoice,
+        engine: _engine,
+      );
     }
 
-    if (_engine == TtsEngine.server && _api != null) {
+    final bool useServer = _shouldUseServer();
+
+    if (useServer) {
+      if (_api == null) {
+        if (_deviceEngineAvailable) {
+          await _speakOnDevice(text);
+          return;
+        }
+        throw StateError('Server text-to-speech is unavailable');
+      }
       // Server-backed TTS with sentence chunking & queued playback
       try {
         await _startServerChunkedPlayback(text);
@@ -196,7 +280,7 @@ class TextToSpeechService {
   Future<void> pause() async {
     if (!_initialized) return;
     try {
-      if (_engine == TtsEngine.server) {
+      if (_shouldUseServer()) {
         await _player.pause();
         _handlePause();
       } else if (_deviceEngineAvailable) {
@@ -210,7 +294,7 @@ class TextToSpeechService {
   Future<void> resume() async {
     if (!_initialized) return;
     try {
-      if (_engine == TtsEngine.server) {
+      if (_shouldUseServer()) {
         if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
           _waitingNext = false;
           await _playNextIfBuffered(_session);
@@ -235,7 +319,7 @@ class TextToSpeechService {
       _expectedChunks = 0;
       _currentIndex = -1;
       _waitingNext = false;
-      if (_engine == TtsEngine.server) {
+      if (_shouldUseServer()) {
         await _player.stop();
         _handleCancel();
       } else {
@@ -254,17 +338,24 @@ class TextToSpeechService {
   /// Update TTS settings on-the-fly
   Future<void> updateSettings({
     Object? voice = const _VoiceNotProvided(),
+    Object? serverVoice = const _VoiceNotProvided(),
     double? speechRate,
     double? pitch,
     double? volume,
     TtsEngine? engine,
   }) async {
     final voiceProvided = voice is! _VoiceNotProvided;
+    final serverVoiceProvided = serverVoice is! _VoiceNotProvided;
     final voiceValue = voiceProvided ? voice as String? : null;
+    final serverVoiceValue = serverVoiceProvided
+        ? serverVoice as String?
+        : null;
     if (!_initialized || !_available) {
       // Allow engine and voice to update before init
       if (engine != null) _engine = engine;
       if (voiceProvided) _preferredVoice = voiceValue;
+      if (serverVoiceProvided) _serverPreferredVoice = serverVoiceValue;
+      if (speechRate != null) _speechRate = speechRate;
       return;
     }
 
@@ -275,22 +366,28 @@ class TextToSpeechService {
       if (voiceProvided) {
         _preferredVoice = voiceValue;
       }
+      if (serverVoiceProvided) {
+        _serverPreferredVoice = serverVoiceValue;
+      }
       if (volume != null) {
         await _tts.setVolume(volume);
       }
       if (speechRate != null) {
+        _speechRate = speechRate;
         await _tts.setSpeechRate(speechRate);
       }
       if (pitch != null) {
         await _tts.setPitch(pitch);
       }
-      // Set specific voice by name on device engine
-      if (_engine == TtsEngine.device && voiceProvided) {
+      // Set specific voice by name on device-capable engines
+      if (_engine != TtsEngine.server && voiceProvided) {
         await _setVoiceByName(_preferredVoice);
       }
     } catch (e) {
       _onError?.call(e.toString());
     }
+
+    _available = _computeAvailability();
   }
 
   /// Set voice by name, or use system default if null
@@ -343,7 +440,11 @@ class TextToSpeechService {
   /// Get available voices from the TTS engine
   Future<List<Map<String, dynamic>>> getAvailableVoices() async {
     if (!_initialized) {
-      await initialize(voice: _preferredVoice, engine: _engine);
+      await initialize(
+        deviceVoice: _preferredVoice,
+        serverVoice: _serverPreferredVoice,
+        engine: _engine,
+      );
     }
 
     if (_engine == TtsEngine.server && _api != null) {
@@ -425,6 +526,10 @@ class TextToSpeechService {
   }
 
   Future<String?> _resolveServerVoice() async {
+    final serverSelected = _serverPreferredVoice?.trim();
+    if (serverSelected != null && serverSelected.isNotEmpty) {
+      return serverSelected;
+    }
     final selected = _preferredVoice?.trim();
     if (selected != null && selected.isNotEmpty) {
       return selected;
@@ -545,9 +650,19 @@ class TextToSpeechService {
     String? voice,
     int session,
   ) async {
-    return await _api!.generateSpeech(text: text, voice: voice);
+    return await _api!.generateSpeech(
+      text: text,
+      voice: voice,
+      speed: _speechRate,
+    );
   }
 
+  /// Splits [text] into the chunks used for playback sequencing.
+  ///
+  /// This mirrors the server-side streaming behavior so UI consumers can stay
+  /// in sync with sentence indices reported during playback.
+  List<String> splitTextForSpeech(String text) => _splitForTts(text);
+
   Future<void> _onAudioComplete() async {
     final session = _session;
     // If there are more expected chunks
@@ -580,43 +695,66 @@ class TextToSpeechService {
   }
 
   List<String> _splitForTts(String text) {
-    // Normalize whitespace
-    final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();
-    if (normalized.isEmpty) return const [];
+    // Mirrors OpenWebUI's extractSentencesForAudio implementation
+    // See: src/lib/utils/index.ts lines 953-970, 907-928
 
-    // Split on sentence-ending punctuation while keeping the delimiter
-    final parts = <String>[];
-    final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)");
-    int index = 0;
-    for (final match in sentenceRegex.allMatches('$normalized ')) {
-      final s = match.group(1) ?? '';
-      if (s.trim().isNotEmpty) parts.add(s.trim());
-      index = match.end;
-    }
-    if (index < normalized.length) {
-      final tail = normalized.substring(index).trim();
-      if (tail.isNotEmpty) parts.add(tail);
-    }
+    // 1. Preserve code blocks (replace with placeholders)
+    final codeBlocks = <String>[];
+    var processed = text;
+    var codeBlockIndex = 0;
 
-    // Fallback to length-based splits for very long segments
-    const maxLen = 300;
-    final chunks = <String>[];
-    for (final p in parts.isEmpty ? [normalized] : parts) {
-      if (p.length <= maxLen) {
-        chunks.add(p);
+    // Match triple backticks code blocks
+    final codeBlockRegex = RegExp(r'```[\s\S]*?```', multiLine: true);
+    processed = processed.replaceAllMapped(codeBlockRegex, (match) {
+      final placeholder = '\u0000$codeBlockIndex\u0000';
+      codeBlocks.add(match.group(0)!);
+      codeBlockIndex++;
+      return placeholder;
+    });
+
+    // 2. Split on sentence-ending punctuation: .!?
+    // OpenWebUI uses: /(?<=[.!?])\s+/
+    final sentences = processed
+        .split(RegExp(r'(?<=[.!?])\s+'))
+        .map((s) => s.trim())
+        .where((s) => s.isNotEmpty)
+        .toList();
+
+    // 3. Restore code blocks from placeholders
+    final restoredSentences = sentences
+        .map((sentence) {
+          return sentence.replaceAllMapped(RegExp(r'\u0000(\d+)\u0000'), (
+            match,
+          ) {
+            final idx = int.parse(match.group(1)!);
+            return idx < codeBlocks.length ? codeBlocks[idx] : '';
+          });
+        })
+        .where((s) => s.isNotEmpty)
+        .toList();
+
+    // 4. Merge short sentences (< 4 words OR < 50 chars)
+    // OpenWebUI logic from extractSentencesForAudio
+    final mergedChunks = <String>[];
+    for (final sentence in restoredSentences) {
+      if (mergedChunks.isEmpty) {
+        mergedChunks.add(sentence);
       } else {
-        // Try splitting on commas/spaces
-        var remaining = p;
-        while (remaining.length > maxLen) {
-          int cut = remaining.lastIndexOf(RegExp(r",\s|\s"), maxLen);
-          cut = cut <= 0 ? maxLen : cut;
-          chunks.add(remaining.substring(0, cut).trim());
-          remaining = remaining.substring(cut).trim();
+        final lastIndex = mergedChunks.length - 1;
+        final previousText = mergedChunks[lastIndex];
+        final wordCount = previousText.split(RegExp(r'\s+')).length;
+        final charCount = previousText.length;
+        // Merge if previous chunk is too short
+        if (wordCount < 4 || charCount < 50) {
+          mergedChunks[lastIndex] = '$previousText $sentence';
+        } else {
+          mergedChunks.add(sentence);
         }
-        if (remaining.isNotEmpty) chunks.add(remaining);
       }
     }
-    return chunks;
+    // Blank input yields no chunks rather than a single empty string.
+    if (mergedChunks.isNotEmpty) return mergedChunks;
+    return text.trim().isEmpty ? const [] : [text.trim()];
   }
 
   Future<void> _configurePreferredVoice() async {
diff --git a/lib/features/chat/services/voice_call_service.dart b/lib/features/chat/services/voice_call_service.dart
index 1037b38..1fb4123 100644
--- a/lib/features/chat/services/voice_call_service.dart
+++ b/lib/features/chat/services/voice_call_service.dart
@@ -108,11 +108,18 @@ class VoiceCallService {
       throw Exception('Voice input initialization failed');
     }
 
-    // Check if local STT is available
+    // Check if preferred STT path is available
     final hasLocalStt = _voiceInput.hasLocalStt;
-    if (!hasLocalStt) {
+    final hasServerStt = _voiceInput.hasServerStt;
+    final ready = switch (_voiceInput.preference) {
+      SttPreference.deviceOnly => hasLocalStt,
+      SttPreference.serverOnly => hasServerStt,
+      SttPreference.auto => hasLocalStt || hasServerStt,
+    };
+
+    if (!ready) {
       _updateState(VoiceCallState.error);
-      throw Exception('Speech recognition not available on this device');
+      throw Exception('Preferred speech recognition engine is unavailable');
     }
 
     // Check microphone permissions
@@ -125,9 +132,8 @@ class VoiceCallService {
     // Initialize TTS with current app settings (engine/voice/rate/pitch/volume)
     final settings = _ref.read(appSettingsProvider);
     await _tts.initialize(
-      voice: settings.ttsEngine == TtsEngine.server
-          ? settings.ttsServerVoiceId
-          : settings.ttsVoice,
+      deviceVoice: settings.ttsVoice,
+      serverVoice: settings.ttsServerVoiceId,
       speechRate: settings.ttsSpeechRate,
       pitch: settings.ttsPitch,
       volume: settings.ttsVolume,
@@ -202,10 +208,18 @@ class VoiceCallService {
       _listeningPaused = false;
       _accumulatedTranscript = '';
 
-      // Check if voice input is available
-      if (!_voiceInput.hasLocalStt) {
+      final hasLocalStt = _voiceInput.hasLocalStt;
+      final hasServerStt = _voiceInput.hasServerStt;
+      final pref = _voiceInput.preference;
+      final engineAvailable = switch (pref) {
+        SttPreference.deviceOnly => hasLocalStt,
+        SttPreference.serverOnly => hasServerStt,
+        SttPreference.auto => hasLocalStt || hasServerStt,
+      };
+
+      if (!engineAvailable) {
         _updateState(VoiceCallState.error);
-        throw Exception('Voice input not available on this device');
+        throw Exception('Preferred speech recognition engine is unavailable');
       }
 
       _updateState(VoiceCallState.listening);
@@ -572,11 +586,9 @@ VoiceCallService voiceCallService(Ref ref) {
   // Keep TTS settings in sync with app settings during a call
   ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
     // Update voice/engine and runtime parameters
-    final selectedVoice = next.ttsEngine == TtsEngine.server
-        ? next.ttsServerVoiceId
-        : next.ttsVoice;
     service._tts.updateSettings(
-      voice: selectedVoice,
+      voice: next.ttsVoice,
+      serverVoice: next.ttsServerVoiceId,
       speechRate: next.ttsSpeechRate,
       pitch: next.ttsPitch,
       volume: next.ttsVolume,
diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart
index 1c30c63..c1990aa 100644
--- a/lib/features/chat/services/voice_input_service.dart
+++ b/lib/features/chat/services/voice_input_service.dart
@@ -1,14 +1,19 @@
 import 'dart:async';
-import 'dart:io' show Platform;
+import 'dart:io' show File, Platform;
 
 import 'package:flutter/widgets.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 import 'package:riverpod_annotation/riverpod_annotation.dart';
-import 'package:record/record.dart';
+import 'package:mic_stream_recorder/mic_stream_recorder.dart';
 import 'package:stts/stts.dart';
+import 'package:path/path.dart' as p;
+import 'package:path_provider/path_provider.dart';
+
+import '../../../core/providers/app_providers.dart';
+import '../../../core/services/api_service.dart';
+import '../../../core/services/settings_service.dart';
 
 part 'voice_input_service.g.dart';
-// Removed path imports as server transcription fallback was removed
 
 // Lightweight replacement for previous stt.LocaleName used across the UI
 class LocaleName {
@@ -18,31 +23,50 @@ class LocaleName {
 }
 
 class VoiceInputService {
-  final AudioRecorder _recorder = AudioRecorder();
+  final MicStreamRecorder _recorder = MicStreamRecorder();
   final Stt _speech = Stt();
+  final ApiService? _api;
+  final Ref? _ref;
   bool _isInitialized = false;
   bool _isListening = false;
   bool _localSttAvailable = false;
+  SttPreference _preference = SttPreference.auto;
+  bool _usingServerStt = false;
   String? _selectedLocaleId;
   List<LocaleName> _locales = const [];
   StreamController<String>? _textStreamController;
   String _currentText = '';
-  // Public stream for UI waveform visualization (emits partial text length as proxy)
   StreamController<int>? _intensityController;
   Stream<int> get intensityStream =>
       _intensityController?.stream ?? const Stream<int>.empty();
   int _lastIntensity = 0;
   Timer? _intensityDecayTimer;
+  Timer? _silenceTimer;
+  bool _hasDetectedSpeech = false;
+  int _amplitudeCallbackCount = 0;
+  Timer? _amplitudeFallbackTimer;
 
-  /// Public stream of partial/final transcript strings and special audio tokens.
   Stream<String> get textStream =>
       _textStreamController?.stream ?? const Stream<String>.empty();
   Timer? _autoStopTimer;
-  StreamSubscription<Amplitude>? _ampSub;
+  StreamSubscription<double>? _ampSub;
   StreamSubscription<SttRecognition>? _sttResultSub;
   StreamSubscription<SttState>? _sttStateSub;
 
   bool get isSupportedPlatform => Platform.isAndroid || Platform.isIOS;
+  bool get hasServerStt => _api != null;
+  SttPreference get preference => _preference;
+  bool get allowsServerFallback => _preference != SttPreference.deviceOnly;
+  bool get prefersServerOnly => _preference == SttPreference.serverOnly;
+  bool get prefersDeviceOnly => _preference == SttPreference.deviceOnly;
+
+  VoiceInputService({ApiService? api, Ref? ref})
+      : _api = api,
+        _ref = ref;
+
+  void updatePreference(SttPreference preference) {
+    _preference = preference;
+  }
 
   Future<bool> initialize() async {
     if (_isInitialized) return true;
@@ -87,17 +111,15 @@ class VoiceInputService {
 
   Future<bool> checkPermissions() async {
     try {
-      // Prefer stts permission check which will request microphone permission
-      final mic = await _speech.hasPermission();
-      if (mic) return true;
-      return await _recorder.hasPermission();
+      return await _speech.hasPermission();
     } catch (_) {
       return false;
     }
   }
 
   bool get isListening => _isListening;
-  bool get isAvailable => _isInitialized; // service usable (local or fallback)
+  bool get isAvailable =>
+      _isInitialized && (_localSttAvailable || hasServerStt);
   bool get hasLocalStt => _localSttAvailable;
 
   // Add a method to check if on-device STT is properly supported
@@ -166,7 +188,7 @@ class VoiceInputService {
     }
 
     if (_isListening) {
-      stopListening();
+      unawaited(stopListening());
     }
 
     _textStreamController = StreamController<String>.broadcast();
@@ -174,82 +196,109 @@ class VoiceInputService {
     _isListening = true;
     _intensityController = StreamController<int>.broadcast();
     _lastIntensity = 0;
+    _usingServerStt = false;
 
-    // Begin a gentle decay timer so the UI level bars fall when silent
-    _intensityDecayTimer?.cancel();
-    _intensityDecayTimer = Timer.periodic(const Duration(milliseconds: 120), (
-      t,
-    ) {
-      if (!_isListening) return;
-      if (_lastIntensity <= 0) return;
-      _lastIntensity = (_lastIntensity - 1).clamp(0, 10);
-      try {
-        _intensityController?.add(_lastIntensity);
-      } catch (_) {}
-    });
+    _startIntensityDecayTimer();
+
+    final bool canUseLocal = _localSttAvailable;
+    final bool serverAvailable = hasServerStt;
+    final bool shouldUseLocal =
+        canUseLocal && _preference != SttPreference.serverOnly;
+    final bool shouldUseServer =
+        serverAvailable &&
+        (_preference == SttPreference.serverOnly || !shouldUseLocal);
+
+    if (shouldUseLocal) {
+      _autoStopTimer?.cancel();
+      _autoStopTimer = Timer(const Duration(seconds: 60), () {
+        if (_isListening) {
+          unawaited(_stopListening());
+        }
+      });
 
-    // Check if speech recognition is available before trying to use it
-    if (_localSttAvailable) {
-      // Schedule a check for speech recognition availability
       Future.microtask(() async {
         try {
           final isStillAvailable = await _speech.isSupported();
           if (!isStillAvailable && _isListening) {
-            // Speech recognition no longer available; stop listening
             _localSttAvailable = false;
-            _stopListening();
-            return;
+            if (hasServerStt && allowsServerFallback) {
+              unawaited(_beginServerFallback());
+            } else {
+              unawaited(_stopListening());
+            }
           }
-        } catch (e) {
+        } catch (_) {
           // ignore availability check errors
         }
       });
 
-      // Local on-device STT path
-      _autoStopTimer?.cancel();
-      _autoStopTimer = Timer(const Duration(seconds: 60), () {
-        if (_isListening) {
-          _stopListening();
-        }
-      });
-
-      // Listen for results and state changes; keep subscriptions so we can cancel later
       _sttResultSub = _speech.onResultChanged.listen((SttRecognition result) {
         if (!_isListening) return;
         final prevLen = _currentText.length;
         _currentText = result.text;
         _textStreamController?.add(_currentText);
-        // Map number of new characters to a rough 0..10 intensity
         final delta = (_currentText.length - prevLen).clamp(0, 50);
-        final mapped = (delta / 5.0).ceil(); // 0 chars -> 0, 1-5 -> 1, ...
+        final mapped = (delta / 5.0).ceil();
         _lastIntensity = mapped.clamp(0, 10);
         try {
           _intensityController?.add(_lastIntensity);
         } catch (_) {}
         if (result.isFinal) {
-          _stopListening();
+          unawaited(_stopListening());
         }
       }, onError: (_) {});
 
       _sttStateSub = _speech.onStateChanged.listen((_) {}, onError: (_) {});
 
-      try {
-        if (_selectedLocaleId != null) {
-          _speech.setLanguage(_selectedLocaleId!).catchError((_) {});
-        }
-        // Start recognition (no await blocking the sync flow)
-        _speech.start(SttRecognitionOptions(punctuation: true)).catchError((_) {
-          // On-device STT failed; stop listening entirely as server transcription is removed
+      Future(() async {
+        try {
+          if (_selectedLocaleId != null) {
+            await _speech.setLanguage(_selectedLocaleId!);
+          }
+          await _speech.start(SttRecognitionOptions(punctuation: true));
+        } catch (error) {
           _localSttAvailable = false;
-          _stopListening();
-        });
-      } catch (e) {
-        _localSttAvailable = false;
-        _stopListening();
-      }
+          if (!_isListening) return;
+          if (hasServerStt && allowsServerFallback) {
+            await _beginServerFallback();
+          } else {
+            _textStreamController?.addError(error);
+            await _stopListening();
+          }
+        }
+      });
+    } else if (shouldUseServer) {
+      _usingServerStt = true;
+      _autoStopTimer?.cancel();
+      _autoStopTimer = Timer(const Duration(seconds: 90), () {
+        if (_isListening) {
+          unawaited(_stopListening());
+        }
+      });
+      Future(() async {
+        try {
+          await _startServerRecording();
+        } catch (error) {
+          if (!_isListening) return;
+          _textStreamController?.addError(error);
+          await _stopListening();
+        }
+      });
     } else {
-      // No local STT available; stop immediately since server transcription is removed
-      _stopListening();
+      final Exception error;
+      if (prefersDeviceOnly) {
+        error = Exception(
+          'On-device speech recognition required but unavailable',
+        );
+      } else if (prefersServerOnly) {
+        error = Exception('Server speech-to-text is not configured');
+      } else {
+        error = Exception('Speech recognition not available on this device');
+      }
+      Future.microtask(() {
+        _textStreamController?.addError(error);
+        unawaited(_stopListening());
+      });
     }
 
     return _textStreamController!.stream;
@@ -258,14 +307,11 @@ class VoiceInputService {
   /// Centralized entry point to begin voice recognition.
   /// Ensures initialization and microphone permission before starting.
   Future<Stream<String>> beginListening() async {
-    // Ensure service is ready
     await initialize();
-    // Ensure microphone permission (triggers OS prompt if needed)
     final hasMic = await checkPermissions();
     if (!hasMic) {
       throw Exception('Microphone permission not granted');
     }
-    // Start listening and return the transcript stream
     return startListening();
   }
 
@@ -277,53 +323,349 @@ class VoiceInputService {
     if (!_isListening) return;
 
     _isListening = false;
-    if (_localSttAvailable) {
-      try {
-        await _speech.stop();
-      } catch (_) {}
-      // Cancel STT subscriptions
-      try {
-        _sttResultSub?.cancel();
-      } catch (_) {}
-      _sttResultSub = null;
-      try {
-        _sttStateSub?.cancel();
-      } catch (_) {}
-      _sttStateSub = null;
-    }
 
     _autoStopTimer?.cancel();
     _autoStopTimer = null;
-    _ampSub?.cancel();
+
+    _silenceTimer?.cancel();
+    _silenceTimer = null;
+
+    _amplitudeFallbackTimer?.cancel();
+    _amplitudeFallbackTimer = null;
+
+    if (_usingServerStt) {
+      await _finalizeServerRecording();
+    } else {
+      await _stopLocalStt();
+    }
+
+    await _ampSub?.cancel();
     _ampSub = null;
+
     _intensityDecayTimer?.cancel();
     _intensityDecayTimer = null;
     _lastIntensity = 0;
 
-    if (_currentText.isNotEmpty) {
+    if (!_usingServerStt && _currentText.isNotEmpty) {
       _textStreamController?.add(_currentText);
     }
 
-    _textStreamController?.close();
-    _textStreamController = null;
-    _intensityController?.close();
-    _intensityController = null;
+    await _closeControllers();
+
+    _usingServerStt = false;
+    _hasDetectedSpeech = false;
+  }
+
+  Future<void> _stopLocalStt() async {
+    if (_sttResultSub != null) {
+      try {
+        await _sttResultSub?.cancel();
+      } catch (_) {}
+      _sttResultSub = null;
+    }
+    if (_sttStateSub != null) {
+      try {
+        await _sttStateSub?.cancel();
+      } catch (_) {}
+      _sttStateSub = null;
+    }
+    // Only ask the recognizer to stop while local STT is marked available.
+    if (_localSttAvailable) {
+      try {
+        await _speech.stop();
+      } catch (_) {}
+    }
+  }
+
+  Future<void> _beginServerFallback() async {
+    if (!allowsServerFallback) {
+      _textStreamController?.addError(
+        Exception('Server speech-to-text disabled in preferences'),
+      );
+      await _stopListening();
+      return;
+    }
+    await _stopLocalStt();
+    if (!hasServerStt) {
+      _textStreamController?.addError(
+        Exception('Server speech-to-text unavailable'),
+      );
+      await _stopListening();
+      return;
+    }
+    // Switch this session to server mode with a 90-second hard stop.
+    _usingServerStt = true;
+    _autoStopTimer?.cancel();
+    _autoStopTimer = Timer(const Duration(seconds: 90), () {
+      if (_isListening) {
+        unawaited(_stopListening());
+      }
+    });
+    // A recording failure is surfaced on the text stream, then we stop.
+    try {
+      await _startServerRecording();
+    } catch (error) {
+      _textStreamController?.addError(error);
+      await _stopListening();
+    }
+  }
+
+  Future<void> _startServerRecording() async {
+    final path = await _createRecordingPath();
+    _hasDetectedSpeech = false;
+    // Begin capturing microphone audio into the freshly created path.
+    await _recorder.startRecording(path);
+    // Drop any previous amplitude subscription and fallback timer.
+    await _ampSub?.cancel();
+    _amplitudeFallbackTimer?.cancel();
+    _amplitudeCallbackCount = 0;
+    // Feed each amplitude sample to the level meter and silence detection.
+    _ampSub = _recorder.amplitudeStream.listen((amplitude) {
+      _amplitudeCallbackCount++;
+      if (!_isListening) return;
+
+      _lastIntensity = _normalizedToIntensity(amplitude);
+      try {
+        _intensityController?.add(_lastIntensity);
+      } catch (_) {}
+
+      _handleServerAmplitude(amplitude);
+    });
+    // If no amplitude sample arrives within 1 s, arm a 15 s hard stop.
+    _amplitudeFallbackTimer = Timer(const Duration(seconds: 1), () {
+      if (_amplitudeCallbackCount == 0) {
+        _silenceTimer = Timer(const Duration(seconds: 15), () {
+          if (_isListening && _usingServerStt) {
+            unawaited(_stopListening());
+          }
+        });
+      }
+    });
+  }
+
+  /// Tracks speech versus silence from one recorder amplitude sample and
+  /// arms a timer that stops listening when quiet follows detected speech.
+  void _handleServerAmplitude(double amplitude) {
+    const loudnessFloor = 0.55;
+    if (!(_usingServerStt && _isListening)) return;
+    if (amplitude.isNaN || amplitude.isInfinite) return;
+    if (amplitude > loudnessFloor) {
+      _hasDetectedSpeech = true;
+      _silenceTimer?.cancel();
+      _silenceTimer = null;
+      return;
+    }
+    if (!_hasDetectedSpeech || _silenceTimer != null) return;
+    final settings = _ref?.read(appSettingsProvider);
+    final quietMs = settings?.voiceSilenceDuration ?? 2000;
+    _silenceTimer = Timer(Duration(milliseconds: quietMs), () {
+      if (_isListening && _usingServerStt) unawaited(_stopListening());
+    });
+  }
+
+  /// Returns a timestamped `.m4a` path inside the temporary directory.
+  Future<String> _createRecordingPath() async {
+    final tempDir = await getTemporaryDirectory();
+    final stamp = DateTime.now().millisecondsSinceEpoch;
+    return p.join(tempDir.path, 'conduit_voice_$stamp.m4a');
+  }
+
+  /// Stops the recorder, uploads the captured audio for transcription and
+  /// emits the transcript (or the failure) on the text stream.
+  Future<void> _finalizeServerRecording() async {
+    final api = _api;
+    if (api == null) return;
+    // Stop inside the try: a recorder failure must surface on the stream
+    // instead of aborting the caller's teardown.
+    File? file;
+    try {
+      final path = await _recorder.stopRecording();
+      if (path == null || path.isEmpty) return;
+      file = File(path);
+      if (!await file.exists()) return;
+      final bytes = await file.readAsBytes();
+      if (bytes.isEmpty) return;
+      final response = await api.transcribeSpeech(
+        audioBytes: bytes,
+        fileName: p.basename(path),
+        mimeType: 'audio/mp4',
+        language: _languageForServer(),
+      );
+      final transcript = _extractTranscriptionText(response)?.trim();
+      if (transcript == null || transcript.isEmpty) {
+        throw StateError('Empty transcription result');
+      }
+      _currentText = transcript;
+      _textStreamController?.add(_currentText);
+    } catch (error) {
+      _textStreamController?.addError(error);
+    } finally {
+      if (file != null) unawaited(_cleanupRecordingFile(file));
+    }
+  }
+
+  /// Deletes [file] if it exists; any failure is deliberately ignored.
+  Future<void> _cleanupRecordingFile(File file) async {
+    try {
+      final stillThere = await file.exists();
+      if (stillThere) await file.delete();
+    } catch (_) {}
+  }
+
+  /// Picks the language code sent with server transcription requests: the
+  /// lower-cased part of the selected locale id before the first `-` or `_`
+  /// (when at least two characters long), else the platform locale's
+  /// language code, else null.
+  String? _languageForServer() {
+    final selected = _selectedLocaleId ?? '';
+    if (selected.isNotEmpty) {
+      final code = selected.split(RegExp('[-_]')).first.toLowerCase();
+      if (code.length >= 2) return code;
+    }
+    try {
+      final device = WidgetsBinding.instance.platformDispatcher.locale;
+      final code = device.languageCode.toLowerCase();
+      if (code.isNotEmpty) return code;
+    } catch (_) {}
+    return null;
+  }
+
+  String? _extractTranscriptionText(Map<String, dynamic> data) {
+    final direct = data['text'];
+    if (direct is String && direct.trim().isNotEmpty) {
+      return direct;
+    }
+    // Next: a top-level display_text / DisplayText field.
+    final display = data['display_text'] ?? data['DisplayText'];
+    if (display is String && display.trim().isNotEmpty) {
+      return display;
+    }
+    // Next: a nested result.text field.
+    final result = data['result'];
+    if (result is Map<String, dynamic>) {
+      final resultText = result['text'];
+      if (resultText is String && resultText.trim().isNotEmpty) {
+        return resultText;
+      }
+    }
+    // Next: the first combinedRecognizedPhrases entry (map or plain string).
+    final combined = data['combinedRecognizedPhrases'];
+    if (combined is List && combined.isNotEmpty) {
+      final first = combined.first;
+      if (first is Map<String, dynamic>) {
+        final candidate =
+            first['display'] ??
+            first['Display'] ??
+            first['transcript'] ??
+            first['text'];
+        if (candidate is String && candidate.trim().isNotEmpty) {
+          return candidate;
+        }
+      } else if (first is String && first.trim().isNotEmpty) {
+        return first;
+      }
+    }
+    // Next: results.channels[0].alternatives[0] transcript or text.
+    final results = data['results'];
+    if (results is Map<String, dynamic>) {
+      final channels = results['channels'];
+      if (channels is List && channels.isNotEmpty) {
+        final channel = channels.first;
+        if (channel is Map<String, dynamic>) {
+          final alternatives = channel['alternatives'];
+          if (alternatives is List && alternatives.isNotEmpty) {
+            final alternative = alternatives.first;
+            if (alternative is Map<String, dynamic>) {
+              final transcript =
+                  alternative['transcript'] ?? alternative['text'];
+              if (transcript is String && transcript.trim().isNotEmpty) {
+                return transcript;
+              }
+            }
+          }
+        }
+      }
+    }
+    // Last: join the trimmed text of every segment with single spaces.
+    final segments = data['segments'];
+    if (segments is List && segments.isNotEmpty) {
+      final buffer = StringBuffer();
+      for (final segment in segments) {
+        if (segment is Map<String, dynamic>) {
+          final text = segment['text'];
+          if (text is String && text.trim().isNotEmpty) {
+            buffer.write(text.trim());
+            buffer.write(' ');
+          }
+        } else if (segment is String && segment.trim().isNotEmpty) {
+          buffer.write(segment.trim());
+          buffer.write(' ');
+        }
+      }
+      final combinedText = buffer.toString().trim();
+      if (combinedText.isNotEmpty) {
+        return combinedText;
+      }
+    }
+    // None of the recognized fields held usable text.
+    return null;
+  }
+
+  /// Scales [value] by ten onto the 0..10 meter; non-finite input gives 0.
+  int _normalizedToIntensity(double value) {
+    return value.isFinite ? (value * 10).round().clamp(0, 10) : 0;
+  }
+
+  Future<void> _closeControllers() async {
+    if (_textStreamController != null) {
+      try {
+        await _textStreamController?.close();
+      } catch (_) {}
+      _textStreamController = null;
+    }
+    if (_intensityController != null) {
+      try {
+        await _intensityController?.close();
+      } catch (_) {}
+      _intensityController = null;
+    }
+  }
+
+  /// Restarts the periodic timer that lowers the reported intensity by one
+  /// step every 120 ms while listening, so the level meter falls off.
+  void _startIntensityDecayTimer() {
+    const tick = Duration(milliseconds: 120);
+    _intensityDecayTimer?.cancel();
+    _intensityDecayTimer = Timer.periodic(tick, (_) {
+      if (!_isListening || _lastIntensity <= 0) return;
+      _lastIntensity = (_lastIntensity - 1).clamp(0, 10);
+      try {
+        _intensityController?.add(_lastIntensity);
+      } catch (_) {}
+    });
+  }
 
   void dispose() {
     stopListening();
+    _silenceTimer?.cancel();
     try {
       _speech.dispose().catchError((_) {});
     } catch (_) {}
   }
-
-  // Recording fallback removed; only on-device STT is supported now
-
-  // Native locales not used in server transcription mode
 }
 
 final voiceInputServiceProvider = Provider<VoiceInputService>((ref) {
-  return VoiceInputService();
+  final api = ref.watch(apiServiceProvider);
+  final service = VoiceInputService(api: api, ref: ref);
+  final currentSettings = ref.read(appSettingsProvider);
+  service.updatePreference(currentSettings.sttPreference);
+  ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
+    if (previous?.sttPreference != next.sttPreference) {
+      service.updatePreference(next.sttPreference);
+    }
+  });
+  ref.onDispose(service.dispose);
+  return service;
 });
 
 @Riverpod(keepAlive: true)
@@ -332,8 +674,16 @@ Future<bool> voiceInputAvailable(Ref ref) async {
   if (!service.isSupportedPlatform) return false;
   final initialized = await service.initialize();
   if (!initialized) return false;
-  // If local STT exists, we consider it available; otherwise ensure mic permission for fallback
-  if (service.hasLocalStt) return true;
+  switch (service.preference) {
+    case SttPreference.deviceOnly:
+      return service.hasLocalStt;
+    case SttPreference.serverOnly:
+      return service.hasServerStt;
+    case SttPreference.auto:
+      if (service.hasLocalStt) return true;
+      if (!service.hasServerStt) return false;
+      break;
+  }
   final hasPermission = await service.checkPermissions();
   if (!hasPermission) return false;
   return service.isAvailable;
@@ -349,3 +699,18 @@ final voiceIntensityStreamProvider = StreamProvider<int>((ref) {
   final service = ref.watch(voiceInputServiceProvider);
   return service.intensityStream;
 });
+
+final localVoiceRecognitionAvailableProvider = FutureProvider<bool>((
+  ref,
+) async {
+  final service = ref.watch(voiceInputServiceProvider);
+  final initialized = await service.initialize();
+  if (!initialized) return false;
+  if (service.hasLocalStt) return true;
+  return service.checkOnDeviceSupport();
+});
+
+final serverVoiceRecognitionAvailableProvider = Provider<bool>((ref) {
+  final service = ref.watch(voiceInputServiceProvider);
+  return service.hasServerStt;
+});
diff --git a/lib/features/chat/views/chat_page.dart b/lib/features/chat/views/chat_page.dart
index 7a28e67..2e49411 100644
--- a/lib/features/chat/views/chat_page.dart
+++ b/lib/features/chat/views/chat_page.dart
@@ -2380,7 +2380,7 @@ class _VoiceInputSheetState extends ConsumerState<_VoiceInputSheet> {
     }
   }
 
-  // Server transcription removed; only on-device STT is supported
+  // When on-device STT is unavailable we fall back to server transcription.
 
   Future<void> _stopListening() async {
     _intensitySub?.cancel();
diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart
index bfd348c..570ee3f 100644
--- a/lib/features/chat/widgets/assistant_message_widget.dart
+++ b/lib/features/chat/widgets/assistant_message_widget.dart
@@ -71,6 +71,11 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
   bool _allowTypingIndicator = false;
   Timer? _typingGateTimer;
   String _ttsPlainText = '';
+  Timer? _ttsPlainTextDebounce;
+  Map<String, dynamic>? _pendingTtsPlainTextPayload;
+  String? _pendingTtsPlainTextSource;
+  String? _lastAppliedTtsPlainTextSource;
+  int _ttsPlainTextRequestId = 0;
   // Active version index (-1 means current/live content)
   int _activeVersionIndex = -1;
   // press state handled by shared ChatActionButton
@@ -162,13 +167,11 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
     final rSegs = ReasoningParser.segments(raw);
 
     final out = <MessageSegment>[];
-    final textBuf = StringBuffer();
     final textSegments = <String>[];
     if (rSegs == null || rSegs.isEmpty) {
       final tSegs = ToolCallsParser.segments(raw);
       if (tSegs == null || tSegs.isEmpty) {
         out.add(MessageSegment.text(raw));
-        textBuf.write(raw);
         textSegments.add(raw);
       } else {
         for (final s in tSegs) {
@@ -176,7 +179,6 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
             out.add(MessageSegment.tool(s.entry!));
           } else if ((s.text ?? '').isNotEmpty) {
             out.add(MessageSegment.text(s.text!));
-            textBuf.write(s.text);
             textSegments.add(s.text!);
           }
         }
@@ -190,7 +192,6 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
           final tSegs = ToolCallsParser.segments(t);
           if (tSegs == null || tSegs.isEmpty) {
             out.add(MessageSegment.text(t));
-            textBuf.write(t);
             textSegments.add(t);
           } else {
             for (final s in tSegs) {
@@ -198,7 +199,6 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
                 out.add(MessageSegment.tool(s.entry!));
               } else if ((s.text ?? '').isNotEmpty) {
                 out.add(MessageSegment.text(s.text!));
-                textBuf.write(s.text);
                 textSegments.add(s.text!);
               }
             }
@@ -208,23 +208,15 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
     }
 
     final segments = out.isEmpty ? [MessageSegment.text(raw)] : out;
-    String speechText;
-    try {
-      final worker = ref.read(workerManagerProvider);
-      speechText = await worker.schedule<Map<String, dynamic>, String>(
-        _buildTtsPlainTextWorker,
-        {'segments': textSegments, 'fallback': raw},
-        debugLabel: 'tts_plain_text',
-      );
-    } catch (_) {
-      speechText = _buildTtsPlainTextFallback(textSegments, raw);
-    }
 
     if (!mounted) return;
     setState(() {
       _segments = segments;
-      _ttsPlainText = speechText;
     });
+    _scheduleTtsPlainTextBuild(
+      List<String>.from(textSegments, growable: false),
+      raw,
+    );
     _updateTypingIndicatorGate();
   }
 
@@ -290,6 +282,96 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
     return result;
   }
 
+  void _scheduleTtsPlainTextBuild(List<String> segments, String raw) {
+    final hasContent =
+        segments.any((segment) => segment.trim().isNotEmpty) ||
+        raw.trim().isNotEmpty;
+    if (!hasContent) {
+      _pendingTtsPlainTextPayload = null;
+      _pendingTtsPlainTextSource = null;
+      _lastAppliedTtsPlainTextSource = '';
+      if (_ttsPlainText.isNotEmpty && mounted) {
+        setState(() {
+          _ttsPlainText = '';
+        });
+      }
+      return;
+    }
+
+    if (_pendingTtsPlainTextPayload == null &&
+        raw == _lastAppliedTtsPlainTextSource) {
+      return;
+    }
+    if (raw == _pendingTtsPlainTextSource &&
+        _pendingTtsPlainTextPayload != null) {
+      return;
+    }
+
+    final pendingSegments = List<String>.from(segments, growable: false);
+    _pendingTtsPlainTextPayload = {
+      'segments': pendingSegments,
+      'fallback': raw,
+    };
+    _pendingTtsPlainTextSource = raw;
+
+    final delay = widget.isStreaming
+        ? const Duration(milliseconds: 250)
+        : Duration.zero;
+
+    _ttsPlainTextDebounce?.cancel();
+    if (delay == Duration.zero) {
+      _runPendingTtsPlainTextBuild();
+    } else {
+      _ttsPlainTextDebounce = Timer(delay, _runPendingTtsPlainTextBuild);
+    }
+  }
+
+  void _runPendingTtsPlainTextBuild() {
+    _ttsPlainTextDebounce?.cancel();
+    _ttsPlainTextDebounce = null;
+
+    final payload = _pendingTtsPlainTextPayload;
+    final source = _pendingTtsPlainTextSource;
+    if (payload == null || source == null) {
+      return;
+    }
+
+    _pendingTtsPlainTextPayload = null;
+    _pendingTtsPlainTextSource = null;
+    final requestId = ++_ttsPlainTextRequestId;
+    unawaited(_executeTtsPlainTextBuild(payload, source, requestId));
+  }
+
+  Future<void> _executeTtsPlainTextBuild(
+    Map<String, dynamic> payload,
+    String raw,
+    int requestId,
+  ) async {
+    final segments = (payload['segments'] as List).cast<String>();
+    String speechText;
+    try {
+      final worker = ref.read(workerManagerProvider);
+      speechText = await worker.schedule<Map<String, dynamic>, String>(
+        _buildTtsPlainTextWorker,
+        payload,
+        debugLabel: 'tts_plain_text',
+      );
+    } catch (_) {
+      speechText = _buildTtsPlainTextFallback(segments, raw);
+    }
+
+    if (!mounted || requestId != _ttsPlainTextRequestId) {
+      return;
+    }
+
+    _lastAppliedTtsPlainTextSource = raw;
+    if (_ttsPlainText != speechText) {
+      setState(() {
+        _ttsPlainText = speechText;
+      });
+    }
+  }
+
   // No streaming-specific markdown fixes needed here; handled by Markdown widget
 
   Widget _buildToolCallTile(ToolCallEntry tc) {
@@ -622,6 +704,9 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
   @override
   void dispose() {
     _typingGateTimer?.cancel();
+    _ttsPlainTextDebounce?.cancel();
+    _pendingTtsPlainTextPayload = null;
+    _pendingTtsPlainTextSource = null;
     _fadeController.dispose();
     _slideController.dispose();
     super.dispose();
diff --git a/lib/features/chat/widgets/modern_chat_input.dart b/lib/features/chat/widgets/modern_chat_input.dart
index 6fb4e39..d3daf19 100644
--- a/lib/features/chat/widgets/modern_chat_input.dart
+++ b/lib/features/chat/widgets/modern_chat_input.dart
@@ -2460,7 +2460,7 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
     HapticFeedback.selectionClick();
   }
 
-  // Server transcription removed; only on-device STT updates the input text
+  // When on-device STT is unavailable we rely on server transcription.
 
   void _showVoiceUnavailable(String message) {
     if (!mounted) return;
diff --git a/lib/features/profile/views/app_customization_page.dart b/lib/features/profile/views/app_customization_page.dart
index c8adde3..86c81bf 100644
--- a/lib/features/profile/views/app_customization_page.dart
+++ b/lib/features/profile/views/app_customization_page.dart
@@ -14,6 +14,7 @@ import '../../../shared/utils/ui_utils.dart';
 import '../../../core/providers/app_providers.dart';
 import '../../../l10n/app_localizations.dart';
 import '../../chat/providers/text_to_speech_provider.dart';
+import '../../chat/services/voice_input_service.dart';
 
 class AppCustomizationPage extends ConsumerWidget {
   const AppCustomizationPage({super.key});
@@ -70,6 +71,8 @@ class AppCustomizationPage extends ConsumerWidget {
               languageLabel,
             ),
             const SizedBox(height: Spacing.xl),
+            _buildSttSection(context, ref, settings),
+            const SizedBox(height: Spacing.xl),
             _buildTtsDropdownSection(context, ref, settings),
             const SizedBox(height: Spacing.xl),
             _buildChatSection(context, ref, settings),
@@ -468,6 +471,303 @@ class AppCustomizationPage extends ConsumerWidget {
     );
   }
 
+  Widget _buildSttSection(
+    BuildContext context,
+    WidgetRef ref,
+    AppSettings settings,
+  ) {
+    final theme = context.conduitTheme;
+    final l10n = AppLocalizations.of(context)!;
+    final localSupport = ref.watch(localVoiceRecognitionAvailableProvider);
+    final bool localAvailable = localSupport.maybeWhen(
+      data: (value) => value,
+      orElse: () => false,
+    );
+    final bool localLoading = localSupport.isLoading;
+    final bool serverAvailable = ref.watch(
+      serverVoiceRecognitionAvailableProvider,
+    );
+    final notifier = ref.read(appSettingsProvider.notifier);
+    final description = _sttPreferenceDescription(l10n, settings.sttPreference);
+
+    final warnings = <String>[];
+    if (settings.sttPreference == SttPreference.deviceOnly &&
+        !localAvailable &&
+        !localLoading) {
+      warnings.add(l10n.sttDeviceUnavailableWarning);
+    }
+    if (settings.sttPreference == SttPreference.serverOnly &&
+        !serverAvailable) {
+      warnings.add(l10n.sttServerUnavailableWarning);
+    }
+
+    final bool autoSelectable =
+        localAvailable || serverAvailable || localLoading;
+    final bool deviceSelectable = localAvailable || localLoading;
+    final bool serverSelectable = serverAvailable;
+
+    return Column(
+      crossAxisAlignment: CrossAxisAlignment.start,
+      children: [
+        Text(
+          l10n.sttSettings,
+          style:
+              theme.headingSmall?.copyWith(color: theme.sidebarForeground) ??
+              TextStyle(color: theme.sidebarForeground, fontSize: 18),
+        ),
+        const SizedBox(height: Spacing.sm),
+        ConduitCard(
+          padding: const EdgeInsets.all(Spacing.md),
+          child: Column(
+            crossAxisAlignment: CrossAxisAlignment.start,
+            children: [
+              Row(
+                children: [
+                  _buildIconBadge(
+                    context,
+                    UiUtils.platformIcon(
+                      ios: CupertinoIcons.mic,
+                      android: Icons.mic,
+                    ),
+                    color: theme.buttonPrimary,
+                  ),
+                  const SizedBox(width: Spacing.md),
+                  Expanded(
+                    child: Text(
+                      l10n.sttEngineLabel,
+                      style:
+                          theme.bodyMedium?.copyWith(
+                            color: theme.sidebarForeground,
+                            fontWeight: FontWeight.w600,
+                          ) ??
+                          TextStyle(
+                            color: theme.sidebarForeground,
+                            fontSize: 14,
+                            fontWeight: FontWeight.w600,
+                          ),
+                    ),
+                  ),
+                ],
+              ),
+              const SizedBox(height: Spacing.sm),
+              Wrap(
+                spacing: Spacing.sm,
+                runSpacing: Spacing.sm,
+                children: [
+                  ChoiceChip(
+                    label: Text(l10n.sttEngineAuto),
+                    selected: settings.sttPreference == SttPreference.auto,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.sttPreference == SttPreference.auto
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(alpha: 0.2),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.sttPreference == SttPreference.auto
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary,
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: autoSelectable
+                        ? (value) {
+                            if (value) {
+                              notifier.setSttPreference(SttPreference.auto);
+                            }
+                          }
+                        : null,
+                  ),
+                  ChoiceChip(
+                    label: Text(l10n.sttEngineDevice),
+                    selected:
+                        settings.sttPreference == SttPreference.deviceOnly,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.sttPreference == SttPreference.deviceOnly
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(alpha: 0.2),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.sttPreference == SttPreference.deviceOnly
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary,
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: deviceSelectable
+                        ? (value) {
+                            if (value) {
+                              notifier.setSttPreference(
+                                SttPreference.deviceOnly,
+                              );
+                            }
+                          }
+                        : null,
+                  ),
+                  ChoiceChip(
+                    label: Text(l10n.sttEngineServer),
+                    selected:
+                        settings.sttPreference == SttPreference.serverOnly,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.sttPreference == SttPreference.serverOnly
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(alpha: 0.2),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.sttPreference == SttPreference.serverOnly
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary,
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: serverSelectable
+                        ? (value) {
+                            if (value) {
+                              notifier.setSttPreference(
+                                SttPreference.serverOnly,
+                              );
+                            }
+                          }
+                        : null,
+                  ),
+                ],
+              ),
+              if (localLoading) ...[
+                const SizedBox(height: Spacing.sm),
+                LinearProgressIndicator(
+                  minHeight: 3,
+                  color: theme.buttonPrimary,
+                  backgroundColor: theme.cardBorder.withValues(alpha: 0.4),
+                ),
+              ],
+              const SizedBox(height: Spacing.sm),
+              AnimatedSwitcher(
+                duration: const Duration(milliseconds: 200),
+                child: Text(
+                  description,
+                  key: ValueKey<String>(
+                    'stt-desc-${settings.sttPreference.name}',
+                  ),
+                  style:
+                      theme.bodyMedium?.copyWith(
+                        color: theme.sidebarForeground.withValues(alpha: 0.9),
+                      ) ??
+                      TextStyle(
+                        color: theme.sidebarForeground.withValues(alpha: 0.9),
+                        fontSize: 14,
+                      ),
+                ),
+              ),
+              if (warnings.isNotEmpty) ...[
+                const SizedBox(height: Spacing.sm),
+                ...warnings.map(
+                  (warning) => Padding(
+                    padding: const EdgeInsets.only(top: Spacing.xs),
+                    child: Text(
+                      warning,
+                      style:
+                          theme.bodySmall?.copyWith(
+                            color: theme.error,
+                            fontWeight: FontWeight.w600,
+                          ) ??
+                          TextStyle(
+                            color: theme.error,
+                            fontSize: 12,
+                            fontWeight: FontWeight.w600,
+                          ),
+                    ),
+                  ),
+                ),
+              ],
+              if (settings.sttPreference == SttPreference.serverOnly ||
+                  (settings.sttPreference == SttPreference.auto &&
+                      serverAvailable)) ...[
+                const SizedBox(height: Spacing.md),
+                const Divider(),
+                const SizedBox(height: Spacing.md),
+                Row(
+                  children: [
+                    Expanded(
+                      child: Column(
+                        crossAxisAlignment: CrossAxisAlignment.start,
+                        children: [
+                          Text(
+                            l10n.sttSilenceDuration,
+                            style: theme.bodyMedium?.copyWith(
+                                  color: theme.sidebarForeground,
+                                  fontWeight: FontWeight.w600,
+                                ) ??
+                                TextStyle(
+                                  color: theme.sidebarForeground,
+                                  fontSize: 14,
+                                  fontWeight: FontWeight.w600,
+                                ),
+                          ),
+                          const SizedBox(height: Spacing.xs),
+                          Text(
+                            '${settings.voiceSilenceDuration}ms',
+                            style: theme.bodySmall?.copyWith(
+                                  color: theme.sidebarForeground
+                                      .withValues(alpha: 0.7),
+                                ) ??
+                                TextStyle(
+                                  color: theme.sidebarForeground
+                                      .withValues(alpha: 0.7),
+                                  fontSize: 12,
+                                ),
+                          ),
+                        ],
+                      ),
+                    ),
+                    Text(
+                      '${(settings.voiceSilenceDuration / 1000).toStringAsFixed(1)}s',
+                      style: theme.bodyMedium?.copyWith(
+                            color: theme.buttonPrimary,
+                            fontWeight: FontWeight.w600,
+                          ) ??
+                          TextStyle(
+                            color: theme.buttonPrimary,
+                            fontSize: 14,
+                            fontWeight: FontWeight.w600,
+                          ),
+                    ),
+                  ],
+                ),
+                const SizedBox(height: Spacing.sm),
+                Slider(
+                  value: settings.voiceSilenceDuration.toDouble(),
+                  min: 300,
+                  max: 3000,
+                  divisions: 27,
+                  activeColor: theme.buttonPrimary,
+                  inactiveColor: theme.cardBorder.withValues(alpha: 0.4),
+                  onChanged: (value) {
+                    notifier.setVoiceSilenceDuration(value.round());
+                  },
+                ),
+                Text(
+                  l10n.sttSilenceDurationDescription,
+                  style: theme.bodySmall?.copyWith(
+                        color: theme.sidebarForeground.withValues(alpha: 0.7),
+                      ) ??
+                      TextStyle(
+                        color: theme.sidebarForeground.withValues(alpha: 0.7),
+                        fontSize: 12,
+                      ),
+                ),
+              ],
+            ],
+          ),
+        ),
+      ],
+    );
+  }
+
   Widget _buildTtsDropdownSection(
     BuildContext context,
     WidgetRef ref,
@@ -475,6 +775,35 @@ class AppCustomizationPage extends ConsumerWidget {
   ) {
     final theme = context.conduitTheme;
     final l10n = AppLocalizations.of(context)!;
+    final ttsService = ref.watch(textToSpeechServiceProvider);
+    final bool deviceAvailable =
+        ttsService.deviceEngineAvailable || !ttsService.isInitialized;
+    final bool serverAvailable = ttsService.serverEngineAvailable;
+    final bool autoSelectable = deviceAvailable || serverAvailable;
+    final bool deviceSelectable = deviceAvailable;
+    final bool serverSelectable = serverAvailable;
+    final ttsDescription = _ttsPreferenceDescription(l10n, settings);
+    final warnings = <String>[];
+    switch (settings.ttsEngine) {
+      case TtsEngine.auto:
+        if (!deviceAvailable) {
+          warnings.add(l10n.ttsDeviceUnavailableWarning);
+        }
+        if (!serverAvailable) {
+          warnings.add(l10n.ttsServerUnavailableWarning);
+        }
+        break;
+      case TtsEngine.device:
+        if (!deviceAvailable) {
+          warnings.add(l10n.ttsDeviceUnavailableWarning);
+        }
+        break;
+      case TtsEngine.server:
+        if (!serverAvailable) {
+          warnings.add(l10n.ttsServerUnavailableWarning);
+        }
+        break;
+    }
     return Column(
       crossAxisAlignment: CrossAxisAlignment.start,
       children: [
@@ -510,82 +839,154 @@ class AppCustomizationPage extends ConsumerWidget {
                         ) ??
                         TextStyle(color: theme.sidebarForeground, fontSize: 14),
                   ),
-                  const Spacer(),
-                  Wrap(
-                    spacing: Spacing.sm,
-                    children: [
-                      ChoiceChip(
-                        label: Text(l10n.ttsEngineDevice),
-                        selected: settings.ttsEngine == TtsEngine.device,
-                        showCheckmark: false,
-                        selectedColor: theme.buttonPrimary,
-                        backgroundColor: theme.cardBackground,
-                        side: BorderSide(
-                          color: settings.ttsEngine == TtsEngine.device
-                              ? theme.buttonPrimary.withValues(alpha: 0.6)
-                              : theme.textPrimary.withValues(alpha: 0.2),
-                        ),
-                        labelStyle: TextStyle(
-                          color: settings.ttsEngine == TtsEngine.device
-                              ? theme.buttonPrimaryText
-                              : theme.textPrimary,
-                          fontWeight: FontWeight.w600,
-                        ),
-                        onSelected: (v) {
-                          if (v) {
-                            final notifier = ref.read(
-                              appSettingsProvider.notifier,
-                            );
-                            notifier.setTtsEngine(TtsEngine.device);
-                            // Keep previous voice (device voices)
+                ],
+              ),
+              const SizedBox(height: Spacing.sm),
+              Wrap(
+                spacing: Spacing.sm,
+                runSpacing: Spacing.sm,
+                children: [
+                  ChoiceChip(
+                    label: Text(l10n.ttsEngineAuto),
+                    selected: settings.ttsEngine == TtsEngine.auto,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.ttsEngine == TtsEngine.auto
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(
+                              alpha: autoSelectable ? 0.2 : 0.12,
+                            ),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.ttsEngine == TtsEngine.auto
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary.withValues(
+                              alpha: autoSelectable ? 1.0 : 0.45,
+                            ),
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: autoSelectable
+                        ? (value) {
+                            if (value) {
+                              ref
+                                  .read(appSettingsProvider.notifier)
+                                  .setTtsEngine(TtsEngine.auto);
+                            }
                           }
-                        },
-                      ),
-                      ChoiceChip(
-                        label: Text(l10n.ttsEngineServer),
-                        selected: settings.ttsEngine == TtsEngine.server,
-                        showCheckmark: false,
-                        selectedColor: theme.buttonPrimary,
-                        backgroundColor: theme.cardBackground,
-                        side: BorderSide(
-                          color: settings.ttsEngine == TtsEngine.server
-                              ? theme.buttonPrimary.withValues(alpha: 0.6)
-                              : theme.textPrimary.withValues(alpha: 0.2),
-                        ),
-                        labelStyle: TextStyle(
-                          color: settings.ttsEngine == TtsEngine.server
-                              ? theme.buttonPrimaryText
-                              : theme.textPrimary,
-                          fontWeight: FontWeight.w600,
-                        ),
-                        onSelected: (v) {
-                          if (v) {
-                            final notifier = ref.read(
-                              appSettingsProvider.notifier,
-                            );
-                            // Clear device-specific voice so server can default
-                            notifier.setTtsVoice(null);
-                            notifier.setTtsEngine(TtsEngine.server);
+                        : null,
+                  ),
+                  ChoiceChip(
+                    label: Text(l10n.ttsEngineDevice),
+                    selected: settings.ttsEngine == TtsEngine.device,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.ttsEngine == TtsEngine.device
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(
+                              alpha: deviceSelectable ? 0.2 : 0.12,
+                            ),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.ttsEngine == TtsEngine.device
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary.withValues(
+                              alpha: deviceSelectable ? 1.0 : 0.45,
+                            ),
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: deviceSelectable
+                        ? (value) {
+                            if (value) {
+                              ref
+                                  .read(appSettingsProvider.notifier)
+                                  .setTtsEngine(TtsEngine.device);
+                            }
                           }
-                        },
-                      ),
-                    ],
+                        : null,
+                  ),
+                  ChoiceChip(
+                    label: Text(l10n.ttsEngineServer),
+                    selected: settings.ttsEngine == TtsEngine.server,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.ttsEngine == TtsEngine.server
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(
+                              alpha: serverSelectable ? 0.2 : 0.12,
+                            ),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.ttsEngine == TtsEngine.server
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary.withValues(
+                              alpha: serverSelectable ? 1.0 : 0.45,
+                            ),
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: serverSelectable
+                        ? (value) {
+                            if (value) {
+                              final notifier = ref.read(
+                                appSettingsProvider.notifier,
+                              );
+                              notifier.setTtsVoice(null);
+                              notifier.setTtsEngine(TtsEngine.server);
+                            }
+                          }
+                        : null,
                   ),
                 ],
               ),
+              const SizedBox(height: Spacing.sm),
+              AnimatedSwitcher(
+                duration: const Duration(milliseconds: 200),
+                child: Text(
+                  ttsDescription,
+                  key: ValueKey<String>('tts-desc-${settings.ttsEngine.name}'),
+                  style:
+                      theme.bodyMedium?.copyWith(
+                        color: theme.sidebarForeground.withValues(alpha: 0.9),
+                      ) ??
+                      TextStyle(
+                        color: theme.sidebarForeground.withValues(alpha: 0.9),
+                        fontSize: 14,
+                      ),
+                ),
+              ),
+              if (warnings.isNotEmpty) ...[
+                const SizedBox(height: Spacing.sm),
+                ...warnings.map(
+                  (warning) => Padding(
+                    padding: const EdgeInsets.only(top: Spacing.xs),
+                    child: Text(
+                      warning,
+                      style:
+                          theme.bodySmall?.copyWith(
+                            color: theme.error,
+                            fontWeight: FontWeight.w600,
+                          ) ??
+                          TextStyle(
+                            color: theme.error,
+                            fontSize: 12,
+                            fontWeight: FontWeight.w600,
+                          ),
+                    ),
+                  ),
+                ),
+              ],
             ],
           ),
         ),
         const SizedBox(height: Spacing.sm),
         _ExpandableCard(
           title: l10n.ttsVoice,
-          subtitle: _getDisplayVoiceName(
-            settings.ttsEngine == TtsEngine.server
-                ? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ??
-                      '')
-                : (settings.ttsVoice ?? ''),
-            l10n.ttsSystemDefault,
-          ),
+          subtitle: _ttsVoiceSubtitle(l10n, settings),
           icon: UiUtils.platformIcon(
             ios: CupertinoIcons.speaker_3,
             android: Icons.record_voice_over,
@@ -604,14 +1005,7 @@ class AppCustomizationPage extends ConsumerWidget {
                   color: theme.buttonPrimary,
                 ),
                 title: l10n.ttsVoice,
-                subtitle: _getDisplayVoiceName(
-                  settings.ttsEngine == TtsEngine.server
-                      ? ((settings.ttsServerVoiceName ??
-                                settings.ttsServerVoiceId) ??
-                            '')
-                      : (settings.ttsVoice ?? ''),
-                  l10n.ttsSystemDefault,
-                ),
+                subtitle: _ttsVoiceSubtitle(l10n, settings),
                 onTap: () => _showVoicePickerSheet(context, ref, settings),
               ),
               const SizedBox(height: Spacing.md),
@@ -627,49 +1021,13 @@ class AppCustomizationPage extends ConsumerWidget {
                 value: settings.ttsSpeechRate,
                 min: 0.25,
                 max: 2.0,
-                divisions: 7,
+                divisions: 35,
                 label: '${(settings.ttsSpeechRate * 100).round()}%',
                 onChanged: (value) => ref
                     .read(appSettingsProvider.notifier)
                     .setTtsSpeechRate(value),
               ),
               const SizedBox(height: Spacing.md),
-              // Pitch Slider
-              _buildSliderTile(
-                context,
-                ref,
-                icon: UiUtils.platformIcon(
-                  ios: CupertinoIcons.waveform,
-                  android: Icons.graphic_eq,
-                ),
-                title: l10n.ttsPitch,
-                value: settings.ttsPitch,
-                min: 0.5,
-                max: 2.0,
-                divisions: 6,
-                label: settings.ttsPitch.toStringAsFixed(1),
-                onChanged: (value) =>
-                    ref.read(appSettingsProvider.notifier).setTtsPitch(value),
-              ),
-              const SizedBox(height: Spacing.md),
-              // Volume Slider
-              _buildSliderTile(
-                context,
-                ref,
-                icon: UiUtils.platformIcon(
-                  ios: CupertinoIcons.volume_up,
-                  android: Icons.volume_up,
-                ),
-                title: l10n.ttsVolume,
-                value: settings.ttsVolume,
-                min: 0.0,
-                max: 1.0,
-                divisions: 10,
-                label: '${(settings.ttsVolume * 100).round()}%',
-                onChanged: (value) =>
-                    ref.read(appSettingsProvider.notifier).setTtsVolume(value),
-              ),
-              const SizedBox(height: Spacing.md),
               // Preview Button
               _CustomizationTile(
                 leading: _buildIconBadge(
@@ -691,6 +1049,53 @@ class AppCustomizationPage extends ConsumerWidget {
     );
   }
 
+  /// Returns the localized description for a speech-to-text [preference].
+  ///
+  /// One string from [l10n] is selected per [SttPreference] value.
+  String _sttPreferenceDescription(
+    AppLocalizations l10n,
+    SttPreference preference,
+  ) {
+    return switch (preference) {
+      SttPreference.auto => l10n.sttEngineAutoDescription,
+      SttPreference.deviceOnly => l10n.sttEngineDeviceDescription,
+      SttPreference.serverOnly => l10n.sttEngineServerDescription,
+    };
+  }
+
+  /// Returns the localized description for the active text-to-speech engine.
+  ///
+  /// The engine is read from [settings]; the text comes from [l10n].
+  String _ttsPreferenceDescription(
+    AppLocalizations l10n,
+    AppSettings settings,
+  ) {
+    return switch (settings.ttsEngine) {
+      TtsEngine.auto => l10n.ttsEngineAutoDescription,
+      TtsEngine.device => l10n.ttsEngineDeviceDescription,
+      TtsEngine.server => l10n.ttsEngineServerDescription,
+    };
+  }
+
+  /// Builds the subtitle of the voice tile for the active TTS engine.
+  ///
+  /// In auto mode both the device and the server voice are listed.
+  String _ttsVoiceSubtitle(AppLocalizations l10n, AppSettings settings) {
+    final fallback = l10n.ttsSystemDefault;
+    final onDevice = _getDisplayVoiceName(settings.ttsVoice, fallback);
+    final onServer = _getDisplayVoiceName(
+      settings.ttsServerVoiceName ?? settings.ttsServerVoiceId ?? '',
+      fallback,
+    );
+
+    return switch (settings.ttsEngine) {
+      TtsEngine.auto =>
+        '${l10n.ttsEngineDevice}: $onDevice • ${l10n.ttsEngineServer}: $onServer',
+      TtsEngine.device => onDevice,
+      TtsEngine.server => onServer,
+    };
+  }
+
   Widget _buildSliderTile(
     BuildContext context,
     WidgetRef ref, {
diff --git a/lib/l10n/app_de.arb b/lib/l10n/app_de.arb
index c99b5fa..17381a0 100644
--- a/lib/l10n/app_de.arb
+++ b/lib/l10n/app_de.arb
@@ -307,7 +307,25 @@
   "chatSettings": "Chat",
   "sendOnEnter": "Mit Enter senden",
   "sendOnEnterDescription": "Enter sendet (Soft-Tastatur). Cmd/Ctrl+Enter ebenfalls verfügbar",
+  "sttSettings": "Sprache zu Text",
+  "sttEngineLabel": "Erkennungs-Engine",
+  "sttEngineAuto": "Automatisch",
+  "sttEngineDevice": "Auf dem Gerät",
+  "sttEngineServer": "Server",
+  "sttEngineAutoDescription": "Verwendet die Erkennung auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.",
+  "sttEngineDeviceDescription": "Behält Audio auf diesem Gerät. Spracheingabe funktioniert nicht, wenn das Gerät keine Spracherkennung unterstützt.",
+  "sttEngineServerDescription": "Sendet Aufnahmen immer an deinen OpenWebUI-Server zur Transkription.",
+  "sttDeviceUnavailableWarning": "Auf diesem Gerät steht keine Spracherkennung zur Verfügung.",
+  "sttServerUnavailableWarning": "Verbinde dich mit einem Server mit aktivierter Transkription, um diese Option zu nutzen.",
+  "sttSilenceDuration": "Stille-Dauer",
+  "sttSilenceDurationDescription": "Zeit nach Stille warten, bevor die Aufnahme automatisch gestoppt wird",
   "ttsSettings": "Text zu Sprache",
+  "ttsEngineAuto": "Automatisch",
+  "ttsEngineAutoDescription": "Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.",
+  "ttsEngineDeviceDescription": "Behält die Ausgabe auf diesem Gerät. Sprachausgabe funktioniert nicht, wenn das Gerät keine TTS-Unterstützung bietet.",
+  "ttsEngineServerDescription": "Sendet die Ausgabe immer an deinen OpenWebUI-Server.",
+  "ttsDeviceUnavailableWarning": "Sprachausgabe auf dem Gerät steht auf diesem Gerät nicht zur Verfügung.",
+  "ttsServerUnavailableWarning": "Verbinde dich mit einem Server mit aktivierter Sprachausgabe, um diese Option zu nutzen.",
   "ttsVoice": "Stimme",
   "ttsSpeechRate": "Sprechgeschwindigkeit",
   "ttsPitch": "Tonhöhe",
diff --git a/lib/l10n/app_en.arb b/lib/l10n/app_en.arb
index d45abb4..cf6a18c 100644
--- a/lib/l10n/app_en.arb
+++ b/lib/l10n/app_en.arb
@@ -1219,10 +1219,62 @@
   "@sendOnEnterDescription": {
     "description": "Explanation of how the Send on Enter toggle behaves."
   },
+  "sttSettings": "Speech to Text",
+  "@sttSettings": {
+    "description": "Section header for speech-to-text settings."
+  },
+  "sttEngineLabel": "Recognition engine",
+  "@sttEngineLabel": {
+    "description": "Label shown above the speech-to-text engine chips."
+  },
+  "sttEngineAuto": "Auto",
+  "@sttEngineAuto": {
+    "description": "Chip label for automatic speech-to-text selection."
+  },
+  "sttEngineDevice": "On device",
+  "@sttEngineDevice": {
+    "description": "Chip label for on-device speech recognition."
+  },
+  "sttEngineServer": "Server",
+  "@sttEngineServer": {
+    "description": "Chip label for server speech recognition."
+  },
+  "sttEngineAutoDescription": "Use on-device recognition when available and fall back to your server.",
+  "@sttEngineAutoDescription": {
+    "description": "Description shown when automatic speech-to-text preference is active."
+  },
+  "sttEngineDeviceDescription": "Keep audio on this device. Voice input stops working if on-device speech recognition isn’t supported.",
+  "@sttEngineDeviceDescription": {
+    "description": "Description shown when on-device speech-to-text preference is active."
+  },
+  "sttEngineServerDescription": "Always send recordings to your OpenWebUI server for transcription.",
+  "@sttEngineServerDescription": {
+    "description": "Description shown when server speech-to-text preference is active."
+  },
+  "sttDeviceUnavailableWarning": "On-device speech recognition isn’t available on this device.",
+  "@sttDeviceUnavailableWarning": {
+    "description": "Warning shown when the user selects on-device speech recognition but it is unavailable."
+  },
+  "sttServerUnavailableWarning": "Connect to a server with transcription enabled to use this option.",
+  "@sttServerUnavailableWarning": {
+    "description": "Warning shown when the user selects server speech recognition but no server is available."
+  },
+  "sttSilenceDuration": "Silence Duration",
+  "@sttSilenceDuration": {
+    "description": "Label for the silence duration setting in server speech-to-text."
+  },
+  "sttSilenceDurationDescription": "Time to wait after silence before auto-stopping recording",
+  "@sttSilenceDurationDescription": {
+    "description": "Description for the silence duration slider in server speech-to-text settings."
+  },
   "ttsEngineLabel": "Engine",
   "@ttsEngineLabel": {
     "description": "Label for selecting the text-to-speech engine."
   },
+  "ttsEngineAuto": "Auto",
+  "@ttsEngineAuto": {
+    "description": "Chip label for automatically selecting the text-to-speech engine."
+  },
   "ttsEngineDevice": "On device",
   "@ttsEngineDevice": {
     "description": "Chip label for using on-device text-to-speech."
@@ -1231,6 +1283,26 @@
   "@ttsEngineServer": {
     "description": "Chip label for using server-side text-to-speech."
   },
+  "ttsEngineAutoDescription": "Use on-device speech when available and fall back to your server.",
+  "@ttsEngineAutoDescription": {
+    "description": "Description shown when automatic text-to-speech preference is active."
+  },
+  "ttsEngineDeviceDescription": "Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.",
+  "@ttsEngineDeviceDescription": {
+    "description": "Description shown when on-device text-to-speech preference is active."
+  },
+  "ttsEngineServerDescription": "Always request audio from your OpenWebUI server.",
+  "@ttsEngineServerDescription": {
+    "description": "Description shown when server text-to-speech preference is active."
+  },
+  "ttsDeviceUnavailableWarning": "On-device text-to-speech isn’t available on this device.",
+  "@ttsDeviceUnavailableWarning": {
+    "description": "Warning shown when on-device text-to-speech is unavailable."
+  },
+  "ttsServerUnavailableWarning": "Connect to a server with text-to-speech enabled to use this option.",
+  "@ttsServerUnavailableWarning": {
+    "description": "Warning shown when server text-to-speech is unavailable."
+  },
   "ttsSettings": "Text to Speech",
   "@ttsSettings": {
     "description": "Section header for TTS-related customization options."
diff --git a/lib/l10n/app_es.arb b/lib/l10n/app_es.arb
index a8d8ddc..73c1b35 100644
--- a/lib/l10n/app_es.arb
+++ b/lib/l10n/app_es.arb
@@ -307,7 +307,25 @@
   "chatSettings": "Conversación",
   "sendOnEnter": "Enviar con Enter",
   "sendOnEnterDescription": "Enter envía (teclado virtual). Cmd/Ctrl+Enter también disponible",
+  "sttSettings": "Voz a texto",
+  "sttEngineLabel": "Motor de reconocimiento",
+  "sttEngineAuto": "Automático",
+  "sttEngineDevice": "En el dispositivo",
+  "sttEngineServer": "Servidor",
+  "sttEngineAutoDescription": "Usa el reconocimiento en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.",
+  "sttEngineDeviceDescription": "Mantiene el audio en este dispositivo. La entrada de voz no funciona si el dispositivo no admite reconocimiento de voz.",
+  "sttEngineServerDescription": "Envía siempre las grabaciones a tu servidor OpenWebUI para la transcripción.",
+  "sttDeviceUnavailableWarning": "El reconocimiento de voz en el dispositivo no está disponible en este dispositivo.",
+  "sttServerUnavailableWarning": "Conéctate a un servidor con transcripción habilitada para usar esta opción.",
+  "sttSilenceDuration": "Duración del silencio",
+  "sttSilenceDurationDescription": "Tiempo de espera después del silencio antes de detener automáticamente la grabación",
   "ttsSettings": "Texto a voz",
+  "ttsEngineAuto": "Automático",
+  "ttsEngineAutoDescription": "Usa la síntesis en el dispositivo cuando esté disponible y, si no, recurre a tu servidor.",
+  "ttsEngineDeviceDescription": "Mantiene la síntesis en este dispositivo. La reproducción de voz no funciona si el dispositivo no admite TTS.",
+  "ttsEngineServerDescription": "Solicita siempre el audio a tu servidor OpenWebUI.",
+  "ttsDeviceUnavailableWarning": "La síntesis de voz en el dispositivo no está disponible en este dispositivo.",
+  "ttsServerUnavailableWarning": "Conéctate a un servidor con texto a voz habilitado para usar esta opción.",
   "ttsVoice": "Voz",
   "ttsSpeechRate": "Velocidad de voz",
   "ttsPitch": "Tono",
diff --git a/lib/l10n/app_fr.arb b/lib/l10n/app_fr.arb
index d50ae5e..18955b9 100644
--- a/lib/l10n/app_fr.arb
+++ b/lib/l10n/app_fr.arb
@@ -307,7 +307,25 @@
   "chatSettings": "Discussion",
   "sendOnEnter": "Envoyer avec Entrée",
   "sendOnEnterDescription": "Entrée envoie (clavier logiciel). Cmd/Ctrl+Entrée aussi disponible",
+  "sttSettings": "Voix vers texte",
+  "sttEngineLabel": "Moteur de reconnaissance",
+  "sttEngineAuto": "Auto",
+  "sttEngineDevice": "Sur l’appareil",
+  "sttEngineServer": "Serveur",
+  "sttEngineAutoDescription": "Utilise la reconnaissance sur l’appareil quand c’est possible, sinon bascule vers votre serveur.",
+  "sttEngineDeviceDescription": "Conserve l’audio sur cet appareil. L’entrée vocale cesse de fonctionner si la reconnaissance vocale n’est pas prise en charge.",
+  "sttEngineServerDescription": "Envoie toujours les enregistrements à votre serveur OpenWebUI pour transcription.",
+  "sttDeviceUnavailableWarning": "La reconnaissance vocale sur l’appareil n’est pas disponible sur cet appareil.",
+  "sttServerUnavailableWarning": "Connectez-vous à un serveur avec la transcription activée pour utiliser cette option.",
+  "sttSilenceDuration": "Durée du silence",
+  "sttSilenceDurationDescription": "Temps d'attente après le silence avant d'arrêter automatiquement l'enregistrement",
   "ttsSettings": "Synthèse vocale",
+  "ttsEngineAuto": "Auto",
+  "ttsEngineAutoDescription": "Utilise la synthèse locale quand c’est possible, sinon bascule vers votre serveur.",
+  "ttsEngineDeviceDescription": "Garde la synthèse sur cet appareil. La lecture vocale ne fonctionne plus si l’appareil n’offre pas la synthèse vocale.",
+  "ttsEngineServerDescription": "Demande toujours l'audio à votre serveur OpenWebUI.",
+  "ttsDeviceUnavailableWarning": "La synthèse vocale sur l’appareil n’est pas disponible sur cet appareil.",
+  "ttsServerUnavailableWarning": "Connectez-vous à un serveur avec la synthèse vocale activée pour utiliser cette option.",
   "ttsVoice": "Voix",
   "ttsSpeechRate": "Vitesse de parole",
   "ttsPitch": "Hauteur",
diff --git a/lib/l10n/app_it.arb b/lib/l10n/app_it.arb
index cc2e0f0..fe7948e 100644
--- a/lib/l10n/app_it.arb
+++ b/lib/l10n/app_it.arb
@@ -307,7 +307,25 @@
   "chatSettings": "Chat",
   "sendOnEnter": "Invia con Invio",
   "sendOnEnterDescription": "Invio invia (tastiera software). Cmd/Ctrl+Invio disponibile",
+  "sttSettings": "Voce in testo",
+  "sttEngineLabel": "Motore di riconoscimento",
+  "sttEngineAuto": "Automatico",
+  "sttEngineDevice": "Sul dispositivo",
+  "sttEngineServer": "Server",
+  "sttEngineAutoDescription": "Usa il riconoscimento sul dispositivo quando disponibile e altrimenti passa al tuo server.",
+  "sttEngineDeviceDescription": "Mantiene l’audio su questo dispositivo. L’input vocale non funziona se il dispositivo non supporta il riconoscimento vocale.",
+  "sttEngineServerDescription": "Invia sempre le registrazioni al tuo server OpenWebUI per la trascrizione.",
+  "sttDeviceUnavailableWarning": "Il riconoscimento vocale sul dispositivo non è disponibile su questo dispositivo.",
+  "sttServerUnavailableWarning": "Collegati a un server con la trascrizione abilitata per usare questa opzione.",
+  "sttSilenceDuration": "Durata del silenzio",
+  "sttSilenceDurationDescription": "Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione",
   "ttsSettings": "Sintesi vocale",
+  "ttsEngineAuto": "Automatico",
+  "ttsEngineAutoDescription": "Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.",
+  "ttsEngineDeviceDescription": "Mantiene la sintesi su questo dispositivo. La riproduzione vocale non funziona se il dispositivo non supporta il TTS.",
+  "ttsEngineServerDescription": "Richiede sempre l'audio dal tuo server OpenWebUI.",
+  "ttsDeviceUnavailableWarning": "La sintesi vocale sul dispositivo non è disponibile su questo dispositivo.",
+  "ttsServerUnavailableWarning": "Collegati a un server con la sintesi vocale abilitata per usare questa opzione.",
   "ttsVoice": "Voce",
   "ttsSpeechRate": "Velocità di sintesi vocale",
   "ttsPitch": "Tonalità",
diff --git a/lib/l10n/app_localizations.dart b/lib/l10n/app_localizations.dart
index 91f7949..a2c974f 100644
--- a/lib/l10n/app_localizations.dart
+++ b/lib/l10n/app_localizations.dart
@@ -1784,12 +1784,90 @@ abstract class AppLocalizations {
   /// **'Enter sends (soft keyboard). Cmd/Ctrl+Enter also available'**
   String get sendOnEnterDescription;
 
+  /// Section header for speech-to-text settings.
+  ///
+  /// In en, this message translates to:
+  /// **'Speech to Text'**
+  String get sttSettings;
+
+  /// Label shown above the speech-to-text engine chips.
+  ///
+  /// In en, this message translates to:
+  /// **'Recognition engine'**
+  String get sttEngineLabel;
+
+  /// Chip label for automatic speech-to-text selection.
+  ///
+  /// In en, this message translates to:
+  /// **'Auto'**
+  String get sttEngineAuto;
+
+  /// Chip label for on-device speech recognition.
+  ///
+  /// In en, this message translates to:
+  /// **'On device'**
+  String get sttEngineDevice;
+
+  /// Chip label for server speech recognition.
+  ///
+  /// In en, this message translates to:
+  /// **'Server'**
+  String get sttEngineServer;
+
+  /// Description shown when automatic speech-to-text preference is active.
+  ///
+  /// In en, this message translates to:
+  /// **'Use on-device recognition when available and fall back to your server.'**
+  String get sttEngineAutoDescription;
+
+  /// Description shown when on-device speech-to-text preference is active.
+  ///
+  /// In en, this message translates to:
+  /// **'Keep audio on this device. Voice input stops working if on-device speech recognition isn’t supported.'**
+  String get sttEngineDeviceDescription;
+
+  /// Description shown when server speech-to-text preference is active.
+  ///
+  /// In en, this message translates to:
+  /// **'Always send recordings to your OpenWebUI server for transcription.'**
+  String get sttEngineServerDescription;
+
+  /// Warning shown when the user selects on-device speech recognition but it is unavailable.
+  ///
+  /// In en, this message translates to:
+  /// **'On-device speech recognition isn’t available on this device.'**
+  String get sttDeviceUnavailableWarning;
+
+  /// Warning shown when the user selects server speech recognition but no server is available.
+  ///
+  /// In en, this message translates to:
+  /// **'Connect to a server with transcription enabled to use this option.'**
+  String get sttServerUnavailableWarning;
+
+  /// Label for the silence duration setting in server speech-to-text.
+  ///
+  /// In en, this message translates to:
+  /// **'Silence Duration'**
+  String get sttSilenceDuration;
+
+  /// Description for the silence duration slider in server speech-to-text settings.
+  ///
+  /// In en, this message translates to:
+  /// **'Time to wait after silence before auto-stopping recording'**
+  String get sttSilenceDurationDescription;
+
   /// Label for selecting the text-to-speech engine.
   ///
   /// In en, this message translates to:
   /// **'Engine'**
   String get ttsEngineLabel;
 
+  /// Chip label for automatically selecting the text-to-speech engine.
+  ///
+  /// In en, this message translates to:
+  /// **'Auto'**
+  String get ttsEngineAuto;
+
   /// Chip label for using on-device text-to-speech.
   ///
   /// In en, this message translates to:
@@ -1802,6 +1880,36 @@ abstract class AppLocalizations {
   /// **'Server'**
   String get ttsEngineServer;
 
+  /// Description shown when automatic text-to-speech preference is active.
+  ///
+  /// In en, this message translates to:
+  /// **'Use on-device speech when available and fall back to your server.'**
+  String get ttsEngineAutoDescription;
+
+  /// Description shown when on-device text-to-speech preference is active.
+  ///
+  /// In en, this message translates to:
+  /// **'Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.'**
+  String get ttsEngineDeviceDescription;
+
+  /// Description shown when server text-to-speech preference is active.
+  ///
+  /// In en, this message translates to:
+  /// **'Always request audio from your OpenWebUI server.'**
+  String get ttsEngineServerDescription;
+
+  /// Warning shown when on-device text-to-speech is unavailable.
+  ///
+  /// In en, this message translates to:
+  /// **'On-device text-to-speech isn’t available on this device.'**
+  String get ttsDeviceUnavailableWarning;
+
+  /// Warning shown when server text-to-speech is unavailable.
+  ///
+  /// In en, this message translates to:
+  /// **'Connect to a server with text-to-speech enabled to use this option.'**
+  String get ttsServerUnavailableWarning;
+
   /// Section header for TTS-related customization options.
   ///
   /// In en, this message translates to:
diff --git a/lib/l10n/app_localizations_de.dart b/lib/l10n/app_localizations_de.dart
index cbb3f39..58fb7f9 100644
--- a/lib/l10n/app_localizations_de.dart
+++ b/lib/l10n/app_localizations_de.dart
@@ -931,15 +931,80 @@ class AppLocalizationsDe extends AppLocalizations {
   String get sendOnEnterDescription =>
       'Enter sendet (Soft-Tastatur). Cmd/Ctrl+Enter ebenfalls verfügbar';
 
+  @override
+  String get sttSettings => 'Sprache zu Text';
+
+  @override
+  String get sttEngineLabel => 'Erkennungs-Engine';
+
+  @override
+  String get sttEngineAuto => 'Automatisch';
+
+  @override
+  String get sttEngineDevice => 'Auf dem Gerät';
+
+  @override
+  String get sttEngineServer => 'Server';
+
+  @override
+  String get sttEngineAutoDescription =>
+      'Verwendet die Erkennung auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.';
+
+  @override
+  String get sttEngineDeviceDescription =>
+      'Behält Audio auf diesem Gerät. Spracheingabe funktioniert nicht, wenn das Gerät keine Spracherkennung unterstützt.';
+
+  @override
+  String get sttEngineServerDescription =>
+      'Sendet Aufnahmen immer an deinen OpenWebUI-Server zur Transkription.';
+
+  @override
+  String get sttDeviceUnavailableWarning =>
+      'Auf diesem Gerät steht keine Spracherkennung zur Verfügung.';
+
+  @override
+  String get sttServerUnavailableWarning =>
+      'Verbinde dich mit einem Server mit aktivierter Transkription, um diese Option zu nutzen.';
+
+  @override
+  String get sttSilenceDuration => 'Stille-Dauer';
+
+  @override
+  String get sttSilenceDurationDescription =>
+      'Zeit nach Stille warten, bevor die Aufnahme automatisch gestoppt wird';
+
   @override
   String get ttsEngineLabel => 'Engine';
 
+  @override
+  String get ttsEngineAuto => 'Automatisch';
+
   @override
   String get ttsEngineDevice => 'Auf dem Gerät';
 
   @override
   String get ttsEngineServer => 'Server';
 
+  @override
+  String get ttsEngineAutoDescription =>
+      'Verwendet die Sprachausgabe auf dem Gerät, wenn verfügbar, und greift sonst auf deinen Server zurück.';
+
+  @override
+  String get ttsEngineDeviceDescription =>
+      'Behält die Ausgabe auf diesem Gerät. Sprachausgabe funktioniert nicht, wenn das Gerät keine TTS-Unterstützung bietet.';
+
+  @override
+  String get ttsEngineServerDescription =>
+      'Sendet die Ausgabe immer an deinen OpenWebUI-Server.';
+
+  @override
+  String get ttsDeviceUnavailableWarning =>
+      'Sprachausgabe auf dem Gerät steht auf diesem Gerät nicht zur Verfügung.';
+
+  @override
+  String get ttsServerUnavailableWarning =>
+      'Verbinde dich mit einem Server mit aktivierter Sprachausgabe, um diese Option zu nutzen.';
+
   @override
   String get ttsSettings => 'Text zu Sprache';
 
diff --git a/lib/l10n/app_localizations_en.dart b/lib/l10n/app_localizations_en.dart
index 72eb92f..7f2baba 100644
--- a/lib/l10n/app_localizations_en.dart
+++ b/lib/l10n/app_localizations_en.dart
@@ -925,15 +925,80 @@ class AppLocalizationsEn extends AppLocalizations {
   String get sendOnEnterDescription =>
       'Enter sends (soft keyboard). Cmd/Ctrl+Enter also available';
 
+  @override
+  String get sttSettings => 'Speech to Text';
+
+  @override
+  String get sttEngineLabel => 'Recognition engine';
+
+  @override
+  String get sttEngineAuto => 'Auto';
+
+  @override
+  String get sttEngineDevice => 'On device';
+
+  @override
+  String get sttEngineServer => 'Server';
+
+  @override
+  String get sttEngineAutoDescription =>
+      'Use on-device recognition when available and fall back to your server.';
+
+  @override
+  String get sttEngineDeviceDescription =>
+      'Keep audio on this device. Voice input stops working if on-device speech recognition isn’t supported.';
+
+  @override
+  String get sttEngineServerDescription =>
+      'Always send recordings to your OpenWebUI server for transcription.';
+
+  @override
+  String get sttDeviceUnavailableWarning =>
+      'On-device speech recognition isn’t available on this device.';
+
+  @override
+  String get sttServerUnavailableWarning =>
+      'Connect to a server with transcription enabled to use this option.';
+
+  @override
+  String get sttSilenceDuration => 'Silence Duration';
+
+  @override
+  String get sttSilenceDurationDescription =>
+      'Time to wait after silence before auto-stopping recording';
+
   @override
   String get ttsEngineLabel => 'Engine';
 
+  @override
+  String get ttsEngineAuto => 'Auto';
+
   @override
   String get ttsEngineDevice => 'On device';
 
   @override
   String get ttsEngineServer => 'Server';
 
+  @override
+  String get ttsEngineAutoDescription =>
+      'Use on-device speech when available and fall back to your server.';
+
+  @override
+  String get ttsEngineDeviceDescription =>
+      'Keep synthesis on this device. Voice playback stops working if on-device TTS isn’t supported.';
+
+  @override
+  String get ttsEngineServerDescription =>
+      'Always request audio from your OpenWebUI server.';
+
+  @override
+  String get ttsDeviceUnavailableWarning =>
+      'On-device text-to-speech isn’t available on this device.';
+
+  @override
+  String get ttsServerUnavailableWarning =>
+      'Connect to a server with text-to-speech enabled to use this option.';
+
   @override
   String get ttsSettings => 'Text to Speech';
 
diff --git a/lib/l10n/app_localizations_fr.dart b/lib/l10n/app_localizations_fr.dart
index e312b8e..97abbd8 100644
--- a/lib/l10n/app_localizations_fr.dart
+++ b/lib/l10n/app_localizations_fr.dart
@@ -940,15 +940,80 @@ class AppLocalizationsFr extends AppLocalizations {
   String get sendOnEnterDescription =>
       'Entrée envoie (clavier logiciel). Cmd/Ctrl+Entrée aussi disponible';
 
+  @override
+  String get sttSettings => 'Voix vers texte';
+
+  @override
+  String get sttEngineLabel => 'Moteur de reconnaissance';
+
+  @override
+  String get sttEngineAuto => 'Auto';
+
+  @override
+  String get sttEngineDevice => 'Sur l’appareil';
+
+  @override
+  String get sttEngineServer => 'Serveur';
+
+  @override
+  String get sttEngineAutoDescription =>
+      'Utilise la reconnaissance sur l’appareil quand c’est possible, sinon bascule vers votre serveur.';
+
+  @override
+  String get sttEngineDeviceDescription =>
+      'Conserve l’audio sur cet appareil. L’entrée vocale cesse de fonctionner si la reconnaissance vocale n’est pas prise en charge.';
+
+  @override
+  String get sttEngineServerDescription =>
+      'Envoie toujours les enregistrements à votre serveur OpenWebUI pour transcription.';
+
+  @override
+  String get sttDeviceUnavailableWarning =>
+      'La reconnaissance vocale sur l’appareil n’est pas disponible sur cet appareil.';
+
+  @override
+  String get sttServerUnavailableWarning =>
+      'Connectez-vous à un serveur avec la transcription activée pour utiliser cette option.';
+
+  @override
+  String get sttSilenceDuration => 'Durée du silence';
+
+  @override
+  String get sttSilenceDurationDescription =>
+      'Temps d\'attente après le silence avant d\'arrêter automatiquement l\'enregistrement';
+
   @override
   String get ttsEngineLabel => 'Moteur';
 
+  @override
+  String get ttsEngineAuto => 'Auto';
+
   @override
   String get ttsEngineDevice => 'Sur l\'appareil';
 
   @override
   String get ttsEngineServer => 'Serveur';
 
+  @override
+  String get ttsEngineAutoDescription =>
+      'Utilise la synthèse locale quand c’est possible, sinon bascule vers votre serveur.';
+
+  @override
+  String get ttsEngineDeviceDescription =>
+      'Garde la synthèse sur cet appareil. La lecture vocale ne fonctionne plus si l’appareil n’offre pas la synthèse vocale.';
+
+  @override
+  String get ttsEngineServerDescription =>
+      'Demande toujours l\'audio à votre serveur OpenWebUI.';
+
+  @override
+  String get ttsDeviceUnavailableWarning =>
+      'La synthèse vocale sur l’appareil n’est pas disponible sur cet appareil.';
+
+  @override
+  String get ttsServerUnavailableWarning =>
+      'Connectez-vous à un serveur avec la synthèse vocale activée pour utiliser cette option.';
+
   @override
   String get ttsSettings => 'Synthèse vocale';
 
diff --git a/lib/l10n/app_localizations_it.dart b/lib/l10n/app_localizations_it.dart
index 0af2b1b..f47fe4d 100644
--- a/lib/l10n/app_localizations_it.dart
+++ b/lib/l10n/app_localizations_it.dart
@@ -929,15 +929,80 @@ class AppLocalizationsIt extends AppLocalizations {
   String get sendOnEnterDescription =>
       'Invio invia (tastiera software). Cmd/Ctrl+Invio disponibile';
 
+  @override
+  String get sttSettings => 'Voce in testo';
+
+  @override
+  String get sttEngineLabel => 'Motore di riconoscimento';
+
+  @override
+  String get sttEngineAuto => 'Automatico';
+
+  @override
+  String get sttEngineDevice => 'Sul dispositivo';
+
+  @override
+  String get sttEngineServer => 'Server';
+
+  @override
+  String get sttEngineAutoDescription =>
+      'Usa il riconoscimento sul dispositivo quando disponibile e altrimenti passa al tuo server.';
+
+  @override
+  String get sttEngineDeviceDescription =>
+      'Mantiene l’audio su questo dispositivo. L’input vocale non funziona se il dispositivo non supporta il riconoscimento vocale.';
+
+  @override
+  String get sttEngineServerDescription =>
+      'Invia sempre le registrazioni al tuo server OpenWebUI per la trascrizione.';
+
+  @override
+  String get sttDeviceUnavailableWarning =>
+      'Il riconoscimento vocale sul dispositivo non è disponibile su questo dispositivo.';
+
+  @override
+  String get sttServerUnavailableWarning =>
+      'Collegati a un server con la trascrizione abilitata per usare questa opzione.';
+
+  @override
+  String get sttSilenceDuration => 'Durata del silenzio';
+
+  @override
+  String get sttSilenceDurationDescription =>
+      'Tempo di attesa dopo il silenzio prima di fermare automaticamente la registrazione';
+
   @override
   String get ttsEngineLabel => 'Motore';
 
+  @override
+  String get ttsEngineAuto => 'Automatico';
+
   @override
   String get ttsEngineDevice => 'Sul dispositivo';
 
   @override
   String get ttsEngineServer => 'Server';
 
+  @override
+  String get ttsEngineAutoDescription =>
+      'Usa la sintesi sul dispositivo quando disponibile e altrimenti passa al tuo server.';
+
+  @override
+  String get ttsEngineDeviceDescription =>
+      'Mantiene la sintesi su questo dispositivo. La riproduzione vocale non funziona se il dispositivo non supporta il TTS.';
+
+  @override
+  String get ttsEngineServerDescription =>
+      'Richiede sempre l\'audio dal tuo server OpenWebUI.';
+
+  @override
+  String get ttsDeviceUnavailableWarning =>
+      'La sintesi vocale sul dispositivo non è disponibile su questo dispositivo.';
+
+  @override
+  String get ttsServerUnavailableWarning =>
+      'Collegati a un server con la sintesi vocale abilitata per usare questa opzione.';
+
   @override
   String get ttsSettings => 'Sintesi vocale';
 
diff --git a/lib/l10n/app_nl.arb b/lib/l10n/app_nl.arb
index d6133d2..0b5f8aa 100644
--- a/lib/l10n/app_nl.arb
+++ b/lib/l10n/app_nl.arb
@@ -307,7 +307,25 @@
   "chatSettings": "Chat",
   "sendOnEnter": "Verzenden met Enter",
   "sendOnEnterDescription": "Enter verzendt (softtoetsenbord). Cmd/Ctrl+Enter ook beschikbaar",
+  "sttSettings": "Spraak naar tekst",
+  "sttEngineLabel": "Herkenningsengine",
+  "sttEngineAuto": "Automatisch",
+  "sttEngineDevice": "Op het apparaat",
+  "sttEngineServer": "Server",
+  "sttEngineAutoDescription": "Gebruikt spraakherkenning op het apparaat wanneer beschikbaar en valt anders terug op je server.",
+  "sttEngineDeviceDescription": "Houdt audio op dit apparaat. Spraakinput werkt niet als het apparaat geen spraakherkenning ondersteunt.",
+  "sttEngineServerDescription": "Stuurt opnames altijd naar je OpenWebUI-server voor transcriptie.",
+  "sttDeviceUnavailableWarning": "Spraakherkenning op het apparaat is niet beschikbaar op dit apparaat.",
+  "sttServerUnavailableWarning": "Verbind met een server met transcriptie ingeschakeld om deze optie te gebruiken.",
+  "sttSilenceDuration": "Stilteduur",
+  "sttSilenceDurationDescription": "Tijd om te wachten na stilte voordat de opname automatisch stopt",
   "ttsSettings": "Tekst naar spraak",
+  "ttsEngineAuto": "Automatisch",
+  "ttsEngineAutoDescription": "Gebruikt spraaksynthese op het apparaat wanneer beschikbaar en valt anders terug op je server.",
+  "ttsEngineDeviceDescription": "Houdt de synthese op dit apparaat. Spraakweergave werkt niet als het apparaat geen TTS ondersteunt.",
+  "ttsEngineServerDescription": "Vraagt altijd audio op bij je OpenWebUI-server.",
+  "ttsDeviceUnavailableWarning": "Spraaksynthese op het apparaat is niet beschikbaar op dit apparaat.",
+  "ttsServerUnavailableWarning": "Verbind met een server met tekst-naar-spraak ingeschakeld om deze optie te gebruiken.",
   "ttsVoice": "Stem",
   "ttsSpeechRate": "Spraaksnelheid",
   "ttsPitch": "Toonhoogte",
diff --git a/lib/l10n/app_ru.arb b/lib/l10n/app_ru.arb
index d438815..3383c6c 100644
--- a/lib/l10n/app_ru.arb
+++ b/lib/l10n/app_ru.arb
@@ -307,7 +307,25 @@
   "chatSettings": "Чат",
   "sendOnEnter": "Отправка по Enter",
   "sendOnEnterDescription": "Enter отправляет (программная клавиатура). Также доступно Cmd/Ctrl+Enter",
+  "sttSettings": "Речь в текст",
+  "sttEngineLabel": "Движок распознавания",
+  "sttEngineAuto": "Авто",
+  "sttEngineDevice": "На устройстве",
+  "sttEngineServer": "Сервер",
+  "sttEngineAutoDescription": "Использует распознавание на устройстве, когда это возможно, иначе переключается на ваш сервер.",
+  "sttEngineDeviceDescription": "Оставляет звук на этом устройстве. Голосовой ввод не работает, если устройство не поддерживает распознавание речи.",
+  "sttEngineServerDescription": "Всегда отправляет записи на сервер OpenWebUI для транскрибации.",
+  "sttDeviceUnavailableWarning": "Распознавание речи на устройстве недоступно на этом устройстве.",
+  "sttServerUnavailableWarning": "Подключитесь к серверу с включённой транскрибацией, чтобы использовать эту опцию.",
+  "sttSilenceDuration": "Длительность тишины",
+  "sttSilenceDurationDescription": "Время ожидания после тишины перед автоматической остановкой записи",
   "ttsSettings": "Преобразование текста в речь",
+  "ttsEngineAuto": "Авто",
+  "ttsEngineAutoDescription": "Использует синтез речи на устройстве, когда это возможно, иначе переключается на ваш сервер.",
+  "ttsEngineDeviceDescription": "Оставляет синтез на этом устройстве. Воспроизведение голоса не работает, если устройство не поддерживает синтез речи.",
+  "ttsEngineServerDescription": "Всегда запрашивает аудио у вашего сервера OpenWebUI.",
+  "ttsDeviceUnavailableWarning": "Синтез речи на устройстве недоступен на этом устройстве.",
+  "ttsServerUnavailableWarning": "Подключитесь к серверу с включённым синтезом речи, чтобы использовать эту опцию.",
   "ttsVoice": "Голос",
   "ttsSpeechRate": "Скорость речи",
   "ttsPitch": "Высота тона",
diff --git a/lib/l10n/app_zh.arb b/lib/l10n/app_zh.arb
index b8b41f9..0ac5761 100644
--- a/lib/l10n/app_zh.arb
+++ b/lib/l10n/app_zh.arb
@@ -307,7 +307,25 @@
   "chatSettings": "对话",
   "sendOnEnter": "回车发送",
   "sendOnEnterDescription": "回车发送（软键盘）。Cmd/Ctrl+Enter 也可用",
+  "sttSettings": "语音转文字",
+  "sttEngineLabel": "识别引擎",
+  "sttEngineAuto": "自动",
+  "sttEngineDevice": "本机",
+  "sttEngineServer": "服务器",
+  "sttEngineAutoDescription": "在可用时使用本机识别，否则切换到你的服务器。",
+  "sttEngineDeviceDescription": "音频会保留在此设备上。如果设备不支持语音识别，语音输入将不可用。",
+  "sttEngineServerDescription": "始终将录音发送到你的 OpenWebUI 服务器进行转写。",
+  "sttDeviceUnavailableWarning": "此设备不支持本机语音识别。",
+  "sttServerUnavailableWarning": "连接到启用转写功能的服务器后才能使用此选项。",
+  "sttSilenceDuration": "静音持续时间",
+  "sttSilenceDurationDescription": "检测到静音后等待多久自动停止录音",
   "ttsSettings": "文本转语音",
+  "ttsEngineAuto": "自动",
+  "ttsEngineAutoDescription": "在可用时使用本机合成，否则切换到你的服务器。",
+  "ttsEngineDeviceDescription": "在此设备上完成合成。如果设备不支持文本转语音，语音播放将不可用。",
+  "ttsEngineServerDescription": "始终向你的 OpenWebUI 服务器请求音频。",
+  "ttsDeviceUnavailableWarning": "此设备不支持本机文本转语音。",
+  "ttsServerUnavailableWarning": "连接到启用文本转语音的服务器后才能使用此选项。",
   "ttsVoice": "语音",
   "ttsSpeechRate": "语速",
   "ttsPitch": "音调",
diff --git a/pubspec.lock b/pubspec.lock
index 405533d..b662398 100644
--- a/pubspec.lock
+++ b/pubspec.lock
@@ -965,6 +965,14 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "1.16.0"
+  mic_stream_recorder:
+    dependency: "direct main"
+    description:
+      name: mic_stream_recorder
+      sha256: "73965991ef5cc93d2b0c1e6d590cbd567a853b9ee7b2d52de43a73f185bb0d9c"
+      url: "https://pub.dev"
+    source: hosted
+    version: "1.1.2"
   mime:
     dependency: transitive
     description:
@@ -1165,70 +1173,6 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "1.5.0"
-  record:
-    dependency: "direct main"
-    description:
-      name: record
-      sha256: "9dbc6ff3e784612f90a9b001373c45ff76b7a08abd2bd9fdf72c242320c8911c"
-      url: "https://pub.dev"
-    source: hosted
-    version: "6.1.1"
-  record_android:
-    dependency: transitive
-    description:
-      name: record_android
-      sha256: "854627cd78d8d66190377f98477eee06ca96ab7c9f2e662700daf33dbf7e6673"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.4.2"
-  record_ios:
-    dependency: transitive
-    description:
-      name: record_ios
-      sha256: "13e241ed9cbc220534a40ae6b66222e21288db364d96dd66fb762ebd3cb77c71"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.1.2"
-  record_linux:
-    dependency: transitive
-    description:
-      name: record_linux
-      sha256: "235b1f1fb84e810f8149cc0c2c731d7d697f8d1c333b32cb820c449bf7bb72d8"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.2.1"
-  record_macos:
-    dependency: transitive
-    description:
-      name: record_macos
-      sha256: "2849068bb59072f300ad63ed146e543d66afaef8263edba4de4834fc7c8d4d35"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.1.1"
-  record_platform_interface:
-    dependency: transitive
-    description:
-      name: record_platform_interface
-      sha256: b0065fdf1ec28f5a634d676724d388a77e43ce7646fb049949f58c69f3fcb4ed
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.4.0"
-  record_web:
-    dependency: transitive
-    description:
-      name: record_web
-      sha256: "4f0adf20c9ccafcc02d71111fd91fba1ca7b17a7453902593e5a9b25b74a5c56"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.2.0"
-  record_windows:
-    dependency: transitive
-    description:
-      name: record_windows
-      sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78"
-      url: "https://pub.dev"
-    source: hosted
-    version: "1.0.7"
   riverpod:
     dependency: transitive
     description:
diff --git a/pubspec.yaml b/pubspec.yaml
index ee02fd1..10af0a9 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -44,7 +44,7 @@ dependencies:
   flutter_animate: ^4.5.0
   
   # Platform Features
-  record: ^6.1.1
+  mic_stream_recorder: ^1.1.2
   stts: ^1.2.5
   flutter_tts: ^4.2.3
   audioplayers: ^6.5.1