diff --git a/ios/Podfile.lock b/ios/Podfile.lock
index c990258..9a90a3c 100644
--- a/ios/Podfile.lock
+++ b/ios/Podfile.lock
@@ -1,4 +1,6 @@
 PODS:
+  - audioplayers_darwin (0.0.1):
+    - Flutter
   - connectivity_plus (0.0.1):
     - Flutter
   - DKImagePickerController/Core (4.3.9):
@@ -84,6 +86,7 @@ PODS:
     - FlutterMacOS
 
 DEPENDENCIES:
+  - audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/ios`)
   - connectivity_plus (from `.symlinks/plugins/connectivity_plus/ios`)
   - file_picker (from `.symlinks/plugins/file_picker/ios`)
   - Flutter (from `Flutter`)
@@ -113,6 +116,8 @@ SPEC REPOS:
     - SwiftyGif
 
 EXTERNAL SOURCES:
+  audioplayers_darwin:
+    :path: ".symlinks/plugins/audioplayers_darwin/ios"
   connectivity_plus:
     :path: ".symlinks/plugins/connectivity_plus/ios"
   file_picker:
@@ -155,6 +160,7 @@ EXTERNAL SOURCES:
     :path: ".symlinks/plugins/webview_flutter_wkwebview/darwin"
 
 SPEC CHECKSUMS:
+  audioplayers_darwin: ccf9c770ee768abb07e26d90af093f7bab1c12ab
   connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd
   DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
   DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60
diff --git a/lib/core/persistence/persistence_keys.dart b/lib/core/persistence/persistence_keys.dart
index 80e58b7..6afce43 100644
--- a/lib/core/persistence/persistence_keys.dart
+++ b/lib/core/persistence/persistence_keys.dart
@@ -25,6 +25,9 @@ final class PreferenceKeys {
   static const String ttsSpeechRate = 'tts_speech_rate';
   static const String ttsPitch = 'tts_pitch';
   static const String ttsVolume = 'tts_volume';
+  static const String ttsEngine = 'tts_engine'; // 'device' | 'server'
+  static const String ttsServerVoiceId = 'tts_server_voice_id';
+  static const String ttsServerVoiceName = 'tts_server_voice_name';
 }
 
 final class LegacyPreferenceKeys {
diff --git a/lib/core/providers/app_providers.dart b/lib/core/providers/app_providers.dart
index 9257d86..8ba3794 100644
--- a/lib/core/providers/app_providers.dart
+++ b/lib/core/providers/app_providers.dart
@@ -1830,7 +1830,11 @@ Future<List<String>> availableVoices(Ref ref) async {
   if (api == null) return [];
 
   try {
-    return await api.getAvailableVoices();
+    final voices = await api.getAvailableServerVoices();
+    return voices
+        .map((v) => (v['name'] ?? v['id'] ?? '').toString())
+        .where((s) => s.isNotEmpty)
+        .toList();
   } catch (e) {
     DebugLogger.error('voices-failed', scope: 'voices', error: e);
     return [];
diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart
index acc7a77..46044aa 100644
--- a/lib/core/services/api_service.dart
+++ b/lib/core/services/api_service.dart
@@ -2261,12 +2261,24 @@ class ApiService {
   }
 
   // Audio
-  Future<List<String>> getAvailableVoices() async {
-    _traceApi('Fetching available voices');
+  Future<List<Map<String, dynamic>>> getAvailableServerVoices() async {
+    _traceApi('Fetching server TTS voices');
     final response = await _dio.get('/api/v1/audio/voices');
     final data = response.data;
+    if (data is Map<String, dynamic>) {
+      final voices = data['voices'];
+      if (voices is List) {
+        return voices
+            .whereType<Map>()
+            .map((e) => e.cast<String, dynamic>())
+            .toList();
+      }
+    }
     if (data is List) {
-      return data.cast<String>();
+      // Fallback: plain list of ids
+      return data
+          .map((e) => {'id': e.toString(), 'name': e.toString()})
+          .toList();
     }
     return [];
   }
@@ -2279,13 +2291,15 @@ class ApiService {
     _traceApi('Generating speech for text: $textPreview...');
     final response = await _dio.post(
       '/api/v1/audio/speech',
-      data: {'text': text, if (voice != null) 'voice': voice},
+      data: {'input': text, if (voice != null) 'voice': voice},
+      options: Options(responseType: ResponseType.bytes),
     );
 
     // Return audio data as bytes
-    if (response.data is List) {
-      return (response.data as List).cast<int>();
-    }
+    final data = response.data;
+    if (data is List<int>) return data;
+    if (data is Uint8List) return data.toList();
+    if (data is List) return (data).cast<int>();
     return [];
   }
 
diff --git a/lib/core/services/settings_service.dart b/lib/core/services/settings_service.dart
index b21a168..b40b533 100644
--- a/lib/core/services/settings_service.dart
+++ b/lib/core/services/settings_service.dart
@@ -8,6 +8,9 @@ import 'animation_service.dart';
 
 part 'settings_service.g.dart';
 
+/// TTS engine selection
+enum TtsEngine { device, server }
+
 /// Service for managing app-wide settings including accessibility preferences
 class SettingsService {
   static const String _reduceMotionKey = PreferenceKeys.reduceMotion;
@@ -142,6 +145,12 @@ class SettingsService {
         ttsPitch: (box.get(PreferenceKeys.ttsPitch) as num?)?.toDouble() ?? 1.0,
         ttsVolume:
             (box.get(PreferenceKeys.ttsVolume) as num?)?.toDouble() ?? 1.0,
+        ttsEngine: _parseTtsEngine(
+          box.get(PreferenceKeys.ttsEngine) as String?,
+        ),
+        ttsServerVoiceId: box.get(PreferenceKeys.ttsServerVoiceId) as String?,
+        ttsServerVoiceName:
+            box.get(PreferenceKeys.ttsServerVoiceName) as String?,
       ),
     );
   }
@@ -164,6 +173,7 @@ class SettingsService {
       PreferenceKeys.ttsSpeechRate: settings.ttsSpeechRate,
       PreferenceKeys.ttsPitch: settings.ttsPitch,
       PreferenceKeys.ttsVolume: settings.ttsVolume,
+      PreferenceKeys.ttsEngine: settings.ttsEngine.name,
     };
 
     await box.putAll(updates);
@@ -185,6 +195,33 @@ class SettingsService {
     } else {
       await box.delete(PreferenceKeys.ttsVoice);
     }
+
+    // Server-specific voice id and friendly name
+    if (settings.ttsServerVoiceId != null &&
+        settings.ttsServerVoiceId!.isNotEmpty) {
+      await box.put(PreferenceKeys.ttsServerVoiceId, settings.ttsServerVoiceId);
+    } else {
+      await box.delete(PreferenceKeys.ttsServerVoiceId);
+    }
+    if (settings.ttsServerVoiceName != null &&
+        settings.ttsServerVoiceName!.isNotEmpty) {
+      await box.put(
+        PreferenceKeys.ttsServerVoiceName,
+        settings.ttsServerVoiceName,
+      );
+    } else {
+      await box.delete(PreferenceKeys.ttsServerVoiceName);
+    }
+  }
+
+  /// Maps a stored engine name to [TtsEngine], defaulting to device.
+  static TtsEngine _parseTtsEngine(String? raw) {
+    // Matching ignores case; null and unknown names fall back to device.
+    final name = (raw ?? '').toLowerCase();
+    if (name == 'server') {
+      return TtsEngine.server;
+    }
+    return TtsEngine.device;
   }
 
   // Voice input specific settings
@@ -314,6 +351,9 @@ class AppSettings {
   final double ttsSpeechRate;
   final double ttsPitch;
   final double ttsVolume;
+  final TtsEngine ttsEngine;
+  final String? ttsServerVoiceId;
+  final String? ttsServerVoiceName;
   const AppSettings({
     this.reduceMotion = false,
     this.animationSpeed = 1.0,
@@ -332,6 +372,9 @@ class AppSettings {
     this.ttsSpeechRate = 0.5,
     this.ttsPitch = 1.0,
     this.ttsVolume = 1.0,
+    this.ttsEngine = TtsEngine.device,
+    this.ttsServerVoiceId,
+    this.ttsServerVoiceName,
   });
 
   AppSettings copyWith({
@@ -352,6 +395,9 @@ class AppSettings {
     double? ttsSpeechRate,
     double? ttsPitch,
     double? ttsVolume,
+    TtsEngine? ttsEngine,
+    Object? ttsServerVoiceId = const _DefaultValue(),
+    Object? ttsServerVoiceName = const _DefaultValue(),
   }) {
     return AppSettings(
       reduceMotion: reduceMotion ?? this.reduceMotion,
@@ -375,6 +421,13 @@ class AppSettings {
       ttsSpeechRate: ttsSpeechRate ?? this.ttsSpeechRate,
       ttsPitch: ttsPitch ?? this.ttsPitch,
       ttsVolume: ttsVolume ?? this.ttsVolume,
+      ttsEngine: ttsEngine ?? this.ttsEngine,
+      ttsServerVoiceId: ttsServerVoiceId is _DefaultValue
+          ? this.ttsServerVoiceId
+          : ttsServerVoiceId as String?,
+      ttsServerVoiceName: ttsServerVoiceName is _DefaultValue
+          ? this.ttsServerVoiceName
+          : ttsServerVoiceName as String?,
     );
   }
 
@@ -397,6 +450,9 @@ class AppSettings {
         other.ttsSpeechRate == ttsSpeechRate &&
         other.ttsPitch == ttsPitch &&
         other.ttsVolume == ttsVolume &&
+        other.ttsEngine == ttsEngine &&
+        other.ttsServerVoiceId == ttsServerVoiceId &&
+        other.ttsServerVoiceName == ttsServerVoiceName &&
         _listEquals(other.quickPills, quickPills);
     // socketTransportMode intentionally not included in == to avoid frequent rebuilds
   }
@@ -420,6 +476,9 @@ class AppSettings {
       ttsSpeechRate,
       ttsPitch,
       ttsVolume,
+      ttsEngine,
+      ttsServerVoiceId,
+      ttsServerVoiceName,
       Object.hashAllUnordered(quickPills),
     );
   }
@@ -543,6 +602,21 @@ class AppSettingsNotifier extends _$AppSettingsNotifier {
     await SettingsService.saveSettings(state);
   }
 
+  Future<void> setTtsEngine(TtsEngine engine) async {
+    state = state.copyWith(ttsEngine: engine);
+    await SettingsService.saveSettings(state);
+  }
+
+  Future<void> setTtsServerVoiceName(String? name) async {
+    state = state.copyWith(ttsServerVoiceName: name);
+    await SettingsService.saveSettings(state);
+  }
+
+  Future<void> setTtsServerVoiceId(String? id) async {
+    state = state.copyWith(ttsServerVoiceId: id);
+    await SettingsService.saveSettings(state);
+  }
+
   Future<void> resetToDefaults() async {
     const defaultSettings = AppSettings();
     await SettingsService.saveSettings(defaultSettings);
diff --git a/lib/features/chat/providers/text_to_speech_provider.dart b/lib/features/chat/providers/text_to_speech_provider.dart
index b25e341..a68aff4 100644
--- a/lib/features/chat/providers/text_to_speech_provider.dart
+++ b/lib/features/chat/providers/text_to_speech_provider.dart
@@ -3,6 +3,7 @@ import 'dart:async';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 
 import '../../../core/services/settings_service.dart';
+import '../../../core/providers/app_providers.dart';
 import '../../../core/utils/markdown_to_text.dart';
 import '../services/text_to_speech_service.dart';
 
@@ -14,6 +15,11 @@ class TextToSpeechState {
   final TtsPlaybackStatus status;
   final String? activeMessageId;
   final String? errorMessage;
+  final List<String> sentences;
+  final List<int> sentenceOffsets; // start indices in full text
+  final int activeSentenceIndex; // -1 when none
+  final int? wordStartInSentence; // nullable; only for on-device
+  final int? wordEndInSentence; // nullable; only for on-device
 
   const TextToSpeechState({
     this.initialized = false,
@@ -21,6 +27,11 @@ class TextToSpeechState {
     this.status = TtsPlaybackStatus.idle,
     this.activeMessageId,
     this.errorMessage,
+    this.sentences = const [],
+    this.sentenceOffsets = const [],
+    this.activeSentenceIndex = -1,
+    this.wordStartInSentence,
+    this.wordEndInSentence,
   });
 
   bool get isSpeaking => status == TtsPlaybackStatus.speaking;
@@ -36,6 +47,12 @@ class TextToSpeechState {
     bool clearActiveMessageId = false,
     String? errorMessage,
     bool clearErrorMessage = false,
+    List<String>? sentences,
+    List<int>? sentenceOffsets,
+    int? activeSentenceIndex,
+    bool clearWord = false,
+    int? wordStartInSentence,
+    int? wordEndInSentence,
   }) {
     return TextToSpeechState(
       initialized: initialized ?? this.initialized,
@@ -47,6 +64,15 @@ class TextToSpeechState {
       errorMessage: clearErrorMessage
           ? null
           : errorMessage ?? this.errorMessage,
+      sentences: sentences ?? this.sentences,
+      sentenceOffsets: sentenceOffsets ?? this.sentenceOffsets,
+      activeSentenceIndex: activeSentenceIndex ?? this.activeSentenceIndex,
+      wordStartInSentence: clearWord
+          ? null
+          : (wordStartInSentence ?? this.wordStartInSentence),
+      wordEndInSentence: clearWord
+          ? null
+          : (wordEndInSentence ?? this.wordEndInSentence),
     );
   }
 }
@@ -69,6 +95,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
         onPause: _handlePause,
         onContinue: _handleContinue,
         onError: _handleError,
+        onSentenceIndex: _handleSentenceIndex,
+        onDeviceWordProgress: _handleDeviceWordProgress,
       );
 
       ref.onDispose(() {
@@ -79,11 +107,15 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
     // Listen to settings changes and update TTS when initialized
     ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
       if (_service.isInitialized && _service.isAvailable) {
+        final selectedVoice = next.ttsEngine == TtsEngine.server
+            ? next.ttsServerVoiceId
+            : next.ttsVoice;
         _service.updateSettings(
-          voice: next.ttsVoice,
+          voice: selectedVoice,
           speechRate: next.ttsSpeechRate,
           pitch: next.ttsPitch,
           volume: next.ttsVolume,
+          engine: next.ttsEngine,
         );
       }
     }, fireImmediately: false);
@@ -105,10 +137,13 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
     final settings = ref.read(appSettingsProvider);
     final future = _service
         .initialize(
-          voice: settings.ttsVoice,
+          voice: settings.ttsEngine == TtsEngine.server
+              ? settings.ttsServerVoiceId
+              : settings.ttsVoice,
           speechRate: settings.ttsSpeechRate,
           pitch: settings.ttsPitch,
           volume: settings.ttsVolume,
+          engine: settings.ttsEngine,
         )
         .then((available) {
           if (!ref.mounted) {
@@ -176,15 +211,23 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
       return;
     }
 
+    // Prepare sentence split for highlighting
+    final cleanText = MarkdownToText.convert(text);
+    final sentences = _splitForTts(cleanText);
+    final offsets = _computeOffsets(sentences);
+
     state = state.copyWith(
       status: TtsPlaybackStatus.loading,
       activeMessageId: messageId,
       clearErrorMessage: true,
+      sentences: sentences,
+      sentenceOffsets: offsets,
+      activeSentenceIndex: sentences.isEmpty ? -1 : 0,
+      clearWord: true,
     );
 
     try {
       // Convert markdown to clean text for TTS
-      final cleanText = MarkdownToText.convert(text);
       if (cleanText.isEmpty) {
         // No speakable content
         if (!ref.mounted) {
@@ -216,6 +259,34 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
     }
   }
 
+  /// Splits [text] into whitespace-normalized sentences for highlighting; a
+  /// sentence ends at `.`, `!` or `?` before whitespace or the end of text.
+  List<String> _splitForTts(String text) {
+    final normalized = text.replaceAll(RegExp(r'\s+'), ' ').trim();
+    if (normalized.isEmpty) return const [];
+    final parts = <String>[];
+    // The end anchor must be a bare `$`: `\$` matches a literal dollar sign.
+    final sentenceRegex = RegExp(r'(.+?[.!?]+)(\s+|$)');
+    var index = 0;
+    for (final match in sentenceRegex.allMatches(normalized)) {
+      final sentence = (match.group(1) ?? '').trim();
+      if (sentence.isNotEmpty) parts.add(sentence);
+      index = match.end;
+    }
+    final tail = normalized.substring(index).trim();
+    return [...parts, if (tail.isNotEmpty) tail];
+  }
+
+  /// Returns each sentence's start offset, assuming one separator between.
+  List<int> _computeOffsets(List<String> sentences) {
+    var next = 0; // start of the sentence about to be emitted
+    return [
+      for (final sentence in sentences)
+        // Yield the current start, then step past the sentence + separator.
+        (next += sentence.length + 1) - (sentence.length + 1),
+    ];
+  }
+
   Future<void> pause() async {
     if (!state.initialized || !state.available) {
       return;
@@ -286,10 +357,46 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
       clearActiveMessageId: true,
     );
   }
+
+  /// Records the sentence now being spoken, clamped to the known sentences
+  /// (-1 when there are none), and resets any per-word highlight.
+  void _handleSentenceIndex(int index) {
+    if (!ref.mounted) return;
+    // An empty list yields an upper bound of -1, i.e. "no active sentence".
+    final lastIndex = state.sentences.length - 1;
+    state = state.copyWith(
+      activeSentenceIndex: index.clamp(-1, lastIndex),
+      // The word range belongs to the previous sentence once the index moves.
+      clearWord: true,
+    );
+  }
+
+  /// Handles a device word-progress event given as global text offsets.
+  ///
+  /// Publishes the containing sentence and sentence-relative word bounds.
+  void _handleDeviceWordProgress(int start, int end) {
+    if (!ref.mounted) return;
+    final offsets = state.sentenceOffsets;
+    if (offsets.isEmpty) return;
+    // A sentence owns [its offset, next offset); the last one is open-ended.
+    int upperBound(int i) =>
+        i + 1 < offsets.length ? offsets[i + 1] : 1 << 30;
+    final found = Iterable<int>.generate(offsets.length).firstWhere(
+      (i) => start >= offsets[i] && start < upperBound(i),
+      orElse: () => 0, // starts before the first offset map to sentence 0
+    );
+    final base = offsets[found];
+    state = state.copyWith(
+      activeSentenceIndex: found,
+      wordStartInSentence: (start - base).clamp(0, 1 << 20),
+      wordEndInSentence: (end - base).clamp(0, 1 << 20),
+    );
+  }
 }
 
 final textToSpeechServiceProvider = Provider<TextToSpeechService>((ref) {
-  final service = TextToSpeechService();
+  final api = ref.watch(apiServiceProvider);
+  final service = TextToSpeechService(api: api);
   ref.onDispose(() {
     unawaited(service.dispose());
   });
diff --git a/lib/features/chat/services/text_to_speech_service.dart b/lib/features/chat/services/text_to_speech_service.dart
index 6591f41..9f01ebb 100644
--- a/lib/features/chat/services/text_to_speech_service.dart
+++ b/lib/features/chat/services/text_to_speech_service.dart
@@ -1,16 +1,29 @@
 import 'dart:async';
 import 'dart:io' show Platform;
 
+import 'package:audioplayers/audioplayers.dart';
 import 'package:flutter/foundation.dart';
 import 'package:flutter/widgets.dart';
 import 'package:flutter_tts/flutter_tts.dart';
 
+import '../../../core/services/api_service.dart';
+import '../../../core/services/settings_service.dart';
+
 /// Lightweight wrapper around FlutterTts to centralize configuration
 class TextToSpeechService {
   final FlutterTts _tts = FlutterTts();
+  final AudioPlayer _player = AudioPlayer();
+  final ApiService? _api;
+  TtsEngine _engine = TtsEngine.device;
+  String? _preferredVoice;
   bool _initialized = false;
   bool _available = false;
   bool _voiceConfigured = false;
+  int _session = 0; // increments to cancel in-flight work
+  final List<Uint8List> _buffered = <Uint8List>[]; // server chunks
+  int _expectedChunks = 0;
+  int _currentIndex = -1;
+  bool _waitingNext = false;
 
   VoidCallback? _onStart;
   VoidCallback? _onComplete;
@@ -18,10 +31,20 @@ class TextToSpeechService {
   VoidCallback? _onPause;
   VoidCallback? _onContinue;
   void Function(String message)? _onError;
+  void Function(int sentenceIndex)? _onSentenceIndex;
+  void Function(int start, int end)? _onDeviceWordProgress;
 
   bool get isInitialized => _initialized;
   bool get isAvailable => _available;
 
+  TextToSpeechService({ApiService? api}) : _api = api {
+    // Wire minimal player events to callbacks
+    _player.onPlayerComplete.listen((_) => _onAudioComplete());
+    _player.onPlayerStateChanged.listen((s) {
+      if (s == PlayerState.playing) _handleStart();
+    });
+  }
+
   /// Register callbacks for TTS lifecycle events
   void bindHandlers({
     VoidCallback? onStart,
@@ -30,6 +53,8 @@ class TextToSpeechService {
     VoidCallback? onPause,
     VoidCallback? onContinue,
     void Function(String message)? onError,
+    void Function(int sentenceIndex)? onSentenceIndex,
+    void Function(int start, int end)? onDeviceWordProgress,
   }) {
     _onStart = onStart;
     _onComplete = onComplete;
@@ -37,6 +62,8 @@ class TextToSpeechService {
     _onPause = onPause;
     _onContinue = onContinue;
     _onError = onError;
+    _onSentenceIndex = onSentenceIndex;
+    _onDeviceWordProgress = onDeviceWordProgress;
 
     _tts.setStartHandler(_handleStart);
     _tts.setCompletionHandler(_handleComplete);
@@ -44,6 +71,13 @@ class TextToSpeechService {
     _tts.setPauseHandler(_handlePause);
     _tts.setContinueHandler(_handleContinue);
     _tts.setErrorHandler(_handleError);
+    try {
+      _tts.setProgressHandler((String text, int start, int end, String word) {
+        _onDeviceWordProgress?.call(start, end);
+      });
+    } catch (_) {
+      // Some platforms may not support progress handler
+    }
   }
 
   /// Initialize the native TTS engine lazily
@@ -52,12 +86,15 @@ class TextToSpeechService {
     double speechRate = 0.5,
     double pitch = 1.0,
     double volume = 1.0,
+    TtsEngine engine = TtsEngine.device,
   }) async {
     if (_initialized) {
       return _available;
     }
 
     try {
+      _engine = engine;
+      _preferredVoice = voice;
       await _tts.awaitSpeakCompletion(false);
 
       // Set volume
@@ -97,34 +134,47 @@ class TextToSpeechService {
     }
 
     if (!_initialized) {
-      await initialize();
+      await initialize(voice: _preferredVoice, engine: _engine);
     }
 
+    if (_engine == TtsEngine.server && _api != null) {
+      // Server-backed TTS with sentence chunking & queued playback
+      try {
+        await _startServerChunkedPlayback(text);
+      } catch (e) {
+        _onError?.call(e.toString());
+        await _speakOnDevice(text);
+      }
+      return;
+    }
+
+    // Device TTS path
+    await _speakOnDevice(text);
+  }
+
+  Future<void> _speakOnDevice(String text) async {
     if (!_available) {
       throw StateError('Text-to-speech is unavailable on this device');
     }
-
     await _tts.stop();
     if (!_voiceConfigured) {
       await _configurePreferredVoice();
     }
     final result = await _tts.speak(text);
-    if (result == null) {
-      return;
-    }
-
     if (result is int && result != 1) {
       _onError?.call('Text-to-speech engine returned code $result');
     }
+    _onSentenceIndex?.call(0);
   }
 
   Future<void> pause() async {
-    if (!_initialized || !_available) {
-      return;
-    }
-
+    if (!_initialized) return;
     try {
-      await _tts.pause();
+      if (_engine == TtsEngine.server) {
+        await _player.pause();
+      } else if (_available) {
+        await _tts.pause();
+      }
     } catch (e) {
       _onError?.call(e.toString());
     }
@@ -136,7 +186,17 @@ class TextToSpeechService {
     }
 
     try {
-      await _tts.stop();
+      // Cancel any in-flight server work
+      _session++;
+      _buffered.clear();
+      _expectedChunks = 0;
+      _currentIndex = -1;
+      _waitingNext = false;
+      if (_engine == TtsEngine.server) {
+        await _player.stop();
+      } else {
+        await _tts.stop();
+      }
     } catch (e) {
       _onError?.call(e.toString());
     }
@@ -144,6 +204,7 @@ class TextToSpeechService {
 
   Future<void> dispose() async {
     await stop();
+    await _player.dispose();
   }
 
   /// Update TTS settings on-the-fly
@@ -152,12 +213,22 @@ class TextToSpeechService {
     double? speechRate,
     double? pitch,
     double? volume,
+    TtsEngine? engine,
   }) async {
     if (!_initialized || !_available) {
+      // Allow engine and voice to update before init
+      if (engine != null) _engine = engine;
+      if (voice != null) _preferredVoice = voice;
       return;
     }
 
     try {
+      if (engine != null) {
+        _engine = engine;
+      }
+      if (voice != null) {
+        _preferredVoice = voice;
+      }
       if (volume != null) {
         await _tts.setVolume(volume);
       }
@@ -167,8 +238,10 @@ class TextToSpeechService {
       if (pitch != null) {
         await _tts.setPitch(pitch);
       }
-      // Set specific voice by name
-      await _setVoiceByName(voice);
+      // Set specific voice by name on device engine
+      if (_engine == TtsEngine.device) {
+        await _setVoiceByName(_preferredVoice);
+      }
     } catch (e) {
       _onError?.call(e.toString());
     }
@@ -224,7 +297,31 @@ class TextToSpeechService {
   /// Get available voices from the TTS engine
   Future<List<Map<String, dynamic>>> getAvailableVoices() async {
     if (!_initialized) {
-      await initialize();
+      await initialize(voice: _preferredVoice, engine: _engine);
+    }
+
+    if (_engine == TtsEngine.server && _api != null) {
+      try {
+        final serverVoices = await _api.getAvailableServerVoices();
+        final mapped = serverVoices
+            .map(
+              (v) => {
+                'name': (v['name'] ?? v['id'] ?? '').toString(),
+                'locale': (v['locale'] ?? '').toString(),
+              },
+            )
+            .where((e) => (e['name'] as String).isNotEmpty)
+            .toList();
+        if (mapped.isEmpty) {
+          return [
+            {'name': 'alloy', 'locale': ''},
+          ];
+        }
+        return mapped;
+      } catch (e) {
+        _onError?.call(e.toString());
+        // Fall back to device voices
+      }
     }
 
     if (!_available) {
@@ -254,6 +351,174 @@ class TextToSpeechService {
     }
   }
 
+  // ===== Server chunked playback =====
+
+  /// Starts server-backed playback of [text], one chunk at a time.
+  ///
+  /// The first chunk is fetched and played before returning; the remaining
+  /// chunks are fetched in the background. Throws when the first chunk is
+  /// empty so the caller can fall back to another engine.
+  Future<void> _startServerChunkedPlayback(String text) async {
+    final effectiveVoice =
+        (_preferredVoice == null || _preferredVoice!.trim().isEmpty)
+        ? 'alloy'
+        : _preferredVoice!;
+
+    // Reset queue and create a new session
+    _session++;
+    final session = _session;
+    _buffered.clear();
+    _expectedChunks = 0;
+    _currentIndex = -1;
+    _waitingNext = false;
+
+    final chunks = _splitForTts(text);
+    if (chunks.isEmpty) return;
+    _expectedChunks = chunks.length;
+
+    // Fetch first chunk to start playback quickly
+    final firstBytes = await _fetchServerAudio(
+      chunks.first,
+      effectiveVoice,
+      session,
+    );
+    if (session != _session) return; // canceled
+    if (firstBytes.isEmpty) throw Exception('Empty audio response');
+
+    await _player.stop();
+    // stop() or a newer speak() may have run while the player was stopping.
+    if (session != _session) return;
+    _buffered.add(Uint8List.fromList(firstBytes));
+    _currentIndex = 0;
+    await _player.play(BytesSource(_buffered.first));
+    _onSentenceIndex?.call(0);
+
+    // Prefetch the rest in background
+    unawaited(
+      _prefetchRemainingChunks(
+        chunks.skip(1).toList(),
+        effectiveVoice,
+        session,
+      ),
+    );
+  }
+
+  /// Fetches [remaining] chunks in order and appends each to the buffer.
+  ///
+  /// A chunk that fails or comes back empty is stored as an empty
+  /// placeholder. That keeps buffer indices equal to chunk indices and lets
+  /// playback skip the gap instead of waiting forever for audio that never
+  /// arrives.
+  Future<void> _prefetchRemainingChunks(
+    List<String> remaining,
+    String voice,
+    int session,
+  ) async {
+    for (final chunk in remaining) {
+      if (session != _session) return; // canceled
+      var audio = Uint8List(0);
+      try {
+        audio = Uint8List.fromList(
+          await _fetchServerAudio(chunk, voice, session),
+        );
+      } catch (e) {
+        _onError?.call(e.toString());
+        // continue with other chunks
+      }
+      if (session != _session) return;
+      _buffered.add(audio);
+      // If the player finished the previous chunk and is waiting, resume now.
+      if (!_waitingNext) continue;
+      try {
+        await _playNextIfBuffered(session);
+      } catch (e) {
+        _onError?.call(e.toString());
+      }
+    }
+  }
+
+  /// Requests synthesized audio bytes for [text] in [voice] from the server.
+  ///
+  /// [session] is not used here; callers compare it with [_session]
+  /// themselves once the request returns.
+  Future<List<int>> _fetchServerAudio(
+    String text,
+    String voice,
+    int session,
+  ) async {
+    return await _api!.generateSpeech(text: text, voice: voice);
+  }
+
+  /// Advances the queue when the player finishes the current chunk.
+  Future<void> _onAudioComplete() async {
+    await _playNextIfBuffered(_session);
+  }
+
+  /// Plays the next playable buffered chunk, if any.
+  ///
+  /// Empty placeholders are skipped. When nothing playable is buffered this
+  /// either marks the queue as waiting for the prefetcher or, once every
+  /// expected chunk is accounted for, reports completion.
+  Future<void> _playNextIfBuffered(int session) async {
+    if (session != _session) return;
+    var nextIndex = _currentIndex + 1;
+    if (nextIndex < 0) return;
+    while (nextIndex < _buffered.length && _buffered[nextIndex].isEmpty) {
+      nextIndex++;
+    }
+    if (nextIndex >= _buffered.length) {
+      // Remember how far we skipped so the next call resumes after the gaps.
+      _currentIndex = nextIndex - 1;
+      _waitingNext = nextIndex < _expectedChunks;
+      if (!_waitingNext) _handleComplete();
+      return;
+    }
+    _waitingNext = false;
+    _currentIndex = nextIndex;
+    await _player.play(BytesSource(_buffered[nextIndex]));
+    _onSentenceIndex?.call(_currentIndex);
+  }
+
+  /// Splits [text] into sentence chunks of at most 300 characters each.
+  /// Whitespace runs are collapsed; over-long sentences are cut further.
+  List<String> _splitForTts(String text) {
+    final normalized = text.replaceAll(RegExp(r'\s+'), ' ').trim();
+    if (normalized.isEmpty) return const [];
+
+    // Split on sentence-ending punctuation while keeping the delimiter.
+    // The end anchor must be a bare `$`; `\$` would match a literal dollar.
+    final parts = <String>[];
+    final sentenceRegex = RegExp(r'(.+?[.!?]+)(\s+|$)');
+    var index = 0;
+    for (final match in sentenceRegex.allMatches(normalized)) {
+      final s = (match.group(1) ?? '').trim();
+      if (s.isNotEmpty) parts.add(s);
+      index = match.end;
+    }
+    final tail = normalized.substring(index).trim();
+    if (tail.isNotEmpty) parts.add(tail);
+
+    // Fallback to length-based splits for very long segments
+    const maxLen = 300;
+    final chunks = <String>[];
+    for (final p in parts.isEmpty ? [normalized] : parts) {
+      if (p.length <= maxLen) {
+        chunks.add(p);
+      } else {
+        // Try splitting on commas/spaces
+        var remaining = p;
+        while (remaining.length > maxLen) {
+          int cut = remaining.lastIndexOf(RegExp(r",\s|\s"), maxLen);
+          cut = cut <= 0 ? maxLen : cut;
+          chunks.add(remaining.substring(0, cut).trim());
+          remaining = remaining.substring(cut).trim();
+        }
+        if (remaining.isNotEmpty) chunks.add(remaining);
+      }
+    }
+    return chunks;
+  }
+
   Future<void> _configurePreferredVoice() async {
     if (_voiceConfigured) {
       return;
diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart
index 370ee77..5d6a42b 100644
--- a/lib/features/chat/widgets/assistant_message_widget.dart
+++ b/lib/features/chat/widgets/assistant_message_widget.dart
@@ -18,6 +18,7 @@ import 'package:conduit/l10n/app_localizations.dart';
 import 'enhanced_attachment.dart';
 import 'package:conduit/shared/widgets/chat_action_button.dart';
 import '../../../shared/widgets/model_avatar.dart';
+import '../../../shared/widgets/conduit_components.dart';
 import 'package:url_launcher/url_launcher_string.dart';
 import '../providers/chat_providers.dart' show sendMessageWithContainer;
 import '../../../core/utils/debug_logger.dart';
@@ -457,12 +458,72 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
     }
 
     if (children.isEmpty) return const SizedBox.shrink();
+    // Append TTS karaoke bar if this is the active message
+    final ttsState = ref.watch(textToSpeechControllerProvider);
+    final isActive =
+        ttsState.activeMessageId == _messageId &&
+        (ttsState.status == TtsPlaybackStatus.speaking ||
+            ttsState.status == TtsPlaybackStatus.paused ||
+            ttsState.status == TtsPlaybackStatus.loading);
+    if (isActive && ttsState.activeSentenceIndex >= 0) {
+      children.add(const SizedBox(height: Spacing.sm));
+      children.add(_buildKaraokeBar(ttsState));
+    }
+
     return Column(
       crossAxisAlignment: CrossAxisAlignment.start,
       children: children,
     );
   }
 
+  /// Builds the card showing the sentence being spoken, highlighting the
+  /// current word when a valid word range is available.
+  Widget _buildKaraokeBar(TextToSpeechState ttsState) {
+    final theme = context.conduitTheme;
+    final idx = ttsState.activeSentenceIndex;
+    if (idx < 0 || idx >= ttsState.sentences.length) {
+      return const SizedBox.shrink();
+    }
+    final sentence = ttsState.sentences[idx];
+    final ws = ttsState.wordStartInSentence;
+    final we = ttsState.wordEndInSentence;
+    final baseStyle = TextStyle(
+      color: theme.textPrimary,
+      height: 1.2,
+      fontSize: 14,
+    );
+    final highlightStyle = baseStyle.copyWith(
+      backgroundColor: theme.buttonPrimary.withValues(alpha: 0.25),
+      fontWeight: FontWeight.w600,
+    );
+
+    InlineSpan buildSpans() {
+      if (ws == null ||
+          we == null ||
+          ws < 0 ||
+          we <= ws ||
+          ws >= sentence.length) {
+        return TextSpan(text: sentence, style: baseStyle);
+      }
+      final safeEnd = we.clamp(0, sentence.length);
+      final before = sentence.substring(0, ws);
+      final word = sentence.substring(ws, safeEnd);
+      final after = sentence.substring(safeEnd);
+      return TextSpan(
+        children: [
+          if (before.isNotEmpty) TextSpan(text: before, style: baseStyle),
+          TextSpan(text: word, style: highlightStyle),
+          if (after.isNotEmpty) TextSpan(text: after, style: baseStyle),
+        ],
+      );
+    }
+    return ConduitCard(
+      padding: const EdgeInsets.all(Spacing.sm),
+      // Text.rich (unlike RichText) follows the ambient text scaler.
+      child: Text.rich(buildSpans()),
+    );
+  }
+
   bool get _shouldShowTypingIndicator =>
       widget.isStreaming && _isAssistantResponseEmpty;
 
diff --git a/lib/features/profile/views/app_customization_page.dart b/lib/features/profile/views/app_customization_page.dart
index ba2c4ac..bc5e01d 100644
--- a/lib/features/profile/views/app_customization_page.dart
+++ b/lib/features/profile/views/app_customization_page.dart
@@ -441,10 +441,97 @@ class AppCustomizationPage extends ConsumerWidget {
               TextStyle(color: theme.sidebarForeground, fontSize: 18),
         ),
         const SizedBox(height: Spacing.sm),
+        ConduitCard(
+          padding: const EdgeInsets.all(Spacing.md),
+          child: Column(
+            crossAxisAlignment: CrossAxisAlignment.start,
+            children: [
+              Row(
+                children: [
+                  _buildIconBadge(
+                    context,
+                    UiUtils.platformIcon(
+                      ios: CupertinoIcons.settings,
+                      android: Icons.settings_voice,
+                    ),
+                    color: theme.buttonPrimary,
+                  ),
+                  const SizedBox(width: Spacing.sm),
+                  const Text('Engine'),
+                  const Spacer(),
+                  Wrap(
+                    spacing: Spacing.sm,
+                    children: [
+                      ChoiceChip(
+                        label: const Text('On Device'),
+                        selected: settings.ttsEngine == TtsEngine.device,
+                        showCheckmark: false,
+                        selectedColor: theme.buttonPrimary,
+                        backgroundColor: theme.cardBackground,
+                        side: BorderSide(
+                          color: settings.ttsEngine == TtsEngine.device
+                              ? theme.buttonPrimary.withValues(alpha: 0.6)
+                              : theme.textPrimary.withValues(alpha: 0.2),
+                        ),
+                        labelStyle: TextStyle(
+                          color: settings.ttsEngine == TtsEngine.device
+                              ? theme.buttonPrimaryText
+                              : theme.textPrimary,
+                          fontWeight: FontWeight.w600,
+                        ),
+                        onSelected: (v) {
+                          if (v) {
+                            final notifier = ref.read(
+                              appSettingsProvider.notifier,
+                            );
+                            notifier.setTtsEngine(TtsEngine.device);
+                            // Keep previous voice (device voices)
+                          }
+                        },
+                      ),
+                      ChoiceChip(
+                        label: const Text('Server'),
+                        selected: settings.ttsEngine == TtsEngine.server,
+                        showCheckmark: false,
+                        selectedColor: theme.buttonPrimary,
+                        backgroundColor: theme.cardBackground,
+                        side: BorderSide(
+                          color: settings.ttsEngine == TtsEngine.server
+                              ? theme.buttonPrimary.withValues(alpha: 0.6)
+                              : theme.textPrimary.withValues(alpha: 0.2),
+                        ),
+                        labelStyle: TextStyle(
+                          color: settings.ttsEngine == TtsEngine.server
+                              ? theme.buttonPrimaryText
+                              : theme.textPrimary,
+                          fontWeight: FontWeight.w600,
+                        ),
+                        onSelected: (v) {
+                          if (v) {
+                            final notifier = ref.read(
+                              appSettingsProvider.notifier,
+                            );
+                            // Clear device-specific voice so server can default
+                            notifier.setTtsVoice(null);
+                            notifier.setTtsEngine(TtsEngine.server);
+                          }
+                        },
+                      ),
+                    ],
+                  ),
+                ],
+              ),
+            ],
+          ),
+        ),
+        const SizedBox(height: Spacing.sm),
         _ExpandableCard(
           title: l10n.ttsVoice,
           subtitle: _getDisplayVoiceName(
-            settings.ttsVoice,
+            settings.ttsEngine == TtsEngine.server
+                ? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ??
+                      '')
+                : (settings.ttsVoice ?? ''),
             l10n.ttsSystemDefault,
           ),
           icon: UiUtils.platformIcon(
@@ -466,7 +553,11 @@ class AppCustomizationPage extends ConsumerWidget {
                 ),
                 title: l10n.ttsVoice,
                 subtitle: _getDisplayVoiceName(
-                  settings.ttsVoice,
+                  settings.ttsEngine == TtsEngine.server
+                      ? ((settings.ttsServerVoiceName ??
+                                settings.ttsServerVoiceId) ??
+                            '')
+                      : (settings.ttsVoice ?? ''),
                   l10n.ttsSystemDefault,
                 ),
                 onTap: () => _showVoicePickerSheet(context, ref, settings),
@@ -616,7 +707,10 @@ class AppCustomizationPage extends ConsumerWidget {
     final theme = context.conduitTheme;
     final ttsService = ref.read(textToSpeechServiceProvider);
 
-    // Fetch available voices
+    // Ensure the service uses the currently selected engine before fetching
+    await ttsService.updateSettings(engine: settings.ttsEngine);
+
+    // Fetch available voices from the active engine
     final allVoices = await ttsService.getAvailableVoices();
 
     if (!context.mounted) return;
@@ -729,17 +823,29 @@ class AppCustomizationPage extends ConsumerWidget {
                     style:
                         theme.bodyMedium?.copyWith(
                           color: theme.sidebarForeground,
-                          fontWeight: settings.ttsVoice == null
+                          fontWeight:
+                              (settings.ttsEngine == TtsEngine.server
+                                  ? settings.ttsServerVoiceId == null
+                                  : settings.ttsVoice == null)
                               ? FontWeight.bold
                               : FontWeight.normal,
                         ) ??
                         TextStyle(color: theme.sidebarForeground),
                   ),
-                  trailing: settings.ttsVoice == null
+                  trailing:
+                      (settings.ttsEngine == TtsEngine.server
+                          ? settings.ttsServerVoiceId == null
+                          : settings.ttsVoice == null)
                       ? Icon(Icons.check, color: theme.buttonPrimary)
                       : null,
                   onTap: () {
-                    ref.read(appSettingsProvider.notifier).setTtsVoice(null);
+                    final notifier = ref.read(appSettingsProvider.notifier);
+                    if (settings.ttsEngine == TtsEngine.server) {
+                      notifier.setTtsServerVoiceId(null);
+                      notifier.setTtsServerVoiceName(null);
+                    } else {
+                      notifier.setTtsVoice(null);
+                    }
                     Navigator.of(sheetContext).pop();
                   },
                 ),
@@ -823,7 +929,9 @@ class AppCustomizationPage extends ConsumerWidget {
                       final voiceId = _getVoiceIdentifier(voice);
                       final displayName = _formatVoiceName(voice);
                       final subtitle = _getVoiceSubtitle(voice);
-                      final isSelected = settings.ttsVoice == voiceId;
+                      final isSelected = settings.ttsEngine == TtsEngine.server
+                          ? settings.ttsServerVoiceId == voiceId
+                          : settings.ttsVoice == voiceId;
 
                       return ListTile(
                         leading: Icon(
@@ -865,9 +973,15 @@ class AppCustomizationPage extends ConsumerWidget {
                             ? Icon(Icons.check, color: theme.buttonPrimary)
                             : null,
                         onTap: () {
-                          ref
-                              .read(appSettingsProvider.notifier)
-                              .setTtsVoice(voiceId);
+                          final notifier = ref.read(
+                            appSettingsProvider.notifier,
+                          );
+                          if (settings.ttsEngine == TtsEngine.server) {
+                            notifier.setTtsServerVoiceId(voiceId);
+                            notifier.setTtsServerVoiceName(displayName);
+                          } else {
+                            notifier.setTtsVoice(voiceId);
+                          }
                           Navigator.of(sheetContext).pop();
                         },
                       );
diff --git a/pubspec.lock b/pubspec.lock
index 9b612c1..a57444b 100644
--- a/pubspec.lock
+++ b/pubspec.lock
@@ -65,6 +65,62 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "2.13.0"
+  audioplayers:
+    dependency: "direct main"
+    description:
+      name: audioplayers
+      sha256: c05c6147124cd63e725e861335a8b4d57300b80e6e92cea7c145c739223bbaef
+      url: "https://pub.dev"
+    source: hosted
+    version: "5.2.1"
+  audioplayers_android:
+    dependency: transitive
+    description:
+      name: audioplayers_android
+      sha256: b00e1a0e11365d88576320ec2d8c192bc21f1afb6c0e5995d1c57ae63156acb5
+      url: "https://pub.dev"
+    source: hosted
+    version: "4.0.3"
+  audioplayers_darwin:
+    dependency: transitive
+    description:
+      name: audioplayers_darwin
+      sha256: "3034e99a6df8d101da0f5082dcca0a2a99db62ab1d4ddb3277bed3f6f81afe08"
+      url: "https://pub.dev"
+    source: hosted
+    version: "5.0.2"
+  audioplayers_linux:
+    dependency: transitive
+    description:
+      name: audioplayers_linux
+      sha256: "60787e73fefc4d2e0b9c02c69885402177e818e4e27ef087074cf27c02246c9e"
+      url: "https://pub.dev"
+    source: hosted
+    version: "3.1.0"
+  audioplayers_platform_interface:
+    dependency: transitive
+    description:
+      name: audioplayers_platform_interface
+      sha256: "365c547f1bb9e77d94dd1687903a668d8f7ac3409e48e6e6a3668a1ac2982adb"
+      url: "https://pub.dev"
+    source: hosted
+    version: "6.1.0"
+  audioplayers_web:
+    dependency: transitive
+    description:
+      name: audioplayers_web
+      sha256: "22cd0173e54d92bd9b2c80b1204eb1eb159ece87475ab58c9788a70ec43c2a62"
+      url: "https://pub.dev"
+    source: hosted
+    version: "4.1.0"
+  audioplayers_windows:
+    dependency: transitive
+    description:
+      name: audioplayers_windows
+      sha256: "9536812c9103563644ada2ef45ae523806b0745f7a78e89d1b5fb1951de90e1a"
+      url: "https://pub.dev"
+    source: hosted
+    version: "3.1.0"
   boolean_selector:
     dependency: transitive
     description:
diff --git a/pubspec.yaml b/pubspec.yaml
index 4aa6980..8e2d10a 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -47,6 +47,7 @@ dependencies:
   record: ^6.1.1
   stts: ^1.2.5
   flutter_tts: ^4.2.3
+  audioplayers: ^5.2.1
   image_picker: ^1.2.0
   file_picker: ^10.3.3
   path_provider: ^2.1.4