feat(tts): add auto mode for text-to-speech engine selection

2025-11-02 21:31:13 +05:30
parent da249eaa31
commit cfadeffd24
19 changed files with 579 additions and 154 deletions
@@ -107,11 +107,9 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
    // Listen to settings changes and update TTS when initialized
    ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
      if (_service.isInitialized && _service.isAvailable) {
-        final selectedVoice = next.ttsEngine == TtsEngine.server
-            ? next.ttsServerVoiceId
-            : next.ttsVoice;
        _service.updateSettings(
-          voice: selectedVoice,
+          voice: next.ttsVoice,
+          serverVoice: next.ttsServerVoiceId,
          speechRate: next.ttsSpeechRate,
          pitch: next.ttsPitch,
          volume: next.ttsVolume,
@@ -137,9 +135,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
    final settings = ref.read(appSettingsProvider);
    final future = _service
        .initialize(
-          voice: settings.ttsEngine == TtsEngine.server
-              ? settings.ttsServerVoiceId
-              : settings.ttsVoice,
+          deviceVoice: settings.ttsVoice,
+          serverVoice: settings.ttsServerVoiceId,
          speechRate: settings.ttsSpeechRate,
          pitch: settings.ttsPitch,
          volume: settings.ttsVolume,
@@ -16,8 +16,9 @@ class TextToSpeechService {
  final FlutterTts _tts = FlutterTts();
  final AudioPlayer _player = AudioPlayer();
  final ApiService? _api;
-  TtsEngine _engine = TtsEngine.device;
+  TtsEngine _engine = TtsEngine.auto;
  String? _preferredVoice;
+  String? _serverPreferredVoice;
  bool _initialized = false;
  bool _available = false;
  bool _voiceConfigured = false;
@@ -41,6 +42,8 @@ class TextToSpeechService {

  bool get isInitialized => _initialized;
  bool get isAvailable => _available;
+  bool get deviceEngineAvailable => _deviceEngineAvailable;
+  bool get serverEngineAvailable => _api != null;

  TextToSpeechService({ApiService? api}) : _api = api {
    // Wire minimal player events to callbacks
@@ -59,6 +62,69 @@ class TextToSpeechService {
    });
  }

+  /// Configures the on-device TTS engine and records whether it is usable.
+  ///
+  /// Applies [volume], [speechRate] and [pitch] to the platform engine, sets
+  /// up the iOS audio category when running on iOS, and selects [voice]
+  /// unless the engine preference is [TtsEngine.server].
+  ///
+  /// `_deviceEngineAvailable` becomes `true` only when every step succeeds.
+  /// On failure both `_deviceEngineAvailable` and `_voiceConfigured` are
+  /// cleared and the error is rethrown so the caller can decide whether to
+  /// fall back or report it.
+  Future<void> _configureDeviceEngine({
+    required String? voice,
+    required double speechRate,
+    required double pitch,
+    required double volume,
+  }) async {
+    // Pessimistic default: only flipped back once every step has succeeded.
+    _deviceEngineAvailable = false;
+    try {
+      await _tts.awaitSpeakCompletion(false);
+      await _tts.setVolume(volume);
+      await _tts.setSpeechRate(speechRate);
+      await _tts.setPitch(pitch);
+
+      // `kIsWeb` is checked first so `Platform` is never touched on the web.
+      if (!kIsWeb && Platform.isIOS) {
+        await _tts.setSharedInstance(true);
+        await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
+          IosTextToSpeechAudioCategoryOptions.mixWithOthers,
+          IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
+          IosTextToSpeechAudioCategoryOptions.allowBluetooth,
+          IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
+        ]);
+      }
+
+      if (_engine != TtsEngine.server) {
+        // Honor the voice handed in by the caller instead of re-reading the
+        // `_preferredVoice` field, so the parameter is not silently ignored.
+        await _setVoiceByName(voice);
+      } else {
+        // No device voice is selected while the server engine is preferred.
+        _voiceConfigured = false;
+      }
+
+      _deviceEngineAvailable = true;
+    } catch (_) {
+      _voiceConfigured = false;
+      _deviceEngineAvailable = false;
+      rethrow;
+    }
+  }
+
+  /// Whether speech can be produced under the current engine preference.
+  ///
+  /// The device preference needs a working device engine, the server
+  /// preference needs an API client, and auto is satisfied by either one.
+  bool _computeAvailability() {
+    final hasServer = _api != null;
+    return switch (_engine) {
+      TtsEngine.device => _deviceEngineAvailable,
+      TtsEngine.server => hasServer,
+      TtsEngine.auto => _deviceEngineAvailable || hasServer,
+    };
+  }
+
+  /// Whether playback should be routed through the server engine.
+  ///
+  /// Never returns `true` without an API client. In auto mode the device
+  /// engine wins whenever it is available; the server is only the fallback.
+  bool _shouldUseServer() {
+    final hasServer = _api != null;
+    return switch (_engine) {
+      TtsEngine.server => hasServer,
+      TtsEngine.device => false,
+      TtsEngine.auto => !_deviceEngineAvailable && hasServer,
+    };
+  }
+
  /// Register callbacks for TTS lifecycle events
  void bindHandlers({
    VoidCallback? onStart,
@@ -96,56 +162,58 @@ class TextToSpeechService {

  /// Initialize the native TTS engine lazily
  Future<bool> initialize({
-    String? voice,
+    String? deviceVoice,
+    String? serverVoice,
    double speechRate = 0.5,
    double pitch = 1.0,
    double volume = 1.0,
-    TtsEngine engine = TtsEngine.device,
+    TtsEngine engine = TtsEngine.auto,
  }) async {
    if (_initialized) {
+      _engine = engine;
+      if (deviceVoice != null) {
+        _preferredVoice = deviceVoice;
+        _voiceConfigured = false;
+      }
+      if (serverVoice != null) {
+        _serverPreferredVoice = serverVoice;
+      }
+      _available = _computeAvailability();
      return _available;
    }

-    try {
-      _engine = engine;
-      _preferredVoice = voice;
-      await _tts.awaitSpeakCompletion(false);
+    _engine = engine;
+    _preferredVoice = deviceVoice;
+    _serverPreferredVoice = serverVoice;
+    _voiceConfigured = false;

-      // Set volume
-      await _tts.setVolume(volume);
-
-      // Set speech rate
-      await _tts.setSpeechRate(speechRate);
-
-      // Set pitch
-      await _tts.setPitch(pitch);
-
-      if (!kIsWeb && Platform.isIOS) {
-        await _tts.setSharedInstance(true);
-        await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
-          IosTextToSpeechAudioCategoryOptions.mixWithOthers,
-          IosTextToSpeechAudioCategoryOptions.defaultToSpeaker,
-          IosTextToSpeechAudioCategoryOptions.allowBluetooth,
-          IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
-        ]);
+    if (_engine != TtsEngine.server || _api == null) {
+      try {
+        await _configureDeviceEngine(
+          voice: deviceVoice,
+          speechRate: speechRate,
+          pitch: pitch,
+          volume: volume,
+        );
+      } catch (e) {
+        if (_engine == TtsEngine.device) {
+          _available = false;
+          _onError?.call(e.toString());
+          _initialized = true;
+          return _available;
+        }
      }
-
-      // Set the voice (specific or default) when using device engine
-      if (_engine == TtsEngine.device) {
-        await _setVoiceByName(voice);
-      }
-      _deviceEngineAvailable = true;
-    } catch (e) {
+    } else {
      _deviceEngineAvailable = false;
-      if (_engine != TtsEngine.server) {
-        _available = false;
-        _onError?.call(e.toString());
-        _initialized = true;
-        return _available;
-      }
+      try {
+        await _tts.awaitSpeakCompletion(false);
+        await _tts.setVolume(volume);
+        await _tts.setSpeechRate(speechRate);
+        await _tts.setPitch(pitch);
+      } catch (_) {}
    }

-    _available = _engine == TtsEngine.server || _deviceEngineAvailable;
+    _available = _computeAvailability();
    _initialized = true;
    return _available;
  }
@@ -156,10 +224,23 @@ class TextToSpeechService {
    }

    if (!_initialized) {
-      await initialize(voice: _preferredVoice, engine: _engine);
+      await initialize(
+        deviceVoice: _preferredVoice,
+        serverVoice: _serverPreferredVoice,
+        engine: _engine,
+      );
    }

-    if (_engine == TtsEngine.server && _api != null) {
+    final bool useServer = _shouldUseServer();
+
+    if (useServer) {
+      if (_api == null) {
+        if (_deviceEngineAvailable) {
+          await _speakOnDevice(text);
+          return;
+        }
+        throw StateError('Server text-to-speech is unavailable');
+      }
      // Server-backed TTS with sentence chunking & queued playback
      try {
        await _startServerChunkedPlayback(text);
@@ -196,7 +277,7 @@ class TextToSpeechService {
  Future<void> pause() async {
    if (!_initialized) return;
    try {
-      if (_engine == TtsEngine.server) {
+      if (_shouldUseServer()) {
        await _player.pause();
        _handlePause();
      } else if (_deviceEngineAvailable) {
@@ -210,7 +291,7 @@ class TextToSpeechService {
  Future<void> resume() async {
    if (!_initialized) return;
    try {
-      if (_engine == TtsEngine.server) {
+      if (_shouldUseServer()) {
        if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
          _waitingNext = false;
          await _playNextIfBuffered(_session);
@@ -235,7 +316,7 @@ class TextToSpeechService {
      _expectedChunks = 0;
      _currentIndex = -1;
      _waitingNext = false;
-      if (_engine == TtsEngine.server) {
+      if (_shouldUseServer()) {
        await _player.stop();
        _handleCancel();
      } else {
@@ -254,17 +335,23 @@ class TextToSpeechService {
  /// Update TTS settings on-the-fly
  Future<void> updateSettings({
    Object? voice = const _VoiceNotProvided(),
+    Object? serverVoice = const _VoiceNotProvided(),
    double? speechRate,
    double? pitch,
    double? volume,
    TtsEngine? engine,
  }) async {
    final voiceProvided = voice is! _VoiceNotProvided;
+    final serverVoiceProvided = serverVoice is! _VoiceNotProvided;
    final voiceValue = voiceProvided ? voice as String? : null;
+    final serverVoiceValue = serverVoiceProvided
+        ? serverVoice as String?
+        : null;
    if (!_initialized || !_available) {
      // Allow engine and voice to update before init
      if (engine != null) _engine = engine;
      if (voiceProvided) _preferredVoice = voiceValue;
+      if (serverVoiceProvided) _serverPreferredVoice = serverVoiceValue;
      return;
    }

@@ -275,6 +362,9 @@ class TextToSpeechService {
      if (voiceProvided) {
        _preferredVoice = voiceValue;
      }
+      if (serverVoiceProvided) {
+        _serverPreferredVoice = serverVoiceValue;
+      }
      if (volume != null) {
        await _tts.setVolume(volume);
      }
@@ -284,13 +374,15 @@ class TextToSpeechService {
      if (pitch != null) {
        await _tts.setPitch(pitch);
      }
-      // Set specific voice by name on device engine
-      if (_engine == TtsEngine.device && voiceProvided) {
+      // Set specific voice by name on device-capable engines
+      if (_engine != TtsEngine.server && voiceProvided) {
        await _setVoiceByName(_preferredVoice);
      }
    } catch (e) {
      _onError?.call(e.toString());
    }
+
+    _available = _computeAvailability();
  }

  /// Set voice by name, or use system default if null
@@ -343,7 +435,11 @@ class TextToSpeechService {
  /// Get available voices from the TTS engine
  Future<List<Map<String, dynamic>>> getAvailableVoices() async {
    if (!_initialized) {
-      await initialize(voice: _preferredVoice, engine: _engine);
+      await initialize(
+        deviceVoice: _preferredVoice,
+        serverVoice: _serverPreferredVoice,
+        engine: _engine,
+      );
    }

    if (_engine == TtsEngine.server && _api != null) {
@@ -425,6 +521,10 @@ class TextToSpeechService {
  }

  Future<String?> _resolveServerVoice() async {
+    final serverSelected = _serverPreferredVoice?.trim();
+    if (serverSelected != null && serverSelected.isNotEmpty) {
+      return serverSelected;
+    }
    final selected = _preferredVoice?.trim();
    if (selected != null && selected.isNotEmpty) {
      return selected;
@@ -132,9 +132,8 @@ class VoiceCallService {
    // Initialize TTS with current app settings (engine/voice/rate/pitch/volume)
    final settings = _ref.read(appSettingsProvider);
    await _tts.initialize(
-      voice: settings.ttsEngine == TtsEngine.server
-          ? settings.ttsServerVoiceId
-          : settings.ttsVoice,
+      deviceVoice: settings.ttsVoice,
+      serverVoice: settings.ttsServerVoiceId,
      speechRate: settings.ttsSpeechRate,
      pitch: settings.ttsPitch,
      volume: settings.ttsVolume,
@@ -587,11 +586,9 @@ VoiceCallService voiceCallService(Ref ref) {
  // Keep TTS settings in sync with app settings during a call
  ref.listen<AppSettings>(appSettingsProvider, (previous, next) {
    // Update voice/engine and runtime parameters
-    final selectedVoice = next.ttsEngine == TtsEngine.server
-        ? next.ttsServerVoiceId
-        : next.ttsVoice;
    service._tts.updateSettings(
-      voice: selectedVoice,
+      voice: next.ttsVoice,
+      serverVoice: next.ttsServerVoiceId,
      speechRate: next.ttsSpeechRate,
      pitch: next.ttsPitch,
      volume: next.ttsVolume,
@@ -698,6 +698,35 @@ class AppCustomizationPage extends ConsumerWidget {
  ) {
    final theme = context.conduitTheme;
    final l10n = AppLocalizations.of(context)!;
+    final ttsService = ref.watch(textToSpeechServiceProvider);
+    final bool deviceAvailable =
+        ttsService.deviceEngineAvailable || !ttsService.isInitialized;
+    final bool serverAvailable = ttsService.serverEngineAvailable;
+    final bool autoSelectable = deviceAvailable || serverAvailable;
+    final bool deviceSelectable = deviceAvailable;
+    final bool serverSelectable = serverAvailable;
+    final ttsDescription = _ttsPreferenceDescription(l10n, settings);
+    final warnings = <String>[];
+    switch (settings.ttsEngine) {
+      case TtsEngine.auto:
+        if (!deviceAvailable) {
+          warnings.add(l10n.ttsDeviceUnavailableWarning);
+        }
+        if (!serverAvailable) {
+          warnings.add(l10n.ttsServerUnavailableWarning);
+        }
+        break;
+      case TtsEngine.device:
+        if (!deviceAvailable) {
+          warnings.add(l10n.ttsDeviceUnavailableWarning);
+        }
+        break;
+      case TtsEngine.server:
+        if (!serverAvailable) {
+          warnings.add(l10n.ttsServerUnavailableWarning);
+        }
+        break;
+    }
    return Column(
      crossAxisAlignment: CrossAxisAlignment.start,
      children: [
@@ -733,82 +762,160 @@ class AppCustomizationPage extends ConsumerWidget {
                        ) ??
                        TextStyle(color: theme.sidebarForeground, fontSize: 14),
                  ),
-                  const Spacer(),
-                  Wrap(
-                    spacing: Spacing.sm,
-                    children: [
-                      ChoiceChip(
-                        label: Text(l10n.ttsEngineDevice),
-                        selected: settings.ttsEngine == TtsEngine.device,
-                        showCheckmark: false,
-                        selectedColor: theme.buttonPrimary,
-                        backgroundColor: theme.cardBackground,
-                        side: BorderSide(
-                          color: settings.ttsEngine == TtsEngine.device
-                              ? theme.buttonPrimary.withValues(alpha: 0.6)
-                              : theme.textPrimary.withValues(alpha: 0.2),
-                        ),
-                        labelStyle: TextStyle(
-                          color: settings.ttsEngine == TtsEngine.device
-                              ? theme.buttonPrimaryText
-                              : theme.textPrimary,
-                          fontWeight: FontWeight.w600,
-                        ),
-                        onSelected: (v) {
-                          if (v) {
-                            final notifier = ref.read(
-                              appSettingsProvider.notifier,
-                            );
-                            notifier.setTtsEngine(TtsEngine.device);
-                            // Keep previous voice (device voices)
+                ],
+              ),
+              const SizedBox(height: Spacing.sm),
+              Wrap(
+                spacing: Spacing.sm,
+                runSpacing: Spacing.sm,
+                children: [
+                  ChoiceChip(
+                    label: Text(l10n.ttsEngineAuto),
+                    selected: settings.ttsEngine == TtsEngine.auto,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.ttsEngine == TtsEngine.auto
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(
+                              alpha: autoSelectable ? 0.2 : 0.12,
+                            ),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.ttsEngine == TtsEngine.auto
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary.withValues(
+                              alpha: autoSelectable ? 1.0 : 0.45,
+                            ),
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: autoSelectable
+                        ? (value) {
+                            if (value) {
+                              ref
+                                  .read(appSettingsProvider.notifier)
+                                  .setTtsEngine(TtsEngine.auto);
+                            }
                          }
-                        },
-                      ),
-                      ChoiceChip(
-                        label: Text(l10n.ttsEngineServer),
-                        selected: settings.ttsEngine == TtsEngine.server,
-                        showCheckmark: false,
-                        selectedColor: theme.buttonPrimary,
-                        backgroundColor: theme.cardBackground,
-                        side: BorderSide(
-                          color: settings.ttsEngine == TtsEngine.server
-                              ? theme.buttonPrimary.withValues(alpha: 0.6)
-                              : theme.textPrimary.withValues(alpha: 0.2),
-                        ),
-                        labelStyle: TextStyle(
-                          color: settings.ttsEngine == TtsEngine.server
-                              ? theme.buttonPrimaryText
-                              : theme.textPrimary,
-                          fontWeight: FontWeight.w600,
-                        ),
-                        onSelected: (v) {
-                          if (v) {
-                            final notifier = ref.read(
-                              appSettingsProvider.notifier,
-                            );
-                            // Clear device-specific voice so server can default
-                            notifier.setTtsVoice(null);
-                            notifier.setTtsEngine(TtsEngine.server);
+                        : null,
+                  ),
+                  ChoiceChip(
+                    label: Text(l10n.ttsEngineDevice),
+                    selected: settings.ttsEngine == TtsEngine.device,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.ttsEngine == TtsEngine.device
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(
+                              alpha: deviceSelectable ? 0.2 : 0.12,
+                            ),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.ttsEngine == TtsEngine.device
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary.withValues(
+                              alpha: deviceSelectable ? 1.0 : 0.45,
+                            ),
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: deviceSelectable
+                        ? (value) {
+                            if (value) {
+                              ref
+                                  .read(appSettingsProvider.notifier)
+                                  .setTtsEngine(TtsEngine.device);
+                            }
                          }
-                        },
-                      ),
-                    ],
+                        : null,
+                  ),
+                  ChoiceChip(
+                    label: Text(l10n.ttsEngineServer),
+                    selected: settings.ttsEngine == TtsEngine.server,
+                    showCheckmark: false,
+                    selectedColor: theme.buttonPrimary,
+                    backgroundColor: theme.cardBackground,
+                    side: BorderSide(
+                      color: settings.ttsEngine == TtsEngine.server
+                          ? theme.buttonPrimary.withValues(alpha: 0.6)
+                          : theme.textPrimary.withValues(
+                              alpha: serverSelectable ? 0.2 : 0.12,
+                            ),
+                    ),
+                    labelStyle: TextStyle(
+                      color: settings.ttsEngine == TtsEngine.server
+                          ? theme.buttonPrimaryText
+                          : theme.textPrimary.withValues(
+                              alpha: serverSelectable ? 1.0 : 0.45,
+                            ),
+                      fontWeight: FontWeight.w600,
+                    ),
+                    onSelected: serverSelectable
+                        ? (value) {
+                            if (value) {
+                              final notifier = ref.read(
+                                appSettingsProvider.notifier,
+                              );
+                              notifier.setTtsVoice(null);
+                              notifier.setTtsEngine(TtsEngine.server);
+                            }
+                          }
+                        : null,
                  ),
                ],
              ),
+              const SizedBox(height: Spacing.sm),
+              AnimatedSwitcher(
+                duration: const Duration(milliseconds: 200),
+                child: Text(
+                  ttsDescription,
+                  key: ValueKey<String>(
+                    'tts-desc-${settings.ttsEngine.name}',
+                  ),
+                  style:
+                      theme.bodyMedium?.copyWith(
+                        color: theme.sidebarForeground.withValues(
+                          alpha: 0.9,
+                        ),
+                      ) ??
+                      TextStyle(
+                        color: theme.sidebarForeground.withValues(
+                          alpha: 0.9,
+                        ),
+                        fontSize: 14,
+                      ),
+                ),
+              ),
+              if (warnings.isNotEmpty) ...[
+                const SizedBox(height: Spacing.sm),
+                ...warnings.map(
+                  (warning) => Padding(
+                    padding: const EdgeInsets.only(top: Spacing.xs),
+                    child: Text(
+                      warning,
+                      style:
+                          theme.bodySmall?.copyWith(
+                            color: theme.error,
+                            fontWeight: FontWeight.w600,
+                          ) ??
+                          TextStyle(
+                            color: theme.error,
+                            fontSize: 12,
+                            fontWeight: FontWeight.w600,
+                          ),
+                    ),
+                  ),
+                ),
+              ],
            ],
          ),
        ),
        const SizedBox(height: Spacing.sm),
        _ExpandableCard(
          title: l10n.ttsVoice,
-          subtitle: _getDisplayVoiceName(
-            settings.ttsEngine == TtsEngine.server
-                ? ((settings.ttsServerVoiceName ?? settings.ttsServerVoiceId) ??
-                      '')
-                : (settings.ttsVoice ?? ''),
-            l10n.ttsSystemDefault,
-          ),
+          subtitle: _ttsVoiceSubtitle(l10n, settings),
          icon: UiUtils.platformIcon(
            ios: CupertinoIcons.speaker_3,
            android: Icons.record_voice_over,
@@ -827,14 +934,7 @@ class AppCustomizationPage extends ConsumerWidget {
                  color: theme.buttonPrimary,
                ),
                title: l10n.ttsVoice,
-                subtitle: _getDisplayVoiceName(
-                  settings.ttsEngine == TtsEngine.server
-                      ? ((settings.ttsServerVoiceName ??
-                                settings.ttsServerVoiceId) ??
-                            '')
-                      : (settings.ttsVoice ?? ''),
-                  l10n.ttsSystemDefault,
-                ),
+                subtitle: _ttsVoiceSubtitle(l10n, settings),
                onTap: () => _showVoicePickerSheet(context, ref, settings),
              ),
              const SizedBox(height: Spacing.md),
@@ -928,6 +1028,39 @@ class AppCustomizationPage extends ConsumerWidget {
    }
  }

+  /// Returns the localized description for the selected TTS engine.
+  String _ttsPreferenceDescription(
+    AppLocalizations l10n,
+    AppSettings settings,
+  ) => switch (settings.ttsEngine) {
+    TtsEngine.auto => l10n.ttsEngineAutoDescription,
+    TtsEngine.device => l10n.ttsEngineDeviceDescription,
+    TtsEngine.server => l10n.ttsEngineServerDescription,
+  };
+
+  /// Builds the subtitle shown for the voice setting.
+  ///
+  /// Device and server modes show only their own voice; auto mode shows
+  /// both, each labelled with its engine name.
+  String _ttsVoiceSubtitle(AppLocalizations l10n, AppSettings settings) {
+    final fallbackLabel = l10n.ttsSystemDefault;
+    final deviceLabel = _getDisplayVoiceName(settings.ttsVoice, fallbackLabel);
+    // Prefer the human-readable server voice name, then its id, else empty.
+    final rawServerVoice =
+        settings.ttsServerVoiceName ?? settings.ttsServerVoiceId ?? '';
+    final serverLabel = _getDisplayVoiceName(rawServerVoice, fallbackLabel);
+
+    return switch (settings.ttsEngine) {
+      TtsEngine.auto =>
+        '${l10n.ttsEngineDevice}: $deviceLabel • ${l10n.ttsEngineServer}: $serverLabel',
+      TtsEngine.device => deviceLabel,
+      TtsEngine.server => serverLabel,
+    };
+  }
+
  Widget _buildSliderTile(
    BuildContext context,
    WidgetRef ref, {