feat(tts): Refactor text splitting and offset computation for TTS

2025-11-05 00:59:57 +05:30
parent 3424af60f9
commit 0d49309ad1
2 changed files with 25 additions and 25 deletions
@@ -219,8 +219,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {

    // Prepare sentence split for highlighting
    final cleanText = MarkdownToText.convert(text);
-    final sentences = _splitForTts(cleanText);
-    final offsets = _computeOffsets(sentences);
+    final sentences = _service.splitTextForSpeech(cleanText);
+    final offsets = _computeOffsets(cleanText, sentences);

    state = state.copyWith(
      status: TtsPlaybackStatus.loading,
@@ -265,30 +265,24 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
    }
  }

-  List<String> _splitForTts(String text) {
-    final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();
-    if (normalized.isEmpty) return const [];
-    final parts = <String>[];
-    final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)");
-    int index = 0;
-    for (final match in sentenceRegex.allMatches('$normalized ')) {
-      final s = match.group(1) ?? '';
-      if (s.trim().isNotEmpty) parts.add(s.trim());
-      index = match.end;
-    }
-    if (index < normalized.length) {
-      final tail = normalized.substring(index).trim();
-      if (tail.isNotEmpty) parts.add(tail);
-    }
-    return parts;
-  }
-
-  List<int> _computeOffsets(List<String> sentences) {
+  /// Computes, for each entry of [sentences], a start offset into [source].
+  ///
+  /// Each sentence is trimmed and searched for in [source] starting at a
+  /// running cursor, so repeated sentences resolve to successive occurrences
+  /// rather than all mapping to the first one. The returned list always has
+  /// the same length as [sentences]:
+  ///
+  /// * a sentence found in [source] maps to the index where it starts;
+  /// * a sentence that is empty after trimming maps to the current cursor
+  ///   and does not advance it;
+  /// * a sentence that cannot be found verbatim maps to the current cursor,
+  ///   which is then advanced by the sentence length as a best-effort
+  ///   estimate, never beyond the end of [source].
+  ///
+  /// Returns an empty list when [sentences] is empty.
+  List<int> _computeOffsets(String source, List<String> sentences) {
+    if (sentences.isEmpty) return const [];
    final offsets = <int>[];
-    int acc = 0;
-    for (final s in sentences) {
-      offsets.add(acc);
-      acc += s.length + 1; // assume a space or punctuation between
+    var cursor = 0;
+    for (final sentence in sentences) {
+      final chunk = sentence.trim();
+      if (chunk.isEmpty) {
+        offsets.add(cursor);
+        continue;
+      }
+      final index = source.indexOf(chunk, cursor);
+      if (index == -1) {
+        // The chunk does not occur verbatim at or after the cursor; fall back
+        // to the running estimate instead of dropping the entry.
+        offsets.add(cursor);
+        // Clamp the estimate: String.indexOf throws a RangeError when its
+        // start argument is greater than the string length, which an
+        // unclamped cursor would trigger on the next iteration.
+        final advanced = cursor + chunk.length;
+        cursor = advanced > source.length ? source.length : advanced;
+      } else {
+        offsets.add(index);
+        cursor = index + chunk.length;
+      }
    }
    return offsets;
  }
@@ -657,6 +657,12 @@ class TextToSpeechService {
    );
  }

+  /// Returns the playback chunks for [text].
+  ///
+  /// Public entry point that forwards to the private `_splitForTts`, so
+  /// callers outside this service can obtain the chunk list it produces.
+  /// The commit author describes this split as matching the server-side
+  /// streaming behavior, keeping UI consumers aligned with the sentence
+  /// indices reported during playback; that is not verifiable from this hunk.
+  List<String> splitTextForSpeech(String text) {
+    return _splitForTts(text);
+  }
+
  Future<void> _onAudioComplete() async {
    final session = _session;
    // If there are more expected chunks