feat(tts): Refactor text splitting and offset computation for TTS
This commit is contained in:
@@ -219,8 +219,8 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
|||||||
|
|
||||||
// Prepare sentence split for highlighting
|
// Prepare sentence split for highlighting
|
||||||
final cleanText = MarkdownToText.convert(text);
|
final cleanText = MarkdownToText.convert(text);
|
||||||
final sentences = _splitForTts(cleanText);
|
final sentences = _service.splitTextForSpeech(cleanText);
|
||||||
final offsets = _computeOffsets(sentences);
|
final offsets = _computeOffsets(cleanText, sentences);
|
||||||
|
|
||||||
state = state.copyWith(
|
state = state.copyWith(
|
||||||
status: TtsPlaybackStatus.loading,
|
status: TtsPlaybackStatus.loading,
|
||||||
@@ -265,30 +265,24 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> _splitForTts(String text) {
|
List<int> _computeOffsets(String source, List<String> sentences) {
|
||||||
final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();
|
if (sentences.isEmpty) return const [];
|
||||||
if (normalized.isEmpty) return const [];
|
|
||||||
final parts = <String>[];
|
|
||||||
final sentenceRegex = RegExp(r"(.+?[\.!?]+)(\s+|\$)");
|
|
||||||
int index = 0;
|
|
||||||
for (final match in sentenceRegex.allMatches('$normalized ')) {
|
|
||||||
final s = match.group(1) ?? '';
|
|
||||||
if (s.trim().isNotEmpty) parts.add(s.trim());
|
|
||||||
index = match.end;
|
|
||||||
}
|
|
||||||
if (index < normalized.length) {
|
|
||||||
final tail = normalized.substring(index).trim();
|
|
||||||
if (tail.isNotEmpty) parts.add(tail);
|
|
||||||
}
|
|
||||||
return parts;
|
|
||||||
}
|
|
||||||
|
|
||||||
List<int> _computeOffsets(List<String> sentences) {
|
|
||||||
final offsets = <int>[];
|
final offsets = <int>[];
|
||||||
int acc = 0;
|
var cursor = 0;
|
||||||
for (final s in sentences) {
|
for (final sentence in sentences) {
|
||||||
offsets.add(acc);
|
final chunk = sentence.trim();
|
||||||
acc += s.length + 1; // assume a space or punctuation between
|
if (chunk.isEmpty) {
|
||||||
|
offsets.add(cursor);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
final index = source.indexOf(chunk, cursor);
|
||||||
|
if (index == -1) {
|
||||||
|
offsets.add(cursor);
|
||||||
|
cursor += chunk.length;
|
||||||
|
} else {
|
||||||
|
offsets.add(index);
|
||||||
|
cursor = index + chunk.length;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return offsets;
|
return offsets;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -657,6 +657,12 @@ class TextToSpeechService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Splits [text] into the chunks used for playback sequencing.
|
||||||
|
///
|
||||||
|
/// This mirrors the server-side streaming behavior so UI consumers can stay
|
||||||
|
/// in sync with sentence indices reported during playback.
|
||||||
|
List<String> splitTextForSpeech(String text) => _splitForTts(text);
|
||||||
|
|
||||||
Future<void> _onAudioComplete() async {
|
Future<void> _onAudioComplete() async {
|
||||||
final session = _session;
|
final session = _session;
|
||||||
// If there are more expected chunks
|
// If there are more expected chunks
|
||||||
|
|||||||
Reference in New Issue
Block a user