refactor: Enhance markdown processing for text-to-speech conversion
- Introduced new regex patterns to remove thinking and reasoning blocks from markdown input. - Added functionality to strip emojis from the text, improving clarity for TTS. - Implemented HTML entity decoding to ensure proper text representation. - Replaced the existing sanitization method with a more comprehensive markdown-to-text conversion approach, enhancing performance and maintainability.
This commit is contained in:
@@ -5,6 +5,25 @@
|
|||||||
class MarkdownToText {
|
class MarkdownToText {
|
||||||
const MarkdownToText._();
|
const MarkdownToText._();
|
||||||
|
|
||||||
|
static final _thinkingBlockRegex = RegExp(
|
||||||
|
r'<details\s+type="reasoning"[^>]*>.*?</details>',
|
||||||
|
multiLine: true,
|
||||||
|
dotAll: true,
|
||||||
|
);
|
||||||
|
static final _thinkTagRegex = RegExp(
|
||||||
|
r'<think>.*?</think>',
|
||||||
|
multiLine: true,
|
||||||
|
dotAll: true,
|
||||||
|
);
|
||||||
|
static final _reasoningTagRegex = RegExp(
|
||||||
|
r'<reasoning>.*?</reasoning>',
|
||||||
|
multiLine: true,
|
||||||
|
dotAll: true,
|
||||||
|
);
|
||||||
|
static final _emojiRegex = RegExp(
|
||||||
|
r'[\u{1F600}-\u{1F64F}]|[\u{1F300}-\u{1F5FF}]|[\u{1F680}-\u{1F6FF}]|[\u{1F1E0}-\u{1F1FF}]|[\u{2600}-\u{26FF}]|[\u{2700}-\u{27BF}]|[\u{1F900}-\u{1F9FF}]|[\u{1FA00}-\u{1FA6F}]|[\u{1FA70}-\u{1FAFF}]|[\u{FE00}-\u{FE0F}]|[\u{1F018}-\u{1F270}]|[\u{238C}-\u{2454}]|[\u{20D0}-\u{20FF}]',
|
||||||
|
unicode: true,
|
||||||
|
);
|
||||||
static final _codeBlockRegex = RegExp(
|
static final _codeBlockRegex = RegExp(
|
||||||
r'```[^\n]*\n(.*?)```',
|
r'```[^\n]*\n(.*?)```',
|
||||||
multiLine: true,
|
multiLine: true,
|
||||||
@@ -29,18 +48,21 @@ class MarkdownToText {
|
|||||||
multiLine: true,
|
multiLine: true,
|
||||||
);
|
);
|
||||||
static final _htmlTagRegex = RegExp(r'<[^>]+>');
|
static final _htmlTagRegex = RegExp(r'<[^>]+>');
|
||||||
|
static final _htmlEntityRegex = RegExp(r'&[a-z]+;|&#\d+;|&#x[0-9a-f]+;');
|
||||||
static final _multipleNewlinesRegex = RegExp(r'\n{3,}');
|
static final _multipleNewlinesRegex = RegExp(r'\n{3,}');
|
||||||
static final _multipleSpacesRegex = RegExp(r' {2,}');
|
static final _multipleSpacesRegex = RegExp(r' {2,}');
|
||||||
|
|
||||||
/// Converts markdown text to plain text suitable for TTS.
|
/// Converts markdown text to plain text suitable for TTS.
|
||||||
///
|
///
|
||||||
|
/// - Removes thinking/reasoning blocks
|
||||||
|
/// - Removes emojis
|
||||||
/// - Removes code blocks (replaces with descriptive text)
|
/// - Removes code blocks (replaces with descriptive text)
|
||||||
/// - Strips all formatting (bold, italic, strikethrough)
|
/// - Strips all formatting (bold, italic, strikethrough)
|
||||||
/// - Converts links to just their text
|
/// - Converts links to just their text
|
||||||
/// - Removes images (or converts to alt text)
|
/// - Removes images (or converts to alt text)
|
||||||
/// - Simplifies headings
|
/// - Simplifies headings
|
||||||
/// - Preserves list structure with natural pauses
|
/// - Preserves list structure with natural pauses
|
||||||
/// - Removes HTML tags
|
/// - Removes HTML tags and entities
|
||||||
/// - Normalizes whitespace
|
/// - Normalizes whitespace
|
||||||
static String convert(String markdown) {
|
static String convert(String markdown) {
|
||||||
if (markdown.trim().isEmpty) {
|
if (markdown.trim().isEmpty) {
|
||||||
@@ -49,13 +71,20 @@ class MarkdownToText {
|
|||||||
|
|
||||||
var text = markdown;
|
var text = markdown;
|
||||||
|
|
||||||
|
// Remove thinking/reasoning blocks (must be done before general HTML tag removal)
|
||||||
|
text = text.replaceAll(_thinkingBlockRegex, '');
|
||||||
|
text = text.replaceAll(_thinkTagRegex, '');
|
||||||
|
text = text.replaceAll(_reasoningTagRegex, '');
|
||||||
|
|
||||||
|
// Remove emojis
|
||||||
|
text = text.replaceAll(_emojiRegex, '');
|
||||||
|
|
||||||
// Remove or replace code blocks with descriptive text
|
// Remove or replace code blocks with descriptive text
|
||||||
text = text.replaceAllMapped(_codeBlockRegex, (match) {
|
text = text.replaceAllMapped(_codeBlockRegex, (match) {
|
||||||
final code = match[1]?.trim() ?? '';
|
final code = match[1]?.trim() ?? '';
|
||||||
if (code.isEmpty) {
|
if (code.isEmpty) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
// For TTS, skip code blocks or use a brief description
|
|
||||||
return ' (code block) ';
|
return ' (code block) ';
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -86,7 +115,6 @@ class MarkdownToText {
|
|||||||
// Simplify headings (remove # symbols)
|
// Simplify headings (remove # symbols)
|
||||||
text = text.replaceAllMapped(_headingRegex, (match) {
|
text = text.replaceAllMapped(_headingRegex, (match) {
|
||||||
final heading = match[1] ?? '';
|
final heading = match[1] ?? '';
|
||||||
// Add a pause after headings for natural speech flow
|
|
||||||
return '$heading.\n';
|
return '$heading.\n';
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -103,6 +131,20 @@ class MarkdownToText {
|
|||||||
// Remove HTML tags
|
// Remove HTML tags
|
||||||
text = text.replaceAll(_htmlTagRegex, '');
|
text = text.replaceAll(_htmlTagRegex, '');
|
||||||
|
|
||||||
|
// Decode HTML entities
|
||||||
|
text = text.replaceAllMapped(_htmlEntityRegex, (match) {
|
||||||
|
final entity = match[0] ?? '';
|
||||||
|
return switch (entity) {
|
||||||
|
'&nbsp;' => ' ',
|
||||||
|
'&amp;' => '&',
|
||||||
|
'&lt;' => '<',
|
||||||
|
'&gt;' => '>',
|
||||||
|
'&quot;' => '"',
|
||||||
|
'&#39;' => "'",
|
||||||
|
_ => entity,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
// Normalize whitespace
|
// Normalize whitespace
|
||||||
text = text.replaceAll(_multipleNewlinesRegex, '\n\n');
|
text = text.replaceAll(_multipleNewlinesRegex, '\n\n');
|
||||||
text = text.replaceAll(_multipleSpacesRegex, ' ');
|
text = text.replaceAll(_multipleSpacesRegex, ' ');
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import '../../../core/utils/reasoning_parser.dart';
|
|||||||
import '../../../core/utils/message_segments.dart';
|
import '../../../core/utils/message_segments.dart';
|
||||||
import '../../../core/utils/tool_calls_parser.dart';
|
import '../../../core/utils/tool_calls_parser.dart';
|
||||||
import '../../../core/models/chat_message.dart';
|
import '../../../core/models/chat_message.dart';
|
||||||
|
import '../../../core/utils/markdown_to_text.dart';
|
||||||
import '../providers/text_to_speech_provider.dart';
|
import '../providers/text_to_speech_provider.dart';
|
||||||
import 'enhanced_image_attachment.dart';
|
import 'enhanced_image_attachment.dart';
|
||||||
import 'package:conduit/l10n/app_localizations.dart';
|
import 'package:conduit/l10n/app_localizations.dart';
|
||||||
@@ -23,21 +24,6 @@ import '../../../core/utils/debug_logger.dart';
|
|||||||
import 'sources/openwebui_sources.dart';
|
import 'sources/openwebui_sources.dart';
|
||||||
import '../providers/assistant_response_builder_provider.dart';
|
import '../providers/assistant_response_builder_provider.dart';
|
||||||
|
|
||||||
// Pre-compiled regex patterns for TTS sanitization (performance optimization)
|
|
||||||
final _ttsCodeBlockPattern = RegExp(r'```');
|
|
||||||
final _ttsInlineCodePattern = RegExp(r'`');
|
|
||||||
final _ttsImagePattern = RegExp(r'!\[(.*?)\]\((.*?)\)');
|
|
||||||
final _ttsLinkPattern = RegExp(r'\[(.*?)\]\((.*?)\)');
|
|
||||||
final _ttsBoldPattern1 = RegExp(r'\*\*');
|
|
||||||
final _ttsBoldPattern2 = RegExp(r'__');
|
|
||||||
final _ttsItalicPattern1 = RegExp(r'\*');
|
|
||||||
final _ttsItalicPattern2 = RegExp(r'_');
|
|
||||||
final _ttsStrikePattern = RegExp(r'~');
|
|
||||||
final _ttsListPattern = RegExp(r'^[-*+]\s+', multiLine: true);
|
|
||||||
final _ttsQuotePattern = RegExp(r'^>\s?', multiLine: true);
|
|
||||||
final _ttsMultiSpacePattern = RegExp(r'[ \t]{2,}');
|
|
||||||
final _ttsMultiNewlinePattern = RegExp(r'\n{3,}');
|
|
||||||
|
|
||||||
// Pre-compiled regex patterns for image processing (performance optimization)
|
// Pre-compiled regex patterns for image processing (performance optimization)
|
||||||
final _base64ImagePattern = RegExp(r'data:image/[^;]+;base64,[A-Za-z0-9+/]+=*');
|
final _base64ImagePattern = RegExp(r'data:image/[^;]+;base64,[A-Za-z0-9+/]+=*');
|
||||||
final _fileIdPattern = RegExp(r'/api/v1/files/([^/]+)/content');
|
final _fileIdPattern = RegExp(r'/api/v1/files/([^/]+)/content');
|
||||||
@@ -258,7 +244,7 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
|
|||||||
|
|
||||||
String _buildTtsPlainText(List<MessageSegment> segments, String fallback) {
|
String _buildTtsPlainText(List<MessageSegment> segments, String fallback) {
|
||||||
if (segments.isEmpty) {
|
if (segments.isEmpty) {
|
||||||
return _sanitizeForSpeech(fallback);
|
return MarkdownToText.convert(fallback);
|
||||||
}
|
}
|
||||||
|
|
||||||
final buffer = StringBuffer();
|
final buffer = StringBuffer();
|
||||||
@@ -267,7 +253,7 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
final text = segment.text ?? '';
|
final text = segment.text ?? '';
|
||||||
final sanitized = _sanitizeForSpeech(text);
|
final sanitized = MarkdownToText.convert(text);
|
||||||
if (sanitized.isEmpty) {
|
if (sanitized.isEmpty) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -280,38 +266,11 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
|
|||||||
|
|
||||||
final result = buffer.toString().trim();
|
final result = buffer.toString().trim();
|
||||||
if (result.isEmpty) {
|
if (result.isEmpty) {
|
||||||
return _sanitizeForSpeech(fallback);
|
return MarkdownToText.convert(fallback);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
String _sanitizeForSpeech(String input) {
|
|
||||||
if (input.isEmpty) {
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
var text = input;
|
|
||||||
// Use pre-compiled regex patterns for better performance
|
|
||||||
text = text.replaceAll(_ttsCodeBlockPattern, ' ');
|
|
||||||
text = text.replaceAll(_ttsInlineCodePattern, '');
|
|
||||||
text = text.replaceAll(_ttsImagePattern, r'$1');
|
|
||||||
text = text.replaceAll(_ttsLinkPattern, r'$1');
|
|
||||||
text = text.replaceAll(_ttsBoldPattern1, '');
|
|
||||||
text = text.replaceAll(_ttsBoldPattern2, '');
|
|
||||||
text = text.replaceAll(_ttsItalicPattern1, '');
|
|
||||||
text = text.replaceAll(_ttsItalicPattern2, '');
|
|
||||||
text = text.replaceAll(_ttsStrikePattern, '');
|
|
||||||
text = text.replaceAll(_ttsListPattern, '');
|
|
||||||
text = text.replaceAll(_ttsQuotePattern, '');
|
|
||||||
text = text.replaceAll('&nbsp;', ' ');
|
|
||||||
text = text.replaceAll('&amp;', '&');
|
|
||||||
text = text.replaceAll('&lt;', '<');
|
|
||||||
text = text.replaceAll('&gt;', '>');
|
|
||||||
text = text.replaceAll(_ttsMultiSpacePattern, ' ');
|
|
||||||
text = text.replaceAll(_ttsMultiNewlinePattern, '\n\n');
|
|
||||||
return text.trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
// No streaming-specific markdown fixes needed here; handled by Markdown widget
|
// No streaming-specific markdown fixes needed here; handled by Markdown widget
|
||||||
|
|
||||||
Widget _buildToolCallTile(ToolCallEntry tc) {
|
Widget _buildToolCallTile(ToolCallEntry tc) {
|
||||||
|
|||||||
Reference in New Issue
Block a user