From 5fd68f86fe2a83f92fd303b13a28df0020cde6a4 Mon Sep 17 00:00:00 2001
From: cogwheel <172976095+cogwheel0@users.noreply.github.com>
Date: Mon, 22 Dec 2025 14:07:04 +0530
Subject: [PATCH] refactor(markdown): remove deprecated stream formatter and
enhance preprocessor
---
lib/core/utils/html_utils.dart | 18 --
lib/core/utils/markdown_stream_formatter.dart | 71 -----
lib/core/utils/markdown_to_text.dart | 160 ----------
lib/core/utils/reasoning_parser.dart | 100 ++++--
lib/core/utils/tool_calls_parser.dart | 16 +-
.../chat/providers/chat_providers.dart | 112 +------
.../providers/text_to_speech_provider.dart | 4 +-
.../chat/services/voice_call_service.dart | 4 +-
lib/features/chat/views/chat_page.dart | 33 +-
lib/features/chat/views/voice_call_page.dart | 4 +-
.../widgets/assistant_message_widget.dart | 42 ++-
.../markdown/markdown_preprocessor.dart | 288 +++++++++++++-----
12 files changed, 347 insertions(+), 505 deletions(-)
delete mode 100644 lib/core/utils/html_utils.dart
delete mode 100644 lib/core/utils/markdown_stream_formatter.dart
delete mode 100644 lib/core/utils/markdown_to_text.dart
diff --git a/lib/core/utils/html_utils.dart b/lib/core/utils/html_utils.dart
deleted file mode 100644
index 6b4659e..0000000
--- a/lib/core/utils/html_utils.dart
+++ /dev/null
@@ -1,18 +0,0 @@
-/// HTML entity utilities for parsing content.
-///
-/// Reference: openwebui-src/src/lib/utils/index.ts (unescapeHtml)
-library;
-
-import 'package:html_unescape/html_unescape.dart';
-
-/// Utility class for HTML entity handling.
-class HtmlUtils {
- /// HTML entity unescaper instance.
- static final _unescape = HtmlUnescape();
-
- /// Unescape HTML entities in a string.
- ///
- /// Handles all Named, Decimal, and Hexadecimal Character References.
- static String unescapeHtml(String s) => _unescape.convert(s);
-}
-
diff --git a/lib/core/utils/markdown_stream_formatter.dart b/lib/core/utils/markdown_stream_formatter.dart
deleted file mode 100644
index 45d26b6..0000000
--- a/lib/core/utils/markdown_stream_formatter.dart
+++ /dev/null
@@ -1,71 +0,0 @@
-// Pre-compiled regex patterns for markdown syntax detection (performance optimization)
-final _boldPattern = RegExp(r'\*\*');
-final _italicPattern = RegExp(r'(? _raw.toString();
-
- String _syntheticClosures(String content) {
- final buffer = StringBuffer();
-
- final fenceCount = '```'.allMatches(content).length;
- if (fenceCount.isOdd) {
- buffer.writeln('```');
- }
-
- final boldCount = _boldPattern.allMatches(content).length;
- if (boldCount.isOdd) {
- buffer.write('**');
- }
-
- final italicCount = _italicPattern.allMatches(content).length;
- if (italicCount.isOdd) {
- buffer.write('*');
- }
-
- final openBrackets = '['.allMatches(content).length;
- final closeBrackets = ']'.allMatches(content).length;
- if (openBrackets > closeBrackets) {
- buffer.write(List.filled(openBrackets - closeBrackets, ']').join());
- }
-
- final openParens = '('.allMatches(content).length;
- final closeParens = ')'.allMatches(content).length;
- if (openParens > closeParens) {
- buffer.write(List.filled(openParens - closeParens, ')').join());
- }
-
- return buffer.toString();
- }
-}
diff --git a/lib/core/utils/markdown_to_text.dart b/lib/core/utils/markdown_to_text.dart
deleted file mode 100644
index 687720f..0000000
--- a/lib/core/utils/markdown_to_text.dart
+++ /dev/null
@@ -1,160 +0,0 @@
-/// Converts markdown text to plain text suitable for text-to-speech.
-///
-/// Strips formatting while preserving the semantic meaning and readability
-/// of the content for audio consumption.
-class MarkdownToText {
- const MarkdownToText._();
-
- static final _thinkingBlockRegex = RegExp(
- r']*>.*? ',
- multiLine: true,
- dotAll: true,
- );
- static final _thinkTagRegex = RegExp(
- r'.*?',
- multiLine: true,
- dotAll: true,
- );
- static final _reasoningTagRegex = RegExp(
- r'.*?',
- multiLine: true,
- dotAll: true,
- );
- static final _emojiRegex = RegExp(
- r'[\u{1F600}-\u{1F64F}]|[\u{1F300}-\u{1F5FF}]|[\u{1F680}-\u{1F6FF}]|[\u{1F1E0}-\u{1F1FF}]|[\u{2600}-\u{26FF}]|[\u{2700}-\u{27BF}]|[\u{1F900}-\u{1F9FF}]|[\u{1FA00}-\u{1FA6F}]|[\u{1FA70}-\u{1FAFF}]|[\u{FE00}-\u{FE0F}]|[\u{1F018}-\u{1F270}]|[\u{238C}-\u{2454}]|[\u{20D0}-\u{20FF}]',
- unicode: true,
- );
- static final _codeBlockRegex = RegExp(
- r'```[^\n]*\n(.*?)```',
- multiLine: true,
- dotAll: true,
- );
- static final _inlineCodeRegex = RegExp(r'`([^`]+)`');
- static final _boldItalicRegex = RegExp(r'\*\*\*([^*]+)\*\*\*');
- static final _boldRegex = RegExp(r'\*\*([^*]+)\*\*');
- static final _italicRegex = RegExp(r'\*([^*]+)\*|_([^_]+)_');
- static final _strikethroughRegex = RegExp(r'~~([^~]+)~~');
- static final _linkRegex = RegExp(r'\[([^\]]+)\]\([^)]+\)');
- static final _imageRegex = RegExp(r'!\[([^\]]*)\]\([^)]+\)');
- static final _headingRegex = RegExp(r'^#{1,6}\s+(.+)$', multiLine: true);
- static final _listItemRegex = RegExp(r'^[\s]*[-*+]\s+(.+)$', multiLine: true);
- static final _orderedListRegex = RegExp(
- r'^[\s]*\d+\.\s+(.+)$',
- multiLine: true,
- );
- static final _blockquoteRegex = RegExp(r'^>\s*(.+)$', multiLine: true);
- static final _horizontalRuleRegex = RegExp(
- r'^[\s]*[-*_]{3,}[\s]*$',
- multiLine: true,
- );
- static final _htmlTagRegex = RegExp(r'<[^>]+>');
- static final _htmlEntityRegex = RegExp(r'&[a-z]+;|\d+;|[0-9a-f]+;');
- static final _multipleNewlinesRegex = RegExp(r'\n{3,}');
- static final _multipleSpacesRegex = RegExp(r' {2,}');
-
- /// Converts markdown text to plain text suitable for TTS.
- ///
- /// - Removes thinking/reasoning blocks
- /// - Removes emojis
- /// - Removes code blocks (replaces with descriptive text)
- /// - Strips all formatting (bold, italic, strikethrough)
- /// - Converts links to just their text
- /// - Removes images (or converts to alt text)
- /// - Simplifies headings
- /// - Preserves list structure with natural pauses
- /// - Removes HTML tags and entities
- /// - Normalizes whitespace
- static String convert(String markdown) {
- if (markdown.trim().isEmpty) {
- return '';
- }
-
- var text = markdown;
-
- // Remove thinking/reasoning blocks (must be done before general HTML tag removal)
- text = text.replaceAll(_thinkingBlockRegex, '');
- text = text.replaceAll(_thinkTagRegex, '');
- text = text.replaceAll(_reasoningTagRegex, '');
-
- // Remove emojis
- text = text.replaceAll(_emojiRegex, '');
-
- // Remove or replace code blocks with descriptive text
- text = text.replaceAllMapped(_codeBlockRegex, (match) {
- final code = match[1]?.trim() ?? '';
- if (code.isEmpty) {
- return '';
- }
- return ' (code block) ';
- });
-
- // Remove inline code backticks but keep the content
- text = text.replaceAllMapped(_inlineCodeRegex, (match) => match[1] ?? '');
-
- // Strip bold/italic/strikethrough formatting
- text = text.replaceAllMapped(_boldItalicRegex, (match) => match[1] ?? '');
- text = text.replaceAllMapped(_boldRegex, (match) => match[1] ?? '');
- text = text.replaceAllMapped(
- _italicRegex,
- (match) => match[1] ?? match[2] ?? '',
- );
- text = text.replaceAllMapped(
- _strikethroughRegex,
- (match) => match[1] ?? '',
- );
-
- // Convert links to just their text
- text = text.replaceAllMapped(_linkRegex, (match) => match[1] ?? '');
-
- // Remove images (or use alt text if available)
- text = text.replaceAllMapped(_imageRegex, (match) {
- final alt = match[1]?.trim() ?? '';
- return alt.isNotEmpty ? ' ($alt image) ' : '';
- });
-
- // Simplify headings (remove # symbols)
- text = text.replaceAllMapped(_headingRegex, (match) {
- final heading = match[1] ?? '';
- return '$heading.\n';
- });
-
- // Preserve list items with natural pauses
- text = text.replaceAllMapped(_listItemRegex, (match) => '${match[1]}. ');
- text = text.replaceAllMapped(_orderedListRegex, (match) => '${match[1]}. ');
-
- // Remove blockquote markers
- text = text.replaceAllMapped(_blockquoteRegex, (match) => match[1] ?? '');
-
- // Remove horizontal rules
- text = text.replaceAll(_horizontalRuleRegex, '');
-
- // Remove HTML tags
- text = text.replaceAll(_htmlTagRegex, '');
-
- // Decode HTML entities
- text = text.replaceAllMapped(_htmlEntityRegex, (match) {
- final entity = match[0] ?? '';
- return switch (entity) {
- ' ' => ' ',
- '&' => '&',
- '<' => '<',
- '>' => '>',
- '"' => '"',
- ''' => "'",
- _ => entity,
- };
- });
-
- // Normalize whitespace
- text = text.replaceAll(_multipleNewlinesRegex, '\n\n');
- text = text.replaceAll(_multipleSpacesRegex, ' ');
-
- // Convert newlines to spaces for natural speech flow
- text = text.replaceAll('\n', ' ');
-
- // Final cleanup
- text = text.trim();
-
- return text;
- }
-}
diff --git a/lib/core/utils/reasoning_parser.dart b/lib/core/utils/reasoning_parser.dart
index 18483d6..a8602dc 100644
--- a/lib/core/utils/reasoning_parser.dart
+++ b/lib/core/utils/reasoning_parser.dart
@@ -7,7 +7,12 @@
/// Reference: openwebui-src/backend/open_webui/utils/middleware.py DEFAULT_REASONING_TAGS
library;
-import 'html_utils.dart';
+import 'package:html_unescape/html_unescape.dart';
+
+final _htmlUnescape = HtmlUnescape();
+
+/// Unescape HTML entities in reasoning content.
+String _unescapeHtml(String s) => _htmlUnescape.convert(s);
/// All reasoning tag pairs supported by Open WebUI.
/// Reference: DEFAULT_REASONING_TAGS in middleware.py
@@ -181,9 +186,25 @@ class ReasoningParser {
}
// Check for raw tag pairs
+ // Supports tags with optional attributes like <think attr="value">
+ // Reference: openwebui-src/backend/open_webui/utils/middleware.py
for (final pair in tagPairs) {
final startTag = pair.$1;
- final idx = content.indexOf(startTag, index);
+ int idx = -1;
+
+ // For XML-like tags (e.g., <think>), match with optional attributes
+ if (startTag.startsWith('<') && startTag.endsWith('>')) {
+ final tagName = startTag.substring(1, startTag.length - 1);
+ final pattern = RegExp('<${RegExp.escape(tagName)}(\\s[^>]*)?>');
+ final match = pattern.firstMatch(content.substring(index));
+ if (match != null) {
+ idx = index + match.start;
+ }
+ } else {
+ // For non-XML tags (e.g., ◁think▷), use exact matching
+ idx = content.indexOf(startTag, index);
+ }
+
if (idx != -1 && (nextRawIdx == -1 || idx < nextRawIdx)) {
nextRawIdx = idx;
matchedRawPair = pair;
@@ -336,8 +357,8 @@ class ReasoningParser {
return _DetailsResult(
entry: ReasoningEntry(
- reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
- summary: HtmlUtils.unescapeHtml(summaryResult.summary),
+ reasoning: _unescapeHtml(summaryResult.remaining),
+ summary: _unescapeHtml(summaryResult.summary),
duration: effectiveDuration,
isDone: false,
blockType: blockType,
@@ -368,8 +389,8 @@ class ReasoningParser {
return _DetailsResult(
entry: ReasoningEntry(
- reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
- summary: HtmlUtils.unescapeHtml(summaryResult.summary),
+ reasoning: _unescapeHtml(summaryResult.remaining),
+ summary: _unescapeHtml(summaryResult.summary),
duration: effectiveDuration,
isDone: isDone,
blockType: blockType,
@@ -381,20 +402,47 @@ class ReasoningParser {
}
/// Parse a raw reasoning tag pair (e.g., `<think>...</think>`).
+ /// Supports tags with optional attributes like `<think attr="value">`.
+ ///
+ /// Reference: openwebui-src/backend/open_webui/utils/middleware.py
static _ReasoningResult _parseRawReasoning(
String content,
int startIdx,
String startTag,
String endTag,
) {
- final endIdx = content.indexOf(endTag, startIdx + startTag.length);
+ // Find the actual end of the opening tag (handles attributes)
+ int contentStartIdx;
+ if (startTag.startsWith('<') && startTag.endsWith('>')) {
+ // For XML-like tags, find the closing '>' to skip any attributes
+ final tagCloseIdx = content.indexOf('>', startIdx);
+ if (tagCloseIdx == -1) {
+ // Incomplete opening tag
+ return _ReasoningResult(
+ entry: ReasoningEntry(
+ reasoning: '',
+ summary: '',
+ duration: 0,
+ isDone: false,
+ ),
+ endIndex: content.length,
+ isComplete: false,
+ );
+ }
+ contentStartIdx = tagCloseIdx + 1;
+ } else {
+ // For non-XML tags, use exact tag length
+ contentStartIdx = startIdx + startTag.length;
+ }
+
+ final endIdx = content.indexOf(endTag, contentStartIdx);
if (endIdx == -1) {
// Incomplete block (streaming)
- final innerContent = content.substring(startIdx + startTag.length);
+ final innerContent = content.substring(contentStartIdx);
return _ReasoningResult(
entry: ReasoningEntry(
- reasoning: HtmlUtils.unescapeHtml(innerContent.trim()),
+ reasoning: _unescapeHtml(innerContent.trim()),
summary: '',
duration: 0,
isDone: false,
@@ -405,10 +453,10 @@ class ReasoningParser {
}
// Complete block
- final innerContent = content.substring(startIdx + startTag.length, endIdx);
+ final innerContent = content.substring(contentStartIdx, endIdx);
return _ReasoningResult(
entry: ReasoningEntry(
- reasoning: HtmlUtils.unescapeHtml(innerContent.trim()),
+ reasoning: _unescapeHtml(innerContent.trim()),
summary: '',
duration: 0,
isDone: true,
@@ -533,23 +581,33 @@ class ReasoningParser {
}
/// Formats the duration for display.
- /// Mirrors Open WebUI's formatting:
+ /// Mirrors Open WebUI's dayjs.duration(seconds, 'seconds').humanize():
/// - < 1: "less than a second"
/// - < 60: "X seconds"
- /// - >= 60: humanized (e.g., "2 minutes")
+ /// - >= 60: humanized (e.g., "a minute", "2 minutes", "about an hour")
+ ///
+ /// Reference: openwebui-src/src/lib/components/common/Collapsible.svelte
static String formatDuration(int seconds) {
if (seconds < 1) return 'less than a second';
if (seconds < 60) return '$seconds second${seconds == 1 ? '' : 's'}';
- final minutes = seconds ~/ 60;
- final remainingSeconds = seconds % 60;
-
- if (remainingSeconds == 0) {
- return '$minutes minute${minutes == 1 ? '' : 's'}';
+ // Match dayjs.duration().humanize() behavior
+ // Reference: https://day.js.org/docs/en/durations/humanize
+ if (seconds < 90) return 'a minute';
+ if (seconds < 2700) {
+ // 45 minutes
+ final minutes = (seconds / 60).round();
+ return '$minutes minutes';
}
-
- // For mixed minutes and seconds, use abbreviated format
- return '$minutes min ${remainingSeconds}s';
+ if (seconds < 5400) return 'about an hour'; // 90 minutes
+ if (seconds < 79200) {
+ // 22 hours
+ final hours = (seconds / 3600).round();
+ return '$hours hours';
+ }
+ if (seconds < 129600) return 'a day'; // 36 hours
+ final days = (seconds / 86400).round();
+ return '$days days';
}
}
diff --git a/lib/core/utils/tool_calls_parser.dart b/lib/core/utils/tool_calls_parser.dart
index 6ecc904..7cb6b6a 100644
--- a/lib/core/utils/tool_calls_parser.dart
+++ b/lib/core/utils/tool_calls_parser.dart
@@ -1,5 +1,7 @@
import 'dart:convert';
+import '../../shared/widgets/markdown/markdown_preprocessor.dart';
+
/// Parsed representation of one tool call emitted as a `<details type="tool_calls">` block
class ToolCallEntry {
final String id;
@@ -255,18 +257,8 @@ class ToolCallsParser {
static String sanitizeForApi(String content) {
if (content.isEmpty) return content;
- // Remove blocks we never want to include in conversation context
- final removeTypes = ['reasoning', 'code_interpreter'];
- for (final t in removeTypes) {
- content = content.replaceAll(
- RegExp(
- ']*>[\\s\\S]*? ',
- multiLine: true,
- dotAll: true,
- ),
- '',
- );
- }
+ // Remove annotations and reasoning blocks
+ content = ConduitMarkdownPreprocessor.sanitize(content);
if (!content.contains('> {
bool _taskStatusCheckInFlight = false;
bool _observedRemoteTask = false;
- MarkdownStreamFormatter? _markdownFormatter;
- String? _activeStreamingMessageId;
-
bool _initialized = false;
@override
@@ -180,7 +176,6 @@ class ChatMessagesNotifier extends Notifier> {
// Cancel any existing message stream when switching conversations
_cancelMessageStream();
- _clearStreamingFormatter(); // Explicitly clear formatter on conversation switch
_stopRemoteTaskMonitor();
if (next != null) {
@@ -222,16 +217,10 @@ class ChatMessagesNotifier extends Notifier> {
if (controller != null && controller.isActive) {
unawaited(controller.cancel());
}
- _clearStreamingFormatter();
cancelSocketSubscriptions();
_stopRemoteTaskMonitor();
}
- void _clearStreamingFormatter() {
- _markdownFormatter = null;
- _activeStreamingMessageId = null;
- }
-
/// Checks if streaming cleanup is needed when adopting server messages.
/// Must be called BEFORE updating state, as it compares current local state
/// with incoming server state.
@@ -397,39 +386,6 @@ class ChatMessagesNotifier extends Notifier> {
}
}
- void _ensureFormatterForMessage(ChatMessage message) {
- // If we're switching to a different message, clear the old formatter first
- if (_markdownFormatter != null && _activeStreamingMessageId != message.id) {
- DebugLogger.log(
- 'Clearing formatter for message switch: $_activeStreamingMessageId -> ${message.id}',
- scope: 'chat/providers',
- );
- _clearStreamingFormatter();
- }
-
- // If formatter already exists for this message, reuse it
- if (_markdownFormatter != null && _activeStreamingMessageId == message.id) {
- return;
- }
-
- // Create new formatter
- final formatter = MarkdownStreamFormatter();
-
- // Only seed with existing content if this is a resume scenario
- // For new messages (empty content), start fresh to avoid duplication
- final seed = _stripStreamingPlaceholders(message.content);
- if (seed.isNotEmpty && message.content.isNotEmpty) {
- DebugLogger.log(
- 'Seeding formatter with existing content (${seed.length} chars) for message ${message.id}',
- scope: 'chat/providers',
- );
- formatter.seed(seed);
- }
-
- _markdownFormatter = formatter;
- _activeStreamingMessageId = message.id;
- }
-
String _stripStreamingPlaceholders(String content) {
var result = content;
const ti = '[TYPING_INDICATOR]';
@@ -443,15 +399,6 @@ class ChatMessagesNotifier extends Notifier> {
return result;
}
- String _finalizeFormatter(String messageId, String fallback) {
- if (_markdownFormatter != null && _activeStreamingMessageId == messageId) {
- final output = _markdownFormatter!.finalize();
- _clearStreamingFormatter();
- return output;
- }
- return fallback;
- }
-
void _touchStreamingActivity() {
_lastStreamingActivity = DateTime.now();
if (_hasStreamingAssistant) {
@@ -728,16 +675,11 @@ class ChatMessagesNotifier extends Notifier> {
}
void appendToLastMessage(String content) {
- if (state.isEmpty) {
- return;
- }
+ if (state.isEmpty) return;
final lastMessage = state.last;
- if (lastMessage.role != 'assistant') {
- return;
- }
+ if (lastMessage.role != 'assistant') return;
if (!lastMessage.isStreaming) {
- // Ignore late chunks when streaming already finished
DebugLogger.log(
'Ignoring late chunk for finished message: ${lastMessage.id}',
scope: 'chat/providers',
@@ -745,52 +687,21 @@ class ChatMessagesNotifier extends Notifier> {
return;
}
- _ensureFormatterForMessage(lastMessage);
-
- // Defensive check: ensure the formatter is for the correct message
- // This prevents cross-message pollution when messages change rapidly
- if (_activeStreamingMessageId != lastMessage.id) {
- DebugLogger.warning(
- 'Formatter message ID mismatch: active=$_activeStreamingMessageId, last=${lastMessage.id}. Resetting formatter.',
- );
- _clearStreamingFormatter();
- _ensureFormatterForMessage(lastMessage);
- }
-
- final formatter = _markdownFormatter!;
- final preview = formatter.ingest(content);
-
+ // Append content directly - the widget's normalize() handles incomplete markdown
state = [
...state.sublist(0, state.length - 1),
- lastMessage.copyWith(content: preview),
+ lastMessage.copyWith(content: lastMessage.content + content),
];
_touchStreamingActivity();
}
void replaceLastMessageContent(String content) {
- if (state.isEmpty) {
- return;
- }
+ if (state.isEmpty) return;
final lastMessage = state.last;
- if (lastMessage.role != 'assistant') {
- return;
- }
-
- _ensureFormatterForMessage(lastMessage);
-
- // Defensive check: ensure the formatter is for the correct message
- if (_activeStreamingMessageId != lastMessage.id) {
- DebugLogger.warning(
- 'Formatter message ID mismatch in replace: active=$_activeStreamingMessageId, last=${lastMessage.id}. Resetting formatter.',
- );
- _clearStreamingFormatter();
- _ensureFormatterForMessage(lastMessage);
- }
-
- final formatter = _markdownFormatter!;
- final sanitized = formatter.replace(_stripStreamingPlaceholders(content));
+ if (lastMessage.role != 'assistant') return;
+ final sanitized = _stripStreamingPlaceholders(content);
state = [
...state.sublist(0, state.length - 1),
lastMessage.copyWith(content: sanitized),
@@ -804,8 +715,7 @@ class ChatMessagesNotifier extends Notifier> {
final lastMessage = state.last;
if (lastMessage.role != 'assistant' || !lastMessage.isStreaming) return;
- final finalized = _finalizeFormatter(lastMessage.id, lastMessage.content);
- final cleaned = _stripStreamingPlaceholders(finalized);
+ final cleaned = _stripStreamingPlaceholders(lastMessage.content);
var updatedLast = lastMessage.copyWith(
isStreaming: false,
@@ -1005,11 +915,7 @@ Future restoreDefaultModel(dynamic ref) async {
try {
await ref.read(defaultModelProvider.future);
} catch (e) {
- DebugLogger.error(
- 'restore-default-failed',
- scope: 'chat/model',
- error: e,
- );
+ DebugLogger.error('restore-default-failed', scope: 'chat/model', error: e);
}
}
diff --git a/lib/features/chat/providers/text_to_speech_provider.dart b/lib/features/chat/providers/text_to_speech_provider.dart
index 759a19f..3d4253e 100644
--- a/lib/features/chat/providers/text_to_speech_provider.dart
+++ b/lib/features/chat/providers/text_to_speech_provider.dart
@@ -4,7 +4,7 @@ import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../../core/services/settings_service.dart';
import '../../../core/providers/app_providers.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
import '../services/text_to_speech_service.dart';
enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error }
@@ -218,7 +218,7 @@ class TextToSpeechController extends Notifier {
}
// Prepare sentence split for highlighting
- final cleanText = MarkdownToText.convert(text);
+ final cleanText = ConduitMarkdownPreprocessor.toPlainText(text);
final sentences = _service.splitTextForSpeech(cleanText);
final offsets = _computeOffsets(cleanText, sentences);
diff --git a/lib/features/chat/services/voice_call_service.dart b/lib/features/chat/services/voice_call_service.dart
index bb786d4..d98fbf1 100644
--- a/lib/features/chat/services/voice_call_service.dart
+++ b/lib/features/chat/services/voice_call_service.dart
@@ -12,7 +12,7 @@ import '../../../core/providers/app_providers.dart';
import '../../../core/services/background_streaming_handler.dart';
import '../../../core/services/callkit_service.dart';
import '../../../core/services/socket_service.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
import '../providers/chat_providers.dart';
import 'text_to_speech_service.dart';
import '../../../core/services/settings_service.dart';
@@ -589,7 +589,7 @@ class VoiceCallService {
void _processSpeakableSegments({required bool isFinalChunk}) {
if (_isDisposed) return;
- final cleanText = MarkdownToText.convert(_accumulatedResponse).trim();
+ final cleanText = ConduitMarkdownPreprocessor.toPlainText(_accumulatedResponse).trim();
if (cleanText.isEmpty) {
return;
}
diff --git a/lib/features/chat/views/chat_page.dart b/lib/features/chat/views/chat_page.dart
index 7ba7699..3b2797e 100644
--- a/lib/features/chat/views/chat_page.dart
+++ b/lib/features/chat/views/chat_page.dart
@@ -18,6 +18,7 @@ import '../providers/chat_providers.dart';
import '../../../core/utils/debug_logger.dart';
import '../../../core/utils/user_display_name.dart';
import '../../../core/utils/model_icon_utils.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
import '../../../core/utils/android_assistant_handler.dart';
import '../widgets/modern_chat_input.dart';
import '../widgets/user_message_bubble.dart';
@@ -1205,36 +1206,8 @@ class _ChatPageState extends ConsumerState {
}
void _copyMessage(String content) {
- // Strip reasoning details from the copied content
- String cleanedContent = content;
-
- // Remove <details> reasoning blocks
- cleanedContent = cleanedContent.replaceAll(
- RegExp(
- r']*>[\s\S]*?<\/details>',
- multiLine: true,
- dotAll: true,
- ),
- '',
- );
-
- // Remove raw reasoning tags
- cleanedContent = cleanedContent.replaceAll(
- RegExp(r'[\s\S]*?<\/think>', multiLine: true, dotAll: true),
- '',
- );
- cleanedContent = cleanedContent.replaceAll(
- RegExp(
- r'[\s\S]*?<\/reasoning>',
- multiLine: true,
- dotAll: true,
- ),
- '',
- );
-
- // Clean up any extra whitespace
- cleanedContent = cleanedContent.trim();
-
+ // Strip reasoning blocks and annotations from copied content
+ final cleanedContent = ConduitMarkdownPreprocessor.sanitize(content);
Clipboard.setData(ClipboardData(text: cleanedContent));
}
diff --git a/lib/features/chat/views/voice_call_page.dart b/lib/features/chat/views/voice_call_page.dart
index 743f28a..a5863d7 100644
--- a/lib/features/chat/views/voice_call_page.dart
+++ b/lib/features/chat/views/voice_call_page.dart
@@ -7,7 +7,7 @@ import 'package:flutter/cupertino.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../../core/providers/app_providers.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
import '../../../l10n/app_localizations.dart';
import '../../../shared/widgets/conduit_components.dart';
import '../providers/chat_providers.dart';
@@ -335,7 +335,7 @@ class _VoiceCallPageState extends ConsumerState
} else if (_currentState == VoiceCallState.speaking &&
_currentResponse.isNotEmpty) {
// Convert markdown to clean text for display
- displayText = MarkdownToText.convert(_currentResponse);
+ displayText = ConduitMarkdownPreprocessor.toPlainText(_currentResponse);
}
if (displayText.isEmpty) {
diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart
index a79000c..4270fa4 100644
--- a/lib/features/chat/widgets/assistant_message_widget.dart
+++ b/lib/features/chat/widgets/assistant_message_widget.dart
@@ -11,7 +11,7 @@ import '../../../core/utils/reasoning_parser.dart';
import '../../../core/utils/message_segments.dart';
import '../../../core/utils/tool_calls_parser.dart';
import '../../../core/models/chat_message.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
import '../providers/text_to_speech_provider.dart';
import 'enhanced_image_attachment.dart';
import 'package:conduit/l10n/app_localizations.dart';
@@ -166,6 +166,10 @@ class _AssistantMessageWidgetState extends ConsumerState
raw = raw.substring(searchBanner.length);
}
+ // Note: Link reference definitions (including OpenAI annotations like
+ // [openai_responses:v2:reasoning:ID]: #) are stripped by the markdown
+ // preprocessor using the `markdown` package for proper CommonMark handling.
+
// Do not truncate content during streaming; segmented parser skips
// incomplete details blocks and tiles will render once complete.
final rSegs = ReasoningParser.segments(raw);
@@ -263,12 +267,12 @@ class _AssistantMessageWidgetState extends ConsumerState
String _buildTtsPlainTextFallback(List segments, String fallback) {
if (segments.isEmpty) {
- return MarkdownToText.convert(fallback);
+ return ConduitMarkdownPreprocessor.toPlainText(fallback);
}
final buffer = StringBuffer();
for (final segment in segments) {
- final sanitized = MarkdownToText.convert(segment);
+ final sanitized = ConduitMarkdownPreprocessor.toPlainText(segment);
if (sanitized.isEmpty) {
continue;
}
@@ -281,7 +285,7 @@ class _AssistantMessageWidgetState extends ConsumerState
final result = buffer.toString().trim();
if (result.isEmpty) {
- return MarkdownToText.convert(fallback);
+ return ConduitMarkdownPreprocessor.toPlainText(fallback);
}
return result;
}
@@ -1738,24 +1742,32 @@ class _AssistantMessageWidgetState extends ConsumerState
summaryLower == 'thinking...' ||
summaryLower.startsWith('thinking');
+ // Check if summary contains server-formatted duration (e.g., "(0s)", "for 0 secs")
+ final hasDurationInSummary = RegExp(
+ r'\(\d+s\)|\bfor \d+ secs?\b',
+ caseSensitive: false,
+ ).hasMatch(rc.summary);
+
// - If not done (streaming): show "Thinking..."
- // - If done with duration: show "Thought for X seconds"
- // - If done without duration: show "Thoughts" or custom summary
+ // - If done: show humanized "Thought for X" (uses our formatDuration)
+ // - If done without duration and has custom summary: show summary
if (!rc.isDone) {
// Still thinking - use summary if available, else default
return hasSummary && !isThinkingSummary ? rc.summary : l10n.thinking;
}
- // Done thinking - check duration
- if (rc.duration > 0) {
+ // Done thinking - always use humanized duration format
+ // This ensures "less than a second" instead of "0 secs" from server
+ if (rc.duration >= 0 && (rc.duration > 0 || hasDurationInSummary || isThinkingSummary)) {
return l10n.thoughtForDuration(rc.formattedDuration);
}
- // No duration - use custom summary if meaningful, else default
- if (!hasSummary || isThinkingSummary) {
- return l10n.thoughts;
+ // Has custom summary that's not a duration - show it
+ if (hasSummary && !isThinkingSummary) {
+ return rc.summary;
}
- return rc.summary;
+
+ return l10n.thoughts;
}
Widget buildHeader() {
@@ -1863,13 +1875,13 @@ String _buildTtsPlainTextWorker(Map payload) {
final segments = rawSegments is List ? rawSegments.cast() : const [];
if (segments.isEmpty) {
- return MarkdownToText.convert(fallback);
+ return ConduitMarkdownPreprocessor.toPlainText(fallback);
}
final buffer = StringBuffer();
for (final segment in segments) {
if (segment is! String || segment.isEmpty) continue;
- final sanitized = MarkdownToText.convert(segment);
+ final sanitized = ConduitMarkdownPreprocessor.toPlainText(segment);
if (sanitized.isEmpty) continue;
if (buffer.isNotEmpty) {
buffer.writeln();
@@ -1880,7 +1892,7 @@ String _buildTtsPlainTextWorker(Map payload) {
final result = buffer.toString().trim();
if (result.isEmpty) {
- return MarkdownToText.convert(fallback);
+ return ConduitMarkdownPreprocessor.toPlainText(fallback);
}
return result;
}
diff --git a/lib/shared/widgets/markdown/markdown_preprocessor.dart b/lib/shared/widgets/markdown/markdown_preprocessor.dart
index 4fee307..8db779c 100644
--- a/lib/shared/widgets/markdown/markdown_preprocessor.dart
+++ b/lib/shared/widgets/markdown/markdown_preprocessor.dart
@@ -1,10 +1,22 @@
-/// Utility helpers for normalising markdown content before handing it to
-/// [ConduitMarkdown]. The goal is to keep streaming responsive while smoothing
-/// out troublesome edge-cases (e.g. nested fences inside lists).
+import 'package:html_unescape/html_unescape.dart';
+import 'package:markdown/markdown.dart' as md;
+
+/// Content preprocessing, sanitization, and transformation for Markdown.
+///
+/// Provides:
+/// - [normalize] - Prepares content for display (keeps reasoning blocks)
+/// - [sanitize] - Cleans content for copy/API (removes reasoning blocks)
+/// - [toPlainText] - Converts to plain text for TTS
+/// - [softenInlineCode] - Breaks long inline code spans
class ConduitMarkdownPreprocessor {
const ConduitMarkdownPreprocessor._();
- // Pre-compile regex patterns for better performance during streaming
+ static final _htmlUnescape = HtmlUnescape();
+
+ // ============================================================
+ // Pre-compiled Patterns - Display/Sanitization
+ // ============================================================
+
static final _bulletFenceRegex = RegExp(
r'^(\s*(?:[*+-]|\d+\.)\s+)```([^\s`]*)\s*$',
multiLine: true,
@@ -14,7 +26,8 @@ class ConduitMarkdownPreprocessor {
multiLine: true,
);
static final _dedentCloseRegex = RegExp(r'^[ \t]+```\s*$', multiLine: true);
- static final _inlineClosingRegex = RegExp(r'([^\r\n`])```(?=\s*(?:\r?\n|$))');
+ static final _inlineClosingRegex =
+ RegExp(r'([^\r\n`])```(?=\s*(?:\r?\n|$))');
static final _labelThenDashRegex = RegExp(
r'^(\*\*[^\n*]+\*\*.*)\n(\s*-{3,}\s*$)',
multiLine: true,
@@ -24,92 +37,143 @@ class ConduitMarkdownPreprocessor {
multiLine: true,
);
static final _fenceAtBolRegex = RegExp(r'^\s*```', multiLine: true);
+ static final _linkWithTrailingSpaces =
+ RegExp(r'\[[^\]]+\]\([^\)]+\)\s{2,}$');
+ static final _multipleNewlines = RegExp(r'\n{3,}');
- /// Normalises common fence and hard-break issues produced by LLMs.
+  /// Matches reasoning blocks whole (tags and body): `<details>` elements and think/thinking/reasoning tags.
+  static final _reasoningBlocks = RegExp(
+    r'<details[^>]*>[\s\S]*?</details>|' // NOTE(review): tag name restored from a garbled literal - confirm
+    r'<(?:think|thinking|reasoning)(?:\s[^>]*)?>[\s\S]*?</(?:think|thinking|reasoning)>',
+    multiLine: true,
+    dotAll: true,
+  );
+
+ // ============================================================
+ // Pre-compiled Patterns - Plain Text (TTS)
+ // ============================================================
+
+ static final _codeBlock = RegExp(r'```[^\n]*\n[\s\S]*?```');
+ static final _inlineCode = RegExp(r'`([^`]+)`');
+ static final _image = RegExp(r'!\[[^\]]*\]\([^)]+\)');
+ static final _link = RegExp(r'\[([^\]]+)\]\([^)]+\)');
+ // Paired markdown formatting. Doubled/tripled markers are matched anywhere;
+ // single * and _ are ambiguous (math, variable names), so they get the guards below.
+ static final _boldItalic = RegExp(r'\*\*\*([^*]+)\*\*\*');
+ static final _bold = RegExp(r'\*\*([^*]+)\*\*');
+ static final _strikethrough = RegExp(r'~~([^~]+)~~');
+ // Single-asterisk italic: one whitespace-free run bounded by whitespace or input start/end
+ static final _italicAsterisk = RegExp(r'(?:^|\s)\*([^*\s]+)\*(?=\s|$)');
+ // Single-underscore italic: same boundary rule, so underscores inside identifiers are left alone
+ static final _italicUnderscore = RegExp(r'(?:^|\s)_([^_\s]+)_(?=\s|$)');
+ static final _heading = RegExp(r'^#{1,6}\s+', multiLine: true);
+ static final _listMarker = RegExp(r'^[\s]*(?:[-*+]|\d+\.)\s+', multiLine: true);
+ static final _blockquote = RegExp(r'^>\s*', multiLine: true);
+ static final _horizontalRule = RegExp(r'^[\s]*[-*_]{3,}[\s]*$', multiLine: true);
+ static final _htmlTag = RegExp(r'<[^>]+>');
+ /// Code-point ranges (emoji, symbols, variation selectors) stripped for TTS; only the listed ranges are covered.
+ static final _emoji = RegExp(
+ r'[\u{1F600}-\u{1F64F}]|' // Emoticons
+ r'[\u{1F300}-\u{1F5FF}]|' // Misc Symbols and Pictographs
+ r'[\u{1F680}-\u{1F6FF}]|' // Transport and Map
+ r'[\u{1F1E0}-\u{1F1FF}]|' // Flags
+ r'[\u{2600}-\u{26FF}]|' // Misc symbols
+ r'[\u{2700}-\u{27BF}]|' // Dingbats
+ r'[\u{1F900}-\u{1F9FF}]|' // Supplemental Symbols
+ r'[\u{1FA00}-\u{1FA6F}]|' // Chess, cards
+ r'[\u{1FA70}-\u{1FAFF}]|' // Symbols Extended-A
+ r'[\u{FE00}-\u{FE0F}]|' // Variation Selectors
+ r'[\u{1F018}-\u{1F270}]|' // Various
+ r'[\u{238C}-\u{2454}]|' // Misc Technical, Control Pictures, OCR (partial)
+ r'[\u{20D0}-\u{20FF}]', // Combining Diacritical Marks for Symbols
+ unicode: true,
+ );
+ static final _whitespace = RegExp(r'\s+');
+
+ // ============================================================
+ // Public API
+ // ============================================================
+
+ /// Normalizes [input] for Markdown display; an empty string is returned unchanged.
+ ///
+ /// - Strips link reference definitions (including OpenAI annotations)
+ /// - Fixes common LLM fence issues, Setext/numbered-heading misparses, and link spacing
+ /// - Preserves reasoning blocks for collapsible UI rendering
static String normalize(String input) {
- if (input.isEmpty) {
- return input;
- }
+ if (input.isEmpty) return input;
var output = input.replaceAll('\r\n', '\n');
- // Move fenced code blocks that start on the same line as a list item onto
- // their own line so the parser does not treat them as list text.
- output = output.replaceAllMapped(
- _bulletFenceRegex,
- (match) => '${match[1]}\n```${match[2]}',
- );
+ // Drop link reference definitions (labels are discovered via the markdown package)
+ output = _stripLinkReferenceDefinitions(output);
- // Dedent opening fences to avoid partial code-block detection when the
- // model indents fences by accident.
- output = output.replaceAllMapped(
- _dedentOpenRegex,
- (match) => '```${match[1]}',
- );
+ // Repair list-attached, indented, inline-closing and unclosed code fences
+ output = _normalizeFences(output);
- // Dedent closing fences for the same reason as the opening fences.
- output = output.replaceAllMapped(_dedentCloseRegex, (_) => '```');
-
- // Ensure closing fences stand alone. Prevents situations like `}\n```foo`
- // from keeping trailing braces inside the code block.
- output = output.replaceAllMapped(
- _inlineClosingRegex,
- (match) => '${match[1]}\n```',
- );
-
- // Insert a blank line when a "label: value" line is followed by a
- // horizontal rule so it is not treated as a Setext heading underline.
+ // Blank line between a **label** line and a following --- so it is not a Setext underline
output = output.replaceAllMapped(
_labelThenDashRegex,
(match) => '${match[1]}\n\n${match[2]}',
);
- // Allow headings like "## 1. Summary" without triggering ordered-list
- // parsing by inserting a zero-width joiner after the numeric marker.
+ // Insert U+200C after the number in headings like "## 1. Summary" to avoid list parsing
output = output.replaceAllMapped(
_atxEnumRegex,
(match) => '${match[1]}.\u200C${match[2]}${match[3]}',
);
- // Auto-close an unmatched opening fence at EOF to avoid the entire tail
- // of the message rendering as code.
- final fenceCount = _fenceAtBolRegex.allMatches(output).length;
- if (fenceCount.isOdd) {
- if (!output.endsWith('\n')) {
- output += '\n';
- }
- output += '```';
- }
-
- // Convert Markdown links followed by two trailing spaces into separate
- // paragraphs so that consecutive links do not collapse into a single
- // paragraph at render time.
- final linkWithTrailingSpaces = RegExp(r'\[[^\]]+\]\([^\)]+\)\s{2,}$');
- final lines = output.split('\n');
- if (lines.length > 1) {
- final buffer = StringBuffer();
- for (var i = 0; i < lines.length; i++) {
- final line = lines[i];
- buffer.write(line);
- if (i < lines.length - 1) {
- buffer.write('\n');
- }
- if (linkWithTrailingSpaces.hasMatch(line)) {
- buffer.write('\n');
- }
- }
- output = buffer.toString();
- }
+ // Add a blank line after lines that end in a link followed by 2+ spaces
+ output = _separateConsecutiveLinks(output);
return output;
}
- /// Inserts zero-width break characters into long inline code spans so they
- /// remain readable and do not overflow narrow layouts.
+  /// Cleans [input] for clipboard copy or API submission.
+  ///
+  /// Converts CRLF to LF, drops link reference definitions (including OpenAI
+  /// annotations) and reasoning/thinking blocks, collapses runs of three or
+  /// more newlines, and trims. Empty input is returned unchanged.
+  static String sanitize(String input) {
+    if (input.isEmpty) return input;
+
+    // Line endings are unified first; the later patterns only look for \n.
+    final withoutRefs =
+        input.replaceAll('\r\n', '\n').transform(_stripLinkReferenceDefinitions);
+    final visibleOnly = withoutRefs.replaceAll(_reasoningBlocks, '');
+
+    return visibleOnly.replaceAll(_multipleNewlines, '\n\n').trim();
+  }
+
+  /// Flattens markdown [input] into a single line of plain text for text-to-speech.
+  static String toPlainText(String input) {
+    if (input.trim().isEmpty) return '';
+
+    String inner(Match m) => m[1] ?? '';
+    String spacedInner(Match m) => ' ${m[1] ?? ''}';
+    final emphasis = [_boldItalic, _bold, _strikethrough];
+    final lineMarkers = [_heading, _listMarker, _blockquote, _horizontalRule];
+
+    // Code blocks and images are dropped; inline code and links keep their text.
+    var text = sanitize(input).replaceAll(_codeBlock, '');
+    text = text.replaceAllMapped(_inlineCode, inner).replaceAll(_image, '');
+    text = text.replaceAllMapped(_link, inner);
+    for (final pair in emphasis) {
+      text = text.replaceAllMapped(pair, inner);
+    }
+    text = text.replaceAllMapped(_italicAsterisk, spacedInner);
+    text = text.replaceAllMapped(_italicUnderscore, spacedInner);
+    for (final marker in lineMarkers) {
+      text = text.replaceAll(marker, '');
+    }
+    // Tags are removed before entities are decoded, so decoded "<" / ">" survive.
+    text = _htmlUnescape.convert(text.replaceAll(_htmlTag, ''));
+    return text.replaceAll(_emoji, '').replaceAll(_whitespace, ' ').trim();
+  }
+
+ /// Breaks long inline code spans for better wrapping.
static String softenInlineCode(String input, {int chunkSize = 24}) {
- if (input.length <= chunkSize) {
- return input;
- }
+ if (input.length <= chunkSize) return input;
+
final buffer = StringBuffer();
for (var i = 0; i < input.length; i++) {
buffer.write(input[i]);
@@ -119,4 +183,90 @@ class ConduitMarkdownPreprocessor {
}
return buffer.toString();
}
+
+ // ============================================================
+ // Private Helpers
+ // ============================================================
+
+  /// Repairs code-fence mistakes that LLM output commonly contains.
+  ///
+  /// Four substitutions run in a fixed order: list-attached fences, indented
+  /// opening fences, indented closing fences, and fences trailing other text.
+  /// Afterwards, if an odd number of line-leading fences is left, a closing
+  /// fence is appended so the tail of the message does not render as code.
+  ///
+  /// Returns the repaired text.
+  static String _normalizeFences(String input) {
+    final repaired = input
+        // A fence opened on a list-item line moves to its own line, so the
+        // parser does not treat it as list text.
+        .replaceAllMapped(
+          _bulletFenceRegex,
+          (m) => '${m[1]}\n```${m[2]}',
+        )
+        // Opening fences are dedented; capture group 1 is re-emitted after
+        // the backticks.
+        .replaceAllMapped(_dedentOpenRegex, (m) => '```${m[1]}')
+        // Closing fences are dedented as well.
+        .replaceAll(_dedentCloseRegex, '```')
+        // A fence that trails other text at the end of a line is pushed onto
+        // the next line so that it stands alone.
+        .replaceAllMapped(_inlineClosingRegex, (m) => '${m[1]}\n```');
+
+    // Fences are counted only where they start a line (leading whitespace
+    // allowed); an even total means every block is closed.
+    final fenceTotal = _fenceAtBolRegex.allMatches(repaired).length;
+    if (fenceTotal.isEven) return repaired;
+
+    // Unbalanced: close the open block, adding a newline first if needed.
+    return repaired.endsWith('\n') ? '$repaired```' : '$repaired\n```';
+  }
+
+  /// Inserts an extra newline after every line that ends with a Markdown link
+  /// followed by two or more whitespace characters, so that consecutive links
+  /// do not collapse into one paragraph when rendered.
+  ///
+  /// Input without any newline is returned unchanged.
+  static String _separateConsecutiveLinks(String input) {
+    if (!input.contains('\n')) return input;
+
+    return input
+        .split('\n')
+        .map((row) => _linkWithTrailingSpaces.hasMatch(row) ? '$row\n' : row)
+        .join('\n');
+  }
+
+  /// Removes link reference definitions (lines such as `[label]: url "title"`).
+  ///
+  /// Labels are discovered by parsing [input] with the `markdown` package; each
+  /// definition line for a known label is then deleted case-insensitively.
+  /// When labels were found, runs of 3+ newlines are also collapsed and the
+  /// result is trimmed. Input without '[' or without labels is returned as-is.
+  static String _stripLinkReferenceDefinitions(String input) {
+    if (!input.contains('[')) return input;
+
+    final parsed = md.Document()..parseLines(input.split('\n'));
+    final labels = parsed.linkReferences.keys.toSet();
+    if (labels.isEmpty) return input;
+
+    // Up to 3 leading spaces, [label]:, a destination, then an optional title.
+    final definitionLine = RegExp(
+      [
+        r'^[ ]{0,3}\[(?:',
+        labels.map(RegExp.escape).join('|'),
+        r')\]:[ \t]*(?:<[^>]*>|[^\s]*)(?:[ \t]+(?:"[^"]*"|',
+        r"'[^']*'",
+        r'|\([^)]*\)))?[ \t]*$',
+      ].join(),
+      multiLine: true,
+      caseSensitive: false,
+    );
+    final stripped = input.replaceAll(definitionLine, '');
+    return stripped.replaceAll(_multipleNewlines, '\n\n').trim();
+  }
+}
+
+/// Adds [transform], which applies a plain `String -> String` function in the middle of a method chain.
+extension _StringTransform on String {
+ String transform(String Function(String) fn) => fn(this);
}