From 5fd68f86fe2a83f92fd303b13a28df0020cde6a4 Mon Sep 17 00:00:00 2001
From: cogwheel <172976095+cogwheel0@users.noreply.github.com>
Date: Mon, 22 Dec 2025 14:07:04 +0530
Subject: [PATCH] refactor(markdown): remove deprecated stream formatter and
 enhance preprocessor

---
 lib/core/utils/html_utils.dart                |  18 --
 lib/core/utils/markdown_stream_formatter.dart |  71 -----
 lib/core/utils/markdown_to_text.dart          | 160 ----------
 lib/core/utils/reasoning_parser.dart          | 100 ++++--
 lib/core/utils/tool_calls_parser.dart         |  16 +-
 .../chat/providers/chat_providers.dart        | 112 +------
 .../providers/text_to_speech_provider.dart    |   4 +-
 .../chat/services/voice_call_service.dart     |   4 +-
 lib/features/chat/views/chat_page.dart        |  33 +-
 lib/features/chat/views/voice_call_page.dart  |   4 +-
 .../widgets/assistant_message_widget.dart     |  42 ++-
 .../markdown/markdown_preprocessor.dart       | 288 +++++++++++++-----
 12 files changed, 347 insertions(+), 505 deletions(-)
 delete mode 100644 lib/core/utils/html_utils.dart
 delete mode 100644 lib/core/utils/markdown_stream_formatter.dart
 delete mode 100644 lib/core/utils/markdown_to_text.dart

diff --git a/lib/core/utils/html_utils.dart b/lib/core/utils/html_utils.dart
deleted file mode 100644
index 6b4659e..0000000
--- a/lib/core/utils/html_utils.dart
+++ /dev/null
@@ -1,18 +0,0 @@
-/// HTML entity utilities for parsing content.
-///
-/// Reference: openwebui-src/src/lib/utils/index.ts (unescapeHtml)
-library;
-
-import 'package:html_unescape/html_unescape.dart';
-
-/// Utility class for HTML entity handling.
-class HtmlUtils {
-  /// HTML entity unescaper instance.
-  static final _unescape = HtmlUnescape();
-
-  /// Unescape HTML entities in a string.
-  ///
-  /// Handles all Named, Decimal, and Hexadecimal Character References.
-  static String unescapeHtml(String s) => _unescape.convert(s);
-}
-
diff --git a/lib/core/utils/markdown_stream_formatter.dart b/lib/core/utils/markdown_stream_formatter.dart
deleted file mode 100644
index 45d26b6..0000000
--- a/lib/core/utils/markdown_stream_formatter.dart
+++ /dev/null
@@ -1,71 +0,0 @@
-// Pre-compiled regex patterns for markdown syntax detection (performance optimization)
-final _boldPattern = RegExp(r'\*\*');
-final _italicPattern = RegExp(r'(?<!\*)\*(?!\*)');
-
-/// Maintains a raw markdown buffer for streaming content and generates
-/// preview-safe output by appending synthetic closing tokens when necessary.
-class MarkdownStreamFormatter {
-  StringBuffer _raw = StringBuffer();
-
-  /// Seeds the formatter with existing markdown content.
-  void seed(String content) {
-    _raw = StringBuffer(content);
-  }
-
-  /// Adds a streaming chunk to the internal buffer and returns a preview-ready
-  /// string with any required synthetic closing markers.
-  String ingest(String chunk) {
-    if (chunk.isNotEmpty) {
-      _raw.write(chunk);
-    }
-    return preview();
-  }
-
-  /// Replaces the current buffer with the provided [content].
-  String replace(String content) {
-    seed(content);
-    return preview();
-  }
-
-  /// Returns the preview-safe markdown string.
-  String preview() {
-    final raw = _raw.toString();
-    return raw + _syntheticClosures(raw);
-  }
-
-  /// Returns the raw markdown accumulated so far.
-  String finalize() => _raw.toString();
-
-  String _syntheticClosures(String content) {
-    final buffer = StringBuffer();
-
-    final fenceCount = '```'.allMatches(content).length;
-    if (fenceCount.isOdd) {
-      buffer.writeln('```');
-    }
-
-    final boldCount = _boldPattern.allMatches(content).length;
-    if (boldCount.isOdd) {
-      buffer.write('**');
-    }
-
-    final italicCount = _italicPattern.allMatches(content).length;
-    if (italicCount.isOdd) {
-      buffer.write('*');
-    }
-
-    final openBrackets = '['.allMatches(content).length;
-    final closeBrackets = ']'.allMatches(content).length;
-    if (openBrackets > closeBrackets) {
-      buffer.write(List.filled(openBrackets - closeBrackets, ']').join());
-    }
-
-    final openParens = '('.allMatches(content).length;
-    final closeParens = ')'.allMatches(content).length;
-    if (openParens > closeParens) {
-      buffer.write(List.filled(openParens - closeParens, ')').join());
-    }
-
-    return buffer.toString();
-  }
-}
diff --git a/lib/core/utils/markdown_to_text.dart b/lib/core/utils/markdown_to_text.dart
deleted file mode 100644
index 687720f..0000000
--- a/lib/core/utils/markdown_to_text.dart
+++ /dev/null
@@ -1,160 +0,0 @@
-/// Converts markdown text to plain text suitable for text-to-speech.
-///
-/// Strips formatting while preserving the semantic meaning and readability
-/// of the content for audio consumption.
-class MarkdownToText {
-  const MarkdownToText._();
-
-  static final _thinkingBlockRegex = RegExp(
-    r'<details\s+type="reasoning"[^>]*>.*?</details>',
-    multiLine: true,
-    dotAll: true,
-  );
-  static final _thinkTagRegex = RegExp(
-    r'<think>.*?</think>',
-    multiLine: true,
-    dotAll: true,
-  );
-  static final _reasoningTagRegex = RegExp(
-    r'<reasoning>.*?</reasoning>',
-    multiLine: true,
-    dotAll: true,
-  );
-  static final _emojiRegex = RegExp(
-    r'[\u{1F600}-\u{1F64F}]|[\u{1F300}-\u{1F5FF}]|[\u{1F680}-\u{1F6FF}]|[\u{1F1E0}-\u{1F1FF}]|[\u{2600}-\u{26FF}]|[\u{2700}-\u{27BF}]|[\u{1F900}-\u{1F9FF}]|[\u{1FA00}-\u{1FA6F}]|[\u{1FA70}-\u{1FAFF}]|[\u{FE00}-\u{FE0F}]|[\u{1F018}-\u{1F270}]|[\u{238C}-\u{2454}]|[\u{20D0}-\u{20FF}]',
-    unicode: true,
-  );
-  static final _codeBlockRegex = RegExp(
-    r'```[^\n]*\n(.*?)```',
-    multiLine: true,
-    dotAll: true,
-  );
-  static final _inlineCodeRegex = RegExp(r'`([^`]+)`');
-  static final _boldItalicRegex = RegExp(r'\*\*\*([^*]+)\*\*\*');
-  static final _boldRegex = RegExp(r'\*\*([^*]+)\*\*');
-  static final _italicRegex = RegExp(r'\*([^*]+)\*|_([^_]+)_');
-  static final _strikethroughRegex = RegExp(r'~~([^~]+)~~');
-  static final _linkRegex = RegExp(r'\[([^\]]+)\]\([^)]+\)');
-  static final _imageRegex = RegExp(r'!\[([^\]]*)\]\([^)]+\)');
-  static final _headingRegex = RegExp(r'^#{1,6}\s+(.+)$', multiLine: true);
-  static final _listItemRegex = RegExp(r'^[\s]*[-*+]\s+(.+)$', multiLine: true);
-  static final _orderedListRegex = RegExp(
-    r'^[\s]*\d+\.\s+(.+)$',
-    multiLine: true,
-  );
-  static final _blockquoteRegex = RegExp(r'^>\s*(.+)$', multiLine: true);
-  static final _horizontalRuleRegex = RegExp(
-    r'^[\s]*[-*_]{3,}[\s]*$',
-    multiLine: true,
-  );
-  static final _htmlTagRegex = RegExp(r'<[^>]+>');
-  static final _htmlEntityRegex = RegExp(r'&[a-z]+;|&#\d+;|&#x[0-9a-f]+;');
-  static final _multipleNewlinesRegex = RegExp(r'\n{3,}');
-  static final _multipleSpacesRegex = RegExp(r' {2,}');
-
-  /// Converts markdown text to plain text suitable for TTS.
-  ///
-  /// - Removes thinking/reasoning blocks
-  /// - Removes emojis
-  /// - Removes code blocks (replaces with descriptive text)
-  /// - Strips all formatting (bold, italic, strikethrough)
-  /// - Converts links to just their text
-  /// - Removes images (or converts to alt text)
-  /// - Simplifies headings
-  /// - Preserves list structure with natural pauses
-  /// - Removes HTML tags and entities
-  /// - Normalizes whitespace
-  static String convert(String markdown) {
-    if (markdown.trim().isEmpty) {
-      return '';
-    }
-
-    var text = markdown;
-
-    // Remove thinking/reasoning blocks (must be done before general HTML tag removal)
-    text = text.replaceAll(_thinkingBlockRegex, '');
-    text = text.replaceAll(_thinkTagRegex, '');
-    text = text.replaceAll(_reasoningTagRegex, '');
-
-    // Remove emojis
-    text = text.replaceAll(_emojiRegex, '');
-
-    // Remove or replace code blocks with descriptive text
-    text = text.replaceAllMapped(_codeBlockRegex, (match) {
-      final code = match[1]?.trim() ?? '';
-      if (code.isEmpty) {
-        return '';
-      }
-      return ' (code block) ';
-    });
-
-    // Remove inline code backticks but keep the content
-    text = text.replaceAllMapped(_inlineCodeRegex, (match) => match[1] ?? '');
-
-    // Strip bold/italic/strikethrough formatting
-    text = text.replaceAllMapped(_boldItalicRegex, (match) => match[1] ?? '');
-    text = text.replaceAllMapped(_boldRegex, (match) => match[1] ?? '');
-    text = text.replaceAllMapped(
-      _italicRegex,
-      (match) => match[1] ?? match[2] ?? '',
-    );
-    text = text.replaceAllMapped(
-      _strikethroughRegex,
-      (match) => match[1] ?? '',
-    );
-
-    // Convert links to just their text
-    text = text.replaceAllMapped(_linkRegex, (match) => match[1] ?? '');
-
-    // Remove images (or use alt text if available)
-    text = text.replaceAllMapped(_imageRegex, (match) {
-      final alt = match[1]?.trim() ?? '';
-      return alt.isNotEmpty ? ' ($alt image) ' : '';
-    });
-
-    // Simplify headings (remove # symbols)
-    text = text.replaceAllMapped(_headingRegex, (match) {
-      final heading = match[1] ?? '';
-      return '$heading.\n';
-    });
-
-    // Preserve list items with natural pauses
-    text = text.replaceAllMapped(_listItemRegex, (match) => '${match[1]}. ');
-    text = text.replaceAllMapped(_orderedListRegex, (match) => '${match[1]}. ');
-
-    // Remove blockquote markers
-    text = text.replaceAllMapped(_blockquoteRegex, (match) => match[1] ?? '');
-
-    // Remove horizontal rules
-    text = text.replaceAll(_horizontalRuleRegex, '');
-
-    // Remove HTML tags
-    text = text.replaceAll(_htmlTagRegex, '');
-
-    // Decode HTML entities
-    text = text.replaceAllMapped(_htmlEntityRegex, (match) {
-      final entity = match[0] ?? '';
-      return switch (entity) {
-        '&nbsp;' => ' ',
-        '&amp;' => '&',
-        '&lt;' => '<',
-        '&gt;' => '>',
-        '&quot;' => '"',
-        '&apos;' => "'",
-        _ => entity,
-      };
-    });
-
-    // Normalize whitespace
-    text = text.replaceAll(_multipleNewlinesRegex, '\n\n');
-    text = text.replaceAll(_multipleSpacesRegex, ' ');
-
-    // Convert newlines to spaces for natural speech flow
-    text = text.replaceAll('\n', ' ');
-
-    // Final cleanup
-    text = text.trim();
-
-    return text;
-  }
-}
diff --git a/lib/core/utils/reasoning_parser.dart b/lib/core/utils/reasoning_parser.dart
index 18483d6..a8602dc 100644
--- a/lib/core/utils/reasoning_parser.dart
+++ b/lib/core/utils/reasoning_parser.dart
@@ -7,7 +7,12 @@
 /// Reference: openwebui-src/backend/open_webui/utils/middleware.py DEFAULT_REASONING_TAGS
 library;
 
-import 'html_utils.dart';
+import 'package:html_unescape/html_unescape.dart';
+
+final _htmlUnescape = HtmlUnescape();
+
+/// Unescape HTML entities in reasoning content.
+String _unescapeHtml(String s) => _htmlUnescape.convert(s);
 
 /// All reasoning tag pairs supported by Open WebUI.
 /// Reference: DEFAULT_REASONING_TAGS in middleware.py
@@ -181,9 +186,25 @@ class ReasoningParser {
       }
 
       // Check for raw tag pairs
+      // Supports tags with optional attributes like <think foo="bar">
+      // Reference: openwebui-src/backend/open_webui/utils/middleware.py
       for (final pair in tagPairs) {
         final startTag = pair.$1;
-        final idx = content.indexOf(startTag, index);
+        var idx = -1;
+
+        // For XML-like tags (e.g., <think>), match with optional attributes
+        if (startTag.startsWith('<') && startTag.endsWith('>')) {
+          final tagName = startTag.substring(1, startTag.length - 1);
+          final pattern = RegExp('<${RegExp.escape(tagName)}(\\s[^>]*)?>');
+          // Search from `index` in place: allMatches reports absolute
+          // offsets, so no substring copy or index arithmetic is needed.
+          final match = pattern.allMatches(content, index).firstOrNull;
+          idx = match?.start ?? -1;
+        } else {
+          // For non-XML tags (e.g., ◁think▷), use exact matching
+          idx = content.indexOf(startTag, index);
+        }
+
         if (idx != -1 && (nextRawIdx == -1 || idx < nextRawIdx)) {
           nextRawIdx = idx;
           matchedRawPair = pair;
@@ -336,8 +357,8 @@ class ReasoningParser {
 
       return _DetailsResult(
         entry: ReasoningEntry(
-          reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
-          summary: HtmlUtils.unescapeHtml(summaryResult.summary),
+          reasoning: _unescapeHtml(summaryResult.remaining),
+          summary: _unescapeHtml(summaryResult.summary),
           duration: effectiveDuration,
           isDone: false,
           blockType: blockType,
@@ -368,8 +389,8 @@ class ReasoningParser {
 
     return _DetailsResult(
       entry: ReasoningEntry(
-        reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
-        summary: HtmlUtils.unescapeHtml(summaryResult.summary),
+        reasoning: _unescapeHtml(summaryResult.remaining),
+        summary: _unescapeHtml(summaryResult.summary),
         duration: effectiveDuration,
         isDone: isDone,
         blockType: blockType,
@@ -381,20 +402,47 @@ class ReasoningParser {
   }
 
   /// Parse a raw reasoning tag pair (e.g., `<think>...</think>`).
+  /// Supports tags with optional attributes like `<think foo="bar">`.
+  ///
+  /// Reference: openwebui-src/backend/open_webui/utils/middleware.py
   static _ReasoningResult _parseRawReasoning(
     String content,
     int startIdx,
     String startTag,
     String endTag,
   ) {
-    final endIdx = content.indexOf(endTag, startIdx + startTag.length);
+    // Find the actual end of the opening tag (handles attributes)
+    int contentStartIdx;
+    if (startTag.startsWith('<') && startTag.endsWith('>')) {
+      // For XML-like tags, find the closing '>' to skip any attributes
+      final tagCloseIdx = content.indexOf('>', startIdx);
+      if (tagCloseIdx == -1) {
+        // Incomplete opening tag
+        return _ReasoningResult(
+          entry: ReasoningEntry(
+            reasoning: '',
+            summary: '',
+            duration: 0,
+            isDone: false,
+          ),
+          endIndex: content.length,
+          isComplete: false,
+        );
+      }
+      contentStartIdx = tagCloseIdx + 1;
+    } else {
+      // For non-XML tags, use exact tag length
+      contentStartIdx = startIdx + startTag.length;
+    }
+
+    final endIdx = content.indexOf(endTag, contentStartIdx);
 
     if (endIdx == -1) {
       // Incomplete block (streaming)
-      final innerContent = content.substring(startIdx + startTag.length);
+      final innerContent = content.substring(contentStartIdx);
       return _ReasoningResult(
         entry: ReasoningEntry(
-          reasoning: HtmlUtils.unescapeHtml(innerContent.trim()),
+          reasoning: _unescapeHtml(innerContent.trim()),
           summary: '',
           duration: 0,
           isDone: false,
@@ -405,10 +453,10 @@ class ReasoningParser {
     }
 
     // Complete block
-    final innerContent = content.substring(startIdx + startTag.length, endIdx);
+    final innerContent = content.substring(contentStartIdx, endIdx);
     return _ReasoningResult(
       entry: ReasoningEntry(
-        reasoning: HtmlUtils.unescapeHtml(innerContent.trim()),
+        reasoning: _unescapeHtml(innerContent.trim()),
         summary: '',
         duration: 0,
         isDone: true,
@@ -533,23 +581,31 @@ class ReasoningParser {
   }
 
   /// Formats the duration for display.
-  /// Mirrors Open WebUI's formatting:
+  /// Mirrors Open WebUI's dayjs.duration(seconds, 'seconds').humanize():
   /// - < 1: "less than a second"
   /// - < 60: "X seconds"
-  /// - >= 60: humanized (e.g., "2 minutes")
+  /// - >= 60: humanized (e.g., "a minute", "2 minutes", "an hour")
+  ///
+  /// Reference: openwebui-src/src/lib/components/common/Collapsible.svelte
   static String formatDuration(int seconds) {
     if (seconds < 1) return 'less than a second';
     if (seconds < 60) return '$seconds second${seconds == 1 ? '' : 's'}';
 
-    final minutes = seconds ~/ 60;
-    final remainingSeconds = seconds % 60;
-
-    if (remainingSeconds == 0) {
-      return '$minutes minute${minutes == 1 ? '' : 's'}';
-    }
-
-    // For mixed minutes and seconds, use abbreviated format
-    return '$minutes min ${remainingSeconds}s';
+    // Match dayjs.duration().humanize() behavior. dayjs rounds the value in
+    // each unit first and only then compares it with the unit's threshold,
+    // and a rounded value of 1 falls back to the singular form.
+    // Reference: https://day.js.org/docs/en/durations/humanize
+    if (seconds < 90) return 'a minute';
+    final minutes = (seconds / 60).round();
+    if (minutes <= 44) return '$minutes minutes';
+    final hours = (seconds / 3600).round();
+    if (minutes <= 89 || hours <= 1) return 'an hour';
+    if (hours <= 21) return '$hours hours';
+    final days = (seconds / 86400).round();
+    if (hours <= 35 || days <= 1) return 'a day';
+    // NOTE: dayjs switches to months after 25 days; durations that long are
+    // not expected here, so days is the largest unit produced.
+    return '$days days';
   }
 }
 
diff --git a/lib/core/utils/tool_calls_parser.dart b/lib/core/utils/tool_calls_parser.dart
index 6ecc904..7cb6b6a 100644
--- a/lib/core/utils/tool_calls_parser.dart
+++ b/lib/core/utils/tool_calls_parser.dart
@@ -1,5 +1,7 @@
 import 'dart:convert';
 
+import '../../shared/widgets/markdown/markdown_preprocessor.dart';
+
 /// Parsed representation of one tool call emitted as a `<details type="tool_calls" ...>` block
 class ToolCallEntry {
   final String id;
@@ -255,18 +257,8 @@ class ToolCallsParser {
   static String sanitizeForApi(String content) {
     if (content.isEmpty) return content;
 
-    // Remove blocks we never want to include in conversation context
-    final removeTypes = ['reasoning', 'code_interpreter'];
-    for (final t in removeTypes) {
-      content = content.replaceAll(
-        RegExp(
-          '<details\\s+type="$t"[^>]*>[\\s\\S]*?</details>',
-          multiLine: true,
-          dotAll: true,
-        ),
-        '',
-      );
-    }
+    // Remove annotations and reasoning blocks
+    content = ConduitMarkdownPreprocessor.sanitize(content);
 
     if (!content.contains('<details')) return content.trim();
 
diff --git a/lib/features/chat/providers/chat_providers.dart b/lib/features/chat/providers/chat_providers.dart
index 064f781..a4612f3 100644
--- a/lib/features/chat/providers/chat_providers.dart
+++ b/lib/features/chat/providers/chat_providers.dart
@@ -16,7 +16,6 @@ import '../../../core/services/streaming_helper.dart';
 import '../../../core/services/streaming_response_controller.dart';
 import '../../../core/services/worker_manager.dart';
 import '../../../core/utils/debug_logger.dart';
-import '../../../core/utils/markdown_stream_formatter.dart';
 import '../../../core/utils/tool_calls_parser.dart';
 import '../models/chat_context_attachment.dart';
 import '../providers/context_attachments_provider.dart';
@@ -109,9 +108,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
   bool _taskStatusCheckInFlight = false;
   bool _observedRemoteTask = false;
 
-  MarkdownStreamFormatter? _markdownFormatter;
-  String? _activeStreamingMessageId;
-
   bool _initialized = false;
 
   @override
@@ -180,7 +176,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
 
         // Cancel any existing message stream when switching conversations
         _cancelMessageStream();
-        _clearStreamingFormatter(); // Explicitly clear formatter on conversation switch
         _stopRemoteTaskMonitor();
 
         if (next != null) {
@@ -222,16 +217,10 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
     if (controller != null && controller.isActive) {
       unawaited(controller.cancel());
     }
-    _clearStreamingFormatter();
     cancelSocketSubscriptions();
     _stopRemoteTaskMonitor();
   }
 
-  void _clearStreamingFormatter() {
-    _markdownFormatter = null;
-    _activeStreamingMessageId = null;
-  }
-
   /// Checks if streaming cleanup is needed when adopting server messages.
   /// Must be called BEFORE updating state, as it compares current local state
   /// with incoming server state.
@@ -397,39 +386,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
     }
   }
 
-  void _ensureFormatterForMessage(ChatMessage message) {
-    // If we're switching to a different message, clear the old formatter first
-    if (_markdownFormatter != null && _activeStreamingMessageId != message.id) {
-      DebugLogger.log(
-        'Clearing formatter for message switch: $_activeStreamingMessageId -> ${message.id}',
-        scope: 'chat/providers',
-      );
-      _clearStreamingFormatter();
-    }
-
-    // If formatter already exists for this message, reuse it
-    if (_markdownFormatter != null && _activeStreamingMessageId == message.id) {
-      return;
-    }
-
-    // Create new formatter
-    final formatter = MarkdownStreamFormatter();
-
-    // Only seed with existing content if this is a resume scenario
-    // For new messages (empty content), start fresh to avoid duplication
-    final seed = _stripStreamingPlaceholders(message.content);
-    if (seed.isNotEmpty && message.content.isNotEmpty) {
-      DebugLogger.log(
-        'Seeding formatter with existing content (${seed.length} chars) for message ${message.id}',
-        scope: 'chat/providers',
-      );
-      formatter.seed(seed);
-    }
-
-    _markdownFormatter = formatter;
-    _activeStreamingMessageId = message.id;
-  }
-
   String _stripStreamingPlaceholders(String content) {
     var result = content;
     const ti = '[TYPING_INDICATOR]';
@@ -443,15 +399,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
     return result;
   }
 
-  String _finalizeFormatter(String messageId, String fallback) {
-    if (_markdownFormatter != null && _activeStreamingMessageId == messageId) {
-      final output = _markdownFormatter!.finalize();
-      _clearStreamingFormatter();
-      return output;
-    }
-    return fallback;
-  }
-
   void _touchStreamingActivity() {
     _lastStreamingActivity = DateTime.now();
     if (_hasStreamingAssistant) {
@@ -728,16 +675,11 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
   }
 
   void appendToLastMessage(String content) {
-    if (state.isEmpty) {
-      return;
-    }
+    if (state.isEmpty) return;
 
     final lastMessage = state.last;
-    if (lastMessage.role != 'assistant') {
-      return;
-    }
+    if (lastMessage.role != 'assistant') return;
     if (!lastMessage.isStreaming) {
-      // Ignore late chunks when streaming already finished
       DebugLogger.log(
         'Ignoring late chunk for finished message: ${lastMessage.id}',
         scope: 'chat/providers',
@@ -745,52 +687,21 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
       return;
     }
 
-    _ensureFormatterForMessage(lastMessage);
-
-    // Defensive check: ensure the formatter is for the correct message
-    // This prevents cross-message pollution when messages change rapidly
-    if (_activeStreamingMessageId != lastMessage.id) {
-      DebugLogger.warning(
-        'Formatter message ID mismatch: active=$_activeStreamingMessageId, last=${lastMessage.id}. Resetting formatter.',
-      );
-      _clearStreamingFormatter();
-      _ensureFormatterForMessage(lastMessage);
-    }
-
-    final formatter = _markdownFormatter!;
-    final preview = formatter.ingest(content);
-
+    // Append content directly - the widget's normalize() handles incomplete markdown
     state = [
       ...state.sublist(0, state.length - 1),
-      lastMessage.copyWith(content: preview),
+      lastMessage.copyWith(content: lastMessage.content + content),
     ];
     _touchStreamingActivity();
   }
 
   void replaceLastMessageContent(String content) {
-    if (state.isEmpty) {
-      return;
-    }
+    if (state.isEmpty) return;
 
     final lastMessage = state.last;
-    if (lastMessage.role != 'assistant') {
-      return;
-    }
-
-    _ensureFormatterForMessage(lastMessage);
-
-    // Defensive check: ensure the formatter is for the correct message
-    if (_activeStreamingMessageId != lastMessage.id) {
-      DebugLogger.warning(
-        'Formatter message ID mismatch in replace: active=$_activeStreamingMessageId, last=${lastMessage.id}. Resetting formatter.',
-      );
-      _clearStreamingFormatter();
-      _ensureFormatterForMessage(lastMessage);
-    }
-
-    final formatter = _markdownFormatter!;
-    final sanitized = formatter.replace(_stripStreamingPlaceholders(content));
+    if (lastMessage.role != 'assistant') return;
 
+    final sanitized = _stripStreamingPlaceholders(content);
     state = [
       ...state.sublist(0, state.length - 1),
       lastMessage.copyWith(content: sanitized),
@@ -804,8 +715,7 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
     final lastMessage = state.last;
     if (lastMessage.role != 'assistant' || !lastMessage.isStreaming) return;
 
-    final finalized = _finalizeFormatter(lastMessage.id, lastMessage.content);
-    final cleaned = _stripStreamingPlaceholders(finalized);
+    final cleaned = _stripStreamingPlaceholders(lastMessage.content);
 
     var updatedLast = lastMessage.copyWith(
       isStreaming: false,
@@ -1005,11 +915,7 @@ Future<void> restoreDefaultModel(dynamic ref) async {
   try {
     await ref.read(defaultModelProvider.future);
   } catch (e) {
-    DebugLogger.error(
-      'restore-default-failed',
-      scope: 'chat/model',
-      error: e,
-    );
+    DebugLogger.error('restore-default-failed', scope: 'chat/model', error: e);
   }
 }
 
diff --git a/lib/features/chat/providers/text_to_speech_provider.dart b/lib/features/chat/providers/text_to_speech_provider.dart
index 759a19f..3d4253e 100644
--- a/lib/features/chat/providers/text_to_speech_provider.dart
+++ b/lib/features/chat/providers/text_to_speech_provider.dart
@@ -4,7 +4,7 @@ import 'package:flutter_riverpod/flutter_riverpod.dart';
 
 import '../../../core/services/settings_service.dart';
 import '../../../core/providers/app_providers.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../services/text_to_speech_service.dart';
 
 enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error }
@@ -218,7 +218,7 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
     }
 
     // Prepare sentence split for highlighting
-    final cleanText = MarkdownToText.convert(text);
+    final cleanText = ConduitMarkdownPreprocessor.toPlainText(text);
     final sentences = _service.splitTextForSpeech(cleanText);
     final offsets = _computeOffsets(cleanText, sentences);
 
diff --git a/lib/features/chat/services/voice_call_service.dart b/lib/features/chat/services/voice_call_service.dart
index bb786d4..d98fbf1 100644
--- a/lib/features/chat/services/voice_call_service.dart
+++ b/lib/features/chat/services/voice_call_service.dart
@@ -12,7 +12,7 @@ import '../../../core/providers/app_providers.dart';
 import '../../../core/services/background_streaming_handler.dart';
 import '../../../core/services/callkit_service.dart';
 import '../../../core/services/socket_service.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../providers/chat_providers.dart';
 import 'text_to_speech_service.dart';
 import '../../../core/services/settings_service.dart';
@@ -589,7 +589,9 @@ class VoiceCallService {
 
   void _processSpeakableSegments({required bool isFinalChunk}) {
     if (_isDisposed) return;
-    final cleanText = MarkdownToText.convert(_accumulatedResponse).trim();
+    final cleanText = ConduitMarkdownPreprocessor.toPlainText(
+      _accumulatedResponse,
+    ).trim();
     if (cleanText.isEmpty) {
       return;
     }
diff --git a/lib/features/chat/views/chat_page.dart b/lib/features/chat/views/chat_page.dart
index 7ba7699..3b2797e 100644
--- a/lib/features/chat/views/chat_page.dart
+++ b/lib/features/chat/views/chat_page.dart
@@ -18,6 +18,7 @@ import '../providers/chat_providers.dart';
 import '../../../core/utils/debug_logger.dart';
 import '../../../core/utils/user_display_name.dart';
 import '../../../core/utils/model_icon_utils.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../../../core/utils/android_assistant_handler.dart';
 import '../widgets/modern_chat_input.dart';
 import '../widgets/user_message_bubble.dart';
@@ -1205,36 +1206,8 @@ class _ChatPageState extends ConsumerState<ChatPage> {
   }
 
   void _copyMessage(String content) {
-    // Strip reasoning details from the copied content
-    String cleanedContent = content;
-
-    // Remove <details type="reasoning"> blocks
-    cleanedContent = cleanedContent.replaceAll(
-      RegExp(
-        r'<details\s+type="reasoning"[^>]*>[\s\S]*?<\/details>',
-        multiLine: true,
-        dotAll: true,
-      ),
-      '',
-    );
-
-    // Remove raw reasoning tags
-    cleanedContent = cleanedContent.replaceAll(
-      RegExp(r'<think>[\s\S]*?<\/think>', multiLine: true, dotAll: true),
-      '',
-    );
-    cleanedContent = cleanedContent.replaceAll(
-      RegExp(
-        r'<reasoning>[\s\S]*?<\/reasoning>',
-        multiLine: true,
-        dotAll: true,
-      ),
-      '',
-    );
-
-    // Clean up any extra whitespace
-    cleanedContent = cleanedContent.trim();
-
+    // Strip reasoning blocks and annotations from copied content
+    final cleanedContent = ConduitMarkdownPreprocessor.sanitize(content);
     Clipboard.setData(ClipboardData(text: cleanedContent));
   }
 
diff --git a/lib/features/chat/views/voice_call_page.dart b/lib/features/chat/views/voice_call_page.dart
index 743f28a..a5863d7 100644
--- a/lib/features/chat/views/voice_call_page.dart
+++ b/lib/features/chat/views/voice_call_page.dart
@@ -7,7 +7,7 @@ import 'package:flutter/cupertino.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 
 import '../../../core/providers/app_providers.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../../../l10n/app_localizations.dart';
 import '../../../shared/widgets/conduit_components.dart';
 import '../providers/chat_providers.dart';
@@ -335,7 +335,7 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
     } else if (_currentState == VoiceCallState.speaking &&
         _currentResponse.isNotEmpty) {
       // Convert markdown to clean text for display
-      displayText = MarkdownToText.convert(_currentResponse);
+      displayText = ConduitMarkdownPreprocessor.toPlainText(_currentResponse);
     }
 
     if (displayText.isEmpty) {
diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart
index a79000c..4270fa4 100644
--- a/lib/features/chat/widgets/assistant_message_widget.dart
+++ b/lib/features/chat/widgets/assistant_message_widget.dart
@@ -11,7 +11,7 @@ import '../../../core/utils/reasoning_parser.dart';
 import '../../../core/utils/message_segments.dart';
 import '../../../core/utils/tool_calls_parser.dart';
 import '../../../core/models/chat_message.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../providers/text_to_speech_provider.dart';
 import 'enhanced_image_attachment.dart';
 import 'package:conduit/l10n/app_localizations.dart';
@@ -166,6 +166,10 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
       raw = raw.substring(searchBanner.length);
     }
 
+    // Note: Link reference definitions (including OpenAI annotations like
+    // [openai_responses:v2:reasoning:ID]: #) are stripped by the markdown
+    // preprocessor using the `markdown` package for proper CommonMark handling.
+
     // Do not truncate content during streaming; segmented parser skips
     // incomplete details blocks and tiles will render once complete.
     final rSegs = ReasoningParser.segments(raw);
@@ -263,12 +267,12 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
 
   String _buildTtsPlainTextFallback(List<String> segments, String fallback) {
     if (segments.isEmpty) {
-      return MarkdownToText.convert(fallback);
+      return ConduitMarkdownPreprocessor.toPlainText(fallback);
     }
 
     final buffer = StringBuffer();
     for (final segment in segments) {
-      final sanitized = MarkdownToText.convert(segment);
+      final sanitized = ConduitMarkdownPreprocessor.toPlainText(segment);
       if (sanitized.isEmpty) {
         continue;
       }
@@ -281,7 +285,7 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
 
     final result = buffer.toString().trim();
     if (result.isEmpty) {
-      return MarkdownToText.convert(fallback);
+      return ConduitMarkdownPreprocessor.toPlainText(fallback);
     }
     return result;
   }
@@ -1738,24 +1742,32 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
           summaryLower == 'thinking...' ||
           summaryLower.startsWith('thinking');
 
+      // Check if summary contains server-formatted duration (e.g., "(0s)", "for 0 secs")
+      final hasDurationInSummary = RegExp(
+        r'\(\d+s\)|\bfor \d+ secs?\b',
+        caseSensitive: false,
+      ).hasMatch(rc.summary);
+
       // - If not done (streaming): show "Thinking..."
-      // - If done with duration: show "Thought for X seconds"
-      // - If done without duration: show "Thoughts" or custom summary
+      // - If done: show humanized "Thought for X" (uses our formatDuration)
+      // - If done without duration and has custom summary: show summary
       if (!rc.isDone) {
         // Still thinking - use summary if available, else default
         return hasSummary && !isThinkingSummary ? rc.summary : l10n.thinking;
       }
 
-      // Done thinking - check duration
-      if (rc.duration > 0) {
+      // Done thinking - prefer the humanized duration over server duration or
+      // placeholder summaries ("less than a second" instead of "0 secs")
+      if (rc.duration >= 0 && (rc.duration > 0 || hasDurationInSummary || isThinkingSummary)) {
         return l10n.thoughtForDuration(rc.formattedDuration);
       }
 
-      // No duration - use custom summary if meaningful, else default
-      if (!hasSummary || isThinkingSummary) {
-        return l10n.thoughts;
+      // Has custom summary that's not a duration - show it
+      if (hasSummary && !isThinkingSummary) {
+        return rc.summary;
       }
-      return rc.summary;
+
+      return l10n.thoughts;
     }
 
     Widget buildHeader() {
@@ -1863,13 +1875,13 @@ String _buildTtsPlainTextWorker(Map<String, dynamic> payload) {
   final segments = rawSegments is List ? rawSegments.cast<dynamic>() : const [];
 
   if (segments.isEmpty) {
-    return MarkdownToText.convert(fallback);
+    return ConduitMarkdownPreprocessor.toPlainText(fallback);
   }
 
   final buffer = StringBuffer();
   for (final segment in segments) {
     if (segment is! String || segment.isEmpty) continue;
-    final sanitized = MarkdownToText.convert(segment);
+    final sanitized = ConduitMarkdownPreprocessor.toPlainText(segment);
     if (sanitized.isEmpty) continue;
     if (buffer.isNotEmpty) {
       buffer.writeln();
@@ -1880,7 +1892,7 @@ String _buildTtsPlainTextWorker(Map<String, dynamic> payload) {
 
   final result = buffer.toString().trim();
   if (result.isEmpty) {
-    return MarkdownToText.convert(fallback);
+    return ConduitMarkdownPreprocessor.toPlainText(fallback);
   }
   return result;
 }
diff --git a/lib/shared/widgets/markdown/markdown_preprocessor.dart b/lib/shared/widgets/markdown/markdown_preprocessor.dart
index 4fee307..8db779c 100644
--- a/lib/shared/widgets/markdown/markdown_preprocessor.dart
+++ b/lib/shared/widgets/markdown/markdown_preprocessor.dart
@@ -1,10 +1,22 @@
-/// Utility helpers for normalising markdown content before handing it to
-/// [ConduitMarkdown]. The goal is to keep streaming responsive while smoothing
-/// out troublesome edge-cases (e.g. nested fences inside lists).
+import 'package:html_unescape/html_unescape.dart';
+import 'package:markdown/markdown.dart' as md;
+
+/// Content preprocessing, sanitization, and transformation for Markdown.
+///
+/// Provides:
+/// - [normalize] - Prepares content for display (keeps reasoning blocks)
+/// - [sanitize] - Cleans content for copy/API (removes reasoning blocks)
+/// - [toPlainText] - Converts to plain text for TTS
+/// - [softenInlineCode] - Breaks long inline code spans
 class ConduitMarkdownPreprocessor {
   const ConduitMarkdownPreprocessor._();
 
-  // Pre-compile regex patterns for better performance during streaming
+  static final _htmlUnescape = HtmlUnescape();
+
+  // ============================================================
+  // Pre-compiled Patterns - Display/Sanitization
+  // ============================================================
+
   static final _bulletFenceRegex = RegExp(
     r'^(\s*(?:[*+-]|\d+\.)\s+)```([^\s`]*)\s*$',
     multiLine: true,
@@ -14,7 +26,8 @@ class ConduitMarkdownPreprocessor {
     multiLine: true,
   );
   static final _dedentCloseRegex = RegExp(r'^[ \t]+```\s*$', multiLine: true);
-  static final _inlineClosingRegex = RegExp(r'([^\r\n`])```(?=\s*(?:\r?\n|$))');
+  static final _inlineClosingRegex =
+      RegExp(r'([^\r\n`])```(?=\s*(?:\r?\n|$))');
   static final _labelThenDashRegex = RegExp(
     r'^(\*\*[^\n*]+\*\*.*)\n(\s*-{3,}\s*$)',
     multiLine: true,
@@ -24,92 +37,143 @@ class ConduitMarkdownPreprocessor {
     multiLine: true,
   );
   static final _fenceAtBolRegex = RegExp(r'^\s*```', multiLine: true);
+  static final _linkWithTrailingSpaces =
+      RegExp(r'\[[^\]]+\]\([^\)]+\)\s{2,}$');
+  static final _multipleNewlines = RegExp(r'\n{3,}');
 
-  /// Normalises common fence and hard-break issues produced by LLMs.
+  /// Combined pattern for all reasoning/thinking blocks.
+  static final _reasoningBlocks = RegExp(
+    r'<details\s+type="(?:reasoning|code_interpreter)"[^>]*>[\s\S]*?</details>|'
+    r'<(?:think|thinking|reasoning)(?:\s[^>]*)?>[\s\S]*?</(?:think|thinking|reasoning)>',
+    multiLine: true,
+    dotAll: true,
+  );
+
+  // ============================================================
+  // Pre-compiled Patterns - Plain Text (TTS)
+  // ============================================================
+
+  static final _codeBlock = RegExp(r'```[^\n]*\n[\s\S]*?```');
+  static final _inlineCode = RegExp(r'`([^`]+)`');
+  static final _image = RegExp(r'!\[[^\]]*\]\([^)]+\)');
+  static final _link = RegExp(r'\[([^\]]+)\]\([^)]+\)');
+  // Paired markdown formatting for TTS. Single * and _ are ambiguous (math,
+  // identifiers), so they are stripped only around a whitespace-bounded word.
+  static final _boldItalic = RegExp(r'\*\*\*([^*]+)\*\*\*');
+  static final _bold = RegExp(r'\*\*([^*]+)\*\*');
+  static final _strikethrough = RegExp(r'~~([^~]+)~~');
+  // Single asterisk italic: only at word boundaries (space or line start/end)
+  static final _italicAsterisk = RegExp(r'(?:^|\s)\*([^*\s]+)\*(?=\s|$)');
+  // Single underscore italic: only when surrounded by spaces (not in identifiers)
+  static final _italicUnderscore = RegExp(r'(?:^|\s)_([^_\s]+)_(?=\s|$)');
+  static final _heading = RegExp(r'^#{1,6}\s+', multiLine: true);
+  static final _listMarker = RegExp(r'^[\s]*(?:[-*+]|\d+\.)\s+', multiLine: true);
+  static final _blockquote = RegExp(r'^>\s*', multiLine: true);
+  static final _horizontalRule = RegExp(r'^[\s]*[-*_]{3,}[\s]*$', multiLine: true);
+  static final _htmlTag = RegExp(r'<[^>]+>');
+  /// Comprehensive emoji pattern for TTS cleanup.
+  static final _emoji = RegExp(
+    r'[\u{1F600}-\u{1F64F}]|'  // Emoticons
+    r'[\u{1F300}-\u{1F5FF}]|'  // Misc Symbols and Pictographs
+    r'[\u{1F680}-\u{1F6FF}]|'  // Transport and Map
+    r'[\u{1F1E0}-\u{1F1FF}]|'  // Flags
+    r'[\u{2600}-\u{26FF}]|'    // Misc symbols
+    r'[\u{2700}-\u{27BF}]|'    // Dingbats
+    r'[\u{1F900}-\u{1F9FF}]|'  // Supplemental Symbols
+    r'[\u{1FA00}-\u{1FA6F}]|'  // Chess Symbols
+    r'[\u{1FA70}-\u{1FAFF}]|'  // Symbols Extended-A
+    r'[\u{FE00}-\u{FE0F}]|'    // Variation Selectors
+    r'[\u{1F018}-\u{1F270}]|'  // Various
+    r'[\u{238C}-\u{2454}]|'    // Misc Technical, Control Pictures, OCR
+    r'[\u{20D0}-\u{20FF}]',    // Combining Diacritical Marks for Symbols
+    unicode: true,
+  );
+  static final _whitespace = RegExp(r'\s+');
+
+  // ============================================================
+  // Public API
+  // ============================================================
+
+  /// Normalizes content for Markdown display.
+  ///
+  /// - Strips link reference definitions (including OpenAI annotations)
+  /// - Fixes common LLM fence issues
+  /// - Preserves reasoning blocks for collapsible UI rendering
   static String normalize(String input) {
-    if (input.isEmpty) {
-      return input;
-    }
+    if (input.isEmpty) return input;
 
     var output = input.replaceAll('\r\n', '\n');
 
-    // Move fenced code blocks that start on the same line as a list item onto
-    // their own line so the parser does not treat them as list text.
-    output = output.replaceAllMapped(
-      _bulletFenceRegex,
-      (match) => '${match[1]}\n```${match[2]}',
-    );
+    // Strip link reference definitions using markdown package
+    output = _stripLinkReferenceDefinitions(output);
 
-    // Dedent opening fences to avoid partial code-block detection when the
-    // model indents fences by accident.
-    output = output.replaceAllMapped(
-      _dedentOpenRegex,
-      (match) => '```${match[1]}',
-    );
+    // Fix fence issues
+    output = _normalizeFences(output);
 
-    // Dedent closing fences for the same reason as the opening fences.
-    output = output.replaceAllMapped(_dedentCloseRegex, (_) => '```');
-
-    // Ensure closing fences stand alone. Prevents situations like `}\n```foo`
-    // from keeping trailing braces inside the code block.
-    output = output.replaceAllMapped(
-      _inlineClosingRegex,
-      (match) => '${match[1]}\n```',
-    );
-
-    // Insert a blank line when a "label: value" line is followed by a
-    // horizontal rule so it is not treated as a Setext heading underline.
+    // Fix Setext heading false positives
     output = output.replaceAllMapped(
       _labelThenDashRegex,
       (match) => '${match[1]}\n\n${match[2]}',
     );
 
-    // Allow headings like "## 1. Summary" without triggering ordered-list
-    // parsing by inserting a zero-width joiner after the numeric marker.
+    // Fix numeric heading parsing
     output = output.replaceAllMapped(
       _atxEnumRegex,
       (match) => '${match[1]}.\u200C${match[2]}${match[3]}',
     );
 
-    // Auto-close an unmatched opening fence at EOF to avoid the entire tail
-    // of the message rendering as code.
-    final fenceCount = _fenceAtBolRegex.allMatches(output).length;
-    if (fenceCount.isOdd) {
-      if (!output.endsWith('\n')) {
-        output += '\n';
-      }
-      output += '```';
-    }
-
-    // Convert Markdown links followed by two trailing spaces into separate
-    // paragraphs so that consecutive links do not collapse into a single
-    // paragraph at render time.
-    final linkWithTrailingSpaces = RegExp(r'\[[^\]]+\]\([^\)]+\)\s{2,}$');
-    final lines = output.split('\n');
-    if (lines.length > 1) {
-      final buffer = StringBuffer();
-      for (var i = 0; i < lines.length; i++) {
-        final line = lines[i];
-        buffer.write(line);
-        if (i < lines.length - 1) {
-          buffer.write('\n');
-        }
-        if (linkWithTrailingSpaces.hasMatch(line)) {
-          buffer.write('\n');
-        }
-      }
-      output = buffer.toString();
-    }
+    // Separate consecutive links
+    output = _separateConsecutiveLinks(output);
 
     return output;
   }
 
-  /// Inserts zero-width break characters into long inline code spans so they
-  /// remain readable and do not overflow narrow layouts.
+  /// Sanitizes content for clipboard copy or API submission.
+  ///
+  /// - Strips link reference definitions (including OpenAI annotations)
+  /// - Strips reasoning/thinking blocks
+  /// - Normalizes whitespace
+  static String sanitize(String input) {
+    if (input.isEmpty) return input;
+
+    return input
+        .replaceAll('\r\n', '\n')
+        .transform(_stripLinkReferenceDefinitions)
+        .replaceAll(_reasoningBlocks, '')
+        .replaceAll(_multipleNewlines, '\n\n')
+        .trim();
+  }
+
+  /// Converts markdown to plain text for text-to-speech.
+  static String toPlainText(String input) {
+    if (input.trim().isEmpty) return '';
+
+    return sanitize(input)
+        .replaceAll(_codeBlock, '') // Remove code blocks
+        .replaceAllMapped(_inlineCode, (m) => m[1] ?? '') // Keep code text
+        .replaceAll(_image, '') // Remove images
+        .replaceAllMapped(_link, (m) => m[1] ?? '') // Keep link text
+        // Strip paired markdown formatting (preserves lone * and _ in text)
+        .replaceAllMapped(_boldItalic, (m) => m[1] ?? '')
+        .replaceAllMapped(_bold, (m) => m[1] ?? '')
+        .replaceAllMapped(_strikethrough, (m) => m[1] ?? '')
+        .replaceAllMapped(_italicAsterisk, (m) => ' ${m[1] ?? ''}')
+        .replaceAllMapped(_italicUnderscore, (m) => ' ${m[1] ?? ''}')
+        .replaceAll(_heading, '') // Strip # markers
+        .replaceAll(_listMarker, '') // Strip list markers
+        .replaceAll(_blockquote, '') // Strip > markers
+        .replaceAll(_horizontalRule, '') // Remove ---
+        .replaceAll(_htmlTag, '') // Remove HTML
+        .transform(_htmlUnescape.convert) // Decode entities
+        .replaceAll(_emoji, '') // Remove emojis
+        .replaceAll(_whitespace, ' ') // Normalize whitespace
+        .trim();
+  }
+
+  /// Breaks long inline code spans for better wrapping.
   static String softenInlineCode(String input, {int chunkSize = 24}) {
-    if (input.length <= chunkSize) {
-      return input;
-    }
+    if (input.length <= chunkSize) return input;
+
     final buffer = StringBuffer();
     for (var i = 0; i < input.length; i++) {
       buffer.write(input[i]);
@@ -119,4 +183,90 @@ class ConduitMarkdownPreprocessor {
     }
     return buffer.toString();
   }
+
+  // ============================================================
+  // Private Helpers
+  // ============================================================
+
+  static String _normalizeFences(String input) {
+    var output = input;
+
+    // Move fences after list markers to new line
+    output = output.replaceAllMapped(
+      _bulletFenceRegex,
+      (match) => '${match[1]}\n```${match[2]}',
+    );
+
+    // Dedent opening fences
+    output = output.replaceAllMapped(
+      _dedentOpenRegex,
+      (match) => '```${match[1]}',
+    );
+
+    // Dedent closing fences
+    output = output.replaceAllMapped(_dedentCloseRegex, (_) => '```');
+
+    // Ensure closing fences stand alone
+    output = output.replaceAllMapped(
+      _inlineClosingRegex,
+      (match) => '${match[1]}\n```',
+    );
+
+    // Auto-close unmatched fence
+    final fenceCount = _fenceAtBolRegex.allMatches(output).length;
+    if (fenceCount.isOdd) {
+      if (!output.endsWith('\n')) output += '\n';
+      output += '```';
+    }
+
+    return output;
+  }
+
+  static String _separateConsecutiveLinks(String input) {
+    final lines = input.split('\n');
+    if (lines.length <= 1) return input;
+
+    final buffer = StringBuffer();
+    for (var i = 0; i < lines.length; i++) {
+      final line = lines[i];
+      buffer.write(line);
+      if (i < lines.length - 1) buffer.write('\n');
+      if (_linkWithTrailingSpaces.hasMatch(line)) buffer.write('\n');
+    }
+    return buffer.toString();
+  }
+
+  /// Strips link reference definitions using the `markdown` package.
+  static String _stripLinkReferenceDefinitions(String input) {
+    if (!input.contains('[')) return input;
+
+    final document = md.Document();
+    document.parseLines(input.split('\n'));
+
+    final refLabels = document.linkReferences.keys.toSet();
+    if (refLabels.isEmpty) return input;
+
+    final labelPatterns =
+        refLabels.map((label) => RegExp.escape(label)).join('|');
+
+    final refDefRegex = RegExp(
+      r'^[ ]{0,3}\[(?:' +
+          labelPatterns +
+          r')\]:[ \t]*(?:<[^>]*>|[^\s]*)(?:[ \t]+(?:"[^"]*"|' +
+          r"'[^']*'" +
+          r'|\([^)]*\)))?[ \t]*$',
+      multiLine: true,
+      caseSensitive: false,
+    );
+
+    return input
+        .replaceAll(refDefRegex, '')
+        .replaceAll(_multipleNewlines, '\n\n')
+        .trim();
+  }
+}
+
+/// Extension for chaining string transformations.
+extension _StringTransform on String {
+  String transform(String Function(String) fn) => fn(this);
 }