refactor(markdown): remove deprecated stream formatter and enhance preprocessor

2025-12-22 14:07:04 +05:30
parent 653162cb76
commit 5fd68f86fe
12 changed files with 347 additions and 505 deletions
@@ -1,18 +0,0 @@
 /// HTML entity utilities for parsing content.
 ///
 /// Reference: openwebui-src/src/lib/utils/index.ts (unescapeHtml)
 library;
 import 'package:html_unescape/html_unescape.dart';
 /// Static helpers for decoding HTML character references.
 class HtmlUtils {
  /// Single shared decoder, created lazily on first use and then reused.
  static final HtmlUnescape _unescape = HtmlUnescape();
  /// Returns [s] with its HTML entities decoded.
  ///
  /// The work is delegated to `HtmlUnescape.convert`, which covers named,
  /// decimal, and hexadecimal character references.
  static String unescapeHtml(String s) {
    return _unescape.convert(s);
  }
 }
@@ -1,71 +0,0 @@
 // Pre-compiled regex patterns for markdown syntax detection (performance optimization)
 // `**` bold delimiter.
 final _boldPattern = RegExp(r'\*\*');
 // A lone `*` that is not part of a `**` pair (italic delimiter).
 final _italicPattern = RegExp(r'(?<!\*)\*(?!\*)');
 /// Maintains a raw markdown buffer for streaming content and generates
 /// preview-safe output by appending synthetic closing tokens when necessary.
 ///
 /// The raw buffer is never modified by the preview logic: [finalize] always
 /// returns exactly what was seeded/ingested, while [preview] returns that
 /// text plus whatever closing markers are needed to balance it.
 class MarkdownStreamFormatter {
  StringBuffer _raw = StringBuffer();
  /// Seeds the formatter with existing markdown content, discarding anything
  /// accumulated so far.
  void seed(String content) {
    _raw = StringBuffer(content);
  }
  /// Adds a streaming chunk to the internal buffer and returns a preview-ready
  /// string with any required synthetic closing markers.
  ///
  /// An empty [chunk] leaves the buffer untouched and just returns the
  /// current preview.
  String ingest(String chunk) {
    if (chunk.isNotEmpty) {
      _raw.write(chunk);
    }
    return preview();
  }
  /// Replaces the current buffer with the provided [content] and returns the
  /// preview for it.
  String replace(String content) {
    seed(content);
    return preview();
  }
  /// Returns the preview-safe markdown string: the raw buffer followed by the
  /// synthetic closers computed for it.
  String preview() {
    final raw = _raw.toString();
    return raw + _syntheticClosures(raw);
  }
  /// Returns the raw markdown accumulated so far, without synthetic closers.
  String finalize() => _raw.toString();
  /// Computes the closing tokens needed to balance [content].
  ///
  /// Closers are emitted in this order: code fence, bold, italic, square
  /// brackets, parentheses. Text inside fenced code is literal, so only the
  /// text outside fences takes part in the emphasis/bracket balancing.
  String _syntheticClosures(String content) {
    final buffer = StringBuffer();
    // Splitting on the fence marker yields alternating prose/code segments:
    // even indexes are outside fences, odd indexes are inside them.
    final segments = content.split('```');
    final fenceCount = segments.length - 1;
    if (fenceCount.isOdd) {
      // A closing fence is only recognised at the start of a line, so make
      // sure the synthetic one does not get glued onto the last code line.
      if (!content.endsWith('\n')) {
        buffer.writeln();
      }
      buffer.writeln('```');
    }
    // Join prose segments with a newline so markers on either side of a code
    // block cannot merge into a different token (e.g. `*` + `*` -> `**`).
    final proseBuffer = StringBuffer();
    for (var i = 0; i < segments.length; i += 2) {
      if (i > 0) {
        proseBuffer.write('\n');
      }
      proseBuffer.write(segments[i]);
    }
    final prose = proseBuffer.toString();
    final boldCount = _boldPattern.allMatches(prose).length;
    if (boldCount.isOdd) {
      buffer.write('**');
    }
    final italicCount = _italicPattern.allMatches(prose).length;
    if (italicCount.isOdd) {
      buffer.write('*');
    }
    final openBrackets = '['.allMatches(prose).length;
    final closeBrackets = ']'.allMatches(prose).length;
    if (openBrackets > closeBrackets) {
      buffer.write(']' * (openBrackets - closeBrackets));
    }
    final openParens = '('.allMatches(prose).length;
    final closeParens = ')'.allMatches(prose).length;
    if (openParens > closeParens) {
      buffer.write(')' * (openParens - closeParens));
    }
    return buffer.toString();
  }
 }
@@ -1,160 +0,0 @@
 /// Converts markdown text to plain text suitable for text-to-speech.
 ///
 /// Strips formatting while preserving the semantic meaning and readability
 /// of the content for audio consumption.
 class MarkdownToText {
  const MarkdownToText._();
  // Reasoning containers whose whole content is dropped.
  static final _thinkingBlockRegex = RegExp(
    r'<details\s+type="reasoning"[^>]*>.*?</details>',
    multiLine: true,
    dotAll: true,
  );
  static final _thinkTagRegex = RegExp(
    r'<think>.*?</think>',
    multiLine: true,
    dotAll: true,
  );
  static final _reasoningTagRegex = RegExp(
    r'<reasoning>.*?</reasoning>',
    multiLine: true,
    dotAll: true,
  );
  static final _emojiRegex = RegExp(
    r'[\u{1F600}-\u{1F64F}]|[\u{1F300}-\u{1F5FF}]|[\u{1F680}-\u{1F6FF}]|[\u{1F1E0}-\u{1F1FF}]|[\u{2600}-\u{26FF}]|[\u{2700}-\u{27BF}]|[\u{1F900}-\u{1F9FF}]|[\u{1FA00}-\u{1FA6F}]|[\u{1FA70}-\u{1FAFF}]|[\u{FE00}-\u{FE0F}]|[\u{1F018}-\u{1F270}]|[\u{238C}-\u{2454}]|[\u{20D0}-\u{20FF}]',
    unicode: true,
  );
  // Fenced code block; group 1 is the code between the fences.
  static final _codeBlockRegex = RegExp(
    r'```[^\n]*\n(.*?)```',
    multiLine: true,
    dotAll: true,
  );
  static final _inlineCodeRegex = RegExp(r'`([^`]+)`');
  static final _boldItalicRegex = RegExp(r'\*\*\*([^*]+)\*\*\*');
  static final _boldRegex = RegExp(r'\*\*([^*]+)\*\*');
  static final _italicRegex = RegExp(r'\*([^*]+)\*|_([^_]+)_');
  static final _strikethroughRegex = RegExp(r'~~([^~]+)~~');
  static final _linkRegex = RegExp(r'\[([^\]]+)\]\([^)]+\)');
  static final _imageRegex = RegExp(r'!\[([^\]]*)\]\([^)]+\)');
  static final _headingRegex = RegExp(r'^#{1,6}\s+(.+)$', multiLine: true);
  static final _listItemRegex = RegExp(r'^[\s]*[-*+]\s+(.+)$', multiLine: true);
  static final _orderedListRegex = RegExp(
    r'^[\s]*\d+\.\s+(.+)$',
    multiLine: true,
  );
  static final _blockquoteRegex = RegExp(r'^>\s*(.+)$', multiLine: true);
  static final _horizontalRuleRegex = RegExp(
    r'^[\s]*[-*_]{3,}[\s]*$',
    multiLine: true,
  );
  static final _htmlTagRegex = RegExp(r'<[^>]+>');
  static final _htmlEntityRegex = RegExp(r'&[a-z]+;|&#\d+;|&#x[0-9a-f]+;');
  static final _multipleNewlinesRegex = RegExp(r'\n{3,}');
  static final _multipleSpacesRegex = RegExp(r' {2,}');
  /// Converts markdown text to plain text suitable for TTS.
  ///
  /// Returns an empty string when [markdown] is empty or whitespace-only;
  /// otherwise returns a single-line, trimmed string.
  ///
  /// - Removes thinking/reasoning blocks
  /// - Removes emojis
  /// - Removes code blocks (replaces with descriptive text)
  /// - Strips all formatting (bold, italic, strikethrough)
  /// - Converts images to "(alt image)" or drops them when alt is empty
  /// - Converts links to just their text
  /// - Simplifies headings
  /// - Preserves list structure with natural pauses
  /// - Removes HTML tags and decodes a small set of named entities
  ///   (other entities are left as-is)
  /// - Normalizes whitespace
  static String convert(String markdown) {
    if (markdown.trim().isEmpty) {
      return '';
    }
    var text = markdown;
    // Remove thinking/reasoning blocks (must be done before general HTML tag removal)
    text = text.replaceAll(_thinkingBlockRegex, '');
    text = text.replaceAll(_thinkTagRegex, '');
    text = text.replaceAll(_reasoningTagRegex, '');
    // Remove emojis
    text = text.replaceAll(_emojiRegex, '');
    // Remove or replace code blocks with descriptive text
    text = text.replaceAllMapped(_codeBlockRegex, (match) {
      final code = match[1]?.trim() ?? '';
      if (code.isEmpty) {
        return '';
      }
      return ' (code block) ';
    });
    // Remove inline code backticks but keep the content
    text = text.replaceAllMapped(_inlineCodeRegex, (match) => match[1] ?? '');
    // Strip bold/italic/strikethrough formatting
    text = text.replaceAllMapped(_boldItalicRegex, (match) => match[1] ?? '');
    text = text.replaceAllMapped(_boldRegex, (match) => match[1] ?? '');
    text = text.replaceAllMapped(
      _italicRegex,
      (match) => match[1] ?? match[2] ?? '',
    );
    text = text.replaceAllMapped(
      _strikethroughRegex,
      (match) => match[1] ?? '',
    );
    // Remove images (or use alt text if available).
    // This must run BEFORE link stripping: the link pattern also matches the
    // `[alt](url)` tail of `![alt](url)`, which would leave a stray "!alt"
    // and prevent the image pattern from ever matching.
    text = text.replaceAllMapped(_imageRegex, (match) {
      final alt = match[1]?.trim() ?? '';
      return alt.isNotEmpty ? ' ($alt image) ' : '';
    });
    // Convert links to just their text
    text = text.replaceAllMapped(_linkRegex, (match) => match[1] ?? '');
    // Simplify headings (remove # symbols)
    text = text.replaceAllMapped(_headingRegex, (match) {
      final heading = match[1] ?? '';
      return '$heading.\n';
    });
    // Preserve list items with natural pauses
    text = text.replaceAllMapped(_listItemRegex, (match) => '${match[1]}. ');
    text = text.replaceAllMapped(_orderedListRegex, (match) => '${match[1]}. ');
    // Remove blockquote markers
    text = text.replaceAllMapped(_blockquoteRegex, (match) => match[1] ?? '');
    // Remove horizontal rules
    text = text.replaceAll(_horizontalRuleRegex, '');
    // Remove HTML tags
    text = text.replaceAll(_htmlTagRegex, '');
    // Decode the common named HTML entities; anything else (including
    // numeric references) is passed through unchanged.
    text = text.replaceAllMapped(_htmlEntityRegex, (match) {
      final entity = match[0] ?? '';
      return switch (entity) {
        '&nbsp;' => ' ',
        '&amp;' => '&',
        '&lt;' => '<',
        '&gt;' => '>',
        '&quot;' => '"',
        '&apos;' => "'",
        _ => entity,
      };
    });
    // Normalize whitespace
    text = text.replaceAll(_multipleNewlinesRegex, '\n\n');
    text = text.replaceAll(_multipleSpacesRegex, ' ');
    // Convert newlines to spaces for natural speech flow
    text = text.replaceAll('\n', ' ');
    // Final cleanup
    text = text.trim();
    return text;
  }
 }
@@ -7,7 +7,12 @@
 /// Reference: openwebui-src/backend/open_webui/utils/middleware.py DEFAULT_REASONING_TAGS
 library;
-import 'html_utils.dart';
+import 'package:html_unescape/html_unescape.dart';
 final _htmlUnescape = HtmlUnescape();
 /// Unescape HTML entities in reasoning content.
 String _unescapeHtml(String s) => _htmlUnescape.convert(s);
 /// All reasoning tag pairs supported by Open WebUI.
 /// Reference: DEFAULT_REASONING_TAGS in middleware.py
@@ -181,9 +186,25 @@ class ReasoningParser {
      }
      // Check for raw tag pairs
      // Supports tags with optional attributes like <think foo="bar">
      // Reference: openwebui-src/backend/open_webui/utils/middleware.py
      for (final pair in tagPairs) {
        final startTag = pair.$1;
-        final idx = content.indexOf(startTag, index);
+        int idx = -1;
        // For XML-like tags (e.g., <think>), match with optional attributes
        if (startTag.startsWith('<') && startTag.endsWith('>')) {
          final tagName = startTag.substring(1, startTag.length - 1);
          final pattern = RegExp('<${RegExp.escape(tagName)}(\\s[^>]*)?>');
          final match = pattern.firstMatch(content.substring(index));
          if (match != null) {
            idx = index + match.start;
          }
        } else {
          // For non-XML tags (e.g., ◁think▷), use exact matching
          idx = content.indexOf(startTag, index);
        }
        if (idx != -1 && (nextRawIdx == -1 || idx < nextRawIdx)) {
          nextRawIdx = idx;
          matchedRawPair = pair;
@@ -336,8 +357,8 @@ class ReasoningParser {
      return _DetailsResult(
        entry: ReasoningEntry(
-          reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
+          reasoning: _unescapeHtml(summaryResult.remaining),
-          summary: HtmlUtils.unescapeHtml(summaryResult.summary),
+          summary: _unescapeHtml(summaryResult.summary),
          duration: effectiveDuration,
          isDone: false,
          blockType: blockType,
@@ -368,8 +389,8 @@ class ReasoningParser {
    return _DetailsResult(
      entry: ReasoningEntry(
-        reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
+        reasoning: _unescapeHtml(summaryResult.remaining),
-        summary: HtmlUtils.unescapeHtml(summaryResult.summary),
+        summary: _unescapeHtml(summaryResult.summary),
        duration: effectiveDuration,
        isDone: isDone,
        blockType: blockType,
@@ -381,20 +402,47 @@ class ReasoningParser {
  }
  /// Parse a raw reasoning tag pair (e.g., `<think>...</think>`).
  /// Supports tags with optional attributes like `<think foo="bar">`.
  ///
  /// Reference: openwebui-src/backend/open_webui/utils/middleware.py
  static _ReasoningResult _parseRawReasoning(
    String content,
    int startIdx,
    String startTag,
    String endTag,
  ) {
-    final endIdx = content.indexOf(endTag, startIdx + startTag.length);
+    // Find the actual end of the opening tag (handles attributes)
    int contentStartIdx;
    if (startTag.startsWith('<') && startTag.endsWith('>')) {
      // For XML-like tags, find the closing '>' to skip any attributes
      final tagCloseIdx = content.indexOf('>', startIdx);
      if (tagCloseIdx == -1) {
        // Incomplete opening tag
        return _ReasoningResult(
          entry: ReasoningEntry(
            reasoning: '',
            summary: '',
            duration: 0,
            isDone: false,
          ),
          endIndex: content.length,
          isComplete: false,
        );
      }
      contentStartIdx = tagCloseIdx + 1;
    } else {
      // For non-XML tags, use exact tag length
      contentStartIdx = startIdx + startTag.length;
    }
    final endIdx = content.indexOf(endTag, contentStartIdx);
    if (endIdx == -1) {
      // Incomplete block (streaming)
-      final innerContent = content.substring(startIdx + startTag.length);
+      final innerContent = content.substring(contentStartIdx);
      return _ReasoningResult(
        entry: ReasoningEntry(
-          reasoning: HtmlUtils.unescapeHtml(innerContent.trim()),
+          reasoning: _unescapeHtml(innerContent.trim()),
          summary: '',
          duration: 0,
          isDone: false,
@@ -405,10 +453,10 @@ class ReasoningParser {
    }
    // Complete block
-    final innerContent = content.substring(startIdx + startTag.length, endIdx);
+    final innerContent = content.substring(contentStartIdx, endIdx);
    return _ReasoningResult(
      entry: ReasoningEntry(
-        reasoning: HtmlUtils.unescapeHtml(innerContent.trim()),
+        reasoning: _unescapeHtml(innerContent.trim()),
        summary: '',
        duration: 0,
        isDone: true,
@@ -533,23 +581,33 @@ class ReasoningParser {
  }
  /// Formats the duration for display.
-  /// Mirrors Open WebUI's formatting:
+  /// Mirrors Open WebUI's dayjs.duration(seconds, 'seconds').humanize():
  /// - < 1: "less than a second"
  /// - < 60: "X seconds"
-  /// - >= 60: humanized (e.g., "2 minutes")
+  /// - >= 60: humanized (e.g., "a minute", "2 minutes", "about an hour")
  ///
  /// Reference: openwebui-src/src/lib/components/common/Collapsible.svelte
  static String formatDuration(int seconds) {
    if (seconds < 1) return 'less than a second';
    if (seconds < 60) return '$seconds second${seconds == 1 ? '' : 's'}';
-    final minutes = seconds ~/ 60;
+    // Match dayjs.duration().humanize() behavior
-    final remainingSeconds = seconds % 60;
+    // Reference: https://day.js.org/docs/en/durations/humanize
-
+    if (seconds < 90) return 'a minute';
-    if (remainingSeconds == 0) {
+    if (seconds < 2700) {
-      return '$minutes minute${minutes == 1 ? '' : 's'}';
+      // 45 minutes
      final minutes = (seconds / 60).round();
      return '$minutes minutes';
    }
-
+    if (seconds < 5400) return 'about an hour'; // 90 minutes
-    // For mixed minutes and seconds, use abbreviated format
+    if (seconds < 79200) {
-    return '$minutes min ${remainingSeconds}s';
+      // 22 hours
      final hours = (seconds / 3600).round();
      return '$hours hours';
    }
    if (seconds < 129600) return 'a day'; // 36 hours
    final days = (seconds / 86400).round();
    return '$days days';
  }
 }
@@ -1,5 +1,7 @@
 import 'dart:convert';
 import '../../shared/widgets/markdown/markdown_preprocessor.dart';
 /// Parsed representation of one tool call emitted as a `<details type="tool_calls" ...>` block
 class ToolCallEntry {
  final String id;
@@ -255,18 +257,8 @@ class ToolCallsParser {
  static String sanitizeForApi(String content) {
    if (content.isEmpty) return content;
-    // Remove blocks we never want to include in conversation context
+    // Remove annotations and reasoning blocks
-    final removeTypes = ['reasoning', 'code_interpreter'];
+    content = ConduitMarkdownPreprocessor.sanitize(content);
    for (final t in removeTypes) {
      content = content.replaceAll(
        RegExp(
          '<details\\s+type="$t"[^>]*>[\\s\\S]*?</details>',
          multiLine: true,
          dotAll: true,
        ),
        '',
      );
    }
    if (!content.contains('<details')) return content.trim();
@@ -16,7 +16,6 @@ import '../../../core/services/streaming_helper.dart';
 import '../../../core/services/streaming_response_controller.dart';
 import '../../../core/services/worker_manager.dart';
 import '../../../core/utils/debug_logger.dart';
 import '../../../core/utils/markdown_stream_formatter.dart';
 import '../../../core/utils/tool_calls_parser.dart';
 import '../models/chat_context_attachment.dart';
 import '../providers/context_attachments_provider.dart';
@@ -109,9 +108,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
  bool _taskStatusCheckInFlight = false;
  bool _observedRemoteTask = false;
  MarkdownStreamFormatter? _markdownFormatter;
  String? _activeStreamingMessageId;
  bool _initialized = false;
  @override
@@ -180,7 +176,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
        // Cancel any existing message stream when switching conversations
        _cancelMessageStream();
        _clearStreamingFormatter(); // Explicitly clear formatter on conversation switch
        _stopRemoteTaskMonitor();
        if (next != null) {
@@ -222,16 +217,10 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
    if (controller != null && controller.isActive) {
      unawaited(controller.cancel());
    }
    _clearStreamingFormatter();
    cancelSocketSubscriptions();
    _stopRemoteTaskMonitor();
  }
  void _clearStreamingFormatter() {
    _markdownFormatter = null;
    _activeStreamingMessageId = null;
  }
  /// Checks if streaming cleanup is needed when adopting server messages.
  /// Must be called BEFORE updating state, as it compares current local state
  /// with incoming server state.
@@ -397,39 +386,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
    }
  }
  void _ensureFormatterForMessage(ChatMessage message) {
    // If we're switching to a different message, clear the old formatter first
    if (_markdownFormatter != null && _activeStreamingMessageId != message.id) {
      DebugLogger.log(
        'Clearing formatter for message switch: $_activeStreamingMessageId -> ${message.id}',
        scope: 'chat/providers',
      );
      _clearStreamingFormatter();
    }
    // If formatter already exists for this message, reuse it
    if (_markdownFormatter != null && _activeStreamingMessageId == message.id) {
      return;
    }
    // Create new formatter
    final formatter = MarkdownStreamFormatter();
    // Only seed with existing content if this is a resume scenario
    // For new messages (empty content), start fresh to avoid duplication
    final seed = _stripStreamingPlaceholders(message.content);
    if (seed.isNotEmpty && message.content.isNotEmpty) {
      DebugLogger.log(
        'Seeding formatter with existing content (${seed.length} chars) for message ${message.id}',
        scope: 'chat/providers',
      );
      formatter.seed(seed);
    }
    _markdownFormatter = formatter;
    _activeStreamingMessageId = message.id;
  }
  String _stripStreamingPlaceholders(String content) {
    var result = content;
    const ti = '[TYPING_INDICATOR]';
@@ -443,15 +399,6 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
    return result;
  }
  String _finalizeFormatter(String messageId, String fallback) {
    if (_markdownFormatter != null && _activeStreamingMessageId == messageId) {
      final output = _markdownFormatter!.finalize();
      _clearStreamingFormatter();
      return output;
    }
    return fallback;
  }
  void _touchStreamingActivity() {
    _lastStreamingActivity = DateTime.now();
    if (_hasStreamingAssistant) {
@@ -728,16 +675,11 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
  }
  void appendToLastMessage(String content) {
-    if (state.isEmpty) {
+    if (state.isEmpty) return;
      return;
    }
    final lastMessage = state.last;
-    if (lastMessage.role != 'assistant') {
+    if (lastMessage.role != 'assistant') return;
      return;
    }
    if (!lastMessage.isStreaming) {
      // Ignore late chunks when streaming already finished
      DebugLogger.log(
        'Ignoring late chunk for finished message: ${lastMessage.id}',
        scope: 'chat/providers',
@@ -745,52 +687,21 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
      return;
    }
-    _ensureFormatterForMessage(lastMessage);
+    // Append content directly - the widget's normalize() handles incomplete markdown
    // Defensive check: ensure the formatter is for the correct message
    // This prevents cross-message pollution when messages change rapidly
    if (_activeStreamingMessageId != lastMessage.id) {
      DebugLogger.warning(
        'Formatter message ID mismatch: active=$_activeStreamingMessageId, last=${lastMessage.id}. Resetting formatter.',
      );
      _clearStreamingFormatter();
      _ensureFormatterForMessage(lastMessage);
    }
    final formatter = _markdownFormatter!;
    final preview = formatter.ingest(content);
    state = [
      ...state.sublist(0, state.length - 1),
-      lastMessage.copyWith(content: preview),
+      lastMessage.copyWith(content: lastMessage.content + content),
    ];
    _touchStreamingActivity();
  }
  void replaceLastMessageContent(String content) {
-    if (state.isEmpty) {
+    if (state.isEmpty) return;
      return;
    }
    final lastMessage = state.last;
-    if (lastMessage.role != 'assistant') {
+    if (lastMessage.role != 'assistant') return;
      return;
    }
    _ensureFormatterForMessage(lastMessage);
    // Defensive check: ensure the formatter is for the correct message
    if (_activeStreamingMessageId != lastMessage.id) {
      DebugLogger.warning(
        'Formatter message ID mismatch in replace: active=$_activeStreamingMessageId, last=${lastMessage.id}. Resetting formatter.',
      );
      _clearStreamingFormatter();
      _ensureFormatterForMessage(lastMessage);
    }
    final formatter = _markdownFormatter!;
    final sanitized = formatter.replace(_stripStreamingPlaceholders(content));
    final sanitized = _stripStreamingPlaceholders(content);
    state = [
      ...state.sublist(0, state.length - 1),
      lastMessage.copyWith(content: sanitized),
@@ -804,8 +715,7 @@ class ChatMessagesNotifier extends Notifier<List<ChatMessage>> {
    final lastMessage = state.last;
    if (lastMessage.role != 'assistant' || !lastMessage.isStreaming) return;
-    final finalized = _finalizeFormatter(lastMessage.id, lastMessage.content);
+    final cleaned = _stripStreamingPlaceholders(lastMessage.content);
    final cleaned = _stripStreamingPlaceholders(finalized);
    var updatedLast = lastMessage.copyWith(
      isStreaming: false,
@@ -1005,11 +915,7 @@ Future<void> restoreDefaultModel(dynamic ref) async {
  try {
    await ref.read(defaultModelProvider.future);
  } catch (e) {
-    DebugLogger.error(
+    DebugLogger.error('restore-default-failed', scope: 'chat/model', error: e);
      'restore-default-failed',
      scope: 'chat/model',
      error: e,
    );
  }
 }
@@ -4,7 +4,7 @@ import 'package:flutter_riverpod/flutter_riverpod.dart';
 import '../../../core/services/settings_service.dart';
 import '../../../core/providers/app_providers.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../services/text_to_speech_service.dart';
 enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error }
@@ -218,7 +218,7 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
    }
    // Prepare sentence split for highlighting
-    final cleanText = MarkdownToText.convert(text);
+    final cleanText = ConduitMarkdownPreprocessor.toPlainText(text);
    final sentences = _service.splitTextForSpeech(cleanText);
    final offsets = _computeOffsets(cleanText, sentences);
@@ -12,7 +12,7 @@ import '../../../core/providers/app_providers.dart';
 import '../../../core/services/background_streaming_handler.dart';
 import '../../../core/services/callkit_service.dart';
 import '../../../core/services/socket_service.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../providers/chat_providers.dart';
 import 'text_to_speech_service.dart';
 import '../../../core/services/settings_service.dart';
@@ -589,7 +589,7 @@ class VoiceCallService {
  void _processSpeakableSegments({required bool isFinalChunk}) {
    if (_isDisposed) return;
-    final cleanText = MarkdownToText.convert(_accumulatedResponse).trim();
+    final cleanText = ConduitMarkdownPreprocessor.toPlainText(_accumulatedResponse).trim();
    if (cleanText.isEmpty) {
      return;
    }
@@ -18,6 +18,7 @@ import '../providers/chat_providers.dart';
 import '../../../core/utils/debug_logger.dart';
 import '../../../core/utils/user_display_name.dart';
 import '../../../core/utils/model_icon_utils.dart';
 import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../../../core/utils/android_assistant_handler.dart';
 import '../widgets/modern_chat_input.dart';
 import '../widgets/user_message_bubble.dart';
@@ -1205,36 +1206,8 @@ class _ChatPageState extends ConsumerState<ChatPage> {
  }
  void _copyMessage(String content) {
-    // Strip reasoning details from the copied content
+    // Strip reasoning blocks and annotations from copied content
-    String cleanedContent = content;
+    final cleanedContent = ConduitMarkdownPreprocessor.sanitize(content);
    // Remove <details type="reasoning"> blocks
    cleanedContent = cleanedContent.replaceAll(
      RegExp(
        r'<details\s+type="reasoning"[^>]*>[\s\S]*?<\/details>',
        multiLine: true,
        dotAll: true,
      ),
      '',
    );
    // Remove raw reasoning tags
    cleanedContent = cleanedContent.replaceAll(
      RegExp(r'<think>[\s\S]*?<\/think>', multiLine: true, dotAll: true),
      '',
    );
    cleanedContent = cleanedContent.replaceAll(
      RegExp(
        r'<reasoning>[\s\S]*?<\/reasoning>',
        multiLine: true,
        dotAll: true,
      ),
      '',
    );
    // Clean up any extra whitespace
    cleanedContent = cleanedContent.trim();
    Clipboard.setData(ClipboardData(text: cleanedContent));
  }
@@ -7,7 +7,7 @@ import 'package:flutter/cupertino.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 import '../../../core/providers/app_providers.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../../../l10n/app_localizations.dart';
 import '../../../shared/widgets/conduit_components.dart';
 import '../providers/chat_providers.dart';
@@ -335,7 +335,7 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
    } else if (_currentState == VoiceCallState.speaking &&
        _currentResponse.isNotEmpty) {
      // Convert markdown to clean text for display
-      displayText = MarkdownToText.convert(_currentResponse);
+      displayText = ConduitMarkdownPreprocessor.toPlainText(_currentResponse);
    }
    if (displayText.isEmpty) {
@@ -11,7 +11,7 @@ import '../../../core/utils/reasoning_parser.dart';
 import '../../../core/utils/message_segments.dart';
 import '../../../core/utils/tool_calls_parser.dart';
 import '../../../core/models/chat_message.dart';
-import '../../../core/utils/markdown_to_text.dart';
+import '../../../shared/widgets/markdown/markdown_preprocessor.dart';
 import '../providers/text_to_speech_provider.dart';
 import 'enhanced_image_attachment.dart';
 import 'package:conduit/l10n/app_localizations.dart';
@@ -166,6 +166,10 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
      raw = raw.substring(searchBanner.length);
    }
    // Note: Link reference definitions (including OpenAI annotations like
    // [openai_responses:v2:reasoning:ID]: #) are stripped by the markdown
    // preprocessor using the `markdown` package for proper CommonMark handling.
    // Do not truncate content during streaming; segmented parser skips
    // incomplete details blocks and tiles will render once complete.
    final rSegs = ReasoningParser.segments(raw);
@@ -263,12 +267,12 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
  String _buildTtsPlainTextFallback(List<String> segments, String fallback) {
    if (segments.isEmpty) {
-      return MarkdownToText.convert(fallback);
+      return ConduitMarkdownPreprocessor.toPlainText(fallback);
    }
    final buffer = StringBuffer();
    for (final segment in segments) {
-      final sanitized = MarkdownToText.convert(segment);
+      final sanitized = ConduitMarkdownPreprocessor.toPlainText(segment);
      if (sanitized.isEmpty) {
        continue;
      }
@@ -281,7 +285,7 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
    final result = buffer.toString().trim();
    if (result.isEmpty) {
-      return MarkdownToText.convert(fallback);
+      return ConduitMarkdownPreprocessor.toPlainText(fallback);
    }
    return result;
  }
@@ -1738,26 +1742,34 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
          summaryLower == 'thinking...' ||
          summaryLower.startsWith('thinking');
      // Check if summary contains server-formatted duration (e.g., "(0s)", "for 0 secs")
      final hasDurationInSummary = RegExp(
        r'\(\d+s\)|\bfor \d+ secs?\b',
        caseSensitive: false,
      ).hasMatch(rc.summary);
      // - If not done (streaming): show "Thinking..."
-      // - If done with duration: show "Thought for X seconds"
+      // - If done: show humanized "Thought for X" (uses our formatDuration)
-      // - If done without duration: show "Thoughts" or custom summary
+      // - If done without duration and has custom summary: show summary
      if (!rc.isDone) {
        // Still thinking - use summary if available, else default
        return hasSummary && !isThinkingSummary ? rc.summary : l10n.thinking;
      }
-      // Done thinking - check duration
+      // Done thinking - always use humanized duration format
-      if (rc.duration > 0) {
+      // This ensures "less than a second" instead of "0 secs" from server
      if (rc.duration >= 0 && (rc.duration > 0 || hasDurationInSummary || isThinkingSummary)) {
        return l10n.thoughtForDuration(rc.formattedDuration);
      }
-      // No duration - use custom summary if meaningful, else default
+      // Has custom summary that's not a duration - show it
-      if (!hasSummary || isThinkingSummary) {
+      if (hasSummary && !isThinkingSummary) {
        return l10n.thoughts;
      }
        return rc.summary;
      }
      return l10n.thoughts;
    }
    Widget buildHeader() {
      final headerWidget = Row(
        mainAxisSize: MainAxisSize.min,
@@ -1863,13 +1875,13 @@ String _buildTtsPlainTextWorker(Map<String, dynamic> payload) {
  final segments = rawSegments is List ? rawSegments.cast<dynamic>() : const [];
  if (segments.isEmpty) {
-    return MarkdownToText.convert(fallback);
+    return ConduitMarkdownPreprocessor.toPlainText(fallback);
  }
  final buffer = StringBuffer();
  for (final segment in segments) {
    if (segment is! String || segment.isEmpty) continue;
-    final sanitized = MarkdownToText.convert(segment);
+    final sanitized = ConduitMarkdownPreprocessor.toPlainText(segment);
    if (sanitized.isEmpty) continue;
    if (buffer.isNotEmpty) {
      buffer.writeln();
@@ -1880,7 +1892,7 @@ String _buildTtsPlainTextWorker(Map<String, dynamic> payload) {
  final result = buffer.toString().trim();
  if (result.isEmpty) {
-    return MarkdownToText.convert(fallback);
+    return ConduitMarkdownPreprocessor.toPlainText(fallback);
  }
  return result;
 }
@@ -1,10 +1,22 @@
-/// Utility helpers for normalising markdown content before handing it to
+import 'package:html_unescape/html_unescape.dart';
-/// [ConduitMarkdown]. The goal is to keep streaming responsive while smoothing
+import 'package:markdown/markdown.dart' as md;
-/// out troublesome edge-cases (e.g. nested fences inside lists).
+
 /// Content preprocessing, sanitization, and transformation for Markdown.
 ///
 /// Provides:
 /// - [normalize] - Prepares content for display (keeps reasoning blocks)
 /// - [sanitize] - Cleans content for copy/API (removes reasoning blocks)
 /// - [toPlainText] - Converts to plain text for TTS
 /// - [softenInlineCode] - Breaks long inline code spans
 class ConduitMarkdownPreprocessor {
  const ConduitMarkdownPreprocessor._();
-  // Pre-compile regex patterns for better performance during streaming
+  static final _htmlUnescape = HtmlUnescape();
  // ============================================================
  // Pre-compiled Patterns - Display/Sanitization
  // ============================================================
  static final _bulletFenceRegex = RegExp(
    r'^(\s*(?:[*+-]|\d+\.)\s+)```([^\s`]*)\s*$',
    multiLine: true,
@@ -14,7 +26,8 @@ class ConduitMarkdownPreprocessor {
    multiLine: true,
  );
  static final _dedentCloseRegex = RegExp(r'^[ \t]+```\s*$', multiLine: true);
-  static final _inlineClosingRegex = RegExp(r'([^\r\n`])```(?=\s*(?:\r?\n|$))');
+  static final _inlineClosingRegex =
      RegExp(r'([^\r\n`])```(?=\s*(?:\r?\n|$))');
  static final _labelThenDashRegex = RegExp(
    r'^(\*\*[^\n*]+\*\*.*)\n(\s*-{3,}\s*$)',
    multiLine: true,
@@ -24,92 +37,143 @@ class ConduitMarkdownPreprocessor {
    multiLine: true,
  );
  static final _fenceAtBolRegex = RegExp(r'^\s*```', multiLine: true);
  static final _linkWithTrailingSpaces =
      RegExp(r'\[[^\]]+\]\([^\)]+\)\s{2,}$');
  static final _multipleNewlines = RegExp(r'\n{3,}');
-  /// Normalises common fence and hard-break issues produced by LLMs.
+  /// Combined pattern for all reasoning/thinking blocks.
  static final _reasoningBlocks = RegExp(
    r'<details\s+type="(?:reasoning|code_interpreter)"[^>]*>[\s\S]*?</details>|'
    r'<(?:think|thinking|reasoning)(?:\s[^>]*)?>[\s\S]*?</(?:think|thinking|reasoning)>',
    multiLine: true,
    dotAll: true,
  );
  // ============================================================
  // Pre-compiled Patterns - Plain Text (TTS)
  // ============================================================
  static final _codeBlock = RegExp(r'```[^\n]*\n[\s\S]*?```');
  static final _inlineCode = RegExp(r'`([^`]+)`');
  static final _image = RegExp(r'!\[[^\]]*\]\([^)]+\)');
  static final _link = RegExp(r'\[([^\]]+)\]\([^)]+\)');
  // Paired markdown formatting for TTS. The multi-character markers below are
  // unambiguous. Single * and _ are ambiguous (math, variable names), so the
  // italic patterns further down only match them at whitespace boundaries.
  static final _boldItalic = RegExp(r'\*\*\*([^*]+)\*\*\*');
  static final _bold = RegExp(r'\*\*([^*]+)\*\*');
  static final _strikethrough = RegExp(r'~~([^~]+)~~');
  // Single asterisk italic: only at word boundaries (space or line start/end)
  static final _italicAsterisk = RegExp(r'(?:^|\s)\*([^*\s]+)\*(?=\s|$)');
  // Single underscore italic: only when surrounded by spaces (not in identifiers)
  static final _italicUnderscore = RegExp(r'(?:^|\s)_([^_\s]+)_(?=\s|$)');
  static final _heading = RegExp(r'^#{1,6}\s+', multiLine: true);
  static final _listMarker = RegExp(r'^[\s]*(?:[-*+]|\d+\.)\s+', multiLine: true);
  static final _blockquote = RegExp(r'^>\s*', multiLine: true);
  static final _horizontalRule = RegExp(r'^[\s]*[-*_]{3,}[\s]*$', multiLine: true);
  static final _htmlTag = RegExp(r'<[^>]+>');
  /// Comprehensive emoji pattern for TTS cleanup.
  static final _emoji = RegExp(
    r'[\u{1F600}-\u{1F64F}]|'  // Emoticons
    r'[\u{1F300}-\u{1F5FF}]|'  // Misc Symbols and Pictographs
    r'[\u{1F680}-\u{1F6FF}]|'  // Transport and Map
    r'[\u{1F1E0}-\u{1F1FF}]|'  // Flags
    r'[\u{2600}-\u{26FF}]|'    // Misc symbols
    r'[\u{2700}-\u{27BF}]|'    // Dingbats
    r'[\u{1F900}-\u{1F9FF}]|'  // Supplemental Symbols
    r'[\u{1FA00}-\u{1FA6F}]|'  // Chess, cards
    r'[\u{1FA70}-\u{1FAFF}]|'  // Symbols Extended-A
    r'[\u{FE00}-\u{FE0F}]|'    // Variation Selectors
    r'[\u{1F018}-\u{1F270}]|'  // Various
    r'[\u{238C}-\u{2454}]|'    // Misc Technical
    r'[\u{20D0}-\u{20FF}]',    // Combining Diacritical Marks
    unicode: true,
  );
  static final _whitespace = RegExp(r'\s+');
  // ============================================================
  // Public API
  // ============================================================
  /// Normalizes content for Markdown display.
  ///
  /// - Strips link reference definitions (including OpenAI annotations)
  /// - Fixes common LLM fence issues
  /// - Preserves reasoning blocks for collapsible UI rendering
  static String normalize(String input) {
-    if (input.isEmpty) {
+    if (input.isEmpty) return input;
      return input;
    }
    var output = input.replaceAll('\r\n', '\n');
-    // Move fenced code blocks that start on the same line as a list item onto
+    // Strip link reference definitions using markdown package
-    // their own line so the parser does not treat them as list text.
+    output = _stripLinkReferenceDefinitions(output);
    output = output.replaceAllMapped(
      _bulletFenceRegex,
      (match) => '${match[1]}\n```${match[2]}',
    );
-    // Dedent opening fences to avoid partial code-block detection when the
+    // Fix fence issues
-    // model indents fences by accident.
+    output = _normalizeFences(output);
    output = output.replaceAllMapped(
      _dedentOpenRegex,
      (match) => '```${match[1]}',
    );
-    // Dedent closing fences for the same reason as the opening fences.
+    // Fix Setext heading false positives
    output = output.replaceAllMapped(_dedentCloseRegex, (_) => '```');
    // Ensure closing fences stand alone. Prevents situations like `}\n```foo`
    // from keeping trailing braces inside the code block.
    output = output.replaceAllMapped(
      _inlineClosingRegex,
      (match) => '${match[1]}\n```',
    );
    // Insert a blank line when a "label: value" line is followed by a
    // horizontal rule so it is not treated as a Setext heading underline.
    output = output.replaceAllMapped(
      _labelThenDashRegex,
      (match) => '${match[1]}\n\n${match[2]}',
    );
-    // Allow headings like "## 1. Summary" without triggering ordered-list
+    // Fix numeric heading parsing
    // parsing by inserting a zero-width non-joiner (U+200C) after the numeric
    // marker.
    output = output.replaceAllMapped(
      _atxEnumRegex,
      (match) => '${match[1]}.\u200C${match[2]}${match[3]}',
    );
-    // Auto-close an unmatched opening fence at EOF to avoid the entire tail
+    // Separate consecutive links
-    // of the message rendering as code.
+    output = _separateConsecutiveLinks(output);
    final fenceCount = _fenceAtBolRegex.allMatches(output).length;
    if (fenceCount.isOdd) {
      if (!output.endsWith('\n')) {
        output += '\n';
      }
      output += '```';
    }
    // Convert Markdown links followed by two trailing spaces into separate
    // paragraphs so that consecutive links do not collapse into a single
    // paragraph at render time.
    final linkWithTrailingSpaces = RegExp(r'\[[^\]]+\]\([^\)]+\)\s{2,}$');
    final lines = output.split('\n');
    if (lines.length > 1) {
      final buffer = StringBuffer();
      for (var i = 0; i < lines.length; i++) {
        final line = lines[i];
        buffer.write(line);
        if (i < lines.length - 1) {
          buffer.write('\n');
        }
        if (linkWithTrailingSpaces.hasMatch(line)) {
          buffer.write('\n');
        }
      }
      output = buffer.toString();
    }
    return output;
  }
-  /// Inserts zero-width break characters into long inline code spans so they
+  /// Sanitizes content for clipboard copy or API submission.
-  /// remain readable and do not overflow narrow layouts.
+  ///
-  static String softenInlineCode(String input, {int chunkSize = 24}) {
+  /// - Strips link reference definitions (including OpenAI annotations)
-    if (input.length <= chunkSize) {
+  /// - Strips reasoning/thinking blocks
-      return input;
+  /// - Normalizes whitespace
  static String sanitize(String input) {
    if (input.isEmpty) {
      return input;
    }

    // Convert CRLF to LF before any of the patterns run.
    final unixNewlines = input.replaceAll('\r\n', '\n');

    // Drop link reference definitions first, then reasoning/thinking blocks.
    final withoutRefDefs = _stripLinkReferenceDefinitions(unixNewlines);
    final withoutReasoning = withoutRefDefs.replaceAll(_reasoningBlocks, '');

    // Squash runs of three or more newlines into a single blank line and trim
    // both ends of the result.
    return withoutReasoning.replaceAll(_multipleNewlines, '\n\n').trim();
  }
  /// Produces speech-friendly plain text from markdown [input].
  ///
  /// Returns an empty string when [input] is blank. Otherwise the content is
  /// first passed through [sanitize]; fenced code blocks, images, horizontal
  /// rules, HTML tags and emoji are then removed, while inline code, link
  /// labels and bold/italic/strikethrough spans keep only their inner text.
  /// Heading, list and blockquote markers are stripped, HTML entities are
  /// decoded, and every whitespace run collapses to a single space.
  static String toPlainText(String input) {
    if (input.trim().isEmpty) return '';

    // Unwraps a paired marker, keeping only the captured inner text.
    String inner(Match m) => m[1] ?? '';

    // Same as [inner] but prefixed with a space, because the italic patterns
    // may consume the whitespace character in front of the opening marker.
    String spacedInner(Match m) => ' ${m[1] ?? ''}';

    var text = sanitize(input);

    // Code, images and links.
    text = text.replaceAll(_codeBlock, '');
    text = text.replaceAllMapped(_inlineCode, inner);
    text = text.replaceAll(_image, '');
    text = text.replaceAllMapped(_link, inner);

    // Paired emphasis markers; the longest marker is handled first.
    text = text.replaceAllMapped(_boldItalic, inner);
    text = text.replaceAllMapped(_bold, inner);
    text = text.replaceAllMapped(_strikethrough, inner);
    text = text.replaceAllMapped(_italicAsterisk, spacedInner);
    text = text.replaceAllMapped(_italicUnderscore, spacedInner);

    // Line-level markers and HTML tags are simply deleted, in this order.
    final deletions = [
      _heading,
      _listMarker,
      _blockquote,
      _horizontalRule,
      _htmlTag,
    ];
    for (final pattern in deletions) {
      text = text.replaceAll(pattern, '');
    }

    // Decode entities only after the tags are gone, then drop emoji.
    text = _htmlUnescape.convert(text);
    text = text.replaceAll(_emoji, '');

    return text.replaceAll(_whitespace, ' ').trim();
  }
  /// Breaks long inline code spans for better wrapping.
  static String softenInlineCode(String input, {int chunkSize = 24}) {
    if (input.length <= chunkSize) return input;
    final buffer = StringBuffer();
    for (var i = 0; i < input.length; i++) {
      buffer.write(input[i]);
@@ -119,4 +183,90 @@ class ConduitMarkdownPreprocessor {
    }
    return buffer.toString();
  }
  // ============================================================
  // Private Helpers
  // ============================================================
  /// Repairs common code-fence formatting problems in [input].
  ///
  /// Applies, in order: moving a fence that shares a line with a list marker
  /// onto its own line, removing indentation in front of opening and closing
  /// fences, and pushing a closing fence that trails other text onto a new
  /// line. If the number of fences found at line starts is odd afterwards, a
  /// closing fence is appended so the remainder is not left open.
  static String _normalizeFences(String input) {
    final repaired = input
        // "- ```lang" becomes the list marker, a newline, then "```lang".
        .replaceAllMapped(
          _bulletFenceRegex,
          (m) => '${m[1]}\n```${m[2]}',
        )
        // Opening fence re-emitted at column 0 followed by capture group 1
        // (presumably the info string; the pattern is declared elsewhere).
        .replaceAllMapped(_dedentOpenRegex, (m) => '```${m[1]}')
        // Indented closing fence is reduced to a bare fence.
        .replaceAllMapped(_dedentCloseRegex, (_) => '```')
        // "}```" becomes "}" followed by the fence on its own line.
        .replaceAllMapped(_inlineClosingRegex, (m) => '${m[1]}\n```');

    final hasUnclosedFence =
        _fenceAtBolRegex.allMatches(repaired).length.isOdd;
    if (!hasUnclosedFence) return repaired;

    // The synthetic closing fence must start on a line of its own.
    final separator = repaired.endsWith('\n') ? '' : '\n';
    return '$repaired$separator```';
  }
  /// Inserts an extra newline after every line of [input] that ends with a
  /// markdown link followed by two or more whitespace characters.
  ///
  /// Input that contains no newline is returned unchanged.
  static String _separateConsecutiveLinks(String input) {
    final lines = input.split('\n');
    if (lines.length < 2) return input;

    final lastIndex = lines.length - 1;
    final out = StringBuffer();
    var index = 0;
    for (final line in lines) {
      out.write(line);
      // Restore the separator removed by split(); none follows the last line.
      if (index != lastIndex) out.write('\n');
      // A second newline turns the line break into a blank line.
      if (_linkWithTrailingSpaces.hasMatch(line)) out.write('\n');
      index++;
    }
    return out.toString();
  }
  /// Strips link reference definitions using the `markdown` package.
  ///
  /// [input] is parsed with [md.Document] to find out which labels are
  /// actually defined. Every single-line definition of the form
  /// `[label]: destination "optional title"` for one of those labels is then
  /// removed.
  ///
  /// Returns [input] unchanged when it contains no `[` or when the parser
  /// reports no link references. When at least one definition is found, the
  /// result additionally has runs of three or more newlines collapsed to a
  /// blank line and is trimmed at both ends.
  static String _stripLinkReferenceDefinitions(String input) {
    // A definition always starts with '[', so skip parsing when none exists.
    if (!input.contains('[')) return input;

    final document = md.Document();
    document.parseLines(input.split('\n'));
    final refLabels = document.linkReferences.keys.toSet();
    if (refLabels.isEmpty) return input;

    // The parser stores labels in normalized form (trimmed, whitespace runs
    // collapsed to one space, lower-cased). The source text may still contain
    // the un-normalized spelling, so match each space in the stored label
    // against any run of spaces/tabs. Case is handled by `caseSensitive`.
    final labelPatterns = refLabels
        .map(
          (label) =>
              label.trim().split(' ').map(RegExp.escape).join(r'[ \t]+'),
        )
        .join('|');

    // `[ \t]*` on both sides of the label tolerates padding inside the
    // brackets, which label normalization strips as well.
    final refDefRegex = RegExp(
      r'^[ ]{0,3}\[[ \t]*(?:' +
          labelPatterns +
          r')[ \t]*\]:[ \t]*(?:<[^>]*>|[^\s]*)(?:[ \t]+(?:"[^"]*"|' +
          r"'[^']*'" +
          r'|\([^)]*\)))?[ \t]*$',
      multiLine: true,
      caseSensitive: false,
    );

    return input
        .replaceAll(refDefRegex, '')
        .replaceAll(_multipleNewlines, '\n\n')
        .trim();
  }
 }
 /// Pipe-style helper that lets a free function take part in a chain of
 /// [String] method calls.
 extension _StringTransform on String {
  /// Applies [fn] to this string and returns whatever [fn] returns.
  String transform(String Function(String) fn) {
    return fn(this);
  }
 }