diff --git a/lib/core/utils/tool_calls_parser.dart b/lib/core/utils/tool_calls_parser.dart index 7da2365..6ecc904 100644 --- a/lib/core/utils/tool_calls_parser.dart +++ b/lib/core/utils/tool_calls_parser.dart @@ -180,17 +180,22 @@ class ToolCallsParser { if (seg.isToolCall && seg.entry != null) { calls.add(seg.entry!); } else if (seg.text != null && seg.text!.isNotEmpty) { - // Remove any embedded tool_calls blocks that may have slipped into text - final cleaned = seg.text! - .replaceAll( - RegExp( - r']*>[\s\S]*?<\/details>', - multiLine: true, - dotAll: true, - ), - '', - ) - .trim(); + final text = seg.text!; + // Quick check: only run cleanup regex if tool_calls details might exist + // (they should already be parsed as segments, but this is a safety net) + String cleaned = text; + if (text.contains(']*>[\s\S]*?<\/details>', + multiLine: true, + dotAll: true, + ), + '', + ); + } + cleaned = cleaned.trim(); if (cleaned.isNotEmpty) buf.write(cleaned); } } @@ -290,16 +295,21 @@ class ToolCallsParser { } buf.write(out); } else { - // Keep the raw text, but also remove any stray non-tool_calls details blocks - final t = (seg.text ?? '').replaceAll( - RegExp( - r']*>[\s\S]*?<\/details>', - multiLine: true, - dotAll: true, - ), - '', - ); - if (t.isNotEmpty) buf.write(t); + final text = seg.text ?? ''; + // Quick check: only run cleanup regex if details tags exist + String cleaned = text; + if (text.contains(']*>[\s\S]*?<\/details>', + multiLine: true, + dotAll: true, + ), + '', + ); + } + if (cleaned.isNotEmpty) buf.write(cleaned); } } diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart index b6bc8d5..683f0cd 100644 --- a/lib/features/chat/widgets/assistant_message_widget.dart +++ b/lib/features/chat/widgets/assistant_message_widget.dart @@ -724,22 +724,25 @@ class _AssistantMessageWidgetState extends ConsumerState // and type="tool_calls") via a custom block syntax, so they won't be rendered as // plain text during streaming. This prevents character flashing. - // We still clean raw reasoning tags (, ) as a fallback. - // The server normally converts these to
format, but raw mode or - // direct API responses might still use them. - String cleaned = content - .replaceAll( - RegExp(r'[\s\S]*?<\/think>', multiLine: true, dotAll: true), - '', - ) - .replaceAll( - RegExp( - r'[\s\S]*?<\/reasoning>', - multiLine: true, - dotAll: true, - ), - '', - ); + // Quick check: only run cleanup if raw tags might exist (rare case) + String cleaned = content; + if (content.contains('') || content.contains('')) { + // Clean raw reasoning tags as a fallback for raw mode or direct API responses. + // The server normally converts these to
format. + cleaned = content + .replaceAll( + RegExp(r'[\s\S]*?<\/think>', multiLine: true, dotAll: true), + '', + ) + .replaceAll( + RegExp( + r'[\s\S]*?<\/reasoning>', + multiLine: true, + dotAll: true, + ), + '', + ); + } // Process images in the remaining text final processedContent = _processContentForImages(cleaned); diff --git a/lib/shared/widgets/markdown/markdown_preprocessor.dart b/lib/shared/widgets/markdown/markdown_preprocessor.dart index 64bb6f5..84a168b 100644 --- a/lib/shared/widgets/markdown/markdown_preprocessor.dart +++ b/lib/shared/widgets/markdown/markdown_preprocessor.dart @@ -4,6 +4,30 @@ class ConduitMarkdownPreprocessor { const ConduitMarkdownPreprocessor._(); + // Pre-compile regex patterns for better performance during streaming + static final _bulletFenceRegex = RegExp( + r'^(\s*(?:[*+-]|\d+\.)\s+)```([^\s`]*)\s*$', + multiLine: true, + ); + static final _dedentOpenRegex = RegExp( + r'^[ \t]+```([^\n`]*)\s*$', + multiLine: true, + ); + static final _dedentCloseRegex = RegExp( + r'^[ \t]+```\s*$', + multiLine: true, + ); + static final _inlineClosingRegex = RegExp(r'([^\r\n`])```(?=\s*(?:\r?\n|$))'); + static final _labelThenDashRegex = RegExp( + r'^(\*\*[^\n*]+\*\*.*)\n(\s*-{3,}\s*$)', + multiLine: true, + ); + static final _atxEnumRegex = RegExp( + r'^(\s{0,3}#{1,6}\s+\d+)\.(\s*)(\S)', + multiLine: true, + ); + static final _fenceAtBolRegex = RegExp(r'^\s*```', multiLine: true); + /// Normalises common fence and hard-break issues produced by LLMs. static String normalize(String input) { if (input.isEmpty) { @@ -14,58 +38,42 @@ class ConduitMarkdownPreprocessor { // Move fenced code blocks that start on the same line as a list item onto // their own line so the parser does not treat them as list text. - final bulletFence = RegExp( - r'^(\s*(?:[*+-]|\d+\.)\s+)```([^\s`]*)\s*$', - multiLine: true, - ); output = output.replaceAllMapped( - bulletFence, + _bulletFenceRegex, (match) => '${match[1]}\n```${match[2]}', ); // Dedent opening fences to avoid partial code-block detection when the // model indents fences by accident. - final dedentOpen = RegExp(r'^[ \t]+```([^\n`]*)\s*$', multiLine: true); - output = output.replaceAllMapped(dedentOpen, (match) => '```${match[1]}'); + output = output.replaceAllMapped(_dedentOpenRegex, (match) => '```${match[1]}'); // Dedent closing fences for the same reason as the opening fences. - final dedentClose = RegExp(r'^[ \t]+```\s*$', multiLine: true); - output = output.replaceAllMapped(dedentClose, (_) => '```'); + output = output.replaceAllMapped(_dedentCloseRegex, (_) => '```'); // Ensure closing fences stand alone. Prevents situations like `}\n```foo` // from keeping trailing braces inside the code block. - final inlineClosing = RegExp(r'([^\r\n`])```(?=\s*(?:\r?\n|$))'); output = output.replaceAllMapped( - inlineClosing, + _inlineClosingRegex, (match) => '${match[1]}\n```', ); // Insert a blank line when a "label: value" line is followed by a // horizontal rule so it is not treated as a Setext heading underline. - final labelThenDash = RegExp( - r'^(\*\*[^\n*]+\*\*.*)\n(\s*-{3,}\s*$)', - multiLine: true, - ); output = output.replaceAllMapped( - labelThenDash, + _labelThenDashRegex, (match) => '${match[1]}\n\n${match[2]}', ); // Allow headings like "## 1. Summary" without triggering ordered-list // parsing by inserting a zero-width joiner after the numeric marker. - final atxEnum = RegExp( - r'^(\s{0,3}#{1,6}\s+\d+)\.(\s*)(\S)', - multiLine: true, - ); output = output.replaceAllMapped( - atxEnum, + _atxEnumRegex, (match) => '${match[1]}.\u200C${match[2]}${match[3]}', ); // Auto-close an unmatched opening fence at EOF to avoid the entire tail // of the message rendering as code. - final fenceAtBol = RegExp(r'^\s*```', multiLine: true); - final fenceCount = fenceAtBol.allMatches(output).length; + final fenceCount = _fenceAtBolRegex.allMatches(output).length; if (fenceCount.isOdd) { if (!output.endsWith('\n')) { output += '\n';