refactor: optimize regex patterns for image and markdown processing

- Introduced pre-compiled regex patterns across various components, including streaming_helper, markdown_stream_formatter, and assistant_message_widget, to enhance performance during image extraction and markdown formatting.
- Updated the AssistantMessageWidget to use these pre-compiled patterns for TTS sanitization and image processing, so RegExp objects are no longer constructed on every call.
- Improved overall efficiency in handling markdown content by leveraging pre-compiled patterns for common markdown syntax detection.
This commit is contained in:
cogwheel0
2025-10-06 00:09:52 +05:30
parent 3af46b379b
commit a2e5f46d62
4 changed files with 94 additions and 80 deletions

View File

@@ -23,6 +23,25 @@ import '../../../core/utils/debug_logger.dart';
import 'sources/openwebui_sources.dart';
import '../providers/assistant_response_builder_provider.dart';
// Pre-compiled regex patterns for TTS sanitization (performance optimization).
// Each pattern is declared once at library level so the same RegExp instance
// can be reused instead of constructing a new one at every use.
//
// Matches a run of three backticks (a markdown code-fence marker).
final _ttsCodeBlockPattern = RegExp(r'```');
// Matches any single backtick.
final _ttsInlineCodePattern = RegExp(r'`');
// Matches markdown image syntax `![alt](target)`. Group 1 is the alt text and
// group 2 the target; both are non-greedy.
final _ttsImagePattern = RegExp(r'!\[(.*?)\]\((.*?)\)');
// Matches markdown link syntax `[text](target)`. Group 1 is the link text and
// group 2 the target. This also matches the `[alt](target)` portion of an
// image, since it does not require the absence of a leading `!`.
final _ttsLinkPattern = RegExp(r'\[(.*?)\]\((.*?)\)');
// Matches the literal two-character sequence `**`.
final _ttsBoldPattern1 = RegExp(r'\*\*');
// Matches the literal two-character sequence `__`.
final _ttsBoldPattern2 = RegExp(r'__');
// Matches any single `*`.
final _ttsItalicPattern1 = RegExp(r'\*');
// Matches any single `_` (including underscores inside ordinary words).
final _ttsItalicPattern2 = RegExp(r'_');
// Matches any single `~` (not only `~~` pairs).
final _ttsStrikePattern = RegExp(r'~');
// Matches a bullet marker (`-`, `*` or `+`) at the start of a line together
// with the whitespace that follows it.
final _ttsListPattern = RegExp(r'^[-*+]\s+', multiLine: true);
// Matches a `>` at the start of a line plus at most one following whitespace
// character.
final _ttsQuotePattern = RegExp(r'^>\s?', multiLine: true);
// Matches two or more consecutive spaces and/or tabs.
final _ttsMultiSpacePattern = RegExp(r'[ \t]{2,}');
// Matches three or more consecutive newline characters.
final _ttsMultiNewlinePattern = RegExp(r'\n{3,}');
// Pre-compiled regex patterns for image processing (performance optimization).
//
// Matches a base64 image data URL: `data:image/<subtype>;base64,` followed by
// base64-alphabet characters and any trailing `=` padding.
final _base64ImagePattern = RegExp(r'data:image/[^;]+;base64,[A-Za-z0-9+/]+=*');
// Matches a `/api/v1/files/<id>/content` path; group 1 captures `<id>`, which
// may not contain a `/`.
final _fileIdPattern = RegExp(r'/api/v1/files/([^/]+)/content');
class AssistantMessageWidget extends ConsumerStatefulWidget {
final dynamic message;
final bool isStreaming;
@@ -270,23 +289,24 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
}
var text = input;
text = text.replaceAll(RegExp(r'```'), ' ');
text = text.replaceAll(RegExp(r'`'), '');
text = text.replaceAll(RegExp(r'!\[(.*?)\]\((.*?)\)'), r'$1');
text = text.replaceAll(RegExp(r'\[(.*?)\]\((.*?)\)'), r'$1');
text = text.replaceAll(RegExp(r'\*\*'), '');
text = text.replaceAll(RegExp(r'__'), '');
text = text.replaceAll(RegExp(r'\*'), '');
text = text.replaceAll(RegExp(r'_'), '');
text = text.replaceAll(RegExp(r'~'), '');
text = text.replaceAll(RegExp(r'^[-*+]\s+', multiLine: true), '');
text = text.replaceAll(RegExp(r'^>\s?', multiLine: true), '');
// Use pre-compiled regex patterns for better performance
text = text.replaceAll(_ttsCodeBlockPattern, ' ');
text = text.replaceAll(_ttsInlineCodePattern, '');
text = text.replaceAll(_ttsImagePattern, r'$1');
text = text.replaceAll(_ttsLinkPattern, r'$1');
text = text.replaceAll(_ttsBoldPattern1, '');
text = text.replaceAll(_ttsBoldPattern2, '');
text = text.replaceAll(_ttsItalicPattern1, '');
text = text.replaceAll(_ttsItalicPattern2, '');
text = text.replaceAll(_ttsStrikePattern, '');
text = text.replaceAll(_ttsListPattern, '');
text = text.replaceAll(_ttsQuotePattern, '');
text = text.replaceAll('&nbsp;', ' ');
text = text.replaceAll('&amp;', '&');
text = text.replaceAll('&lt;', '<');
text = text.replaceAll('&gt;', '>');
text = text.replaceAll(RegExp(r'[ \t]{2,}'), ' ');
text = text.replaceAll(RegExp(r'\n{3,}'), '\n\n');
text = text.replaceAll(_ttsMultiSpacePattern, ' ');
text = text.replaceAll(_ttsMultiNewlinePattern, '\n\n');
return text.trim();
}
@@ -771,18 +791,17 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
// Check if content contains image markdown or base64 data URLs
// This ensures images generated by AI are properly formatted
// Pattern to detect base64 images that might not be in markdown format
final base64Pattern = RegExp(r'data:image/[^;]+;base64,[A-Za-z0-9+/]+=*');
// Quick check: only process if we have base64 images and no markdown
if (!content.contains('data:image/') || content.contains('![')) {
return content;
}
// If we find base64 images not wrapped in markdown, wrap them
if (base64Pattern.hasMatch(content) && !content.contains('![')) {
content = content.replaceAllMapped(base64Pattern, (match) {
if (_base64ImagePattern.hasMatch(content)) {
content = content.replaceAllMapped(_base64ImagePattern, (match) {
final imageData = match.group(0)!;
// Check if this image is already in markdown format
final markdownCheck = RegExp(
r'!\[.*?\]\(' + RegExp.escape(imageData) + r'\)',
);
if (!markdownCheck.hasMatch(content)) {
// Check if this image is already in markdown format (simple string check)
if (!content.contains('![$imageData)')) {
return '\n![Generated Image]($imageData)\n';
}
return imageData;
@@ -951,9 +970,7 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
String attachmentId = fileUrl;
if (fileUrl.contains('/api/v1/files/') &&
fileUrl.contains('/content')) {
final fileIdMatch = RegExp(
r'/api/v1/files/([^/]+)/content',
).firstMatch(fileUrl);
final fileIdMatch = _fileIdPattern.firstMatch(fileUrl);
if (fileIdMatch != null) {
attachmentId = fileIdMatch.group(1)!;
}