feat(widget): Add citation badge for source references
This commit is contained in:
140
lib/core/utils/citation_parser.dart
Normal file
140
lib/core/utils/citation_parser.dart
Normal file
@@ -0,0 +1,140 @@
|
||||
/// Utility class for parsing inline citation references like [1], [1,2,3].
|
||||
///
|
||||
/// This matches OpenWebUI's citation-extension.ts behavior where adjacent
|
||||
/// citation brackets are merged and parsed into source indices.
|
||||
///
|
||||
/// Reference: openwebui-src/src/lib/utils/marked/citation-extension.ts
|
||||
library;
|
||||
|
||||
/// A single inline citation marker together with the sources it refers to.
class Citation {
  /// Creates a citation for [sourceIds] that appeared in the text as [raw].
  const Citation({required this.sourceIds, required this.raw});

  /// Source numbers referenced by this citation, counted from 1.
  final List<int> sourceIds;

  /// The exact text that was matched, for example `"[1]"` or `"[1,2,3]"`.
  final String raw;

  /// The referenced sources as 0-based positions, ready for list indexing.
  ///
  /// Every call builds a new fixed-length list, one entry per source ID.
  List<int> get zeroBasedIndices {
    return List<int>.generate(
      sourceIds.length,
      (position) => sourceIds[position] - 1,
      growable: false,
    );
  }
}
|
||||
|
||||
/// One piece of parsed content: either a run of plain text or a citation.
///
/// Segments built through [CitationSegment.text] or
/// [CitationSegment.citation] have exactly one of [text] and [citation] set.
class CitationSegment {
  const CitationSegment._({this.text, this.citation});

  /// Wraps a run of plain [text].
  factory CitationSegment.text(String text) {
    return CitationSegment._(text: text);
  }

  /// Wraps a parsed [citation].
  factory CitationSegment.citation(Citation citation) {
    return CitationSegment._(citation: citation);
  }

  /// The plain text carried by this segment, or null for a citation segment.
  final String? text;

  /// The citation carried by this segment, or null for a text segment.
  final Citation? citation;

  /// Whether this segment carries plain text.
  bool get isText {
    return text != null;
  }

  /// Whether this segment carries a citation.
  bool get isCitation {
    return citation != null;
  }
}
|
||||
|
||||
/// Parser for inline citations such as `[1]`, `[1,2,3]` or `[1][2]` in
/// markdown content.
///
/// A bracket run only counts as a citation when it is not directly preceded
/// by `^` and contains at least one positive source number. [parse],
/// [hasCitations] and [extractSourceIds] all apply that same rule.
class CitationParser {
  const CitationParser._();

  // Matches one or more adjacent [N] or [N,M,...] blocks.
  // Examples: "[1]", "[1,2,3]", "[1][2]", "[1,2][3,4]".
  // A digit is required right after the opening bracket, so a footnote
  // reference such as "[^1]" can never match.
  static final _citationPattern = RegExp(r'(\[(?:\d[\d,\s]*)\])+');

  // Matches the individual bracket groups within one citation match.
  static final _bracketGroupPattern = RegExp(r'\[([\d,\s]+)\]');

  /// Whether [match] is directly preceded by `^` in [content].
  ///
  /// Such matches are treated as footnote-style markers, not citations.
  static bool _followsCaret(String content, Match match) =>
      match.start > 0 && content[match.start - 1] == '^';

  /// Collects the positive source IDs from every bracket group in [raw].
  ///
  /// Entries that are not integers, or are not greater than zero, are
  /// dropped. Order and duplicates are preserved.
  static List<int> _parseSourceIds(String raw) {
    final ids = <int>[];
    for (final group in _bracketGroupPattern.allMatches(raw)) {
      for (final part in (group.group(1) ?? '').split(',')) {
        final id = int.tryParse(part.trim());
        if (id != null && id > 0) ids.add(id);
      }
    }
    return ids;
  }

  /// Parses [content] into ordered segments of plain text and citations.
  ///
  /// Bracket runs that directly follow `^`, or that contain no positive
  /// source ID (for example `[0]`), are not citations; they stay part of the
  /// surrounding text segment. Concatenating the text segments and the
  /// citations' raw text in order reproduces [content].
  ///
  /// Returns null if [content] holds no valid citation.
  static List<CitationSegment>? parse(String content) {
    if (content.isEmpty) return null;

    final segments = <CitationSegment>[];
    var lastEnd = 0;

    for (final match in _citationPattern.allMatches(content)) {
      if (_followsCaret(content, match)) continue;

      final raw = content.substring(match.start, match.end);
      final ids = _parseSourceIds(raw);
      // Not a usable citation: leave lastEnd untouched so the brackets are
      // swept into the next text segment instead of forming their own.
      if (ids.isEmpty) continue;

      if (match.start > lastEnd) {
        segments.add(
          CitationSegment.text(content.substring(lastEnd, match.start)),
        );
      }
      segments.add(
        CitationSegment.citation(Citation(sourceIds: ids, raw: raw)),
      );
      lastEnd = match.end;
    }

    // Text is only ever added right before a citation, so an empty list
    // means no citation was found.
    if (segments.isEmpty) return null;

    if (lastEnd < content.length) {
      segments.add(CitationSegment.text(content.substring(lastEnd)));
    }
    return segments;
  }

  /// Checks whether [content] contains at least one valid citation.
  ///
  /// Returns true exactly when [parse] would return a non-null result, but
  /// stops at the first valid citation and builds no segments.
  static bool hasCitations(String content) {
    if (content.isEmpty) return false;

    for (final match in _citationPattern.allMatches(content)) {
      if (_followsCaret(content, match)) continue;
      final raw = content.substring(match.start, match.end);
      if (_parseSourceIds(raw).isNotEmpty) return true;
    }
    return false;
  }

  /// Extracts all unique source IDs (1-based) cited in [content], sorted in
  /// ascending order.
  ///
  /// Returns an empty list when [content] holds no valid citation.
  static List<int> extractSourceIds(String content) {
    final segments = parse(content);
    if (segments == null) return const [];

    final unique = <int>{
      for (final segment in segments) ...?segment.citation?.sourceIds,
    };
    return unique.toList()..sort();
  }
}
|
||||
@@ -124,10 +124,18 @@ class ReasoningContent {
|
||||
|
||||
/// Utility class for parsing and extracting reasoning/thinking content.
|
||||
class ReasoningParser {
|
||||
/// Patterns that indicate a details block is reasoning content.
|
||||
/// Used when the `type` attribute is missing.
|
||||
static final _reasoningSummaryPattern = RegExp(
|
||||
r'Thought|Thinking|Reasoning',
|
||||
caseSensitive: false,
|
||||
);
|
||||
|
||||
/// Splits content into ordered segments of plain text and reasoning entries.
|
||||
///
|
||||
/// Handles:
|
||||
/// - `<details type="reasoning">` blocks with optional summary/duration/done
|
||||
/// - `<details>` blocks without type but with reasoning-like summary
|
||||
/// - Raw tag pairs like `<think>`, `<thinking>`, `<reasoning>`, etc.
|
||||
/// - Incomplete/streaming cases by emitting a partial reasoning entry
|
||||
static List<ReasoningSegment>? segments(
|
||||
@@ -150,14 +158,14 @@ class ReasoningParser {
|
||||
int index = 0;
|
||||
|
||||
while (index < content.length) {
|
||||
// Find the earliest match: either <details type="reasoning" or a raw tag
|
||||
// Find the earliest match: either <details (any type) or a raw tag
|
||||
int nextDetailsIdx = -1;
|
||||
int nextRawIdx = -1;
|
||||
(String, String)? matchedRawPair;
|
||||
|
||||
// Check for <details type="reasoning"
|
||||
// Check for any <details tag (we'll determine if it's reasoning later)
|
||||
final detailsMatch = RegExp(
|
||||
r'<details\s+[^>]*type="reasoning"',
|
||||
r'<details(?:\s|>)',
|
||||
).firstMatch(content.substring(index));
|
||||
if (detailsMatch != null) {
|
||||
nextDetailsIdx = index + detailsMatch.start;
|
||||
@@ -203,9 +211,19 @@ class ReasoningParser {
|
||||
}
|
||||
|
||||
if (kind == 'details') {
|
||||
// Parse <details type="reasoning"> block and extract ReasoningEntry
|
||||
final result = _parseDetailsReasoning(content, nextIdx);
|
||||
segments.add(ReasoningSegment.entry(result.entry));
|
||||
// Parse <details> block and check if it's reasoning content
|
||||
final result = _parseDetailsBlock(content, nextIdx);
|
||||
|
||||
// Only add as reasoning if it's a reasoning type or looks like reasoning
|
||||
if (result.isReasoning) {
|
||||
segments.add(ReasoningSegment.entry(result.entry));
|
||||
} else {
|
||||
// Not a reasoning block, treat as text
|
||||
final detailsText = content.substring(nextIdx, result.endIndex);
|
||||
if (detailsText.trim().isNotEmpty) {
|
||||
segments.add(ReasoningSegment.text(detailsText));
|
||||
}
|
||||
}
|
||||
|
||||
if (!result.isComplete) {
|
||||
// Incomplete block, stop here
|
||||
@@ -233,13 +251,14 @@ class ReasoningParser {
|
||||
return segments.isEmpty ? null : segments;
|
||||
}
|
||||
|
||||
/// Parse a `<details type="reasoning">` block starting at the given index.
|
||||
static _ReasoningResult _parseDetailsReasoning(String content, int startIdx) {
|
||||
/// Parse a `<details>` block starting at the given index.
|
||||
/// Returns whether the block is reasoning content based on type or summary.
|
||||
static _DetailsResult _parseDetailsBlock(String content, int startIdx) {
|
||||
// Find the opening tag end
|
||||
final openTagEnd = content.indexOf('>', startIdx);
|
||||
if (openTagEnd == -1) {
|
||||
// Incomplete opening tag
|
||||
return _ReasoningResult(
|
||||
// Incomplete opening tag - assume reasoning for streaming
|
||||
return _DetailsResult(
|
||||
entry: ReasoningEntry(
|
||||
reasoning: '',
|
||||
summary: '',
|
||||
@@ -248,6 +267,7 @@ class ReasoningParser {
|
||||
),
|
||||
endIndex: content.length,
|
||||
isComplete: false,
|
||||
isReasoning: true,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -260,6 +280,7 @@ class ReasoningParser {
|
||||
attrs[m.group(1)!] = m.group(2) ?? '';
|
||||
}
|
||||
|
||||
final type = attrs['type'] ?? '';
|
||||
final isDone = (attrs['done'] ?? 'true') == 'true';
|
||||
final duration = int.tryParse(attrs['duration'] ?? '0') ?? 0;
|
||||
|
||||
@@ -284,15 +305,27 @@ class ReasoningParser {
|
||||
final innerContent = content.substring(openTagEnd + 1);
|
||||
final summaryResult = _extractSummary(innerContent);
|
||||
|
||||
return _ReasoningResult(
|
||||
// Determine if this is reasoning based on type or summary
|
||||
final isReasoning =
|
||||
type == 'reasoning' ||
|
||||
(type.isEmpty &&
|
||||
_reasoningSummaryPattern.hasMatch(summaryResult.summary));
|
||||
|
||||
// Extract duration from summary if not in attributes
|
||||
final effectiveDuration = duration > 0
|
||||
? duration
|
||||
: _extractDurationFromSummary(summaryResult.summary);
|
||||
|
||||
return _DetailsResult(
|
||||
entry: ReasoningEntry(
|
||||
reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
|
||||
summary: HtmlUtils.unescapeHtml(summaryResult.summary),
|
||||
duration: duration,
|
||||
duration: effectiveDuration,
|
||||
isDone: false,
|
||||
),
|
||||
endIndex: content.length,
|
||||
isComplete: false,
|
||||
isReasoning: isReasoning,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -301,15 +334,27 @@ class ReasoningParser {
|
||||
final innerContent = content.substring(openTagEnd + 1, closeIdx);
|
||||
final summaryResult = _extractSummary(innerContent);
|
||||
|
||||
return _ReasoningResult(
|
||||
// Determine if this is reasoning based on type or summary
|
||||
final isReasoning =
|
||||
type == 'reasoning' ||
|
||||
(type.isEmpty &&
|
||||
_reasoningSummaryPattern.hasMatch(summaryResult.summary));
|
||||
|
||||
// Extract duration from summary if not in attributes
|
||||
final effectiveDuration = duration > 0
|
||||
? duration
|
||||
: _extractDurationFromSummary(summaryResult.summary);
|
||||
|
||||
return _DetailsResult(
|
||||
entry: ReasoningEntry(
|
||||
reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
|
||||
summary: HtmlUtils.unescapeHtml(summaryResult.summary),
|
||||
duration: duration,
|
||||
duration: effectiveDuration,
|
||||
isDone: isDone,
|
||||
),
|
||||
endIndex: i,
|
||||
isComplete: true,
|
||||
isReasoning: isReasoning,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -369,6 +414,30 @@ class ReasoningParser {
|
||||
return _SummaryResult(summary: '', remaining: content.trim());
|
||||
}
|
||||
|
||||
/// Reads a duration, in seconds, from a parenthesised marker in [summary].
///
/// Recognised markers (case-insensitive): minutes only `(1m)`, seconds only
/// `(30s)`, or minutes followed by seconds `(2m 30s)` / `(1m30s)`. Only the
/// first marker in the summary is used. Returns 0 when there is none.
static int _extractDurationFromSummary(String summary) {
  final markerPattern = RegExp(
    r'\((\d+)m(?:\s*(\d+)s)?\)|\((\d+)s\)',
    caseSensitive: false,
  );

  final hit = markerPattern.firstMatch(summary);
  if (hit == null) return 0;

  // Group 1 is set only for the minutes form; group 2 is its optional
  // seconds part.
  final minutesText = hit.group(1);
  if (minutesText != null) {
    final secondsText = hit.group(2);
    final wholeMinutes = int.tryParse(minutesText) ?? 0;
    final extraSeconds =
        secondsText == null ? 0 : int.tryParse(secondsText) ?? 0;
    return wholeMinutes * 60 + extraSeconds;
  }

  // Group 3 is set only for the seconds-only form.
  final secondsOnlyText = hit.group(3);
  return secondsOnlyText == null ? 0 : int.tryParse(secondsOnlyText) ?? 0;
}
|
||||
|
||||
/// Parses a message and extracts the first reasoning content block.
|
||||
/// Returns null if no reasoning content is found.
|
||||
static ReasoningContent? parseReasoningContent(
|
||||
@@ -412,6 +481,17 @@ class ReasoningParser {
|
||||
// Check for <details type="reasoning"
|
||||
if (content.contains('type="reasoning"')) return true;
|
||||
|
||||
// Check for <details> with reasoning-like summary
|
||||
if (content.contains('<details')) {
|
||||
final summaryMatch = RegExp(
|
||||
r'<summary>([^<]*)</summary>',
|
||||
).firstMatch(content);
|
||||
if (summaryMatch != null) {
|
||||
final summary = summaryMatch.group(1) ?? '';
|
||||
if (_reasoningSummaryPattern.hasMatch(summary)) return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for raw tag pairs
|
||||
for (final pair in defaultReasoningTagPairs) {
|
||||
if (content.contains(pair.$1)) return true;
|
||||
@@ -448,6 +528,20 @@ class _ReasoningResult {
|
||||
});
|
||||
}
|
||||
|
||||
/// Outcome of parsing one `<details>` block.
class _DetailsResult {
  const _DetailsResult({
    required this.entry,
    required this.endIndex,
    required this.isComplete,
    required this.isReasoning,
  });

  /// The entry built from the block's summary and remaining inner content.
  final ReasoningEntry entry;

  /// Offset in the parsed content just past this block.
  ///
  /// Equals the content length when the block is not terminated.
  final int endIndex;

  /// Whether the whole block was present in the content.
  ///
  /// False when the opening tag or the end of the block was not found, as
  /// happens while content is still streaming.
  final bool isComplete;

  /// Whether the block should be treated as reasoning content.
  ///
  /// True when its `type` is `reasoning`, when it has no type but a
  /// reasoning-like summary, or when the opening tag itself is incomplete.
  final bool isReasoning;
}
|
||||
|
||||
class _SummaryResult {
|
||||
final String summary;
|
||||
final String remaining;
|
||||
|
||||
Reference in New Issue
Block a user