feat(widget): Add citation badge for source references

This commit is contained in:
cogwheel0
2025-12-07 22:35:16 +05:30
parent ccde2e4a46
commit 6e4ee2acd3
5 changed files with 667 additions and 20 deletions

View File

@@ -0,0 +1,140 @@
/// Utility class for parsing inline citation references like [1], [1,2,3].
///
/// This matches OpenWebUI's citation-extension.ts behavior where adjacent
/// citation brackets are merged and parsed into source indices.
///
/// Reference: openwebui-src/src/lib/utils/marked/citation-extension.ts
library;
/// A single inline citation marker together with the sources it refers to.
class Citation {
  /// Creates a citation for [sourceIds] that was written in the text as [raw].
  const Citation({required this.sourceIds, required this.raw});

  /// Source numbers referenced by this citation, counted from 1.
  final List<int> sourceIds;

  /// The exact text this citation came from (e.g., "[1]" or "[1,2,3]").
  final String raw;

  /// The [sourceIds] shifted down by one, suitable for indexing a 0-based list.
  ///
  /// A new fixed-length list is built on every access.
  List<int> get zeroBasedIndices {
    return List<int>.generate(
      sourceIds.length,
      (position) => sourceIds[position] - 1,
      growable: false,
    );
  }
}
/// One piece of parsed content: either a run of plain text or a citation.
///
/// The public factories each set exactly one of [text] and [citation].
class CitationSegment {
  const CitationSegment._({this.text, this.citation});

  /// Creates a segment holding plain [text].
  factory CitationSegment.text(String text) {
    return CitationSegment._(text: text);
  }

  /// Creates a segment holding a parsed [citation].
  factory CitationSegment.citation(Citation citation) {
    return CitationSegment._(citation: citation);
  }

  /// The plain text of this segment, or null when it holds a citation.
  final String? text;

  /// The citation of this segment, or null when it holds plain text.
  final Citation? citation;

  /// Whether this segment holds plain text.
  bool get isText {
    return text != null;
  }

  /// Whether this segment holds a citation.
  bool get isCitation {
    return citation != null;
  }
}
/// Parser for inline citations in markdown content.
///
/// Recognises bracketed source numbers such as `[1]`, `[1,2,3]` and adjacent
/// groups like `[1][2]`, which are merged into a single [Citation].
/// All members are static; the class cannot be instantiated.
class CitationParser {
  const CitationParser._();

  // Matches one or more adjacent [N] or [N,M,...] blocks.
  // Examples: "[1]", "[1,2,3]", "[1][2]", "[1,2][3,4]".
  // A digit is required right after "[", so a footnote such as "[^1]" can
  // never match and needs no separate check.
  static final _citationPattern = RegExp(r'(\[(?:\d[\d,\s]*)\])+');

  // Matches individual bracket groups within a citation match.
  static final _bracketGroupPattern = RegExp(r'\[([\d,\s]+)\]');

  /// Whether [match] is immediately preceded by a `^` in [content].
  ///
  /// Such matches are left untouched as ordinary text.
  static bool _followsCaret(String content, Match match) {
    return match.start > 0 && content[match.start - 1] == '^';
  }

  /// Collects the positive integers listed inside every bracket group of
  /// [raw], in order of appearance.
  ///
  /// Entries that are empty, not a plain integer, or not greater than zero
  /// are dropped.
  static List<int> _sourceIdsIn(String raw) {
    final ids = <int>[];
    for (final group in _bracketGroupPattern.allMatches(raw)) {
      final listed = group.group(1) ?? '';
      for (final part in listed.split(',')) {
        final id = int.tryParse(part.trim());
        if (id != null && id > 0) ids.add(id);
      }
    }
    return ids;
  }

  /// Parses [content] into an ordered list of text and citation segments.
  ///
  /// A bracket run that yields no positive ID (for example `[0]`) is emitted
  /// as its own text segment. A bracket run directly preceded by `^` is not
  /// treated as a citation and stays part of the surrounding text.
  ///
  /// Returns null if [content] is empty or no citation is found.
  static List<CitationSegment>? parse(String content) {
    if (content.isEmpty) return null;

    final segments = <CitationSegment>[];
    var foundCitation = false;
    var lastEnd = 0;

    for (final match in _citationPattern.allMatches(content)) {
      // Skipping without advancing lastEnd keeps the skipped brackets inside
      // the next text segment.
      if (_followsCaret(content, match)) continue;

      // Text between the previous segment and this match.
      if (match.start > lastEnd) {
        segments.add(
          CitationSegment.text(content.substring(lastEnd, match.start)),
        );
      }

      final raw = match.group(0)!;
      final ids = _sourceIdsIn(raw);
      if (ids.isEmpty) {
        // No valid IDs found, keep the brackets as text.
        segments.add(CitationSegment.text(raw));
      } else {
        segments.add(
          CitationSegment.citation(Citation(sourceIds: ids, raw: raw)),
        );
        foundCitation = true;
      }
      lastEnd = match.end;
    }

    if (!foundCitation) return null;

    // Trailing text after the last match.
    if (lastEnd < content.length) {
      segments.add(CitationSegment.text(content.substring(lastEnd)));
    }
    return segments;
  }

  /// Whether [content] contains at least one citation that [parse] would
  /// return.
  ///
  /// Applies the same rules as [parse] (caret-prefixed brackets and brackets
  /// without a positive ID do not count), so a true result means [parse]
  /// returns a non-null list. Stops at the first qualifying match and builds
  /// no segments.
  static bool hasCitations(String content) {
    if (content.isEmpty) return false;
    for (final match in _citationPattern.allMatches(content)) {
      if (_followsCaret(content, match)) continue;
      if (_sourceIdsIn(match.group(0)!).isNotEmpty) return true;
    }
    return false;
  }

  /// Extracts all unique source IDs from [content] (1-based), sorted
  /// ascending.
  ///
  /// Returns an empty list when [content] has no citations.
  static List<int> extractSourceIds(String content) {
    final segments = parse(content);
    if (segments == null) return const [];

    final ids = <int>{};
    for (final segment in segments) {
      final citation = segment.citation;
      if (citation != null) ids.addAll(citation.sourceIds);
    }
    return ids.toList()..sort();
  }
}

View File

@@ -124,10 +124,18 @@ class ReasoningContent {
/// Utility class for parsing and extracting reasoning/thinking content.
class ReasoningParser {
/// Patterns that indicate a details block is reasoning content.
/// Used when the `type` attribute is missing.
static final _reasoningSummaryPattern = RegExp(
r'Thought|Thinking|Reasoning',
caseSensitive: false,
);
/// Splits content into ordered segments of plain text and reasoning entries.
///
/// Handles:
/// - `<details type="reasoning">` blocks with optional summary/duration/done
/// - `<details>` blocks without type but with reasoning-like summary
/// - Raw tag pairs like `<think>`, `<thinking>`, `<reasoning>`, etc.
/// - Incomplete/streaming cases by emitting a partial reasoning entry
static List<ReasoningSegment>? segments(
@@ -150,14 +158,14 @@ class ReasoningParser {
int index = 0;
while (index < content.length) {
// Find the earliest match: either <details type="reasoning" or a raw tag
// Find the earliest match: either <details (any type) or a raw tag
int nextDetailsIdx = -1;
int nextRawIdx = -1;
(String, String)? matchedRawPair;
// Check for <details type="reasoning"
// Check for any <details tag (we'll determine if it's reasoning later)
final detailsMatch = RegExp(
r'<details\s+[^>]*type="reasoning"',
r'<details(?:\s|>)',
).firstMatch(content.substring(index));
if (detailsMatch != null) {
nextDetailsIdx = index + detailsMatch.start;
@@ -203,9 +211,19 @@ class ReasoningParser {
}
if (kind == 'details') {
// Parse <details type="reasoning"> block and extract ReasoningEntry
final result = _parseDetailsReasoning(content, nextIdx);
segments.add(ReasoningSegment.entry(result.entry));
// Parse <details> block and check if it's reasoning content
final result = _parseDetailsBlock(content, nextIdx);
// Only add as reasoning if it's a reasoning type or looks like reasoning
if (result.isReasoning) {
segments.add(ReasoningSegment.entry(result.entry));
} else {
// Not a reasoning block, treat as text
final detailsText = content.substring(nextIdx, result.endIndex);
if (detailsText.trim().isNotEmpty) {
segments.add(ReasoningSegment.text(detailsText));
}
}
if (!result.isComplete) {
// Incomplete block, stop here
@@ -233,13 +251,14 @@ class ReasoningParser {
return segments.isEmpty ? null : segments;
}
/// Parse a `<details type="reasoning">` block starting at the given index.
static _ReasoningResult _parseDetailsReasoning(String content, int startIdx) {
/// Parse a `<details>` block starting at the given index.
/// Returns whether the block is reasoning content based on type or summary.
static _DetailsResult _parseDetailsBlock(String content, int startIdx) {
// Find the opening tag end
final openTagEnd = content.indexOf('>', startIdx);
if (openTagEnd == -1) {
// Incomplete opening tag
return _ReasoningResult(
// Incomplete opening tag - assume reasoning for streaming
return _DetailsResult(
entry: ReasoningEntry(
reasoning: '',
summary: '',
@@ -248,6 +267,7 @@ class ReasoningParser {
),
endIndex: content.length,
isComplete: false,
isReasoning: true,
);
}
@@ -260,6 +280,7 @@ class ReasoningParser {
attrs[m.group(1)!] = m.group(2) ?? '';
}
final type = attrs['type'] ?? '';
final isDone = (attrs['done'] ?? 'true') == 'true';
final duration = int.tryParse(attrs['duration'] ?? '0') ?? 0;
@@ -284,15 +305,27 @@ class ReasoningParser {
final innerContent = content.substring(openTagEnd + 1);
final summaryResult = _extractSummary(innerContent);
return _ReasoningResult(
// Determine if this is reasoning based on type or summary
final isReasoning =
type == 'reasoning' ||
(type.isEmpty &&
_reasoningSummaryPattern.hasMatch(summaryResult.summary));
// Extract duration from summary if not in attributes
final effectiveDuration = duration > 0
? duration
: _extractDurationFromSummary(summaryResult.summary);
return _DetailsResult(
entry: ReasoningEntry(
reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
summary: HtmlUtils.unescapeHtml(summaryResult.summary),
duration: duration,
duration: effectiveDuration,
isDone: false,
),
endIndex: content.length,
isComplete: false,
isReasoning: isReasoning,
);
}
@@ -301,15 +334,27 @@ class ReasoningParser {
final innerContent = content.substring(openTagEnd + 1, closeIdx);
final summaryResult = _extractSummary(innerContent);
return _ReasoningResult(
// Determine if this is reasoning based on type or summary
final isReasoning =
type == 'reasoning' ||
(type.isEmpty &&
_reasoningSummaryPattern.hasMatch(summaryResult.summary));
// Extract duration from summary if not in attributes
final effectiveDuration = duration > 0
? duration
: _extractDurationFromSummary(summaryResult.summary);
return _DetailsResult(
entry: ReasoningEntry(
reasoning: HtmlUtils.unescapeHtml(summaryResult.remaining),
summary: HtmlUtils.unescapeHtml(summaryResult.summary),
duration: duration,
duration: effectiveDuration,
isDone: isDone,
),
endIndex: i,
isComplete: true,
isReasoning: isReasoning,
);
}
@@ -369,6 +414,30 @@ class ReasoningParser {
return _SummaryResult(summary: '', remaining: content.trim());
}
/// Reads a parenthesised duration out of [summary] and returns it in seconds.
///
/// Understands minutes-only "(1m)", seconds-only "(30s)" and combined forms
/// such as "(2m 30s)" or "(1m30s)", ignoring letter case. Only the first
/// such group in [summary] is used. Returns 0 when none is present.
static int _extractDurationFromSummary(String summary) {
  // First alternative: minutes (group 1) with optional seconds (group 2).
  // Second alternative: seconds only (group 3).
  final pattern = RegExp(
    r'\((\d+)m(?:\s*(\d+)s)?\)|\((\d+)s\)',
    caseSensitive: false,
  );

  final found = pattern.firstMatch(summary);
  if (found == null) return 0;

  final minuteText = found.group(1);
  if (minuteText == null) {
    // The seconds-only alternative matched.
    return int.tryParse(found.group(3) ?? '0') ?? 0;
  }

  // The minutes alternative matched; seconds may be absent.
  final minutes = int.tryParse(minuteText) ?? 0;
  final seconds = int.tryParse(found.group(2) ?? '0') ?? 0;
  return minutes * 60 + seconds;
}
/// Parses a message and extracts the first reasoning content block.
/// Returns null if no reasoning content is found.
static ReasoningContent? parseReasoningContent(
@@ -412,6 +481,17 @@ class ReasoningParser {
// Check for <details type="reasoning"
if (content.contains('type="reasoning"')) return true;
// Check for <details> with reasoning-like summary
if (content.contains('<details')) {
final summaryMatch = RegExp(
r'<summary>([^<]*)</summary>',
).firstMatch(content);
if (summaryMatch != null) {
final summary = summaryMatch.group(1) ?? '';
if (_reasoningSummaryPattern.hasMatch(summary)) return true;
}
}
// Check for raw tag pairs
for (final pair in defaultReasoningTagPairs) {
if (content.contains(pair.$1)) return true;
@@ -448,6 +528,20 @@ class _ReasoningResult {
});
}
/// Outcome of parsing one `<details>` block.
class _DetailsResult {
  const _DetailsResult({
    required this.entry,
    required this.endIndex,
    required this.isComplete,
    required this.isReasoning,
  });

  /// The entry built from the block.
  final ReasoningEntry entry;

  /// Offset in the parsed content at which this block ends.
  final int endIndex;

  /// Whether the whole block was present; false for a truncated block.
  final bool isComplete;

  /// Whether the block was classified as reasoning content.
  final bool isReasoning;
}
class _SummaryResult {
final String summary;
final String remaining;