141 lines
4.4 KiB
Dart
141 lines
4.4 KiB
Dart
/// Utility class for parsing inline citation references like [1], [1,2,3].
|
|
///
|
|
/// This matches OpenWebUI's citation-extension.ts behavior where adjacent
|
|
/// citation brackets are merged and parsed into source indices.
|
|
///
|
|
/// Reference: openwebui-src/src/lib/utils/marked/citation-extension.ts
|
|
library;
|
|
|
|
/// Represents a parsed citation with one or more source IDs.
|
|
class Citation {
|
|
/// 1-based source indices referenced by this citation.
|
|
final List<int> sourceIds;
|
|
|
|
/// The raw text that was matched (e.g., "[1]" or "[1,2,3]").
|
|
final String raw;
|
|
|
|
const Citation({required this.sourceIds, required this.raw});
|
|
|
|
/// Converts to 0-based indices for array access.
|
|
List<int> get zeroBasedIndices =>
|
|
sourceIds.map((id) => id - 1).toList(growable: false);
|
|
}
|
|
|
|
/// A segment of content that is either plain text or a citation.
|
|
class CitationSegment {
|
|
final String? text;
|
|
final Citation? citation;
|
|
|
|
const CitationSegment._({this.text, this.citation});
|
|
|
|
factory CitationSegment.text(String text) => CitationSegment._(text: text);
|
|
factory CitationSegment.citation(Citation citation) =>
|
|
CitationSegment._(citation: citation);
|
|
|
|
bool get isText => text != null;
|
|
bool get isCitation => citation != null;
|
|
}
|
|
|
|
/// Parser for inline citations in markdown content.
|
|
class CitationParser {
|
|
const CitationParser._();
|
|
|
|
// Matches one or more adjacent [N] or [N,M,...] blocks
|
|
// Examples: "[1]", "[1,2,3]", "[1][2]", "[1,2][3,4]"
|
|
static final _citationPattern = RegExp(r'(\[(?:\d[\d,\s]*)\])+');
|
|
|
|
// Matches individual bracket groups within a citation match
|
|
static final _bracketGroupPattern = RegExp(r'\[([\d,\s]+)\]');
|
|
|
|
// Avoids matching footnotes like [^1]
|
|
static final _footnotePattern = RegExp(r'^\[\^');
|
|
|
|
/// Parses content and returns segments of text and citations.
|
|
///
|
|
/// Returns null if no citations are found.
|
|
static List<CitationSegment>? parse(String content) {
|
|
if (content.isEmpty) return null;
|
|
|
|
final segments = <CitationSegment>[];
|
|
int lastEnd = 0;
|
|
|
|
for (final match in _citationPattern.allMatches(content)) {
|
|
// Check if this looks like a footnote reference
|
|
final beforeMatch = match.start > 0
|
|
? content.substring(match.start - 1, match.start)
|
|
: '';
|
|
if (beforeMatch == '^') continue;
|
|
|
|
// Check the matched content for footnote pattern
|
|
final raw = match.group(0)!;
|
|
if (_footnotePattern.hasMatch(raw)) continue;
|
|
|
|
// Add text before this citation
|
|
if (match.start > lastEnd) {
|
|
final textBefore = content.substring(lastEnd, match.start);
|
|
if (textBefore.isNotEmpty) {
|
|
segments.add(CitationSegment.text(textBefore));
|
|
}
|
|
}
|
|
|
|
// Parse the citation IDs
|
|
final ids = <int>[];
|
|
for (final bracketMatch in _bracketGroupPattern.allMatches(raw)) {
|
|
final idsStr = bracketMatch.group(1) ?? '';
|
|
final parsed = idsStr
|
|
.split(',')
|
|
.map((s) => int.tryParse(s.trim()))
|
|
.whereType<int>()
|
|
.where((n) => n > 0) // Only positive indices
|
|
.toList();
|
|
ids.addAll(parsed);
|
|
}
|
|
|
|
if (ids.isNotEmpty) {
|
|
segments.add(
|
|
CitationSegment.citation(Citation(sourceIds: ids, raw: raw)),
|
|
);
|
|
} else {
|
|
// No valid IDs found, treat as text
|
|
segments.add(CitationSegment.text(raw));
|
|
}
|
|
|
|
lastEnd = match.end;
|
|
}
|
|
|
|
// Add remaining text
|
|
if (lastEnd < content.length) {
|
|
final remaining = content.substring(lastEnd);
|
|
if (remaining.isNotEmpty) {
|
|
segments.add(CitationSegment.text(remaining));
|
|
}
|
|
}
|
|
|
|
// Return null if no citations were found
|
|
final hasCitations = segments.any((s) => s.isCitation);
|
|
return hasCitations ? segments : null;
|
|
}
|
|
|
|
/// Checks if content contains any citation patterns.
|
|
static bool hasCitations(String content) {
|
|
if (content.isEmpty) return false;
|
|
// The regex already excludes footnotes like [^1] since it requires
|
|
// a digit immediately after the opening bracket.
|
|
return _citationPattern.hasMatch(content);
|
|
}
|
|
|
|
/// Extracts all unique source IDs from content (1-based).
|
|
static List<int> extractSourceIds(String content) {
|
|
final segments = parse(content);
|
|
if (segments == null) return const [];
|
|
|
|
final ids = <int>{};
|
|
for (final segment in segments) {
|
|
if (segment.isCitation) {
|
|
ids.addAll(segment.citation!.sourceIds);
|
|
}
|
|
}
|
|
return ids.toList()..sort();
|
|
}
|
|
}
|