Merge pull request #344 from cogwheel0/fix-knowledge-base-parsing

fix(knowledgebase): parsing for knowledge
This commit is contained in:
cogwheel
2026-01-13 11:54:17 +08:00
committed by GitHub
4 changed files with 267 additions and 13 deletions

View File

@@ -1,8 +1,10 @@
import 'package:freezed_annotation/freezed_annotation.dart';
part 'knowledge_base.freezed.dart';
part 'knowledge_base.g.dart';
import '../utils/json_parsing.dart';
part 'knowledge_base.freezed.dart';
/// A knowledge base containing documents for RAG retrieval.
@freezed
sealed class KnowledgeBase with _$KnowledgeBase {
const factory KnowledgeBase({
@@ -15,10 +17,26 @@ sealed class KnowledgeBase with _$KnowledgeBase {
@Default({}) Map<String, dynamic> metadata,
}) = _KnowledgeBase;
factory KnowledgeBase.fromJson(Map<String, dynamic> json) =>
_$KnowledgeBaseFromJson(json);
/// Creates a [KnowledgeBase] from JSON, handling both snake_case (new API)
/// and camelCase (old API) field names.
factory KnowledgeBase.fromJson(Map<String, dynamic> json) {
return KnowledgeBase(
id: json['id'] as String,
name: json['name'] as String,
description: json['description'] as String?,
createdAt: parseDateTime(json['created_at'] ?? json['createdAt']),
updatedAt: parseDateTime(json['updated_at'] ?? json['updatedAt']),
itemCount: parseInt(
json['file_count'] ?? json['item_count'] ?? json['itemCount'],
) ??
0,
metadata:
(json['metadata'] as Map<String, dynamic>?) ?? const <String, dynamic>{},
);
}
}
/// An item within a knowledge base.
@freezed
sealed class KnowledgeBaseItem with _$KnowledgeBaseItem {
const factory KnowledgeBaseItem({
@@ -30,6 +48,17 @@ sealed class KnowledgeBaseItem with _$KnowledgeBaseItem {
@Default({}) Map<String, dynamic> metadata,
}) = _KnowledgeBaseItem;
factory KnowledgeBaseItem.fromJson(Map<String, dynamic> json) =>
_$KnowledgeBaseItemFromJson(json);
/// Creates a [KnowledgeBaseItem] from JSON, handling both snake_case (new API)
/// and camelCase (old API) field names.
factory KnowledgeBaseItem.fromJson(Map<String, dynamic> json) {
return KnowledgeBaseItem(
id: json['id'] as String,
content: json['content'] as String,
title: json['title'] as String?,
createdAt: parseDateTime(json['created_at'] ?? json['createdAt']),
updatedAt: parseDateTime(json['updated_at'] ?? json['updatedAt']),
metadata:
(json['metadata'] as Map<String, dynamic>?) ?? const <String, dynamic>{},
);
}
}

View File

@@ -0,0 +1,54 @@
import 'package:freezed_annotation/freezed_annotation.dart';
import '../utils/json_parsing.dart';
part 'knowledge_base_file.freezed.dart';
/// A single file stored inside a knowledge base.
///
/// The new WebUI API serves files from a dedicated, paginated endpoint.
/// Files are deduplicated by content hash (not filename).
@freezed
sealed class KnowledgeBaseFile with _$KnowledgeBaseFile {
  const factory KnowledgeBaseFile({
    required String id,
    required String filename,
    Map<String, dynamic>? meta,
    required DateTime createdAt,
    DateTime? updatedAt,

    /// Content hash used for server-side deduplication.
    String? contentHash,
  }) = _KnowledgeBaseFile;

  /// Builds a [KnowledgeBaseFile] from a decoded JSON object.
  ///
  /// Timestamps are read from the snake_case key first and the camelCase key
  /// second; the content hash is read from `hash`, `content_hash`, then
  /// `contentHash`. The filename is resolved by [_extractFilename].
  factory KnowledgeBaseFile.fromJson(Map<String, dynamic> json) {
    // Plain map lookups: resolving the key aliases up front cannot throw, so
    // the casts below still fail in the same order as before.
    final createdRaw = json['created_at'] ?? json['createdAt'];
    final updatedRaw = json['updated_at'] ?? json['updatedAt'];
    final hashRaw =
        json['hash'] ?? json['content_hash'] ?? json['contentHash'];

    return KnowledgeBaseFile(
      id: json['id'] as String,
      filename: _extractFilename(json),
      meta: json['meta'] as Map<String, dynamic>?,
      createdAt: parseDateTime(createdRaw),
      updatedAt: parseDateTimeOrNull(updatedRaw),
      contentHash: hashRaw as String?,
    );
  }
}
/// Extracts a display filename from the places the API may put it.
///
/// Candidates are tried in order: top-level `filename`, top-level `name`,
/// then `name` and `filename` inside the nested `meta` object. A candidate is
/// accepted only when it is a non-empty [String]; `null`, empty, or
/// non-string values fall through to the next location instead of ending the
/// search early or throwing a cast error.
///
/// Returns `'Unknown'` when no usable value is found.
String _extractFilename(Map<String, dynamic> json) {
  final meta = json['meta'];
  final candidates = <Object?>[
    json['filename'],
    json['name'],
    // Nested metadata is only consulted when it is actually a map.
    if (meta is Map) ...[meta['name'], meta['filename']],
  ];

  for (final candidate in candidates) {
    if (candidate is String && candidate.isNotEmpty) return candidate;
  }
  return 'Unknown';
}

View File

@@ -15,6 +15,7 @@ import '../models/conversation.dart';
import '../models/chat_message.dart';
import '../models/file_info.dart';
import '../models/knowledge_base.dart';
import '../models/knowledge_base_file.dart';
import '../models/prompt.dart';
import '../auth/api_auth_interceptor.dart';
import '../error/api_error_interceptor.dart';
@@ -1925,15 +1926,25 @@ class ApiService {
_traceApi('Fetching knowledge bases');
final response = await _dio.get('/api/v1/knowledge/');
final data = response.data;
if (data is List) {
// Handle new paginated response: { "items": [...], "total": N }
// Also maintain backward compatibility with old array response
List<dynamic> items;
if (data is Map<String, dynamic> && data.containsKey('items')) {
items = data['items'] as List<dynamic>? ?? [];
} else if (data is List) {
// Backward compatibility with old API
items = data;
} else {
return const [];
}
final normalized = await _normalizeList(
data,
items,
debugLabel: 'parse_knowledge_bases',
);
return normalized.map(KnowledgeBase.fromJson).toList(growable: false);
}
return const [];
}
Future<Map<String, dynamic>> createKnowledgeBase({
required String name,
@@ -2017,6 +2028,113 @@ class ApiService {
return [];
}
/// Fetches a single page of files belonging to a knowledge base.
///
/// Returns a record holding the parsed files and the total file count.
/// The new API returns paginated results (default 30 items per page) as an
/// object with `items` and `total`; a bare list response is still accepted
/// for backward compatibility, in which case `total` is the number of parsed
/// files. Any other response shape yields an empty result.
Future<({List<KnowledgeBaseFile> files, int total})> getKnowledgeBaseFiles(
  String knowledgeBaseId, {
  int page = 1,
}) async {
  _traceApi('Fetching knowledge base files: $knowledgeBaseId (page: $page)');
  final response = await _dio.get(
    '/api/v1/knowledge/$knowledgeBaseId/files',
    queryParameters: {'page': page},
  );

  // Entries that are not JSON objects are silently dropped.
  List<KnowledgeBaseFile> parseEntries(List<dynamic> raw) => raw
      .whereType<Map<String, dynamic>>()
      .map(KnowledgeBaseFile.fromJson)
      .toList(growable: false);

  final payload = response.data;

  if (payload is Map<String, dynamic>) {
    final rawItems = payload['items'] as List<dynamic>? ?? [];
    // When the server omits `total`, fall back to the raw item count.
    final reportedTotal = payload['total'] as int? ?? rawItems.length;
    return (files: parseEntries(rawItems), total: reportedTotal);
  }

  // Backward compatibility: if response is a plain list
  if (payload is List) {
    final parsed = parseEntries(payload);
    return (files: parsed, total: parsed.length);
  }

  return (files: const <KnowledgeBaseFile>[], total: 0);
}
/// Fetches ALL files for a knowledge base, handling pagination internally.
///
/// Use this when you need the complete list of files (e.g., for
/// deduplication). Pages are requested one after another through
/// [getKnowledgeBaseFiles] until the accumulated count reaches the reported
/// total, an empty page is returned, or the safety limit of 100 pages is hit.
/// In the last case the returned list is incomplete and a warning is traced.
Future<List<KnowledgeBaseFile>> getAllKnowledgeBaseFiles(
  String knowledgeBaseId,
) async {
  _traceApi('Fetching all knowledge base files: $knowledgeBaseId');

  const maxPages = 100; // Safety limit to prevent infinite loops
  final allFiles = <KnowledgeBaseFile>[];
  var page = 1;
  var total = 0;

  do {
    final result = await getKnowledgeBaseFiles(knowledgeBaseId, page: page);

    // Guard against empty pages causing infinite loops
    if (result.files.isEmpty) {
      _traceApi('Empty page received, stopping pagination');
      break;
    }

    allFiles.addAll(result.files);
    total = result.total;
    page++;
  } while (allFiles.length < total && page <= maxPages);

  // `page` is already maxPages + 1 after the final allowed page, even when
  // that page completed the listing. Only warn when files are still missing.
  if (page > maxPages && allFiles.length < total) {
    _traceApi('Warning: Hit max page limit ($maxPages) for $knowledgeBaseId');
  }

  _traceApi('Fetched ${allFiles.length} total files from $knowledgeBaseId');
  return allFiles;
}
/// Adds a file to a knowledge base.
///
/// Uploads [content] as a multipart `file` field named [filename]; the
/// content type is set when [_getMimeType] recognises the filename.
///
/// Returns the file metadata on success, or `null` if the file already
/// exists (the server answered 400 with a `detail` message containing
/// "Duplicate content"). Every other [DioException] is rethrown unchanged.
Future<Map<String, dynamic>?> addFileToKnowledgeBase(
  String knowledgeBaseId, {
  required String filename,
  required List<int> content,
}) async {
  _traceApi('Adding file to knowledge base: $knowledgeBaseId ($filename)');
  try {
    final mimeType = _getMimeType(filename);
    final response = await _dio.post(
      '/api/v1/knowledge/$knowledgeBaseId/file/add',
      data: FormData.fromMap({
        'file': MultipartFile.fromBytes(
          content,
          filename: filename,
          contentType: mimeType != null ? MediaType.parse(mimeType) : null,
        ),
      }),
    );
    return response.data as Map<String, dynamic>;
  } on DioException catch (e) {
    // Handle duplicate content as a no-op (file already exists)
    if (e.response?.statusCode == 400) {
      final responseData = e.response?.data;
      // `detail` is not guaranteed to be a string (it can be a list or an
      // object). Type-test it instead of casting, so an unexpected shape
      // cannot raise a TypeError here and hide the original DioException.
      final detail = responseData is Map ? responseData['detail'] : null;
      if (detail is String && detail.contains('Duplicate content')) {
        _traceApi('Skipping duplicate file: $filename');
        return null; // Indicates file already exists
      }
    }
    rethrow;
  }
}
Future<Map<String, dynamic>?> processWebpage({
required String url,
String? collectionName,

View File

@@ -0,0 +1,53 @@
/// Utilities for parsing JSON values with graceful fallbacks.
///
/// These helpers handle various API response formats and provide defensive
/// parsing to avoid crashes from malformed data.
library;
/// Parses a [DateTime] from the loosely typed values found in API payloads.
///
/// Handles:
/// - `DateTime` objects (returned as-is)
/// - ISO 8601 strings (parsed with [DateTime.tryParse])
/// - Unix timestamps as `int` or `double` (assumed to be in seconds;
///   fractional seconds are rounded to the nearest millisecond)
/// - `null`, unparseable, non-finite, or out-of-range values
///   (returns [DateTime.now])
///
/// Never throws.
DateTime parseDateTime(Object? value) {
  if (value is DateTime) return value;

  if (value is String) {
    // Use tryParse to avoid FormatException on malformed strings
    return DateTime.tryParse(value) ?? DateTime.now();
  }

  if (value is num) {
    // DateTime supports +/- 8.64e15 ms around the epoch and
    // fromMillisecondsSinceEpoch throws outside that range. Checking the
    // bound in seconds, before multiplying, also avoids 64-bit int
    // wrap-around. NaN and infinities fail both comparisons.
    const maxSeconds = 8640000000000;
    if (value >= -maxSeconds && value <= maxSeconds) {
      return DateTime.fromMillisecondsSinceEpoch((value * 1000).round());
    }
  }

  return DateTime.now();
}
/// Nullable counterpart of [parseDateTime].
///
/// Yields `null` for a `null` input; every other value is handed to
/// [parseDateTime] unchanged.
DateTime? parseDateTimeOrNull(Object? value) =>
    value == null ? null : parseDateTime(value);
/// Parses an int from various formats.
///
/// Handles:
/// - `int` values (returned as-is)
/// - finite `num` values (truncated toward zero with `toInt`)
/// - String values (parsed with [int.tryParse])
/// - `null`, NaN, infinite, or otherwise invalid values (returns `null`)
///
/// Never throws.
int? parseInt(Object? value) {
  if (value is int) return value;
  if (value is num) {
    // toInt() throws UnsupportedError for NaN and +/- infinity.
    return value.isFinite ? value.toInt() : null;
  }
  if (value is String) return int.tryParse(value);
  return null;
}