From f3f997ce3a912314464a03f477d92d85ee8ecb35 Mon Sep 17 00:00:00 2001
From: cogwheel <172976095+cogwheel0@users.noreply.github.com>
Date: Tue, 13 Jan 2026 09:21:17 +0530
Subject: [PATCH] fix(knowledgebase): parsing for knowledge

---
 lib/core/models/knowledge_base.dart      |  41 +++++--
 lib/core/models/knowledge_base_file.dart |  56 ++++++++++
 lib/core/services/api_service.dart       | 138 +++++++++++++++++++++--
 lib/core/utils/json_parsing.dart         |  58 ++++++++++
 4 files changed, 280 insertions(+), 13 deletions(-)
 create mode 100644 lib/core/models/knowledge_base_file.dart
 create mode 100644 lib/core/utils/json_parsing.dart

diff --git a/lib/core/models/knowledge_base.dart b/lib/core/models/knowledge_base.dart
index e85aa2b..b517640 100644
--- a/lib/core/models/knowledge_base.dart
+++ b/lib/core/models/knowledge_base.dart
@@ -1,8 +1,10 @@
 import 'package:freezed_annotation/freezed_annotation.dart';
 
-part 'knowledge_base.freezed.dart';
-part 'knowledge_base.g.dart';
+import '../utils/json_parsing.dart';
 
+part 'knowledge_base.freezed.dart';
+
+/// A knowledge base containing documents for RAG retrieval.
 @freezed
 sealed class KnowledgeBase with _$KnowledgeBase {
   const factory KnowledgeBase({
@@ -15,10 +17,26 @@ sealed class KnowledgeBase with _$KnowledgeBase {
     @Default({}) Map<String, dynamic> metadata,
   }) = _KnowledgeBase;
 
-  factory KnowledgeBase.fromJson(Map<String, dynamic> json) =>
-      _$KnowledgeBaseFromJson(json);
+  /// Creates a [KnowledgeBase] from JSON, handling both snake_case (new API)
+  /// and camelCase (old API) field names.
+  factory KnowledgeBase.fromJson(Map<String, dynamic> json) {
+    return KnowledgeBase(
+      id: json['id'] as String,
+      name: json['name'] as String,
+      description: json['description'] as String?,
+      createdAt: parseDateTime(json['created_at'] ?? json['createdAt']),
+      updatedAt: parseDateTime(json['updated_at'] ?? json['updatedAt']),
+      itemCount: parseInt(
+            json['file_count'] ?? json['item_count'] ?? json['itemCount'],
+          ) ??
+          0,
+      metadata:
+          (json['metadata'] as Map<String, dynamic>?) ?? const {},
+    );
+  }
 }
 
+/// An item within a knowledge base.
 @freezed
 sealed class KnowledgeBaseItem with _$KnowledgeBaseItem {
   const factory KnowledgeBaseItem({
@@ -30,6 +48,17 @@ sealed class KnowledgeBaseItem with _$KnowledgeBaseItem {
     @Default({}) Map<String, dynamic> metadata,
   }) = _KnowledgeBaseItem;
 
-  factory KnowledgeBaseItem.fromJson(Map<String, dynamic> json) =>
-      _$KnowledgeBaseItemFromJson(json);
+  /// Creates a [KnowledgeBaseItem] from JSON, handling both snake_case (new API)
+  /// and camelCase (old API) field names.
+  factory KnowledgeBaseItem.fromJson(Map<String, dynamic> json) {
+    return KnowledgeBaseItem(
+      id: json['id'] as String,
+      content: json['content'] as String,
+      title: json['title'] as String?,
+      createdAt: parseDateTime(json['created_at'] ?? json['createdAt']),
+      updatedAt: parseDateTime(json['updated_at'] ?? json['updatedAt']),
+      metadata:
+          (json['metadata'] as Map<String, dynamic>?) ?? const {},
+    );
+  }
 }
diff --git a/lib/core/models/knowledge_base_file.dart b/lib/core/models/knowledge_base_file.dart
new file mode 100644
index 0000000..55ba3c8
--- /dev/null
+++ b/lib/core/models/knowledge_base_file.dart
@@ -0,0 +1,56 @@
+import 'package:freezed_annotation/freezed_annotation.dart';
+
+import '../utils/json_parsing.dart';
+
+part 'knowledge_base_file.freezed.dart';
+
+/// A file within a knowledge base.
+///
+/// The new WebUI API returns files from a dedicated endpoint with pagination.
+/// Files are deduplicated by content hash (not filename).
+@freezed
+sealed class KnowledgeBaseFile with _$KnowledgeBaseFile {
+  const factory KnowledgeBaseFile({
+    required String id,
+    required String filename,
+    Map<String, dynamic>? meta,
+    required DateTime createdAt,
+    DateTime? updatedAt,
+
+    /// Content hash used for server-side deduplication.
+    String? contentHash,
+  }) = _KnowledgeBaseFile;
+
+  /// Creates a [KnowledgeBaseFile] from JSON, handling various API formats.
+  factory KnowledgeBaseFile.fromJson(Map<String, dynamic> json) {
+    return KnowledgeBaseFile(
+      id: json['id'] as String,
+      filename: _extractFilename(json),
+      meta: json['meta'] as Map<String, dynamic>?,
+      createdAt: parseDateTime(json['created_at'] ?? json['createdAt']),
+      updatedAt: parseDateTimeOrNull(json['updated_at'] ?? json['updatedAt']),
+      contentHash:
+          (json['hash'] ?? json['content_hash'] ?? json['contentHash'])
+              as String?,
+    );
+  }
+}
+
+/// Extracts filename from various possible locations in the JSON.
+///
+/// Checks the top-level `filename` and `name` keys, then `name` and
+/// `filename` inside a nested `meta` map, and returns the first value that
+/// is a string. A key that is present but `null` does not stop the search.
+/// Returns `'Unknown'` when no candidate is a string.
+String _extractFilename(Map<String, dynamic> json) {
+  final meta = json['meta'];
+  final candidates = [
+    json['filename'],
+    json['name'],
+    if (meta is Map) ...[meta['name'], meta['filename']],
+  ];
+  for (final candidate in candidates) {
+    if (candidate is String) return candidate;
+  }
+  return 'Unknown';
+}
diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart
index 21cded8..59a5455 100644
--- a/lib/core/services/api_service.dart
+++ b/lib/core/services/api_service.dart
@@ -15,6 +15,7 @@ import '../models/conversation.dart';
 import '../models/chat_message.dart';
 import '../models/file_info.dart';
 import '../models/knowledge_base.dart';
+import '../models/knowledge_base_file.dart';
 import '../models/prompt.dart';
 import '../auth/api_auth_interceptor.dart';
 import '../error/api_error_interceptor.dart';
@@ -1897,14 +1898,24 @@ class ApiService {
     _traceApi('Fetching knowledge bases');
     final response = await _dio.get('/api/v1/knowledge/');
     final data = response.data;
-    if (data is List) {
-      final normalized = await _normalizeList(
-        data,
-        debugLabel: 'parse_knowledge_bases',
-      );
-      return normalized.map(KnowledgeBase.fromJson).toList(growable: false);
+
+    // Handle new paginated response: { "items": [...], "total": N }
+    // Also maintain backward compatibility with old array response
+    List<dynamic> items;
+    if (data is Map && data['items'] is List) {
+      items = data['items'] as List<dynamic>;
+    } else if (data is List) {
+      // Backward compatibility with old API
+      items = data;
+    } else {
+      return const [];
     }
-    return const [];
+
+    final normalized = await _normalizeList(
+      items,
+      debugLabel: 'parse_knowledge_bases',
+    );
+    return normalized.map(KnowledgeBase.fromJson).toList(growable: false);
   }
 
   Future<Map<String, dynamic>> createKnowledgeBase({
@@ -1989,6 +2000,119 @@ class ApiService {
     return [];
   }
 
+  /// Fetches files for a knowledge base with pagination support.
+  ///
+  /// Returns a record with the list of files and the total count.
+  /// The new API returns paginated results (default 30 items per page).
+  /// A plain-list response (old API) is treated as a single complete page.
+  Future<({List<KnowledgeBaseFile> files, int total})> getKnowledgeBaseFiles(
+    String knowledgeBaseId, {
+    int page = 1,
+  }) async {
+    _traceApi('Fetching knowledge base files: $knowledgeBaseId (page: $page)');
+    final response = await _dio.get(
+      '/api/v1/knowledge/$knowledgeBaseId/files',
+      queryParameters: {'page': page},
+    );
+    final data = response.data;
+
+    // New API: { "items": [...], "total": N }. Old API: a plain list.
+    List<dynamic> items;
+    int? total;
+    if (data is Map) {
+      final rawItems = data['items'];
+      items = rawItems is List ? rawItems : const [];
+      // `total` may be missing or a non-int number; a hard cast would throw.
+      final rawTotal = data['total'];
+      total = rawTotal is num && rawTotal.isFinite
+          ? rawTotal.toInt()
+          : items.length;
+    } else if (data is List) {
+      items = data;
+    } else {
+      return (files: const [], total: 0);
+    }
+
+    final files = items
+        .whereType<Map<String, dynamic>>()
+        .map(KnowledgeBaseFile.fromJson)
+        .toList(growable: false);
+    return (files: files, total: total ?? files.length);
+  }
+
+  /// Fetches ALL files for a knowledge base, handling pagination internally.
+  ///
+  /// Use this when you need the complete list of files (e.g., for deduplication).
+  Future<List<KnowledgeBaseFile>> getAllKnowledgeBaseFiles(
+    String knowledgeBaseId,
+  ) async {
+    _traceApi('Fetching all knowledge base files: $knowledgeBaseId');
+    final allFiles = <KnowledgeBaseFile>[];
+    int page = 1;
+    int total = 0;
+    const maxPages = 100; // Safety limit to prevent infinite loops
+
+    do {
+      final result = await getKnowledgeBaseFiles(knowledgeBaseId, page: page);
+      // Guard against empty pages causing infinite loops
+      if (result.files.isEmpty) {
+        _traceApi('Empty page received, stopping pagination');
+        break;
+      }
+      allFiles.addAll(result.files);
+      total = result.total;
+      page++;
+    } while (allFiles.length < total && page <= maxPages);
+
+    // Only warn when the page cap actually truncated the result; `page` is
+    // also past `maxPages` after a complete fetch that used the last page.
+    if (allFiles.length < total && page > maxPages) {
+      _traceApi('Warning: Hit max page limit ($maxPages) for $knowledgeBaseId');
+    }
+    _traceApi('Fetched ${allFiles.length} total files from $knowledgeBaseId');
+    return allFiles;
+  }
+
+  /// Adds a file to a knowledge base.
+  ///
+  /// Returns the file metadata on success, or null if the file already exists
+  /// (duplicate content detected by the server based on content hash).
+  /// Any other [DioException] is rethrown.
+  Future<Map<String, dynamic>?> addFileToKnowledgeBase(
+    String knowledgeBaseId, {
+    required String filename,
+    required List<int> content,
+  }) async {
+    _traceApi('Adding file to knowledge base: $knowledgeBaseId ($filename)');
+    try {
+      final mimeType = _getMimeType(filename);
+      final response = await _dio.post(
+        '/api/v1/knowledge/$knowledgeBaseId/file/add',
+        data: FormData.fromMap({
+          'file': MultipartFile.fromBytes(
+            content,
+            filename: filename,
+            contentType: mimeType != null ? MediaType.parse(mimeType) : null,
+          ),
+        }),
+      );
+      return response.data as Map<String, dynamic>;
+    } on DioException catch (e) {
+      // Handle duplicate content as a no-op (file already exists)
+      if (e.response?.statusCode == 400) {
+        final responseData = e.response?.data;
+        final detail = responseData is Map ? responseData['detail'] : null;
+        // `detail` may not be a string; a hard cast here would replace the
+        // DioException with a TypeError.
+        if (detail is String && detail.contains('Duplicate content')) {
+          _traceApi('Skipping duplicate file: $filename');
+          return null; // Indicates file already exists
+        }
+      }
+      rethrow;
+    }
+  }
+
   Future<Map<String, dynamic>?> processWebpage({
     required String url,
     String? collectionName,
diff --git a/lib/core/utils/json_parsing.dart b/lib/core/utils/json_parsing.dart
new file mode 100644
index 0000000..e802900
--- /dev/null
+++ b/lib/core/utils/json_parsing.dart
@@ -0,0 +1,58 @@
+/// Utilities for parsing JSON values with graceful fallbacks.
+///
+/// These helpers handle various API response formats and provide defensive
+/// parsing to avoid crashes from malformed data.
+library;
+
+/// Largest magnitude of seconds since the Unix epoch that [DateTime] can
+/// represent (100,000,000 days either side of the epoch).
+const _maxEpochSeconds = 8640000000000;
+
+/// Parses a DateTime from various formats.
+///
+/// Handles:
+/// - `DateTime` objects (returned as-is)
+/// - ISO 8601 strings (parsed)
+/// - Unix timestamps as numbers (assumed to be in seconds; fractional
+///   seconds are kept to millisecond precision)
+/// - `null` or invalid values, including non-finite or out-of-range
+///   timestamps (returns [DateTime.now])
+DateTime parseDateTime(Object? value) {
+  if (value is DateTime) {
+    return value;
+  }
+  if (value is String) {
+    // Use tryParse to avoid FormatException on malformed strings
+    return DateTime.tryParse(value) ?? DateTime.now();
+  }
+  // `abs() <= max` is false for NaN and infinity, so those fall through.
+  if (value is num && value.abs() <= _maxEpochSeconds) {
+    // Assume Unix timestamp in seconds
+    return DateTime.fromMillisecondsSinceEpoch((value * 1000).round());
+  }
+  return DateTime.now();
+}
+
+/// Parses a nullable DateTime from various formats.
+///
+/// Returns `null` if the input is `null`, otherwise delegates to [parseDateTime].
+DateTime? parseDateTimeOrNull(Object? value) {
+  if (value == null) return null;
+  return parseDateTime(value);
+}
+
+/// Parses an int from various formats.
+///
+/// Handles:
+/// - `int` values (returned as-is)
+/// - finite `num` values (truncated to int)
+/// - String values (parsed with [int.tryParse])
+/// - `null` or invalid values, including NaN and infinity (returns `null`)
+int? parseInt(Object? value) {
+  if (value is int) return value;
+  // toInt() throws UnsupportedError on NaN and infinity, so map those to
+  // null instead.
+  if (value is num) return value.isFinite ? value.toInt() : null;
+  if (value is String) return int.tryParse(value);
+  return null;
+}