feat(chat): Add usage statistics support for message persistence

This commit is contained in:
cogwheel0
2025-12-15 18:42:06 +05:30
parent c21e70396d
commit 55cedc3ab8
7 changed files with 505 additions and 41 deletions

View File

@@ -156,6 +156,12 @@ sealed class Model with _$Model {
}
}
// Extract usage capability from info.meta.capabilities (OpenWebUI format)
// This indicates whether the model supports stream_options.include_usage
final infoMetaCapabilities =
infoMeta?['capabilities'] as Map<String, dynamic>?;
final supportsUsage = infoMetaCapabilities?['usage'] == true;
// Fallback to top-level toolIds (for cached models serialized via toJson)
if (toolIds == null || toolIds.isEmpty) {
final topLevelToolIds = json['toolIds'];
@@ -199,6 +205,7 @@ sealed class Model with _$Model {
'pricing': json['pricing'],
'context_length': json['context_length'],
'supported_parameters': supportedParamsList ?? supportedParams,
'usage': supportsUsage,
},
metadata: mergedMetadata,
toolIds: toolIds,

View File

@@ -966,7 +966,7 @@ class ApiService {
if (msg.attachmentIds != null && msg.attachmentIds!.isNotEmpty)
'attachment_ids': List<String>.from(msg.attachmentIds!),
if (sanitizedFiles != null) 'files': sanitizedFiles,
// Mirror status updates, follow-ups, code executions, and sources
// Mirror status updates, follow-ups, code executions, sources, and usage
if (msg.statusHistory.isNotEmpty)
'statusHistory': msg.statusHistory.map((s) => s.toJson()).toList(),
if (msg.followUps.isNotEmpty)
@@ -975,6 +975,8 @@ class ApiService {
'codeExecutions': msg.codeExecutions.map((e) => e.toJson()).toList(),
if (msg.sources.isNotEmpty)
'sources': msg.sources.map((s) => s.toJson()).toList(),
// Include usage statistics for persistence (issue #274)
if (msg.usage != null) 'usage': msg.usage,
};
// Update parent's childrenIds
@@ -1001,7 +1003,7 @@ class ApiService {
if (msg.attachmentIds != null && msg.attachmentIds!.isNotEmpty)
'attachment_ids': List<String>.from(msg.attachmentIds!),
if (sanitizedArrayFiles != null) 'files': sanitizedArrayFiles,
// Mirror status updates, follow-ups, code executions, and sources
// Mirror status updates, follow-ups, code executions, sources, and usage
if (msg.statusHistory.isNotEmpty)
'statusHistory': msg.statusHistory.map((s) => s.toJson()).toList(),
if (msg.followUps.isNotEmpty)
@@ -1010,6 +1012,8 @@ class ApiService {
'codeExecutions': msg.codeExecutions.map((e) => e.toJson()).toList(),
if (msg.sources.isNotEmpty)
'sources': msg.sources.map((s) => s.toJson()).toList(),
// Include usage statistics for persistence (issue #274)
if (msg.usage != null) 'usage': msg.usage,
});
previousId = messageId;
@@ -1747,6 +1751,10 @@ class ApiService {
}
// Send chat completed notification
// This persists usage data and other message metadata to the server
/// Notify backend that chat streaming is complete.
/// This triggers any configured filters/actions on the backend.
/// Matches OpenWebUI's chatCompletedHandler in Chat.svelte.
Future<void> sendChatCompleted({
required String chatId,
required String messageId,
@@ -1754,61 +1762,61 @@ class ApiService {
required String model,
Map<String, dynamic>? modelItem,
String? sessionId,
List<String>? filterIds,
}) async {
_traceApi('Sending chat completed notification (optional endpoint)');
// This endpoint appears to be optional or deprecated in newer OpenWebUI versions
// The main chat synchronization happens through /api/v1/chats/{id} updates
// We'll still try to call it but won't fail if it doesn't work
// Format messages to match OpenWebUI expected structure
// Note: Removing 'id' field as it causes 400 error
// Format messages to match OpenWebUI expected structure exactly
final formattedMessages = messages.map((msg) {
final formatted = {
// Don't include 'id' - it causes 400 error with detail: 'id'
final formatted = <String, dynamic>{
'id': msg['id'],
'role': msg['role'],
'content': msg['content'],
'timestamp':
msg['timestamp'] ?? DateTime.now().millisecondsSinceEpoch ~/ 1000,
};
// Add model info for assistant messages
if (msg['role'] == 'assistant') {
formatted['model'] = model;
if (msg.containsKey('usage')) {
// Include info if present (OpenWebUI sends this)
if (msg.containsKey('info') && msg['info'] != null) {
formatted['info'] = msg['info'];
}
// Include usage if present (issue #274)
if (msg.containsKey('usage') && msg['usage'] != null) {
formatted['usage'] = msg['usage'];
}
// Include sources if present
if (msg.containsKey('sources') && msg['sources'] != null) {
formatted['sources'] = msg['sources'];
}
return formatted;
}).toList();
// Include the message ID and session ID at the top level - server expects these
final requestData = {
'id': messageId, // The server expects the assistant message ID here
'chat_id': chatId,
final requestData = <String, dynamic>{
'model': model,
'messages': formattedMessages,
'session_id':
sessionId ?? const Uuid().v4().substring(0, 20), // Add session_id
// Don't include model_item as it might not be expected
'chat_id': chatId,
'session_id': sessionId ?? const Uuid().v4().substring(0, 20),
'id': messageId,
};
// Include filter_ids if provided (for outlet filters)
if (filterIds != null && filterIds.isNotEmpty) {
requestData['filter_ids'] = filterIds;
}
// Include model_item if available
if (modelItem != null) {
requestData['model_item'] = modelItem;
}
try {
final response = await _dio.post(
await _dio.post(
'/api/chat/completed',
data: requestData,
options: Options(
sendTimeout: const Duration(seconds: 4),
receiveTimeout: const Duration(seconds: 4),
sendTimeout: const Duration(seconds: 10),
receiveTimeout: const Duration(seconds: 10),
),
);
_traceApi('Chat completed response: ${response.statusCode}');
} catch (e) {
// This is a non-critical endpoint - main sync happens via /api/v1/chats/{id}
_traceApi(
'Chat completed endpoint not available or failed (non-critical): $e',
);
} catch (_) {
// Non-critical - filters/actions may not be configured
}
}
@@ -2826,6 +2834,16 @@ class ApiService {
data['chat_id'] = conversationId;
}
// Request usage statistics if model supports it (issue #274)
// Matches OpenWebUI: only sends stream_options when model.info.meta.capabilities.usage is true
final supportsUsage =
modelItem?['capabilities']?['usage'] == true ||
(modelItem?['info'] as Map?)?['meta']?['capabilities']?['usage'] ==
true;
if (supportsUsage) {
data['stream_options'] = {'include_usage': true};
}
// Add feature flags via 'features' object only (not as top-level params).
// Top-level 'web_search'/'image_generation' params are not recognized by
// OpenAI and cause errors when forwarded. Open WebUI expects these in the

View File

@@ -305,6 +305,10 @@ Map<String, dynamic> _parseOpenWebUIMessageToJson(
? historyMsg['sources'] ?? historyMsg['citations']
: msgData['sources'] ?? msgData['citations'];
// Parse usage data - Open WebUI stores this in 'usage' field on messages
final rawUsage = _coerceJsonMap(historyMsg?['usage'] ?? msgData['usage']);
final Map<String, dynamic>? usage = rawUsage.isEmpty ? null : rawUsage;
return <String, dynamic>{
'id': (msgData['id'] ?? _uuid.v4()).toString(),
'role': role,
@@ -319,7 +323,7 @@ Map<String, dynamic> _parseOpenWebUIMessageToJson(
'followUps': _coerceStringList(followUpsRaw),
'codeExecutions': _parseCodeExecutionsField(codeExecRaw),
'sources': _parseSourcesField(sourcesRaw),
'usage': _coerceJsonMap(msgData['usage']),
'usage': usage,
'versions': const <Map<String, dynamic>>[],
};
}

View File

@@ -576,12 +576,15 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
setFollowUps(assistant.id, assistant.followUps);
updateMessageById(assistant.id, (current) {
// Preserve existing usage if server doesn't have it yet (issue #274)
// Usage is captured from streaming but may not be persisted on server
final effectiveUsage = assistant.usage ?? current.usage;
return current.copyWith(
followUps: List<String>.from(assistant.followUps),
statusHistory: assistant.statusHistory,
sources: assistant.sources,
metadata: {...?current.metadata, ...?assistant.metadata},
usage: assistant.usage,
usage: effectiveUsage,
);
});
} catch (_) {
@@ -638,6 +641,14 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
}
try {
final Map<String, dynamic> j = jsonDecode(dataStr);
// Capture usage statistics from OpenAI-style streaming (issue #274)
// Usage is sent in the final chunk with stream_options.include_usage
final usageData = j['usage'];
if (usageData is Map<String, dynamic> && usageData.isNotEmpty) {
updateLastMessageWith((m) => m.copyWith(usage: usageData));
}
final choices = j['choices'];
if (choices is List && choices.isNotEmpty) {
final choice = choices.first;
@@ -746,6 +757,18 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
if (type == 'chat:completion' && payload != null) {
if (payload is Map<String, dynamic>) {
// Capture usage statistics whenever they appear (issue #274)
// Usage may come in a separate payload before the done:true payload
final usageData = payload['usage'];
if (usageData is Map<String, dynamic> && usageData.isNotEmpty) {
final targetId = _resolveTargetMessageId(messageId, getMessages);
if (targetId != null) {
updateMessageById(targetId, (current) {
return current.copyWith(usage: usageData);
});
}
}
final rawSources = payload['sources'] ?? payload['citations'];
final normalizedSources = _normalizeSourcesPayload(rawSources);
if (normalizedSources != null && normalizedSources.isNotEmpty) {
@@ -832,18 +855,55 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
}
if (payload['done'] == true) {
try {
// Get current messages to send with usage data (issue #274)
final currentMessages = getMessages();
final messagesForCompleted = currentMessages.map((m) {
final msgMap = <String, dynamic>{
'id': m.id,
'role': m.role,
'content': m.content,
'timestamp': m.timestamp.millisecondsSinceEpoch ~/ 1000,
};
if (m.role == 'assistant' && m.usage != null) {
msgMap['usage'] = m.usage;
}
if (m.sources.isNotEmpty) {
msgMap['sources'] = m.sources.map((s) => s.toJson()).toList();
}
return msgMap;
}).toList();
// Send chatCompleted to run any filters/actions
// ignore: unawaited_futures
api.sendChatCompleted(
chatId: activeConversationId ?? '',
messageId: assistantMessageId,
messages: const [],
messages: messagesForCompleted,
model: modelId,
modelItem: modelItem,
sessionId: sessionId,
);
} catch (_) {}
Future.microtask(refreshConversationSnapshot);
// Sync conversation to persist usage data (issue #274)
// chatCompleted doesn't persist - syncConversationMessages does
final chatId = activeConversationId;
if (chatId != null && chatId.isNotEmpty) {
// ignore: unawaited_futures
api.syncConversationMessages(
chatId,
currentMessages,
model: modelId,
);
}
} catch (_) {
// Non-critical - continue if sync fails
}
// Delay snapshot refresh to allow backend to persist data
Future.delayed(
const Duration(milliseconds: 500),
refreshConversationSnapshot,
);
final msgs = getMessages();
if (msgs.isNotEmpty && msgs.last.role == 'assistant') {

View File

@@ -1521,6 +1521,10 @@ Future<void> regenerateMessage(
'actions': <dynamic>[],
'filters': <dynamic>[],
'tags': <dynamic>[],
// Include capabilities from the actual model for usage stats support
'capabilities': selectedModel.capabilities,
// Include info/metadata for usage capability detection
'info': selectedModel.metadata?['info'],
};
// WebSocket-only streaming requires socket connection
@@ -2217,6 +2221,10 @@ Future<void> _sendMessageInternal(
'actions': <dynamic>[],
'filters': <dynamic>[],
'tags': <dynamic>[],
// Include capabilities from the actual model for usage stats support
'capabilities': selectedModel.capabilities,
// Include info/metadata for usage capability detection
'info': selectedModel.metadata?['info'],
};
// WebSocket-only streaming requires socket connection.

View File

@@ -1346,6 +1346,15 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
},
),
],
// Usage info button (like Open WebUI)
if (widget.message.usage != null &&
widget.message.usage!.isNotEmpty) ...[
_buildActionButton(
icon: Platform.isIOS ? CupertinoIcons.info : Icons.info_outline,
label: l10n.usageInfo,
onTap: () => _showUsageInfoSheet(context, widget.message.usage!),
),
],
if (isErrorMessage) ...[
_buildActionButton(
icon: Platform.isIOS
@@ -1373,6 +1382,242 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
return ChatActionButton(icon: icon, label: label, onTap: onTap);
}
/// Presents the usage/performance statistics of a response in a modal
/// bottom sheet.
///
/// Modeled on Open WebUI's info button, but laid out for mobile: a title row
/// followed by the stat rows that [_buildUsageStats] derives from [usage].
void _showUsageInfoSheet(BuildContext context, Map<String, dynamic> usage) {
  final theme = context.conduitTheme;
  final l10n = AppLocalizations.of(context)!;

  // Heading: analytics icon followed by the localized sheet title.
  Widget buildHeader() {
    final titleStyle = TextStyle(
      fontSize: AppTypography.bodyLarge,
      fontWeight: FontWeight.w600,
      color: theme.textPrimary,
    );
    return Row(
      children: [
        Icon(
          Icons.analytics_outlined,
          size: IconSize.md,
          color: theme.textPrimary,
        ),
        const SizedBox(width: Spacing.sm),
        Text(l10n.usageInfoTitle, style: titleStyle),
      ],
    );
  }

  showModalBottomSheet<void>(
    context: context,
    backgroundColor: theme.surfaceBackground,
    shape: const RoundedRectangleBorder(
      borderRadius: BorderRadius.vertical(
        top: Radius.circular(AppBorderRadius.dialog),
      ),
    ),
    builder: (sheetContext) => SafeArea(
      child: Padding(
        padding: const EdgeInsets.all(Spacing.lg),
        child: Column(
          mainAxisSize: MainAxisSize.min,
          crossAxisAlignment: CrossAxisAlignment.start,
          children: [
            buildHeader(),
            const SizedBox(height: Spacing.lg),
            // One row per statistic that could be derived from the map.
            ..._buildUsageStats(sheetContext, usage, l10n, theme),
          ],
        ),
      ),
    ),
  );
}
/// Turns a raw [usage] map into the ordered list of stat rows for the sheet.
///
/// Rows are emitted in this order, each only when its data is present:
/// token generation, prompt processing, reasoning tokens, total tokens,
/// total duration, queue time and model load time.
///
/// For the two throughput rows the `eval_*` count/duration pair (duration in
/// nanoseconds) wins over the `*_tokens` / `*_time` pair (time in seconds),
/// which in turn wins over a bare token count.
List<Widget> _buildUsageStats(
  BuildContext context,
  Map<String, dynamic> usage,
  AppLocalizations l10n,
  ConduitThemeExtension theme,
) {
  // Small local helpers so every row below is built the same way.
  num? field(String key) => _parseNum(usage[key]);
  String tokensText(num count) => l10n.usageTokenCount(count.toInt());
  String rateText(num tokensPerSecond) =>
      l10n.usageTokensPerSecond(tokensPerSecond.toStringAsFixed(1));
  String secondsText(num seconds, int fractionDigits) =>
      l10n.usageSecondsFormat(seconds.toStringAsFixed(fractionDigits));
  _UsageStatRow row(String label, String value, {String? detail}) =>
      _UsageStatRow(label: label, value: value, detail: detail, theme: theme);

  // Builds a throughput row from whichever representation is available,
  // or returns null when none of them is.
  _UsageStatRow? throughputRow({
    required String label,
    required num? nanoCount,
    required num? nanoDuration,
    required num? tokenCount,
    required num? elapsedSeconds,
  }) {
    if (nanoCount != null && nanoDuration != null && nanoDuration > 0) {
      // Ollama/llama.cpp: duration in nanoseconds
      return row(
        label,
        rateText(nanoCount / (nanoDuration / 1e9)),
        detail: tokensText(nanoCount),
      );
    }
    if (tokenCount == null) return null;
    if (elapsedSeconds != null && elapsedSeconds > 0) {
      // Groq/OpenAI extended: time in seconds
      return row(
        label,
        rateText(tokenCount / elapsedSeconds),
        detail: tokensText(tokenCount),
      );
    }
    // Basic OpenAI: token count only
    return row(label, tokensText(tokenCount));
  }

  final completionTokens = field('completion_tokens');
  final promptTokens = field('prompt_tokens');
  final totalTokens = field('total_tokens');

  // Seconds-based timings (Groq/OpenAI extended format).
  final totalTime = field('total_time');
  final queueTime = field('queue_time');

  // Nanosecond-based timings (Ollama/llama.cpp format).
  final totalDuration = field('total_duration');
  final loadDuration = field('load_duration');

  // Reasoning tokens live in a nested details map (OpenAI o1/o3, Groq).
  final details = usage['completion_tokens_details'];
  final reasoningTokens =
      details is Map ? _parseNum(details['reasoning_tokens']) : null;

  final generationRow = throughputRow(
    label: l10n.usageTokenGeneration,
    nanoCount: field('eval_count'),
    nanoDuration: field('eval_duration'),
    tokenCount: completionTokens,
    elapsedSeconds: field('completion_time'),
  );
  final promptRow = throughputRow(
    label: l10n.usagePromptEval,
    nanoCount: field('prompt_eval_count'),
    nanoDuration: field('prompt_eval_duration'),
    tokenCount: promptTokens,
    elapsedSeconds: field('prompt_time'),
  );

  return <Widget>[
    if (generationRow != null) generationRow,
    if (promptRow != null) promptRow,
    if (reasoningTokens != null && reasoningTokens > 0)
      row(l10n.usageReasoningTokens, tokensText(reasoningTokens)),
    // Total is only shown when it is not already implied by both parts.
    if (totalTokens != null &&
        (completionTokens == null || promptTokens == null))
      row(l10n.usageTotalTokens, tokensText(totalTokens)),
    if (totalDuration != null && totalDuration > 0)
      // Ollama/llama.cpp: nanoseconds
      row(l10n.usageTotalDuration, secondsText(totalDuration / 1e9, 2))
    else if (totalTime != null && totalTime > 0)
      // Groq/OpenAI extended: seconds
      row(l10n.usageTotalDuration, secondsText(totalTime, 2)),
    if (queueTime != null && queueTime > 0)
      row(l10n.usageQueueTime, secondsText(queueTime, 3)),
    if (loadDuration != null && loadDuration > 0)
      row(l10n.usageLoadDuration, secondsText(loadDuration / 1e9, 2)),
  ];
}
/// Safely parses a finite number from a dynamic (JSON-decoded) value.
///
/// Accepts [num] values as-is and [String]s via [num.tryParse]. Returns
/// `null` for `null`, for any other type, and for strings that do not parse.
///
/// Non-finite results (`NaN`, `Infinity`, `-Infinity`, including the string
/// forms that [num.tryParse] accepts) are also mapped to `null`: callers
/// format the result with `toInt()`, which throws [UnsupportedError] on
/// non-finite doubles.
num? _parseNum(dynamic value) {
  final num? parsed;
  if (value is num) {
    parsed = value;
  } else if (value is String) {
    parsed = num.tryParse(value);
  } else {
    parsed = null;
  }
  if (parsed == null || !parsed.isFinite) return null;
  return parsed;
}
// Reasoning tile rendered inline - minimal design inspired by OpenWebUI
Widget _buildReasoningTile(ReasoningEntry rc, int index) {
final isExpanded = _expandedReasoning.contains(index);
@@ -1878,3 +2123,59 @@ Future<void> _launchUri(String url) async {
DebugLogger.log('Unable to open url $url: $err', scope: 'chat/assistant');
}
}
/// Row widget for displaying a single usage statistic.
///
/// Lays out [label] on the leading side and [value] (with an optional
/// [detail] line beneath it) on the trailing side. The label is flexible so
/// that long or localized labels wrap instead of overflowing the row.
class _UsageStatRow extends StatelessWidget {
  const _UsageStatRow({
    required this.label,
    required this.value,
    this.detail,
    required this.theme,
  });

  /// Name of the metric.
  final String label;

  /// Formatted metric value, rendered emphasized in a monospace font.
  final String value;

  /// Optional secondary text rendered under [value].
  final String? detail;

  /// Theme extension that supplies the text colors.
  final ConduitThemeExtension theme;

  @override
  Widget build(BuildContext context) {
    // Copy to a local so the null check promotes (public fields do not).
    final detailText = detail;
    return Padding(
      padding: const EdgeInsets.only(bottom: Spacing.sm),
      child: Row(
        children: [
          // Expanded takes the space left over by the value column, which
          // keeps the value right-aligned and lets the label wrap rather
          // than trigger a RenderFlex overflow on narrow widths.
          Expanded(
            child: Text(
              label,
              style: TextStyle(
                fontSize: AppTypography.bodyMedium,
                color: theme.textSecondary,
              ),
            ),
          ),
          const SizedBox(width: Spacing.sm),
          Column(
            crossAxisAlignment: CrossAxisAlignment.end,
            children: [
              Text(
                value,
                style: TextStyle(
                  fontSize: AppTypography.bodyMedium,
                  fontWeight: FontWeight.w600,
                  fontFamily: AppTypography.monospaceFontFamily,
                  color: theme.textPrimary,
                ),
              ),
              if (detailText != null)
                Text(
                  detailText,
                  style: TextStyle(
                    fontSize: AppTypography.labelSmall,
                    color: theme.textTertiary,
                  ),
                ),
            ],
          ),
        ],
      ),
    );
  }
}

View File

@@ -1000,6 +1000,72 @@
"@ttsStop": {
"description": "Action to stop text to speech playback"
},
"usageInfo": "Info",
"@usageInfo": {
"description": "Action to show usage/performance statistics for a response"
},
"usageInfoTitle": "Response Statistics",
"@usageInfoTitle": {
"description": "Title for the usage statistics dialog/sheet"
},
"usageTokenGeneration": "Token Generation",
"@usageTokenGeneration": {
"description": "Label for token generation speed metric"
},
"usagePromptEval": "Prompt Processing",
"@usagePromptEval": {
"description": "Label for prompt evaluation/pre-processing speed metric"
},
"usageTotalDuration": "Total Duration",
"@usageTotalDuration": {
"description": "Label for total response generation duration"
},
"usageTokensPerSecond": "{speed} t/s",
"@usageTokensPerSecond": {
"description": "Tokens per second display format",
"placeholders": {
"speed": {
"type": "String",
"example": "45.2"
}
}
},
"usageTokenCount": "{count} tokens",
"@usageTokenCount": {
"description": "Token count display format",
"placeholders": {
"count": {
"type": "int",
"example": "128"
}
}
},
"usageSecondsFormat": "{seconds}s",
"@usageSecondsFormat": {
"description": "Seconds duration format",
"placeholders": {
"seconds": {
"type": "String",
"example": "2.34"
}
}
},
"usageLoadDuration": "Model Load Time",
"@usageLoadDuration": {
"description": "Label for model loading duration metric"
},
"usageQueueTime": "Queue Time",
"@usageQueueTime": {
"description": "Label for queue waiting time metric"
},
"usageReasoningTokens": "Reasoning Tokens",
"@usageReasoningTokens": {
"description": "Label for reasoning/thinking tokens count"
},
"usageTotalTokens": "Total Tokens",
"@usageTotalTokens": {
"description": "Label for total token count"
},
"edit": "Edit",
"@edit": {
"description": "Action to edit an item/message."