feat(chat): Add usage statistics support for message persistence

2025-12-15 18:42:06 +05:30
parent c21e70396d
commit 55cedc3ab8
7 changed files with 505 additions and 41 deletions
--- a/lib/core/models/model.dart
+++ b/lib/core/models/model.dart
@@ -156,6 +156,12 @@ sealed class Model with _$Model {
      }
    }

+    // Extract usage capability from info.meta.capabilities (OpenWebUI format)
+    // This indicates whether the model supports stream_options.include_usage
+    final infoMetaCapabilities =
+        infoMeta?['capabilities'] as Map<String, dynamic>?;
+    final supportsUsage = infoMetaCapabilities?['usage'] == true;
+
    // Fallback to top-level toolIds (for cached models serialized via toJson)
    if (toolIds == null || toolIds.isEmpty) {
      final topLevelToolIds = json['toolIds'];
@@ -199,6 +205,7 @@ sealed class Model with _$Model {
        'pricing': json['pricing'],
        'context_length': json['context_length'],
        'supported_parameters': supportedParamsList ?? supportedParams,
+        'usage': supportsUsage,
      },
      metadata: mergedMetadata,
      toolIds: toolIds,
--- a/lib/core/services/api_service.dart
+++ b/lib/core/services/api_service.dart
@@ -966,7 +966,7 @@ class ApiService {
        if (msg.attachmentIds != null && msg.attachmentIds!.isNotEmpty)
          'attachment_ids': List<String>.from(msg.attachmentIds!),
        if (sanitizedFiles != null) 'files': sanitizedFiles,
-        // Mirror status updates, follow-ups, code executions, and sources
+        // Mirror status updates, follow-ups, code executions, sources, and usage
        if (msg.statusHistory.isNotEmpty)
          'statusHistory': msg.statusHistory.map((s) => s.toJson()).toList(),
        if (msg.followUps.isNotEmpty)
@@ -975,6 +975,8 @@ class ApiService {
          'codeExecutions': msg.codeExecutions.map((e) => e.toJson()).toList(),
        if (msg.sources.isNotEmpty)
          'sources': msg.sources.map((s) => s.toJson()).toList(),
+        // Include usage statistics for persistence (issue #274)
+        if (msg.usage != null) 'usage': msg.usage,
      };

      // Update parent's childrenIds
@@ -1001,7 +1003,7 @@ class ApiService {
        if (msg.attachmentIds != null && msg.attachmentIds!.isNotEmpty)
          'attachment_ids': List<String>.from(msg.attachmentIds!),
        if (sanitizedArrayFiles != null) 'files': sanitizedArrayFiles,
-        // Mirror status updates, follow-ups, code executions, and sources
+        // Mirror status updates, follow-ups, code executions, sources, and usage
        if (msg.statusHistory.isNotEmpty)
          'statusHistory': msg.statusHistory.map((s) => s.toJson()).toList(),
        if (msg.followUps.isNotEmpty)
@@ -1010,6 +1012,8 @@ class ApiService {
          'codeExecutions': msg.codeExecutions.map((e) => e.toJson()).toList(),
        if (msg.sources.isNotEmpty)
          'sources': msg.sources.map((s) => s.toJson()).toList(),
+        // Include usage statistics for persistence (issue #274)
+        if (msg.usage != null) 'usage': msg.usage,
      });

      previousId = messageId;
@@ -1747,6 +1751,10 @@ class ApiService {
  }

  // Send chat completed notification
+  // Usage is persisted by syncConversationMessages, not by this notification.
+  /// Notify backend that chat streaming is complete.
+  /// This triggers any configured filters/actions on the backend.
+  /// Matches OpenWebUI's chatCompletedHandler in Chat.svelte.
  Future<void> sendChatCompleted({
    required String chatId,
    required String messageId,
@@ -1754,61 +1762,61 @@ class ApiService {
    required String model,
    Map<String, dynamic>? modelItem,
    String? sessionId,
+    List<String>? filterIds,
  }) async {
-    _traceApi('Sending chat completed notification (optional endpoint)');
-
-    // This endpoint appears to be optional or deprecated in newer OpenWebUI versions
-    // The main chat synchronization happens through /api/v1/chats/{id} updates
-    // We'll still try to call it but won't fail if it doesn't work
-
-    // Format messages to match OpenWebUI expected structure
-    // Note: Removing 'id' field as it causes 400 error
+    // Format messages to match OpenWebUI expected structure exactly
    final formattedMessages = messages.map((msg) {
-      final formatted = {
-        // Don't include 'id' - it causes 400 error with detail: 'id'
+      final formatted = <String, dynamic>{
+        'id': msg['id'],
        'role': msg['role'],
        'content': msg['content'],
        'timestamp':
            msg['timestamp'] ?? DateTime.now().millisecondsSinceEpoch ~/ 1000,
      };
-
-      // Add model info for assistant messages
-      if (msg['role'] == 'assistant') {
-        formatted['model'] = model;
-        if (msg.containsKey('usage')) {
+      // Include info if present (OpenWebUI sends this)
+      if (msg.containsKey('info') && msg['info'] != null) {
+        formatted['info'] = msg['info'];
+      }
+      // Include usage if present (issue #274)
+      if (msg.containsKey('usage') && msg['usage'] != null) {
        formatted['usage'] = msg['usage'];
      }
+      // Include sources if present
+      if (msg.containsKey('sources') && msg['sources'] != null) {
+        formatted['sources'] = msg['sources'];
      }
-
      return formatted;
    }).toList();

-    // Include the message ID and session ID at the top level - server expects these
-    final requestData = {
-      'id': messageId, // The server expects the assistant message ID here
-      'chat_id': chatId,
+    final requestData = <String, dynamic>{
      'model': model,
      'messages': formattedMessages,
-      'session_id':
-          sessionId ?? const Uuid().v4().substring(0, 20), // Add session_id
-      // Don't include model_item as it might not be expected
+      'chat_id': chatId,
+      'session_id': sessionId ?? const Uuid().v4().substring(0, 20),
+      'id': messageId,
    };

+    // Include filter_ids if provided (for outlet filters)
+    if (filterIds != null && filterIds.isNotEmpty) {
+      requestData['filter_ids'] = filterIds;
+    }
+
+    // Include model_item if available
+    if (modelItem != null) {
+      requestData['model_item'] = modelItem;
+    }
+
    try {
-      final response = await _dio.post(
+      await _dio.post(
        '/api/chat/completed',
        data: requestData,
        options: Options(
-          sendTimeout: const Duration(seconds: 4),
-          receiveTimeout: const Duration(seconds: 4),
+          sendTimeout: const Duration(seconds: 10),
+          receiveTimeout: const Duration(seconds: 10),
        ),
      );
-      _traceApi('Chat completed response: ${response.statusCode}');
-    } catch (e) {
-      // This is a non-critical endpoint - main sync happens via /api/v1/chats/{id}
-      _traceApi(
-        'Chat completed endpoint not available or failed (non-critical): $e',
-      );
+    } catch (_) {
+      // Non-critical - filters/actions may not be configured
    }
  }

@@ -2826,6 +2834,16 @@ class ApiService {
      data['chat_id'] = conversationId;
    }

+    // Request usage statistics if model supports it (issue #274)
+    // Matches OpenWebUI: only sends stream_options when model.info.meta.capabilities.usage is true
+    final supportsUsage =
+        modelItem?['capabilities']?['usage'] == true ||
+        (modelItem?['info'] as Map?)?['meta']?['capabilities']?['usage'] ==
+            true;
+    if (supportsUsage) {
+      data['stream_options'] = {'include_usage': true};
+    }
+
    // Add feature flags via 'features' object only (not as top-level params).
    // Top-level 'web_search'/'image_generation' params are not recognized by
    // OpenAI and cause errors when forwarded. Open WebUI expects these in the
--- a/lib/core/services/conversation_parsing.dart
+++ b/lib/core/services/conversation_parsing.dart
@@ -305,6 +305,10 @@ Map<String, dynamic> _parseOpenWebUIMessageToJson(
      ? historyMsg['sources'] ?? historyMsg['citations']
      : msgData['sources'] ?? msgData['citations'];

+  // Parse usage data - Open WebUI stores this in 'usage' field on messages
+  final rawUsage = _coerceJsonMap(historyMsg?['usage'] ?? msgData['usage']);
+  final Map<String, dynamic>? usage = rawUsage.isEmpty ? null : rawUsage;
+
  return <String, dynamic>{
    'id': (msgData['id'] ?? _uuid.v4()).toString(),
    'role': role,
@@ -319,7 +323,7 @@ Map<String, dynamic> _parseOpenWebUIMessageToJson(
    'followUps': _coerceStringList(followUpsRaw),
    'codeExecutions': _parseCodeExecutionsField(codeExecRaw),
    'sources': _parseSourcesField(sourcesRaw),
-    'usage': _coerceJsonMap(msgData['usage']),
+    'usage': usage,
    'versions': const <Map<String, dynamic>>[],
  };
 }
--- a/lib/core/services/streaming_helper.dart
+++ b/lib/core/services/streaming_helper.dart
@@ -576,12 +576,15 @@ ActiveSocketStream attachUnifiedChunkedStreaming({

      setFollowUps(assistant.id, assistant.followUps);
      updateMessageById(assistant.id, (current) {
+        // Preserve existing usage if server doesn't have it yet (issue #274)
+        // Usage is captured from streaming but may not be persisted on server
+        final effectiveUsage = assistant.usage ?? current.usage;
        return current.copyWith(
          followUps: List<String>.from(assistant.followUps),
          statusHistory: assistant.statusHistory,
          sources: assistant.sources,
          metadata: {...?current.metadata, ...?assistant.metadata},
-          usage: assistant.usage,
+          usage: effectiveUsage,
        );
      });
    } catch (_) {
@@ -638,6 +641,14 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
            }
            try {
              final Map<String, dynamic> j = jsonDecode(dataStr);
+
+              // Capture usage statistics from OpenAI-style streaming (issue #274)
+              // Usage is sent in the final chunk with stream_options.include_usage
+              final usageData = j['usage'];
+              if (usageData is Map<String, dynamic> && usageData.isNotEmpty) {
+                updateLastMessageWith((m) => m.copyWith(usage: usageData));
+              }
+
              final choices = j['choices'];
              if (choices is List && choices.isNotEmpty) {
                final choice = choices.first;
@@ -746,6 +757,18 @@ ActiveSocketStream attachUnifiedChunkedStreaming({

      if (type == 'chat:completion' && payload != null) {
        if (payload is Map<String, dynamic>) {
+          // Capture usage statistics whenever they appear (issue #274)
+          // Usage may come in a separate payload before the done:true payload
+          final usageData = payload['usage'];
+          if (usageData is Map<String, dynamic> && usageData.isNotEmpty) {
+            final targetId = _resolveTargetMessageId(messageId, getMessages);
+            if (targetId != null) {
+              updateMessageById(targetId, (current) {
+                return current.copyWith(usage: usageData);
+              });
+            }
+          }
+
          final rawSources = payload['sources'] ?? payload['citations'];
          final normalizedSources = _normalizeSourcesPayload(rawSources);
          if (normalizedSources != null && normalizedSources.isNotEmpty) {
@@ -832,18 +855,55 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
          }
          if (payload['done'] == true) {
            try {
+              // Get current messages to send with usage data (issue #274)
+              final currentMessages = getMessages();
+              final messagesForCompleted = currentMessages.map((m) {
+                final msgMap = <String, dynamic>{
+                  'id': m.id,
+                  'role': m.role,
+                  'content': m.content,
+                  'timestamp': m.timestamp.millisecondsSinceEpoch ~/ 1000,
+                };
+                if (m.role == 'assistant' && m.usage != null) {
+                  msgMap['usage'] = m.usage;
+                }
+                if (m.sources.isNotEmpty) {
+                  msgMap['sources'] = m.sources.map((s) => s.toJson()).toList();
+                }
+                return msgMap;
+              }).toList();
+
+              // Send chatCompleted to run any filters/actions
              // ignore: unawaited_futures
              api.sendChatCompleted(
                chatId: activeConversationId ?? '',
                messageId: assistantMessageId,
-                messages: const [],
+                messages: messagesForCompleted,
                model: modelId,
                modelItem: modelItem,
                sessionId: sessionId,
              );
-            } catch (_) {}

-            Future.microtask(refreshConversationSnapshot);
+              // Sync conversation to persist usage data (issue #274)
+              // chatCompleted doesn't persist - syncConversationMessages does
+              final chatId = activeConversationId;
+              if (chatId != null && chatId.isNotEmpty) {
+                // ignore: unawaited_futures
+                api.syncConversationMessages(
+                  chatId,
+                  currentMessages,
+                  model: modelId,
+                );
+              }
+            } catch (_) {
+              // Non-critical - continue if sync fails
+            }
+
+            // Delay snapshot refresh to allow backend to persist data
+            Future.delayed(
+              const Duration(milliseconds: 500),
+              refreshConversationSnapshot,
+            );

            final msgs = getMessages();
            if (msgs.isNotEmpty && msgs.last.role == 'assistant') {
--- a/lib/features/chat/providers/chat_providers.dart
+++ b/lib/features/chat/providers/chat_providers.dart
@@ -1521,6 +1521,10 @@ Future<void> regenerateMessage(
      'actions': <dynamic>[],
      'filters': <dynamic>[],
      'tags': <dynamic>[],
+      // Include capabilities from the actual model for usage stats support
+      'capabilities': selectedModel.capabilities,
+      // Include info/metadata for usage capability detection
+      'info': selectedModel.metadata?['info'],
    };

    // WebSocket-only streaming requires socket connection
@@ -2217,6 +2221,10 @@ Future<void> _sendMessageInternal(
      'actions': <dynamic>[],
      'filters': <dynamic>[],
      'tags': <dynamic>[],
+      // Include capabilities from the actual model for usage stats support
+      'capabilities': selectedModel.capabilities,
+      // Include info/metadata for usage capability detection
+      'info': selectedModel.metadata?['info'],
    };

    // WebSocket-only streaming requires socket connection.
--- a/lib/features/chat/widgets/assistant_message_widget.dart
+++ b/lib/features/chat/widgets/assistant_message_widget.dart
@@ -1346,6 +1346,15 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
            },
          ),
        ],
+        // Usage info button (like Open WebUI)
+        if (widget.message.usage != null &&
+            widget.message.usage!.isNotEmpty) ...[
+          _buildActionButton(
+            icon: Platform.isIOS ? CupertinoIcons.info : Icons.info_outline,
+            label: l10n.usageInfo,
+            onTap: () => _showUsageInfoSheet(context, widget.message.usage!),
+          ),
+        ],
        if (isErrorMessage) ...[
          _buildActionButton(
            icon: Platform.isIOS
@@ -1373,6 +1382,242 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
    return ChatActionButton(icon: icon, label: label, onTap: onTap);
  }

+  /// Shows a modal bottom sheet with usage/performance statistics for [usage].
+  /// The content scrolls, so it cannot overflow the sheet on short screens.
+  void _showUsageInfoSheet(BuildContext context, Map<String, dynamic> usage) {
+    final theme = context.conduitTheme;
+    final l10n = AppLocalizations.of(context)!;
+
+    showModalBottomSheet<void>(
+      context: context,
+      backgroundColor: theme.surfaceBackground,
+      shape: const RoundedRectangleBorder(
+        borderRadius: BorderRadius.vertical(
+          top: Radius.circular(AppBorderRadius.dialog),
+        ),
+      ),
+      builder: (ctx) {
+        return SafeArea(
+          // Scrolls so tall content cannot overflow the height-capped sheet.
+          child: SingleChildScrollView(
+            padding: const EdgeInsets.all(Spacing.lg),
+            child: Column(
+              mainAxisSize: MainAxisSize.min,
+              crossAxisAlignment: CrossAxisAlignment.start,
+              children: [
+                Row(
+                  children: [
+                    Icon(
+                      Icons.analytics_outlined,
+                      size: IconSize.md,
+                      color: theme.textPrimary,
+                    ),
+                    const SizedBox(width: Spacing.sm),
+                    Text(
+                      l10n.usageInfoTitle,
+                      style: TextStyle(
+                        fontSize: AppTypography.bodyLarge,
+                        fontWeight: FontWeight.w600,
+                        color: theme.textPrimary,
+                      ),
+                    ),
+                  ],
+                ),
+                const SizedBox(height: Spacing.lg),
+
+                // One row per statistic that the usage map can provide
+                ..._buildUsageStats(ctx, usage, l10n, theme),
+              ],
+            ),
+          ),
+        );
+      },
+    );
+  }
+
+  /// Builds one [_UsageStatRow] per statistic that can be derived from [usage].
+  List<Widget> _buildUsageStats(
+    BuildContext context,
+    Map<String, dynamic> usage,
+    AppLocalizations l10n,
+    ConduitThemeExtension theme,
+  ) {
+    final stats = <Widget>[];
+
+    // Read each known field; absent or unparsable values come back as null.
+    final evalCount = _parseNum(usage['eval_count']);
+    final evalDuration = _parseNum(usage['eval_duration']);
+    final promptEvalCount = _parseNum(usage['prompt_eval_count']);
+    final promptEvalDuration = _parseNum(usage['prompt_eval_duration']);
+    final completionTokens = _parseNum(usage['completion_tokens']);
+    final promptTokens = _parseNum(usage['prompt_tokens']);
+    final totalTokens = _parseNum(usage['total_tokens']);
+    // Time fields in seconds (Groq/OpenAI extended format)
+    final completionTime = _parseNum(usage['completion_time']);
+    final promptTime = _parseNum(usage['prompt_time']);
+    final totalTime = _parseNum(usage['total_time']);
+    final queueTime = _parseNum(usage['queue_time']);
+    // Time fields in nanoseconds (Ollama/llama.cpp format)
+    final totalDuration = _parseNum(usage['total_duration']);
+    final loadDuration = _parseNum(usage['load_duration']);
+    // Reasoning tokens (OpenAI o1/o3 models, Groq)
+    final completionDetails = usage['completion_tokens_details'];
+    final reasoningTokens = completionDetails is Map
+        ? _parseNum(completionDetails['reasoning_tokens'])
+        : null;
+
+    // --- Token Generation Speed ---
+    // Priority: Ollama format > Groq/OpenAI extended format > token count only
+    if (evalCount != null && evalDuration != null && evalDuration > 0) {
+      // Ollama/llama.cpp: duration in nanoseconds
+      final tgSpeed = evalCount / (evalDuration / 1e9);
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTokenGeneration,
+          value: l10n.usageTokensPerSecond(tgSpeed.toStringAsFixed(1)),
+          detail: l10n.usageTokenCount(evalCount.toInt()),
+          theme: theme,
+        ),
+      );
+    } else if (completionTokens != null &&
+        completionTime != null &&
+        completionTime > 0) {
+      // Groq/OpenAI extended: time in seconds
+      final tgSpeed = completionTokens / completionTime;
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTokenGeneration,
+          value: l10n.usageTokensPerSecond(tgSpeed.toStringAsFixed(1)),
+          detail: l10n.usageTokenCount(completionTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    } else if (completionTokens != null) {
+      // Basic OpenAI: token count only
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTokenGeneration,
+          value: l10n.usageTokenCount(completionTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Prompt Processing Speed (same fallback order as generation) ---
+    if (promptEvalCount != null &&
+        promptEvalDuration != null &&
+        promptEvalDuration > 0) {
+      // Ollama/llama.cpp: duration in nanoseconds
+      final ppSpeed = promptEvalCount / (promptEvalDuration / 1e9);
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usagePromptEval,
+          value: l10n.usageTokensPerSecond(ppSpeed.toStringAsFixed(1)),
+          detail: l10n.usageTokenCount(promptEvalCount.toInt()),
+          theme: theme,
+        ),
+      );
+    } else if (promptTokens != null && promptTime != null && promptTime > 0) {
+      // Groq/OpenAI extended: time in seconds
+      final ppSpeed = promptTokens / promptTime;
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usagePromptEval,
+          value: l10n.usageTokensPerSecond(ppSpeed.toStringAsFixed(1)),
+          detail: l10n.usageTokenCount(promptTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    } else if (promptTokens != null) {
+      // Basic OpenAI: token count only
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usagePromptEval,
+          value: l10n.usageTokenCount(promptTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Reasoning Tokens (shown only when the count is positive) ---
+    if (reasoningTokens != null && reasoningTokens > 0) {
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageReasoningTokens,
+          value: l10n.usageTokenCount(reasoningTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Total Tokens (only when completion or prompt count is missing) ---
+    if (totalTokens != null &&
+        (completionTokens == null || promptTokens == null)) {
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTotalTokens,
+          value: l10n.usageTokenCount(totalTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Total Duration (total_duration is preferred over total_time) ---
+    if (totalDuration != null && totalDuration > 0) {
+      // Ollama/llama.cpp: nanoseconds
+      final totalSec = totalDuration / 1e9;
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTotalDuration,
+          value: l10n.usageSecondsFormat(totalSec.toStringAsFixed(2)),
+          theme: theme,
+        ),
+      );
+    } else if (totalTime != null && totalTime > 0) {
+      // Groq/OpenAI extended: seconds
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTotalDuration,
+          value: l10n.usageSecondsFormat(totalTime.toStringAsFixed(2)),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Queue Time (Groq) ---
+    if (queueTime != null && queueTime > 0) {
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageQueueTime,
+          value: l10n.usageSecondsFormat(queueTime.toStringAsFixed(3)),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Model Load Time (Ollama) ---
+    if (loadDuration != null && loadDuration > 0) {
+      final loadSec = loadDuration / 1e9;
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageLoadDuration,
+          value: l10n.usageSecondsFormat(loadSec.toStringAsFixed(2)),
+          theme: theme,
+        ),
+      );
+    }
+
+    return stats;
+  }
+
+  /// Parses [value] (a num or numeric string) as a finite number, else null.
+  num? _parseNum(dynamic value) {
+    final parsed = value is String ? num.tryParse(value) : value;
+    // NaN/Infinity are rejected: callers invoke toInt(), which throws on them.
+    if (parsed is num && parsed.isFinite) return parsed;
+    return null;
+  }
+
  // Reasoning tile rendered inline - minimal design inspired by OpenWebUI
  Widget _buildReasoningTile(ReasoningEntry rc, int index) {
    final isExpanded = _expandedReasoning.contains(index);
@@ -1878,3 +2123,59 @@ Future<void> _launchUri(String url) async {
    DebugLogger.log('Unable to open url $url: $err', scope: 'chat/assistant');
  }
 }
+
+/// Row widget for displaying a single usage statistic.
+///
+/// The label expands and wraps so long or scaled text cannot overflow the row.
+class _UsageStatRow extends StatelessWidget {
+  const _UsageStatRow({
+    required this.label,
+    required this.value,
+    this.detail,
+    required this.theme,
+  });
+
+  final String label;
+  final String value;
+  final String? detail;
+  final ConduitThemeExtension theme;
+
+  @override
+  Widget build(BuildContext context) {
+    final labelStyle = TextStyle(
+      fontSize: AppTypography.bodyMedium,
+      color: theme.textSecondary,
+    );
+    return Padding(
+      padding: const EdgeInsets.only(bottom: Spacing.sm),
+      child: Row(
+        children: [
+          Expanded(child: Text(label, style: labelStyle)),
+          const SizedBox(width: Spacing.sm),
+          Column(
+            crossAxisAlignment: CrossAxisAlignment.end,
+            children: [
+              Text(
+                value,
+                style: TextStyle(
+                  fontSize: AppTypography.bodyMedium,
+                  fontWeight: FontWeight.w600,
+                  fontFamily: AppTypography.monospaceFontFamily,
+                  color: theme.textPrimary,
+                ),
+              ),
+              if (detail != null)
+                Text(
+                  detail!,
+                  style: TextStyle(
+                    fontSize: AppTypography.labelSmall,
+                    color: theme.textTertiary,
+                  ),
+                ),
+            ],
+          ),
+        ],
+      ),
+    );
+  }
+}
--- a/lib/l10n/app_en.arb
+++ b/lib/l10n/app_en.arb
@@ -1000,6 +1000,72 @@
  "@ttsStop": {
    "description": "Action to stop text to speech playback"
  },
+  "usageInfo": "Info",
+  "@usageInfo": {
+    "description": "Action to show usage/performance statistics for a response"
+  },
+  "usageInfoTitle": "Response Statistics",
+  "@usageInfoTitle": {
+    "description": "Title for the usage statistics dialog/sheet"
+  },
+  "usageTokenGeneration": "Token Generation",
+  "@usageTokenGeneration": {
+    "description": "Label for token generation speed metric"
+  },
+  "usagePromptEval": "Prompt Processing",
+  "@usagePromptEval": {
+    "description": "Label for prompt evaluation/pre-processing speed metric"
+  },
+  "usageTotalDuration": "Total Duration",
+  "@usageTotalDuration": {
+    "description": "Label for total response generation duration"
+  },
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "description": "Tokens per second display format",
+    "placeholders": {
+      "speed": {
+        "type": "String",
+        "example": "45.2"
+      }
+    }
+  },
+  "usageTokenCount": "{count, plural, =1{1 token} other{{count} tokens}}",
+  "@usageTokenCount": {
+    "description": "Token count display format",
+    "placeholders": {
+      "count": {
+        "type": "int",
+        "example": "128"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}s",
+  "@usageSecondsFormat": {
+    "description": "Seconds duration format",
+    "placeholders": {
+      "seconds": {
+        "type": "String",
+        "example": "2.34"
+      }
+    }
+  },
+  "usageLoadDuration": "Model Load Time",
+  "@usageLoadDuration": {
+    "description": "Label for model loading duration metric"
+  },
+  "usageQueueTime": "Queue Time",
+  "@usageQueueTime": {
+    "description": "Label for queue waiting time metric"
+  },
+  "usageReasoningTokens": "Reasoning Tokens",
+  "@usageReasoningTokens": {
+    "description": "Label for reasoning/thinking tokens count"
+  },
+  "usageTotalTokens": "Total Tokens",
+  "@usageTotalTokens": {
+    "description": "Label for total token count"
+  },
  "edit": "Edit",
  "@edit": {
    "description": "Action to edit an item/message."