diff --git a/lib/core/models/model.dart b/lib/core/models/model.dart
index fdc4860..50d1d23 100644
--- a/lib/core/models/model.dart
+++ b/lib/core/models/model.dart
@@ -156,6 +156,12 @@ sealed class Model with _$Model {
       }
     }
 
+    // Extract usage capability from info.meta.capabilities (OpenWebUI format)
+    // This indicates whether the model supports stream_options.include_usage
+    final infoMetaCapabilities =
+        infoMeta?['capabilities'] as Map<String, dynamic>?;
+    final supportsUsage = infoMetaCapabilities?['usage'] == true;
+
     // Fallback to top-level toolIds (for cached models serialized via toJson)
     if (toolIds == null || toolIds.isEmpty) {
       final topLevelToolIds = json['toolIds'];
@@ -199,6 +205,7 @@ sealed class Model with _$Model {
         'pricing': json['pricing'],
         'context_length': json['context_length'],
         'supported_parameters': supportedParamsList ?? supportedParams,
+        'usage': supportsUsage,
       },
       metadata: mergedMetadata,
       toolIds: toolIds,
diff --git a/lib/core/services/api_service.dart b/lib/core/services/api_service.dart
index 6673dc4..f384032 100644
--- a/lib/core/services/api_service.dart
+++ b/lib/core/services/api_service.dart
@@ -966,7 +966,7 @@ class ApiService {
         if (msg.attachmentIds != null && msg.attachmentIds!.isNotEmpty)
           'attachment_ids': List<String>.from(msg.attachmentIds!),
         if (sanitizedFiles != null) 'files': sanitizedFiles,
-        // Mirror status updates, follow-ups, code executions, and sources
+        // Mirror status updates, follow-ups, code executions, sources, and usage
         if (msg.statusHistory.isNotEmpty)
           'statusHistory': msg.statusHistory.map((s) => s.toJson()).toList(),
         if (msg.followUps.isNotEmpty)
@@ -975,6 +975,8 @@ class ApiService {
           'codeExecutions': msg.codeExecutions.map((e) => e.toJson()).toList(),
         if (msg.sources.isNotEmpty)
           'sources': msg.sources.map((s) => s.toJson()).toList(),
+        // Include usage statistics for persistence (issue #274)
+        if (msg.usage != null) 'usage': msg.usage,
       };
 
       // Update parent's childrenIds
@@ -1001,7 +1003,7 @@ class ApiService {
         if (msg.attachmentIds != null && msg.attachmentIds!.isNotEmpty)
           'attachment_ids': List<String>.from(msg.attachmentIds!),
         if (sanitizedArrayFiles != null) 'files': sanitizedArrayFiles,
-        // Mirror status updates, follow-ups, code executions, and sources
+        // Mirror status updates, follow-ups, code executions, sources, and usage
         if (msg.statusHistory.isNotEmpty)
           'statusHistory': msg.statusHistory.map((s) => s.toJson()).toList(),
         if (msg.followUps.isNotEmpty)
@@ -1010,6 +1012,8 @@ class ApiService {
           'codeExecutions': msg.codeExecutions.map((e) => e.toJson()).toList(),
         if (msg.sources.isNotEmpty)
           'sources': msg.sources.map((s) => s.toJson()).toList(),
+        // Include usage statistics for persistence (issue #274)
+        if (msg.usage != null) 'usage': msg.usage,
       });
 
       previousId = messageId;
@@ -1747,6 +1751,10 @@ class ApiService {
   }
 
   // Send chat completed notification
+  // Usage is persisted separately via syncConversationMessages by the caller.
+  /// Notify backend that chat streaming is complete.
+  /// This triggers any configured filters/actions on the backend.
+  /// Matches OpenWebUI's chatCompletedHandler in Chat.svelte.
   Future<void> sendChatCompleted({
     required String chatId,
     required String messageId,
@@ -1754,61 +1762,61 @@ class ApiService {
     required String model,
     Map<String, dynamic>? modelItem,
     String? sessionId,
+    List<String>? filterIds,
   }) async {
-    _traceApi('Sending chat completed notification (optional endpoint)');
-
-    // This endpoint appears to be optional or deprecated in newer OpenWebUI versions
-    // The main chat synchronization happens through /api/v1/chats/{id} updates
-    // We'll still try to call it but won't fail if it doesn't work
-
-    // Format messages to match OpenWebUI expected structure
-    // Note: Removing 'id' field as it causes 400 error
+    // Format messages to match OpenWebUI expected structure exactly
     final formattedMessages = messages.map((msg) {
-      final formatted = {
-        // Don't include 'id' - it causes 400 error with detail: 'id'
+      final formatted = <String, dynamic>{
+        'id': msg['id'],
         'role': msg['role'],
         'content': msg['content'],
         'timestamp':
             msg['timestamp'] ?? DateTime.now().millisecondsSinceEpoch ~/ 1000,
       };
-
-      // Add model info for assistant messages
-      if (msg['role'] == 'assistant') {
-        formatted['model'] = model;
-        if (msg.containsKey('usage')) {
-          formatted['usage'] = msg['usage'];
-        }
+      // Include info if present (OpenWebUI sends this)
+      if (msg.containsKey('info') && msg['info'] != null) {
+        formatted['info'] = msg['info'];
+      }
+      // Include usage if present (issue #274)
+      if (msg.containsKey('usage') && msg['usage'] != null) {
+        formatted['usage'] = msg['usage'];
+      }
+      // Include sources if present
+      if (msg.containsKey('sources') && msg['sources'] != null) {
+        formatted['sources'] = msg['sources'];
       }
-
       return formatted;
     }).toList();
 
-    // Include the message ID and session ID at the top level - server expects these
-    final requestData = {
-      'id': messageId, // The server expects the assistant message ID here
-      'chat_id': chatId,
+    final requestData = <String, dynamic>{
       'model': model,
       'messages': formattedMessages,
-      'session_id':
-          sessionId ?? const Uuid().v4().substring(0, 20), // Add session_id
-      // Don't include model_item as it might not be expected
+      'chat_id': chatId,
+      'session_id': sessionId ?? const Uuid().v4().substring(0, 20),
+      'id': messageId,
     };
 
+    // Include filter_ids if provided (for outlet filters)
+    if (filterIds != null && filterIds.isNotEmpty) {
+      requestData['filter_ids'] = filterIds;
+    }
+
+    // Include model_item if available
+    if (modelItem != null) {
+      requestData['model_item'] = modelItem;
+    }
+
     try {
-      final response = await _dio.post(
+      await _dio.post(
         '/api/chat/completed',
         data: requestData,
         options: Options(
-          sendTimeout: const Duration(seconds: 4),
-          receiveTimeout: const Duration(seconds: 4),
+          sendTimeout: const Duration(seconds: 10),
+          receiveTimeout: const Duration(seconds: 10),
         ),
       );
-      _traceApi('Chat completed response: ${response.statusCode}');
-    } catch (e) {
-      // This is a non-critical endpoint - main sync happens via /api/v1/chats/{id}
-      _traceApi(
-        'Chat completed endpoint not available or failed (non-critical): $e',
-      );
+    } catch (e) {
+      _traceApi('Chat completed failed (non-critical): $e');
     }
   }
 
@@ -2826,6 +2834,16 @@ class ApiService {
       data['chat_id'] = conversationId;
     }
 
+    // Request usage statistics if model supports it (issue #274)
+    // Matches OpenWebUI: only sends stream_options when model.info.meta.capabilities.usage is true
+    final supportsUsage =
+        modelItem?['capabilities']?['usage'] == true ||
+        (modelItem?['info'] as Map?)?['meta']?['capabilities']?['usage'] ==
+            true;
+    if (supportsUsage) {
+      data['stream_options'] = {'include_usage': true};
+    }
+
     // Add feature flags via 'features' object only (not as top-level params).
     // Top-level 'web_search'/'image_generation' params are not recognized by
     // OpenAI and cause errors when forwarded. Open WebUI expects these in the
diff --git a/lib/core/services/conversation_parsing.dart b/lib/core/services/conversation_parsing.dart
index 52f2990..12b134b 100644
--- a/lib/core/services/conversation_parsing.dart
+++ b/lib/core/services/conversation_parsing.dart
@@ -305,6 +305,10 @@ Map<String, dynamic> _parseOpenWebUIMessageToJson(
       ? historyMsg['sources'] ?? historyMsg['citations']
       : msgData['sources'] ?? msgData['citations'];
 
+  // Parse usage data - Open WebUI stores this in 'usage' field on messages
+  final rawUsage = _coerceJsonMap(historyMsg?['usage'] ?? msgData['usage']);
+  final Map<String, dynamic>? usage = rawUsage.isEmpty ? null : rawUsage;
+
   return <String, dynamic>{
     'id': (msgData['id'] ?? _uuid.v4()).toString(),
     'role': role,
@@ -319,7 +323,7 @@ Map<String, dynamic> _parseOpenWebUIMessageToJson(
     'followUps': _coerceStringList(followUpsRaw),
     'codeExecutions': _parseCodeExecutionsField(codeExecRaw),
     'sources': _parseSourcesField(sourcesRaw),
-    'usage': _coerceJsonMap(msgData['usage']),
+    'usage': usage,
     'versions': const <Map<String, dynamic>>[],
   };
 }
diff --git a/lib/core/services/streaming_helper.dart b/lib/core/services/streaming_helper.dart
index 9583bb0..c8f16fd 100644
--- a/lib/core/services/streaming_helper.dart
+++ b/lib/core/services/streaming_helper.dart
@@ -576,12 +576,15 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
 
       setFollowUps(assistant.id, assistant.followUps);
       updateMessageById(assistant.id, (current) {
+        // Preserve existing usage if server doesn't have it yet (issue #274)
+        // Usage is captured from streaming but may not be persisted on server
+        final effectiveUsage = assistant.usage ?? current.usage;
         return current.copyWith(
           followUps: List<String>.from(assistant.followUps),
           statusHistory: assistant.statusHistory,
           sources: assistant.sources,
           metadata: {...?current.metadata, ...?assistant.metadata},
-          usage: assistant.usage,
+          usage: effectiveUsage,
         );
       });
     } catch (_) {
@@ -638,6 +641,14 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
             }
             try {
               final Map<String, dynamic> j = jsonDecode(dataStr);
+
+              // Capture usage statistics from OpenAI-style streaming (issue #274)
+              // Usage is sent in the final chunk with stream_options.include_usage
+              final usageData = j['usage'];
+              if (usageData is Map<String, dynamic> && usageData.isNotEmpty) {
+                updateLastMessageWith((m) => m.copyWith(usage: usageData));
+              }
+
               final choices = j['choices'];
               if (choices is List && choices.isNotEmpty) {
                 final choice = choices.first;
@@ -746,6 +757,18 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
 
       if (type == 'chat:completion' && payload != null) {
         if (payload is Map<String, dynamic>) {
+          // Capture usage statistics whenever they appear (issue #274)
+          // Usage may come in a separate payload before the done:true payload
+          final usageData = payload['usage'];
+          if (usageData is Map<String, dynamic> && usageData.isNotEmpty) {
+            final targetId = _resolveTargetMessageId(messageId, getMessages);
+            if (targetId != null) {
+              updateMessageById(targetId, (current) {
+                return current.copyWith(usage: usageData);
+              });
+            }
+          }
+
           final rawSources = payload['sources'] ?? payload['citations'];
           final normalizedSources = _normalizeSourcesPayload(rawSources);
           if (normalizedSources != null && normalizedSources.isNotEmpty) {
@@ -832,18 +855,55 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
           }
           if (payload['done'] == true) {
             try {
+              // Get current messages to send with usage data (issue #274)
+              final currentMessages = getMessages();
+              final messagesForCompleted = currentMessages.map((m) {
+                final msgMap = <String, dynamic>{
+                  'id': m.id,
+                  'role': m.role,
+                  'content': m.content,
+                  'timestamp': m.timestamp.millisecondsSinceEpoch ~/ 1000,
+                };
+                if (m.role == 'assistant' && m.usage != null) {
+                  msgMap['usage'] = m.usage;
+                }
+                if (m.sources.isNotEmpty) {
+                  msgMap['sources'] = m.sources.map((s) => s.toJson()).toList();
+                }
+                return msgMap;
+              }).toList();
+
+              // Send chatCompleted to run any filters/actions
               // ignore: unawaited_futures
               api.sendChatCompleted(
                 chatId: activeConversationId ?? '',
                 messageId: assistantMessageId,
-                messages: const [],
+                messages: messagesForCompleted,
                 model: modelId,
                 modelItem: modelItem,
                 sessionId: sessionId,
               );
-            } catch (_) {}
 
-            Future.microtask(refreshConversationSnapshot);
+              // Sync conversation to persist usage data (issue #274)
+              // chatCompleted doesn't persist - syncConversationMessages does
+              final chatId = activeConversationId;
+              if (chatId != null && chatId.isNotEmpty) {
+                // ignore: unawaited_futures
+                api.syncConversationMessages(
+                  chatId,
+                  currentMessages,
+                  model: modelId,
+                );
+              }
+            } catch (_) {
+              // Non-critical; catches sync errors only (calls are unawaited)
+            }
+
+            // Delay snapshot refresh to allow backend to persist data
+            Future.delayed(
+              const Duration(milliseconds: 500),
+              refreshConversationSnapshot,
+            );
 
             final msgs = getMessages();
             if (msgs.isNotEmpty && msgs.last.role == 'assistant') {
diff --git a/lib/features/chat/providers/chat_providers.dart b/lib/features/chat/providers/chat_providers.dart
index 205035f..99918cf 100644
--- a/lib/features/chat/providers/chat_providers.dart
+++ b/lib/features/chat/providers/chat_providers.dart
@@ -1521,6 +1521,10 @@ Future<void> regenerateMessage(
       'actions': <dynamic>[],
       'filters': <dynamic>[],
       'tags': <dynamic>[],
+      // Include capabilities from the actual model for usage stats support
+      'capabilities': selectedModel.capabilities,
+      // Include info/metadata for usage capability detection
+      'info': selectedModel.metadata?['info'],
     };
 
     // WebSocket-only streaming requires socket connection
@@ -2217,6 +2221,10 @@ Future<void> _sendMessageInternal(
       'actions': <dynamic>[],
       'filters': <dynamic>[],
       'tags': <dynamic>[],
+      // Include capabilities from the actual model for usage stats support
+      'capabilities': selectedModel.capabilities,
+      // Include info/metadata for usage capability detection
+      'info': selectedModel.metadata?['info'],
     };
 
     // WebSocket-only streaming requires socket connection.
diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart
index 5427d7b..2bf762e 100644
--- a/lib/features/chat/widgets/assistant_message_widget.dart
+++ b/lib/features/chat/widgets/assistant_message_widget.dart
@@ -1346,6 +1346,15 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
             },
           ),
         ],
+        // Usage info button (like Open WebUI)
+        if (widget.message.usage != null &&
+            widget.message.usage!.isNotEmpty) ...[
+          _buildActionButton(
+            icon: Platform.isIOS ? CupertinoIcons.info : Icons.info_outline,
+            label: l10n.usageInfo,
+            onTap: () => _showUsageInfoSheet(context, widget.message.usage!),
+          ),
+        ],
         if (isErrorMessage) ...[
           _buildActionButton(
             icon: Platform.isIOS
@@ -1373,6 +1382,242 @@ class _AssistantMessageWidgetState extends ConsumerState<AssistantMessageWidget>
     return ChatActionButton(icon: icon, label: label, onTap: onTap);
   }
 
+  /// Shows a modal bottom sheet with the usage/performance stats in [usage].
+  /// The content scrolls so many rows cannot overflow a short sheet.
+  void _showUsageInfoSheet(BuildContext context, Map<String, dynamic> usage) {
+    final theme = context.conduitTheme;
+    final l10n = AppLocalizations.of(context)!;
+
+    showModalBottomSheet<void>(
+      context: context,
+      backgroundColor: theme.surfaceBackground,
+      shape: const RoundedRectangleBorder(
+        borderRadius: BorderRadius.vertical(
+          top: Radius.circular(AppBorderRadius.dialog),
+        ),
+      ),
+      builder: (ctx) {
+        return SafeArea(
+          child: SingleChildScrollView(
+            padding: const EdgeInsets.all(Spacing.lg),
+            child: Column(
+              mainAxisSize: MainAxisSize.min,
+              crossAxisAlignment: CrossAxisAlignment.start,
+              children: [
+                // Title
+                Row(
+                  children: [
+                    Icon(
+                      Icons.analytics_outlined,
+                      size: IconSize.md,
+                      color: theme.textPrimary,
+                    ),
+                    const SizedBox(width: Spacing.sm),
+                    Text(
+                      l10n.usageInfoTitle,
+                      style: TextStyle(
+                        fontSize: AppTypography.bodyLarge,
+                        fontWeight: FontWeight.w600,
+                        color: theme.textPrimary,
+                      ),
+                    ),
+                  ],
+                ),
+                const SizedBox(height: Spacing.lg),
+
+                // Stats grid
+                ..._buildUsageStats(ctx, usage, l10n, theme),
+              ],
+            ),
+          ),
+        );
+      },
+    );
+  }
+
+  /// Builds one [_UsageStatRow] per metric that can be derived from [usage].
+  List<Widget> _buildUsageStats(
+    BuildContext context,
+    Map<String, dynamic> usage,
+    AppLocalizations l10n,
+    ConduitThemeExtension theme,
+  ) {
+    final stats = <Widget>[];
+
+    // Read every recognised key; absent or unparsable values become null.
+    final evalCount = _parseNum(usage['eval_count']);
+    final evalDuration = _parseNum(usage['eval_duration']);
+    final promptEvalCount = _parseNum(usage['prompt_eval_count']);
+    final promptEvalDuration = _parseNum(usage['prompt_eval_duration']);
+    final completionTokens = _parseNum(usage['completion_tokens']);
+    final promptTokens = _parseNum(usage['prompt_tokens']);
+    final totalTokens = _parseNum(usage['total_tokens']);
+    // Time fields in seconds (Groq/OpenAI extended format)
+    final completionTime = _parseNum(usage['completion_time']);
+    final promptTime = _parseNum(usage['prompt_time']);
+    final totalTime = _parseNum(usage['total_time']);
+    final queueTime = _parseNum(usage['queue_time']);
+    // Time fields in nanoseconds (Ollama/llama.cpp format)
+    final totalDuration = _parseNum(usage['total_duration']);
+    final loadDuration = _parseNum(usage['load_duration']);
+    // Reasoning tokens (OpenAI o1/o3 models, Groq)
+    final completionDetails = usage['completion_tokens_details'];
+    final reasoningTokens = completionDetails is Map
+        ? _parseNum(completionDetails['reasoning_tokens'])
+        : null;
+
+    // --- Token Generation Speed ---
+    // Priority: Ollama format > Groq/OpenAI extended format > token count only
+    if (evalCount != null && evalDuration != null && evalDuration > 0) {
+      // Ollama/llama.cpp: duration in nanoseconds
+      final tgSpeed = evalCount / (evalDuration / 1e9);
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTokenGeneration,
+          value: l10n.usageTokensPerSecond(tgSpeed.toStringAsFixed(1)),
+          detail: l10n.usageTokenCount(evalCount.toInt()),
+          theme: theme,
+        ),
+      );
+    } else if (completionTokens != null &&
+        completionTime != null &&
+        completionTime > 0) {
+      // Groq/OpenAI extended: time in seconds
+      final tgSpeed = completionTokens / completionTime;
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTokenGeneration,
+          value: l10n.usageTokensPerSecond(tgSpeed.toStringAsFixed(1)),
+          detail: l10n.usageTokenCount(completionTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    } else if (completionTokens != null) {
+      // Basic OpenAI: token count only
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTokenGeneration,
+          value: l10n.usageTokenCount(completionTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Prompt Processing Speed ---
+    if (promptEvalCount != null &&
+        promptEvalDuration != null &&
+        promptEvalDuration > 0) {
+      // Ollama/llama.cpp: duration in nanoseconds
+      final ppSpeed = promptEvalCount / (promptEvalDuration / 1e9);
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usagePromptEval,
+          value: l10n.usageTokensPerSecond(ppSpeed.toStringAsFixed(1)),
+          detail: l10n.usageTokenCount(promptEvalCount.toInt()),
+          theme: theme,
+        ),
+      );
+    } else if (promptTokens != null && promptTime != null && promptTime > 0) {
+      // Groq/OpenAI extended: time in seconds
+      final ppSpeed = promptTokens / promptTime;
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usagePromptEval,
+          value: l10n.usageTokensPerSecond(ppSpeed.toStringAsFixed(1)),
+          detail: l10n.usageTokenCount(promptTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    } else if (promptTokens != null) {
+      // Basic OpenAI: token count only
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usagePromptEval,
+          value: l10n.usageTokenCount(promptTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Reasoning Tokens (for o1/o3 models) ---
+    if (reasoningTokens != null && reasoningTokens > 0) {
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageReasoningTokens,
+          value: l10n.usageTokenCount(reasoningTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Total Tokens (only when completion or prompt count is missing) ---
+    if (totalTokens != null &&
+        (completionTokens == null || promptTokens == null)) {
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTotalTokens,
+          value: l10n.usageTokenCount(totalTokens.toInt()),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Total Duration ---
+    if (totalDuration != null && totalDuration > 0) {
+      // Ollama/llama.cpp: nanoseconds
+      final totalSec = totalDuration / 1e9;
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTotalDuration,
+          value: l10n.usageSecondsFormat(totalSec.toStringAsFixed(2)),
+          theme: theme,
+        ),
+      );
+    } else if (totalTime != null && totalTime > 0) {
+      // Groq/OpenAI extended: seconds
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageTotalDuration,
+          value: l10n.usageSecondsFormat(totalTime.toStringAsFixed(2)),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Queue Time (Groq); the value is rendered as seconds as-is ---
+    if (queueTime != null && queueTime > 0) {
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageQueueTime,
+          value: l10n.usageSecondsFormat(queueTime.toStringAsFixed(3)),
+          theme: theme,
+        ),
+      );
+    }
+
+    // --- Model Load Time (Ollama) ---
+    if (loadDuration != null && loadDuration > 0) {
+      final loadSec = loadDuration / 1e9;
+      stats.add(
+        _UsageStatRow(
+          label: l10n.usageLoadDuration,
+          value: l10n.usageSecondsFormat(loadSec.toStringAsFixed(2)),
+          theme: theme,
+        ),
+      );
+    }
+
+    return stats;
+  }
+
+  /// Returns [value] as a finite num; null for NaN/Infinity (toInt() throws).
+  num? _parseNum(dynamic value) {
+    final num? parsed = value is num
+        ? value
+        : (value is String ? num.tryParse(value) : null);
+    return (parsed != null && parsed.isFinite) ? parsed : null;
+  }
+
   // Reasoning tile rendered inline - minimal design inspired by OpenWebUI
   Widget _buildReasoningTile(ReasoningEntry rc, int index) {
     final isExpanded = _expandedReasoning.contains(index);
@@ -1878,3 +2123,59 @@ Future<void> _launchUri(String url) async {
     DebugLogger.log('Unable to open url $url: $err', scope: 'chat/assistant');
   }
 }
+
+/// One usage-stat row; [Expanded] lets a long label wrap, not overflow.
+class _UsageStatRow extends StatelessWidget {
+  const _UsageStatRow({
+    required this.label,
+    required this.value,
+    this.detail,
+    required this.theme,
+  });
+
+  final String label, value;
+  final String? detail;
+  final ConduitThemeExtension theme;
+
+  @override
+  Widget build(BuildContext context) {
+    return Padding(
+      padding: const EdgeInsets.only(bottom: Spacing.sm),
+      child: Row(
+        children: [
+          Expanded(
+            child: Text(
+              label,
+              style: TextStyle(
+                fontSize: AppTypography.bodyMedium,
+                color: theme.textSecondary,
+              ),
+            ),
+          ),
+          Column(
+            crossAxisAlignment: CrossAxisAlignment.end,
+            children: [
+              Text(
+                value,
+                style: TextStyle(
+                  fontSize: AppTypography.bodyMedium,
+                  fontWeight: FontWeight.w600,
+                  fontFamily: AppTypography.monospaceFontFamily,
+                  color: theme.textPrimary,
+                ),
+              ),
+              if (detail != null)
+                Text(
+                  detail!,
+                  style: TextStyle(
+                    fontSize: AppTypography.labelSmall,
+                    color: theme.textTertiary,
+                  ),
+                ),
+            ],
+          ),
+        ],
+      ),
+    );
+  }
+}
diff --git a/lib/l10n/app_de.arb b/lib/l10n/app_de.arb
index e433641..1b2723a 100644
--- a/lib/l10n/app_de.arb
+++ b/lib/l10n/app_de.arb
@@ -879,5 +879,38 @@
   "ldapNotEnabled": "LDAP-Authentifizierung ist auf diesem Server nicht aktiviert",
   "ssoPlatformNotSupported": "SSO-Authentifizierung wird auf dieser Plattform nicht unterstützt. Bitte verwenden Sie stattdessen Anmeldedaten oder LDAP-Authentifizierung.",
   "continueWithProvider": "Weiter mit {provider}",
-  "or": "oder"
+  "or": "oder",
+  "usageInfo": "Info",
+  "usageInfoTitle": "Antwortstatistiken",
+  "usageTokenGeneration": "Token-Generierung",
+  "usagePromptEval": "Prompt-Verarbeitung",
+  "usageTotalDuration": "Gesamtdauer",
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "{count} Tokens",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}s",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "Modellladezeit",
+  "usageQueueTime": "Wartezeit",
+  "usageReasoningTokens": "Reasoning-Tokens",
+  "usageTotalTokens": "Gesamte Tokens"
 }
diff --git a/lib/l10n/app_en.arb b/lib/l10n/app_en.arb
index 81a941f..75cbfa2 100644
--- a/lib/l10n/app_en.arb
+++ b/lib/l10n/app_en.arb
@@ -1000,6 +1000,72 @@
   "@ttsStop": {
     "description": "Action to stop text to speech playback"
   },
+  "usageInfo": "Info",
+  "@usageInfo": {
+    "description": "Action to show usage/performance statistics for a response"
+  },
+  "usageInfoTitle": "Response Statistics",
+  "@usageInfoTitle": {
+    "description": "Title for the usage statistics dialog/sheet"
+  },
+  "usageTokenGeneration": "Token Generation",
+  "@usageTokenGeneration": {
+    "description": "Label for token generation speed metric"
+  },
+  "usagePromptEval": "Prompt Processing",
+  "@usagePromptEval": {
+    "description": "Label for prompt evaluation/pre-processing speed metric"
+  },
+  "usageTotalDuration": "Total Duration",
+  "@usageTotalDuration": {
+    "description": "Label for total response generation duration"
+  },
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "description": "Tokens per second display format",
+    "placeholders": {
+      "speed": {
+        "type": "String",
+        "example": "45.2"
+      }
+    }
+  },
+  "usageTokenCount": "{count} tokens",
+  "@usageTokenCount": {
+    "description": "Token count display format",
+    "placeholders": {
+      "count": {
+        "type": "int",
+        "example": "128"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}s",
+  "@usageSecondsFormat": {
+    "description": "Seconds duration format",
+    "placeholders": {
+      "seconds": {
+        "type": "String",
+        "example": "2.34"
+      }
+    }
+  },
+  "usageLoadDuration": "Model Load Time",
+  "@usageLoadDuration": {
+    "description": "Label for model loading duration metric"
+  },
+  "usageQueueTime": "Queue Time",
+  "@usageQueueTime": {
+    "description": "Label for queue waiting time metric"
+  },
+  "usageReasoningTokens": "Reasoning Tokens",
+  "@usageReasoningTokens": {
+    "description": "Label for reasoning/thinking tokens count"
+  },
+  "usageTotalTokens": "Total Tokens",
+  "@usageTotalTokens": {
+    "description": "Label for total token count"
+  },
   "edit": "Edit",
   "@edit": {
     "description": "Action to edit an item/message."
diff --git a/lib/l10n/app_es.arb b/lib/l10n/app_es.arb
index e1dfc05..8a0eff4 100644
--- a/lib/l10n/app_es.arb
+++ b/lib/l10n/app_es.arb
@@ -879,5 +879,38 @@
   "ldapNotEnabled": "La autenticación LDAP no está habilitada en este servidor",
   "ssoPlatformNotSupported": "La autenticación SSO no es compatible con esta plataforma. Por favor, use credenciales o autenticación LDAP en su lugar.",
   "continueWithProvider": "Continuar con {provider}",
-  "or": "o"
+  "or": "o",
+  "usageInfo": "Info",
+  "usageInfoTitle": "Estadísticas de respuesta",
+  "usageTokenGeneration": "Generación de tokens",
+  "usagePromptEval": "Procesamiento del prompt",
+  "usageTotalDuration": "Duración total",
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "{count} tokens",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}s",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "Tiempo de carga del modelo",
+  "usageQueueTime": "Tiempo en cola",
+  "usageReasoningTokens": "Tokens de razonamiento",
+  "usageTotalTokens": "Tokens totales"
 }
diff --git a/lib/l10n/app_fr.arb b/lib/l10n/app_fr.arb
index 637aba1..1197ce5 100644
--- a/lib/l10n/app_fr.arb
+++ b/lib/l10n/app_fr.arb
@@ -879,5 +879,38 @@
   "ldapNotEnabled": "L'authentification LDAP n'est pas activée sur ce serveur",
   "ssoPlatformNotSupported": "L'authentification SSO n'est pas prise en charge sur cette plateforme. Veuillez utiliser les identifiants ou l'authentification LDAP à la place.",
   "continueWithProvider": "Continuer avec {provider}",
-  "or": "ou"
+  "or": "ou",
+  "usageInfo": "Info",
+  "usageInfoTitle": "Statistiques de réponse",
+  "usageTokenGeneration": "Génération de tokens",
+  "usagePromptEval": "Traitement du prompt",
+  "usageTotalDuration": "Durée totale",
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "{count} tokens",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}s",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "Temps de chargement du modèle",
+  "usageQueueTime": "Temps d'attente",
+  "usageReasoningTokens": "Tokens de raisonnement",
+  "usageTotalTokens": "Tokens totaux"
 }
diff --git a/lib/l10n/app_it.arb b/lib/l10n/app_it.arb
index 6214e21..f67eecb 100644
--- a/lib/l10n/app_it.arb
+++ b/lib/l10n/app_it.arb
@@ -879,5 +879,38 @@
   "ldapNotEnabled": "L'autenticazione LDAP non è abilitata su questo server",
   "ssoPlatformNotSupported": "L'autenticazione SSO non è supportata su questa piattaforma. Usa invece le credenziali o l'autenticazione LDAP.",
   "continueWithProvider": "Continua con {provider}",
-  "or": "o"
+  "or": "o",
+  "usageInfo": "Info",
+  "usageInfoTitle": "Statistiche risposta",
+  "usageTokenGeneration": "Generazione token",
+  "usagePromptEval": "Elaborazione prompt",
+  "usageTotalDuration": "Durata totale",
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "{count} token",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}s",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "Tempo di caricamento modello",
+  "usageQueueTime": "Tempo in coda",
+  "usageReasoningTokens": "Token di ragionamento",
+  "usageTotalTokens": "Token totali"
 }
diff --git a/lib/l10n/app_ko.arb b/lib/l10n/app_ko.arb
index 9e41f1c..9040a8f 100644
--- a/lib/l10n/app_ko.arb
+++ b/lib/l10n/app_ko.arb
@@ -657,5 +657,38 @@
   "ldapNotEnabled": "이 서버에서 LDAP 인증이 활성화되어 있지 않습니다",
   "ssoPlatformNotSupported": "이 플랫폼에서는 SSO 인증이 지원되지 않습니다. 대신 자격 증명 또는 LDAP 인증을 사용하세요.",
   "continueWithProvider": "{provider}(으)로 계속",
-  "or": "또는"
+  "or": "또는",
+  "usageInfo": "정보",
+  "usageInfoTitle": "응답 통계",
+  "usageTokenGeneration": "토큰 생성",
+  "usagePromptEval": "프롬프트 처리",
+  "usageTotalDuration": "총 소요 시간",
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "토큰 {count}개",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}초",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "모델 로드 시간",
+  "usageQueueTime": "대기 시간",
+  "usageReasoningTokens": "추론 토큰",
+  "usageTotalTokens": "총 토큰"
 }
diff --git a/lib/l10n/app_nl.arb b/lib/l10n/app_nl.arb
index 2869cc6..998abd9 100644
--- a/lib/l10n/app_nl.arb
+++ b/lib/l10n/app_nl.arb
@@ -879,5 +879,38 @@
   "ldapNotEnabled": "LDAP-authenticatie is niet ingeschakeld op deze server",
   "ssoPlatformNotSupported": "SSO-authenticatie wordt niet ondersteund op dit platform. Gebruik in plaats daarvan inloggegevens of LDAP-authenticatie.",
   "continueWithProvider": "Doorgaan met {provider}",
-  "or": "of"
+  "or": "of",
+  "usageInfo": "Info",
+  "usageInfoTitle": "Antwoordstatistieken",
+  "usageTokenGeneration": "Tokengeneratie",
+  "usagePromptEval": "Promptverwerking",
+  "usageTotalDuration": "Totale duur",
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "{count} tokens",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}s",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "Modellaadtijd",
+  "usageQueueTime": "Wachttijd",
+  "usageReasoningTokens": "Redeneertokens",
+  "usageTotalTokens": "Totaal aantal tokens"
 }
diff --git a/lib/l10n/app_ru.arb b/lib/l10n/app_ru.arb
index a43af60..eab81df 100644
--- a/lib/l10n/app_ru.arb
+++ b/lib/l10n/app_ru.arb
@@ -879,5 +879,38 @@
   "ldapNotEnabled": "LDAP-аутентификация не включена на этом сервере",
   "ssoPlatformNotSupported": "SSO-аутентификация не поддерживается на этой платформе. Пожалуйста, используйте учётные данные или LDAP-аутентификацию.",
   "continueWithProvider": "Продолжить с {provider}",
-  "or": "или"
+  "or": "или",
+  "usageInfo": "Инфо",
+  "usageInfoTitle": "Статистика ответа",
+  "usageTokenGeneration": "Генерация токенов",
+  "usagePromptEval": "Обработка промпта",
+  "usageTotalDuration": "Общая длительность",
+  "usageTokensPerSecond": "{speed} т/с",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "{count, plural, one{{count} токен} few{{count} токена} many{{count} токенов} other{{count} токена}}",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}с",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "Время загрузки модели",
+  "usageQueueTime": "Время в очереди",
+  "usageReasoningTokens": "Токены рассуждений",
+  "usageTotalTokens": "Всего токенов"
 }
diff --git a/lib/l10n/app_zh.arb b/lib/l10n/app_zh.arb
index 547e3b8..2ea71de 100644
--- a/lib/l10n/app_zh.arb
+++ b/lib/l10n/app_zh.arb
@@ -879,5 +879,38 @@
   "ldapNotEnabled": "此服务器未启用 LDAP 验证",
   "ssoPlatformNotSupported": "此平台不支持 SSO 验证。请改用凭据或 LDAP 验证。",
   "continueWithProvider": "使用 {provider} 继续",
-  "or": "或"
+  "or": "或",
+  "usageInfo": "信息",
+  "usageInfoTitle": "响应统计",
+  "usageTokenGeneration": "Token 生成",
+  "usagePromptEval": "提示处理",
+  "usageTotalDuration": "总耗时",
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "{count} 个 token",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}秒",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "模型加载时间",
+  "usageQueueTime": "排队时间",
+  "usageReasoningTokens": "推理 token",
+  "usageTotalTokens": "总 token"
 }
diff --git a/lib/l10n/app_zh_Hant.arb b/lib/l10n/app_zh_Hant.arb
index aabaaad..e26a898 100644
--- a/lib/l10n/app_zh_Hant.arb
+++ b/lib/l10n/app_zh_Hant.arb
@@ -879,5 +879,38 @@
   "ldapNotEnabled": "此伺服器未啟用 LDAP 驗證",
   "ssoPlatformNotSupported": "此平台不支援 SSO 驗證。請改用憑據或 LDAP 驗證。",
   "continueWithProvider": "使用 {provider} 繼續",
-  "or": "或"
+  "or": "或",
+  "usageInfo": "資訊",
+  "usageInfoTitle": "回應統計",
+  "usageTokenGeneration": "Token 生成",
+  "usagePromptEval": "提示處理",
+  "usageTotalDuration": "總耗時",
+  "usageTokensPerSecond": "{speed} t/s",
+  "@usageTokensPerSecond": {
+    "placeholders": {
+      "speed": {
+        "type": "String"
+      }
+    }
+  },
+  "usageTokenCount": "{count} 個 token",
+  "@usageTokenCount": {
+    "placeholders": {
+      "count": {
+        "type": "int"
+      }
+    }
+  },
+  "usageSecondsFormat": "{seconds}秒",
+  "@usageSecondsFormat": {
+    "placeholders": {
+      "seconds": {
+        "type": "String"
+      }
+    }
+  },
+  "usageLoadDuration": "模型載入時間",
+  "usageQueueTime": "排隊時間",
+  "usageReasoningTokens": "推理 token",
+  "usageTotalTokens": "總 token"
 }