diff --git a/lib/features/chat/widgets/assistant_message_widget.dart b/lib/features/chat/widgets/assistant_message_widget.dart index b9bc418..a79000c 100644 --- a/lib/features/chat/widgets/assistant_message_widget.dart +++ b/lib/features/chat/widgets/assistant_message_widget.dart @@ -1532,10 +1532,26 @@ class _AssistantMessageWidgetState extends ConsumerState ? _parseNum(completionDetails['reasoning_tokens']) : null; + // llama.cpp server format: pre-calculated tokens/second values + final predictedPerSecond = _parseNum(usage['predicted_per_second']); + final promptPerSecond = _parseNum(usage['prompt_per_second']); + final predictedN = _parseNum(usage['predicted_n']); + final promptN = _parseNum(usage['prompt_n']); + // --- Token Generation Speed --- - // Priority: Ollama format > Groq/OpenAI extended format > token count only - if (evalCount != null && evalDuration != null && evalDuration > 0) { - // Ollama/llama.cpp: duration in nanoseconds + // Priority: llama.cpp direct > Ollama calculated > Groq/OpenAI > count only + if (predictedPerSecond != null && predictedPerSecond > 0) { + // llama.cpp server: pre-calculated tokens/second + stats.add( + _UsageStatRow( + label: l10n.usageTokenGeneration, + value: l10n.usageTokensPerSecond(predictedPerSecond.toStringAsFixed(1)), + detail: predictedN != null ? l10n.usageTokenCount(predictedN.toInt()) : null, + theme: theme, + ), + ); + } else if (evalCount != null && evalDuration != null && evalDuration > 0) { + // Ollama: duration in nanoseconds final tgSpeed = evalCount / (evalDuration / 1e9); stats.add( _UsageStatRow( @@ -1570,10 +1586,21 @@ class _AssistantMessageWidgetState extends ConsumerState } // --- Prompt Processing Speed --- - if (promptEvalCount != null && + // Priority: llama.cpp direct > Ollama calculated > Groq/OpenAI > count only + if (promptPerSecond != null && promptPerSecond > 0) { + // llama.cpp server: pre-calculated tokens/second + stats.add( + _UsageStatRow( + label: l10n.usagePromptEval, + value: l10n.usageTokensPerSecond(promptPerSecond.toStringAsFixed(1)), + detail: promptN != null ? l10n.usageTokenCount(promptN.toInt()) : null, + theme: theme, + ), + ); + } else if (promptEvalCount != null && promptEvalDuration != null && promptEvalDuration > 0) { - // Ollama/llama.cpp: duration in nanoseconds + // Ollama: duration in nanoseconds final ppSpeed = promptEvalCount / (promptEvalDuration / 1e9); stats.add( _UsageStatRow(