feat(socket): Add connectivity and health tracking to socket service
This commit is contained in:
@@ -4,6 +4,7 @@ import 'package:flutter/widgets.dart';
|
||||
import 'package:socket_io_client/socket_io_client.dart' as io;
|
||||
|
||||
import '../models/server_config.dart';
|
||||
import '../models/socket_health.dart';
|
||||
import '../utils/debug_logger.dart';
|
||||
import 'socket_tls_override.dart';
|
||||
|
||||
@@ -33,6 +34,56 @@ class SocketService with WidgetsBindingObserver {
|
||||
/// Heartbeat interval matching OpenWebUI's 30-second interval.
|
||||
static const Duration _heartbeatInterval = Duration(seconds: 30);
|
||||
|
||||
/// Most recent heartbeat latency estimate, in milliseconds.
///
/// Holds `-1` while no estimate is available; it is reset to `-1` when the
/// socket disconnects. The value is an approximation taken shortly after a
/// heartbeat is emitted, not a server-acknowledged round trip.
int _lastHeartbeatLatencyMs = -1;

/// When the most recent heartbeat was considered successful, if ever.
DateTime? _lastSuccessfulHeartbeat;

/// How many times the reconnect handler has run since this service was
/// created.
int _reconnectCount = 0;

/// Pending waiter used by callers that wait for a connection event.
///
/// It is completed by the connect/reconnect handlers, failed by the
/// disconnect handler and on dispose, and reset to `null` afterwards.
Completer<void>? _connectionCompleter;

/// Broadcast source backing [healthStream].
final StreamController<SocketHealth> _healthController =
    StreamController.broadcast();

/// Broadcast stream of [SocketHealth] snapshots.
Stream<SocketHealth> get healthStream => _healthController.stream;

/// Latest heartbeat latency estimate in milliseconds, or `-1` if unknown.
int get lastHeartbeatLatencyMs => _lastHeartbeatLatencyMs;

/// Time of the last heartbeat that was considered successful, or `null`.
DateTime? get lastSuccessfulHeartbeat => _lastSuccessfulHeartbeat;

/// Reconnections counted since this service was created.
int get reconnectCount => _reconnectCount;
|
||||
|
||||
/// Name of the transport reported by the socket's engine.
///
/// This is expected to be a value such as `'websocket'` or `'polling'`.
/// Evaluates to `'unknown'` when there is no socket or engine, when the
/// engine has no transport, when the transport has no name, or when reading
/// the transport throws.
String get currentTransport {
  const fallback = 'unknown';

  final engine = _socket?.io.engine;
  if (engine == null) return fallback;

  try {
    // Best-effort lookup through the engine's transport object.
    return engine.transport?.name ?? fallback;
  } catch (_) {
    // Any failure while inspecting the engine maps to the fallback.
    return fallback;
  }
}
|
||||
|
||||
/// Builds a [SocketHealth] snapshot from the service's present state.
///
/// A new instance is created on every access from the connection flag, the
/// current transport name, the latest latency estimate, the reconnect
/// counter, and the last successful heartbeat time.
SocketHealth get currentHealth {
  return SocketHealth(
    isConnected: isConnected,
    transport: currentTransport,
    latencyMs: _lastHeartbeatLatencyMs,
    lastHeartbeat: _lastSuccessfulHeartbeat,
    reconnectCount: _reconnectCount,
  );
}
|
||||
|
||||
final Map<String, _ChatEventRegistration> _chatEventHandlers = {};
|
||||
final Map<String, _ChannelEventRegistration> _channelEventHandlers = {};
|
||||
int _handlerSeed = 0;
|
||||
@@ -324,22 +375,44 @@ class SocketService with WidgetsBindingObserver {
|
||||
_chatEventHandlers.clear();
|
||||
_channelEventHandlers.clear();
|
||||
_reconnectController.close();
|
||||
_healthController.close();
|
||||
_connectionCompleter?.completeError(StateError('Service disposed'));
|
||||
_connectionCompleter = null;
|
||||
}
|
||||
|
||||
/// Ensures there is an active connection, waiting up to [timeout] for one.
///
/// Best-effort: anything thrown by [connect] is ignored and the outcome is
/// decided solely by [isConnected]. Waiting is event-based (the connect and
/// reconnect handlers complete the shared [_connectionCompleter]) rather
/// than polling.
///
/// Returns `true` if the socket is connected when the wait ends, `false`
/// otherwise. Never throws because of a timeout, disconnect, or dispose.
Future<bool> ensureConnected({
  Duration timeout = const Duration(seconds: 2),
}) async {
  if (isConnected) return true;

  // Share one waiter between concurrent callers, but keep a local reference:
  // the connect/disconnect handlers reset the field to null after completing
  // it, so the field must not be re-read (or null-asserted) after an await.
  final completer = _connectionCompleter ??= Completer<void>();

  // The waiter may be failed by a disconnect or dispose before anything
  // awaits it (for example while connect() is still running). Registering a
  // no-op error handler keeps that from being reported as an unhandled
  // asynchronous error; awaiting the future below still observes the error.
  completer.future.ignore();

  try {
    await connect();
  } catch (_) {
    // Deliberately best-effort; the connection state is checked below.
  }

  try {
    // Already connected once connect() returned: no need to wait.
    if (isConnected) return true;

    await completer.future.timeout(timeout);
  } catch (_) {
    // Either no connection event arrived within [timeout], or the waiter was
    // failed by a disconnect/dispose. Both cases just report current state.
  } finally {
    // Drop the waiter only if it is still ours, so a newer waiter installed
    // by another caller in the meantime is not discarded.
    if (identical(_connectionCompleter, completer)) {
      _connectionCompleter = null;
    }
  }

  return isConnected;
}
|
||||
|
||||
void _bindCoreSocketHandlers() {
|
||||
@@ -377,10 +450,15 @@ class SocketService with WidgetsBindingObserver {
|
||||
|
||||
void _handleConnect(dynamic _) {
|
||||
_isConnecting = false;
|
||||
|
||||
// Reset polling fallback on successful connection - allows retrying
|
||||
// WebSocket-only mode after conditions improve (fixes permanent fallback)
|
||||
_forcePollingFallback = false;
|
||||
|
||||
DebugLogger.log(
|
||||
'Socket connected',
|
||||
scope: 'socket',
|
||||
data: {'sessionId': _socket?.id},
|
||||
data: {'sessionId': _socket?.id, 'transport': currentTransport},
|
||||
);
|
||||
|
||||
if (_authToken != null && _authToken!.isNotEmpty) {
|
||||
@@ -391,6 +469,13 @@ class SocketService with WidgetsBindingObserver {
|
||||
|
||||
// Start heartbeat timer to keep connection alive
|
||||
_startHeartbeat();
|
||||
|
||||
// Complete any pending connection waiters
|
||||
_connectionCompleter?.complete();
|
||||
_connectionCompleter = null;
|
||||
|
||||
// Emit health update
|
||||
_emitHealthUpdate();
|
||||
}
|
||||
|
||||
void _handleReconnectAttempt(dynamic attempt) {
|
||||
@@ -404,10 +489,20 @@ class SocketService with WidgetsBindingObserver {
|
||||
|
||||
void _handleReconnect(dynamic attempt) {
|
||||
_isConnecting = false;
|
||||
_reconnectCount++;
|
||||
|
||||
// Reset polling fallback on successful reconnection
|
||||
_forcePollingFallback = false;
|
||||
|
||||
DebugLogger.log(
|
||||
'Socket reconnected',
|
||||
scope: 'socket',
|
||||
data: {'attempt': attempt, 'sessionId': _socket?.id},
|
||||
data: {
|
||||
'attempt': attempt,
|
||||
'sessionId': _socket?.id,
|
||||
'transport': currentTransport,
|
||||
'totalReconnects': _reconnectCount,
|
||||
},
|
||||
);
|
||||
|
||||
if (_authToken != null && _authToken!.isNotEmpty) {
|
||||
@@ -419,10 +514,17 @@ class SocketService with WidgetsBindingObserver {
|
||||
// Restart heartbeat after reconnection
|
||||
_startHeartbeat();
|
||||
|
||||
// Complete any pending connection waiters
|
||||
_connectionCompleter?.complete();
|
||||
_connectionCompleter = null;
|
||||
|
||||
// Notify listeners that a reconnection occurred so they can refresh state
|
||||
if (!_reconnectController.isClosed) {
|
||||
_reconnectController.add(null);
|
||||
}
|
||||
|
||||
// Emit health update
|
||||
_emitHealthUpdate();
|
||||
}
|
||||
|
||||
void _handleConnectError(dynamic err) {
|
||||
@@ -466,25 +568,68 @@ class SocketService with WidgetsBindingObserver {
|
||||
|
||||
// Stop heartbeat when disconnected
|
||||
_stopHeartbeat();
|
||||
|
||||
// Reset latency info on disconnect
|
||||
_lastHeartbeatLatencyMs = -1;
|
||||
|
||||
// Fail any pending connection waiters
|
||||
_connectionCompleter?.completeError(
|
||||
StateError('Socket disconnected: $reason'),
|
||||
);
|
||||
_connectionCompleter = null;
|
||||
|
||||
// Emit health update
|
||||
_emitHealthUpdate();
|
||||
}
|
||||
|
||||
/// Start time of the heartbeat whose delayed liveness check has not run yet.
///
/// Used as a token: the delayed check only records a result if this still
/// equals the start time it was scheduled with.
DateTime? _pendingHeartbeatStart;

/// Starts the periodic heartbeat that keeps the connection alive.
///
/// Any previously running heartbeat timer is stopped first. On every
/// [_heartbeatInterval] tick, and only while the socket reports itself as
/// connected, exactly one `heartbeat` event is emitted.
///
/// The recorded "latency" is an approximation, not a round trip: no
/// acknowledgement is requested, so the heartbeat is considered successful
/// if the socket is still connected 100 ms after the emit. The elapsed time
/// at that check is stored in [_lastHeartbeatLatencyMs], and a health update
/// is emitted.
void _startHeartbeat() {
  _stopHeartbeat();

  _heartbeatTimer = Timer.periodic(_heartbeatInterval, (_) {
    if (_socket?.connected != true) return;

    final start = DateTime.now();
    // Monotonic clock for the elapsed time, so wall-clock adjustments cannot
    // produce negative or inflated values.
    final elapsed = Stopwatch()..start();

    // Mark this heartbeat as the one awaiting its delayed check.
    _pendingHeartbeatStart = start;

    // Fire-and-forget: the server may or may not acknowledge this event.
    _socket?.emit('heartbeat', <String, dynamic>{});

    Future.delayed(const Duration(milliseconds: 100), () {
      // Skip if a newer heartbeat superseded this one or the socket dropped.
      if (_pendingHeartbeatStart != start || _socket?.connected != true) {
        return;
      }

      _lastHeartbeatLatencyMs = elapsed.elapsedMilliseconds;
      _lastSuccessfulHeartbeat = DateTime.now();
      _pendingHeartbeatStart = null;
      _emitHealthUpdate();
    });
  });
}
|
||||
|
||||
/// Cancels the periodic heartbeat timer, if one is running, and clears the
/// stored reference.
void _stopHeartbeat() {
  final timer = _heartbeatTimer;
  _heartbeatTimer = null;
  timer?.cancel();
}
|
||||
|
||||
/// Publishes the current health snapshot to [healthStream] listeners.
///
/// Does nothing once the health controller has been closed.
void _emitHealthUpdate() {
  if (_healthController.isClosed) return;
  _healthController.add(currentHealth);
}
|
||||
|
||||
void _handleChatEvent(dynamic data, [dynamic ack]) {
|
||||
final map = _coerceToMap(data);
|
||||
if (map == null) return;
|
||||
|
||||
@@ -210,6 +210,12 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
|
||||
void Function()? onChatTagsUpdated,
|
||||
required void Function() finishStreaming,
|
||||
required List<ChatMessage> Function() getMessages,
|
||||
|
||||
/// Whether the model uses reasoning/thinking (needs longer watchdog window).
|
||||
bool modelUsesReasoning = false,
|
||||
|
||||
/// Whether tools are enabled (needs longer watchdog window).
|
||||
bool toolsEnabled = false,
|
||||
}) {
|
||||
// Track if streaming has been finished to avoid duplicate cleanup
|
||||
bool hasFinished = false;
|
||||
@@ -257,11 +263,11 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
|
||||
// Must not be `late` to avoid LateInitializationError if callbacks fire early.
|
||||
void Function() syncImages = () {};
|
||||
|
||||
// Shared helper to poll server for message content.
|
||||
// Shared helper to poll server for message content, retrying with linear backoff.
|
||||
// Used by watchdog timeout and reconnection handler to recover from missed events.
|
||||
// Returns (content, followUps, isDone) or null if fetch fails or message not found.
|
||||
Future<({String content, List<String> followUps, bool isDone})?>
|
||||
pollServerForMessage() async {
|
||||
pollServerForMessage({int attempt = 0, int maxAttempts = 3}) async {
|
||||
try {
|
||||
final chatId = activeConversationId;
|
||||
if (chatId == null || chatId.isEmpty) return null;
|
||||
@@ -307,7 +313,21 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
|
||||
|
||||
return (content: content, followUps: followUps, isDone: isDone);
|
||||
} catch (e) {
|
||||
DebugLogger.log('Server poll failed: $e', scope: 'streaming/helper');
|
||||
DebugLogger.log(
|
||||
'Server poll failed (attempt ${attempt + 1}/$maxAttempts): $e',
|
||||
scope: 'streaming/helper',
|
||||
);
|
||||
|
||||
// Linear backoff retry: wait (attempt + 1) seconds before the next attempt
// (1s, then 2s, with the default maxAttempts of 3)
|
||||
if (attempt < maxAttempts - 1) {
|
||||
final backoffMs = (attempt + 1) * 1000;
|
||||
await Future.delayed(Duration(milliseconds: backoffMs));
|
||||
return pollServerForMessage(
|
||||
attempt: attempt + 1,
|
||||
maxAttempts: maxAttempts,
|
||||
);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -344,11 +364,33 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
|
||||
}
|
||||
|
||||
if (hasSocketSignals) {
|
||||
// Inactivity timeout - if no data arrives for 10 seconds, poll server
|
||||
// Adaptive inactivity timeout based on model capabilities.
|
||||
// Reasoning models and tool-enabled flows need longer windows as they may
|
||||
// have longer gaps between tokens during processing.
|
||||
final watchdogWindow = modelUsesReasoning || toolsEnabled
|
||||
? const Duration(seconds: 30) // Longer for reasoning/tools
|
||||
: const Duration(seconds: 15); // Standard for regular models
|
||||
|
||||
final watchdogCap = modelUsesReasoning || toolsEnabled
|
||||
? const Duration(minutes: 10) // Longer cap for complex operations
|
||||
: const Duration(minutes: 5);
|
||||
|
||||
DebugLogger.log(
|
||||
'Initializing watchdog',
|
||||
scope: 'streaming/helper',
|
||||
data: {
|
||||
'windowSeconds': watchdogWindow.inSeconds,
|
||||
'capMinutes': watchdogCap.inMinutes,
|
||||
'modelUsesReasoning': modelUsesReasoning,
|
||||
'toolsEnabled': toolsEnabled,
|
||||
},
|
||||
);
|
||||
|
||||
// Inactivity timeout - if no data arrives within window, poll server
|
||||
// and finish streaming. This handles stuck connections (issue #172).
|
||||
socketWatchdog = InactivityWatchdog(
|
||||
window: const Duration(seconds: 10),
|
||||
absoluteCap: const Duration(minutes: 5),
|
||||
window: watchdogWindow,
|
||||
absoluteCap: watchdogCap,
|
||||
onTimeout: () async {
|
||||
DebugLogger.log(
|
||||
'Socket watchdog timeout - polling server',
|
||||
@@ -1463,14 +1505,26 @@ ActiveSocketStream attachUnifiedChunkedStreaming({
|
||||
if (isRecoverable && socketService != null) {
|
||||
// Try to recover via socket connection if available
|
||||
try {
|
||||
await socketService.ensureConnected(
|
||||
final connected = await socketService.ensureConnected(
|
||||
timeout: const Duration(seconds: 5),
|
||||
);
|
||||
// Don't finish streaming immediately - let socket recovery handle it
|
||||
socketWatchdog?.stop();
|
||||
return;
|
||||
} catch (_) {
|
||||
// Socket recovery failed, fall through to cleanup
|
||||
|
||||
if (connected) {
|
||||
DebugLogger.log(
|
||||
'Socket recovery successful - restarting watchdog',
|
||||
scope: 'streaming/helper',
|
||||
);
|
||||
// Restart watchdog instead of stopping it - this ensures we
|
||||
// still have a timeout mechanism if socket recovery succeeds
|
||||
// but events don't resume (fixes premature watchdog stop bug)
|
||||
socketWatchdog?.ping();
|
||||
return;
|
||||
}
|
||||
} catch (e) {
|
||||
DebugLogger.log(
|
||||
'Socket recovery failed: $e',
|
||||
scope: 'streaming/helper',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user