fix: server side tts on ios

This commit is contained in:
cogwheel0
2025-10-31 23:20:04 +05:30
parent 041c6d0df5
commit 5d33e5fe65
10 changed files with 184 additions and 115 deletions

View File

@@ -1,6 +1,7 @@
PODS:
- audioplayers_darwin (0.0.1):
- Flutter
- FlutterMacOS
- connectivity_plus (0.0.1):
- Flutter
- DKImagePickerController/Core (4.3.9):
@@ -86,7 +87,7 @@ PODS:
- FlutterMacOS
DEPENDENCIES:
- audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/ios`)
- audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/darwin`)
- connectivity_plus (from `.symlinks/plugins/connectivity_plus/ios`)
- file_picker (from `.symlinks/plugins/file_picker/ios`)
- Flutter (from `Flutter`)
@@ -117,7 +118,7 @@ SPEC REPOS:
EXTERNAL SOURCES:
audioplayers_darwin:
:path: ".symlinks/plugins/audioplayers_darwin/ios"
:path: ".symlinks/plugins/audioplayers_darwin/darwin"
connectivity_plus:
:path: ".symlinks/plugins/connectivity_plus/ios"
file_picker:
@@ -160,7 +161,7 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/webview_flutter_wkwebview/darwin"
SPEC CHECKSUMS:
audioplayers_darwin: ccf9c770ee768abb07e26d90af093f7bab1c12ab
audioplayers_darwin: 4f9ca89d92d3d21cec7ec580e78ca888e5fb68bd
connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd
DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60

View File

@@ -2438,7 +2438,7 @@ class ApiService {
return [];
}
Future<List<int>> generateSpeech({
Future<({Uint8List bytes, String mimeType})> generateSpeech({
required String text,
String? voice,
}) async {
@@ -2450,12 +2450,75 @@ class ApiService {
options: Options(responseType: ResponseType.bytes),
);
// Return audio data as bytes
final data = response.data;
if (data is List<int>) return data;
if (data is Uint8List) return data.toList();
if (data is List) return (data).cast<int>();
return [];
final rawMimeType = response.headers.value('content-type');
final audioBytes = _coerceAudioBytes(response.data);
final resolvedMimeType = _resolveAudioMimeType(rawMimeType, audioBytes);
return (bytes: audioBytes, mimeType: resolvedMimeType);
}
/// Normalizes a raw response payload into a freshly allocated [Uint8List].
///
/// Any `List<int>` (which includes `Uint8List`) is copied as-is, any other
/// `List` is copied through an `int` cast view, and every other value —
/// including `null` — produces an empty buffer.
Uint8List _coerceAudioBytes(Object? data) {
  return switch (data) {
    // Covers Uint8List too, since it implements List<int>.
    final List<int> ints => Uint8List.fromList(ints),
    // Loosely typed list: elements are cast to int while copying.
    final List<dynamic> items => Uint8List.fromList(items.cast<int>()),
    _ => Uint8List(0),
  };
}
/// Picks the MIME type to report for the audio in [bytes].
///
/// The `Content-Type` value in [rawMimeType] wins when it names a specific
/// media type; parameters (everything after the first `;`) are dropped and
/// the value is lower-cased because media types are case-insensitive.
///
/// A missing, empty, or generic octet-stream header carries no information
/// about the container, so in that case the leading bytes are probed for WAV,
/// Ogg, FLAC, MP4 and MPEG signatures. Returns `audio/mpeg` when nothing
/// matches.
String _resolveAudioMimeType(String? rawMimeType, Uint8List bytes) {
  // Header values that only say "some binary data".
  const genericTypes = {'application/octet-stream', 'binary/octet-stream'};

  final declared = rawMimeType?.split(';').first.trim().toLowerCase();
  if (declared != null &&
      declared.isNotEmpty &&
      !genericTypes.contains(declared)) {
    return declared;
  }

  // "RIFF" at offset 0 followed by the "WAVE" form type at offset 8.
  if (_matchesPrefix(bytes, const [0x52, 0x49, 0x46, 0x46]) &&
      _matchesPrefix(bytes, const [0x57, 0x41, 0x56, 0x45], offset: 8)) {
    return 'audio/wav';
  }
  // "OggS" capture pattern.
  if (_matchesPrefix(bytes, const [0x4F, 0x67, 0x67, 0x53])) {
    return 'audio/ogg';
  }
  // "fLaC" stream marker.
  if (_matchesPrefix(bytes, const [0x66, 0x4C, 0x61, 0x43])) {
    return 'audio/flac';
  }
  if (_looksLikeMp4(bytes)) {
    return 'audio/mp4';
  }
  if (_looksLikeMpeg(bytes)) {
    return 'audio/mpeg';
  }
  // Unrecognized payloads default to MPEG audio as well; the explicit probe
  // above is kept so the default can be changed independently of detection.
  return 'audio/mpeg';
}
/// Returns whether [bytes] contains [signature] starting at [offset].
///
/// Yields `false` when [bytes] is too short to hold the whole signature at
/// that position.
bool _matchesPrefix(Uint8List bytes, List<int> signature, {int offset = 0}) {
  final end = offset + signature.length;
  if (end > bytes.length) return false;

  var cursor = offset;
  for (final expected in signature) {
    if (bytes[cursor++] != expected) return false;
  }
  return true;
}
/// Returns whether [bytes] carries the ASCII marker `ftyp` at offset 4.
bool _looksLikeMp4(Uint8List bytes) {
  if (bytes.length < 8) {
    return false;
  }
  const ftypMarker = [0x66, 0x74, 0x79, 0x70];
  return _matchesPrefix(bytes, ftypMarker, offset: 4);
}
/// Returns whether [bytes] starts like MPEG audio.
///
/// Two openings are accepted: the ASCII tag `ID3`, or a first byte of `0xFF`
/// followed by a byte whose top three bits are all set.
bool _looksLikeMpeg(Uint8List bytes) {
  final size = bytes.length;

  final startsWithId3 =
      size >= 3 && bytes[0] == 0x49 && bytes[1] == 0x44 && bytes[2] == 0x33;
  if (startsWithId3) {
    return true;
  }

  if (size < 2) {
    return false;
  }
  return bytes[0] == 0xFF && (bytes[1] & 0xE0) == 0xE0;
}
// Server audio transcription removed; rely on on-device STT in UI layer

View File

@@ -27,11 +27,11 @@ class BackgroundStreamingHandler {
void Function(List<String> streamIds)? onStreamsSuspending;
void Function()? onBackgroundTaskExpiring;
void Function(List<String> streamIds, int estimatedSeconds)?
onBackgroundTaskExtended;
onBackgroundTaskExtended;
void Function()? onBackgroundKeepAlive;
bool Function()? shouldContinueInBackground;
void Function(String error, String errorType, List<String> streamIds)?
onServiceFailed;
onServiceFailed;
void _setupMethodCallHandler() {
_channel.setMethodCallHandler((call) async {

View File

@@ -53,7 +53,7 @@ class PersistentStreamingService with WidgetsBindingObserver {
error: '$errorType: $error',
data: {'affectedStreams': streamIds},
);
// Attempt immediate recovery for failed streams
for (final streamId in streamIds) {
final callback = _streamRecoveryCallbacks[streamId];
@@ -145,25 +145,25 @@ class PersistentStreamingService with WidgetsBindingObserver {
_heartbeatTimer = Timer.periodic(const Duration(seconds: 30), (_) {
if (_activeStreams.isNotEmpty && _isInBackground) {
_backgroundHandler.keepAlive();
// Check for stale streams during background operation
_checkStreamHealth();
}
});
}
void _checkStreamHealth() {
final now = DateTime.now();
final staleStreams = <String>[];
for (final entry in _streamMetadata.entries) {
final streamId = entry.key;
final metadata = entry.value;
final lastUpdate = metadata['lastUpdate'] as DateTime?;
if (lastUpdate != null) {
final timeSinceUpdate = now.difference(lastUpdate);
// If no update in 90 seconds while in background, consider stale
if (timeSinceUpdate > const Duration(seconds: 90)) {
DebugLogger.warning(
@@ -173,14 +173,12 @@ class PersistentStreamingService with WidgetsBindingObserver {
}
}
}
// Attempt recovery for stale streams
for (final streamId in staleStreams) {
final callback = _streamRecoveryCallbacks[streamId];
if (callback != null && _retryAttempts[streamId] == null) {
DebugLogger.stream(
'Initiating recovery for stale stream: $streamId',
);
DebugLogger.stream('Initiating recovery for stale stream: $streamId');
_attemptStreamRecovery(streamId, callback);
}
}

View File

@@ -4,15 +4,15 @@ import 'package:dio/dio.dart';
import '../utils/debug_logger.dart';
/// Parser for Server-Sent Events (SSE) streaming responses.
///
///
/// This matches the web client's EventSourceParserStream behavior,
/// parsing SSE data chunks and extracting OpenAI-compatible deltas.
class SSEStreamParser {
/// Parse an SSE response stream from Dio into text chunks.
///
///
/// Returns a stream of content strings extracted from OpenAI-style
/// completion chunks.
///
///
/// [heartbeatTimeout] - Maximum time without data before considering
/// the connection stale (default: 2 minutes)
/// [onHeartbeat] - Callback invoked when any data is received
@@ -24,46 +24,43 @@ class SSEStreamParser {
}) async* {
DateTime lastDataReceived = DateTime.now();
Timer? heartbeatTimer;
// Set up heartbeat monitoring
if (heartbeatTimeout.inMilliseconds > 0) {
heartbeatTimer = Timer.periodic(
const Duration(seconds: 30),
(timer) {
final timeSinceLastData = DateTime.now().difference(lastDataReceived);
if (timeSinceLastData > heartbeatTimeout) {
DebugLogger.warning(
'SSE stream heartbeat timeout: No data received for ${timeSinceLastData.inSeconds}s',
data: {'timeout': heartbeatTimeout.inSeconds},
);
timer.cancel();
}
},
);
heartbeatTimer = Timer.periodic(const Duration(seconds: 30), (timer) {
final timeSinceLastData = DateTime.now().difference(lastDataReceived);
if (timeSinceLastData > heartbeatTimeout) {
DebugLogger.warning(
'SSE stream heartbeat timeout: No data received for ${timeSinceLastData.inSeconds}s',
data: {'timeout': heartbeatTimeout.inSeconds},
);
timer.cancel();
}
});
}
try {
// Buffer for accumulating incomplete SSE messages
String buffer = '';
await for (final chunk in responseBody.stream) {
// Update last data timestamp and invoke heartbeat callback
lastDataReceived = DateTime.now();
onHeartbeat?.call();
// Convert bytes to string (Dio ResponseBody.stream always emits Uint8List)
final text = utf8.decode(chunk as List<int>, allowMalformed: true);
buffer += text;
// Process complete SSE messages (delimited by double newline)
final messages = buffer.split('\n\n');
// Keep the last (potentially incomplete) message in the buffer
buffer = messages.removeLast();
for (final message in messages) {
if (message.trim().isEmpty) continue;
// Parse SSE message
final content = _parseSSEMessage(message);
if (content != null) {
@@ -72,7 +69,7 @@ class SSEStreamParser {
DebugLogger.stream('SSE stream completed with [DONE] signal');
return;
}
// Split large deltas into smaller chunks for smoother UI updates
if (splitLargeDeltas && content.length > 5) {
yield* _splitIntoChunks(content);
@@ -82,7 +79,7 @@ class SSEStreamParser {
}
}
}
// Process any remaining buffered data
if (buffer.trim().isNotEmpty) {
final content = _parseSSEMessage(buffer);
@@ -103,34 +100,34 @@ class SSEStreamParser {
heartbeatTimer?.cancel();
}
}
/// Parse a single SSE message and extract content.
static String? _parseSSEMessage(String message) {
try {
// SSE format: "data: <json>\n" or just the JSON
String dataLine = message.trim();
// Remove "data: " prefix if present
if (dataLine.startsWith('data: ')) {
dataLine = dataLine.substring(6).trim();
} else if (dataLine.startsWith('data:')) {
dataLine = dataLine.substring(5).trim();
}
// Handle [DONE] signal
if (dataLine == '[DONE]' || dataLine == 'DONE') {
return '[DONE]';
}
// Skip empty data
if (dataLine.isEmpty) {
return null;
}
// Parse JSON
try {
final json = jsonDecode(dataLine) as Map<String, dynamic>;
// Handle errors
if (json['error'] != null) {
DebugLogger.error(
@@ -140,7 +137,7 @@ class SSEStreamParser {
);
return null;
}
// Extract content from OpenAI-style response
// Format: { choices: [{ delta: { content: "..." } }] }
final choices = json['choices'];
@@ -156,13 +153,13 @@ class SSEStreamParser {
}
}
}
// Alternative format: { content: "..." }
final directContent = json['content'];
if (directContent is String && directContent.isNotEmpty) {
return directContent;
}
return null;
} on FormatException catch (e) {
DebugLogger.warning(
@@ -181,24 +178,24 @@ class SSEStreamParser {
return null;
}
}
/// Split large content into smaller chunks for smoother streaming.
/// This matches the web client's streamLargeDeltasAsRandomChunks behavior.
///
/// Each chunk is 1-3 UTF-16 code units long, with the size derived from the
/// current clock millisecond. A chunk is extended by one code unit when it
/// would otherwise end between the two halves of a surrogate pair, so every
/// emitted string is well-formed (emoji and other astral characters are never
/// cut in half). Concatenating the emitted chunks yields [content].
static Stream<String> _splitIntoChunks(String content) async* {
  final total = content.length;
  var start = 0;

  while (start < total) {
    final left = total - start;

    // Pseudo-random chunk size between 1-3 code units
    final size = left < 3 ? left : 1 + (DateTime.now().millisecond % 3);
    var end = start + size;

    // Keep a high surrogate together with the low surrogate that follows it.
    if (end < total &&
        (content.codeUnitAt(end - 1) & 0xFC00) == 0xD800 &&
        (content.codeUnitAt(end) & 0xFC00) == 0xDC00) {
      end++;
    }

    yield content.substring(start, end);

    // Small delay for smoother visual effect (matching web client)
    await Future<void>.delayed(const Duration(milliseconds: 5));

    start = end;
  }
}

View File

@@ -43,7 +43,7 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
if (_shouldIgnoreError(error)) {
return;
}
// Defer to next frame to avoid setState during build exceptions
WidgetsBinding.instance.addPostFrameCallback((_) {
if (mounted) {
@@ -169,7 +169,7 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
),
),
const SizedBox(height: Spacing.lg),
// Error title
Text(
AppLocalizations.of(context)?.errorMessage ??
@@ -178,7 +178,7 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
textAlign: TextAlign.center,
),
const SizedBox(height: Spacing.sm),
// Error description
Text(
enhancedErrorService.getUserMessage(_error!),
@@ -187,10 +187,10 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
color: context.conduitTheme.textSecondary,
),
),
if (widget.allowRetry) ...[
const SizedBox(height: Spacing.xl),
// Retry button
SizedBox(
width: double.infinity,
@@ -198,8 +198,10 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
onPressed: _retry,
icon: const Icon(Icons.refresh_rounded),
style: FilledButton.styleFrom(
backgroundColor: context.conduitTheme.buttonPrimary,
foregroundColor: context.conduitTheme.buttonPrimaryText,
backgroundColor:
context.conduitTheme.buttonPrimary,
foregroundColor:
context.conduitTheme.buttonPrimaryText,
padding: const EdgeInsets.symmetric(
horizontal: Spacing.lg,
vertical: Spacing.md,
@@ -212,7 +214,8 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
elevation: 0,
),
label: Text(
AppLocalizations.of(context)?.retry ?? 'Try Again',
AppLocalizations.of(context)?.retry ??
'Try Again',
style: context.conduitTheme.bodySmall?.copyWith(
fontWeight: FontWeight.w600,
color: context.conduitTheme.buttonPrimaryText,

View File

@@ -9,6 +9,8 @@ import 'package:flutter_tts/flutter_tts.dart';
import '../../../core/services/api_service.dart';
import '../../../core/services/settings_service.dart';
/// Audio data for one chunk of speech paired with the MIME type to report
/// for it when the bytes are handed to the player.
typedef _SpeechChunk = ({Uint8List bytes, String mimeType});
/// Lightweight wrapper around FlutterTts to centralize configuration
class TextToSpeechService {
final FlutterTts _tts = FlutterTts();
@@ -20,7 +22,7 @@ class TextToSpeechService {
bool _available = false;
bool _voiceConfigured = false;
int _session = 0; // increments to cancel in-flight work
final List<Uint8List> _buffered = <Uint8List>[]; // server chunks
final List<_SpeechChunk> _buffered = <_SpeechChunk>[]; // server chunks
int _expectedChunks = 0;
int _currentIndex = -1;
bool _waitingNext = false;
@@ -51,9 +53,6 @@ class TextToSpeechService {
case PlayerState.paused:
_handlePause();
break;
case PlayerState.stopped:
_handleCancel();
break;
default:
break;
}
@@ -238,6 +237,7 @@ class TextToSpeechService {
_waitingNext = false;
if (_engine == TtsEngine.server) {
await _player.stop();
_handleCancel();
} else {
await _tts.stop();
}
@@ -486,18 +486,23 @@ class TextToSpeechService {
_expectedChunks = chunks.length;
// Fetch first chunk to start playback quickly
final firstBytes = await _fetchServerAudio(
final firstChunk = await _fetchServerAudio(
chunks.first,
effectiveVoice,
session,
);
if (session != _session) return; // canceled
if (firstBytes.isEmpty) throw Exception('Empty audio response');
if (firstChunk.bytes.isEmpty) {
throw Exception('Empty audio response');
}
await _player.stop();
_buffered.add(Uint8List.fromList(firstBytes));
final bufferedFirst = _cloneChunk(firstChunk);
_buffered.add(bufferedFirst);
_currentIndex = 0;
await _player.play(BytesSource(_buffered.first));
await _player.play(
BytesSource(bufferedFirst.bytes, mimeType: bufferedFirst.mimeType),
);
_onSentenceIndex?.call(0);
// Prefetch the rest in background
@@ -518,10 +523,10 @@ class TextToSpeechService {
for (final chunk in remaining) {
if (session != _session) return; // canceled
try {
final audio = await _fetchServerAudio(chunk, voice, session);
final audioChunk = await _fetchServerAudio(chunk, voice, session);
if (session != _session) return;
if (audio.isNotEmpty) {
_buffered.add(Uint8List.fromList(audio));
if (audioChunk.bytes.isNotEmpty) {
_buffered.add(_cloneChunk(audioChunk));
// If the player finished the previous chunk and is waiting, start now
if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
_waitingNext = false;
@@ -535,7 +540,7 @@ class TextToSpeechService {
}
}
Future<List<int>> _fetchServerAudio(
Future<_SpeechChunk> _fetchServerAudio(
String text,
String? voice,
int session,
@@ -565,11 +570,15 @@ class TextToSpeechService {
final nextIndex = _currentIndex + 1;
if (nextIndex < 0 || nextIndex >= _buffered.length) return;
_currentIndex = nextIndex;
final bytes = _buffered[nextIndex];
await _player.play(BytesSource(bytes));
final chunk = _buffered[nextIndex];
await _player.play(BytesSource(chunk.bytes, mimeType: chunk.mimeType));
_onSentenceIndex?.call(_currentIndex);
}
/// Returns a copy of [chunk] whose byte buffer is a separate allocation, so
/// the result does not share storage with the source chunk.
_SpeechChunk _cloneChunk(_SpeechChunk chunk) {
  final (:bytes, :mimeType) = chunk;
  return (bytes: Uint8List.fromList(bytes), mimeType: mimeType);
}
List<String> _splitForTts(String text) {
// Normalize whitespace
final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();

View File

@@ -1554,18 +1554,20 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
final bool enabled = onTap != null;
final Brightness brightness = Theme.of(context).brightness;
final theme = context.conduitTheme;
// Enhanced color scheme for active state
final Color activeBackground = isActive
? theme.buttonPrimary.withValues(alpha: brightness == Brightness.dark ? 0.22 : 0.14)
? theme.buttonPrimary.withValues(
alpha: brightness == Brightness.dark ? 0.22 : 0.14,
)
: Colors.transparent;
final Color inactiveBackground = brightness == Brightness.dark
? theme.cardBackground.withValues(alpha: 0.25)
: theme.cardBackground.withValues(alpha: 0.08);
final Color background = isActive ? activeBackground : inactiveBackground;
// Enhanced border styling
final Color activeBorder = theme.buttonPrimary.withValues(
alpha: brightness == Brightness.dark ? 0.85 : 0.75,
@@ -1574,17 +1576,17 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
alpha: brightness == Brightness.dark ? 0.4 : 0.25,
);
final Color borderColor = isActive ? activeBorder : inactiveBorder;
// Enhanced content colors
final Color activeTextColor = theme.buttonPrimary;
final Color inactiveTextColor = theme.textPrimary.withValues(
alpha: enabled ? (brightness == Brightness.dark ? 0.85 : 0.75) : Alpha.disabled,
alpha: enabled
? (brightness == Brightness.dark ? 0.85 : 0.75)
: Alpha.disabled,
);
final Color textColor = isActive ? activeTextColor : inactiveTextColor;
final Color iconColor = isActive
? activeTextColor
: inactiveTextColor;
final Color iconColor = isActive ? activeTextColor : inactiveTextColor;
return AnimatedContainer(
duration: const Duration(milliseconds: 200),
@@ -1632,11 +1634,7 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
AnimatedContainer(
duration: const Duration(milliseconds: 200),
curve: Curves.easeOutCubic,
child: Icon(
icon,
size: IconSize.small + 1,
color: iconColor,
),
child: Icon(icon, size: IconSize.small + 1, color: iconColor),
),
const SizedBox(width: Spacing.xs + 1),
AnimatedDefaultTextStyle(

View File

@@ -69,58 +69,58 @@ packages:
dependency: "direct main"
description:
name: audioplayers
sha256: c05c6147124cd63e725e861335a8b4d57300b80e6e92cea7c145c739223bbaef
sha256: "5441fa0ceb8807a5ad701199806510e56afde2b4913d9d17c2f19f2902cf0ae4"
url: "https://pub.dev"
source: hosted
version: "5.2.1"
version: "6.5.1"
audioplayers_android:
dependency: transitive
description:
name: audioplayers_android
sha256: b00e1a0e11365d88576320ec2d8c192bc21f1afb6c0e5995d1c57ae63156acb5
sha256: "60a6728277228413a85755bd3ffd6fab98f6555608923813ce383b190a360605"
url: "https://pub.dev"
source: hosted
version: "4.0.3"
version: "5.2.1"
audioplayers_darwin:
dependency: transitive
description:
name: audioplayers_darwin
sha256: "3034e99a6df8d101da0f5082dcca0a2a99db62ab1d4ddb3277bed3f6f81afe08"
sha256: "0811d6924904ca13f9ef90d19081e4a87f7297ddc19fc3d31f60af1aaafee333"
url: "https://pub.dev"
source: hosted
version: "5.0.2"
version: "6.3.0"
audioplayers_linux:
dependency: transitive
description:
name: audioplayers_linux
sha256: "60787e73fefc4d2e0b9c02c69885402177e818e4e27ef087074cf27c02246c9e"
sha256: f75bce1ce864170ef5e6a2c6a61cd3339e1a17ce11e99a25bae4474ea491d001
url: "https://pub.dev"
source: hosted
version: "3.1.0"
version: "4.2.1"
audioplayers_platform_interface:
dependency: transitive
description:
name: audioplayers_platform_interface
sha256: "365c547f1bb9e77d94dd1687903a668d8f7ac3409e48e6e6a3668a1ac2982adb"
sha256: "0e2f6a919ab56d0fec272e801abc07b26ae7f31980f912f24af4748763e5a656"
url: "https://pub.dev"
source: hosted
version: "6.1.0"
version: "7.1.1"
audioplayers_web:
dependency: transitive
description:
name: audioplayers_web
sha256: "22cd0173e54d92bd9b2c80b1204eb1eb159ece87475ab58c9788a70ec43c2a62"
sha256: "1c0f17cec68455556775f1e50ca85c40c05c714a99c5eb1d2d57cc17ba5522d7"
url: "https://pub.dev"
source: hosted
version: "4.1.0"
version: "5.1.1"
audioplayers_windows:
dependency: transitive
description:
name: audioplayers_windows
sha256: "9536812c9103563644ada2ef45ae523806b0745f7a78e89d1b5fb1951de90e1a"
sha256: "4048797865105b26d47628e6abb49231ea5de84884160229251f37dfcbe52fd7"
url: "https://pub.dev"
source: hosted
version: "3.1.0"
version: "4.2.1"
boolean_selector:
dependency: transitive
description:

View File

@@ -47,7 +47,7 @@ dependencies:
record: ^6.1.1
stts: ^1.2.5
flutter_tts: ^4.2.3
audioplayers: ^5.2.1
audioplayers: ^6.5.1
image_picker: ^1.2.0
file_picker: ^10.3.3
path_provider: ^2.1.4