fix: server side tts on ios

This commit is contained in:
cogwheel0
2025-10-31 23:20:04 +05:30
parent 041c6d0df5
commit 5d33e5fe65
10 changed files with 184 additions and 115 deletions

View File

@@ -1,6 +1,7 @@
PODS: PODS:
- audioplayers_darwin (0.0.1): - audioplayers_darwin (0.0.1):
- Flutter - Flutter
- FlutterMacOS
- connectivity_plus (0.0.1): - connectivity_plus (0.0.1):
- Flutter - Flutter
- DKImagePickerController/Core (4.3.9): - DKImagePickerController/Core (4.3.9):
@@ -86,7 +87,7 @@ PODS:
- FlutterMacOS - FlutterMacOS
DEPENDENCIES: DEPENDENCIES:
- audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/ios`) - audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/darwin`)
- connectivity_plus (from `.symlinks/plugins/connectivity_plus/ios`) - connectivity_plus (from `.symlinks/plugins/connectivity_plus/ios`)
- file_picker (from `.symlinks/plugins/file_picker/ios`) - file_picker (from `.symlinks/plugins/file_picker/ios`)
- Flutter (from `Flutter`) - Flutter (from `Flutter`)
@@ -117,7 +118,7 @@ SPEC REPOS:
EXTERNAL SOURCES: EXTERNAL SOURCES:
audioplayers_darwin: audioplayers_darwin:
:path: ".symlinks/plugins/audioplayers_darwin/ios" :path: ".symlinks/plugins/audioplayers_darwin/darwin"
connectivity_plus: connectivity_plus:
:path: ".symlinks/plugins/connectivity_plus/ios" :path: ".symlinks/plugins/connectivity_plus/ios"
file_picker: file_picker:
@@ -160,7 +161,7 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/webview_flutter_wkwebview/darwin" :path: ".symlinks/plugins/webview_flutter_wkwebview/darwin"
SPEC CHECKSUMS: SPEC CHECKSUMS:
audioplayers_darwin: ccf9c770ee768abb07e26d90af093f7bab1c12ab audioplayers_darwin: 4f9ca89d92d3d21cec7ec580e78ca888e5fb68bd
connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd
DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60 DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60

View File

@@ -2438,7 +2438,7 @@ class ApiService {
return []; return [];
} }
Future<List<int>> generateSpeech({ Future<({Uint8List bytes, String mimeType})> generateSpeech({
required String text, required String text,
String? voice, String? voice,
}) async { }) async {
@@ -2450,12 +2450,75 @@ class ApiService {
options: Options(responseType: ResponseType.bytes), options: Options(responseType: ResponseType.bytes),
); );
// Return audio data as bytes final rawMimeType = response.headers.value('content-type');
final data = response.data; final audioBytes = _coerceAudioBytes(response.data);
if (data is List<int>) return data; final resolvedMimeType = _resolveAudioMimeType(rawMimeType, audioBytes);
if (data is Uint8List) return data.toList();
if (data is List) return (data).cast<int>(); return (bytes: audioBytes, mimeType: resolvedMimeType);
return []; }
/// Normalizes a raw response payload into a freshly allocated [Uint8List].
///
/// Any `List<int>` (which includes `Uint8List`) is copied as-is; any other
/// `List` is copied through an `int` cast view, which throws if an element is
/// not an `int`. Every other value, including `null`, yields an empty buffer.
Uint8List _coerceAudioBytes(Object? data) {
  return switch (data) {
    final List<int> typed => Uint8List.fromList(typed),
    final List<dynamic> loose => Uint8List.fromList(loose.cast<int>()),
    _ => Uint8List(0),
  };
}
/// Resolves the MIME type to report for the audio payload in [bytes].
///
/// The `Content-Type` value in [rawMimeType] is used when it is informative:
/// parameters after `;` are stripped and the type is lower-cased, because
/// media type names are case-insensitive. A missing, blank, or generic
/// octet-stream header says nothing about the audio format, so in that case
/// the container is sniffed from the leading magic bytes instead
/// (WAV, Ogg, FLAC, MP4, MPEG).
///
/// Returns `audio/mpeg` when neither the header nor the bytes identify a
/// format.
String _resolveAudioMimeType(String? rawMimeType, Uint8List bytes) {
  // Header values that carry no format information; fall through to sniffing.
  const uninformative = {'application/octet-stream', 'binary/octet-stream'};

  final declared = rawMimeType?.split(';').first.trim().toLowerCase() ?? '';
  if (declared.isNotEmpty && !uninformative.contains(declared)) {
    return declared;
  }

  // "RIFF" at offset 0 and "WAVE" at offset 8.
  if (_matchesPrefix(bytes, const [0x52, 0x49, 0x46, 0x46]) &&
      _matchesPrefix(bytes, const [0x57, 0x41, 0x56, 0x45], offset: 8)) {
    return 'audio/wav';
  }
  // "OggS".
  if (_matchesPrefix(bytes, const [0x4F, 0x67, 0x67, 0x53])) {
    return 'audio/ogg';
  }
  // "fLaC".
  if (_matchesPrefix(bytes, const [0x66, 0x4C, 0x61, 0x43])) {
    return 'audio/flac';
  }
  if (_looksLikeMp4(bytes)) {
    return 'audio/mp4';
  }
  // Same value as the fallback below; kept explicit so the default can be
  // changed later without losing MPEG detection.
  if (_looksLikeMpeg(bytes)) {
    return 'audio/mpeg';
  }
  return 'audio/mpeg';
}
/// Whether [bytes] contain [signature] starting at [offset].
///
/// Returns `false` when [bytes] are too short to hold the whole signature at
/// that position.
bool _matchesPrefix(Uint8List bytes, List<int> signature, {int offset = 0}) {
  final end = offset + signature.length;
  if (end > bytes.length) return false;

  var cursor = offset;
  for (final expected in signature) {
    if (bytes[cursor++] != expected) return false;
  }
  return true;
}
/// Whether [bytes] carry the ASCII marker `ftyp` at byte offset 4.
bool _looksLikeMp4(Uint8List bytes) {
  if (bytes.length < 8) return false;

  const ftyp = [0x66, 0x74, 0x79, 0x70];
  return _matchesPrefix(bytes, ftyp, offset: 4);
}
/// Whether [bytes] start like MPEG audio.
///
/// Returns `true` when the buffer begins with the ASCII text `ID3`, or when
/// its first byte is `0xFF` and the top three bits of the second byte are
/// all set.
bool _looksLikeMpeg(Uint8List bytes) {
// 0x49 0x44 0x33 is ASCII "ID3".
if (bytes.length >= 3 &&
bytes[0] == 0x49 &&
bytes[1] == 0x44 &&
bytes[2] == 0x33) {
return true;
}
// Eleven consecutive set bits; presumably the MPEG audio frame sync word.
return bytes.length >= 2 && bytes[0] == 0xFF && (bytes[1] & 0xE0) == 0xE0;
} }
// Server audio transcription removed; rely on on-device STT in UI layer // Server audio transcription removed; rely on on-device STT in UI layer

View File

@@ -27,11 +27,11 @@ class BackgroundStreamingHandler {
void Function(List<String> streamIds)? onStreamsSuspending; void Function(List<String> streamIds)? onStreamsSuspending;
void Function()? onBackgroundTaskExpiring; void Function()? onBackgroundTaskExpiring;
void Function(List<String> streamIds, int estimatedSeconds)? void Function(List<String> streamIds, int estimatedSeconds)?
onBackgroundTaskExtended; onBackgroundTaskExtended;
void Function()? onBackgroundKeepAlive; void Function()? onBackgroundKeepAlive;
bool Function()? shouldContinueInBackground; bool Function()? shouldContinueInBackground;
void Function(String error, String errorType, List<String> streamIds)? void Function(String error, String errorType, List<String> streamIds)?
onServiceFailed; onServiceFailed;
void _setupMethodCallHandler() { void _setupMethodCallHandler() {
_channel.setMethodCallHandler((call) async { _channel.setMethodCallHandler((call) async {

View File

@@ -178,9 +178,7 @@ class PersistentStreamingService with WidgetsBindingObserver {
for (final streamId in staleStreams) { for (final streamId in staleStreams) {
final callback = _streamRecoveryCallbacks[streamId]; final callback = _streamRecoveryCallbacks[streamId];
if (callback != null && _retryAttempts[streamId] == null) { if (callback != null && _retryAttempts[streamId] == null) {
DebugLogger.stream( DebugLogger.stream('Initiating recovery for stale stream: $streamId');
'Initiating recovery for stale stream: $streamId',
);
_attemptStreamRecovery(streamId, callback); _attemptStreamRecovery(streamId, callback);
} }
} }

View File

@@ -27,19 +27,16 @@ class SSEStreamParser {
// Set up heartbeat monitoring // Set up heartbeat monitoring
if (heartbeatTimeout.inMilliseconds > 0) { if (heartbeatTimeout.inMilliseconds > 0) {
heartbeatTimer = Timer.periodic( heartbeatTimer = Timer.periodic(const Duration(seconds: 30), (timer) {
const Duration(seconds: 30), final timeSinceLastData = DateTime.now().difference(lastDataReceived);
(timer) { if (timeSinceLastData > heartbeatTimeout) {
final timeSinceLastData = DateTime.now().difference(lastDataReceived); DebugLogger.warning(
if (timeSinceLastData > heartbeatTimeout) { 'SSE stream heartbeat timeout: No data received for ${timeSinceLastData.inSeconds}s',
DebugLogger.warning( data: {'timeout': heartbeatTimeout.inSeconds},
'SSE stream heartbeat timeout: No data received for ${timeSinceLastData.inSeconds}s', );
data: {'timeout': heartbeatTimeout.inSeconds}, timer.cancel();
); }
timer.cancel(); });
}
},
);
} }
try { try {

View File

@@ -198,8 +198,10 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
onPressed: _retry, onPressed: _retry,
icon: const Icon(Icons.refresh_rounded), icon: const Icon(Icons.refresh_rounded),
style: FilledButton.styleFrom( style: FilledButton.styleFrom(
backgroundColor: context.conduitTheme.buttonPrimary, backgroundColor:
foregroundColor: context.conduitTheme.buttonPrimaryText, context.conduitTheme.buttonPrimary,
foregroundColor:
context.conduitTheme.buttonPrimaryText,
padding: const EdgeInsets.symmetric( padding: const EdgeInsets.symmetric(
horizontal: Spacing.lg, horizontal: Spacing.lg,
vertical: Spacing.md, vertical: Spacing.md,
@@ -212,7 +214,8 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
elevation: 0, elevation: 0,
), ),
label: Text( label: Text(
AppLocalizations.of(context)?.retry ?? 'Try Again', AppLocalizations.of(context)?.retry ??
'Try Again',
style: context.conduitTheme.bodySmall?.copyWith( style: context.conduitTheme.bodySmall?.copyWith(
fontWeight: FontWeight.w600, fontWeight: FontWeight.w600,
color: context.conduitTheme.buttonPrimaryText, color: context.conduitTheme.buttonPrimaryText,

View File

@@ -9,6 +9,8 @@ import 'package:flutter_tts/flutter_tts.dart';
import '../../../core/services/api_service.dart'; import '../../../core/services/api_service.dart';
import '../../../core/services/settings_service.dart'; import '../../../core/services/settings_service.dart';
typedef _SpeechChunk = ({Uint8List bytes, String mimeType});
/// Lightweight wrapper around FlutterTts to centralize configuration /// Lightweight wrapper around FlutterTts to centralize configuration
class TextToSpeechService { class TextToSpeechService {
final FlutterTts _tts = FlutterTts(); final FlutterTts _tts = FlutterTts();
@@ -20,7 +22,7 @@ class TextToSpeechService {
bool _available = false; bool _available = false;
bool _voiceConfigured = false; bool _voiceConfigured = false;
int _session = 0; // increments to cancel in-flight work int _session = 0; // increments to cancel in-flight work
final List<Uint8List> _buffered = <Uint8List>[]; // server chunks final List<_SpeechChunk> _buffered = <_SpeechChunk>[]; // server chunks
int _expectedChunks = 0; int _expectedChunks = 0;
int _currentIndex = -1; int _currentIndex = -1;
bool _waitingNext = false; bool _waitingNext = false;
@@ -51,9 +53,6 @@ class TextToSpeechService {
case PlayerState.paused: case PlayerState.paused:
_handlePause(); _handlePause();
break; break;
case PlayerState.stopped:
_handleCancel();
break;
default: default:
break; break;
} }
@@ -238,6 +237,7 @@ class TextToSpeechService {
_waitingNext = false; _waitingNext = false;
if (_engine == TtsEngine.server) { if (_engine == TtsEngine.server) {
await _player.stop(); await _player.stop();
_handleCancel();
} else { } else {
await _tts.stop(); await _tts.stop();
} }
@@ -486,18 +486,23 @@ class TextToSpeechService {
_expectedChunks = chunks.length; _expectedChunks = chunks.length;
// Fetch first chunk to start playback quickly // Fetch first chunk to start playback quickly
final firstBytes = await _fetchServerAudio( final firstChunk = await _fetchServerAudio(
chunks.first, chunks.first,
effectiveVoice, effectiveVoice,
session, session,
); );
if (session != _session) return; // canceled if (session != _session) return; // canceled
if (firstBytes.isEmpty) throw Exception('Empty audio response'); if (firstChunk.bytes.isEmpty) {
throw Exception('Empty audio response');
}
await _player.stop(); await _player.stop();
_buffered.add(Uint8List.fromList(firstBytes)); final bufferedFirst = _cloneChunk(firstChunk);
_buffered.add(bufferedFirst);
_currentIndex = 0; _currentIndex = 0;
await _player.play(BytesSource(_buffered.first)); await _player.play(
BytesSource(bufferedFirst.bytes, mimeType: bufferedFirst.mimeType),
);
_onSentenceIndex?.call(0); _onSentenceIndex?.call(0);
// Prefetch the rest in background // Prefetch the rest in background
@@ -518,10 +523,10 @@ class TextToSpeechService {
for (final chunk in remaining) { for (final chunk in remaining) {
if (session != _session) return; // canceled if (session != _session) return; // canceled
try { try {
final audio = await _fetchServerAudio(chunk, voice, session); final audioChunk = await _fetchServerAudio(chunk, voice, session);
if (session != _session) return; if (session != _session) return;
if (audio.isNotEmpty) { if (audioChunk.bytes.isNotEmpty) {
_buffered.add(Uint8List.fromList(audio)); _buffered.add(_cloneChunk(audioChunk));
// If the player finished the previous chunk and is waiting, start now // If the player finished the previous chunk and is waiting, start now
if (_waitingNext && (_currentIndex + 1) < _buffered.length) { if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
_waitingNext = false; _waitingNext = false;
@@ -535,7 +540,7 @@ class TextToSpeechService {
} }
} }
Future<List<int>> _fetchServerAudio( Future<_SpeechChunk> _fetchServerAudio(
String text, String text,
String? voice, String? voice,
int session, int session,
@@ -565,11 +570,15 @@ class TextToSpeechService {
final nextIndex = _currentIndex + 1; final nextIndex = _currentIndex + 1;
if (nextIndex < 0 || nextIndex >= _buffered.length) return; if (nextIndex < 0 || nextIndex >= _buffered.length) return;
_currentIndex = nextIndex; _currentIndex = nextIndex;
final bytes = _buffered[nextIndex]; final chunk = _buffered[nextIndex];
await _player.play(BytesSource(bytes)); await _player.play(BytesSource(chunk.bytes, mimeType: chunk.mimeType));
_onSentenceIndex?.call(_currentIndex); _onSentenceIndex?.call(_currentIndex);
} }
/// Returns a copy of [chunk] whose bytes live in a newly allocated buffer.
///
/// The MIME type is carried over unchanged.
_SpeechChunk _cloneChunk(_SpeechChunk chunk) {
  final (:bytes, :mimeType) = chunk;
  final copiedBytes = Uint8List.fromList(bytes);
  return (bytes: copiedBytes, mimeType: mimeType);
}
List<String> _splitForTts(String text) { List<String> _splitForTts(String text) {
// Normalize whitespace // Normalize whitespace
final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim(); final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();

View File

@@ -1557,7 +1557,9 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
// Enhanced color scheme for active state // Enhanced color scheme for active state
final Color activeBackground = isActive final Color activeBackground = isActive
? theme.buttonPrimary.withValues(alpha: brightness == Brightness.dark ? 0.22 : 0.14) ? theme.buttonPrimary.withValues(
alpha: brightness == Brightness.dark ? 0.22 : 0.14,
)
: Colors.transparent; : Colors.transparent;
final Color inactiveBackground = brightness == Brightness.dark final Color inactiveBackground = brightness == Brightness.dark
@@ -1578,13 +1580,13 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
// Enhanced content colors // Enhanced content colors
final Color activeTextColor = theme.buttonPrimary; final Color activeTextColor = theme.buttonPrimary;
final Color inactiveTextColor = theme.textPrimary.withValues( final Color inactiveTextColor = theme.textPrimary.withValues(
alpha: enabled ? (brightness == Brightness.dark ? 0.85 : 0.75) : Alpha.disabled, alpha: enabled
? (brightness == Brightness.dark ? 0.85 : 0.75)
: Alpha.disabled,
); );
final Color textColor = isActive ? activeTextColor : inactiveTextColor; final Color textColor = isActive ? activeTextColor : inactiveTextColor;
final Color iconColor = isActive final Color iconColor = isActive ? activeTextColor : inactiveTextColor;
? activeTextColor
: inactiveTextColor;
return AnimatedContainer( return AnimatedContainer(
duration: const Duration(milliseconds: 200), duration: const Duration(milliseconds: 200),
@@ -1632,11 +1634,7 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
AnimatedContainer( AnimatedContainer(
duration: const Duration(milliseconds: 200), duration: const Duration(milliseconds: 200),
curve: Curves.easeOutCubic, curve: Curves.easeOutCubic,
child: Icon( child: Icon(icon, size: IconSize.small + 1, color: iconColor),
icon,
size: IconSize.small + 1,
color: iconColor,
),
), ),
const SizedBox(width: Spacing.xs + 1), const SizedBox(width: Spacing.xs + 1),
AnimatedDefaultTextStyle( AnimatedDefaultTextStyle(

View File

@@ -69,58 +69,58 @@ packages:
dependency: "direct main" dependency: "direct main"
description: description:
name: audioplayers name: audioplayers
sha256: c05c6147124cd63e725e861335a8b4d57300b80e6e92cea7c145c739223bbaef sha256: "5441fa0ceb8807a5ad701199806510e56afde2b4913d9d17c2f19f2902cf0ae4"
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "5.2.1" version: "6.5.1"
audioplayers_android: audioplayers_android:
dependency: transitive dependency: transitive
description: description:
name: audioplayers_android name: audioplayers_android
sha256: b00e1a0e11365d88576320ec2d8c192bc21f1afb6c0e5995d1c57ae63156acb5 sha256: "60a6728277228413a85755bd3ffd6fab98f6555608923813ce383b190a360605"
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "4.0.3" version: "5.2.1"
audioplayers_darwin: audioplayers_darwin:
dependency: transitive dependency: transitive
description: description:
name: audioplayers_darwin name: audioplayers_darwin
sha256: "3034e99a6df8d101da0f5082dcca0a2a99db62ab1d4ddb3277bed3f6f81afe08" sha256: "0811d6924904ca13f9ef90d19081e4a87f7297ddc19fc3d31f60af1aaafee333"
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "5.0.2" version: "6.3.0"
audioplayers_linux: audioplayers_linux:
dependency: transitive dependency: transitive
description: description:
name: audioplayers_linux name: audioplayers_linux
sha256: "60787e73fefc4d2e0b9c02c69885402177e818e4e27ef087074cf27c02246c9e" sha256: f75bce1ce864170ef5e6a2c6a61cd3339e1a17ce11e99a25bae4474ea491d001
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "3.1.0" version: "4.2.1"
audioplayers_platform_interface: audioplayers_platform_interface:
dependency: transitive dependency: transitive
description: description:
name: audioplayers_platform_interface name: audioplayers_platform_interface
sha256: "365c547f1bb9e77d94dd1687903a668d8f7ac3409e48e6e6a3668a1ac2982adb" sha256: "0e2f6a919ab56d0fec272e801abc07b26ae7f31980f912f24af4748763e5a656"
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "6.1.0" version: "7.1.1"
audioplayers_web: audioplayers_web:
dependency: transitive dependency: transitive
description: description:
name: audioplayers_web name: audioplayers_web
sha256: "22cd0173e54d92bd9b2c80b1204eb1eb159ece87475ab58c9788a70ec43c2a62" sha256: "1c0f17cec68455556775f1e50ca85c40c05c714a99c5eb1d2d57cc17ba5522d7"
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "4.1.0" version: "5.1.1"
audioplayers_windows: audioplayers_windows:
dependency: transitive dependency: transitive
description: description:
name: audioplayers_windows name: audioplayers_windows
sha256: "9536812c9103563644ada2ef45ae523806b0745f7a78e89d1b5fb1951de90e1a" sha256: "4048797865105b26d47628e6abb49231ea5de84884160229251f37dfcbe52fd7"
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "3.1.0" version: "4.2.1"
boolean_selector: boolean_selector:
dependency: transitive dependency: transitive
description: description:

View File

@@ -47,7 +47,7 @@ dependencies:
record: ^6.1.1 record: ^6.1.1
stts: ^1.2.5 stts: ^1.2.5
flutter_tts: ^4.2.3 flutter_tts: ^4.2.3
audioplayers: ^5.2.1 audioplayers: ^6.5.1
image_picker: ^1.2.0 image_picker: ^1.2.0
file_picker: ^10.3.3 file_picker: ^10.3.3
path_provider: ^2.1.4 path_provider: ^2.1.4