fix: server side tts on ios

This commit is contained in:
cogwheel0
2025-10-31 23:20:04 +05:30
parent 041c6d0df5
commit 5d33e5fe65
10 changed files with 184 additions and 115 deletions

View File

@@ -1,6 +1,7 @@
PODS:
- audioplayers_darwin (0.0.1):
- Flutter
- FlutterMacOS
- connectivity_plus (0.0.1):
- Flutter
- DKImagePickerController/Core (4.3.9):
@@ -86,7 +87,7 @@ PODS:
- FlutterMacOS
DEPENDENCIES:
- audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/ios`)
- audioplayers_darwin (from `.symlinks/plugins/audioplayers_darwin/darwin`)
- connectivity_plus (from `.symlinks/plugins/connectivity_plus/ios`)
- file_picker (from `.symlinks/plugins/file_picker/ios`)
- Flutter (from `Flutter`)
@@ -117,7 +118,7 @@ SPEC REPOS:
EXTERNAL SOURCES:
audioplayers_darwin:
:path: ".symlinks/plugins/audioplayers_darwin/ios"
:path: ".symlinks/plugins/audioplayers_darwin/darwin"
connectivity_plus:
:path: ".symlinks/plugins/connectivity_plus/ios"
file_picker:
@@ -160,7 +161,7 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/webview_flutter_wkwebview/darwin"
SPEC CHECKSUMS:
audioplayers_darwin: ccf9c770ee768abb07e26d90af093f7bab1c12ab
audioplayers_darwin: 4f9ca89d92d3d21cec7ec580e78ca888e5fb68bd
connectivity_plus: cb623214f4e1f6ef8fe7403d580fdad517d2f7dd
DKImagePickerController: 946cec48c7873164274ecc4624d19e3da4c1ef3c
DKPhotoGallery: b3834fecb755ee09a593d7c9e389d8b5d6deed60

View File

@@ -2438,7 +2438,7 @@ class ApiService {
return [];
}
Future<List<int>> generateSpeech({
Future<({Uint8List bytes, String mimeType})> generateSpeech({
required String text,
String? voice,
}) async {
@@ -2450,12 +2450,75 @@ class ApiService {
options: Options(responseType: ResponseType.bytes),
);
// Return audio data as bytes
final data = response.data;
if (data is List<int>) return data;
if (data is Uint8List) return data.toList();
if (data is List) return (data).cast<int>();
return [];
final rawMimeType = response.headers.value('content-type');
final audioBytes = _coerceAudioBytes(response.data);
final resolvedMimeType = _resolveAudioMimeType(rawMimeType, audioBytes);
return (bytes: audioBytes, mimeType: resolvedMimeType);
}
/// Normalizes a raw response payload into a freshly allocated [Uint8List].
///
/// Any list of ints (including an existing [Uint8List]) is copied; a loosely
/// typed list is viewed as ints and then copied. Every other value, including
/// `null`, produces an empty buffer.
Uint8List _coerceAudioBytes(Object? data) {
  return switch (data) {
    // Covers Uint8List as well, since it is itself a List<int>.
    List<int> ints => Uint8List.fromList(ints),
    // e.g. List<dynamic>; elements are cast to int while copying.
    List<Object?> items => Uint8List.fromList(items.cast<int>()),
    _ => Uint8List(0),
  };
}
/// Picks the MIME type to hand to the audio player for [bytes].
///
/// [rawMimeType] is the response's `Content-Type` value, if any. Parameters
/// after `;` are stripped and the type is lower-cased, since media types are
/// case-insensitive. The declared type wins unless it is missing or a generic
/// binary placeholder; in that case the container is sniffed from the leading
/// bytes of the payload. Returns `audio/mpeg` when nothing else matches.
String _resolveAudioMimeType(String? rawMimeType, Uint8List bytes) {
  // Placeholder types say nothing about the payload, so they must not
  // short-circuit the signature sniffing below.
  const genericTypes = {'application/octet-stream', 'binary/octet-stream'};

  final declared = rawMimeType?.split(';').first.trim().toLowerCase();
  if (declared != null &&
      declared.isNotEmpty &&
      !genericTypes.contains(declared)) {
    return declared;
  }

  // "RIFF" at offset 0 followed by "WAVE" at offset 8.
  if (_matchesPrefix(bytes, const [0x52, 0x49, 0x46, 0x46]) &&
      _matchesPrefix(bytes, const [0x57, 0x41, 0x56, 0x45], offset: 8)) {
    return 'audio/wav';
  }
  // "OggS"
  if (_matchesPrefix(bytes, const [0x4F, 0x67, 0x67, 0x53])) {
    return 'audio/ogg';
  }
  // "fLaC"
  if (_matchesPrefix(bytes, const [0x66, 0x4C, 0x61, 0x43])) {
    return 'audio/flac';
  }
  if (_looksLikeMp4(bytes)) {
    return 'audio/mp4';
  }
  if (_looksLikeMpeg(bytes)) {
    return 'audio/mpeg';
  }
  // Unknown signature: keep the historical default.
  return 'audio/mpeg';
}
/// Whether [signature] occurs in [bytes] starting at [offset].
///
/// Returns `false` when [bytes] is too short to hold the whole signature at
/// that position.
bool _matchesPrefix(Uint8List bytes, List<int> signature, {int offset = 0}) {
  final end = offset + signature.length;
  if (end > bytes.length) return false;

  var cursor = offset;
  for (final expected in signature) {
    if (bytes[cursor++] != expected) return false;
  }
  return true;
}
/// Whether [bytes] carries the ASCII marker `ftyp` at offset 4.
bool _looksLikeMp4(Uint8List bytes) {
  if (bytes.length < 8) return false;

  const ftyp = [0x66, 0x74, 0x79, 0x70];
  return _matchesPrefix(bytes, ftyp, offset: 4);
}
/// Whether [bytes] starts like an MPEG audio stream.
///
/// Accepts either the ASCII tag `ID3` at the very start, or a first byte of
/// `0xFF` followed by a byte whose top three bits are all set.
bool _looksLikeMpeg(Uint8List bytes) {
  return switch (bytes) {
    // 'I', 'D', '3'
    [0x49, 0x44, 0x33, ...] => true,
    // Eleven leading set bits: 0xFF plus the high three bits of the next byte.
    [0xFF, final second, ...] => (second & 0xE0) == 0xE0,
    _ => false,
  };
}
// Server audio transcription removed; rely on on-device STT in UI layer

View File

@@ -178,9 +178,7 @@ class PersistentStreamingService with WidgetsBindingObserver {
for (final streamId in staleStreams) {
final callback = _streamRecoveryCallbacks[streamId];
if (callback != null && _retryAttempts[streamId] == null) {
DebugLogger.stream(
'Initiating recovery for stale stream: $streamId',
);
DebugLogger.stream('Initiating recovery for stale stream: $streamId');
_attemptStreamRecovery(streamId, callback);
}
}

View File

@@ -27,9 +27,7 @@ class SSEStreamParser {
// Set up heartbeat monitoring
if (heartbeatTimeout.inMilliseconds > 0) {
heartbeatTimer = Timer.periodic(
const Duration(seconds: 30),
(timer) {
heartbeatTimer = Timer.periodic(const Duration(seconds: 30), (timer) {
final timeSinceLastData = DateTime.now().difference(lastDataReceived);
if (timeSinceLastData > heartbeatTimeout) {
DebugLogger.warning(
@@ -38,8 +36,7 @@ class SSEStreamParser {
);
timer.cancel();
}
},
);
});
}
try {

View File

@@ -198,8 +198,10 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
onPressed: _retry,
icon: const Icon(Icons.refresh_rounded),
style: FilledButton.styleFrom(
backgroundColor: context.conduitTheme.buttonPrimary,
foregroundColor: context.conduitTheme.buttonPrimaryText,
backgroundColor:
context.conduitTheme.buttonPrimary,
foregroundColor:
context.conduitTheme.buttonPrimaryText,
padding: const EdgeInsets.symmetric(
horizontal: Spacing.lg,
vertical: Spacing.md,
@@ -212,7 +214,8 @@ class _ErrorBoundaryState extends ConsumerState<ErrorBoundary> {
elevation: 0,
),
label: Text(
AppLocalizations.of(context)?.retry ?? 'Try Again',
AppLocalizations.of(context)?.retry ??
'Try Again',
style: context.conduitTheme.bodySmall?.copyWith(
fontWeight: FontWeight.w600,
color: context.conduitTheme.buttonPrimaryText,

View File

@@ -9,6 +9,8 @@ import 'package:flutter_tts/flutter_tts.dart';
import '../../../core/services/api_service.dart';
import '../../../core/services/settings_service.dart';
typedef _SpeechChunk = ({Uint8List bytes, String mimeType});
/// Lightweight wrapper around FlutterTts to centralize configuration
class TextToSpeechService {
final FlutterTts _tts = FlutterTts();
@@ -20,7 +22,7 @@ class TextToSpeechService {
bool _available = false;
bool _voiceConfigured = false;
int _session = 0; // increments to cancel in-flight work
final List<Uint8List> _buffered = <Uint8List>[]; // server chunks
final List<_SpeechChunk> _buffered = <_SpeechChunk>[]; // server chunks
int _expectedChunks = 0;
int _currentIndex = -1;
bool _waitingNext = false;
@@ -51,9 +53,6 @@ class TextToSpeechService {
case PlayerState.paused:
_handlePause();
break;
case PlayerState.stopped:
_handleCancel();
break;
default:
break;
}
@@ -238,6 +237,7 @@ class TextToSpeechService {
_waitingNext = false;
if (_engine == TtsEngine.server) {
await _player.stop();
_handleCancel();
} else {
await _tts.stop();
}
@@ -486,18 +486,23 @@ class TextToSpeechService {
_expectedChunks = chunks.length;
// Fetch first chunk to start playback quickly
final firstBytes = await _fetchServerAudio(
final firstChunk = await _fetchServerAudio(
chunks.first,
effectiveVoice,
session,
);
if (session != _session) return; // canceled
if (firstBytes.isEmpty) throw Exception('Empty audio response');
if (firstChunk.bytes.isEmpty) {
throw Exception('Empty audio response');
}
await _player.stop();
_buffered.add(Uint8List.fromList(firstBytes));
final bufferedFirst = _cloneChunk(firstChunk);
_buffered.add(bufferedFirst);
_currentIndex = 0;
await _player.play(BytesSource(_buffered.first));
await _player.play(
BytesSource(bufferedFirst.bytes, mimeType: bufferedFirst.mimeType),
);
_onSentenceIndex?.call(0);
// Prefetch the rest in background
@@ -518,10 +523,10 @@ class TextToSpeechService {
for (final chunk in remaining) {
if (session != _session) return; // canceled
try {
final audio = await _fetchServerAudio(chunk, voice, session);
final audioChunk = await _fetchServerAudio(chunk, voice, session);
if (session != _session) return;
if (audio.isNotEmpty) {
_buffered.add(Uint8List.fromList(audio));
if (audioChunk.bytes.isNotEmpty) {
_buffered.add(_cloneChunk(audioChunk));
// If the player finished the previous chunk and is waiting, start now
if (_waitingNext && (_currentIndex + 1) < _buffered.length) {
_waitingNext = false;
@@ -535,7 +540,7 @@ class TextToSpeechService {
}
}
Future<List<int>> _fetchServerAudio(
Future<_SpeechChunk> _fetchServerAudio(
String text,
String? voice,
int session,
@@ -565,11 +570,15 @@ class TextToSpeechService {
final nextIndex = _currentIndex + 1;
if (nextIndex < 0 || nextIndex >= _buffered.length) return;
_currentIndex = nextIndex;
final bytes = _buffered[nextIndex];
await _player.play(BytesSource(bytes));
final chunk = _buffered[nextIndex];
await _player.play(BytesSource(chunk.bytes, mimeType: chunk.mimeType));
_onSentenceIndex?.call(_currentIndex);
}
/// Returns a copy of [chunk] whose byte buffer is freshly allocated, so the
/// result does not share storage with the original.
_SpeechChunk _cloneChunk(_SpeechChunk chunk) {
  final (:bytes, :mimeType) = chunk;
  return (bytes: Uint8List.fromList(bytes), mimeType: mimeType);
}
List<String> _splitForTts(String text) {
// Normalize whitespace
final normalized = text.replaceAll(RegExp(r"\s+"), ' ').trim();

View File

@@ -1557,7 +1557,9 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
// Enhanced color scheme for active state
final Color activeBackground = isActive
? theme.buttonPrimary.withValues(alpha: brightness == Brightness.dark ? 0.22 : 0.14)
? theme.buttonPrimary.withValues(
alpha: brightness == Brightness.dark ? 0.22 : 0.14,
)
: Colors.transparent;
final Color inactiveBackground = brightness == Brightness.dark
@@ -1578,13 +1580,13 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
// Enhanced content colors
final Color activeTextColor = theme.buttonPrimary;
final Color inactiveTextColor = theme.textPrimary.withValues(
alpha: enabled ? (brightness == Brightness.dark ? 0.85 : 0.75) : Alpha.disabled,
alpha: enabled
? (brightness == Brightness.dark ? 0.85 : 0.75)
: Alpha.disabled,
);
final Color textColor = isActive ? activeTextColor : inactiveTextColor;
final Color iconColor = isActive
? activeTextColor
: inactiveTextColor;
final Color iconColor = isActive ? activeTextColor : inactiveTextColor;
return AnimatedContainer(
duration: const Duration(milliseconds: 200),
@@ -1632,11 +1634,7 @@ class _ModernChatInputState extends ConsumerState<ModernChatInput>
AnimatedContainer(
duration: const Duration(milliseconds: 200),
curve: Curves.easeOutCubic,
child: Icon(
icon,
size: IconSize.small + 1,
color: iconColor,
),
child: Icon(icon, size: IconSize.small + 1, color: iconColor),
),
const SizedBox(width: Spacing.xs + 1),
AnimatedDefaultTextStyle(

View File

@@ -69,58 +69,58 @@ packages:
dependency: "direct main"
description:
name: audioplayers
sha256: c05c6147124cd63e725e861335a8b4d57300b80e6e92cea7c145c739223bbaef
sha256: "5441fa0ceb8807a5ad701199806510e56afde2b4913d9d17c2f19f2902cf0ae4"
url: "https://pub.dev"
source: hosted
version: "5.2.1"
version: "6.5.1"
audioplayers_android:
dependency: transitive
description:
name: audioplayers_android
sha256: b00e1a0e11365d88576320ec2d8c192bc21f1afb6c0e5995d1c57ae63156acb5
sha256: "60a6728277228413a85755bd3ffd6fab98f6555608923813ce383b190a360605"
url: "https://pub.dev"
source: hosted
version: "4.0.3"
version: "5.2.1"
audioplayers_darwin:
dependency: transitive
description:
name: audioplayers_darwin
sha256: "3034e99a6df8d101da0f5082dcca0a2a99db62ab1d4ddb3277bed3f6f81afe08"
sha256: "0811d6924904ca13f9ef90d19081e4a87f7297ddc19fc3d31f60af1aaafee333"
url: "https://pub.dev"
source: hosted
version: "5.0.2"
version: "6.3.0"
audioplayers_linux:
dependency: transitive
description:
name: audioplayers_linux
sha256: "60787e73fefc4d2e0b9c02c69885402177e818e4e27ef087074cf27c02246c9e"
sha256: f75bce1ce864170ef5e6a2c6a61cd3339e1a17ce11e99a25bae4474ea491d001
url: "https://pub.dev"
source: hosted
version: "3.1.0"
version: "4.2.1"
audioplayers_platform_interface:
dependency: transitive
description:
name: audioplayers_platform_interface
sha256: "365c547f1bb9e77d94dd1687903a668d8f7ac3409e48e6e6a3668a1ac2982adb"
sha256: "0e2f6a919ab56d0fec272e801abc07b26ae7f31980f912f24af4748763e5a656"
url: "https://pub.dev"
source: hosted
version: "6.1.0"
version: "7.1.1"
audioplayers_web:
dependency: transitive
description:
name: audioplayers_web
sha256: "22cd0173e54d92bd9b2c80b1204eb1eb159ece87475ab58c9788a70ec43c2a62"
sha256: "1c0f17cec68455556775f1e50ca85c40c05c714a99c5eb1d2d57cc17ba5522d7"
url: "https://pub.dev"
source: hosted
version: "4.1.0"
version: "5.1.1"
audioplayers_windows:
dependency: transitive
description:
name: audioplayers_windows
sha256: "9536812c9103563644ada2ef45ae523806b0745f7a78e89d1b5fb1951de90e1a"
sha256: "4048797865105b26d47628e6abb49231ea5de84884160229251f37dfcbe52fd7"
url: "https://pub.dev"
source: hosted
version: "3.1.0"
version: "4.2.1"
boolean_selector:
dependency: transitive
description:

View File

@@ -47,7 +47,7 @@ dependencies:
record: ^6.1.1
stts: ^1.2.5
flutter_tts: ^4.2.3
audioplayers: ^5.2.1
audioplayers: ^6.5.1
image_picker: ^1.2.0
file_picker: ^10.3.3
path_provider: ^2.1.4