feat: enhance text-to-speech and voice call services

- Set explicit volume (1.0), speech rate (0.5), and pitch (1.0) values when TextToSpeechService configures the TTS engine, for improved audio control.
- Reset the accumulated response in VoiceCallService before sending messages to ensure accurate response handling.
- Enhanced the handling of socket events in VoiceCallService to manage streaming content and completion more effectively.
- Improved logging for better debugging and tracking of TTS and voice call states.
This commit is contained in:
cogwheel0
2025-10-08 13:35:24 +05:30
parent b673921002
commit 4f6c10c857
2 changed files with 61 additions and 19 deletions

View File

@@ -54,6 +54,16 @@ class TextToSpeechService {
try {
await _tts.awaitSpeakCompletion(false);
// Set volume to maximum
await _tts.setVolume(1.0);
// Set speech rate (flutter_tts treats 0.5 as the normal rate on iOS and Android, not 1.0)
await _tts.setSpeechRate(0.5);
// Set pitch (1.0 is normal)
await _tts.setPitch(1.0);
if (!kIsWeb && Platform.isIOS) {
await _tts.setSharedInstance(true);
await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
@@ -63,6 +73,7 @@ class TextToSpeechService {
IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
]);
}
await _configurePreferredVoice();
_available = true;
} catch (e) {

View File

@@ -234,6 +234,7 @@ class VoiceCallService {
try {
_updateState(VoiceCallState.processing);
_accumulatedResponse = ''; // Reset response accumulator
// Send message using the existing chat infrastructure
sendMessageFromService(_ref, text, null);
@@ -243,34 +244,50 @@ class VoiceCallService {
}
}
String _accumulatedResponse = '';
void _handleSocketEvent(
Map<String, dynamic> event,
void Function(dynamic response)? ack,
) {
if (_isDisposed) return;
final type = event['type']?.toString();
final data = event['data'];
final outerData = event['data'];
if (data is Map<String, dynamic>) {
// Handle streaming response chunks
if (type == 'message' || type == 'delta') {
final content = data['content']?.toString() ?? '';
if (content.isNotEmpty) {
_responseController.add(content);
if (outerData is Map<String, dynamic>) {
final eventType = outerData['type']?.toString();
final innerData = outerData['data'];
if (eventType == 'chat:completion' && innerData is Map<String, dynamic>) {
// Handle streaming content chunks
if (innerData.containsKey('content')) {
final content = innerData['content']?.toString() ?? '';
if (content.isNotEmpty) {
_accumulatedResponse = content;
_responseController.add(content);
}
}
}
// Handle completion
if (data['done'] == true || type == 'completion') {
final fullResponse = data['content']?.toString() ??
data['message']?.toString() ??
'';
if (fullResponse.isNotEmpty) {
_speakResponse(fullResponse);
} else {
// No response, restart listening
_startListening();
// Check for completion using choices[0].finish_reason
if (innerData.containsKey('choices')) {
final choices = innerData['choices'] as List?;
if (choices != null && choices.isNotEmpty) {
final firstChoice = choices[0] as Map<String, dynamic>?;
final finishReason = firstChoice?['finish_reason'];
if (finishReason == 'stop') {
// ignore: avoid_print
print('[VoiceCall] Response completed! Text: $_accumulatedResponse');
if (_accumulatedResponse.isNotEmpty) {
_speakResponse(_accumulatedResponse);
_accumulatedResponse = '';
} else {
// No response, restart listening
_startListening();
}
}
}
}
}
}
@@ -280,10 +297,24 @@ class VoiceCallService {
if (_isDisposed) return;
try {
// ignore: avoid_print
print('[VoiceCall] _speakResponse called with: $response');
// Stop listening before speaking
await _voiceInput.stopListening();
await _transcriptSubscription?.cancel();
await _intensitySubscription?.cancel();
_updateState(VoiceCallState.speaking);
// ignore: avoid_print
print('[VoiceCall] State updated to speaking, calling TTS...');
await _tts.speak(response);
// ignore: avoid_print
print('[VoiceCall] TTS.speak() returned');
// After speaking completes, _handleTtsComplete will restart listening
} catch (e) {
// ignore: avoid_print
print('[VoiceCall] Error in _speakResponse: $e');
_updateState(VoiceCallState.error);
// Restart listening even if TTS fails
await _startListening();