feat: enhance text-to-speech and voice call services

- Added volume, speech rate, and pitch settings to the TextToSpeechService for improved audio control.
- Reset the accumulated response in VoiceCallService before sending messages to ensure accurate response handling.
- Enhanced the handling of socket events in VoiceCallService to manage streaming content and completion more effectively.
- Improved logging for better debugging and tracking of TTS and voice call states.
This commit is contained in:
cogwheel0
2025-10-08 13:35:24 +05:30
parent b673921002
commit 4f6c10c857
2 changed files with 61 additions and 19 deletions

View File

@@ -54,6 +54,16 @@ class TextToSpeechService {
try { try {
await _tts.awaitSpeakCompletion(false); await _tts.awaitSpeakCompletion(false);
// Set volume to maximum
await _tts.setVolume(1.0);
// Set speech rate (flutter_tts uses a 0.0-1.0 scale; 0.5 is normal speed)
await _tts.setSpeechRate(0.5);
// Set pitch (1.0 is normal)
await _tts.setPitch(1.0);
if (!kIsWeb && Platform.isIOS) { if (!kIsWeb && Platform.isIOS) {
await _tts.setSharedInstance(true); await _tts.setSharedInstance(true);
await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [ await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
@@ -63,6 +73,7 @@ class TextToSpeechService {
IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP, IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
]); ]);
} }
await _configurePreferredVoice(); await _configurePreferredVoice();
_available = true; _available = true;
} catch (e) { } catch (e) {

View File

@@ -234,6 +234,7 @@ class VoiceCallService {
try { try {
_updateState(VoiceCallState.processing); _updateState(VoiceCallState.processing);
_accumulatedResponse = ''; // Reset response accumulator
// Send message using the existing chat infrastructure // Send message using the existing chat infrastructure
sendMessageFromService(_ref, text, null); sendMessageFromService(_ref, text, null);
@@ -243,31 +244,44 @@ class VoiceCallService {
} }
} }
String _accumulatedResponse = '';
void _handleSocketEvent( void _handleSocketEvent(
Map<String, dynamic> event, Map<String, dynamic> event,
void Function(dynamic response)? ack, void Function(dynamic response)? ack,
) { ) {
if (_isDisposed) return; if (_isDisposed) return;
final type = event['type']?.toString(); final outerData = event['data'];
final data = event['data'];
if (data is Map<String, dynamic>) { if (outerData is Map<String, dynamic>) {
// Handle streaming response chunks final eventType = outerData['type']?.toString();
if (type == 'message' || type == 'delta') { final innerData = outerData['data'];
final content = data['content']?.toString() ?? '';
if (eventType == 'chat:completion' && innerData is Map<String, dynamic>) {
// Handle streaming content chunks
if (innerData.containsKey('content')) {
final content = innerData['content']?.toString() ?? '';
if (content.isNotEmpty) { if (content.isNotEmpty) {
_accumulatedResponse = content;
_responseController.add(content); _responseController.add(content);
} }
} }
// Handle completion // Check for completion using choices[0].finish_reason
if (data['done'] == true || type == 'completion') { if (innerData.containsKey('choices')) {
final fullResponse = data['content']?.toString() ?? final choices = innerData['choices'] as List?;
data['message']?.toString() ?? if (choices != null && choices.isNotEmpty) {
''; final firstChoice = choices[0] as Map<String, dynamic>?;
if (fullResponse.isNotEmpty) { final finishReason = firstChoice?['finish_reason'];
_speakResponse(fullResponse);
if (finishReason == 'stop') {
// ignore: avoid_print
print('[VoiceCall] Response completed! Text: $_accumulatedResponse');
if (_accumulatedResponse.isNotEmpty) {
_speakResponse(_accumulatedResponse);
_accumulatedResponse = '';
} else { } else {
// No response, restart listening // No response, restart listening
_startListening(); _startListening();
@@ -275,15 +289,32 @@ class VoiceCallService {
} }
} }
} }
}
}
}
Future<void> _speakResponse(String response) async { Future<void> _speakResponse(String response) async {
if (_isDisposed) return; if (_isDisposed) return;
try { try {
// ignore: avoid_print
print('[VoiceCall] _speakResponse called with: $response');
// Stop listening before speaking
await _voiceInput.stopListening();
await _transcriptSubscription?.cancel();
await _intensitySubscription?.cancel();
_updateState(VoiceCallState.speaking); _updateState(VoiceCallState.speaking);
// ignore: avoid_print
print('[VoiceCall] State updated to speaking, calling TTS...');
await _tts.speak(response); await _tts.speak(response);
// ignore: avoid_print
print('[VoiceCall] TTS.speak() returned');
// After speaking completes, _handleTtsComplete will restart listening // After speaking completes, _handleTtsComplete will restart listening
} catch (e) { } catch (e) {
// ignore: avoid_print
print('[VoiceCall] Error in _speakResponse: $e');
_updateState(VoiceCallState.error); _updateState(VoiceCallState.error);
// Restart listening even if TTS fails // Restart listening even if TTS fails
await _startListening(); await _startListening();