feat: enhance text-to-speech and voice call services

- Added volume, speech rate, and pitch settings to TextToSpeechService for improved audio control.
- Reset the accumulated response in VoiceCallService before sending each message to ensure accurate response handling.
- Enhanced the handling of socket events in VoiceCallService: streaming content is now read from `chat:completion` events, and completion is detected via `choices[0].finish_reason == 'stop'`.
- Added debug `print` logging for easier debugging and tracking of TTS and voice call states.
2025-10-08 13:35:24 +05:30
parent b673921002
commit 4f6c10c857
2 changed files with 61 additions and 19 deletions
@@ -54,6 +54,16 @@ class TextToSpeechService {
    try {
      await _tts.awaitSpeakCompletion(false);
      // Set volume to maximum
      await _tts.setVolume(1.0);
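      // Set speech rate to 0.5 (NOTE(review): presumably the "normal" rate on
      // flutter_tts's normalized scale, not 1.0 — confirm per platform)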
      await _tts.setSpeechRate(0.5);
      // Set pitch (1.0 is normal)
      await _tts.setPitch(1.0);
      if (!kIsWeb && Platform.isIOS) {
        await _tts.setSharedInstance(true);
        await _tts.setIosAudioCategory(IosTextToSpeechAudioCategory.playback, [
@@ -63,6 +73,7 @@ class TextToSpeechService {
          IosTextToSpeechAudioCategoryOptions.allowBluetoothA2DP,
        ]);
      }
      await _configurePreferredVoice();
      _available = true;
    } catch (e) {
@@ -234,6 +234,7 @@ class VoiceCallService {
    try {
      _updateState(VoiceCallState.processing);
      _accumulatedResponse = ''; // Reset response accumulator
      // Send message using the existing chat infrastructure
      sendMessageFromService(_ref, text, null);
@@ -243,31 +244,44 @@ class VoiceCallService {
    }
  }
  String _accumulatedResponse = '';
  void _handleSocketEvent(
    Map<String, dynamic> event,
    void Function(dynamic response)? ack,
  ) {
    if (_isDisposed) return;
-    final type = event['type']?.toString();
+    final outerData = event['data'];
    final data = event['data'];
-    if (data is Map<String, dynamic>) {
+    if (outerData is Map<String, dynamic>) {
-      // Handle streaming response chunks
+      final eventType = outerData['type']?.toString();
-      if (type == 'message' || type == 'delta') {
+      final innerData = outerData['data'];
-        final content = data['content']?.toString() ?? '';
+
      if (eventType == 'chat:completion' && innerData is Map<String, dynamic>) {
        // Handle streaming content chunks
        if (innerData.containsKey('content')) {
          final content = innerData['content']?.toString() ?? '';
          if (content.isNotEmpty) {
            _accumulatedResponse = content;
            _responseController.add(content);
          }
        }
-      // Handle completion
+        // Check for completion using choices[0].finish_reason
-      if (data['done'] == true || type == 'completion') {
+        if (innerData.containsKey('choices')) {
-        final fullResponse = data['content']?.toString() ??
+          final choices = innerData['choices'] as List?;
-            data['message']?.toString() ??
+          if (choices != null && choices.isNotEmpty) {
-            '';
+            final firstChoice = choices[0] as Map<String, dynamic>?;
-        if (fullResponse.isNotEmpty) {
+            final finishReason = firstChoice?['finish_reason'];
-          _speakResponse(fullResponse);
+
            if (finishReason == 'stop') {
              // ignore: avoid_print
              print('[VoiceCall] Response completed! Text: $_accumulatedResponse');
              if (_accumulatedResponse.isNotEmpty) {
                _speakResponse(_accumulatedResponse);
                _accumulatedResponse = '';
              } else {
                // No response, restart listening
                _startListening();
@@ -275,15 +289,32 @@ class VoiceCallService {
            }
          }
        }
      }
    }
  }
  Future<void> _speakResponse(String response) async {
    if (_isDisposed) return;
    try {
      // ignore: avoid_print
      print('[VoiceCall] _speakResponse called with: $response');
      // Stop listening before speaking
      await _voiceInput.stopListening();
      await _transcriptSubscription?.cancel();
      await _intensitySubscription?.cancel();
      _updateState(VoiceCallState.speaking);
      // ignore: avoid_print
      print('[VoiceCall] State updated to speaking, calling TTS...');
      await _tts.speak(response);
      // ignore: avoid_print
      print('[VoiceCall] TTS.speak() returned');
      // After speaking completes, _handleTtsComplete will restart listening
    } catch (e) {
      // ignore: avoid_print
      print('[VoiceCall] Error in _speakResponse: $e');
      _updateState(VoiceCallState.error);
      // Restart listening even if TTS fails
      await _startListening();