feat: enhance text-to-speech functionality with markdown support

- Integrated markdown conversion in TextToSpeechController to clean text before speech synthesis, ensuring only valid content is spoken.
- Updated VoiceCallService to utilize markdown conversion for responses, improving the clarity of spoken content.
- Enhanced VoiceCallPage to display cleaned text from markdown, providing a better user experience during voice interactions.
This commit is contained in:
cogwheel0
2025-10-09 00:20:36 +05:30
parent 96202c7453
commit fabb1df63a
4 changed files with 166 additions and 37 deletions

View File

@@ -0,0 +1,118 @@
/// Converts markdown text to plain text suitable for text-to-speech.
///
/// Strips formatting while preserving the semantic meaning and readability
/// of the content for audio consumption.
///
/// The conversion is a best-effort, regex-based pass rather than a full
/// markdown parser. Block-level constructs (quotes, rules, headings, lists)
/// are resolved before inline constructs (images, links, code, emphasis) so
/// that line-leading markers such as `* item` are never mistaken for
/// emphasis delimiters.
class MarkdownToText {
  const MarkdownToText._();

  // ---------------------------------------------------------------------
  // Block-level patterns. All of them are line-local (`[ \t]`, never `\s`)
  // so a match can never swallow a neighbouring line.
  // ---------------------------------------------------------------------

  static final _codeBlockRegex = RegExp(
    r'```[^\n]*\n(.*?)```',
    multiLine: true,
    dotAll: true,
  );

  /// One or more leading `>` markers, which also covers nested quotes.
  static final _blockquoteRegex = RegExp(
    r'^[ \t]*(?:>[ \t]*)+',
    multiLine: true,
  );

  /// Three or more of the *same* marker, optionally separated by spaces
  /// (`---`, `***`, `___`, `* * *`).
  static final _horizontalRuleRegex = RegExp(
    r'^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$',
    multiLine: true,
  );

  static final _headingRegex = RegExp(r'^#{1,6}[ \t]+(.+)$', multiLine: true);

  static final _listItemRegex = RegExp(
    r'^[ \t]*[-*+][ \t]+(.+)$',
    multiLine: true,
  );

  static final _orderedListRegex = RegExp(
    r'^[ \t]*\d+\.[ \t]+(.+)$',
    multiLine: true,
  );

  // ---------------------------------------------------------------------
  // Inline patterns.
  // ---------------------------------------------------------------------

  static final _imageRegex = RegExp(r'!\[([^\]]*)\]\([^)]+\)');
  static final _linkRegex = RegExp(r'\[([^\]]+)\]\([^)]+\)');
  static final _inlineCodeRegex = RegExp(r'`([^`]+)`');
  static final _boldItalicRegex = RegExp(r'\*\*\*([^*]+)\*\*\*');
  static final _boldRegex = RegExp(r'\*\*([^*]+)\*\*');

  /// `__bold__`. Group 1 is the character preceding the opening marker
  /// (empty at start of input) and must be re-emitted; group 2 is the text.
  /// The delimiters must not touch word characters on the outside, so
  /// identifiers containing double underscores mid-word are left alone.
  static final _underscoreBoldRegex = RegExp(
    r'(^|\W)__([^\s_](?:[^_\n]*[^\s_])?)__(?!\w)',
  );

  /// `*italic*`. The content must start and end with a non-space character,
  /// so arithmetic such as `2 * 3 * 4` is not treated as emphasis.
  static final _asteriskItalicRegex = RegExp(
    r'\*([^\s*](?:[^*\n]*[^\s*])?)\*',
  );

  /// `_italic_`. Same grouping as [_underscoreBoldRegex]; intraword
  /// underscores (`snake_case_name`) never match.
  static final _underscoreItalicRegex = RegExp(
    r'(^|\W)_([^\s_](?:[^_\n]*[^\s_])?)_(?!\w)',
  );

  static final _strikethroughRegex = RegExp(r'~~([^~]+)~~');

  /// Tag-shaped spans only (`<b>`, `</b>`, `<!-- ... -->`), so a bare `<`
  /// followed by a space or digit in prose is preserved.
  static final _htmlTagRegex = RegExp(r'<(?:/?[A-Za-z]|!)[^>]*>');

  /// Sentence-ending punctuation, optionally followed by closing inline
  /// markers that are stripped later (`**Done.**`).
  static final _terminalPunctuationRegex = RegExp(r'[.!?:;,][*_~`")\]]*$');

  static final _whitespaceRegex = RegExp(r'\s+');

  /// Converts [markdown] to a single line of plain text suitable for TTS.
  ///
  /// - Replaces non-empty fenced code blocks with `(code block)`
  /// - Removes blockquote markers and horizontal rules
  /// - Turns headings and list items into short sentences so the speech
  ///   engine pauses after each one (no period is added when the text
  ///   already ends with punctuation)
  /// - Replaces images with `(<alt> image)`, or drops them when there is no
  ///   alt text
  /// - Converts links to just their text
  /// - Strips inline code, bold, italic and strikethrough markers
  /// - Removes HTML tags
  /// - Collapses every run of whitespace, newlines included, to one space
  ///
  /// Returns an empty string when [markdown] is blank or contains nothing
  /// speakable (for example only a horizontal rule).
  static String convert(String markdown) {
    if (markdown.trim().isEmpty) {
      return '';
    }

    // Code blocks go first so their contents are never interpreted as
    // markdown by the later passes.
    var text = markdown.replaceAllMapped(_codeBlockRegex, (match) {
      final code = match[1]?.trim() ?? '';
      return code.isEmpty ? '' : ' (code block) ';
    });

    // Block-level structure. Quote markers are removed before headings and
    // lists so that quoted headings/lists are recognised as well.
    text = text
        .replaceAll(_blockquoteRegex, '')
        .replaceAll(_horizontalRuleRegex, '')
        .replaceAllMapped(
          _headingRegex,
          (match) => '${_asSentence(match[1] ?? '')}\n',
        )
        .replaceAllMapped(
          _listItemRegex,
          (match) => '${_asSentence(match[1] ?? '')} ',
        )
        .replaceAllMapped(
          _orderedListRegex,
          (match) => '${_asSentence(match[1] ?? '')} ',
        );

    // Images must be handled before links: an image is a link with a
    // leading `!`, so the link pattern would otherwise consume it first.
    text = text
        .replaceAllMapped(_imageRegex, (match) {
          final alt = match[1]?.trim() ?? '';
          return alt.isNotEmpty ? ' ($alt image) ' : '';
        })
        .replaceAllMapped(_linkRegex, (match) => match[1] ?? '');

    // Inline formatting, longest delimiters first.
    text = text
        .replaceAllMapped(_inlineCodeRegex, (match) => match[1] ?? '')
        .replaceAllMapped(_boldItalicRegex, (match) => match[1] ?? '')
        .replaceAllMapped(_boldRegex, (match) => match[1] ?? '')
        .replaceAllMapped(
          _underscoreBoldRegex,
          (match) => '${match[1] ?? ''}${match[2] ?? ''}',
        )
        .replaceAllMapped(_asteriskItalicRegex, (match) => match[1] ?? '')
        .replaceAllMapped(
          _underscoreItalicRegex,
          (match) => '${match[1] ?? ''}${match[2] ?? ''}',
        )
        .replaceAllMapped(_strikethroughRegex, (match) => match[1] ?? '')
        .replaceAll(_htmlTagRegex, '');

    // Newlines become spaces for natural speech flow; collapsing afterwards
    // guarantees no double spaces survive the earlier substitutions.
    return text.replaceAll(_whitespaceRegex, ' ').trim();
  }

  /// Returns [content] ending in sentence punctuation, appending a period
  /// only when it does not already end with one.
  static String _asSentence(String content) {
    final trimmed = content.trimRight();
    return _terminalPunctuationRegex.hasMatch(trimmed) ? trimmed : '$trimmed.';
  }
}

View File

@@ -2,6 +2,7 @@ import 'dart:async';
import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../../core/utils/markdown_to_text.dart';
import '../services/text_to_speech_service.dart'; import '../services/text_to_speech_service.dart';
enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error } enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error }
@@ -161,7 +162,21 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
); );
try { try {
await _service.speak(text); // Convert markdown to clean text for TTS
final cleanText = MarkdownToText.convert(text);
if (cleanText.isEmpty) {
// No speakable content
if (!ref.mounted) {
return;
}
state = state.copyWith(
status: TtsPlaybackStatus.idle,
clearActiveMessageId: true,
);
return;
}
await _service.speak(cleanText);
if (!ref.mounted) { if (!ref.mounted) {
return; return;
} }

View File

@@ -5,6 +5,7 @@ import 'package:wakelock_plus/wakelock_plus.dart';
import '../../../core/providers/app_providers.dart'; import '../../../core/providers/app_providers.dart';
import '../../../core/services/socket_service.dart'; import '../../../core/services/socket_service.dart';
import '../../../core/utils/markdown_to_text.dart';
import '../providers/chat_providers.dart'; import '../providers/chat_providers.dart';
import 'text_to_speech_service.dart'; import 'text_to_speech_service.dart';
import 'voice_input_service.dart'; import 'voice_input_service.dart';
@@ -53,10 +54,10 @@ class VoiceCallService {
required TextToSpeechService tts, required TextToSpeechService tts,
required SocketService socketService, required SocketService socketService,
required Ref ref, required Ref ref,
}) : _voiceInput = voiceInput, }) : _voiceInput = voiceInput,
_tts = tts, _tts = tts,
_socketService = socketService, _socketService = socketService,
_ref = ref { _ref = ref {
_tts.bindHandlers( _tts.bindHandlers(
onStart: _handleTtsStart, onStart: _handleTtsStart,
onComplete: _handleTtsComplete, onComplete: _handleTtsComplete,
@@ -80,8 +81,8 @@ class VoiceCallService {
await _notificationService.initialize(); await _notificationService.initialize();
// Request notification permissions if needed // Request notification permissions if needed
final notificationsEnabled = final notificationsEnabled = await _notificationService
await _notificationService.areNotificationsEnabled(); .areNotificationsEnabled();
if (!notificationsEnabled) { if (!notificationsEnabled) {
await _notificationService.requestPermissions(); await _notificationService.requestPermissions();
} }
@@ -186,12 +187,10 @@ class VoiceCallService {
); );
// Forward intensity stream for waveform visualization // Forward intensity stream for waveform visualization
_intensitySubscription = _voiceInput.intensityStream.listen( _intensitySubscription = _voiceInput.intensityStream.listen((intensity) {
(intensity) { if (_isDisposed) return;
if (_isDisposed) return; _intensityController.add(intensity);
_intensityController.add(intensity); });
},
);
} catch (e) { } catch (e) {
_updateState(VoiceCallState.error); _updateState(VoiceCallState.error);
rethrow; rethrow;
@@ -283,7 +282,17 @@ class VoiceCallService {
await _intensitySubscription?.cancel(); await _intensitySubscription?.cancel();
_updateState(VoiceCallState.speaking); _updateState(VoiceCallState.speaking);
await _tts.speak(response);
// Convert markdown to clean text for TTS
final cleanText = MarkdownToText.convert(response);
if (cleanText.isEmpty) {
// No speakable content, restart listening
_isSpeaking = false;
await _startListening();
return;
}
await _tts.speak(cleanText);
// After speaking completes, _handleTtsComplete will restart listening // After speaking completes, _handleTtsComplete will restart listening
} catch (e) { } catch (e) {
_isSpeaking = false; _isSpeaking = false;

View File

@@ -6,6 +6,7 @@ import 'package:flutter/cupertino.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../../core/providers/app_providers.dart'; import '../../../core/providers/app_providers.dart';
import '../../../core/utils/markdown_to_text.dart';
import '../services/voice_call_service.dart'; import '../services/voice_call_service.dart';
class VoiceCallPage extends ConsumerStatefulWidget { class VoiceCallPage extends ConsumerStatefulWidget {
@@ -239,7 +240,8 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
builder: (context, child) { builder: (context, child) {
final offset = (index * 0.2) % 1.0; final offset = (index * 0.2) % 1.0;
final animation = (_waveController.value + offset) % 1.0; final animation = (_waveController.value + offset) % 1.0;
final height = 20.0 + final height =
20.0 +
(math.sin(animation * math.pi * 2) * 30.0).abs() + (math.sin(animation * math.pi * 2) * 30.0).abs() +
(_currentIntensity * 4.0); (_currentIntensity * 4.0);
@@ -271,10 +273,7 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
decoration: BoxDecoration( decoration: BoxDecoration(
shape: BoxShape.circle, shape: BoxShape.circle,
color: primaryColor.withValues(alpha: 0.2), color: primaryColor.withValues(alpha: 0.2),
border: Border.all( border: Border.all(color: primaryColor, width: 3),
color: primaryColor,
width: 3,
),
), ),
child: Center( child: Center(
child: Icon( child: Icon(
@@ -322,8 +321,9 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
_currentTranscript.isNotEmpty) { _currentTranscript.isNotEmpty) {
displayText = _currentTranscript; displayText = _currentTranscript;
} else if (_currentState == VoiceCallState.speaking && } else if (_currentState == VoiceCallState.speaking &&
_currentResponse.isNotEmpty) { _currentResponse.isNotEmpty) {
displayText = _currentResponse; // Convert markdown to clean text for display
displayText = MarkdownToText.convert(_currentResponse);
} }
if (displayText.isEmpty) { if (displayText.isEmpty) {
@@ -405,25 +405,12 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
child: Container( child: Container(
width: 64, width: 64,
height: 64, height: 64,
decoration: BoxDecoration( decoration: BoxDecoration(shape: BoxShape.circle, color: color),
shape: BoxShape.circle, child: Icon(icon, color: Colors.white, size: 32),
color: color,
),
child: Icon(
icon,
color: Colors.white,
size: 32,
),
), ),
), ),
const SizedBox(height: 8), const SizedBox(height: 8),
Text( Text(label, style: TextStyle(fontSize: 12, color: color)),
label,
style: TextStyle(
fontSize: 12,
color: color,
),
),
], ],
); );
} }