feat: enhance text-to-speech functionality with markdown support

- Integrated markdown conversion in TextToSpeechController to clean text before speech synthesis, ensuring only valid content is spoken.
- Updated VoiceCallService to utilize markdown conversion for responses, improving the clarity of spoken content.
- Enhanced VoiceCallPage to display cleaned text from markdown, providing a better user experience during voice interactions.
This commit is contained in:
cogwheel0
2025-10-09 00:20:36 +05:30
parent 96202c7453
commit fabb1df63a
4 changed files with 166 additions and 37 deletions

View File

@@ -0,0 +1,118 @@
/// Converts markdown text to plain text suitable for text-to-speech.
///
/// Strips formatting while preserving the semantic meaning and readability
/// of the content for audio consumption.
///
/// This is a regex-based approximation, not a full markdown parser: it
/// handles the common constructs (fenced code, inline code, emphasis, links,
/// images, headings, lists, blockquotes, horizontal rules and HTML tags) and
/// leaves anything it does not recognize untouched.
class MarkdownToText {
  const MarkdownToText._();

  /// Fenced code block; group 1 is the code between the fences.
  static final _codeBlockRegex = RegExp(
    r'```[^\n]*\n(.*?)```',
    multiLine: true,
    dotAll: true,
  );

  /// A line consisting only of three or more `-`, `*` or `_` characters.
  static final _horizontalRuleRegex = RegExp(
    r'^[ \t]*[-*_]{3,}[ \t]*$',
    multiLine: true,
  );

  /// Inline code span; group 1 is the content between the backticks.
  static final _inlineCodeRegex = RegExp(r'`([^`]+)`');

  /// Inline image; group 1 is the (possibly empty) alt text.
  static final _imageRegex = RegExp(r'!\[([^\]]*)\]\([^)]+\)');

  /// Inline link; group 1 is the link text.
  static final _linkRegex = RegExp(r'\[([^\]]+)\]\([^)]+\)');

  /// `***bold italic***`; group 1 is the emphasized text.
  static final _boldItalicRegex = RegExp(r'\*\*\*([^*]+)\*\*\*');

  /// `**bold**` (group 1) or `__bold__` (group 2).
  ///
  /// The underscore form must not touch a letter, digit or underscore on
  /// either side, so underscores inside identifiers are left alone.
  static final _boldRegex = RegExp(
    r'\*\*([^*]+)\*\*|(?<![A-Za-z0-9_])__([^_]+)__(?![A-Za-z0-9_])',
  );

  /// `*italic*` (group 1) or `_italic_` (group 2).
  ///
  /// The delimiters must hug the emphasized text (no whitespace directly
  /// inside them), so list bullets (`* item`) and arithmetic (`2 * 3 * 4`)
  /// are not mistaken for emphasis. The underscore form additionally must
  /// not sit inside a word, which keeps `snake_case_names` intact.
  static final _italicRegex = RegExp(
    r'\*(?!\s)([^*]+)(?<!\s)\*'
    r'|(?<![A-Za-z0-9])_(?!\s)([^_]+)(?<!\s)_(?![A-Za-z0-9])',
  );

  /// `~~strikethrough~~`; group 1 is the struck text.
  static final _strikethroughRegex = RegExp(r'~~([^~]+)~~');

  /// Leading blockquote markers on a line, including nested ones (`> > `).
  static final _blockquoteRegex = RegExp(
    r'^[ \t]*(?:>[ \t]?)+',
    multiLine: true,
  );

  /// ATX heading; group 1 is the heading text.
  static final _headingRegex = RegExp(
    r'^#{1,6}[ \t]+(.+)$',
    multiLine: true,
  );

  /// Unordered list item; group 1 is the item text.
  static final _listItemRegex = RegExp(
    r'^[ \t]*[-*+][ \t]+(.+)$',
    multiLine: true,
  );

  /// Ordered list item; group 1 is the item text.
  static final _orderedListRegex = RegExp(
    r'^[ \t]*\d+\.[ \t]+(.+)$',
    multiLine: true,
  );

  /// HTML comments and tags.
  ///
  /// A tag must start with a letter (optionally preceded by `/`), so prose
  /// such as `a < b and c > d` is not swallowed.
  static final _htmlTagRegex = RegExp(
    r'<!--[\s\S]*?-->|</?[A-Za-z][^>]*>',
  );

  /// Punctuation that already produces a pause when spoken.
  static final _terminalPunctuationRegex = RegExp(r'[.!?:;,]$');

  /// Any run of whitespace, including newlines.
  static final _whitespaceRegex = RegExp(r'\s+');

  /// Converts markdown text to plain text suitable for TTS.
  ///
  /// - Replaces non-empty fenced code blocks with the phrase `(code block)`
  /// - Strips all formatting (bold, italic, strikethrough, inline code)
  /// - Converts images to `(<alt> image)`, or removes them when there is no
  ///   alt text
  /// - Converts links to just their text
  /// - Simplifies headings and list items, ending each with sentence
  ///   punctuation so the speech engine pauses
  /// - Removes blockquote markers, horizontal rules and HTML tags
  /// - Collapses all whitespace (including newlines) to single spaces
  ///
  /// Returns an empty string when [markdown] is empty or contains nothing
  /// speakable.
  static String convert(String markdown) {
    if (markdown.trim().isEmpty) {
      return '';
    }

    var text = markdown;

    // Code blocks go first so their contents are never interpreted as
    // markdown by the later passes.
    text = text.replaceAllMapped(_codeBlockRegex, (match) {
      final code = match[1]?.trim() ?? '';
      return code.isEmpty ? '' : ' (code block) ';
    });

    // Horizontal rules must be removed before emphasis handling; otherwise
    // `***` rules can be paired up as bold-italic delimiters.
    text = text.replaceAll(_horizontalRuleRegex, '');

    // Remove inline code backticks but keep the content.
    text = text.replaceAllMapped(_inlineCodeRegex, (match) => match[1] ?? '');

    // Images must be handled before links: an image is a link prefixed with
    // `!`, so the link pattern would otherwise consume it and leave the `!`.
    text = text.replaceAllMapped(_imageRegex, (match) {
      final alt = match[1]?.trim() ?? '';
      return alt.isNotEmpty ? ' ($alt image) ' : '';
    });

    // Convert links to just their text. Doing this before emphasis handling
    // also keeps `_` / `*` inside URLs from being treated as formatting.
    text = text.replaceAllMapped(_linkRegex, (match) => match[1] ?? '');

    // Strip bold/italic/strikethrough, longest delimiters first.
    text = text.replaceAllMapped(_boldItalicRegex, (match) => match[1] ?? '');
    text = text.replaceAllMapped(
      _boldRegex,
      (match) => match[1] ?? match[2] ?? '',
    );
    text = text.replaceAllMapped(
      _italicRegex,
      (match) => match[1] ?? match[2] ?? '',
    );
    text = text.replaceAllMapped(
      _strikethroughRegex,
      (match) => match[1] ?? '',
    );

    // Blockquote markers are dropped before headings/lists so that quoted
    // headings and quoted list items are still recognized.
    text = text.replaceAll(_blockquoteRegex, '');

    // Headings and list items become short sentences for natural pauses.
    text = text.replaceAllMapped(
      _headingRegex,
      (match) => '${_withPause(match[1] ?? '')}\n',
    );
    text = text.replaceAllMapped(
      _listItemRegex,
      (match) => '${_withPause(match[1] ?? '')} ',
    );
    text = text.replaceAllMapped(
      _orderedListRegex,
      (match) => '${_withPause(match[1] ?? '')} ',
    );

    // Remove HTML tags and comments.
    text = text.replaceAll(_htmlTagRegex, '');

    // Collapse every whitespace run (newlines included) in a single pass so
    // no double spaces are left behind where line breaks used to be.
    return text.replaceAll(_whitespaceRegex, ' ').trim();
  }

  /// Returns [fragment] without trailing whitespace, ending in punctuation.
  ///
  /// A period is appended only when [fragment] does not already end with
  /// pause-producing punctuation, which avoids output such as `Ready?.`.
  /// Returns an empty string for a blank [fragment].
  static String _withPause(String fragment) {
    final trimmed = fragment.trimRight();
    if (trimmed.isEmpty) {
      return '';
    }
    return _terminalPunctuationRegex.hasMatch(trimmed) ? trimmed : '$trimmed.';
  }
}

View File

@@ -2,6 +2,7 @@ import 'dart:async';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../../core/utils/markdown_to_text.dart';
import '../services/text_to_speech_service.dart';
enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error }
@@ -161,7 +162,21 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
);
try {
await _service.speak(text);
// Convert markdown to clean text for TTS
final cleanText = MarkdownToText.convert(text);
if (cleanText.isEmpty) {
// No speakable content
if (!ref.mounted) {
return;
}
state = state.copyWith(
status: TtsPlaybackStatus.idle,
clearActiveMessageId: true,
);
return;
}
await _service.speak(cleanText);
if (!ref.mounted) {
return;
}

View File

@@ -5,6 +5,7 @@ import 'package:wakelock_plus/wakelock_plus.dart';
import '../../../core/providers/app_providers.dart';
import '../../../core/services/socket_service.dart';
import '../../../core/utils/markdown_to_text.dart';
import '../providers/chat_providers.dart';
import 'text_to_speech_service.dart';
import 'voice_input_service.dart';
@@ -80,8 +81,8 @@ class VoiceCallService {
await _notificationService.initialize();
// Request notification permissions if needed
final notificationsEnabled =
await _notificationService.areNotificationsEnabled();
final notificationsEnabled = await _notificationService
.areNotificationsEnabled();
if (!notificationsEnabled) {
await _notificationService.requestPermissions();
}
@@ -186,12 +187,10 @@ class VoiceCallService {
);
// Forward intensity stream for waveform visualization
_intensitySubscription = _voiceInput.intensityStream.listen(
(intensity) {
_intensitySubscription = _voiceInput.intensityStream.listen((intensity) {
if (_isDisposed) return;
_intensityController.add(intensity);
},
);
});
} catch (e) {
_updateState(VoiceCallState.error);
rethrow;
@@ -283,7 +282,17 @@ class VoiceCallService {
await _intensitySubscription?.cancel();
_updateState(VoiceCallState.speaking);
await _tts.speak(response);
// Convert markdown to clean text for TTS
final cleanText = MarkdownToText.convert(response);
if (cleanText.isEmpty) {
// No speakable content, restart listening
_isSpeaking = false;
await _startListening();
return;
}
await _tts.speak(cleanText);
// After speaking completes, _handleTtsComplete will restart listening
} catch (e) {
_isSpeaking = false;

View File

@@ -6,6 +6,7 @@ import 'package:flutter/cupertino.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import '../../../core/providers/app_providers.dart';
import '../../../core/utils/markdown_to_text.dart';
import '../services/voice_call_service.dart';
class VoiceCallPage extends ConsumerStatefulWidget {
@@ -239,7 +240,8 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
builder: (context, child) {
final offset = (index * 0.2) % 1.0;
final animation = (_waveController.value + offset) % 1.0;
final height = 20.0 +
final height =
20.0 +
(math.sin(animation * math.pi * 2) * 30.0).abs() +
(_currentIntensity * 4.0);
@@ -271,10 +273,7 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
decoration: BoxDecoration(
shape: BoxShape.circle,
color: primaryColor.withValues(alpha: 0.2),
border: Border.all(
color: primaryColor,
width: 3,
),
border: Border.all(color: primaryColor, width: 3),
),
child: Center(
child: Icon(
@@ -323,7 +322,8 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
displayText = _currentTranscript;
} else if (_currentState == VoiceCallState.speaking &&
_currentResponse.isNotEmpty) {
displayText = _currentResponse;
// Convert markdown to clean text for display
displayText = MarkdownToText.convert(_currentResponse);
}
if (displayText.isEmpty) {
@@ -405,25 +405,12 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
child: Container(
width: 64,
height: 64,
decoration: BoxDecoration(
shape: BoxShape.circle,
color: color,
),
child: Icon(
icon,
color: Colors.white,
size: 32,
),
decoration: BoxDecoration(shape: BoxShape.circle, color: color),
child: Icon(icon, color: Colors.white, size: 32),
),
),
const SizedBox(height: 8),
Text(
label,
style: TextStyle(
fontSize: 12,
color: color,
),
),
Text(label, style: TextStyle(fontSize: 12, color: color)),
],
);
}