feat: enhance text-to-speech functionality with markdown support
- Integrated markdown conversion in TextToSpeechController to clean text before speech synthesis, so that markdown syntax is stripped and only speakable content is spoken.
- Updated VoiceCallService to utilize markdown conversion for responses, improving the clarity of spoken content.
- Enhanced VoiceCallPage to display cleaned text from markdown, providing a better user experience during voice interactions.
This commit is contained in:
118
lib/core/utils/markdown_to_text.dart
Normal file
118
lib/core/utils/markdown_to_text.dart
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
/// Converts markdown text to plain text suitable for text-to-speech.
///
/// Strips formatting while preserving the semantic meaning and readability
/// of the content for audio consumption.
///
/// This class only namespaces [convert]; it cannot be instantiated.
class MarkdownToText {
  const MarkdownToText._();

  /// Characters after which no extra period is appended by [_withPause].
  static const String _terminalPunctuation = '.!?:;,';

  /// Fenced code block: opening fence with optional info string, body, and
  /// closing fence. `dotAll` lets the body span multiple lines.
  static final RegExp _codeBlockRegex = RegExp(
    r'```[^\n]*\n(.*?)```',
    dotAll: true,
  );

  /// Thematic break such as `---`, `***`, `___` or the spaced form `- - -`.
  static final RegExp _horizontalRuleRegex = RegExp(
    r'^[ \t]*(?:[-*_][ \t]*){3,}$',
    multiLine: true,
  );

  /// Leading blockquote markers, including nested ones (`> > text`).
  static final RegExp _blockquoteRegex = RegExp(
    r'^[ \t]{0,3}(?:>[ \t]?)+',
    multiLine: true,
  );

  static final RegExp _imageRegex = RegExp(r'!\[([^\]]*)\]\([^)]+\)');
  static final RegExp _linkRegex = RegExp(r'\[([^\]]+)\]\([^)]+\)');
  static final RegExp _inlineCodeRegex = RegExp(r'`([^`]+)`');

  /// Emphasis patterns, applied in order (longest delimiters first).
  ///
  /// Every pattern requires the opening delimiter to be followed by, and the
  /// closing delimiter to be preceded by, a non-space character. That keeps
  /// `* item` bullets and expressions like `2 * 3 * 4` intact. Underscore
  /// variants must additionally sit on word boundaries so that identifiers
  /// such as `my_var_name` are not altered.
  static final List<RegExp> _emphasisRegexes = [
    RegExp(r'\*\*\*(?=\S)(.+?)(?<=\S)\*\*\*'),
    RegExp(r'\*\*(?=\S)(.+?)(?<=\S)\*\*'),
    RegExp(r'(?<!\w)__(?=\S)(.+?)(?<=\S)__(?!\w)'),
    RegExp(r'\*(?=\S)([^*\n]+)(?<=\S)\*'),
    RegExp(r'(?<!\w)_(?=\S)([^_\n]+)(?<=\S)_(?!\w)'),
    RegExp(r'~~(?=\S)(.+?)(?<=\S)~~'),
  ];

  /// HTML comments and tags. A tag must start with a letter (optionally
  /// after `/`) so that prose like `a < 5 and b > 3` is left untouched.
  static final RegExp _htmlTagRegex = RegExp(
    r'<(?:!--[\s\S]*?--|/?[A-Za-z][^<>]*)>',
  );

  /// ATX heading, with an optional closing run of `#` characters.
  static final RegExp _headingRegex = RegExp(
    r'^[ \t]{0,3}#{1,6}[ \t]+(.+?)(?:[ \t]+#+)?[ \t]*$',
    multiLine: true,
  );

  /// Bulleted (`-`, `*`, `+`) or numbered (`1.`) list item.
  static final RegExp _listItemRegex = RegExp(
    r'^[ \t]*(?:[-*+]|\d+\.)[ \t]+(.+)$',
    multiLine: true,
  );

  static final RegExp _whitespaceRegex = RegExp(r'\s+');

  /// Converts markdown text to plain text suitable for TTS.
  ///
  /// - Replaces non-empty fenced code blocks with the phrase `(code block)`
  /// - Removes horizontal rules and blockquote markers
  /// - Replaces images with `(<alt> image)`, or drops them when the alt
  ///   text is empty
  /// - Converts links to just their text
  /// - Strips inline-code backticks and bold/italic/strikethrough markers
  /// - Removes HTML tags and comments
  /// - Turns headings and list items into sentences (the list marker or
  ///   number itself is dropped)
  /// - Collapses all whitespace, including newlines, to single spaces
  ///
  /// Returns an empty string when [markdown] is empty or whitespace-only.
  /// The result can also be empty when the input contains nothing speakable.
  static String convert(String markdown) {
    if (markdown.trim().isEmpty) {
      return '';
    }

    // Code blocks go first so nothing inside them is treated as markdown.
    var text = markdown.replaceAllMapped(_codeBlockRegex, (match) {
      final code = match[1]?.trim() ?? '';
      return code.isEmpty ? '' : ' (code block) ';
    });

    // Rules must be removed before emphasis and list handling, otherwise
    // `***` / `- - -` would be read as emphasis markers or list items.
    text = text.replaceAll(_horizontalRuleRegex, '');

    // Unwrap quotes early so quoted headings and lists are recognised below.
    text = text.replaceAll(_blockquoteRegex, '');

    // Images must be handled before links: an image is a link preceded by
    // `!`, so the link pattern would otherwise consume it and leave the `!`.
    text = text.replaceAllMapped(_imageRegex, (match) {
      final alt = match[1]?.trim() ?? '';
      return alt.isNotEmpty ? ' ($alt image) ' : '';
    });
    text = text.replaceAllMapped(_linkRegex, (match) => match[1] ?? '');

    // Keep inline code content, drop the backticks.
    text = text.replaceAllMapped(_inlineCodeRegex, (match) => match[1] ?? '');

    for (final pattern in _emphasisRegexes) {
      text = text.replaceAllMapped(pattern, (match) => match[1] ?? '');
    }

    text = text.replaceAll(_htmlTagRegex, '');

    // Headings and list items become sentences so the speech engine pauses
    // after them. Done after inline stripping so the punctuation check in
    // [_withPause] sees the final text rather than trailing markers.
    text = text.replaceAllMapped(
      _headingRegex,
      (match) => '${_withPause(match[1] ?? '')}\n',
    );
    text = text.replaceAllMapped(
      _listItemRegex,
      (match) => '${_withPause(match[1] ?? '')} ',
    );

    // Newlines carry no meaning for speech; collapse every whitespace run
    // (spaces, tabs, CR/LF) into a single space in one pass.
    return text.replaceAll(_whitespaceRegex, ' ').trim();
  }

  /// Returns [text] trimmed and ending in sentence punctuation.
  ///
  /// A period is appended only when the text does not already end with one
  /// of [_terminalPunctuation], avoiding output such as `Ready?.`.
  static String _withPause(String text) {
    final trimmed = text.trim();
    if (trimmed.isEmpty) {
      return '';
    }
    final last = trimmed[trimmed.length - 1];
    return _terminalPunctuation.contains(last) ? trimmed : '$trimmed.';
  }
}
|
||||||
@@ -2,6 +2,7 @@ import 'dart:async';
|
|||||||
|
|
||||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||||
|
|
||||||
|
import '../../../core/utils/markdown_to_text.dart';
|
||||||
import '../services/text_to_speech_service.dart';
|
import '../services/text_to_speech_service.dart';
|
||||||
|
|
||||||
enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error }
|
enum TtsPlaybackStatus { idle, initializing, loading, speaking, paused, error }
|
||||||
@@ -161,7 +162,21 @@ class TextToSpeechController extends Notifier<TextToSpeechState> {
|
|||||||
);
|
);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await _service.speak(text);
|
// Convert markdown to clean text for TTS
|
||||||
|
final cleanText = MarkdownToText.convert(text);
|
||||||
|
if (cleanText.isEmpty) {
|
||||||
|
// No speakable content
|
||||||
|
if (!ref.mounted) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
state = state.copyWith(
|
||||||
|
status: TtsPlaybackStatus.idle,
|
||||||
|
clearActiveMessageId: true,
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await _service.speak(cleanText);
|
||||||
if (!ref.mounted) {
|
if (!ref.mounted) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import 'package:wakelock_plus/wakelock_plus.dart';
|
|||||||
|
|
||||||
import '../../../core/providers/app_providers.dart';
|
import '../../../core/providers/app_providers.dart';
|
||||||
import '../../../core/services/socket_service.dart';
|
import '../../../core/services/socket_service.dart';
|
||||||
|
import '../../../core/utils/markdown_to_text.dart';
|
||||||
import '../providers/chat_providers.dart';
|
import '../providers/chat_providers.dart';
|
||||||
import 'text_to_speech_service.dart';
|
import 'text_to_speech_service.dart';
|
||||||
import 'voice_input_service.dart';
|
import 'voice_input_service.dart';
|
||||||
@@ -80,8 +81,8 @@ class VoiceCallService {
|
|||||||
await _notificationService.initialize();
|
await _notificationService.initialize();
|
||||||
|
|
||||||
// Request notification permissions if needed
|
// Request notification permissions if needed
|
||||||
final notificationsEnabled =
|
final notificationsEnabled = await _notificationService
|
||||||
await _notificationService.areNotificationsEnabled();
|
.areNotificationsEnabled();
|
||||||
if (!notificationsEnabled) {
|
if (!notificationsEnabled) {
|
||||||
await _notificationService.requestPermissions();
|
await _notificationService.requestPermissions();
|
||||||
}
|
}
|
||||||
@@ -186,12 +187,10 @@ class VoiceCallService {
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Forward intensity stream for waveform visualization
|
// Forward intensity stream for waveform visualization
|
||||||
_intensitySubscription = _voiceInput.intensityStream.listen(
|
_intensitySubscription = _voiceInput.intensityStream.listen((intensity) {
|
||||||
(intensity) {
|
|
||||||
if (_isDisposed) return;
|
if (_isDisposed) return;
|
||||||
_intensityController.add(intensity);
|
_intensityController.add(intensity);
|
||||||
},
|
});
|
||||||
);
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
_updateState(VoiceCallState.error);
|
_updateState(VoiceCallState.error);
|
||||||
rethrow;
|
rethrow;
|
||||||
@@ -283,7 +282,17 @@ class VoiceCallService {
|
|||||||
await _intensitySubscription?.cancel();
|
await _intensitySubscription?.cancel();
|
||||||
|
|
||||||
_updateState(VoiceCallState.speaking);
|
_updateState(VoiceCallState.speaking);
|
||||||
await _tts.speak(response);
|
|
||||||
|
// Convert markdown to clean text for TTS
|
||||||
|
final cleanText = MarkdownToText.convert(response);
|
||||||
|
if (cleanText.isEmpty) {
|
||||||
|
// No speakable content, restart listening
|
||||||
|
_isSpeaking = false;
|
||||||
|
await _startListening();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await _tts.speak(cleanText);
|
||||||
// After speaking completes, _handleTtsComplete will restart listening
|
// After speaking completes, _handleTtsComplete will restart listening
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
_isSpeaking = false;
|
_isSpeaking = false;
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import 'package:flutter/cupertino.dart';
|
|||||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||||
|
|
||||||
import '../../../core/providers/app_providers.dart';
|
import '../../../core/providers/app_providers.dart';
|
||||||
|
import '../../../core/utils/markdown_to_text.dart';
|
||||||
import '../services/voice_call_service.dart';
|
import '../services/voice_call_service.dart';
|
||||||
|
|
||||||
class VoiceCallPage extends ConsumerStatefulWidget {
|
class VoiceCallPage extends ConsumerStatefulWidget {
|
||||||
@@ -239,7 +240,8 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
|
|||||||
builder: (context, child) {
|
builder: (context, child) {
|
||||||
final offset = (index * 0.2) % 1.0;
|
final offset = (index * 0.2) % 1.0;
|
||||||
final animation = (_waveController.value + offset) % 1.0;
|
final animation = (_waveController.value + offset) % 1.0;
|
||||||
final height = 20.0 +
|
final height =
|
||||||
|
20.0 +
|
||||||
(math.sin(animation * math.pi * 2) * 30.0).abs() +
|
(math.sin(animation * math.pi * 2) * 30.0).abs() +
|
||||||
(_currentIntensity * 4.0);
|
(_currentIntensity * 4.0);
|
||||||
|
|
||||||
@@ -271,10 +273,7 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
|
|||||||
decoration: BoxDecoration(
|
decoration: BoxDecoration(
|
||||||
shape: BoxShape.circle,
|
shape: BoxShape.circle,
|
||||||
color: primaryColor.withValues(alpha: 0.2),
|
color: primaryColor.withValues(alpha: 0.2),
|
||||||
border: Border.all(
|
border: Border.all(color: primaryColor, width: 3),
|
||||||
color: primaryColor,
|
|
||||||
width: 3,
|
|
||||||
),
|
|
||||||
),
|
),
|
||||||
child: Center(
|
child: Center(
|
||||||
child: Icon(
|
child: Icon(
|
||||||
@@ -323,7 +322,8 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
|
|||||||
displayText = _currentTranscript;
|
displayText = _currentTranscript;
|
||||||
} else if (_currentState == VoiceCallState.speaking &&
|
} else if (_currentState == VoiceCallState.speaking &&
|
||||||
_currentResponse.isNotEmpty) {
|
_currentResponse.isNotEmpty) {
|
||||||
displayText = _currentResponse;
|
// Convert markdown to clean text for display
|
||||||
|
displayText = MarkdownToText.convert(_currentResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (displayText.isEmpty) {
|
if (displayText.isEmpty) {
|
||||||
@@ -405,25 +405,12 @@ class _VoiceCallPageState extends ConsumerState<VoiceCallPage>
|
|||||||
child: Container(
|
child: Container(
|
||||||
width: 64,
|
width: 64,
|
||||||
height: 64,
|
height: 64,
|
||||||
decoration: BoxDecoration(
|
decoration: BoxDecoration(shape: BoxShape.circle, color: color),
|
||||||
shape: BoxShape.circle,
|
child: Icon(icon, color: Colors.white, size: 32),
|
||||||
color: color,
|
|
||||||
),
|
|
||||||
child: Icon(
|
|
||||||
icon,
|
|
||||||
color: Colors.white,
|
|
||||||
size: 32,
|
|
||||||
),
|
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
const SizedBox(height: 8),
|
const SizedBox(height: 8),
|
||||||
Text(
|
Text(label, style: TextStyle(fontSize: 12, color: color)),
|
||||||
label,
|
|
||||||
style: TextStyle(
|
|
||||||
fontSize: 12,
|
|
||||||
color: color,
|
|
||||||
),
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user