feat: enhance text-to-speech functionality with markdown support

- Integrated markdown conversion in TextToSpeechController to strip markdown formatting from text before speech synthesis, so that only readable content is spoken.
- Updated VoiceCallService to run responses through markdown conversion, improving the clarity of spoken content.
- Enhanced VoiceCallPage to display the markdown-cleaned text, providing a better user experience during voice interactions.
2025-10-09 00:20:36 +05:30
parent 96202c7453
commit fabb1df63a
4 changed files with 166 additions and 37 deletions
@@ -0,0 +1,118 @@
+/// Converts markdown text to plain text suitable for text-to-speech.
+///
+/// Strips formatting while preserving the semantic meaning and readability
+/// of the content for audio consumption. Static-only; see [convert].
+class MarkdownToText {
+  const MarkdownToText._();
+
+  static final _codeBlockRegex = RegExp(r'```[^\n]*\n(.*?)```', dotAll: true);
+  static final _inlineCodeRegex = RegExp(r'`([^`]+)`');
+  static final _imageRegex = RegExp(r'!\[([^\]]*)\]\([^)]+\)');
+  static final _linkRegex = RegExp(r'\[([^\]]+)\]\([^)]+\)');
+
+  // Emphasis text must start and end on a non-space character and stay on one
+  // line, so `* ` bullets and spaced arithmetic (`2 * 3`) are left alone; the
+  // `\b` guards keep snake_case identifiers intact.
+  static final _boldItalicRegex = RegExp(
+    r'\*\*\*([^\s*](?:[^*\n]*[^\s*])?)\*\*\*',
+  );
+  static final _boldRegex = RegExp(
+    r'\*\*([^\s*](?:[^*\n]*[^\s*])?)\*\*'
+    r'|\b__([^\s_](?:[^_\n]*[^\s_])?)__\b',
+  );
+  static final _italicRegex = RegExp(
+    r'\*([^\s*](?:[^*\n]*[^\s*])?)\*'
+    r'|\b_([^\s_](?:[^_\n]*[^\s_])?)_\b',
+  );
+  static final _strikethroughRegex = RegExp(r'~~([^~]+)~~');
+
+  // Only comments and tag-shaped spans count as HTML; `x < 5 and y > 3` stays.
+  static final _htmlTagRegex = RegExp(
+    r'<!--.*?-->|</?[A-Za-z][^<>]*>',
+    dotAll: true,
+  );
+
+  // Line patterns use `[ \t]`, not `\s`, so they never span a line break.
+  static final _horizontalRuleRegex = RegExp(
+    r'^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$',
+    multiLine: true,
+  );
+  static final _blockquoteRegex = RegExp(
+    r'^[ \t]*(?:>[ \t]*)+',
+    multiLine: true,
+  );
+  static final _headingRegex = RegExp(
+    r'^[ \t]{0,3}#{1,6}[ \t]+(.+?)(?:[ \t]+#+)?[ \t]*$',
+    multiLine: true,
+  );
+  static final _listItemRegex = RegExp(
+    r'^[ \t]*(?:[-*+]|\d+[.)])[ \t]+(.+)$',
+    multiLine: true,
+  );
+  static final _sentenceEndRegex = RegExp(r'[.!?:;,][)\]"]*$');
+  static final _whitespaceRegex = RegExp(r'\s+');
+
+  /// Converts [markdown] to a single line of plain text suitable for TTS.
+  ///
+  /// - Replaces non-empty fenced code blocks with the phrase "(code block)"
+  /// - Keeps only the text of inline code, links, emphasis and strikethrough
+  /// - Turns images into "(alt image)", or drops them when alt text is empty
+  /// - Ends headings and list items with a period unless they already end in
+  ///   punctuation, giving the synthesizer a natural pause
+  /// - Removes blockquote markers, horizontal rules and HTML tags
+  /// - Collapses every run of whitespace, newlines included, to one space
+  ///
+  /// Returns an empty string when [markdown] is empty or only whitespace.
+  static String convert(String markdown) {
+    if (markdown.trim().isEmpty) {
+      return '';
+    }
+
+    var text = markdown;
+
+    // Code is unreadable aloud, so non-empty fenced blocks are only announced.
+    text = text.replaceAllMapped(_codeBlockRegex, (match) {
+      final code = match[1]?.trim() ?? '';
+      return code.isEmpty ? '' : ' (code block) ';
+    });
+    text = text.replaceAllMapped(_inlineCodeRegex, _firstGroup);
+
+    // Images go first: `![alt](url)` contains a link-shaped `[alt](url)` that
+    // the link pattern would otherwise consume, leaving a stray `!`.
+    text = text.replaceAllMapped(_imageRegex, (match) {
+      final alt = match[1]?.trim() ?? '';
+      return alt.isNotEmpty ? ' ($alt image) ' : '';
+    });
+    text = text.replaceAllMapped(_linkRegex, _firstGroup);
+
+    // Longest markers first so the bold rule cannot half-eat `***text***`.
+    text = text.replaceAllMapped(_boldItalicRegex, _firstGroup);
+    text = text.replaceAllMapped(_boldRegex, (m) => m[1] ?? m[2] ?? '');
+    text = text.replaceAllMapped(_italicRegex, (m) => m[1] ?? m[2] ?? '');
+    text = text.replaceAllMapped(_strikethroughRegex, _firstGroup);
+    text = text.replaceAll(_htmlTagRegex, '');
+
+    // Line markers come after inline cleanup so [_asSentence] inspects the
+    // final wording. Rules precede list items because `* * *` looks like a
+    // bullet.
+    text = text.replaceAll(_horizontalRuleRegex, '');
+    text = text.replaceAll(_blockquoteRegex, '');
+    text = text.replaceAllMapped(_headingRegex, (m) => _asSentence(m[1]));
+    text = text.replaceAllMapped(_listItemRegex, (m) => _asSentence(m[1]));
+
+    // Collapse spaces and newlines in one pass; squeezing spaces before
+    // joining lines would leave doubles where a trailing space met a newline.
+    return text.replaceAll(_whitespaceRegex, ' ').trim();
+  }
+
+  /// Returns the first capture group of [match], or '' when it is unset.
+  static String _firstGroup(Match match) => match[1] ?? '';
+
+  /// Returns [fragment] trimmed, with a period appended unless it is empty or
+  /// already ends in punctuation.
+  static String _asSentence(String? fragment) {
+    final spoken = (fragment ?? '').trim();
+    final complete = spoken.isEmpty || _sentenceEndRegex.hasMatch(spoken);
+    return complete ? spoken : '$spoken.';
+  }
+}