From a3b5c4f5b7ee37f165a03265a72c73b79b732da2 Mon Sep 17 00:00:00 2001 From: cogwheel0 <172976095+cogwheel0@users.noreply.github.com> Date: Wed, 5 Nov 2025 00:09:35 +0530 Subject: [PATCH] feat(audio): replace record package with mic_stream_recorder --- .../chat/services/voice_input_service.dart | 161 ++++++------------ pubspec.lock | 72 +------- pubspec.yaml | 2 +- 3 files changed, 65 insertions(+), 170 deletions(-) diff --git a/lib/features/chat/services/voice_input_service.dart b/lib/features/chat/services/voice_input_service.dart index a817a12..805cdac 100644 --- a/lib/features/chat/services/voice_input_service.dart +++ b/lib/features/chat/services/voice_input_service.dart @@ -4,7 +4,7 @@ import 'dart:io' show File, Platform; import 'package:flutter/widgets.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:riverpod_annotation/riverpod_annotation.dart'; -import 'package:record/record.dart'; +import 'package:mic_stream_recorder/mic_stream_recorder.dart'; import 'package:stts/stts.dart'; import 'package:path/path.dart' as p; import 'package:path_provider/path_provider.dart'; @@ -23,7 +23,7 @@ class LocaleName { } class VoiceInputService { - final AudioRecorder _recorder = AudioRecorder(); + final MicStreamRecorder _recorder = MicStreamRecorder(); final Stt _speech = Stt(); final ApiService? _api; bool _isInitialized = false; @@ -31,14 +31,10 @@ class VoiceInputService { bool _localSttAvailable = false; SttPreference _preference = SttPreference.auto; bool _usingServerStt = false; - bool _serverRecorderActive = false; - String? _serverRecordingPath; - String? _serverRecordingMimeType; String? _selectedLocaleId; List _locales = const []; StreamController? _textStreamController; String _currentText = ''; - // Public stream for UI waveform visualization (emits partial text length as proxy) StreamController? _intensityController; Stream get intensityStream => _intensityController?.stream ?? const Stream.empty(); @@ -46,12 +42,13 @@ class VoiceInputService { Timer? _intensityDecayTimer; Timer? _silenceTimer; bool _hasDetectedSpeech = false; + int _amplitudeCallbackCount = 0; + Timer? _amplitudeFallbackTimer; - /// Public stream of partial/final transcript strings and special audio tokens. Stream get textStream => _textStreamController?.stream ?? const Stream.empty(); Timer? _autoStopTimer; - StreamSubscription? _ampSub; + StreamSubscription? _ampSub; StreamSubscription? _sttResultSub; StreamSubscription? _sttStateSub; @@ -111,10 +108,7 @@ class VoiceInputService { Future checkPermissions() async { try { - // Prefer stts permission check which will request microphone permission - final mic = await _speech.hasPermission(); - if (mic) return true; - return await _recorder.hasPermission(); + return await _speech.hasPermission(); } catch (_) { return false; } @@ -200,9 +194,6 @@ class VoiceInputService { _intensityController = StreamController.broadcast(); _lastIntensity = 0; _usingServerStt = false; - _serverRecorderActive = false; - _serverRecordingPath = null; - _serverRecordingMimeType = null; _startIntensityDecayTimer(); @@ -336,6 +327,9 @@ class VoiceInputService { _silenceTimer?.cancel(); _silenceTimer = null; + _amplitudeFallbackTimer?.cancel(); + _amplitudeFallbackTimer = null; + if (_usingServerStt) { await _finalizeServerRecording(); } else { @@ -356,9 +350,6 @@ class VoiceInputService { await _closeControllers(); _usingServerStt = false; - _serverRecorderActive = false; - _serverRecordingPath = null; - _serverRecordingMimeType = null; _hasDetectedSpeech = false; } @@ -417,52 +408,50 @@ class VoiceInputService { } Future _startServerRecording() async { - final (path, mimeType) = await _createRecordingTarget(); - _serverRecordingPath = path; - _serverRecordingMimeType = mimeType; - - final config = RecordConfig( - encoder: AudioEncoder.aacLc, - sampleRate: 44100, - bitRate: 96000, - numChannels: 1, - noiseSuppress: true, - ); - - await _recorder.start(config, path: path); - _serverRecorderActive = true; + final path = await _createRecordingPath(); _hasDetectedSpeech = false; - await _ampSub?.cancel(); - _ampSub = _recorder - .onAmplitudeChanged(const Duration(milliseconds: 140)) - .listen((Amplitude amplitude) { - if (!_isListening) return; - _lastIntensity = _amplitudeToIntensity(amplitude.current); - try { - _intensityController?.add(_lastIntensity); - } catch (_) {} + await _recorder.startRecording(path); - // Detect silence and auto-stop for server-side STT - _handleServerAmplitude(amplitude.current); - }, onError: (_) {}); + await _ampSub?.cancel(); + _amplitudeFallbackTimer?.cancel(); + _amplitudeCallbackCount = 0; + + _ampSub = _recorder.amplitudeStream.listen((amplitude) { + _amplitudeCallbackCount++; + if (!_isListening) return; + + _lastIntensity = _normalizedToIntensity(amplitude); + try { + _intensityController?.add(_lastIntensity); + } catch (_) {} + + _handleServerAmplitude(amplitude); + }); + + _amplitudeFallbackTimer = Timer(const Duration(seconds: 1), () { + if (_amplitudeCallbackCount == 0) { + _silenceTimer = Timer(const Duration(seconds: 15), () { + if (_isListening && _usingServerStt) { + unawaited(_stopListening()); + } + }); + } + }); } - void _handleServerAmplitude(double? amplitude) { + void _handleServerAmplitude(double amplitude) { if (!_usingServerStt || !_isListening) return; - // Threshold for detecting speech (in dB) - const double speechThreshold = -45.0; - final double currentDb = amplitude ?? -100.0; + const double speechThreshold = 0.55; + if (amplitude.isNaN || amplitude.isInfinite) return; - // If we detect speech, mark it and reset silence timer - if (currentDb > speechThreshold) { + if (amplitude > speechThreshold) { _hasDetectedSpeech = true; _silenceTimer?.cancel(); _silenceTimer = null; } else if (_hasDetectedSpeech && _silenceTimer == null) { - // Start silence timer only after we've detected speech at least once - _silenceTimer = Timer(const Duration(seconds: 2), () { + _silenceTimer = Timer(const Duration(milliseconds: 800), () { if (_isListening && _usingServerStt) { unawaited(_stopListening()); } @@ -470,53 +459,30 @@ class VoiceInputService { } } - Future<(String, String)> _createRecordingTarget() async { + Future _createRecordingPath() async { final directory = await getTemporaryDirectory(); final timestamp = DateTime.now().millisecondsSinceEpoch; - const extension = 'm4a'; - final fileName = 'conduit_voice_$timestamp.$extension'; - final path = p.join(directory.path, fileName); - return (path, 'audio/mp4'); + final fileName = 'conduit_voice_$timestamp.m4a'; + return p.join(directory.path, fileName); } Future _finalizeServerRecording() async { final api = _api; - if (api == null) { - return; - } + if (api == null) return; - String? path; + final path = await _recorder.stopRecording(); + if (path == null || path.isEmpty) return; + + final file = File(path); try { - if (_serverRecorderActive && await _recorder.isRecording()) { - path = await _recorder.stop(); - } else { - path = _serverRecordingPath; - } - } catch (_) { - path = _serverRecordingPath; - } finally { - _serverRecorderActive = false; - } - - final resolvedPath = path; - if (resolvedPath == null || resolvedPath.isEmpty) { - return; - } - - final file = File(resolvedPath); - try { - if (!await file.exists()) { - return; - } + if (!await file.exists()) return; final bytes = await file.readAsBytes(); - if (bytes.isEmpty) { - return; - } + if (bytes.isEmpty) return; final response = await api.transcribeSpeech( audioBytes: bytes, - fileName: p.basename(resolvedPath), - mimeType: _serverRecordingMimeType, + fileName: p.basename(path), + mimeType: 'audio/mp4', language: _languageForServer(), ); @@ -641,21 +607,9 @@ class VoiceInputService { return null; } - int _amplitudeToIntensity(double? value) { - if (value == null || value.isNaN || value.isInfinite) { - return 0; - } - const minDb = -55.0; - const maxDb = 0.0; - final double clamped = value.clamp(minDb, maxDb).toDouble(); - final double normalized = ((clamped - minDb) / (maxDb - minDb)).clamp( - 0.0, - 1.0, - ); - final int scaled = (normalized * 10).round(); - if (scaled <= 0) return 0; - if (scaled >= 10) return 10; - return scaled; + int _normalizedToIntensity(double value) { + if (value.isNaN || value.isInfinite) return 0; + return (value * 10).round().clamp(0, 10); } Future _closeControllers() async { @@ -693,9 +647,6 @@ class VoiceInputService { try { _speech.dispose().catchError((_) {}); } catch (_) {} - try { - _recorder.dispose().catchError((_) {}); - } catch (_) {} } } diff --git a/pubspec.lock b/pubspec.lock index 405533d..b662398 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -965,6 +965,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.16.0" + mic_stream_recorder: + dependency: "direct main" + description: + name: mic_stream_recorder + sha256: "73965991ef5cc93d2b0c1e6d590cbd567a853b9ee7b2d52de43a73f185bb0d9c" + url: "https://pub.dev" + source: hosted + version: "1.1.2" mime: dependency: transitive description: @@ -1165,70 +1173,6 @@ packages: url: "https://pub.dev" source: hosted version: "1.5.0" - record: - dependency: "direct main" - description: - name: record - sha256: "9dbc6ff3e784612f90a9b001373c45ff76b7a08abd2bd9fdf72c242320c8911c" - url: "https://pub.dev" - source: hosted - version: "6.1.1" - record_android: - dependency: transitive - description: - name: record_android - sha256: "854627cd78d8d66190377f98477eee06ca96ab7c9f2e662700daf33dbf7e6673" - url: "https://pub.dev" - source: hosted - version: "1.4.2" - record_ios: - dependency: transitive - description: - name: record_ios - sha256: "13e241ed9cbc220534a40ae6b66222e21288db364d96dd66fb762ebd3cb77c71" - url: "https://pub.dev" - source: hosted - version: "1.1.2" - record_linux: - dependency: transitive - description: - name: record_linux - sha256: "235b1f1fb84e810f8149cc0c2c731d7d697f8d1c333b32cb820c449bf7bb72d8" - url: "https://pub.dev" - source: hosted - version: "1.2.1" - record_macos: - dependency: transitive - description: - name: record_macos - sha256: "2849068bb59072f300ad63ed146e543d66afaef8263edba4de4834fc7c8d4d35" - url: "https://pub.dev" - source: hosted - version: "1.1.1" - record_platform_interface: - dependency: transitive - description: - name: record_platform_interface - sha256: b0065fdf1ec28f5a634d676724d388a77e43ce7646fb049949f58c69f3fcb4ed - url: "https://pub.dev" - source: hosted - version: "1.4.0" - record_web: - dependency: transitive - description: - name: record_web - sha256: "4f0adf20c9ccafcc02d71111fd91fba1ca7b17a7453902593e5a9b25b74a5c56" - url: "https://pub.dev" - source: hosted - version: "1.2.0" - record_windows: - dependency: transitive - description: - name: record_windows - sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78" - url: "https://pub.dev" - source: hosted - version: "1.0.7" riverpod: dependency: transitive description: diff --git a/pubspec.yaml b/pubspec.yaml index ee02fd1..10af0a9 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -44,7 +44,7 @@ dependencies: flutter_animate: ^4.5.0 # Platform Features - record: ^6.1.1 + mic_stream_recorder: ^1.1.2 stts: ^1.2.5 flutter_tts: ^4.2.3 audioplayers: ^6.5.1