feat(audio): replace record package with mic_stream_recorder
This commit is contained in:
@@ -4,7 +4,7 @@ import 'dart:io' show File, Platform;
|
|||||||
import 'package:flutter/widgets.dart';
|
import 'package:flutter/widgets.dart';
|
||||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||||
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
import 'package:riverpod_annotation/riverpod_annotation.dart';
|
||||||
import 'package:record/record.dart';
|
import 'package:mic_stream_recorder/mic_stream_recorder.dart';
|
||||||
import 'package:stts/stts.dart';
|
import 'package:stts/stts.dart';
|
||||||
import 'package:path/path.dart' as p;
|
import 'package:path/path.dart' as p;
|
||||||
import 'package:path_provider/path_provider.dart';
|
import 'package:path_provider/path_provider.dart';
|
||||||
@@ -23,7 +23,7 @@ class LocaleName {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class VoiceInputService {
|
class VoiceInputService {
|
||||||
final AudioRecorder _recorder = AudioRecorder();
|
final MicStreamRecorder _recorder = MicStreamRecorder();
|
||||||
final Stt _speech = Stt();
|
final Stt _speech = Stt();
|
||||||
final ApiService? _api;
|
final ApiService? _api;
|
||||||
bool _isInitialized = false;
|
bool _isInitialized = false;
|
||||||
@@ -31,14 +31,10 @@ class VoiceInputService {
|
|||||||
bool _localSttAvailable = false;
|
bool _localSttAvailable = false;
|
||||||
SttPreference _preference = SttPreference.auto;
|
SttPreference _preference = SttPreference.auto;
|
||||||
bool _usingServerStt = false;
|
bool _usingServerStt = false;
|
||||||
bool _serverRecorderActive = false;
|
|
||||||
String? _serverRecordingPath;
|
|
||||||
String? _serverRecordingMimeType;
|
|
||||||
String? _selectedLocaleId;
|
String? _selectedLocaleId;
|
||||||
List<LocaleName> _locales = const [];
|
List<LocaleName> _locales = const [];
|
||||||
StreamController<String>? _textStreamController;
|
StreamController<String>? _textStreamController;
|
||||||
String _currentText = '';
|
String _currentText = '';
|
||||||
// Public stream for UI waveform visualization (emits partial text length as proxy)
|
|
||||||
StreamController<int>? _intensityController;
|
StreamController<int>? _intensityController;
|
||||||
Stream<int> get intensityStream =>
|
Stream<int> get intensityStream =>
|
||||||
_intensityController?.stream ?? const Stream<int>.empty();
|
_intensityController?.stream ?? const Stream<int>.empty();
|
||||||
@@ -46,12 +42,13 @@ class VoiceInputService {
|
|||||||
Timer? _intensityDecayTimer;
|
Timer? _intensityDecayTimer;
|
||||||
Timer? _silenceTimer;
|
Timer? _silenceTimer;
|
||||||
bool _hasDetectedSpeech = false;
|
bool _hasDetectedSpeech = false;
|
||||||
|
int _amplitudeCallbackCount = 0;
|
||||||
|
Timer? _amplitudeFallbackTimer;
|
||||||
|
|
||||||
/// Public stream of partial/final transcript strings and special audio tokens.
|
|
||||||
Stream<String> get textStream =>
|
Stream<String> get textStream =>
|
||||||
_textStreamController?.stream ?? const Stream<String>.empty();
|
_textStreamController?.stream ?? const Stream<String>.empty();
|
||||||
Timer? _autoStopTimer;
|
Timer? _autoStopTimer;
|
||||||
StreamSubscription<Amplitude>? _ampSub;
|
StreamSubscription<double>? _ampSub;
|
||||||
StreamSubscription<SttRecognition>? _sttResultSub;
|
StreamSubscription<SttRecognition>? _sttResultSub;
|
||||||
StreamSubscription<SttState>? _sttStateSub;
|
StreamSubscription<SttState>? _sttStateSub;
|
||||||
|
|
||||||
@@ -111,10 +108,7 @@ class VoiceInputService {
|
|||||||
|
|
||||||
Future<bool> checkPermissions() async {
|
Future<bool> checkPermissions() async {
|
||||||
try {
|
try {
|
||||||
// Prefer stts permission check which will request microphone permission
|
return await _speech.hasPermission();
|
||||||
final mic = await _speech.hasPermission();
|
|
||||||
if (mic) return true;
|
|
||||||
return await _recorder.hasPermission();
|
|
||||||
} catch (_) {
|
} catch (_) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -200,9 +194,6 @@ class VoiceInputService {
|
|||||||
_intensityController = StreamController<int>.broadcast();
|
_intensityController = StreamController<int>.broadcast();
|
||||||
_lastIntensity = 0;
|
_lastIntensity = 0;
|
||||||
_usingServerStt = false;
|
_usingServerStt = false;
|
||||||
_serverRecorderActive = false;
|
|
||||||
_serverRecordingPath = null;
|
|
||||||
_serverRecordingMimeType = null;
|
|
||||||
|
|
||||||
_startIntensityDecayTimer();
|
_startIntensityDecayTimer();
|
||||||
|
|
||||||
@@ -336,6 +327,9 @@ class VoiceInputService {
|
|||||||
_silenceTimer?.cancel();
|
_silenceTimer?.cancel();
|
||||||
_silenceTimer = null;
|
_silenceTimer = null;
|
||||||
|
|
||||||
|
_amplitudeFallbackTimer?.cancel();
|
||||||
|
_amplitudeFallbackTimer = null;
|
||||||
|
|
||||||
if (_usingServerStt) {
|
if (_usingServerStt) {
|
||||||
await _finalizeServerRecording();
|
await _finalizeServerRecording();
|
||||||
} else {
|
} else {
|
||||||
@@ -356,9 +350,6 @@ class VoiceInputService {
|
|||||||
await _closeControllers();
|
await _closeControllers();
|
||||||
|
|
||||||
_usingServerStt = false;
|
_usingServerStt = false;
|
||||||
_serverRecorderActive = false;
|
|
||||||
_serverRecordingPath = null;
|
|
||||||
_serverRecordingMimeType = null;
|
|
||||||
_hasDetectedSpeech = false;
|
_hasDetectedSpeech = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -417,52 +408,50 @@ class VoiceInputService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Future<void> _startServerRecording() async {
|
Future<void> _startServerRecording() async {
|
||||||
final (path, mimeType) = await _createRecordingTarget();
|
final path = await _createRecordingPath();
|
||||||
_serverRecordingPath = path;
|
|
||||||
_serverRecordingMimeType = mimeType;
|
|
||||||
|
|
||||||
final config = RecordConfig(
|
|
||||||
encoder: AudioEncoder.aacLc,
|
|
||||||
sampleRate: 44100,
|
|
||||||
bitRate: 96000,
|
|
||||||
numChannels: 1,
|
|
||||||
noiseSuppress: true,
|
|
||||||
);
|
|
||||||
|
|
||||||
await _recorder.start(config, path: path);
|
|
||||||
_serverRecorderActive = true;
|
|
||||||
_hasDetectedSpeech = false;
|
_hasDetectedSpeech = false;
|
||||||
|
|
||||||
await _ampSub?.cancel();
|
await _recorder.startRecording(path);
|
||||||
_ampSub = _recorder
|
|
||||||
.onAmplitudeChanged(const Duration(milliseconds: 140))
|
|
||||||
.listen((Amplitude amplitude) {
|
|
||||||
if (!_isListening) return;
|
|
||||||
_lastIntensity = _amplitudeToIntensity(amplitude.current);
|
|
||||||
try {
|
|
||||||
_intensityController?.add(_lastIntensity);
|
|
||||||
} catch (_) {}
|
|
||||||
|
|
||||||
// Detect silence and auto-stop for server-side STT
|
await _ampSub?.cancel();
|
||||||
_handleServerAmplitude(amplitude.current);
|
_amplitudeFallbackTimer?.cancel();
|
||||||
}, onError: (_) {});
|
_amplitudeCallbackCount = 0;
|
||||||
|
|
||||||
|
_ampSub = _recorder.amplitudeStream.listen((amplitude) {
|
||||||
|
_amplitudeCallbackCount++;
|
||||||
|
if (!_isListening) return;
|
||||||
|
|
||||||
|
_lastIntensity = _normalizedToIntensity(amplitude);
|
||||||
|
try {
|
||||||
|
_intensityController?.add(_lastIntensity);
|
||||||
|
} catch (_) {}
|
||||||
|
|
||||||
|
_handleServerAmplitude(amplitude);
|
||||||
|
});
|
||||||
|
|
||||||
|
_amplitudeFallbackTimer = Timer(const Duration(seconds: 1), () {
|
||||||
|
if (_amplitudeCallbackCount == 0) {
|
||||||
|
_silenceTimer = Timer(const Duration(seconds: 15), () {
|
||||||
|
if (_isListening && _usingServerStt) {
|
||||||
|
unawaited(_stopListening());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void _handleServerAmplitude(double? amplitude) {
|
void _handleServerAmplitude(double amplitude) {
|
||||||
if (!_usingServerStt || !_isListening) return;
|
if (!_usingServerStt || !_isListening) return;
|
||||||
|
|
||||||
// Threshold for detecting speech (in dB)
|
const double speechThreshold = 0.55;
|
||||||
const double speechThreshold = -45.0;
|
if (amplitude.isNaN || amplitude.isInfinite) return;
|
||||||
final double currentDb = amplitude ?? -100.0;
|
|
||||||
|
|
||||||
// If we detect speech, mark it and reset silence timer
|
if (amplitude > speechThreshold) {
|
||||||
if (currentDb > speechThreshold) {
|
|
||||||
_hasDetectedSpeech = true;
|
_hasDetectedSpeech = true;
|
||||||
_silenceTimer?.cancel();
|
_silenceTimer?.cancel();
|
||||||
_silenceTimer = null;
|
_silenceTimer = null;
|
||||||
} else if (_hasDetectedSpeech && _silenceTimer == null) {
|
} else if (_hasDetectedSpeech && _silenceTimer == null) {
|
||||||
// Start silence timer only after we've detected speech at least once
|
_silenceTimer = Timer(const Duration(milliseconds: 800), () {
|
||||||
_silenceTimer = Timer(const Duration(seconds: 2), () {
|
|
||||||
if (_isListening && _usingServerStt) {
|
if (_isListening && _usingServerStt) {
|
||||||
unawaited(_stopListening());
|
unawaited(_stopListening());
|
||||||
}
|
}
|
||||||
@@ -470,53 +459,30 @@ class VoiceInputService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<(String, String)> _createRecordingTarget() async {
|
Future<String> _createRecordingPath() async {
|
||||||
final directory = await getTemporaryDirectory();
|
final directory = await getTemporaryDirectory();
|
||||||
final timestamp = DateTime.now().millisecondsSinceEpoch;
|
final timestamp = DateTime.now().millisecondsSinceEpoch;
|
||||||
const extension = 'm4a';
|
final fileName = 'conduit_voice_$timestamp.m4a';
|
||||||
final fileName = 'conduit_voice_$timestamp.$extension';
|
return p.join(directory.path, fileName);
|
||||||
final path = p.join(directory.path, fileName);
|
|
||||||
return (path, 'audio/mp4');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<void> _finalizeServerRecording() async {
|
Future<void> _finalizeServerRecording() async {
|
||||||
final api = _api;
|
final api = _api;
|
||||||
if (api == null) {
|
if (api == null) return;
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
String? path;
|
final path = await _recorder.stopRecording();
|
||||||
|
if (path == null || path.isEmpty) return;
|
||||||
|
|
||||||
|
final file = File(path);
|
||||||
try {
|
try {
|
||||||
if (_serverRecorderActive && await _recorder.isRecording()) {
|
if (!await file.exists()) return;
|
||||||
path = await _recorder.stop();
|
|
||||||
} else {
|
|
||||||
path = _serverRecordingPath;
|
|
||||||
}
|
|
||||||
} catch (_) {
|
|
||||||
path = _serverRecordingPath;
|
|
||||||
} finally {
|
|
||||||
_serverRecorderActive = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
final resolvedPath = path;
|
|
||||||
if (resolvedPath == null || resolvedPath.isEmpty) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
final file = File(resolvedPath);
|
|
||||||
try {
|
|
||||||
if (!await file.exists()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
final bytes = await file.readAsBytes();
|
final bytes = await file.readAsBytes();
|
||||||
if (bytes.isEmpty) {
|
if (bytes.isEmpty) return;
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
final response = await api.transcribeSpeech(
|
final response = await api.transcribeSpeech(
|
||||||
audioBytes: bytes,
|
audioBytes: bytes,
|
||||||
fileName: p.basename(resolvedPath),
|
fileName: p.basename(path),
|
||||||
mimeType: _serverRecordingMimeType,
|
mimeType: 'audio/mp4',
|
||||||
language: _languageForServer(),
|
language: _languageForServer(),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -641,21 +607,9 @@ class VoiceInputService {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
int _amplitudeToIntensity(double? value) {
|
int _normalizedToIntensity(double value) {
|
||||||
if (value == null || value.isNaN || value.isInfinite) {
|
if (value.isNaN || value.isInfinite) return 0;
|
||||||
return 0;
|
return (value * 10).round().clamp(0, 10);
|
||||||
}
|
|
||||||
const minDb = -55.0;
|
|
||||||
const maxDb = 0.0;
|
|
||||||
final double clamped = value.clamp(minDb, maxDb).toDouble();
|
|
||||||
final double normalized = ((clamped - minDb) / (maxDb - minDb)).clamp(
|
|
||||||
0.0,
|
|
||||||
1.0,
|
|
||||||
);
|
|
||||||
final int scaled = (normalized * 10).round();
|
|
||||||
if (scaled <= 0) return 0;
|
|
||||||
if (scaled >= 10) return 10;
|
|
||||||
return scaled;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Future<void> _closeControllers() async {
|
Future<void> _closeControllers() async {
|
||||||
@@ -693,9 +647,6 @@ class VoiceInputService {
|
|||||||
try {
|
try {
|
||||||
_speech.dispose().catchError((_) {});
|
_speech.dispose().catchError((_) {});
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
try {
|
|
||||||
_recorder.dispose().catchError((_) {});
|
|
||||||
} catch (_) {}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
72
pubspec.lock
72
pubspec.lock
@@ -965,6 +965,14 @@ packages:
|
|||||||
url: "https://pub.dev"
|
url: "https://pub.dev"
|
||||||
source: hosted
|
source: hosted
|
||||||
version: "1.16.0"
|
version: "1.16.0"
|
||||||
|
mic_stream_recorder:
|
||||||
|
dependency: "direct main"
|
||||||
|
description:
|
||||||
|
name: mic_stream_recorder
|
||||||
|
sha256: "73965991ef5cc93d2b0c1e6d590cbd567a853b9ee7b2d52de43a73f185bb0d9c"
|
||||||
|
url: "https://pub.dev"
|
||||||
|
source: hosted
|
||||||
|
version: "1.1.2"
|
||||||
mime:
|
mime:
|
||||||
dependency: transitive
|
dependency: transitive
|
||||||
description:
|
description:
|
||||||
@@ -1165,70 +1173,6 @@ packages:
|
|||||||
url: "https://pub.dev"
|
url: "https://pub.dev"
|
||||||
source: hosted
|
source: hosted
|
||||||
version: "1.5.0"
|
version: "1.5.0"
|
||||||
record:
|
|
||||||
dependency: "direct main"
|
|
||||||
description:
|
|
||||||
name: record
|
|
||||||
sha256: "9dbc6ff3e784612f90a9b001373c45ff76b7a08abd2bd9fdf72c242320c8911c"
|
|
||||||
url: "https://pub.dev"
|
|
||||||
source: hosted
|
|
||||||
version: "6.1.1"
|
|
||||||
record_android:
|
|
||||||
dependency: transitive
|
|
||||||
description:
|
|
||||||
name: record_android
|
|
||||||
sha256: "854627cd78d8d66190377f98477eee06ca96ab7c9f2e662700daf33dbf7e6673"
|
|
||||||
url: "https://pub.dev"
|
|
||||||
source: hosted
|
|
||||||
version: "1.4.2"
|
|
||||||
record_ios:
|
|
||||||
dependency: transitive
|
|
||||||
description:
|
|
||||||
name: record_ios
|
|
||||||
sha256: "13e241ed9cbc220534a40ae6b66222e21288db364d96dd66fb762ebd3cb77c71"
|
|
||||||
url: "https://pub.dev"
|
|
||||||
source: hosted
|
|
||||||
version: "1.1.2"
|
|
||||||
record_linux:
|
|
||||||
dependency: transitive
|
|
||||||
description:
|
|
||||||
name: record_linux
|
|
||||||
sha256: "235b1f1fb84e810f8149cc0c2c731d7d697f8d1c333b32cb820c449bf7bb72d8"
|
|
||||||
url: "https://pub.dev"
|
|
||||||
source: hosted
|
|
||||||
version: "1.2.1"
|
|
||||||
record_macos:
|
|
||||||
dependency: transitive
|
|
||||||
description:
|
|
||||||
name: record_macos
|
|
||||||
sha256: "2849068bb59072f300ad63ed146e543d66afaef8263edba4de4834fc7c8d4d35"
|
|
||||||
url: "https://pub.dev"
|
|
||||||
source: hosted
|
|
||||||
version: "1.1.1"
|
|
||||||
record_platform_interface:
|
|
||||||
dependency: transitive
|
|
||||||
description:
|
|
||||||
name: record_platform_interface
|
|
||||||
sha256: b0065fdf1ec28f5a634d676724d388a77e43ce7646fb049949f58c69f3fcb4ed
|
|
||||||
url: "https://pub.dev"
|
|
||||||
source: hosted
|
|
||||||
version: "1.4.0"
|
|
||||||
record_web:
|
|
||||||
dependency: transitive
|
|
||||||
description:
|
|
||||||
name: record_web
|
|
||||||
sha256: "4f0adf20c9ccafcc02d71111fd91fba1ca7b17a7453902593e5a9b25b74a5c56"
|
|
||||||
url: "https://pub.dev"
|
|
||||||
source: hosted
|
|
||||||
version: "1.2.0"
|
|
||||||
record_windows:
|
|
||||||
dependency: transitive
|
|
||||||
description:
|
|
||||||
name: record_windows
|
|
||||||
sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78"
|
|
||||||
url: "https://pub.dev"
|
|
||||||
source: hosted
|
|
||||||
version: "1.0.7"
|
|
||||||
riverpod:
|
riverpod:
|
||||||
dependency: transitive
|
dependency: transitive
|
||||||
description:
|
description:
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ dependencies:
|
|||||||
flutter_animate: ^4.5.0
|
flutter_animate: ^4.5.0
|
||||||
|
|
||||||
# Platform Features
|
# Platform Features
|
||||||
record: ^6.1.1
|
mic_stream_recorder: ^1.1.2
|
||||||
stts: ^1.2.5
|
stts: ^1.2.5
|
||||||
flutter_tts: ^4.2.3
|
flutter_tts: ^4.2.3
|
||||||
audioplayers: ^6.5.1
|
audioplayers: ^6.5.1
|
||||||
|
|||||||
Reference in New Issue
Block a user