Merge pull request #165 from cogwheel0/optimize-vad-parameters

optimize-vad-parameters
This commit is contained in:
cogwheel
2025-11-24 14:18:18 +05:30
committed by GitHub

View File

@@ -27,7 +27,12 @@ class LocaleName {
class VoiceInputService { class VoiceInputService {
static const int _vadSampleRate = 16000; static const int _vadSampleRate = 16000;
static const int _vadFrameSamples = 1536; static const int _vadFrameSamples = 512;
static const int _vadPreSpeechPadFrames = 16;
static const int _vadMinSpeechFrames = 8;
static const int _vadEndSpeechPadFrames = 6;
static const double _vadPositiveSpeechThreshold = 0.6;
static const double _vadNegativeSpeechThreshold = 0.35;
static const Duration _localeFetchTimeout = Duration(seconds: 2); static const Duration _localeFetchTimeout = Duration(seconds: 2);
static const String _backgroundSttStreamId = 'voice-input-stt'; static const String _backgroundSttStreamId = 'voice-input-stt';
@@ -580,7 +585,6 @@ class VoiceInputService {
category: IosAudioCategory.playAndRecord, category: IosAudioCategory.playAndRecord,
options: [ options: [
IosAudioCategoryOptions.allowBluetooth, IosAudioCategoryOptions.allowBluetooth,
IosAudioCategoryOptions.allowBluetoothA2DP,
IosAudioCategoryOptions.defaultToSpeaker, IosAudioCategoryOptions.defaultToSpeaker,
IosAudioCategoryOptions.duckOthers, IosAudioCategoryOptions.duckOthers,
], ],
@@ -593,18 +597,21 @@ class VoiceInputService {
await _setupVadStreams(); await _setupVadStreams();
final settings = _ref?.read(appSettingsProvider); final settings = _ref?.read(appSettingsProvider);
final silenceMs = settings?.voiceSilenceDuration ?? 2000; final silenceMs = settings?.voiceSilenceDuration ?? 2000;
final redemptionFrames = _silenceDurationToFrames(silenceMs); final redemptionFrames = _silenceDurationToFrames(
final endPadFrames = redemptionFrames > 4 silenceMs,
? (redemptionFrames / 4).round().clamp(1, redemptionFrames) frameSamples: _vadFrameSamples,
: 1; );
try { try {
await _vadHandler.startListening( await _vadHandler.startListening(
frameSamples: _vadFrameSamples, frameSamples: _vadFrameSamples,
model: 'v5',
minSpeechFrames: _vadMinSpeechFrames,
preSpeechPadFrames: _vadPreSpeechPadFrames,
redemptionFrames: redemptionFrames, redemptionFrames: redemptionFrames,
endSpeechPadFrames: endPadFrames, endSpeechPadFrames: _vadEndSpeechPadFrames,
preSpeechPadFrames: 2, positiveSpeechThreshold: _vadPositiveSpeechThreshold,
minSpeechFrames: 3, negativeSpeechThreshold: _vadNegativeSpeechThreshold,
submitUserSpeechOnPause: true, submitUserSpeechOnPause: true,
recordConfig: const RecordConfig( recordConfig: const RecordConfig(
encoder: AudioEncoder.pcm16bits, encoder: AudioEncoder.pcm16bits,
@@ -612,15 +619,22 @@ class VoiceInputService {
numChannels: 1, numChannels: 1,
bitRate: 16, bitRate: 16,
echoCancel: true, echoCancel: true,
autoGain: true, autoGain: false,
noiseSuppress: true, noiseSuppress: true,
androidConfig: AndroidRecordConfig( androidConfig: AndroidRecordConfig(
audioSource: AndroidAudioSource.voiceCommunication, audioSource: AndroidAudioSource.voiceRecognition,
audioManagerMode: AudioManagerMode.modeInCommunication, audioManagerMode: AudioManagerMode.modeInCommunication,
speakerphone: true, speakerphone: true,
manageBluetooth: true, manageBluetooth: true,
useLegacy: false, useLegacy: false,
), ),
iosConfig: IosRecordConfig(
categoryOptions: [
IosAudioCategoryOption.allowBluetooth,
IosAudioCategoryOption.defaultToSpeaker,
IosAudioCategoryOption.duckOthers,
],
),
), ),
); );
} catch (error) { } catch (error) {
@@ -661,9 +675,7 @@ class VoiceInputService {
Future<void> _stopVadRecording() async { Future<void> _stopVadRecording() async {
try { try {
if (_isListening) { await _vadHandler.stopListening();
await _vadHandler.stopListening();
}
} catch (_) {} } catch (_) {}
await _vadSpeechEndSub?.cancel(); await _vadSpeechEndSub?.cancel();
_vadSpeechEndSub = null; _vadSpeechEndSub = null;
@@ -701,8 +713,9 @@ class VoiceInputService {
} }
} }
/// Converts a silence duration in milliseconds into a number of VAD frames.
///
/// [frameSamples] is the number of audio samples in one VAD frame. When it is
/// omitted or not positive, [_vadFrameSamples] is used instead. The length of
/// a frame in milliseconds is derived from [_vadSampleRate].
///
/// The result is never lower than 4 frames. The upper bound is expressed as a
/// duration (4800 ms) rather than as a fixed frame count so that it does not
/// shrink when a smaller frame size is used: at 16 kHz a fixed cap of 50
/// frames equals 4800 ms with 1536-sample frames but only 1600 ms with
/// 512-sample frames, which would silently truncate a 2000 ms setting.
int _silenceDurationToFrames(int milliseconds, {int? frameSamples}) {
  const minFrames = 4;
  const maxSilenceMs = 4800;

  // A non-positive frame size would make the frame duration zero or negative
  // and break the divisions below, so fall back to the class default.
  final samples = (frameSamples != null && frameSamples > 0)
      ? frameSamples
      : _vadFrameSamples;

  // Multiply before dividing so common frame sizes yield an exact duration
  // (e.g. 512 * 1000 / 16000 == 32.0) instead of a rounded intermediate.
  final frameDurationMs = samples * 1000 / _vadSampleRate;

  // Derive the frame cap from the time cap; never let it drop below the floor.
  final cappedFrames = (maxSilenceMs / frameDurationMs).round();
  final maxFrames = cappedFrames < minFrames ? minFrames : cappedFrames;

  final frames = (milliseconds / frameDurationMs).round();
  return frames.clamp(minFrames, maxFrames);
}