feat(tts): add speech rate support for text-to-speech generation

This commit is contained in:
cogwheel0
2025-11-03 00:44:24 +05:30
parent 1a570f4a08
commit 715849aff3
3 changed files with 17 additions and 3 deletions

View File

@@ -1659,12 +1659,17 @@ class ApiService {
Future<({Uint8List bytes, String mimeType})> generateSpeech({
required String text,
String? voice,
double? speed,
}) async {
final textPreview = text.length > 50 ? text.substring(0, 50) : text;
_traceApi('Generating speech for text: $textPreview...');
final response = await _dio.post(
'/api/v1/audio/speech',
data: {'input': text, if (voice != null) 'voice': voice},
data: {
'input': text,
if (voice != null) 'voice': voice,
if (speed != null) 'speed': speed,
},
options: Options(responseType: ResponseType.bytes),
);

View File

@@ -19,6 +19,7 @@ class TextToSpeechService {
TtsEngine _engine = TtsEngine.auto;
String? _preferredVoice;
String? _serverPreferredVoice;
/// Most recently configured speech rate.
///
/// Elsewhere in this class the field is overwritten from `speechRate`, the
/// same value is handed to `_tts.setSpeechRate`, and the field is sent as
/// the `speed` argument of `_api.generateSpeech`.
///
/// NOTE(review): one number feeds both the on-device engine and the server
/// request — confirm both interpret it on the same scale, and that the 0.5
/// default is the intended "normal" rate for each.
double _speechRate = 0.5;
bool _initialized = false;
bool _available = false;
bool _voiceConfigured = false;
@@ -171,6 +172,7 @@ class TextToSpeechService {
}) async {
if (_initialized) {
_engine = engine;
_speechRate = speechRate;
if (deviceVoice != null) {
_preferredVoice = deviceVoice;
_voiceConfigured = false;
@@ -183,6 +185,7 @@ class TextToSpeechService {
}
_engine = engine;
_speechRate = speechRate;
_preferredVoice = deviceVoice;
_serverPreferredVoice = serverVoice;
_voiceConfigured = false;
@@ -352,6 +355,7 @@ class TextToSpeechService {
if (engine != null) _engine = engine;
if (voiceProvided) _preferredVoice = voiceValue;
if (serverVoiceProvided) _serverPreferredVoice = serverVoiceValue;
if (speechRate != null) _speechRate = speechRate;
return;
}
@@ -369,6 +373,7 @@ class TextToSpeechService {
await _tts.setVolume(volume);
}
if (speechRate != null) {
_speechRate = speechRate;
await _tts.setSpeechRate(speechRate);
}
if (pitch != null) {
@@ -645,7 +650,11 @@ class TextToSpeechService {
String? voice,
int session,
) async {
return await _api!.generateSpeech(text: text, voice: voice);
return await _api!.generateSpeech(
text: text,
voice: voice,
speed: _speechRate,
);
}
Future<void> _onAudioComplete() async {

View File

@@ -944,7 +944,7 @@ class AppCustomizationPage extends ConsumerWidget {
value: settings.ttsSpeechRate,
min: 0.25,
max: 2.0,
divisions: 7,
divisions: 35,
label: '${(settings.ttsSpeechRate * 100).round()}%',
onChanged: (value) => ref
.read(appSettingsProvider.notifier)