Skip to content

Commit

Permalink
Complete poor integration of ElevenLabs voice generation
Browse files Browse the repository at this point in the history
  • Loading branch information
HackXIt committed Jan 18, 2024
1 parent d872979 commit bf163f5
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 77 deletions.
172 changes: 96 additions & 76 deletions src/api/elevenlabsapi/elevenlabsapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,67 +7,12 @@
import argparse
from kivy.app import App
from kivy.properties import StringProperty, ListProperty, ObjectProperty
from kivy.logger import Logger as log
from kivy.uix.boxlayout import BoxLayout
import pyaudio
from typing import Iterator, List
from ..base_settings import BaseApiSettings

# NOTE: This implementation is not yet robust; it is a template for the API integration, meant to be iterated upon.
class ElevenLabsTTS():
    """
    This is a TTS implementation for the ElevenLabs TTS API.

    The instance resolves a voice by name once at construction time and
    reuses it, together with the chosen model, for every synthesis call.
    """
    _models = [
        "eleven_multilingual_v2",
        "eleven_monolingual_v1"
    ]

    def __init__(self, api_key: str = None, voice_name: str = 'Serena', model: str = "eleven_multilingual_v2"):
        """
        Validate the model, register the API key (if given) and resolve the voice.

        Args:
            api_key (str): API key to register; when omitted, no key is set here
            voice_name (str): name of the voice to look up in the available voices
            model (str): model identifier, must be one of ``_models``

        Raises:
            ValueError: if the model is not supported or the voice is not found
        """
        if model not in self._models:
            raise ValueError(f'Model not supported: {model} (must be one of: {", ".join(self._models)})')
        # Previously the key was accepted but ignored; only register it when the
        # caller actually provided one, so key-less construction keeps working.
        if api_key:
            set_api_key(api_key)
        # Fetch the voice list once and reuse it for lookup and the error message.
        available = voices()
        self.voice = next((v for v in available if v.name == voice_name), None)
        if not self.voice:
            raise ValueError(f'Voice not found: {voice_name} (available voices: {", ".join(v.name for v in available)})')
        self.model = model

    def synthesize(self, input: str, out_filename: str = None):
        """
        Synthesize an input using the ElevenLabs TTS API.

        Args:
            input (str): text to be synthesized
            out_filename (str): output filename (Optional, if not provided, the audio will be played instead of saved)

        Raises:
            ValueError: if ``input`` is empty
        """
        if not input:
            raise ValueError("Input must not be empty")
        should_stream = not out_filename
        audio = generate(text=input, voice=self.voice, model=self.model, stream=should_stream)
        if should_stream:
            # NOTE(review): a streamed result is presumably an iterator of byte
            # chunks while play() wants one bytes object (the original FIXME
            # reported a bytes error here) - collapse the chunks first.
            if not isinstance(audio, (bytes, bytearray)):
                audio = b"".join(audio)
            play(audio)
        else:
            save(audio, out_filename)

    @staticmethod
    def get_config():
        """Return the configuration keys of this API mapped to their value types."""
        return {
            "api_key": str,
            "language": str,
            "voice": str
        }

    @staticmethod
    def get_models() -> List[str]:
        """Return the list of supported model identifiers."""
        return ElevenLabsTTS._models

    @staticmethod
    def get_voices() -> List[str]:
        """Return the names of all voices reported by ``voices()``."""
        return [v.name for v in voices()]

class ElevenLabsWidget(BoxLayout):
api_key_input = ObjectProperty(None)
voice_selection = ObjectProperty(None)
Expand All @@ -81,27 +26,28 @@ def __init__(self, **kwargs):
self.voice_names = ElevenLabsTTS.get_voices()
self.voice_names.sort()
self.model_names = ElevenLabsTTS.get_models()
self.settings = ElevenLabsSettings()
self.settings = ElevenLabsSettings(self)
# Two-way bind api-key
self.api_key_input.bind(text=self.settings.setter('api_key'))
self.settings.bind(api_key=self.api_key_input.setter('text'))
self.api_key_input.bind(text=self.settings.setter('api_key_text'))
self.settings.bind(api_key_text=self.api_key_input.setter('text'))
self.api_key_input.bind(on_text_validate=self.update_key) # Set environment variable for token
# Two-way bind voice
self.voice_selection.bind(text=self.settings.setter('voice'))
self.settings.bind(voice=self.voice_selection.setter('text'))
self.voice_selection.bind(text=self.settings.setter('voice_text'))
self.settings.bind(voice_text=self.voice_selection.setter('text'))
# Two-way bind model
self.model_selection.bind(text=self.settings.setter('model'))
self.settings.bind(model=self.model_selection.setter('text'))
self.model_selection.bind(text=self.settings.setter('model_text'))
self.settings.bind(model_text=self.model_selection.setter('text'))

def update_key(self, instance, value=None):
    """
    Register the API key entered in the key input with the ElevenLabs client.

    Args:
        instance: the widget that fired the event (its ``text`` holds the key)
        value: the key to register; optional because ``on_text_validate``
            handlers are called with the widget only, in which case the key
            is read from ``instance.text``
    """
    if value is None:
        value = instance.text
    set_api_key(value)
    # Only write back when the text actually differs, to avoid a redundant
    # property dispatch on the input widget.
    if instance.text != value:
        instance.text = value

class ElevenLabsSettings(BaseApiSettings):
api_name = 'ElevenLabs'
api_key = StringProperty('')
voice = StringProperty('')
model = StringProperty('')
api_key_text = StringProperty('')
voice_text = StringProperty('')
model_text = StringProperty('')
widget: ElevenLabsWidget

@classmethod
def isSupported(cls):
Expand All @@ -111,24 +57,98 @@ def isSupported(cls):
def get_settings_widget(cls):
return ElevenLabsWidget()

def __init__(self, widget: "ElevenLabsWidget" = None, **kwargs):
    """
    Create the settings object and the API instance that reads from it.

    Args:
        widget: the settings widget these properties are bound to; optional
            so that callers constructing the settings without a widget keep
            working
        **kwargs: forwarded unchanged to the base settings class
    """
    super(ElevenLabsSettings, self).__init__(**kwargs)
    # Hand the settings to the API so it reads key/voice/model lazily at
    # synthesis time instead of contacting the service during construction.
    self.api = ElevenLabsTTS(self)
    self.widget = widget
    # load_settings() is deliberately not called here: per the original
    # author's note it is triggered by a schedule_once in the base class.

def load_settings(self):
    """
    Load API key, voice and model from the app's global settings into the
    bound ``*_text`` properties and publish the API object on the app.
    """
    # FIXME The two-way binding interferes with loading:
    #       "Error loading settings for Elevenlabsapi: 'str' object has no attribute 'value'"
    # FIXME The two-way binding still does not update the UI upon load
    app_instance = App.get_running_app()
    store = app_instance.global_settings
    # Each value is looked up once and written only to the property the
    # widget is bound to.
    self.api_key_text = store.get_setting(self.api_name, "api_key", default="")
    self.voice_text = store.get_setting(self.api_name, "voice", default="")
    self.model_text = store.get_setting(self.api_name, "model", default="")
    # Make the API reachable for the rest of the application.
    app_instance.api = self.api

def save_settings(self):
    """Persist the current API key, voice and model to the app's global settings."""
    store = App.get_running_app().global_settings
    # Write each key exactly once, from the property the widget is bound to.
    store.update_setting(self.api_name, "api_key", self.api_key_text)
    store.update_setting(self.api_name, "voice", self.voice_text)
    store.update_setting(self.api_name, "model", self.model_text)

# NOTE: This implementation is not yet robust; it is a template for the API integration, meant to be iterated upon.
class ElevenLabsTTS():
    """
    This is a TTS implementation for the ElevenLabs TTS API.

    It works in one of two modes:

    * standalone (``settings is None``): key, voice and model are fixed at
      construction time;
    * settings-driven: key, voice and model are read from the settings
      object's ``api_key_text`` / ``voice_text`` / ``model_text`` attributes
      on every ``synthesize()`` call.
    """
    _models = [
        "eleven_multilingual_v2",
        "eleven_monolingual_v1"
    ]

    def __init__(self, settings: "ElevenLabsSettings" = None, api_key: str = None, voice_name: str = 'Serena', model: str = "eleven_multilingual_v2"):
        """
        Args:
            settings: settings object to read key/voice/model from at
                synthesis time; when given, the remaining arguments are not
                validated here
            api_key (str): API key (standalone mode); falls back to ``get_api_key()``
            voice_name (str): name of the voice to use (standalone mode)
            model (str): model identifier, must be one of ``_models`` (standalone mode)

        Raises:
            ValueError: in standalone mode, if the model is unsupported, no
                API key is available, or the voice is not found
        """
        # Always define every attribute so synthesize() can branch on
        # self.settings without hitting an AttributeError in standalone mode.
        self.settings = settings
        self.voice = None
        self.model = model
        if settings is not None:
            return

        if model not in self._models:
            raise ValueError(f'Model not supported: {model} (must be one of: {", ".join(self._models)})')
        api_key = api_key or get_api_key()
        if not api_key:
            raise ValueError("No API key provided and no API key found in environment variable (ELEVENLABS_API_KEY)")
        set_api_key(api_key)
        # Fetch the voice list once and reuse it for lookup and the error message.
        available = voices()
        self.voice = next((v for v in available if v.name == voice_name), None)
        if not self.voice:
            raise ValueError(f'Voice not found: {voice_name} (available voices: {", ".join(v.name for v in available)})')

    def _refresh_from_settings(self):
        """Re-read API key, voice and model from the attached settings object."""
        key = self.settings.api_key_text
        # Register the key before listing voices so the lookup runs with it.
        if key:
            set_api_key(key)
        wanted = self.settings.voice_text
        self.voice = next((v for v in voices() if v.name == wanted), None)
        if self.voice is None:
            raise ValueError(f'Voice not found: {wanted}')
        chosen = self.settings.model_text
        # Honour the selected model; fall back to the first supported model
        # when nothing valid is selected.
        self.model = chosen if chosen in self._models else self._models[0]

    def synthesize(self, input: str, out_filename: str = None):
        """
        Synthesize an input using the ElevenLabs TTS API.

        Args:
            input (str): text to be synthesized
            out_filename (str): output filename (Optional, if not provided, the audio will be played instead of saved)

        Raises:
            ValueError: if ``input`` is empty, or (settings-driven mode) the
                configured voice cannot be found
        """
        if not input:
            raise ValueError("Input must not be empty")
        should_stream = not out_filename
        if self.settings is not None:
            self._refresh_from_settings()
        audio = generate(text=input, voice=self.voice, model=self.model, stream=should_stream)
        if should_stream:
            # NOTE(review): a streamed result is presumably an iterator of byte
            # chunks while play() wants one bytes object (the original FIXME
            # reported a bytes error here) - collapse the chunks first.
            if not isinstance(audio, (bytes, bytearray)):
                audio = b"".join(audio)
            play(audio)
        else:
            save(audio, out_filename)

    @staticmethod
    def get_config():
        """Return the configuration keys of this API mapped to their value types."""
        return {
            "api_key": str,
            "language": str,
            "voice": str
        }

    @staticmethod
    def get_models() -> List[str]:
        """Return the list of supported model identifiers."""
        return ElevenLabsTTS._models

    @staticmethod
    def get_voices() -> List[str]:
        """Return the names of all voices reported by ``voices()``."""
        return [v.name for v in voices()]


if __name__ == "__main__":
Expand Down
9 changes: 8 additions & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from modules.dialog import loaddialog, savedialog
from modules.util.widget_loader import load_widget
from api.elevenlabsapi.elevenlabsapi import ElevenLabsTTS
from settings import app_settings

class MainScreen(BoxLayout):
Expand Down Expand Up @@ -45,7 +46,13 @@ def play_audio(self):

def generate_audio(self):
    """
    Synthesize the contents of the text input to ``tmp/tmp.wav``.

    Does nothing (apart from a debug log entry) when no ElevenLabs API
    object has been published on the running app yet. Failures during
    synthesis are logged rather than propagated into the UI.
    """
    # The API object is attached to the app when settings are loaded, so it
    # may be missing early on - look it up defensively.
    api = getattr(App.get_running_app(), "api", None)
    if not isinstance(api, ElevenLabsTTS):
        log.debug("Audio generation skipped: no ElevenLabs API configured")
        return
    text = self.text_input.text
    log.debug(f"Synthesizing: {text[0:10]}...")
    try:
        # Ensure the output directory exists before the audio is saved.
        os.makedirs("tmp", exist_ok=True)
        api.synthesize(text, os.path.join("tmp/", "tmp.wav"))
    except Exception as e:
        log.error(f"Audio generation failed: {e}")

def open_settings(self):
self.settings_popup.open()
Expand Down

0 comments on commit bf163f5

Please sign in to comment.