Compare commits

...

3 commits

Author SHA1 Message Date
a401fc485b Auto trim TTS audio 2025-04-25 02:11:19 -04:00
0915008b36 Expose level requirement to commands built in config 2025-04-25 02:08:46 -04:00
ecc1f71a53 Proper cache handling for tts 2025-04-25 02:08:05 -04:00
3 changed files with 14 additions and 4 deletions

View file

@ -25,9 +25,11 @@ os.close(old_stderr)
logger = logging.getLogger(__name__)
class Clip:
def __init__(self, path, samplerate=None, speed=1, keep_pitch=True, force_stereo=True):
def __init__(self, path, samplerate=None, speed=1, keep_pitch=True, force_stereo=True, trim=False):
self.path = path
raw, native_rate = librosa.load(self.path, sr=None, dtype='float32', mono=False)
if trim:
raw, _ = librosa.effects.trim(raw)
self.channels = raw.shape[0] if len(raw.shape) == 2 else 1
if force_stereo and self.channels == 1:

View file

@ -1,6 +1,7 @@
import uuid
import os
import asyncio
import shutil
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.manage import ModelManager
@ -29,6 +30,7 @@ class TextToSpeechPlugin(PluginBase):
conf_overrides = {k[2:]: v for k, v in kwargs.items() if k.startswith('o_')}
self.cache_dir = os.path.join(CACHE_DIR, 'tts')
self.cache = {}
os.makedirs(os.path.dirname(self.cache_dir), exist_ok=True)
self.cuda = cuda
@ -64,6 +66,7 @@ class TextToSpeechPlugin(PluginBase):
def close(self):
    """Cancel outstanding tasks and remove the on-disk TTS cache.

    Safe to call when the cache directory does not exist (nothing was
    ever synthesized, or ``close`` already ran): a missing directory is
    treated as already cleaned up rather than as an error.
    """
    for task in self.tasks:
        task.cancel()
    # Only a missing directory is tolerated; other failures (permissions,
    # files in use, ...) still propagate so they are not hidden.
    try:
        shutil.rmtree(self.cache_dir)
    except FileNotFoundError:
        pass
    # The cached paths pointed into the directory that was just removed;
    # drop them so a later lookup cannot return a deleted file.
    self.cache.clear()
def make_tts_wav(self, text, filename=None):
# Force punctuation (keeps the models from acting unpredictably)
@ -71,6 +74,10 @@ class TextToSpeechPlugin(PluginBase):
if not any([text.endswith(punc) for punc in '.!?:']):
text += '.'
if cached := self.cache.get(hash(text)):
self.logger.info(f'Cache hit - {cached}')
return cached
if filename is None:
filename = os.path.join(self.cache_dir, f'{uuid.uuid1()}.wav')
@ -81,6 +88,7 @@ class TextToSpeechPlugin(PluginBase):
wav = self.synthesizer.tts(text)
self.synthesizer.save_wav(wav, filename)
self.cache[hash(text)] = filename
self.logger.info(f'Done - saved as {filename}')
return filename
@ -90,7 +98,7 @@ class TextToSpeechPlugin(PluginBase):
filename = await asyncio.get_running_loop().run_in_executor(None, self.make_tts_wav, text)
# TODO: Play direct from memory
clip = Clip(filename, force_stereo=True, samplerate=self.sample_rate)
clip = Clip(filename, force_stereo=True, samplerate=self.sample_rate, trim=True)
stream = Stream(clip, self.output_index)
async def play():
try:

View file

@ -103,14 +103,14 @@ class CommandPlugin(PluginBase):
raise TypeError('Should be instance of Command')
self.commands[cmd.name] = (cmd, None, True)
def run(self, name, help=None, display=False, _children=None, **kwargs):
def run(self, name, help=None, display=False, required_level=None, _children=None, **kwargs):
actionnode = next((node for node in _children if node.name == 'do'), None)
if actionnode is None:
raise ValueError('Command defined without an action (`do` tag)')
aliases = [node.args for node in _children if node.name == 'alias']
aliases = list(itertools.chain(*aliases))
cmd = Command(name, help, aliases=aliases)
cmd = Command(name, help, aliases=aliases, required_level=required_level)
for argnode in [node for node in _children if node.name == 'arg']:
if argnode.props.get('type'):