Fix bug causing TTS to redownload models every time

Fix some bugs in the audio redesign
2025-04-30 23:28:52 -04:00 · 2025-04-30 23:28:27 -04:00
2 changed files with 13 additions and 5 deletions
--- a/src/ovtk_audiencekit/core/Audio.py
+++ b/src/ovtk_audiencekit/core/Audio.py
@@ -36,7 +36,11 @@ class Clip:

    @classmethod
    def empty(cls, channels, samplerate, length=0):
-        blank = np.zeros((channels, length), dtype='float32')
+        samples = int(samplerate * length)
+        if channels > 1:
+            blank = np.zeros((channels, samples), dtype='float32')
+        else:
+            blank = np.zeros((samples,), dtype='float32')
        return cls(blank, samplerate)

    def copy(self):
@@ -87,7 +91,7 @@ class Clip:

    def trim(self, aggressive=False):
        """Remove leading and trailing silence"""
-        self.samples, _ = librosa.effects.trim(self.samples, top_db=10 if aggressive else 20)
+        self.samples, _ = librosa.effects.trim(self.samples, top_db=10 if aggressive else 30)
        return self

    def stretch(self, speed):
@@ -121,6 +125,7 @@ class Clip:
            self.samples, fix=False, scale=False,
            orig_sr=self.samplerate * speed, target_sr=self.samplerate,
        )
+        return self

    def _opcheck(self, other):
        if not isinstance(other, Clip):
--- a/src/ovtk_audiencekit/plugins/TTS/TTS.py
+++ b/src/ovtk_audiencekit/plugins/TTS/TTS.py
@@ -23,9 +23,12 @@ class TextToSpeechPlugin(PluginBase):
        conf_overrides = {k[2:]: v for k, v in kwargs.items() if k.startswith('o_')}

        self.cache_dir = os.path.join(CACHE_DIR, 'tts')
-        self.cache = {}
        os.makedirs(os.path.dirname(self.cache_dir), exist_ok=True)

+        self.output_dir = os.path.join(self.cache_dir, 'outputs')
+        os.makedirs(self.output_dir, exist_ok=True)
+        self.cache = {}
+
        self.cuda = cuda

        manager = ModelManager(output_prefix=CACHE_DIR) # HACK: coqui automatically adds 'tts' subdir
@@ -60,7 +63,7 @@ class TextToSpeechPlugin(PluginBase):
        for task in self.tasks:
            task.cancel()
        self.output.close()
-        shutil.rmtree(self.cache_dir)
+        shutil.rmtree(self.output_dir)

    async def text_to_clip(self, text):
        # Force punctuation (keeps the models from acting unpredictably)
@@ -73,7 +76,7 @@ class TextToSpeechPlugin(PluginBase):
            return Clip.from_file(cached)
        else:
            self.logger.info(f'Generating TTS "{text}"...')
-            filename = os.path.join(self.cache_dir, f'{uuid.uuid1()}.wav')
+            filename = os.path.join(self.output_dir, f'{uuid.uuid1()}.wav')

            if self.speaker_wav:
                fn = lambda _text: self.synthesizer.tts(_text, None, 'en', self.speaker_wav)
Author	SHA1	Message	Date
Derek	6a91980030	Fix bug causing TTS to redownload models every time	2025-04-30 23:28:52 -04:00
Derek	378dc5be1d	Fix some bugs in the audio redesign	2025-04-30 23:28:27 -04:00