Compare commits
2 commits
68010e22a9
...
6a91980030
Author | SHA1 | Date | |
---|---|---|---|
6a91980030 | |||
378dc5be1d |
2 changed files with 13 additions and 5 deletions
|
@@ -36,7 +36,11 @@ class Clip:
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def empty(cls, channels, samplerate, length=0):
|
def empty(cls, channels, samplerate, length=0):
|
||||||
blank = np.zeros((channels, length), dtype='float32')
|
samples = int(samplerate * length)
|
||||||
|
if channels > 1:
|
||||||
|
blank = np.zeros((channels, samples), dtype='float32')
|
||||||
|
else:
|
||||||
|
blank = np.zeros((samples,), dtype='float32')
|
||||||
return cls(blank, samplerate)
|
return cls(blank, samplerate)
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
|
@@ -87,7 +91,7 @@ class Clip:
|
||||||
|
|
||||||
def trim(self, aggressive=False):
|
def trim(self, aggressive=False):
|
||||||
"""Remove leading and trailing silence"""
|
"""Remove leading and trailing silence"""
|
||||||
self.samples, _ = librosa.effects.trim(self.samples, top_db=10 if aggressive else 20)
|
self.samples, _ = librosa.effects.trim(self.samples, top_db=10 if aggressive else 30)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def stretch(self, speed):
|
def stretch(self, speed):
|
||||||
|
@@ -121,6 +125,7 @@ class Clip:
|
||||||
self.samples, fix=False, scale=False,
|
self.samples, fix=False, scale=False,
|
||||||
orig_sr=self.samplerate * speed, target_sr=self.samplerate,
|
orig_sr=self.samplerate * speed, target_sr=self.samplerate,
|
||||||
)
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
def _opcheck(self, other):
|
def _opcheck(self, other):
|
||||||
if not isinstance(other, Clip):
|
if not isinstance(other, Clip):
|
||||||
|
|
|
@@ -23,9 +23,12 @@ class TextToSpeechPlugin(PluginBase):
|
||||||
conf_overrides = {k[2:]: v for k, v in kwargs.items() if k.startswith('o_')}
|
conf_overrides = {k[2:]: v for k, v in kwargs.items() if k.startswith('o_')}
|
||||||
|
|
||||||
self.cache_dir = os.path.join(CACHE_DIR, 'tts')
|
self.cache_dir = os.path.join(CACHE_DIR, 'tts')
|
||||||
self.cache = {}
|
|
||||||
os.makedirs(os.path.dirname(self.cache_dir), exist_ok=True)
|
os.makedirs(os.path.dirname(self.cache_dir), exist_ok=True)
|
||||||
|
|
||||||
|
self.output_dir = os.path.join(self.cache_dir, 'outputs')
|
||||||
|
os.makedirs(self.output_dir, exist_ok=True)
|
||||||
|
self.cache = {}
|
||||||
|
|
||||||
self.cuda = cuda
|
self.cuda = cuda
|
||||||
|
|
||||||
manager = ModelManager(output_prefix=CACHE_DIR) # HACK: coqui automatically adds 'tts' subdir
|
manager = ModelManager(output_prefix=CACHE_DIR) # HACK: coqui automatically adds 'tts' subdir
|
||||||
|
@@ -60,7 +63,7 @@ class TextToSpeechPlugin(PluginBase):
|
||||||
for task in self.tasks:
|
for task in self.tasks:
|
||||||
task.cancel()
|
task.cancel()
|
||||||
self.output.close()
|
self.output.close()
|
||||||
shutil.rmtree(self.cache_dir)
|
shutil.rmtree(self.output_dir)
|
||||||
|
|
||||||
async def text_to_clip(self, text):
|
async def text_to_clip(self, text):
|
||||||
# Force punctuation (keeps the models from acting unpredictably)
|
# Force punctuation (keeps the models from acting unpredictably)
|
||||||
|
@@ -73,7 +76,7 @@ class TextToSpeechPlugin(PluginBase):
|
||||||
return Clip.from_file(cached)
|
return Clip.from_file(cached)
|
||||||
else:
|
else:
|
||||||
self.logger.info(f'Generating TTS "{text}"...')
|
self.logger.info(f'Generating TTS "{text}"...')
|
||||||
filename = os.path.join(self.cache_dir, f'{uuid.uuid1()}.wav')
|
filename = os.path.join(self.output_dir, f'{uuid.uuid1()}.wav')
|
||||||
|
|
||||||
if self.speaker_wav:
|
if self.speaker_wav:
|
||||||
fn = lambda _text: self.synthesizer.tts(_text, None, 'en', self.speaker_wav)
|
fn = lambda _text: self.synthesizer.tts(_text, None, 'en', self.speaker_wav)
|
||||||
|
|
Loading…
Add table
Reference in a new issue