Compare commits

..

17 commits

Author SHA1 Message Date
6f2128beb4 Fix TTS logging spam 2025-03-09 15:56:21 -07:00
fd128948ca Fix kdl encoding detection 2025-03-09 15:56:21 -07:00
ba82f2e422 Use selector asyncio loop
Testing shows that it is more stable than the proactor loop. Dunno why
2025-03-09 15:56:21 -07:00
533286c7ca Fix full-async stdin reader 2025-03-09 15:56:21 -07:00
23f4190506 Remove aioprocessing, update all other dependencies
This removes the ability for audio clips to be played sync, but no 
internal uses do that anyways, so its fine probably
2025-03-09 15:56:21 -07:00
3a4e65b683 Fix module defined event ingest 2025-03-09 15:56:21 -07:00
ffd48dc1f2 Fix some plugin requests -> httpx imports 2025-03-09 15:56:21 -07:00
d9e14d49dd Fix missing await 2025-03-09 15:56:21 -07:00
be6ec19762 Move websocket bus to asyncio operation 2025-03-09 15:56:21 -07:00
ded19ef261 Move chats to asyncio operation 2025-03-09 15:56:21 -07:00
54226e7940 Remove peertube chat module
This feature was rejected upstream aaaages ago, and i dont maintain the 
fork with it in it anymore
2025-03-09 15:56:21 -07:00
2d908e60a5 Add logging to tts gen 2025-03-09 15:55:25 -07:00
d53508c158 Fix audio stretching behavior attempt 2 2025-03-09 15:55:06 -07:00
11b4c92fe9 [plugins/tts] Do text filtering for external users as well 2025-03-02 17:21:05 -05:00
32fc1660ec [builtins/scene] Fix title
oop
2025-03-02 17:11:29 -05:00
4b5dd0cf43 Fix non-pitchsynced playback speed 2025-03-02 17:10:28 -05:00
ba0b8c1068 [plugins] Work around blueprint templates (web UI) sharing a namespace 2025-03-02 17:08:33 -05:00
11 changed files with 1639 additions and 478 deletions

2020
pdm.lock generated

File diff suppressed because it is too large Load diff

View file

@@ -8,21 +8,20 @@ authors = [
dependencies = [
"click",
"kdl-py",
"quart==0.18.*",
"werkzeug==2.3.7",
"quart",
"werkzeug",
"hypercorn",
"websockets==11.0.3",
"aioprocessing",
"websockets",
"aioscheduler",
"pyaudio==0.2.*",
"librosa==0.8.*",
"pyaudio",
"librosa",
"pytsmod",
"numpy",
"multipledispatch",
"blessed",
"appdirs",
"maya",
"httpx>=0.28.1",
"httpx",
]
requires-python = ">=3.10,<3.11"
readme = "README.md"
@@ -30,18 +29,17 @@ license = {text = "GPLv2"}
[project.optional-dependencies]
tts = [
"TTS==0.9.*",
"torch==1.13.*",
"coqui-tts",
]
phrasecounter = ["num2words"]
jail = ["owoify-py==2.*"]
jail = ["owoify-py"]
twitch = ["miniirc"]
midi = [
"mido",
"python-rtmidi",
]
obs = ["simpleobsws"]
osc = ["python-osc>=1.9.0"]
osc = ["python-osc"]
yt-dlp = ["yt-dlp"]
[build-system]

View file

@@ -61,6 +61,10 @@ def cli(loglevel, show_time=False):
logging.getLogger('hypercorn.access').setLevel(logging.WARN)
logging.getLogger('httpx').setLevel(logging.WARN)
logging.getLogger('httpcore').setLevel(logging.INFO)
logging.getLogger('torio._extension.utils').setLevel(logging.WARN)
logging.getLogger('matplotlib').setLevel(logging.INFO)
logging.getLogger('fsspec').setLevel(logging.INFO)
logging.getLogger('TTS').setLevel(logging.INFO if loglevel == logging.DEBUG else logging.WARN)
# Quiet warnings
if loglevel > logging.DEBUG:
warnings.filterwarnings("ignore")

View file

@@ -8,7 +8,6 @@ import pyaudio as pya
import librosa
import pytsmod as tsm
import soundfile
from aioprocessing import AioEvent
# HACK: Redirect stderr to /dev/null to silence portaudio boot
devnull = os.open(os.devnull, os.O_WRONLY)
@@ -53,9 +52,9 @@ class Clip:
def stretch(self, speed, keep_pitch=True):
if keep_pitch:
stretched = tsm.wsola(self._stereo_transpose(self.raw), speed)
stretched = tsm.wsola(self._stereo_transpose(self.raw), 1 / speed)
else:
stretched = librosa.resample(self._stereo_transpose(self.raw), self.samplerate * (1 / speed), self.samplerate, fix=False, scale=True)
stretched = librosa.resample(self._stereo_transpose(self.raw), self.samplerate * speed, self.samplerate, fix=False, scale=True)
self.raw = np.ascontiguousarray(self._stereo_transpose(stretched), dtype='float32')
def save(self, filename):
@@ -67,7 +66,8 @@ class Stream:
self.clip = clip
self.pos = 0
self.playing = False
self._end_event = AioEvent()
self.loop = asyncio.get_event_loop()
self._end_event = asyncio.Event()
self._stream = pyaudio.open(
output_device_index=output_index,
format=pya.paFloat32,
@@ -85,16 +85,11 @@ class Stream:
if not self._stream.is_active():
self._stream.start_stream()
def play(self):
self._end_event.clear()
self._play()
self._end_event.wait(timeout=self.clip.length)
async def aplay(self):
async def play(self):
self._end_event.clear()
self._play()
try:
await self._end_event.coro_wait(timeout=self.clip.length)
await self._end_event.wait()
except asyncio.CancelledError:
self.playing = False
self._stream.stop_stream()
@@ -117,7 +112,7 @@ class Stream:
if self.pos >= self.clip.raw.shape[0]:
self.playing = False
self._end_event.set()
self.loop.call_soon_threadsafe(self._end_event.set)
return buffer, pya.paContinue

View file

@@ -139,7 +139,7 @@ def parse_kdl_deep(path, relativeto=None):
if relativeto:
path = os.path.normpath(os.path.join(relativeto, path))
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
try:
config = kdl.parse(f.read(), kdl_parse_config)
for node in config.nodes:

View file

@@ -3,7 +3,6 @@ import asyncio
from datetime import datetime, timedelta
import logging
import os
import os.path
import pathlib
import sys
import signal
@@ -67,6 +66,9 @@ class MainProcess:
# Save sys.path since some config will clobber it
self._initial_syspath = sys.path
if os.name == 'nt':
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
def _unload_plugin(self, plugin_name):
plugin = self.plugins[plugin_name]
plugin.close()
@@ -244,12 +246,15 @@ class MainProcess:
sys.path = self._initial_syspath
async def _discount_repl(self):
loop = asyncio.get_event_loop()
# REVIEW: Not a good UX at the moment (as new logs clobber the terminal entry)
async for line in reader:
while True:
line = await loop.run_in_executor(None, sys.stdin.readline)
line = line.strip()
if line == b'reload':
logger.debug(f'Got terminal input: {line}')
if line == 'reload':
self.reload_ev.set()
elif line == b'quit':
elif line == 'quit':
self.shutdown_ev.set()
async def run(self):
@@ -261,8 +266,6 @@ class MainProcess:
try:
# System setup
## Make stdin handler
reader = asyncio.StreamReader()
await loop.connect_read_pipe(lambda: asyncio.StreamReaderProtocol(reader), sys.stdin)
self.cli_task = loop.create_task(self._discount_repl())
## Init websocket server (external end of the event bus)
self.bus_server = WebsocketServerProcess(self.event_queue, *self.bus_conf)

View file

@@ -29,6 +29,17 @@ class OvtkBlueprint(quart.Blueprint):
endpoint = self.name + endpoint
return quart.url_for(endpoint, *args, **kwargs)
def render(self, name, **kwargs):
    """Render template ``name``, preferring this plugin's own copy.

    Works around blueprint templates in the web UI sharing one namespace:
    if a file called ``name`` exists inside this blueprint's
    ``template_folder`` it is read from disk and rendered as a template
    string; otherwise the lookup falls back to ``quart.render_template``.

    Args:
        name: Template file name, resolved relative to
            ``self.template_folder``.
        **kwargs: Template context, forwarded unchanged to quart.

    Returns:
        The un-awaited result of the underlying quart render call; the
        caller is expected to ``await`` it.
    """
    # NOTE(review): the "/" join assumes template_folder is a pathlib.Path
    # (not a plain str or None) -- confirm against how blueprints are built.
    plugin_template = self.template_folder / name
    if not os.path.exists(plugin_template):
        return quart.render_template(name, **kwargs)
    # Decode explicitly instead of relying on the platform default encoding
    # (which is not UTF-8 on every OS); assumes templates are stored as
    # UTF-8, like the config files this project reads.
    with open(plugin_template, 'r', encoding='utf-8') as template_file:
        template_string = template_file.read()
    return quart.render_template_string(template_string, **kwargs)
class PluginBase(ABC):
plugins = {}

View file

@@ -54,9 +54,9 @@ class AudioAlert(PluginBase):
if wait:
await stream.aplay()
await stream.play()
else:
task = asyncio.create_task(stream.aplay())
task = asyncio.create_task(stream.play())
task.add_done_callback(self.tasks.discard)
self.tasks.add(task)

View file

@@ -52,8 +52,8 @@ class TextToSpeechPlugin(PluginBase):
config_path = override_conf_path
self.synthesizer = Synthesizer(
model_path,
config_path,
tts_checkpoint=model_path,
tts_config_path=config_path,
vocoder_checkpoint=vocoder_path,
vocoder_config=vocoder_config_path,
use_cuda=self.cuda,
@@ -66,22 +66,26 @@ class TextToSpeechPlugin(PluginBase):
task.cancel()
def make_tts_wav(self, text, filename=None):
    """Synthesize ``text`` to a WAV file and return the file's path.

    This call is synchronous; ``run`` dispatches it through
    ``run_in_executor`` so the event loop is not blocked.

    Args:
        text: The text to speak. It is stripped, and a trailing ``.`` is
            appended unless it already ends in one of ``. ! ? :``.
        filename: Destination path. When ``None``, a fresh
            ``<uuid1>.wav`` name inside ``self.cache_dir`` is used.

    Returns:
        The path the WAV was saved to (``filename``, or the generated one).
    """
    # Force punctuation (keeps the models from acting unpredictably)
    text = text.strip()
    # str.endswith accepts a tuple of suffixes -- one call, no temp list
    if not text.endswith(('.', '!', '?', ':')):
        text += '.'
    if filename is None:
        filename = os.path.join(self.cache_dir, f'{uuid.uuid1()}.wav')
    self.logger.info(f'Generating TTS "{text}"...')
    if self.speaker_wav:
        # NOTE(review): positional args are presumably (text, speaker name,
        # language, speaker wav) -- confirm against Synthesizer.tts
        wav = self.synthesizer.tts(text, None, 'en', self.speaker_wav)
    else:
        wav = self.synthesizer.tts(text)
    self.synthesizer.save_wav(wav, filename)
    self.logger.info(f'Done - saved as {filename}')
    return filename
async def run(self, text, *args, _ctx={}, wait=False, **kwargs):
try:
# Force punctuation (keep AI from spinning off into random noises)
if not any([text.endswith(punc) for punc in '.!?:']):
text += '.'
# Do TTS processing in a thread to avoid blocking main loop
filename = await asyncio.get_running_loop().run_in_executor(None, self.make_tts_wav, text)
@@ -90,7 +94,7 @@ class TextToSpeechPlugin(PluginBase):
stream = Stream(clip, self.output_index)
async def play():
try:
await stream.aplay()
await stream.play()
finally:
stream.close()
os.remove(os.path.join(self.cache_dir, filename))

View file

@@ -34,7 +34,7 @@ class ScenePlugin(PluginBase):
self.blueprint.add_url_rule('/', 'ctrlpanel', self.ui_ctrlpanel)
self.blueprint.add_url_rule('/<name>/<cmd>', 'api-sceneset', self.ui_setscene)
self.blueprint.add_url_rule('/monitor', 'monitor', self.ui_monitor_ws, is_websocket=True)
self.blueprint.add_url_rule('/monitor', 'monitor', self.ui_monitor_ws, websocket=True)
async def run(self, name, _children=None, _ctx={}, active=None, group=None, oneshot=False, **kwargs):
if _children is None:
@@ -138,7 +138,7 @@ class ScenePlugin(PluginBase):
async def ui_ctrlpanel(self):
groups = self._get_state()
return await quart.render_template('index.html', init_state=json.dumps(groups))
return await self.blueprint.render('index.html', init_state=json.dumps(groups))
async def ui_setscene(self, name=None, cmd=None):
active = cmd == 'activate'

View file

@@ -2,7 +2,7 @@
<html lang="en" dir="ltr">
<head>
<meta charset="utf-8">
<title>Test page</title>
<title>Scene control</title>
<script type="importmap">
{
"imports": { "vue": "https://unpkg.com/vue@3/dist/vue.esm-browser.js" }