diff --git a/README.md b/README.md
index cee5bed..632c77d 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,18 @@ Minimal SDL3 + Vulkan spinning cube demo.
 ### Run
 - `python scripts/dev_commands.py run [--build-dir ...]` (source `build/conanrun.sh` / `build\conanrun.bat` first if the Conan runtime exports env vars).
+### Audio assets
+Install the dependencies that power `scripts/generate_audio_assets.py`:
+- `python -m pip install --user numpy soundfile pedalboard piper-tts`
+
+Before synthesizing the text-to-speech phrases, download a Piper voice (the script defaults to `en_US-lessac-medium`):
+
+```
+python -m piper.download_voices en_US-lessac-medium --download-dir scripts/assets/audio/tts/voices
+```
+
+Running the generator (`python scripts/generate_audio_assets.py`) recreates the procedural effects under `scripts/assets/audio/sfx/` and the voice clips under `scripts/assets/audio/tts/`. Use `--force` to rebuild every file, and `--skip-sfx` / `--skip-tts` if you only need one subset; add `--verbose` to see debug logging as the files are created. Override the voice files with `--piper-voice-model <path>` and (optionally) `--piper-voice-config <path>` if you downloaded a different voice or stored it in a different location. Pass `--download-voice` to have the script invoke `piper.download_voices` automatically before rendering (requires `piper-tts` and network access).
+
 ## Runtime configuration
 1. `sdl3_app --json-file-in <path>` loads JSON configs (script path, window size, `lua_debug`, etc.).
 2. `sdl3_app --create-seed-json config/seed_runtime.json` writes a starter file assuming `scripts/cube_logic.lua` sits beside the binary.
 
diff --git a/scripts/assets/audio/sfx/level_up.ogg b/scripts/assets/audio/sfx/level_up.ogg new file mode 100644 index 0000000..5193631 Binary files /dev/null and b/scripts/assets/audio/sfx/level_up.ogg differ diff --git a/scripts/assets/audio/sfx/menu_select.ogg b/scripts/assets/audio/sfx/menu_select.ogg new file mode 100644 index 0000000..c3efb9c Binary files /dev/null and b/scripts/assets/audio/sfx/menu_select.ogg differ diff --git a/scripts/assets/audio/sfx/power_up.ogg b/scripts/assets/audio/sfx/power_up.ogg new file mode 100644 index 0000000..f8f93bc Binary files /dev/null and b/scripts/assets/audio/sfx/power_up.ogg differ diff --git a/scripts/assets/audio/sfx/swish.ogg b/scripts/assets/audio/sfx/swish.ogg new file mode 100644 index 0000000..9261e85 Binary files /dev/null and b/scripts/assets/audio/sfx/swish.ogg differ diff --git a/scripts/assets/audio/sfx/thud.ogg b/scripts/assets/audio/sfx/thud.ogg new file mode 100644 index 0000000..4999dfc Binary files /dev/null and b/scripts/assets/audio/sfx/thud.ogg differ diff --git a/scripts/assets/audio/tts/continue.ogg b/scripts/assets/audio/tts/continue.ogg new file mode 100644 index 0000000..d9562af Binary files /dev/null and b/scripts/assets/audio/tts/continue.ogg differ diff --git a/scripts/assets/audio/tts/game_over.ogg b/scripts/assets/audio/tts/game_over.ogg new file mode 100644 index 0000000..7a251e0 Binary files /dev/null and b/scripts/assets/audio/tts/game_over.ogg differ diff --git a/scripts/assets/audio/tts/level_1.ogg b/scripts/assets/audio/tts/level_1.ogg new file mode 100644 index 0000000..fd8fc89 Binary files /dev/null and b/scripts/assets/audio/tts/level_1.ogg differ diff --git a/scripts/assets/audio/tts/level_2.ogg b/scripts/assets/audio/tts/level_2.ogg new file mode 100644 index 0000000..7022b03 Binary files /dev/null and b/scripts/assets/audio/tts/level_2.ogg differ diff --git a/scripts/assets/audio/tts/power_up.ogg b/scripts/assets/audio/tts/power_up.ogg new file mode 100644 index 
0000000..b3ca271 Binary files /dev/null and b/scripts/assets/audio/tts/power_up.ogg differ diff --git a/scripts/generate_audio_assets.py b/scripts/generate_audio_assets.py new file mode 100644 index 0000000..5a183c2 --- /dev/null +++ b/scripts/generate_audio_assets.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python3 +"""Create the demo's shared `.ogg` audio library via ``pedalboard`` + ``soundfile``.""" + +from __future__ import annotations + +import argparse +import logging +import shutil +from pathlib import Path +from typing import Optional + +logger = logging.getLogger(__name__) + +try: + import numpy as np + import soundfile as sf + from pedalboard import Chorus, Delay, Distortion, Pedalboard, Reverb +except ImportError as exc: # pragma: no cover - requires extra dependencies + raise SystemExit( + "Missing pedalboard or soundfile. Install them with " + "`python -m pip install pedalboard soundfile numpy`. " + f"ImportError: {exc}" + ) from exc + +try: + from piper import PiperVoice +except ImportError: + PiperVoice = None # type: ignore[assignment] + +try: + from piper.download_voices import download_voice +except ImportError: + download_voice = None # type: ignore[assignment] + +SAMPLE_RATE = 44100 + +TTS_PHRASES = [ + "Level 1", + "Level 2", + "Game Over", + "Continue", + "Power Up", +] + +DEFAULT_PIPER_VOICE_NAME = "en_US-lessac-medium" +DEFAULT_PIPER_VOICE_DIR = ( + Path(__file__).resolve().parent / "assets" / "audio" / "tts" / "voices" +) + + +def _num_samples(duration: float) -> int: + return max(1, int(round(duration * SAMPLE_RATE))) + + +def _apply_envelope( + signal: np.ndarray, + attack: float, + decay: float, + sustain_level: float, + release: float, +) -> np.ndarray: + total = signal.shape[0] + attack_samples = min(total, int(round(attack * SAMPLE_RATE))) + decay_samples = min(total - attack_samples, int(round(decay * SAMPLE_RATE))) + release_samples = min( + total - attack_samples - decay_samples, int(round(release * SAMPLE_RATE)) + ) + sustain_samples = 
total - (attack_samples + decay_samples + release_samples) + env = np.empty(total, dtype=np.float32) + idx = 0 + if attack_samples > 0: + env[idx : idx + attack_samples] = np.linspace(0.0, 1.0, attack_samples, False) + idx += attack_samples + if decay_samples > 0: + env[idx : idx + decay_samples] = np.linspace( + 1.0, sustain_level, decay_samples, False + ) + idx += decay_samples + if sustain_samples > 0: + env[idx : idx + sustain_samples] = sustain_level + idx += sustain_samples + if release_samples > 0: + env[-release_samples:] = np.linspace( + sustain_level, 0.0, release_samples, False + ) + if idx + sustain_samples < total - release_samples: + env[idx + sustain_samples : -release_samples] = sustain_level + if attack_samples + decay_samples + sustain_samples + release_samples == 0: + env[:] = 1.0 + return signal * env + + +def _download_piper_voice(voice_name: str, download_dir: Path) -> None: + if download_voice is None: + logger.warning( + "Automatic voice download requires `piper.download_voices`; install piper-tts to enable it." 
+ ) + return + + download_dir.mkdir(parents=True, exist_ok=True) + logger.info("Downloading Piper voice %s into %s", voice_name, download_dir) + download_voice(voice_name, download_dir) + + +def _sine_glide(duration: float, start_freq: float, end_freq: float) -> np.ndarray: + samples = _num_samples(duration) + t = np.linspace(0.0, duration, samples, False) + freq = np.linspace(start_freq, end_freq, samples) + return np.sin(2 * np.pi * freq * t).astype(np.float32) + + +def _pink_noise(duration: float) -> np.ndarray: + samples = _num_samples(duration) + return np.random.normal(scale=0.15, size=samples).astype(np.float32) + + +def _menu_select(duration: float) -> tuple[np.ndarray, list]: + signal = _sine_glide(duration, 520, 960) + signal = _apply_envelope(signal, 0.01, 0.12, 0.6, 0.15) + effects = [Chorus(rate_hz=1.1, depth=0.18, mix=0.6)] + return signal, effects + + +def _power_up(duration: float) -> tuple[np.ndarray, list]: + signal = _sine_glide(duration, 270, 940) + signal += 0.25 * _pink_noise(duration) + signal = _apply_envelope(signal, 0.02, 0.26, 0.45, 0.2) + effects = [ + Distortion(drive_db=14.0), + Reverb(room_size=0.45, wet_level=0.25, dry_level=0.9), + ] + return signal, effects + + +def _level_up(duration: float) -> tuple[np.ndarray, list]: + base = _sine_glide(duration, 420, 660) + harmony = 0.45 * _sine_glide(duration, 660, 840) + signal = (base + harmony) / 1.45 + signal = _apply_envelope(signal, 0.01, 0.18, 0.55, 0.25) + effects = [ + Chorus(rate_hz=0.95, depth=0.24, mix=0.55), + Delay(delay_seconds=0.18, feedback=0.25, mix=0.35), + ] + return signal, effects + + +def _swish(duration: float) -> tuple[np.ndarray, list]: + signal = _pink_noise(duration) + signal = _apply_envelope(signal, 0.1, 0.3, 0.2, 0.3) + effects = [ + Reverb(room_size=0.7, wet_level=0.5, dry_level=0.4), + Chorus(rate_hz=0.4, depth=0.2, mix=0.45), + ] + return signal, effects + + +def _thud(duration: float) -> tuple[np.ndarray, list]: + base = _sine_glide(duration, 80, 120) + 
signal = 0.7 * base + 0.3 * _pink_noise(duration) + signal = _apply_envelope(signal, 0.01, duration * 0.4, 0.0, 0.3) + effects = [ + Distortion(drive_db=10.0), + Reverb(room_size=0.85, wet_level=0.55, dry_level=0.3), + ] + return signal, effects + + +SFX_DEFINITIONS = [ + ("menu_select", 0.65, "short ascending ping", _menu_select), + ("power_up", 1.1, "riser with harmonic shimmer", _power_up), + ("level_up", 0.9, "bright burst", _level_up), + ("swish", 0.7, "noisy transition", _swish), + ("thud", 1.0, "low impact", _thud), +] + + +def _render_ogg(path: Path, duration: float, builder): + signal, effects = builder(duration) + processed = signal + if effects: + board = Pedalboard(effects) + processed = board(processed, SAMPLE_RATE) + processed = np.clip(processed, -1.0, 1.0).astype(np.float32) + sf.write( + str(path), + processed, + SAMPLE_RATE, + format="OGG", + subtype="VORBIS", + ) + + +def _slugify(text: str) -> str: + return "".join(ch if ch.isalnum() else "_" for ch in text).strip("_").lower() + + +def _configure_logging(verbose: bool) -> None: + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig(level=level, format="%(levelname)s: %(message)s") + + +def _generate_sfx(output_dir: Path, force: bool): + target_dir = output_dir / "sfx" + if force and target_dir.exists(): + logger.debug("Removing existing SFX folder %s", target_dir) + shutil.rmtree(target_dir) + target_dir.mkdir(parents=True, exist_ok=True) + + for name, duration, description, builder in SFX_DEFINITIONS: + target = target_dir / f"{name}.ogg" + if target.exists() and not force: + logger.info(f"Skipping existing sound: {target.name} ({description})") + continue + logger.info(f"Rendering SFX: {name} -> {target.name}") + _render_ogg(target, duration, builder) + + +def _load_piper_voice(model_path: Path, config_path: Optional[Path]) -> Optional["PiperVoice"]: + if PiperVoice is None: + logger.warning("piper-tts is not installed; skipping voice generation.") + return None + + if not 
model_path.exists(): + logger.warning("Piper voice model not found at %s; skip TTS.", model_path) + return None + + resolved_config = config_path or Path(f"{model_path}.json") + if not resolved_config.exists(): + logger.warning( + "Piper voice config not found at %s; skip TTS generation.", resolved_config + ) + return None + + return PiperVoice.load(str(model_path), config_path=str(resolved_config)) + + +def _synthesize_phrase_to_ogg(voice: "PiperVoice", phrase: str, path: Path) -> None: + chunks = list(voice.synthesize(phrase)) + if not chunks: + logger.warning("Piper generated no audio for phrase '%s'", phrase) + return + + audio = np.concatenate([chunk.audio_float_array for chunk in chunks]) + audio = np.clip(audio, -1.0, 1.0).astype(np.float32) + sf.write( + str(path), + audio, + voice.config.sample_rate, + format="OGG", + subtype="VORBIS", + ) + + +def _generate_tts( + output_dir: Path, + force: bool, + voice_model: Path, + voice_config: Optional[Path], +) -> None: + voice = _load_piper_voice(voice_model, voice_config) + if voice is None: + return + + tts_dir = output_dir / "tts" + if force and tts_dir.exists(): + logger.debug("Removing existing TTS folder %s", tts_dir) + shutil.rmtree(tts_dir) + tts_dir.mkdir(parents=True, exist_ok=True) + + for phrase in TTS_PHRASES: + slug = _slugify(phrase) + target = tts_dir / f"{slug}.ogg" + if target.exists() and not force: + logger.info(f"Skipping existing voice: {target.name} ({phrase})") + continue + logger.info(f"Rendering voice: {phrase} -> {target.name}") + _synthesize_phrase_to_ogg(voice, phrase, target) + + +def main(): + parser = argparse.ArgumentParser( + description="Regenerate the OGG sound library for the demo." 
+ ) + parser.add_argument( + "--output-dir", + type=Path, + default=Path(__file__).resolve().parent / "assets" / "audio", + help="Where to store generated OGG files.", + ) + parser.add_argument( + "--force", + action="store_true", + help="Rebuild every asset even if a file already exists.", + ) + parser.add_argument( + "--skip-tts", + action="store_true", + help="Do not regenerate the text-to-speech phrases.", + ) + parser.add_argument( + "--skip-sfx", + action="store_true", + help="Do not regenerate the procedural sound effects.", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable debug logging while generating audio assets.", + ) + parser.add_argument( + "--piper-voice", + default=DEFAULT_PIPER_VOICE_NAME, + help="Piper voice identifier like 'en_US-lessac-medium'.", + ) + parser.add_argument( + "--piper-voice-model", + type=Path, + help="Path to the Piper ONNX voice model (defaults to /.onnx).", + ) + parser.add_argument( + "--piper-voice-config", + type=Path, + help="Path to the Piper voice config JSON (defaults to .json).", + ) + parser.add_argument( + "--download-voice", + action="store_true", + help="Automatically download the Piper voice before generating TTS.", + ) + + args = parser.parse_args() + _configure_logging(args.verbose) + logger.debug("Output directory: %s", args.output_dir) + voice_model_directory = ( + args.piper_voice_model.parent if args.piper_voice_model else DEFAULT_PIPER_VOICE_DIR + ) + voice_model = args.piper_voice_model or ( + voice_model_directory / f"{args.piper_voice}.onnx" + ) + voice_config = args.piper_voice_config + voice_model.parent.mkdir(parents=True, exist_ok=True) + args.output_dir.mkdir(parents=True, exist_ok=True) + + if args.download_voice: + _download_piper_voice(args.piper_voice, voice_model.parent) + + if not args.skip_sfx: + _generate_sfx(args.output_dir, args.force) + if not args.skip_tts: + _generate_tts( + args.output_dir, + args.force, + voice_model, + voice_config, + ) + + +if __name__ 
== "__main__": + main()