generate noises

2026-04-24 13:44:58 +00:00 · 2025-12-19 18:03:11 +00:00
parent 8f434d8fd0
commit 2dac2b53df
12 changed files with 371 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -16,6 +16,18 @@ Minimal SDL3 + Vulkan spinning cube demo.
 ### Run
 - `python scripts/dev_commands.py run [--build-dir ...]` (source `build/conanrun.sh` / `build\conanrun.bat` first if the Conan runtime exports env vars).

+### Audio assets
+Install the dependencies that power `scripts/generate_audio_assets.py`:
+- `python -m pip install --user numpy soundfile pedalboard piper-tts`
+
+Before translating phrases, download a Piper voice (the script defaults to `en_US-lessac-medium`):
+
+```
+python -m piper.download_voices en_US-lessac-medium --download-dir scripts/assets/audio/tts/voices
+```
+
+Running the generator recreates procedural effects under `scripts/assets/audio/sfx/` and voice clips under `scripts/assets/audio/tts/`. Use `--force` to rebuild every file and `--skip-sfx` / `--skip-tts` if you only need one subset; add `--verbose` to see the internal logging as the files are created. Override the voice files with `--piper-voice-model <path>` and (optionally) `--piper-voice-config <path>` if you downloaded a different voice or location. Pass `--download-voice` to have the script invoke `piper.download_voices` automatically before rendering (requires `piper-tts` and network access).
+
 ## Runtime configuration
 1. `sdl3_app --json-file-in <path>` loads JSON configs (script path, window size, `lua_debug`, etc.).
 2. `sdl3_app --create-seed-json config/seed_runtime.json` writes a starter file assuming `scripts/cube_logic.lua` sits beside the binary.
--- a/scripts/assets/audio/sfx/level_up.ogg
+++ b/scripts/assets/audio/sfx/level_up.ogg
--- a/scripts/assets/audio/sfx/menu_select.ogg
+++ b/scripts/assets/audio/sfx/menu_select.ogg
--- a/scripts/assets/audio/sfx/power_up.ogg
+++ b/scripts/assets/audio/sfx/power_up.ogg
--- a/scripts/assets/audio/sfx/swish.ogg
+++ b/scripts/assets/audio/sfx/swish.ogg
--- a/scripts/assets/audio/sfx/thud.ogg
+++ b/scripts/assets/audio/sfx/thud.ogg
--- a/scripts/assets/audio/tts/continue.ogg
+++ b/scripts/assets/audio/tts/continue.ogg
--- a/scripts/assets/audio/tts/game_over.ogg
+++ b/scripts/assets/audio/tts/game_over.ogg
--- a/scripts/assets/audio/tts/level_1.ogg
+++ b/scripts/assets/audio/tts/level_1.ogg
--- a/scripts/assets/audio/tts/level_2.ogg
+++ b/scripts/assets/audio/tts/level_2.ogg
--- a/scripts/assets/audio/tts/power_up.ogg
+++ b/scripts/assets/audio/tts/power_up.ogg
--- a/scripts/generate_audio_assets.py
+++ b/scripts/generate_audio_assets.py
@@ -0,0 +1,359 @@
+#!/usr/bin/env python3
+"""Create the demo's shared `.ogg` audio library via ``pedalboard`` + ``soundfile``."""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import shutil
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+try:
+    import numpy as np
+    import soundfile as sf
+    from pedalboard import Chorus, Delay, Distortion, Pedalboard, Reverb
+except ImportError as exc:  # pragma: no cover - requires extra dependencies
+    raise SystemExit(
+        "Missing pedalboard or soundfile. Install them with "
+        "`python -m pip install pedalboard soundfile numpy`. "
+        f"ImportError: {exc}"
+    ) from exc
+
+try:
+    from piper import PiperVoice
+except ImportError:
+    PiperVoice = None  # type: ignore[assignment]
+
+try:
+    from piper.download_voices import download_voice
+except ImportError:
+    download_voice = None  # type: ignore[assignment]
+
+SAMPLE_RATE = 44100
+
+TTS_PHRASES = [
+    "Level 1",
+    "Level 2",
+    "Game Over",
+    "Continue",
+    "Power Up",
+]
+
+DEFAULT_PIPER_VOICE_NAME = "en_US-lessac-medium"
+DEFAULT_PIPER_VOICE_DIR = (
+    Path(__file__).resolve().parent / "assets" / "audio" / "tts" / "voices"
+)
+
+
+def _num_samples(duration: float) -> int:
+    return max(1, int(round(duration * SAMPLE_RATE)))
+
+
+def _apply_envelope(
+    signal: np.ndarray,
+    attack: float,
+    decay: float,
+    sustain_level: float,
+    release: float,
+) -> np.ndarray:
+    total = signal.shape[0]
+    attack_samples = min(total, int(round(attack * SAMPLE_RATE)))
+    decay_samples = min(total - attack_samples, int(round(decay * SAMPLE_RATE)))
+    release_samples = min(
+        total - attack_samples - decay_samples, int(round(release * SAMPLE_RATE))
+    )
+    sustain_samples = total - (attack_samples + decay_samples + release_samples)
+    env = np.empty(total, dtype=np.float32)
+    idx = 0
+    if attack_samples > 0:
+        env[idx : idx + attack_samples] = np.linspace(0.0, 1.0, attack_samples, False)
+        idx += attack_samples
+    if decay_samples > 0:
+        env[idx : idx + decay_samples] = np.linspace(
+            1.0, sustain_level, decay_samples, False
+        )
+        idx += decay_samples
+    if sustain_samples > 0:
+        env[idx : idx + sustain_samples] = sustain_level
+        idx += sustain_samples
+    if release_samples > 0:
+        env[-release_samples:] = np.linspace(
+            sustain_level, 0.0, release_samples, False
+        )
+        if idx + sustain_samples < total - release_samples:
+            env[idx + sustain_samples : -release_samples] = sustain_level
+    if attack_samples + decay_samples + sustain_samples + release_samples == 0:
+        env[:] = 1.0
+    return signal * env
+
+
+def _download_piper_voice(voice_name: str, download_dir: Path) -> None:
+    if download_voice is None:
+        logger.warning(
+            "Automatic voice download requires `piper.download_voices`; install piper-tts to enable it."
+        )
+        return
+
+    download_dir.mkdir(parents=True, exist_ok=True)
+    logger.info("Downloading Piper voice %s into %s", voice_name, download_dir)
+    download_voice(voice_name, download_dir)
+
+
+def _sine_glide(duration: float, start_freq: float, end_freq: float) -> np.ndarray:
+    samples = _num_samples(duration)
+    t = np.linspace(0.0, duration, samples, False)
+    freq = np.linspace(start_freq, end_freq, samples)
+    return np.sin(2 * np.pi * freq * t).astype(np.float32)
+
+
+def _pink_noise(duration: float) -> np.ndarray:
+    samples = _num_samples(duration)
+    return np.random.normal(scale=0.15, size=samples).astype(np.float32)
+
+
+def _menu_select(duration: float) -> tuple[np.ndarray, list]:
+    signal = _sine_glide(duration, 520, 960)
+    signal = _apply_envelope(signal, 0.01, 0.12, 0.6, 0.15)
+    effects = [Chorus(rate_hz=1.1, depth=0.18, mix=0.6)]
+    return signal, effects
+
+
+def _power_up(duration: float) -> tuple[np.ndarray, list]:
+    signal = _sine_glide(duration, 270, 940)
+    signal += 0.25 * _pink_noise(duration)
+    signal = _apply_envelope(signal, 0.02, 0.26, 0.45, 0.2)
+    effects = [
+        Distortion(drive_db=14.0),
+        Reverb(room_size=0.45, wet_level=0.25, dry_level=0.9),
+    ]
+    return signal, effects
+
+
+def _level_up(duration: float) -> tuple[np.ndarray, list]:
+    base = _sine_glide(duration, 420, 660)
+    harmony = 0.45 * _sine_glide(duration, 660, 840)
+    signal = (base + harmony) / 1.45
+    signal = _apply_envelope(signal, 0.01, 0.18, 0.55, 0.25)
+    effects = [
+        Chorus(rate_hz=0.95, depth=0.24, mix=0.55),
+        Delay(delay_seconds=0.18, feedback=0.25, mix=0.35),
+    ]
+    return signal, effects
+
+
+def _swish(duration: float) -> tuple[np.ndarray, list]:
+    signal = _pink_noise(duration)
+    signal = _apply_envelope(signal, 0.1, 0.3, 0.2, 0.3)
+    effects = [
+        Reverb(room_size=0.7, wet_level=0.5, dry_level=0.4),
+        Chorus(rate_hz=0.4, depth=0.2, mix=0.45),
+    ]
+    return signal, effects
+
+
+def _thud(duration: float) -> tuple[np.ndarray, list]:
+    base = _sine_glide(duration, 80, 120)
+    signal = 0.7 * base + 0.3 * _pink_noise(duration)
+    signal = _apply_envelope(signal, 0.01, duration * 0.4, 0.0, 0.3)
+    effects = [
+        Distortion(drive_db=10.0),
+        Reverb(room_size=0.85, wet_level=0.55, dry_level=0.3),
+    ]
+    return signal, effects
+
+
+SFX_DEFINITIONS = [
+    ("menu_select", 0.65, "short ascending ping", _menu_select),
+    ("power_up", 1.1, "riser with harmonic shimmer", _power_up),
+    ("level_up", 0.9, "bright burst", _level_up),
+    ("swish", 0.7, "noisy transition", _swish),
+    ("thud", 1.0, "low impact", _thud),
+]
+
+
+def _render_ogg(path: Path, duration: float, builder):
+    signal, effects = builder(duration)
+    processed = signal
+    if effects:
+        board = Pedalboard(effects)
+        processed = board(processed, SAMPLE_RATE)
+    processed = np.clip(processed, -1.0, 1.0).astype(np.float32)
+    sf.write(
+        str(path),
+        processed,
+        SAMPLE_RATE,
+        format="OGG",
+        subtype="VORBIS",
+    )
+
+
+def _slugify(text: str) -> str:
+    return "".join(ch if ch.isalnum() else "_" for ch in text).strip("_").lower()
+
+
+def _configure_logging(verbose: bool) -> None:
+    level = logging.DEBUG if verbose else logging.INFO
+    logging.basicConfig(level=level, format="%(levelname)s: %(message)s")
+
+
+def _generate_sfx(output_dir: Path, force: bool):
+    target_dir = output_dir / "sfx"
+    if force and target_dir.exists():
+        logger.debug("Removing existing SFX folder %s", target_dir)
+        shutil.rmtree(target_dir)
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    for name, duration, description, builder in SFX_DEFINITIONS:
+        target = target_dir / f"{name}.ogg"
+        if target.exists() and not force:
+            logger.info(f"Skipping existing sound: {target.name} ({description})")
+            continue
+        logger.info(f"Rendering SFX: {name} -> {target.name}")
+        _render_ogg(target, duration, builder)
+
+
+def _load_piper_voice(model_path: Path, config_path: Optional[Path]) -> Optional["PiperVoice"]:
+    if PiperVoice is None:
+        logger.warning("piper-tts is not installed; skipping voice generation.")
+        return None
+
+    if not model_path.exists():
+        logger.warning("Piper voice model not found at %s; skip TTS.", model_path)
+        return None
+
+    resolved_config = config_path or Path(f"{model_path}.json")
+    if not resolved_config.exists():
+        logger.warning(
+            "Piper voice config not found at %s; skip TTS generation.", resolved_config
+        )
+        return None
+
+    return PiperVoice.load(str(model_path), config_path=str(resolved_config))
+
+
+def _synthesize_phrase_to_ogg(voice: "PiperVoice", phrase: str, path: Path) -> None:
+    chunks = list(voice.synthesize(phrase))
+    if not chunks:
+        logger.warning("Piper generated no audio for phrase '%s'", phrase)
+        return
+
+    audio = np.concatenate([chunk.audio_float_array for chunk in chunks])
+    audio = np.clip(audio, -1.0, 1.0).astype(np.float32)
+    sf.write(
+        str(path),
+        audio,
+        voice.config.sample_rate,
+        format="OGG",
+        subtype="VORBIS",
+    )
+
+
+def _generate_tts(
+    output_dir: Path,
+    force: bool,
+    voice_model: Path,
+    voice_config: Optional[Path],
+) -> None:
+    voice = _load_piper_voice(voice_model, voice_config)
+    if voice is None:
+        return
+
+    tts_dir = output_dir / "tts"
+    if force and tts_dir.exists():
+        logger.debug("Removing existing TTS folder %s", tts_dir)
+        shutil.rmtree(tts_dir)
+    tts_dir.mkdir(parents=True, exist_ok=True)
+
+    for phrase in TTS_PHRASES:
+        slug = _slugify(phrase)
+        target = tts_dir / f"{slug}.ogg"
+        if target.exists() and not force:
+            logger.info(f"Skipping existing voice: {target.name} ({phrase})")
+            continue
+        logger.info(f"Rendering voice: {phrase} -> {target.name}")
+        _synthesize_phrase_to_ogg(voice, phrase, target)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Regenerate the OGG sound library for the demo."
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(__file__).resolve().parent / "assets" / "audio",
+        help="Where to store generated OGG files.",
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Rebuild every asset even if a file already exists.",
+    )
+    parser.add_argument(
+        "--skip-tts",
+        action="store_true",
+        help="Do not regenerate the text-to-speech phrases.",
+    )
+    parser.add_argument(
+        "--skip-sfx",
+        action="store_true",
+        help="Do not regenerate the procedural sound effects.",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Enable debug logging while generating audio assets.",
+    )
+    parser.add_argument(
+        "--piper-voice",
+        default=DEFAULT_PIPER_VOICE_NAME,
+        help="Piper voice identifier like 'en_US-lessac-medium'.",
+    )
+    parser.add_argument(
+        "--piper-voice-model",
+        type=Path,
+        help="Path to the Piper ONNX voice model (defaults to <download-dir>/<voice>.onnx).",
+    )
+    parser.add_argument(
+        "--piper-voice-config",
+        type=Path,
+        help="Path to the Piper voice config JSON (defaults to <model>.json).",
+    )
+    parser.add_argument(
+        "--download-voice",
+        action="store_true",
+        help="Automatically download the Piper voice before generating TTS.",
+    )
+
+    args = parser.parse_args()
+    _configure_logging(args.verbose)
+    logger.debug("Output directory: %s", args.output_dir)
+    voice_model_directory = (
+        args.piper_voice_model.parent if args.piper_voice_model else DEFAULT_PIPER_VOICE_DIR
+    )
+    voice_model = args.piper_voice_model or (
+        voice_model_directory / f"{args.piper_voice}.onnx"
+    )
+    voice_config = args.piper_voice_config
+    voice_model.parent.mkdir(parents=True, exist_ok=True)
+    args.output_dir.mkdir(parents=True, exist_ok=True)
+
+    if args.download_voice:
+        _download_piper_voice(args.piper_voice, voice_model.parent)
+
+    if not args.skip_sfx:
+        _generate_sfx(args.output_dir, args.force)
+    if not args.skip_tts:
+        _generate_tts(
+            args.output_dir,
+            args.force,
+            voice_model,
+            voice_config,
+        )
+
+
+if __name__ == "__main__":
+    main()