generate noises

This commit is contained in:
Richard Ward
2025-12-19 18:03:11 +00:00
parent 8f434d8fd0
commit 2dac2b53df
12 changed files with 371 additions and 0 deletions

View File

@@ -16,6 +16,18 @@ Minimal SDL3 + Vulkan spinning cube demo.
### Run
- `python scripts/dev_commands.py run [--build-dir ...]` (source `build/conanrun.sh` / `build\conanrun.bat` first if the Conan runtime exports env vars).
### Audio assets
Install the dependencies that power `scripts/generate_audio_assets.py`:
- `python -m pip install --user numpy soundfile pedalboard piper-tts`
Before translating phrases, download a Piper voice (the script defaults to `en_US-lessac-medium`):
```
python -m piper.download_voices en_US-lessac-medium --download-dir scripts/assets/audio/tts/voices
```
Running the generator recreates procedural effects under `scripts/assets/audio/sfx/` and voice clips under `scripts/assets/audio/tts/`. Use `--force` to rebuild every file and `--skip-sfx` / `--skip-tts` if you only need one subset; add `--verbose` to see the internal logging as the files are created. Override the voice files with `--piper-voice-model <path>` and (optionally) `--piper-voice-config <path>` if you downloaded a different voice or location. Pass `--download-voice` to have the script invoke `piper.download_voices` automatically before rendering (requires `piper-tts` and network access).
## Runtime configuration
1. `sdl3_app --json-file-in <path>` loads JSON configs (script path, window size, `lua_debug`, etc.).
2. `sdl3_app --create-seed-json config/seed_runtime.json` writes a starter file assuming `scripts/cube_logic.lua` sits beside the binary.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,359 @@
#!/usr/bin/env python3
"""Create the demo's shared `.ogg` audio library via ``pedalboard`` + ``soundfile``."""
from __future__ import annotations
import argparse
import logging
import shutil
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
try:
import numpy as np
import soundfile as sf
from pedalboard import Chorus, Delay, Distortion, Pedalboard, Reverb
except ImportError as exc: # pragma: no cover - requires extra dependencies
raise SystemExit(
"Missing pedalboard or soundfile. Install them with "
"`python -m pip install pedalboard soundfile numpy`. "
f"ImportError: {exc}"
) from exc
try:
from piper import PiperVoice
except ImportError:
PiperVoice = None # type: ignore[assignment]
try:
from piper.download_voices import download_voice
except ImportError:
download_voice = None # type: ignore[assignment]
SAMPLE_RATE = 44100
TTS_PHRASES = [
"Level 1",
"Level 2",
"Game Over",
"Continue",
"Power Up",
]
DEFAULT_PIPER_VOICE_NAME = "en_US-lessac-medium"
DEFAULT_PIPER_VOICE_DIR = (
Path(__file__).resolve().parent / "assets" / "audio" / "tts" / "voices"
)
def _num_samples(duration: float) -> int:
return max(1, int(round(duration * SAMPLE_RATE)))
def _apply_envelope(
signal: np.ndarray,
attack: float,
decay: float,
sustain_level: float,
release: float,
) -> np.ndarray:
total = signal.shape[0]
attack_samples = min(total, int(round(attack * SAMPLE_RATE)))
decay_samples = min(total - attack_samples, int(round(decay * SAMPLE_RATE)))
release_samples = min(
total - attack_samples - decay_samples, int(round(release * SAMPLE_RATE))
)
sustain_samples = total - (attack_samples + decay_samples + release_samples)
env = np.empty(total, dtype=np.float32)
idx = 0
if attack_samples > 0:
env[idx : idx + attack_samples] = np.linspace(0.0, 1.0, attack_samples, False)
idx += attack_samples
if decay_samples > 0:
env[idx : idx + decay_samples] = np.linspace(
1.0, sustain_level, decay_samples, False
)
idx += decay_samples
if sustain_samples > 0:
env[idx : idx + sustain_samples] = sustain_level
idx += sustain_samples
if release_samples > 0:
env[-release_samples:] = np.linspace(
sustain_level, 0.0, release_samples, False
)
if idx + sustain_samples < total - release_samples:
env[idx + sustain_samples : -release_samples] = sustain_level
if attack_samples + decay_samples + sustain_samples + release_samples == 0:
env[:] = 1.0
return signal * env
def _download_piper_voice(voice_name: str, download_dir: Path) -> None:
if download_voice is None:
logger.warning(
"Automatic voice download requires `piper.download_voices`; install piper-tts to enable it."
)
return
download_dir.mkdir(parents=True, exist_ok=True)
logger.info("Downloading Piper voice %s into %s", voice_name, download_dir)
download_voice(voice_name, download_dir)
def _sine_glide(duration: float, start_freq: float, end_freq: float) -> np.ndarray:
samples = _num_samples(duration)
t = np.linspace(0.0, duration, samples, False)
freq = np.linspace(start_freq, end_freq, samples)
return np.sin(2 * np.pi * freq * t).astype(np.float32)
def _pink_noise(duration: float) -> np.ndarray:
samples = _num_samples(duration)
return np.random.normal(scale=0.15, size=samples).astype(np.float32)
def _menu_select(duration: float) -> tuple[np.ndarray, list]:
signal = _sine_glide(duration, 520, 960)
signal = _apply_envelope(signal, 0.01, 0.12, 0.6, 0.15)
effects = [Chorus(rate_hz=1.1, depth=0.18, mix=0.6)]
return signal, effects
def _power_up(duration: float) -> tuple[np.ndarray, list]:
signal = _sine_glide(duration, 270, 940)
signal += 0.25 * _pink_noise(duration)
signal = _apply_envelope(signal, 0.02, 0.26, 0.45, 0.2)
effects = [
Distortion(drive_db=14.0),
Reverb(room_size=0.45, wet_level=0.25, dry_level=0.9),
]
return signal, effects
def _level_up(duration: float) -> tuple[np.ndarray, list]:
base = _sine_glide(duration, 420, 660)
harmony = 0.45 * _sine_glide(duration, 660, 840)
signal = (base + harmony) / 1.45
signal = _apply_envelope(signal, 0.01, 0.18, 0.55, 0.25)
effects = [
Chorus(rate_hz=0.95, depth=0.24, mix=0.55),
Delay(delay_seconds=0.18, feedback=0.25, mix=0.35),
]
return signal, effects
def _swish(duration: float) -> tuple[np.ndarray, list]:
signal = _pink_noise(duration)
signal = _apply_envelope(signal, 0.1, 0.3, 0.2, 0.3)
effects = [
Reverb(room_size=0.7, wet_level=0.5, dry_level=0.4),
Chorus(rate_hz=0.4, depth=0.2, mix=0.45),
]
return signal, effects
def _thud(duration: float) -> tuple[np.ndarray, list]:
base = _sine_glide(duration, 80, 120)
signal = 0.7 * base + 0.3 * _pink_noise(duration)
signal = _apply_envelope(signal, 0.01, duration * 0.4, 0.0, 0.3)
effects = [
Distortion(drive_db=10.0),
Reverb(room_size=0.85, wet_level=0.55, dry_level=0.3),
]
return signal, effects
SFX_DEFINITIONS = [
("menu_select", 0.65, "short ascending ping", _menu_select),
("power_up", 1.1, "riser with harmonic shimmer", _power_up),
("level_up", 0.9, "bright burst", _level_up),
("swish", 0.7, "noisy transition", _swish),
("thud", 1.0, "low impact", _thud),
]
def _render_ogg(path: Path, duration: float, builder):
signal, effects = builder(duration)
processed = signal
if effects:
board = Pedalboard(effects)
processed = board(processed, SAMPLE_RATE)
processed = np.clip(processed, -1.0, 1.0).astype(np.float32)
sf.write(
str(path),
processed,
SAMPLE_RATE,
format="OGG",
subtype="VORBIS",
)
def _slugify(text: str) -> str:
return "".join(ch if ch.isalnum() else "_" for ch in text).strip("_").lower()
def _configure_logging(verbose: bool) -> None:
level = logging.DEBUG if verbose else logging.INFO
logging.basicConfig(level=level, format="%(levelname)s: %(message)s")
def _generate_sfx(output_dir: Path, force: bool):
target_dir = output_dir / "sfx"
if force and target_dir.exists():
logger.debug("Removing existing SFX folder %s", target_dir)
shutil.rmtree(target_dir)
target_dir.mkdir(parents=True, exist_ok=True)
for name, duration, description, builder in SFX_DEFINITIONS:
target = target_dir / f"{name}.ogg"
if target.exists() and not force:
logger.info(f"Skipping existing sound: {target.name} ({description})")
continue
logger.info(f"Rendering SFX: {name} -> {target.name}")
_render_ogg(target, duration, builder)
def _load_piper_voice(model_path: Path, config_path: Optional[Path]) -> Optional["PiperVoice"]:
if PiperVoice is None:
logger.warning("piper-tts is not installed; skipping voice generation.")
return None
if not model_path.exists():
logger.warning("Piper voice model not found at %s; skip TTS.", model_path)
return None
resolved_config = config_path or Path(f"{model_path}.json")
if not resolved_config.exists():
logger.warning(
"Piper voice config not found at %s; skip TTS generation.", resolved_config
)
return None
return PiperVoice.load(str(model_path), config_path=str(resolved_config))
def _synthesize_phrase_to_ogg(voice: "PiperVoice", phrase: str, path: Path) -> None:
chunks = list(voice.synthesize(phrase))
if not chunks:
logger.warning("Piper generated no audio for phrase '%s'", phrase)
return
audio = np.concatenate([chunk.audio_float_array for chunk in chunks])
audio = np.clip(audio, -1.0, 1.0).astype(np.float32)
sf.write(
str(path),
audio,
voice.config.sample_rate,
format="OGG",
subtype="VORBIS",
)
def _generate_tts(
output_dir: Path,
force: bool,
voice_model: Path,
voice_config: Optional[Path],
) -> None:
voice = _load_piper_voice(voice_model, voice_config)
if voice is None:
return
tts_dir = output_dir / "tts"
if force and tts_dir.exists():
logger.debug("Removing existing TTS folder %s", tts_dir)
shutil.rmtree(tts_dir)
tts_dir.mkdir(parents=True, exist_ok=True)
for phrase in TTS_PHRASES:
slug = _slugify(phrase)
target = tts_dir / f"{slug}.ogg"
if target.exists() and not force:
logger.info(f"Skipping existing voice: {target.name} ({phrase})")
continue
logger.info(f"Rendering voice: {phrase} -> {target.name}")
_synthesize_phrase_to_ogg(voice, phrase, target)
def main():
parser = argparse.ArgumentParser(
description="Regenerate the OGG sound library for the demo."
)
parser.add_argument(
"--output-dir",
type=Path,
default=Path(__file__).resolve().parent / "assets" / "audio",
help="Where to store generated OGG files.",
)
parser.add_argument(
"--force",
action="store_true",
help="Rebuild every asset even if a file already exists.",
)
parser.add_argument(
"--skip-tts",
action="store_true",
help="Do not regenerate the text-to-speech phrases.",
)
parser.add_argument(
"--skip-sfx",
action="store_true",
help="Do not regenerate the procedural sound effects.",
)
parser.add_argument(
"--verbose",
action="store_true",
help="Enable debug logging while generating audio assets.",
)
parser.add_argument(
"--piper-voice",
default=DEFAULT_PIPER_VOICE_NAME,
help="Piper voice identifier like 'en_US-lessac-medium'.",
)
parser.add_argument(
"--piper-voice-model",
type=Path,
help="Path to the Piper ONNX voice model (defaults to <download-dir>/<voice>.onnx).",
)
parser.add_argument(
"--piper-voice-config",
type=Path,
help="Path to the Piper voice config JSON (defaults to <model>.json).",
)
parser.add_argument(
"--download-voice",
action="store_true",
help="Automatically download the Piper voice before generating TTS.",
)
args = parser.parse_args()
_configure_logging(args.verbose)
logger.debug("Output directory: %s", args.output_dir)
voice_model_directory = (
args.piper_voice_model.parent if args.piper_voice_model else DEFAULT_PIPER_VOICE_DIR
)
voice_model = args.piper_voice_model or (
voice_model_directory / f"{args.piper_voice}.onnx"
)
voice_config = args.piper_voice_config
voice_model.parent.mkdir(parents=True, exist_ok=True)
args.output_dir.mkdir(parents=True, exist_ok=True)
if args.download_voice:
_download_piper_voice(args.piper_voice, voice_model.parent)
if not args.skip_sfx:
_generate_sfx(args.output_dir, args.force)
if not args.skip_tts:
_generate_tts(
args.output_dir,
args.force,
voice_model,
voice_config,
)
if __name__ == "__main__":
main()