feat(audio): move beat routing server-side and extend presets
Route beat-triggered manual selects from the controller server, add preset background and beat-counter UI support, and bump led-driver to include the matching pattern/runtime fixes. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
375
tests/beat_detect.py
Normal file
375
tests/beat_detect.py
Normal file
@@ -0,0 +1,375 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Live beat detection utility with custom/aubio/hybrid modes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import queue
|
||||
import sys
|
||||
import time
|
||||
from typing import Deque
|
||||
|
||||
try:
|
||||
import numpy as np
|
||||
except ImportError as exc:
|
||||
raise SystemExit(
|
||||
"Missing dependency: numpy. Install with `pip install numpy`."
|
||||
) from exc
|
||||
|
||||
try:
|
||||
import sounddevice as sd
|
||||
except ImportError as exc:
|
||||
raise SystemExit(
|
||||
"Missing dependency: sounddevice. Install with `pip install sounddevice`."
|
||||
) from exc
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
requests = None
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Beat detector utility")
|
||||
parser.add_argument(
|
||||
"--mode",
|
||||
choices=("custom", "aubio", "hybrid"),
|
||||
default="aubio",
|
||||
help="Detection mode",
|
||||
)
|
||||
parser.add_argument("--device", default=None, help="Input device name or index")
|
||||
parser.add_argument(
|
||||
"--sample-rate",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Audio sample rate (0 = use selected device default)",
|
||||
)
|
||||
parser.add_argument("--hop-size", type=int, default=256, help="Frame hop size in samples")
|
||||
parser.add_argument("--win-mult", type=int, default=2, help="Aubio window size multiplier")
|
||||
parser.add_argument(
|
||||
"--min-band-hz",
|
||||
type=float,
|
||||
default=45.0,
|
||||
help="Low frequency bound used for beat energy",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-band-hz",
|
||||
type=float,
|
||||
default=180.0,
|
||||
help="High frequency bound used for beat energy",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--energy-weight",
|
||||
type=float,
|
||||
default=0.7,
|
||||
help="Weight for low-band energy component (0..1)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--flux-weight",
|
||||
type=float,
|
||||
default=0.3,
|
||||
help="Weight for spectral flux component (0..1)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--threshold-multiplier",
|
||||
type=float,
|
||||
default=1.35,
|
||||
help="Custom-mode threshold multiplier vs adaptive baseline",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ema-alpha",
|
||||
type=float,
|
||||
default=0.08,
|
||||
help="Adaptive baseline smoothing (higher reacts faster)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--min-ioi-ms",
|
||||
type=float,
|
||||
default=85.0,
|
||||
help="Minimum time between beats in milliseconds",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bpm-window",
|
||||
type=int,
|
||||
default=8,
|
||||
help="How many recent beat intervals to use for BPM estimate",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--post-url",
|
||||
default="",
|
||||
help="Optional HTTP URL to POST beat events",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--aubio-method",
|
||||
default="default",
|
||||
choices=("default", "specdiff", "hfc", "complex", "phase", "energy"),
|
||||
help="Aubio tempo method",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--aubio-threshold",
|
||||
type=float,
|
||||
default=0.12,
|
||||
help="Aubio detection threshold",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--silence-gate-db",
|
||||
type=float,
|
||||
default=-58.0,
|
||||
help="Ignore beat triggers when frame RMS is below this dB level",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def _estimate_bpm(beat_times: Deque[float]) -> float | None:
|
||||
if len(beat_times) < 3:
|
||||
return None
|
||||
intervals = np.diff(np.array(beat_times, dtype=np.float64))
|
||||
valid = intervals[(intervals > 0.2) & (intervals < 2.0)]
|
||||
if valid.size == 0:
|
||||
return None
|
||||
return 60.0 / float(np.median(valid))
|
||||
|
||||
|
||||
def _load_aubio_if_needed(mode: str):
    """Import and return the ``aubio`` module unless *mode* is ``"custom"``.

    The import is first attempted with the current ``sys.path``. If that
    fails, ``/usr/lib/python3/dist-packages`` is appended to ``sys.path``
    and the import is retried. When the retry also fails, the path entry
    added here is removed again so a failed lookup leaves ``sys.path``
    unchanged.

    Args:
        mode: Detection mode; ``"custom"`` needs no aubio.

    Returns:
        The imported ``aubio`` module, or ``None`` for ``"custom"`` mode.

    Raises:
        SystemExit: If aubio cannot be imported from either location. The
            underlying ``ImportError`` is attached as the cause, matching
            the module's other dependency guards.
    """
    if mode == "custom":
        return None

    try:
        import aubio

        return aubio
    except ImportError:
        pass

    dist_packages = "/usr/lib/python3/dist-packages"
    path_added = dist_packages not in sys.path
    if path_added:
        sys.path.append(dist_packages)

    try:
        import aubio
    except ImportError as exc:
        # Undo our own sys.path change; never remove an entry that was
        # already present before this call.
        if path_added:
            sys.path.remove(dist_packages)
        raise SystemExit(
            "aubio not installed; use --mode custom or install aubio"
        ) from exc
    return aubio
|
||||
|
||||
|
||||
class BeatDetectRuntime:
|
||||
"""Reusable detector runtime so web and CLI can share logic."""
|
||||
|
||||
def __init__(self, args):
|
||||
self.args = args
|
||||
self.aubio = _load_aubio_if_needed(args.mode)
|
||||
self.sample_rate = 0
|
||||
self.frame_size = 0
|
||||
self.tempo = None
|
||||
self.band_mask = None
|
||||
self.freqs = None
|
||||
self.window = None
|
||||
self.prev_mag = None
|
||||
self.kick_mask = None
|
||||
self.snare_mask = None
|
||||
self.hat_mask = None
|
||||
self.baseline = 1e-6
|
||||
self.beat_times: Deque[float] = collections.deque(
|
||||
maxlen=max(2, args.bpm_window + 1)
|
||||
)
|
||||
self.last_trigger_s = 0.0
|
||||
self.debounce_s = float(args.min_ioi_ms) / 1000.0
|
||||
|
||||
def setup(self, sample_rate: int):
|
||||
self.sample_rate = int(sample_rate)
|
||||
self.frame_size = max(128, int(self.args.hop_size))
|
||||
win_size = max(1024, self.frame_size * max(2, self.args.win_mult))
|
||||
freqs = np.fft.rfftfreq(self.frame_size, d=1.0 / self.sample_rate)
|
||||
self.freqs = freqs
|
||||
self.band_mask = (freqs >= self.args.min_band_hz) & (
|
||||
freqs <= self.args.max_band_hz
|
||||
)
|
||||
self.kick_mask = (freqs >= 40.0) & (freqs <= 140.0)
|
||||
self.snare_mask = (freqs >= 140.0) & (freqs <= 3000.0)
|
||||
self.hat_mask = (freqs >= 5000.0) & (freqs <= 12000.0)
|
||||
if not np.any(self.band_mask):
|
||||
raise ValueError("Invalid band range for current sample rate")
|
||||
self.window = np.hanning(self.frame_size).astype(np.float32)
|
||||
self.prev_mag = np.zeros(freqs.shape[0], dtype=np.float32)
|
||||
self.baseline = 1e-6
|
||||
self.last_trigger_s = 0.0
|
||||
self.beat_times.clear()
|
||||
self.tempo = None
|
||||
if self.aubio is not None:
|
||||
self.tempo = self.aubio.tempo(
|
||||
self.args.aubio_method, win_size, self.frame_size, self.sample_rate
|
||||
)
|
||||
if hasattr(self.tempo, "set_threshold"):
|
||||
self.tempo.set_threshold(float(self.args.aubio_threshold))
|
||||
if hasattr(self.tempo, "set_minioi_ms"):
|
||||
self.tempo.set_minioi_ms(float(self.args.min_ioi_ms))
|
||||
|
||||
def _classify_hit(self, mag: np.ndarray):
|
||||
total = float(np.mean(mag) + 1e-9)
|
||||
kick = float(np.mean(mag[self.kick_mask])) / total if np.any(self.kick_mask) else 0.0
|
||||
snare = float(np.mean(mag[self.snare_mask])) / total if np.any(self.snare_mask) else 0.0
|
||||
hat = float(np.mean(mag[self.hat_mask])) / total if np.any(self.hat_mask) else 0.0
|
||||
scores = {
|
||||
"kick": kick,
|
||||
"snare": snare,
|
||||
"hat": hat,
|
||||
}
|
||||
label, value = max(scores.items(), key=lambda kv: kv[1])
|
||||
if value < 1.15:
|
||||
return "unknown", value
|
||||
return label, value
|
||||
|
||||
def process_frame(self, frame: np.ndarray, now_s: float | None = None):
|
||||
if self.window is None or self.band_mask is None:
|
||||
raise RuntimeError("Runtime not setup")
|
||||
if frame.shape[0] != self.frame_size:
|
||||
if frame.shape[0] > self.frame_size:
|
||||
frame = frame[: self.frame_size]
|
||||
else:
|
||||
frame = np.pad(frame, (0, self.frame_size - frame.shape[0]))
|
||||
|
||||
f32 = frame.astype(np.float32)
|
||||
rms = float(np.sqrt(np.mean(f32 * f32) + 1e-12))
|
||||
db = 20.0 * np.log10(max(rms, 1e-12))
|
||||
if db < float(self.args.silence_gate_db):
|
||||
return None
|
||||
mag = np.abs(np.fft.rfft(f32 * self.window)).astype(np.float32)
|
||||
band_energy = float(np.mean(mag[self.band_mask]))
|
||||
flux = float(np.mean(np.maximum(0.0, mag - self.prev_mag)))
|
||||
self.prev_mag[:] = mag
|
||||
|
||||
weight_sum = max(1e-6, self.args.energy_weight + self.args.flux_weight)
|
||||
score = ((self.args.energy_weight * band_energy) + (self.args.flux_weight * flux)) / weight_sum
|
||||
self.baseline = ((1.0 - self.args.ema_alpha) * self.baseline) + (
|
||||
self.args.ema_alpha * score
|
||||
)
|
||||
threshold = self.baseline * self.args.threshold_multiplier
|
||||
custom_hit = score > threshold
|
||||
|
||||
aubio_hit = False
|
||||
aubio_bpm = None
|
||||
if self.tempo is not None:
|
||||
aubio_hit = bool(self.tempo(f32)[0])
|
||||
val = float(self.tempo.get_bpm())
|
||||
aubio_bpm = val if val > 0 else None
|
||||
|
||||
if now_s is None:
|
||||
now_s = time.time()
|
||||
if (now_s - self.last_trigger_s) < self.debounce_s:
|
||||
return None
|
||||
|
||||
if self.args.mode == "custom":
|
||||
should_trigger = custom_hit
|
||||
elif self.args.mode == "aubio":
|
||||
should_trigger = aubio_hit
|
||||
else:
|
||||
should_trigger = custom_hit or aubio_hit
|
||||
if not should_trigger:
|
||||
return None
|
||||
|
||||
self.last_trigger_s = now_s
|
||||
self.beat_times.append(now_s)
|
||||
bpm = aubio_bpm if aubio_bpm is not None else _estimate_bpm(self.beat_times)
|
||||
strength = score / max(1e-9, self.baseline)
|
||||
beat_type, beat_type_conf = self._classify_hit(mag)
|
||||
if self.args.mode == "custom":
|
||||
src = "custom"
|
||||
elif self.args.mode == "aubio":
|
||||
src = "aubio"
|
||||
elif custom_hit and aubio_hit:
|
||||
src = "both"
|
||||
elif custom_hit:
|
||||
src = "custom"
|
||||
else:
|
||||
src = "aubio"
|
||||
return {
|
||||
"ts": now_s,
|
||||
"bpm": bpm,
|
||||
"src": src,
|
||||
"score": score,
|
||||
"threshold": threshold,
|
||||
"strength": strength,
|
||||
"beat_type": beat_type,
|
||||
"beat_type_confidence": beat_type_conf,
|
||||
"db": db,
|
||||
}
|
||||
|
||||
|
||||
def main() -> int:
    """Run the live beat detector on an audio input until Ctrl+C.

    Opens a mono input stream, feeds each captured block to a
    :class:`BeatDetectRuntime`, prints one line per detected beat and, when
    ``--post-url`` is set, POSTs each beat as JSON from a background thread.

    Returns:
        ``0`` after the loop is stopped with Ctrl+C.

    Raises:
        SystemExit: If ``--post-url`` is given but ``requests`` is missing
            (or if the runtime cannot load aubio for the selected mode).
    """
    # Local import: only the CLI entry point needs a worker thread.
    import threading

    args = parse_args()
    runtime = BeatDetectRuntime(args)

    if args.post_url and requests is None:
        raise SystemExit("`requests` is required for --post-url (pip install requests)")

    if args.sample_rate > 0:
        sample_rate = args.sample_rate
    else:
        dev_info = sd.query_devices(args.device, "input")
        sample_rate = int(dev_info["default_samplerate"])

    runtime.setup(sample_rate=sample_rate)
    frame_size = runtime.frame_size
    audio_q: "queue.Queue[np.ndarray]" = queue.Queue(maxsize=64)

    def audio_callback(indata, frames, _time_info, status):
        """Copy the first channel of each captured block into the queue."""
        del frames
        if status:
            print(f"audio status: {status}")
        # astype() always copies. The callback buffer must not be kept by
        # reference: a same-dtype np.asarray would only hand out a view of
        # memory the audio backend may reuse after this callback returns.
        mono = indata[:, 0].astype(np.float32)
        try:
            audio_q.put_nowait(mono)
        except queue.Full:
            # Consumer is behind; drop this block rather than block the
            # audio thread.
            pass

    # Beat events are posted from a worker thread so a slow or unreachable
    # server (up to the 0.5 s timeout per request) cannot stall the
    # detection loop and make the audio queue overflow.
    post_q = None
    sender = None
    if args.post_url:
        post_q = queue.Queue(maxsize=32)

        def post_worker():
            """Send queued beat payloads until the ``None`` sentinel arrives."""
            while True:
                payload = post_q.get()
                if payload is None:
                    return
                try:
                    requests.post(args.post_url, json=payload, timeout=0.5)
                except Exception as exc:
                    # Best effort: a failed POST must never stop detection.
                    print(f"post failed: {exc}")

        sender = threading.Thread(target=post_worker, name="beat-post", daemon=True)
        sender.start()

    print(
        "Listening... Ctrl+C to stop. "
        f"mode={args.mode} sr={sample_rate} hop={frame_size} "
        f"band={args.min_band_hz:.0f}-{args.max_band_hz:.0f}Hz "
        f"custom_th={args.threshold_multiplier:.2f} aubio_th={args.aubio_threshold:.2f} "
        f"min_ioi={args.min_ioi_ms:.0f}ms"
    )

    with sd.InputStream(
        device=args.device,
        channels=1,
        samplerate=sample_rate,
        blocksize=frame_size,
        callback=audio_callback,
    ):
        try:
            while True:
                try:
                    frame = audio_q.get(timeout=0.1)
                except queue.Empty:
                    continue

                # process_frame() itself truncates/pads frames to the
                # runtime's frame size, so no fitting is repeated here.
                event = runtime.process_frame(frame, now_s=time.time())
                if event is None:
                    continue

                now_s = event["ts"]
                bpm = event["bpm"]
                bpm_text = f"{bpm:.1f}" if isinstance(bpm, (float, int)) else "--"
                src = event["src"]
                print(
                    f"[{args.mode}] BEAT bpm={bpm_text} src={src} type={event['beat_type']} "
                    f"type_conf={event['beat_type_confidence']:.2f} strength={event['strength']:.2f} "
                    f"db={event['db']:.1f} "
                    f"score={event['score']:.3e} threshold={event['threshold']:.3e}"
                )

                if post_q is not None:
                    try:
                        post_q.put_nowait(
                            {"beat": True, "source": src, "ts": now_s, "bpm": bpm}
                        )
                    except queue.Full:
                        print("post failed: sender backlog full, beat dropped")
        except KeyboardInterrupt:
            print("\nStopped.")
        finally:
            if post_q is not None:
                try:
                    post_q.put_nowait(None)
                except queue.Full:
                    # Worker is a daemon thread; it dies with the process.
                    pass
                else:
                    sender.join(timeout=1.0)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user