feat(controller): migrate wifi drivers from tcp to websocket clients

This commit is contained in:
2026-04-14 23:13:26 +12:00
parent f5a7b42e7c
commit 96712dda88
19 changed files with 1195 additions and 673 deletions

View File

@@ -237,16 +237,19 @@ class Device(Model):
"""
Register or update a Wi-Fi client by **MAC** (storage id). Updates **name**,
**address** (peer IP), and optionally **type** from the client hello when valid.
Returns ``(mac_hex | None, persisted)`` where **persisted** is True iff ``save()``
ran (new row or field changes). Duplicate hellos with identical data are no-ops.
"""
mac_hex = normalize_mac(mac)
if not mac_hex:
return None
return None, False
name = (device_name or "").strip()
if not name:
return None
return None, False
ip = normalize_address_for_transport(peer_ip, "wifi")
if not ip:
return None
return None, False
resolved_type = None
if device_type is not None:
try:
@@ -254,7 +257,8 @@ class Device(Model):
except ValueError:
resolved_type = None
if mac_hex in self:
merged = dict(self[mac_hex])
prev = self[mac_hex]
merged = dict(prev)
merged["name"] = name
if resolved_type is not None:
merged["type"] = resolved_type
@@ -263,9 +267,11 @@ class Device(Model):
merged["transport"] = "wifi"
merged["address"] = ip
merged["id"] = mac_hex
if merged == prev:
return mac_hex, False
self[mac_hex] = merged
self.save()
return mac_hex
return mac_hex, True
self[mac_hex] = {
"id": mac_hex,
"name": name,
@@ -276,4 +282,4 @@ class Device(Model):
"zones": [],
}
self.save()
return mac_hex
return mac_hex, True

View File

@@ -1,115 +0,0 @@
"""Track connected Wi-Fi LED drivers (TCP clients) for outbound JSON lines."""
import asyncio
# Registry of live TCP stream writers, keyed by the normalised peer IP
# (see ``normalize_tcp_peer_ip``). At most one writer is kept per IP.
_writers = {}
def prune_stale_tcp_writers() -> None:
    """Unregister every writer that reports ``is_closing()``.

    Dropping them keeps the registry (and anything derived from it, such as
    the UI's online indicator) from listing a driver whose socket is going away.
    """
    # Collect first: unregistering mutates ``_writers`` while we would walk it.
    closing = []
    for peer, candidate in tuple(_writers.items()):
        if candidate.is_closing():
            closing.append((peer, candidate))
    for peer, candidate in closing:
        unregister_tcp_writer(peer, candidate)
def normalize_tcp_peer_ip(ip: str) -> str:
    """Return ``ip`` in the form used as a registry key.

    Surrounding whitespace is removed and a leading IPv4-mapped IPv6 prefix
    (``::ffff:``, matched case-insensitively) is stripped so that asyncio peer
    addresses compare equal to plain dotted-quad addresses.

    ``None`` yields ``""`` rather than the literal string ``"None"``, so the
    callers' ``if not key`` guards reject it instead of treating it as a
    real address.
    """
    if ip is None:
        return ""
    prefix = "::ffff:"
    s = str(ip).strip()
    if s.lower().startswith(prefix):
        s = s[len(prefix):]
    return s
# Optional ``async def (ip: str, connected: bool) -> None`` set from ``main``
# via ``set_tcp_status_broadcaster``. While it is ``None`` (or otherwise
# falsy) connection changes are not broadcast.
_tcp_status_broadcast = None
def set_tcp_status_broadcaster(coro) -> None:
    """Install the connection-status callback.

    ``coro`` is stored unchanged in the module-level ``_tcp_status_broadcast``
    slot; a falsy value disables broadcasting.
    """
    global _tcp_status_broadcast
    _tcp_status_broadcast = coro
# Strong references to in-flight broadcast tasks. The event loop only keeps
# weak references to tasks, so an otherwise unreferenced task may be
# garbage-collected before it finishes.
_status_broadcast_tasks = set()


def _schedule_tcp_status_broadcast(ip: str, connected: bool) -> None:
    """Fire-and-forget the status callback for ``ip`` on the running loop.

    Does nothing when no callback is installed or when called outside a
    running event loop. Scheduling failures are logged, never raised, so a
    broken broadcaster cannot disturb connection bookkeeping.
    """
    fn = _tcp_status_broadcast
    if not fn:
        return
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread: there is nothing to schedule on.
        return
    try:
        task = loop.create_task(fn(ip, connected))
    except Exception as exc:
        print(f"[TCP] status broadcast for {ip} not scheduled: {exc!r}")
        return
    # Hold the task until it completes, then drop the reference.
    _status_broadcast_tasks.add(task)
    task.add_done_callback(_status_broadcast_tasks.discard)
def register_tcp_writer(peer_ip: str, writer) -> None:
    """Record ``writer`` as the live TCP session for ``peer_ip``.

    The IP is normalised before use; empty input is ignored. A status
    broadcast (connected) is scheduled, and any different writer previously
    registered for the same IP is closed on a best-effort basis.
    """
    key = normalize_tcp_peer_ip(peer_ip) if peer_ip else ""
    if not key:
        return
    previous = _writers.get(key)
    _writers[key] = writer
    _schedule_tcp_status_broadcast(key, True)
    if previous is None or previous is writer:
        return
    # The replaced session is stale; failures while closing it are ignored.
    try:
        previous.close()
    except Exception:
        pass
def unregister_tcp_writer(peer_ip: str, writer=None) -> str:
    """
    Drop the registered writer for ``peer_ip``.

    When ``writer`` is supplied the entry is removed only if that exact
    instance is still the registered one, so a replaced TCP session cannot
    evict its successor.

    Returns ``"removed"`` (entry cleared and an offline broadcast scheduled),
    ``"noop"`` (nothing registered / empty IP) or ``"superseded"`` (another
    writer now owns that IP).
    """
    key = normalize_tcp_peer_ip(peer_ip) if peer_ip else ""
    if not key:
        return "noop"
    if writer is not None:
        registered = _writers.get(key)
        if registered is None:
            return "noop"
        if registered is not writer:
            return "superseded"
    if key not in _writers:
        return "noop"
    del _writers[key]
    _schedule_tcp_status_broadcast(key, False)
    print(f"[TCP] device disconnected: {key}")
    return "removed"
def list_connected_ips():
    """Return the IPs that currently have a registered TCP writer.

    Closing writers are pruned first so the snapshot only lists live sessions.
    """
    prune_stale_tcp_writers()
    return list(_writers)
def tcp_client_connected(ip: str) -> bool:
    """Report whether a TCP writer is registered for ``ip`` (after pruning)."""
    prune_stale_tcp_writers()
    key = normalize_tcp_peer_ip(ip)
    if not key:
        return False
    return key in _writers
async def send_json_line_to_ip(ip: str, json_str: str) -> bool:
    """Write one newline-terminated JSON message to the client at ``ip``.

    Returns ``True`` once the data has been written and drained, ``False``
    when no writer is registered or the write fails. A failing writer is
    unregistered.
    """
    ip = normalize_tcp_peer_ip(ip)
    writer = _writers.get(ip)
    if not writer:
        return False
    try:
        # Ensure exactly the framing the line protocol expects.
        if json_str.endswith("\n"):
            line = json_str
        else:
            line = json_str + "\n"
        writer.write(line.encode("utf-8"))
        await writer.drain()
    except Exception as exc:
        print(f"[TCP] send to {ip} failed: {exc}")
        unregister_tcp_writer(ip, writer)
        return False
    else:
        return True

View File

@@ -0,0 +1,281 @@
"""Outbound WebSocket clients to Wi-Fi LED drivers (firmware serves ``/ws`` on device)."""
from __future__ import annotations
import asyncio
import errno
import json
import traceback
import websockets
from websockets.exceptions import ConnectionClosed
# Live outbound WebSocket per driver, keyed by normalised peer IP.
_connections: dict[str, object] = {}
# Per-IP lock held around ``ws.send`` in ``send_json_line_to_ip``.
_send_locks: dict[str, asyncio.Lock] = {}
# Background ``_driver_connection_loop`` task per IP.
_tasks: dict[str, asyncio.Task] = {}
# Count of benign connect failures per IP since the last reset; used to
# throttle the "unreachable" log line.
_unreachable_counts: dict[str, int] = {}
# Settings object installed by ``set_settings``; ``None`` until then.
_settings = None
# Optional callable ``(ip, connected)`` returning a coroutine, installed by
# ``set_tcp_status_broadcaster``; ``None`` disables status broadcasts.
_tcp_status_broadcast = None
def set_settings(settings) -> None:
    """Install the settings object read when dialling a driver.

    The object is stored unchanged in the module-level ``_settings`` slot.
    """
    global _settings
    _settings = settings
def set_tcp_status_broadcaster(coro) -> None:
    """Install the connection-status callback (name kept from the TCP registry).

    ``coro`` is stored unchanged in the module-level ``_tcp_status_broadcast``
    slot; a falsy value disables broadcasting.
    """
    global _tcp_status_broadcast
    _tcp_status_broadcast = coro
# Strong references to in-flight broadcast tasks. The event loop only keeps
# weak references to tasks, so an otherwise unreferenced task may be
# garbage-collected before it finishes.
_status_broadcast_tasks = set()


def _schedule_status_broadcast(ip: str, connected: bool) -> None:
    """Fire-and-forget the status callback for ``ip`` on the running loop.

    Does nothing when no callback is installed or when called outside a
    running event loop. Scheduling failures are logged, never raised, so a
    broken broadcaster cannot disturb connection bookkeeping.
    """
    fn = _tcp_status_broadcast
    if not fn:
        return
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread: there is nothing to schedule on.
        return
    try:
        task = loop.create_task(fn(ip, connected))
    except Exception as exc:
        print(f"[WS] status broadcast for {ip} not scheduled: {exc!r}")
        return
    # Hold the task until it completes, then drop the reference.
    _status_broadcast_tasks.add(task)
    task.add_done_callback(_status_broadcast_tasks.discard)
def _benign_ws_connect_failure(exc: BaseException) -> bool:
    """Classify ``exc`` as an ordinary "driver down / no route" dial failure.

    Timeouts and refused connections always qualify. Any other ``OSError``
    qualifies only when its ``errno`` is one of a small set of network
    reachability codes; errno names missing on this platform are skipped.
    """
    if isinstance(exc, (asyncio.TimeoutError, TimeoutError, ConnectionRefusedError)):
        return True
    if not isinstance(exc, OSError) or exc.errno is None:
        return False
    optional_names = ("EHOSTUNREACH", "ENETUNREACH", "ENETDOWN", "EADDRNOTAVAIL")
    benign = {errno.ECONNREFUSED, errno.ETIMEDOUT}
    benign.update(
        getattr(errno, name) for name in optional_names if hasattr(errno, name)
    )
    return exc.errno in benign
def normalize_tcp_peer_ip(ip: str) -> str:
    """Return ``ip`` in the form used as a registry key.

    Surrounding whitespace is removed and a leading IPv4-mapped IPv6 prefix
    (``::ffff:``, matched case-insensitively) is stripped so that peer
    addresses compare equal to plain dotted-quad addresses.

    ``None`` yields ``""`` rather than the literal string ``"None"``, so the
    callers' ``if not key`` guards reject it instead of treating it as a
    real address.
    """
    if ip is None:
        return ""
    prefix = "::ffff:"
    s = str(ip).strip()
    if s.lower().startswith(prefix):
        s = s[len(prefix):]
    return s
def _ws_open(ws) -> bool:
    """Return True while ``ws`` reports no close code.

    Any error while reading ``close_code`` is treated as "not open".
    """
    try:
        code = ws.close_code
    except Exception:
        return False
    return code is None
def prune_stale_tcp_writers() -> None:
    """Forget WebSocket entries that are no longer open.

    The function keeps its TCP-era name for existing callers. Each removed
    IP gets an offline status broadcast scheduled.
    """
    closed = []
    for peer, sock in tuple(_connections.items()):
        if _ws_open(sock):
            continue
        closed.append(peer)
    for peer in closed:
        _connections.pop(peer, None)
        _schedule_status_broadcast(peer, False)
def _register_ws(ip: str, ws) -> None:
    """Record ``ws`` as the live connection for ``ip`` and announce it.

    Clears the unreachable counter for the IP, makes sure a send lock exists,
    schedules an online status broadcast and logs the connection. An IP that
    normalises to an empty string is ignored.
    """
    key = normalize_tcp_peer_ip(ip)
    if key:
        _connections[key] = ws
        # A successful connect ends any run of "unreachable" log throttling.
        _unreachable_counts.pop(key, None)
        if key not in _send_locks:
            _send_locks[key] = asyncio.Lock()
        _schedule_status_broadcast(key, True)
        print(f"[WS] driver connected {key!r}")
def unregister_tcp_writer(peer_ip: str, ws=None) -> str:
    """
    Drop the registered WebSocket for ``peer_ip``.

    When ``ws`` is supplied the entry is removed only if that exact instance
    is still the registered one.

    Returns ``"removed"`` (entry cleared and an offline broadcast scheduled),
    ``"noop"`` (nothing registered / empty IP) or ``"superseded"`` (another
    connection now owns that IP), the same contract as the former TCP registry.
    """
    key = normalize_tcp_peer_ip(peer_ip) if peer_ip else ""
    if not key:
        return "noop"
    if ws is not None:
        registered = _connections.get(key)
        if registered is None:
            return "noop"
        if registered is not ws:
            return "superseded"
    if key not in _connections:
        return "noop"
    del _connections[key]
    _schedule_status_broadcast(key, False)
    print(f"[WS] driver disconnected: {key}")
    return "removed"
def list_connected_ips():
    """Return the IPs that currently have an open outbound WebSocket.

    Closed entries are pruned first so the snapshot only lists live drivers.
    """
    prune_stale_tcp_writers()
    return list(_connections)
def tcp_client_connected(ip: str) -> bool:
    """Report whether an outbound WebSocket is registered for ``ip`` (after pruning)."""
    prune_stale_tcp_writers()
    key = normalize_tcp_peer_ip(ip)
    if not key:
        return False
    return key in _connections
async def send_json_line_to_ip(ip: str, json_str: str) -> bool:
    """Send ``json_str`` to the driver at ``ip`` as one WebSocket text frame.

    Trailing newlines (the v1 line terminator) are stripped before sending.
    Returns ``True`` on success, ``False`` when no open connection is
    registered or the send fails. A failing connection is unregistered.
    """
    ip = normalize_tcp_peer_ip(ip)
    ws = _connections.get(ip)
    if ws is None:
        return False
    if not _ws_open(ws):
        return False
    frame = json_str.rstrip("\n")
    # Sends to one driver are serialised through its per-IP lock.
    send_lock = _send_locks.setdefault(ip, asyncio.Lock())
    try:
        async with send_lock:
            await ws.send(frame)
    except Exception as exc:
        print(f"[WS] send to {ip} failed: {exc}")
        unregister_tcp_writer(ip, ws)
        return False
    else:
        return True
async def _forward_to_bridge(sender, payload: str, **send_kwargs) -> None:
    """Send ``payload`` through ``sender``; log (never raise) on failure."""
    try:
        await sender.send(payload, **send_kwargs)
    except Exception as e:
        print(f"[WS] forward to bridge failed: {e}")


async def _recv_forward_loop(ip: str, ws) -> None:
    """Read frames from ``ws`` until it closes and relay them to the bridge sender.

    Each frame is decoded as UTF-8 if binary, stripped and logged; empty and
    undecodable frames are skipped. A JSON object has its ``"to"`` key removed
    and passed as ``addr=`` with the remaining object re-serialised. Any other
    text, including invalid JSON, is forwarded verbatim. Forwarding failures
    are logged on every path and never abort the receive loop.
    """
    from models.transport import get_current_sender

    # NOTE(review): the sender is resolved once per session; if it can change
    # or become available later, this loop will not notice. Confirm intent.
    sender = get_current_sender()
    async for message in ws:
        if isinstance(message, bytes):
            try:
                text = message.decode("utf-8")
            except UnicodeDecodeError:
                print(f"[WS] recv {ip} (non-UTF-8, {len(message)} bytes)")
                continue
        else:
            text = message
        text = text.strip()
        if not text:
            continue
        print(f"[WS] recv {ip}: {text}")
        if not sender:
            continue
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Not JSON: fall through to the verbatim path below.
            parsed = None
        if isinstance(parsed, dict):
            addr = parsed.pop("to", None)
            payload = json.dumps(parsed) if parsed else "{}"
            await _forward_to_bridge(sender, payload, addr=addr)
        else:
            await _forward_to_bridge(sender, text)
async def _driver_connection_loop(ip: str) -> None:
    """Dial ``ws://<ip>:<port><path>`` and keep the session alive for ``ip``.

    Port and path come from the installed settings (``wifi_driver_ws_port``,
    default 80; ``wifi_driver_ws_path``, default ``/ws``). The function
    returns immediately when no settings have been installed.

    While the driver cannot be reached, connection attempts are retried every
    2 s for a 30 s window, after which the loop gives up. The window restarts
    whenever an established session ends, so a driver that was connected for a
    long time gets a full window of reconnect attempts instead of being
    abandoned at once.

    On exit the task removes its own entry from ``_tasks``. Cancellation
    unregisters the connection and is re-raised.
    """
    if _settings is None:
        return
    port = int(_settings.get("wifi_driver_ws_port", 80))
    path = str(_settings.get("wifi_driver_ws_path", "/ws"))
    if not path.startswith("/"):
        path = "/" + path
    uri = f"ws://{ip}:{port}{path}"
    retry_interval_s = 2.0
    retry_window_s = 30.0
    loop = asyncio.get_running_loop()
    deadline = loop.time() + retry_window_s
    try:
        while True:
            if loop.time() >= deadline:
                print(
                    f"[WS] driver {ip} still unreachable after {int(retry_window_s)}s; "
                    "stopping retries until next hello"
                )
                break
            try:
                print(f"[WS] connecting to {uri!r}")
                async with websockets.connect(
                    uri,
                    ping_interval=20,
                    ping_timeout=15,
                    open_timeout=30,
                ) as ws:
                    _register_ws(ip, ws)
                    try:
                        await _recv_forward_loop(ip, ws)
                    finally:
                        unregister_tcp_writer(ip, ws)
                        # The driver was reachable: measure the retry window
                        # from the end of this session, not from the first dial.
                        deadline = loop.time() + retry_window_s
            except asyncio.CancelledError:
                raise
            except ConnectionClosed as e:
                print(f"[WS] driver {ip} closed: {e}")
                unregister_tcp_writer(ip, None)
            except Exception as e:
                if _benign_ws_connect_failure(e):
                    # Throttle the log: first failure, then every 30th.
                    n = _unreachable_counts.get(ip, 0) + 1
                    _unreachable_counts[ip] = n
                    if n == 1 or (n % 30) == 0:
                        print(f"[WS] driver {ip} unreachable, retry in 2s: {e} (x{n})")
                else:
                    print(f"[WS] driver {ip} session error: {e!r}")
                    traceback.print_exception(type(e), e, e.__traceback__)
                    _unreachable_counts.pop(ip, None)
                unregister_tcp_writer(ip, None)
            await asyncio.sleep(retry_interval_s)
    except asyncio.CancelledError:
        unregister_tcp_writer(ip, None)
        raise
    finally:
        _tasks.pop(ip, None)
def ensure_driver_connection(peer_ip: str) -> None:
    """Make sure a background connection task exists for ``peer_ip``.

    A task that is still running is left alone. Otherwise a new
    ``_driver_connection_loop`` task is created on the running event loop.
    Nothing happens for an empty IP or when no loop is running.
    """
    key = normalize_tcp_peer_ip(peer_ip)
    if not key:
        return
    existing = _tasks.get(key)
    still_running = existing is not None and not existing.done()
    if still_running:
        return
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        return
    else:
        _tasks[key] = loop.create_task(_driver_connection_loop(key))
def cancel_all_driver_tasks() -> None:
    """Shut down: cancel every driver task and reset all registries.

    Unfinished tasks are cancelled, an offline status broadcast is scheduled
    for each registered connection, and the task, connection, lock and
    unreachable-count tables are emptied.
    """
    for task in tuple(_tasks.values()):
        if task.done():
            continue
        task.cancel()
    _tasks.clear()
    for peer in tuple(_connections):
        _schedule_status_broadcast(peer, False)
    for registry in (_connections, _send_locks, _unreachable_counts):
        registry.clear()