From 15f8c8a0398f2e26918f7dfd8438d0b1af5814d3 Mon Sep 17 00:00:00 2001 From: Jimmy Date: Mon, 18 May 2026 14:54:22 +1200 Subject: [PATCH] fix(wifi): limit outbound driver WS to hello-triggered attempts Remove periodic UDP hello loop; dial each driver at most wifi_driver_initial_connect_attempts times per discovery hello. Co-authored-by: Cursor --- src/main.py | 73 +---------------------------------- src/models/wifi_ws_clients.py | 42 ++++++++------------ src/settings.py | 9 ++--- 3 files changed, 20 insertions(+), 104 deletions(-) diff --git a/src/main.py b/src/main.py index dd936b8..f97edc5 100644 --- a/src/main.py +++ b/src/main.py @@ -100,11 +100,7 @@ async def _handle_udp_discovery(sock, udp_holder=None) -> None: def _prime_wifi_outbound_driver_connections() -> None: - """ - For each Wi‑Fi device in the registry with a usable IPv4, start (or keep) the - outbound WebSocket task. The client loop reconnects automatically if the link - drops. Presets are not pushed automatically; use Send Presets / profile apply. - """ + """On boot, dial each registered Wi-Fi driver (same 4-attempt limit as UDP hello).""" n = 0 try: dev = Device() @@ -143,69 +139,6 @@ def _ipv4_address(addr: str) -> str | None: return s -async def _periodic_wifi_driver_hello_loop(settings, udp_holder) -> None: - """ - While a registered Wi-Fi driver has no outbound WebSocket, send a short JSON hello on - UDP discovery port so the device can announce itself and we can reconnect. 
- """ - try: - interval = float(settings.get("wifi_driver_hello_interval_s", 10.0)) - except (TypeError, ValueError): - interval = 10.0 - if interval <= 0: - return - - sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - sock.setblocking(False) - loop = asyncio.get_running_loop() - try: - while not udp_holder.get("closing"): - slept = 0.0 - while slept < interval and not udp_holder.get("closing"): - chunk = min(1.0, interval - slept) - await asyncio.sleep(chunk) - slept += chunk - if udp_holder.get("closing"): - break - try: - dev = Device() - except Exception as e: - print(f"[hello] device list failed: {e!r}") - continue - for _mac_key, doc in list(dev.items()): - if not isinstance(doc, dict): - continue - if doc.get("transport") != "wifi": - continue - ip = _ipv4_address(str(doc.get("address") or "")) - if not ip: - continue - if tcp_client_registry.tcp_client_connected(ip): - continue - name = (doc.get("name") or "").strip() - mac = normalize_mac(doc.get("id") or _mac_key) - if not name or not mac: - continue - line = ( - json.dumps( - {"m": "hello", "device_name": name, "mac": mac}, - separators=(",", ":"), - ) - + "\n" - ) - try: - await loop.sock_sendto( - sock, line.encode("utf-8"), (ip, DISCOVERY_UDP_PORT) - ) - except OSError as e: - print(f"[hello] UDP to {ip!r} failed: {e!r}") - finally: - try: - sock.close() - except OSError: - pass - - async def _run_udp_discovery_server(udp_holder=None) -> None: sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) sock.setblocking(False) @@ -573,10 +506,6 @@ async def main(port=80): asyncio.create_task( _run_udp_discovery_server(udp_holder), name="udp" ), - asyncio.create_task( - _periodic_wifi_driver_hello_loop(settings, udp_holder), - name="hello", - ), ] await asyncio.gather(*server_tasks) except asyncio.CancelledError: diff --git a/src/models/wifi_ws_clients.py b/src/models/wifi_ws_clients.py index 629d9d1..0429f1d 100644 --- a/src/models/wifi_ws_clients.py +++ b/src/models/wifi_ws_clients.py @@ -13,7 
+13,6 @@ from websockets.exceptions import ConnectionClosed _connections: dict[str, object] = {} _send_locks: dict[str, asyncio.Lock] = {} _tasks: dict[str, asyncio.Task] = {} -_unreachable_counts: dict[str, int] = {} _settings = None _tcp_status_broadcast = None @@ -119,7 +118,6 @@ def _register_ws(ip: str, ws) -> None: if not key: return _connections[key] = ws - _unreachable_counts.pop(key, None) if key not in _send_locks: _send_locks[key] = asyncio.Lock() _schedule_status_broadcast(key, True) @@ -275,52 +273,43 @@ async def _driver_connection_loop(ip: str) -> None: if stagger > 0: await asyncio.sleep(stagger) - # Only bound boot-time: after we have connected once, keep retrying (Wi-Fi drops, reboots). - connected_once = False - boot_attempts = 0 try: - while True: - if not connected_once: - if boot_attempts >= max_boot_attempts: - print( - f"[WS] driver {ip} still unreachable after {max_boot_attempts} " - f"initial dial attempt(s); stopping until next UDP hello / registry prime" - ) - break - boot_attempts += 1 + for attempt in range(1, max_boot_attempts + 1): try: - print(f"[WS] connecting to {uri!r}") + print(f"[WS] connecting to {uri!r} (attempt {attempt}/{max_boot_attempts})") async with websockets.connect( uri, ping_interval=20, ping_timeout=15, open_timeout=open_timeout, ) as ws: - connected_once = True _register_ws(ip, ws) try: await _recv_forward_loop(ip, ws) finally: unregister_tcp_writer(ip, ws) + return except asyncio.CancelledError: raise except ConnectionClosed as e: print(f"[WS] driver {ip} closed: {e}") unregister_tcp_writer(ip, None) + return except Exception as e: if _benign_ws_connect_failure(e): - n = _unreachable_counts.get(ip, 0) + 1 - _unreachable_counts[ip] = n - if n == 1 or (n % 30) == 0: - print( - f"[WS] driver {ip} unreachable, retry in {retry_interval_s}s: {e} (x{n})" - ) + print( + f"[WS] driver {ip} unreachable (attempt {attempt}/{max_boot_attempts}): {e}" + ) else: print(f"[WS] driver {ip} session error: {e!r}") 
traceback.print_exception(type(e), e, e.__traceback__) - _unreachable_counts.pop(ip, None) unregister_tcp_writer(ip, None) - await asyncio.sleep(retry_interval_s) + if attempt < max_boot_attempts: + await asyncio.sleep(retry_interval_s) + print( + f"[WS] driver {ip} still unreachable after {max_boot_attempts} attempt(s); " + "waiting for next UDP hello" + ) except asyncio.CancelledError: unregister_tcp_writer(ip, None) raise @@ -329,10 +318,12 @@ async def _driver_connection_loop(ip: str) -> None: def ensure_driver_connection(peer_ip: str) -> None: - """Start (or keep) a background task that maintains ``ws://<ip>:port/ws``.""" + """Dial ``ws://<ip>:port/ws`` up to wifi_driver_initial_connect_attempts times (UDP hello only).""" key = normalize_tcp_peer_ip(peer_ip) if not key: return + if tcp_client_connected(key): + return t = _tasks.get(key) if t is not None and not t.done(): return @@ -353,4 +344,3 @@ def cancel_all_driver_tasks() -> None: _schedule_status_broadcast(ip, False) _connections.clear() _send_locks.clear() - _unreachable_counts.clear() diff --git a/src/settings.py b/src/settings.py index 32c8928..60882cc 100644 --- a/src/settings.py +++ b/src/settings.py @@ -57,12 +57,9 @@ class Settings(dict): self['wifi_driver_ws_port'] = 80 if 'wifi_driver_ws_path' not in self: self['wifi_driver_ws_path'] = '/ws' - # Seconds between UDP discovery nudges when a Wi-Fi driver WebSocket is - # down (0 disables). Helps drivers that reconnect after seeing traffic on 8766. + # Legacy (unused): periodic UDP nudges removed; connect only on driver hello. if 'wifi_driver_hello_interval_s' not in self: - self['wifi_driver_hello_interval_s'] = 10.0 - # Legacy key (no longer read): initial outbound dial limit uses - # wifi_driver_initial_connect_attempts instead. 
+ self['wifi_driver_hello_interval_s'] = 0 if 'wifi_driver_connect_retry_window_s' not in self: self['wifi_driver_connect_retry_window_s'] = 120.0 # Spread outbound dials 0..N s by device IP so six+ drivers do not all hit the AP at once. @@ -74,7 +71,7 @@ class Settings(dict): # Pause between outbound WebSocket dial attempts (seconds). if 'wifi_driver_connect_retry_interval_s' not in self: self['wifi_driver_connect_retry_interval_s'] = 2.0 - # Outbound dial attempts to the saved driver IP before first success; then wait for UDP discovery. + # Outbound WebSocket dial attempts per driver UDP hello (then wait for next hello). if 'wifi_driver_initial_connect_attempts' not in self: self['wifi_driver_initial_connect_attempts'] = 4 # UART to ESP32 ESP-NOW bridge; default off (Wi-Fi drivers need no serial).