fix(wifi): stagger driver ws dials and extend initial retry window
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -195,6 +195,27 @@ async def _recv_forward_loop(ip: str, ws) -> None:
|
||||
pass
|
||||
|
||||
|
||||
def _stagger_delay_s_for_ip(ip: str) -> float:
    """Return a deterministic dial delay (seconds) derived from the last IPv4 octet.

    The delay grows linearly with the last octet of ``ip``: octet 0 maps to
    0.0 and octet 255 maps to the ``wifi_driver_connect_stagger_max_s``
    setting (2.5 when the key is missing or not convertible to float).

    Args:
        ip: Dotted-quad IPv4 address; anything else yields no delay.

    Returns:
        A finite delay in ``[0, max]`` seconds. 0.0 is returned when settings
        are not loaded, when the configured maximum is non-positive or
        non-finite, when ``ip`` does not split into four dot-separated
        fields, or when the last field is not an integer.
    """
    import math

    global _settings
    if _settings is None:
        return 0.0
    try:
        max_s = float(_settings.get("wifi_driver_connect_stagger_max_s", 2.5))
    except (TypeError, ValueError):
        max_s = 2.5
    # float() accepts "inf"/"nan"; an infinite maximum would turn into an
    # infinite delay for any non-zero octet, so treat non-finite as disabled.
    if not math.isfinite(max_s) or max_s <= 0:
        return 0.0
    parts = str(ip).strip().split(".")
    if len(parts) != 4:
        return 0.0
    try:
        last = int(parts[3]) % 256
    except ValueError:
        return 0.0
    return (last / 255.0) * max_s
|
||||
|
||||
|
||||
async def _driver_connection_loop(ip: str) -> None:
|
||||
global _settings
|
||||
if _settings is None:
|
||||
@@ -204,16 +225,37 @@ async def _driver_connection_loop(ip: str) -> None:
|
||||
if not path.startswith("/"):
|
||||
path = "/" + path
|
||||
uri = f"ws://{ip}:{port}{path}"
|
||||
try:
|
||||
retry_interval_s = float(_settings.get("wifi_driver_connect_retry_interval_s", 2.0))
|
||||
except (TypeError, ValueError):
|
||||
retry_interval_s = 2.0
|
||||
retry_window_s = 30.0
|
||||
deadline = asyncio.get_running_loop().time() + retry_window_s
|
||||
retry_interval_s = max(0.2, retry_interval_s)
|
||||
try:
|
||||
retry_window_s = float(_settings.get("wifi_driver_connect_retry_window_s", 120.0))
|
||||
except (TypeError, ValueError):
|
||||
retry_window_s = 120.0
|
||||
retry_window_s = max(5.0, retry_window_s)
|
||||
try:
|
||||
open_timeout = float(_settings.get("wifi_driver_ws_open_timeout", 45.0))
|
||||
except (TypeError, ValueError):
|
||||
open_timeout = 45.0
|
||||
open_timeout = max(5.0, open_timeout)
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
stagger = _stagger_delay_s_for_ip(ip)
|
||||
if stagger > 0:
|
||||
await asyncio.sleep(stagger)
|
||||
|
||||
# Only bound boot-time: after we have connected once, keep retrying (Wi-Fi drops, reboots).
|
||||
connected_once = False
|
||||
deadline = loop.time() + retry_window_s
|
||||
try:
|
||||
while True:
|
||||
now = asyncio.get_running_loop().time()
|
||||
if now >= deadline:
|
||||
now = loop.time()
|
||||
if not connected_once and now >= deadline:
|
||||
print(
|
||||
f"[WS] driver {ip} still unreachable after {int(retry_window_s)}s; "
|
||||
"stopping retries until next hello"
|
||||
f"[WS] driver {ip} still unreachable after {int(retry_window_s)}s "
|
||||
f"(initial window); stopping until next UDP hello / registry prime"
|
||||
)
|
||||
break
|
||||
try:
|
||||
@@ -222,8 +264,9 @@ async def _driver_connection_loop(ip: str) -> None:
|
||||
uri,
|
||||
ping_interval=20,
|
||||
ping_timeout=15,
|
||||
open_timeout=30,
|
||||
open_timeout=open_timeout,
|
||||
) as ws:
|
||||
connected_once = True
|
||||
_register_ws(ip, ws)
|
||||
try:
|
||||
await _recv_forward_loop(ip, ws)
|
||||
@@ -239,7 +282,9 @@ async def _driver_connection_loop(ip: str) -> None:
|
||||
n = _unreachable_counts.get(ip, 0) + 1
|
||||
_unreachable_counts[ip] = n
|
||||
if n == 1 or (n % 30) == 0:
|
||||
print(f"[WS] driver {ip} unreachable, retry in 2s: {e} (x{n})")
|
||||
print(
|
||||
f"[WS] driver {ip} unreachable, retry in {retry_interval_s}s: {e} (x{n})"
|
||||
)
|
||||
else:
|
||||
print(f"[WS] driver {ip} session error: {e!r}")
|
||||
traceback.print_exception(type(e), e, e.__traceback__)
|
||||
|
||||
@@ -57,6 +57,19 @@ class Settings(dict):
|
||||
# down (0 disables). Helps drivers that reconnect after seeing traffic on 8766.
|
||||
if 'wifi_driver_hello_interval_s' not in self:
|
||||
self['wifi_driver_hello_interval_s'] = 10.0
|
||||
# Outbound WebSocket dial: total seconds to keep trying before first success
|
||||
# (many devices booting at once need more than a short window).
|
||||
if 'wifi_driver_connect_retry_window_s' not in self:
|
||||
self['wifi_driver_connect_retry_window_s'] = 120.0
|
||||
# Spread outbound dials 0..N s by device IP so six+ drivers do not all hit the AP at once.
|
||||
if 'wifi_driver_connect_stagger_max_s' not in self:
|
||||
self['wifi_driver_connect_stagger_max_s'] = 2.5
|
||||
# TCP/WebSocket open timeout per attempt (seconds).
|
||||
if 'wifi_driver_ws_open_timeout' not in self:
|
||||
self['wifi_driver_ws_open_timeout'] = 45.0
|
||||
# Pause between outbound WebSocket dial attempts (seconds).
|
||||
if 'wifi_driver_connect_retry_interval_s' not in self:
|
||||
self['wifi_driver_connect_retry_interval_s'] = 2.0
|
||||
# UART to ESP32 ESP-NOW bridge; default off (Wi-Fi drivers need no serial).
|
||||
if 'serial_enabled' not in self:
|
||||
self['serial_enabled'] = False
|
||||
|
||||
Reference in New Issue
Block a user