fix(wifi): stagger driver ws dials and extend initial retry window
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -195,6 +195,27 @@ async def _recv_forward_loop(ip: str, ws) -> None:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _stagger_delay_s_for_ip(ip: str) -> float:
    """Return a deterministic connect delay in seconds derived from *ip*.

    The delay scales linearly with the last IPv4 octet, from 0.0 (octet 0)
    up to ``wifi_driver_connect_stagger_max_s`` (octet 255), so the same
    address always gets the same delay.

    Returns 0.0 when settings are not loaded, when the configured maximum
    is zero or negative, or when *ip* is not a dotted quad with an integer
    last part.  An unparsable, overflowing, or non-finite maximum falls
    back to the 2.5 s default.
    """
    import math

    default_max_s = 2.5

    if _settings is None:
        return 0.0

    try:
        max_s = float(
            _settings.get("wifi_driver_connect_stagger_max_s", default_max_s)
        )
    except (TypeError, ValueError, OverflowError):
        max_s = default_max_s

    # inf/nan would slip past the "<= 0" check below and be returned as a
    # delay the caller sleeps on; treat them like any other invalid value.
    if not math.isfinite(max_s):
        max_s = default_max_s

    if max_s <= 0:
        return 0.0

    parts = str(ip).strip().split(".")
    if len(parts) != 4:
        return 0.0

    try:
        # Modulo keeps out-of-range or negative octets inside 0..255.
        last = int(parts[3]) % 256
    except ValueError:
        return 0.0

    return (last / 255.0) * max_s
|
||||||
|
|
||||||
|
|
||||||
async def _driver_connection_loop(ip: str) -> None:
|
async def _driver_connection_loop(ip: str) -> None:
|
||||||
global _settings
|
global _settings
|
||||||
if _settings is None:
|
if _settings is None:
|
||||||
@@ -204,16 +225,37 @@ async def _driver_connection_loop(ip: str) -> None:
|
|||||||
if not path.startswith("/"):
|
if not path.startswith("/"):
|
||||||
path = "/" + path
|
path = "/" + path
|
||||||
uri = f"ws://{ip}:{port}{path}"
|
uri = f"ws://{ip}:{port}{path}"
|
||||||
|
try:
|
||||||
|
retry_interval_s = float(_settings.get("wifi_driver_connect_retry_interval_s", 2.0))
|
||||||
|
except (TypeError, ValueError):
|
||||||
retry_interval_s = 2.0
|
retry_interval_s = 2.0
|
||||||
retry_window_s = 30.0
|
retry_interval_s = max(0.2, retry_interval_s)
|
||||||
deadline = asyncio.get_running_loop().time() + retry_window_s
|
try:
|
||||||
|
retry_window_s = float(_settings.get("wifi_driver_connect_retry_window_s", 120.0))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
retry_window_s = 120.0
|
||||||
|
retry_window_s = max(5.0, retry_window_s)
|
||||||
|
try:
|
||||||
|
open_timeout = float(_settings.get("wifi_driver_ws_open_timeout", 45.0))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
open_timeout = 45.0
|
||||||
|
open_timeout = max(5.0, open_timeout)
|
||||||
|
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
stagger = _stagger_delay_s_for_ip(ip)
|
||||||
|
if stagger > 0:
|
||||||
|
await asyncio.sleep(stagger)
|
||||||
|
|
||||||
|
# Only bound boot-time: after we have connected once, keep retrying (Wi-Fi drops, reboots).
|
||||||
|
connected_once = False
|
||||||
|
deadline = loop.time() + retry_window_s
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
now = asyncio.get_running_loop().time()
|
now = loop.time()
|
||||||
if now >= deadline:
|
if not connected_once and now >= deadline:
|
||||||
print(
|
print(
|
||||||
f"[WS] driver {ip} still unreachable after {int(retry_window_s)}s; "
|
f"[WS] driver {ip} still unreachable after {int(retry_window_s)}s "
|
||||||
"stopping retries until next hello"
|
f"(initial window); stopping until next UDP hello / registry prime"
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
@@ -222,8 +264,9 @@ async def _driver_connection_loop(ip: str) -> None:
|
|||||||
uri,
|
uri,
|
||||||
ping_interval=20,
|
ping_interval=20,
|
||||||
ping_timeout=15,
|
ping_timeout=15,
|
||||||
open_timeout=30,
|
open_timeout=open_timeout,
|
||||||
) as ws:
|
) as ws:
|
||||||
|
connected_once = True
|
||||||
_register_ws(ip, ws)
|
_register_ws(ip, ws)
|
||||||
try:
|
try:
|
||||||
await _recv_forward_loop(ip, ws)
|
await _recv_forward_loop(ip, ws)
|
||||||
@@ -239,7 +282,9 @@ async def _driver_connection_loop(ip: str) -> None:
|
|||||||
n = _unreachable_counts.get(ip, 0) + 1
|
n = _unreachable_counts.get(ip, 0) + 1
|
||||||
_unreachable_counts[ip] = n
|
_unreachable_counts[ip] = n
|
||||||
if n == 1 or (n % 30) == 0:
|
if n == 1 or (n % 30) == 0:
|
||||||
print(f"[WS] driver {ip} unreachable, retry in 2s: {e} (x{n})")
|
print(
|
||||||
|
f"[WS] driver {ip} unreachable, retry in {retry_interval_s}s: {e} (x{n})"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
print(f"[WS] driver {ip} session error: {e!r}")
|
print(f"[WS] driver {ip} session error: {e!r}")
|
||||||
traceback.print_exception(type(e), e, e.__traceback__)
|
traceback.print_exception(type(e), e, e.__traceback__)
|
||||||
|
|||||||
@@ -57,6 +57,19 @@ class Settings(dict):
|
|||||||
# down (0 disables). Helps drivers that reconnect after seeing traffic on 8766.
|
# down (0 disables). Helps drivers that reconnect after seeing traffic on 8766.
|
||||||
if 'wifi_driver_hello_interval_s' not in self:
|
if 'wifi_driver_hello_interval_s' not in self:
|
||||||
self['wifi_driver_hello_interval_s'] = 10.0
|
self['wifi_driver_hello_interval_s'] = 10.0
|
||||||
|
# Outbound WebSocket dial: total seconds to keep trying before first success
|
||||||
|
# (many devices booting at once need more than a short window).
|
||||||
|
if 'wifi_driver_connect_retry_window_s' not in self:
|
||||||
|
self['wifi_driver_connect_retry_window_s'] = 120.0
|
||||||
|
# Spread outbound dials 0..N s by device IP so six+ drivers do not all hit the AP at once.
|
||||||
|
if 'wifi_driver_connect_stagger_max_s' not in self:
|
||||||
|
self['wifi_driver_connect_stagger_max_s'] = 2.5
|
||||||
|
# TCP/WebSocket open timeout per attempt (seconds).
|
||||||
|
if 'wifi_driver_ws_open_timeout' not in self:
|
||||||
|
self['wifi_driver_ws_open_timeout'] = 45.0
|
||||||
|
# Pause between outbound WebSocket dial attempts (seconds).
|
||||||
|
if 'wifi_driver_connect_retry_interval_s' not in self:
|
||||||
|
self['wifi_driver_connect_retry_interval_s'] = 2.0
|
||||||
# UART to ESP32 ESP-NOW bridge; default off (Wi-Fi drivers need no serial).
|
# UART to ESP32 ESP-NOW bridge; default off (Wi-Fi drivers need no serial).
|
||||||
if 'serial_enabled' not in self:
|
if 'serial_enabled' not in self:
|
||||||
self['serial_enabled'] = False
|
self['serial_enabled'] = False
|
||||||
|
|||||||
Reference in New Issue
Block a user