fix(wifi): limit outbound driver WS to hello-triggered attempts
Remove the periodic UDP hello loop; dial each driver at most wifi_driver_initial_connect_attempts times per discovery hello.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -13,7 +13,6 @@ from websockets.exceptions import ConnectionClosed
|
||||
_connections: dict[str, object] = {}
|
||||
_send_locks: dict[str, asyncio.Lock] = {}
|
||||
_tasks: dict[str, asyncio.Task] = {}
|
||||
_unreachable_counts: dict[str, int] = {}
|
||||
_settings = None
|
||||
|
||||
_tcp_status_broadcast = None
|
||||
@@ -119,7 +118,6 @@ def _register_ws(ip: str, ws) -> None:
|
||||
if not key:
|
||||
return
|
||||
_connections[key] = ws
|
||||
_unreachable_counts.pop(key, None)
|
||||
if key not in _send_locks:
|
||||
_send_locks[key] = asyncio.Lock()
|
||||
_schedule_status_broadcast(key, True)
|
||||
@@ -275,52 +273,43 @@ async def _driver_connection_loop(ip: str) -> None:
|
||||
if stagger > 0:
|
||||
await asyncio.sleep(stagger)
|
||||
|
||||
# Only bound boot-time: after we have connected once, keep retrying (Wi-Fi drops, reboots).
|
||||
connected_once = False
|
||||
boot_attempts = 0
|
||||
try:
|
||||
while True:
|
||||
if not connected_once:
|
||||
if boot_attempts >= max_boot_attempts:
|
||||
print(
|
||||
f"[WS] driver {ip} still unreachable after {max_boot_attempts} "
|
||||
f"initial dial attempt(s); stopping until next UDP hello / registry prime"
|
||||
)
|
||||
break
|
||||
boot_attempts += 1
|
||||
for attempt in range(1, max_boot_attempts + 1):
|
||||
try:
|
||||
print(f"[WS] connecting to {uri!r}")
|
||||
print(f"[WS] connecting to {uri!r} (attempt {attempt}/{max_boot_attempts})")
|
||||
async with websockets.connect(
|
||||
uri,
|
||||
ping_interval=20,
|
||||
ping_timeout=15,
|
||||
open_timeout=open_timeout,
|
||||
) as ws:
|
||||
connected_once = True
|
||||
_register_ws(ip, ws)
|
||||
try:
|
||||
await _recv_forward_loop(ip, ws)
|
||||
finally:
|
||||
unregister_tcp_writer(ip, ws)
|
||||
return
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except ConnectionClosed as e:
|
||||
print(f"[WS] driver {ip} closed: {e}")
|
||||
unregister_tcp_writer(ip, None)
|
||||
return
|
||||
except Exception as e:
|
||||
if _benign_ws_connect_failure(e):
|
||||
n = _unreachable_counts.get(ip, 0) + 1
|
||||
_unreachable_counts[ip] = n
|
||||
if n == 1 or (n % 30) == 0:
|
||||
print(
|
||||
f"[WS] driver {ip} unreachable, retry in {retry_interval_s}s: {e} (x{n})"
|
||||
)
|
||||
print(
|
||||
f"[WS] driver {ip} unreachable (attempt {attempt}/{max_boot_attempts}): {e}"
|
||||
)
|
||||
else:
|
||||
print(f"[WS] driver {ip} session error: {e!r}")
|
||||
traceback.print_exception(type(e), e, e.__traceback__)
|
||||
_unreachable_counts.pop(ip, None)
|
||||
unregister_tcp_writer(ip, None)
|
||||
await asyncio.sleep(retry_interval_s)
|
||||
if attempt < max_boot_attempts:
|
||||
await asyncio.sleep(retry_interval_s)
|
||||
print(
|
||||
f"[WS] driver {ip} still unreachable after {max_boot_attempts} attempt(s); "
|
||||
"waiting for next UDP hello"
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
unregister_tcp_writer(ip, None)
|
||||
raise
|
||||
@@ -329,10 +318,12 @@ async def _driver_connection_loop(ip: str) -> None:
|
||||
|
||||
|
||||
def ensure_driver_connection(peer_ip: str) -> None:
|
||||
"""Start (or keep) a background task that maintains ``ws://<ip>:port/ws``."""
|
||||
"""Dial ``ws://<ip>:port/ws`` up to wifi_driver_initial_connect_attempts times (UDP hello only)."""
|
||||
key = normalize_tcp_peer_ip(peer_ip)
|
||||
if not key:
|
||||
return
|
||||
if tcp_client_connected(key):
|
||||
return
|
||||
t = _tasks.get(key)
|
||||
if t is not None and not t.done():
|
||||
return
|
||||
@@ -353,4 +344,3 @@ def cancel_all_driver_tasks() -> None:
|
||||
_schedule_status_broadcast(ip, False)
|
||||
_connections.clear()
|
||||
_send_locks.clear()
|
||||
_unreachable_counts.clear()
|
||||
|
||||
Reference in New Issue
Block a user