From 50761a4b37bc1e4a615f629321ed64d0aa63d504 Mon Sep 17 00:00:00 2001 From: pober Date: Wed, 20 May 2026 10:12:08 +0000 Subject: [PATCH] bugfix/1025-local-link-lost-connection (#34) Fixes the bug that local link loses connection after a few minutes. Openproject: #1025 #608 Reviewed-on: https://gitea.summitwave.work/auracaster/bumble-auracast/pulls/34 --- src/service/10-link-local-mgmt | 69 +++++++------------ .../update_and_run_server_and_frontend.sh | 17 ++++- 2 files changed, 41 insertions(+), 45 deletions(-) diff --git a/src/service/10-link-local-mgmt b/src/service/10-link-local-mgmt index 3908060..324f759 100644 --- a/src/service/10-link-local-mgmt +++ b/src/service/10-link-local-mgmt @@ -5,6 +5,8 @@ # using nmcli device modify (active session only, NOT saved to the profile). # The persistent profile always keeps ipv4.link-local=enabled so that # direct-connect (no DHCP) plug-ins always activate and trigger events. +# Avahi is reloaded on each event — no /etc/avahi/hosts file, avahi uses +# natural per-interface advertisement so each segment gets the right IP. # # Triggers: up, down, dhcp4-change on ethernet interfaces # Install to: /etc/NetworkManager/dispatcher.d/10-link-local-mgmt @@ -12,47 +14,31 @@ INTERFACE="$1" ACTION="$2" -CONNECTION_NAME="${CONNECTION_ID:-}" - # Only handle ethernet interfaces if [[ ! "$INTERFACE" =~ ^eth ]]; then exit 0 fi -# If CONNECTION_ID env var is not set, look up the active connection for this interface -if [ -z "$CONNECTION_NAME" ]; then - CONNECTION_NAME=$(nmcli -t -f NAME,DEVICE connection show --active 2>/dev/null \ - | grep ":${INTERFACE}$" | cut -d: -f1 | head -n1) - [ -z "$CONNECTION_NAME" ] && exit 0 -fi - -# Update /etc/avahi/hosts to point mDNS hostname at the best available DHCP address -# across all ethernet interfaces (so Avahi doesn't advertise a link-local address). 
-update_avahi() { - local hostname - hostname=$(hostname) - # Find first non-link-local IPv4 across all ethernet interfaces - local dhcp_ip - dhcp_ip=$(ip -4 addr show 2>/dev/null \ - | grep -A5 ': eth' \ - | grep -oP '(?<=inet\s)\d+(\.\d+){3}' \ - | grep -v '^127\.' \ - | grep -v '^169\.254\.' \ - | head -n1) - - if [ -n "$dhcp_ip" ]; then - mkdir -p /etc/avahi - echo "$dhcp_ip $hostname $hostname.local" > /etc/avahi/hosts - logger -t nm-link-local "Avahi: pinned $hostname -> $dhcp_ip" - else - rm -f /etc/avahi/hosts - logger -t nm-link-local "Avahi: removed hosts pin, using all addresses" - fi - systemctl restart avahi-daemon 2>/dev/null +reload_avahi() { + systemctl reload avahi-daemon 2>/dev/null || systemctl restart avahi-daemon 2>/dev/null + logger -t nm-link-local "[$INTERFACE] $ACTION — avahi reloaded" } case "$ACTION" in - up|dhcp4-change) + up) + # On 'up' the interface may still carry a stale DHCP address from the previous + # session (NM hasn't cleaned it up yet). Reading ip-addr here is unreliable. + # Always re-enable link-local as a clean slate; let dhcp4-change suppress it + # later if a real DHCP lease is obtained. + logger -t nm-link-local "[$INTERFACE] Up — ensuring link-local active (clean slate)" + (sleep 2 && nmcli device modify "$INTERFACE" ipv4.link-local enabled 2>/dev/null \ + && logger -t nm-link-local "[$INTERFACE] Link-local explicitly enabled on up") & + reload_avahi + ;; + + dhcp4-change) + # dhcp4-change fires only when DHCP actually succeeds (new/renewed lease). + # At this point the DHCP IP is reliably present — safe to read and suppress link-local. DHCP_IP=$(ip -4 addr show "$INTERFACE" 2>/dev/null \ | grep -oP '(?<=inet\s)\d+(\.\d+){3}' \ | grep -v '^127\.' 
\ @@ -60,24 +46,19 @@ case "$ACTION" in | head -n1) if [ -n "$DHCP_IP" ]; then - logger -t nm-link-local "[$INTERFACE] DHCP $DHCP_IP detected — suppressing link-local (session only)" - # Use device modify (not connection modify) so the persistent profile keeps - # ipv4.link-local=enabled. This ensures direct-connect plug-ins always activate. + logger -t nm-link-local "[$INTERFACE] DHCP $DHCP_IP confirmed — suppressing link-local (session only)" # Run in background after a delay — nmcli blocks on NM, which is waiting for # this dispatcher to return, causing a deadlock if called synchronously. (sleep 2 && nmcli device modify "$INTERFACE" ipv4.link-local disabled 2>/dev/null \ && logger -t nm-link-local "[$INTERFACE] Link-local suppressed for current session") & - else - logger -t nm-link-local "[$INTERFACE] No DHCP on $INTERFACE — keeping link-local active" fi - update_avahi + reload_avahi ;; down) - # Profile always has ipv4.link-local=enabled so no action needed here. - # The suppression from device modify was session-only and is gone when the - # connection goes down. - logger -t nm-link-local "[$INTERFACE] Down — link-local will be active on next connect" - update_avahi + # NOTE: a carrier-change does NOT fully reset session-level 'device modify' state. + # The 'up' handler therefore re-enables link-local unconditionally; dhcp4-change suppresses it again once a lease is obtained. 
+ logger -t nm-link-local "[$INTERFACE] Down — link-local will be re-enabled on next up without DHCP" + reload_avahi ;; esac diff --git a/src/service/update_and_run_server_and_frontend.sh b/src/service/update_and_run_server_and_frontend.sh index f6b8159..57d2e27 100755 --- a/src/service/update_and_run_server_and_frontend.sh +++ b/src/service/update_and_run_server_and_frontend.sh @@ -8,13 +8,28 @@ set -e # Enable link-local for all wired ethernet connections while IFS=: read -r name type; do if [[ "$type" == *"ethernet"* ]]; then - echo "Enabling IPv4 link-local for connection: $name" + echo "Configuring connection: $name" + # link-local: always enabled so direct-connect (no DHCP) works immediately sudo nmcli connection modify "$name" ipv4.link-local enabled 2>/dev/null || echo "Failed to modify $name" + # may-fail=yes: do NOT tear down the connection when DHCP times out. + # Without this, NM declares ip-config-unavailable after the 45s DHCP timeout + # and enters a reconnect loop that causes ~1.5 min outages every ~45 seconds. + sudo nmcli connection modify "$name" ipv4.may-fail yes 2>/dev/null || echo "Failed to set may-fail on $name" + # Infinite DHCP timeout: NM keeps retrying DHCP in the background but never + # declares ip-config-unavailable. This prevents the 45s reconnect loop that + # kills the link-local address in direct-connect (no DHCP server) scenarios. + sudo nmcli connection modify "$name" ipv4.dhcp-timeout infinity 2>/dev/null || echo "Failed to set dhcp-timeout on $name" sudo nmcli connection up "$name" 2>/dev/null || echo "Failed to bring up $name" fi done < <(nmcli -t -f NAME,TYPE connection show) +# Remove stale avahi hosts pin — this file overrides per-interface advertisement +# and causes mDNS to always resolve to eth0's IP regardless of which interface +# the query arrived on, breaking eth1 mDNS entirely. 
+sudo rm -f /etc/avahi/hosts +sudo systemctl restart avahi-daemon + # Ensure Loopback is loaded with a fixed name and index # Needed for dante # TODO image when we create the next image this should be part of it