From 8d0702408db20236382a512de52f1e7f54577593 Mon Sep 17 00:00:00 2001 From: pober Date: Tue, 26 May 2026 17:43:53 +0200 Subject: [PATCH 1/2] Provisioning runs system update script so that radio firmware is flashed and accepts new host keys. --- src/provision.py | 160 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 122 insertions(+), 38 deletions(-) diff --git a/src/provision.py b/src/provision.py index 2039f41..a44c3e8 100644 --- a/src/provision.py +++ b/src/provision.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -import ipaddress, os, re, subprocess, tempfile, json, datetime, shlex +import ipaddress, os, re, subprocess, tempfile, json, datetime, shlex, time import secrets from pathlib import Path @@ -21,6 +21,47 @@ SSH_USER = os.getenv("IOT_SSH_USER", "caster") SSH_PORT = int(os.getenv("IOT_SSH_PORT", "22")) SSH_KEY = os.getenv("SSH_KEY") or None # path or None PROVISION_LOG = os.getenv("PROVISION_LOG") or str((Path(__file__).resolve().parent / "provision.log")) +REPO_URL = os.getenv("REPO_URL", "ssh://git@gitea.summitwave.work:222/auracaster/bumble-auracast.git") +REPO_DIR = os.getenv("REPO_DIR", "~/bumble-auracast") + + +def _ssh_base_opts() -> list: + """Common SSH options shared across all ssh/scp calls.""" + opts = [ + "-o", "StrictHostKeyChecking=accept-new", + "-o", "ConnectTimeout=10", + "-o", "ServerAliveInterval=30", + "-o", "ServerAliveCountMax=6", + ] + if SSH_KEY: + opts += ["-i", SSH_KEY] + return opts + + +def wait_for_ssh(ssh_host: str, timeout: int = 180, interval: int = 5) -> bool: + """Poll SSH until the device is stably reachable or timeout (seconds) is exceeded. + + Returns True when a connection succeeds, False on timeout. 
+ """ + deadline = time.monotonic() + timeout + attempt = 0 + while time.monotonic() < deadline: + attempt += 1 + cmd = ["ssh", "-p", str(SSH_PORT), "-o", "BatchMode=yes"] + _ssh_base_opts() + cmd += [f"{SSH_USER}@{ssh_host}", "true"] + proc = subprocess.run(cmd, check=False, capture_output=True, text=True) + if proc.returncode == 0: + if attempt > 1: + print(f"✅ SSH ready after {attempt} attempts") + return True + remaining = max(0, int(deadline - time.monotonic())) + print(f" ⏳ SSH not ready (attempt {attempt}, rc={proc.returncode}), retrying in {interval}s... ({remaining}s left)", flush=True) + for _ in range(interval): + time.sleep(1) + print(".", end="", flush=True) + print() + print(f"❌ SSH did not become ready within {timeout}s") + return False def rewrite_allowed_ips(config_text: str, allowed_cidr: str = None) -> str: @@ -46,9 +87,7 @@ def scp_and_enable(ssh_host, config_text): tmp = Path(tempfile.gettempdir()) / f"{WG_IFACE}.conf" tmp.write_text(config_text) - scp_cmd = ["scp", "-P", str(SSH_PORT)] - if SSH_KEY: - scp_cmd += ["-i", SSH_KEY] + scp_cmd = ["scp", "-P", str(SSH_PORT)] + _ssh_base_opts() scp_cmd += [str(tmp), f"{SSH_USER}@{ssh_host}:/tmp/{WG_IFACE}.conf"] subprocess.run(scp_cmd, check=True) @@ -66,17 +105,13 @@ sudo systemctl is-enabled wg-quick@{WG_IFACE} || true sudo systemctl is-active wg-quick@{WG_IFACE} || true sudo wg show {WG_IFACE} || true """ - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{ssh_host}", remote] subprocess.run(ssh_cmd, check=True) def ssh_capture(ssh_host: str, command: str) -> str: """Run a command on the remote host over SSH and capture stdout (stripped).""" - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{ssh_host}", command] out = subprocess.run(ssh_cmd, check=False, 
capture_output=True, text=True) if out.returncode != 0: @@ -143,9 +178,7 @@ def step_set_eth1_mac(iot_host: str): "EOF\n" ) - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) @@ -197,9 +230,7 @@ def step_set_hostname(iot_host: str, hostname: str | None): "hostname 2>/dev/null || true\n" ) - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) @@ -231,9 +262,16 @@ def step_git_pull(iot_host: str, branch: str = "main"): Executes git fetch, finds the latest tag, and checks it out in ~/bumble-auracast. """ + quoted_repo_url = shlex.quote(REPO_URL) remote = ( "set -e\n" - "cd ~/bumble-auracast\n" + "export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=accept-new'\n" + f"if [ ! 
-d {REPO_DIR}/.git ]; then\n" + f" echo 'Repository not found, cloning from {REPO_URL}...'\n" + f" git clone {quoted_repo_url} {REPO_DIR}\n" + "fi\n" + f"cd {REPO_DIR}\n" + f"git remote set-url origin {quoted_repo_url}\n" f"git fetch origin {shlex.quote(branch)} --tags\n" "LATEST_TAG=$(git tag --sort=-v:refname | head -n 1)\n" "if [ -z \"$LATEST_TAG\" ]; then\n" @@ -245,9 +283,7 @@ def step_git_pull(iot_host: str, branch: str = "main"): " git checkout \"$LATEST_TAG\"\n" "fi\n" ) - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) @@ -265,6 +301,49 @@ def step_git_pull(iot_host: str, branch: str = "main"): "err": stderr[-500:], } +def step_system_update(iot_host: str): + """Run system_update.sh on the device: poetry install, build/install sw_openocd, + flash firmware to both SWD radios, then restart services. + + First run takes 10-30 min (openocd build from source). Subsequent runs skip the + build if the openocd commit hasn't changed. The service restart at the end kills + the SSH session, so rc=255 is treated as success. + """ + script = f"{REPO_DIR}/src/auracast/server/system_update.sh" + remote = ( + "set -e\n" + "export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=accept-new'\n" + f"if [ ! -f {script} ]; then\n" + f" echo 'system_update.sh not in current tag, fetching from origin/main...'\n" + f" git -C {REPO_DIR} fetch origin main\n" + f" git -C {REPO_DIR} checkout origin/main -- src/auracast/server/system_update.sh\n" + "fi\n" + f"bash {script}\n" + ) + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() + ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] + + print(" ⚠️ system_update: first run may take 10-30 min (openocd build). 
Please wait...", flush=True) + proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) + stdout = (proc.stdout or "").strip() + stderr = (proc.stderr or "").strip() + + if proc.returncode not in (0, 255): + print(f"❌ system_update: failed rc={proc.returncode}: {stderr[-500:]}") + if stdout: + print(f"stdout: {stdout[-500:]}") + else: + print("✅ system_update: completed (radios flashed, services restarted)") + if stdout: + print(f"Output:\n{stdout[-1000:]}") + + return { + "rc": proc.returncode, + "out": stdout[-1000:], + "err": stderr[-500:], + } + + def step_update_app(iot_host: str): """Install dependencies using poetry for the checked-out code. @@ -272,13 +351,11 @@ def step_update_app(iot_host: str): """ remote = ( "set -e\n" - "cd ~/bumble-auracast\n" + f"cd {REPO_DIR}\n" "/home/caster/.local/bin/poetry config virtualenvs.in-project true\n" "/home/caster/.local/bin/poetry install\n" ) - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) @@ -329,9 +406,7 @@ def step_start_app(iot_host: str, app: str): "sudo systemctl is-active auracast-frontend.service || true\n" ) - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) @@ -367,16 +442,14 @@ def step_finish(iot_host: str): "set -e\n" "sudo reboot\n" ) - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) stdout = (proc.stdout or "").strip() stderr = (proc.stderr or 
"").strip() - if proc.returncode != 0: + if proc.returncode not in (0, 255): print(f"❌ finish: failed rc={proc.returncode}: {stderr}") else: print("✅ finish: reboot initiated") @@ -400,7 +473,7 @@ def main(): ap.add_argument( "--steps", nargs="+", - choices=["pull", "wg", "hostname", "mac", "update_app", "start_app", "finish", "all"], + choices=["pull", "wg", "hostname", "mac", "system_update", "update_app", "start_app", "finish", "all"], default=["all"], help="Which steps to run. Default: all", ) @@ -417,13 +490,12 @@ def main(): steps = args.steps if "all" in steps: steps = [ - "pull", - "hostname", - "mac", + "pull", + "hostname", + "mac", "wg", - "update_app", - "start_app", - "finish" + "system_update", + "finish", ] # Validate required args per step @@ -436,6 +508,11 @@ def main(): if name and re.fullmatch(r"\d+", name): name = f"summitwave-beacon{name}" + # Wait for SSH to be stably reachable before running any steps (handles first-boot reboots) + print(f"⏳ Waiting for SSH on {args.iot_host}...") + if not wait_for_ssh(args.iot_host): + raise SystemExit(f"❌ Could not reach {args.iot_host} via SSH. 
Aborting.") + + # Gather device facts once (may change after hostname step, but we at least log the initial state) facts = get_device_facts(args.iot_host) @@ -473,6 +550,13 @@ def main(): **mac_info, }) + if "system_update" in steps: + su_info = step_system_update(args.iot_host) + write_provision_log({ + "action": "system_update", + **get_device_facts(args.iot_host), + **su_info, + }) if "update_app" in steps: upd_info = step_update_app(args.iot_host) write_provision_log({ From 57ba52818c7c418ce2df619e1c2fed19429442bb Mon Sep 17 00:00:00 2001 From: pober Date: Wed, 27 May 2026 09:52:50 +0200 Subject: [PATCH 2/2] Cleanup after merge; Paul's SSH key now works for VPN. --- src/provision.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/provision.py b/src/provision.py index 8aa8726..d5eaecf 100644 --- a/src/provision.py +++ b/src/provision.py @@ -442,14 +442,14 @@ def step_add_ssh_key(iot_host: str): "set -e\n" "mkdir -p ~/.ssh\n" "chmod 700 ~/.ssh\n" - "echo " + shlex.quote(ssh_key) + " >> ~/.ssh/authorized_keys\n" + "touch ~/.ssh/authorized_keys\n" "chmod 600 ~/.ssh/authorized_keys\n" + "grep -qF " + shlex.quote(ssh_key) + " ~/.ssh/authorized_keys " + "|| echo " + shlex.quote(ssh_key) + " >> ~/.ssh/authorized_keys\n" "echo 'SSH key for paul added successfully'\n" ) - - ssh_cmd = ["ssh", "-p", str(SSH_PORT)] - if SSH_KEY: - ssh_cmd += ["-i", SSH_KEY] + + ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts() ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) @@ -509,7 +509,7 @@ def main(): ap.add_argument( "--steps", nargs="+", - choices=["pull", "wg", "hostname", "mac", "add_ssh_key", "update_app", "start_app", "finish", "all"], + choices=["pull", "wg", "hostname", "mac", "add_ssh_key", "system_update", "update_app", "start_app", "finish", "all"], default=["all"], help="Which steps to run. Default: all", )