Merge branch 'main' of ssh://ssh.pstruebi.xyz:222/pstruebi/castbox-provisioning

This commit is contained in:
2026-05-27 09:53:49 +02:00
+124 -41
View File
@@ -1,5 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import ipaddress, os, re, subprocess, tempfile, json, datetime, shlex import ipaddress, os, re, subprocess, tempfile, json, datetime, shlex, time
import secrets import secrets
from pathlib import Path from pathlib import Path
@@ -21,6 +21,47 @@ SSH_USER = os.getenv("IOT_SSH_USER", "caster")
SSH_PORT = int(os.getenv("IOT_SSH_PORT", "22")) SSH_PORT = int(os.getenv("IOT_SSH_PORT", "22"))
SSH_KEY = os.getenv("SSH_KEY") or None # path or None SSH_KEY = os.getenv("SSH_KEY") or None # path or None
PROVISION_LOG = os.getenv("PROVISION_LOG") or str((Path(__file__).resolve().parent / "provision.log")) PROVISION_LOG = os.getenv("PROVISION_LOG") or str((Path(__file__).resolve().parent / "provision.log"))
REPO_URL = os.getenv("REPO_URL", "ssh://git@gitea.summitwave.work:222/auracaster/bumble-auracast.git")
REPO_DIR = os.getenv("REPO_DIR", "~/bumble-auracast")
def _ssh_base_opts() -> list:
    """Return the option list that every ssh/scp invocation in this script shares.

    The list always carries four ``-o`` settings: accept host keys of hosts seen
    for the first time, a 10 s connect timeout, and keep-alive probes every 30 s
    with a maximum count of 6.  When the module-level ``SSH_KEY`` is set, it is
    appended as the identity file (``-i``).

    A fresh list is built on every call, so callers may extend it freely.
    """
    settings = (
        "StrictHostKeyChecking=accept-new",
        "ConnectTimeout=10",
        "ServerAliveInterval=30",
        "ServerAliveCountMax=6",
    )
    # Expand each setting into its ("-o", "<setting>") argument pair.
    base = [token for setting in settings for token in ("-o", setting)]
    identity = ["-i", SSH_KEY] if SSH_KEY else []
    return base + identity
def wait_for_ssh(ssh_host: str, timeout: int = 180, interval: int = 5) -> bool:
    """Poll SSH until one connection to the device succeeds or the timeout expires.

    Each attempt runs ``ssh ... true`` in batch mode (so it never blocks on a
    password prompt) using the shared base options.

    Args:
        ssh_host: Host name or address of the device to probe.
        timeout: Overall budget in seconds, measured on the monotonic clock.
        interval: Pause in seconds between a failed attempt and the next one.

    Returns:
        True as soon as a single attempt exits with status 0; False when the
        budget runs out first.  Note that one success is enough — no repeated
        "stability" probing is performed.
    """
    deadline = time.monotonic() + timeout
    # The probe command does not change between attempts, so build it once.
    cmd = ["ssh", "-p", str(SSH_PORT), "-o", "BatchMode=yes"] + _ssh_base_opts()
    cmd += [f"{SSH_USER}@{ssh_host}", "true"]

    attempt = 0
    while time.monotonic() < deadline:
        attempt += 1
        proc = subprocess.run(cmd, check=False, capture_output=True, text=True)
        if proc.returncode == 0:
            if attempt > 1:
                print(f"✅ SSH ready after {attempt} attempts")
            return True

        remaining = deadline - time.monotonic()
        if remaining <= interval:
            # Sleeping the full interval would carry us past the deadline, so no
            # further attempt could start; stop now instead of announcing a
            # retry that will never happen and delaying the failure report.
            break

        print(f" ⏳ SSH not ready (attempt {attempt}, rc={proc.returncode}), retrying in {interval}s... ({int(remaining)}s left)", flush=True)
        # Sleep in one-second ticks and print a dot per tick as a progress indicator.
        for _ in range(interval):
            time.sleep(1)
            print(".", end="", flush=True)
        print()

    print(f"❌ SSH did not become ready within {timeout}s")
    return False
def rewrite_allowed_ips(config_text: str, allowed_cidr: str = None) -> str: def rewrite_allowed_ips(config_text: str, allowed_cidr: str = None) -> str:
@@ -46,9 +87,7 @@ def scp_and_enable(ssh_host, config_text):
tmp = Path(tempfile.gettempdir()) / f"{WG_IFACE}.conf" tmp = Path(tempfile.gettempdir()) / f"{WG_IFACE}.conf"
tmp.write_text(config_text) tmp.write_text(config_text)
scp_cmd = ["scp", "-P", str(SSH_PORT)] scp_cmd = ["scp", "-P", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
scp_cmd += ["-i", SSH_KEY]
scp_cmd += [str(tmp), f"{SSH_USER}@{ssh_host}:/tmp/{WG_IFACE}.conf"] scp_cmd += [str(tmp), f"{SSH_USER}@{ssh_host}:/tmp/{WG_IFACE}.conf"]
subprocess.run(scp_cmd, check=True) subprocess.run(scp_cmd, check=True)
@@ -66,17 +105,13 @@ sudo systemctl is-enabled wg-quick@{WG_IFACE} || true
sudo systemctl is-active wg-quick@{WG_IFACE} || true sudo systemctl is-active wg-quick@{WG_IFACE} || true
sudo wg show {WG_IFACE} || true sudo wg show {WG_IFACE} || true
""" """
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{ssh_host}", remote] ssh_cmd += [f"{SSH_USER}@{ssh_host}", remote]
subprocess.run(ssh_cmd, check=True) subprocess.run(ssh_cmd, check=True)
def ssh_capture(ssh_host: str, command: str) -> str: def ssh_capture(ssh_host: str, command: str) -> str:
"""Run a command on the remote host over SSH and capture stdout (stripped).""" """Run a command on the remote host over SSH and capture stdout (stripped)."""
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{ssh_host}", command] ssh_cmd += [f"{SSH_USER}@{ssh_host}", command]
out = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) out = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
if out.returncode != 0: if out.returncode != 0:
@@ -143,9 +178,7 @@ def step_set_eth1_mac(iot_host: str):
"EOF\n" "EOF\n"
) )
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] ssh_cmd += [f"{SSH_USER}@{iot_host}", remote]
proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
@@ -197,9 +230,7 @@ def step_set_hostname(iot_host: str, hostname: str | None):
"hostname 2>/dev/null || true\n" "hostname 2>/dev/null || true\n"
) )
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] ssh_cmd += [f"{SSH_USER}@{iot_host}", remote]
proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
@@ -231,10 +262,16 @@ def step_git_pull(iot_host: str, branch: str = "main"):
Executes git fetch, finds the latest tag, and checks it out in ~/bumble-auracast. Executes git fetch, finds the latest tag, and checks it out in ~/bumble-auracast.
""" """
quoted_repo_url = shlex.quote(REPO_URL)
remote = ( remote = (
"set -e\n" "set -e\n"
"cd ~/bumble-auracast\n" "export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=accept-new'\n"
"git remote set-url origin https://gitea.summitwave.work/auracaster/bumble-auracast\n" f"if [ ! -d {REPO_DIR}/.git ]; then\n"
f" echo 'Repository not found, cloning from {REPO_URL}...'\n"
f" git clone {quoted_repo_url} {REPO_DIR}\n"
"fi\n"
f"cd {REPO_DIR}\n"
f"git remote set-url origin {quoted_repo_url}\n"
f"git fetch origin {shlex.quote(branch)} --tags\n" f"git fetch origin {shlex.quote(branch)} --tags\n"
"LATEST_TAG=$(git tag --sort=-v:refname | head -n 1)\n" "LATEST_TAG=$(git tag --sort=-v:refname | head -n 1)\n"
"if [ -z \"$LATEST_TAG\" ]; then\n" "if [ -z \"$LATEST_TAG\" ]; then\n"
@@ -246,9 +283,7 @@ def step_git_pull(iot_host: str, branch: str = "main"):
" git checkout \"$LATEST_TAG\"\n" " git checkout \"$LATEST_TAG\"\n"
"fi\n" "fi\n"
) )
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] ssh_cmd += [f"{SSH_USER}@{iot_host}", remote]
proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
@@ -266,6 +301,49 @@ def step_git_pull(iot_host: str, branch: str = "main"):
"err": stderr[-500:], "err": stderr[-500:],
} }
def step_system_update(iot_host: str):
    """Run ``system_update.sh`` from the checked-out repository on the device.

    Per the script's description in this project: it performs the poetry
    install, builds/installs sw_openocd, flashes firmware to both SWD radios and
    then restarts the services.  The first run can take 10-30 min because
    openocd is built from source; later runs skip the build when the openocd
    commit has not changed.

    If the script is missing from the currently checked-out tag, it is fetched
    from ``origin/main`` and checked out as a single file before being run.

    The service restart at the end of the script kills the SSH session, so an
    ssh exit status of 255 is not treated as a failure.  Because ssh also uses
    255 for its own errors (e.g. a connection that never came up), that case is
    reported with a distinct message rather than as a confirmed success.

    Args:
        iot_host: Host name or address of the device.

    Returns:
        A dict with ``rc`` (ssh exit status), ``out`` (last 1000 characters of
        stdout) and ``err`` (last 500 characters of stderr).
    """
    script = f"{REPO_DIR}/src/auracast/server/system_update.sh"
    # REPO_DIR is interpolated unquoted on purpose: its default starts with "~",
    # which the remote shell only expands when it is not quoted.
    remote = (
        "set -e\n"
        "export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=accept-new'\n"
        f"if [ ! -f {script} ]; then\n"
        " echo 'system_update.sh not in current tag, fetching from origin/main...'\n"
        f" git -C {REPO_DIR} fetch origin main\n"
        f" git -C {REPO_DIR} checkout origin/main -- src/auracast/server/system_update.sh\n"
        "fi\n"
        f"bash {script}\n"
    )
    ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
    ssh_cmd += [f"{SSH_USER}@{iot_host}", remote]

    print(" ⚠️ system_update: first run may take 10-30 min (openocd build). Please wait...", flush=True)
    proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
    stdout = (proc.stdout or "").strip()
    stderr = (proc.stderr or "").strip()

    if proc.returncode == 0:
        print("✅ system_update: completed (radios flashed, services restarted)")
        if stdout:
            print(f"Output:\n{stdout[-1000:]}")
    elif proc.returncode == 255:
        # Expected when the final service restart drops the session, but ssh
        # returns the same status for connection errors — do not claim success.
        print("⚠️ system_update: SSH session ended with rc=255 (expected if the service restart dropped the connection; verify the device if unsure)")
        if stderr:
            print(f"stderr: {stderr[-500:]}")
        if stdout:
            print(f"Output:\n{stdout[-1000:]}")
    else:
        print(f"❌ system_update: failed rc={proc.returncode}: {stderr[-500:]}")
        if stdout:
            print(f"stdout: {stdout[-500:]}")

    return {
        "rc": proc.returncode,
        "out": stdout[-1000:],
        "err": stderr[-500:],
    }
def step_update_app(iot_host: str): def step_update_app(iot_host: str):
"""Install dependencies using poetry for the checked-out code. """Install dependencies using poetry for the checked-out code.
@@ -273,13 +351,11 @@ def step_update_app(iot_host: str):
""" """
remote = ( remote = (
"set -e\n" "set -e\n"
"cd ~/bumble-auracast\n" f"cd {REPO_DIR}\n"
"/home/caster/.local/bin/poetry config virtualenvs.in-project true\n" "/home/caster/.local/bin/poetry config virtualenvs.in-project true\n"
"/home/caster/.local/bin/poetry install\n" "/home/caster/.local/bin/poetry install\n"
) )
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] ssh_cmd += [f"{SSH_USER}@{iot_host}", remote]
proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
@@ -330,9 +406,7 @@ def step_start_app(iot_host: str, app: str):
"sudo systemctl is-active auracast-frontend.service || true\n" "sudo systemctl is-active auracast-frontend.service || true\n"
) )
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] ssh_cmd += [f"{SSH_USER}@{iot_host}", remote]
proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
@@ -368,14 +442,14 @@ def step_add_ssh_key(iot_host: str):
"set -e\n" "set -e\n"
"mkdir -p ~/.ssh\n" "mkdir -p ~/.ssh\n"
"chmod 700 ~/.ssh\n" "chmod 700 ~/.ssh\n"
"echo " + shlex.quote(ssh_key) + " >> ~/.ssh/authorized_keys\n" "touch ~/.ssh/authorized_keys\n"
"chmod 600 ~/.ssh/authorized_keys\n" "chmod 600 ~/.ssh/authorized_keys\n"
"grep -qF " + shlex.quote(ssh_key) + " ~/.ssh/authorized_keys "
"|| echo " + shlex.quote(ssh_key) + " >> ~/.ssh/authorized_keys\n"
"echo 'SSH key for paul added successfully'\n" "echo 'SSH key for paul added successfully'\n"
) )
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] ssh_cmd += [f"{SSH_USER}@{iot_host}", remote]
proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
@@ -404,16 +478,14 @@ def step_finish(iot_host: str):
"set -e\n" "set -e\n"
"sudo reboot\n" "sudo reboot\n"
) )
ssh_cmd = ["ssh", "-p", str(SSH_PORT)] ssh_cmd = ["ssh", "-p", str(SSH_PORT)] + _ssh_base_opts()
if SSH_KEY:
ssh_cmd += ["-i", SSH_KEY]
ssh_cmd += [f"{SSH_USER}@{iot_host}", remote] ssh_cmd += [f"{SSH_USER}@{iot_host}", remote]
proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True) proc = subprocess.run(ssh_cmd, check=False, capture_output=True, text=True)
stdout = (proc.stdout or "").strip() stdout = (proc.stdout or "").strip()
stderr = (proc.stderr or "").strip() stderr = (proc.stderr or "").strip()
if proc.returncode != 0: if proc.returncode not in (0, 255):
print(f"❌ finish: failed rc={proc.returncode}: {stderr}") print(f"❌ finish: failed rc={proc.returncode}: {stderr}")
else: else:
print("✅ finish: reboot initiated") print("✅ finish: reboot initiated")
@@ -437,7 +509,7 @@ def main():
ap.add_argument( ap.add_argument(
"--steps", "--steps",
nargs="+", nargs="+",
choices=["pull", "wg", "hostname", "mac", "update_app", "start_app", "add_ssh_key", "finish", "all"], choices=["pull", "wg", "hostname", "mac", "add_ssh_key", "system_update", "update_app", "start_app", "finish", "all"],
default=["all"], default=["all"],
help="Which steps to run. Default: all", help="Which steps to run. Default: all",
) )
@@ -459,9 +531,8 @@ def main():
"hostname", "hostname",
"mac", "mac",
"wg", "wg",
"update_app", "system_update",
"start_app", "finish",
"finish"
] ]
# Validate required args per step # Validate required args per step
@@ -474,6 +545,11 @@ def main():
if name and re.fullmatch(r"\d+", name): if name and re.fullmatch(r"\d+", name):
name = f"summitwave-beacon{name}" name = f"summitwave-beacon{name}"
# Wait for SSH to be stably reachable before running any steps (handles first-boot reboots)
print(f"⏳ Waiting for SSH on {args.iot_host}...")
if not wait_for_ssh(args.iot_host):
raise SystemExit(f"❌ Could not reach {args.iot_host} via SSH. Aborting.")
# Gather device facts once (may change after hostname step, but we at least log the initial state) # Gather device facts once (may change after hostname step, but we at least log the initial state)
facts = get_device_facts(args.iot_host) facts = get_device_facts(args.iot_host)
@@ -518,6 +594,13 @@ def main():
**mac_info, **mac_info,
}) })
if "system_update" in steps:
su_info = step_system_update(args.iot_host)
write_provision_log({
"action": "system_update",
**get_device_facts(args.iot_host),
**su_info,
})
if "update_app" in steps: if "update_app" in steps:
upd_info = step_update_app(args.iot_host) upd_info = step_update_app(args.iot_host)
write_provision_log({ write_provision_log({