feat(ops): self-update script, systemd units, README install guide, recovery runbook

- deploy/ops-dashboard-updater/update.sh: git pull → docker build → force-recreate → smoke-test
- deploy/ops-dashboard-updater/install.sh: installs script + systemd units to host
- ops-dashboard-updater.service / .timer: oneshot + daily 03:00 scheduled trigger
- README.md: Installation and Configuration sections (env files, ops-agent, updater)
- docs/runbooks/recovery.md: agent-crash, DB corruption/restore, container failure, cert expiry

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scrum4Me Agent 2026-05-13 20:10:21 +02:00
parent 09050d5ce7
commit caeb5f3306
6 changed files with 361 additions and 0 deletions

View file

@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Install the ops-dashboard self-update script and systemd units.
# Run as root from within the repo.
#
# Copies update.sh into INSTALL_DIR and the .service/.timer units into
# SERVICE_DIR, then reloads systemd. Nothing is enabled or started here; the
# commands for that are printed at the end.
set -euo pipefail

# Every step below needs root (install -o root, writes under /etc and /opt).
# Fail fast with a clear message instead of a raw permission error.
if [[ "$(id -u)" -ne 0 ]]; then
  echo "ERROR: install.sh must be run as root." >&2
  exit 1
fi

# Repo root is two levels above this script (deploy/ops-dashboard-updater/).
REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
SOURCE_DIR="${REPO_DIR}/deploy/ops-dashboard-updater"
INSTALL_DIR=/opt/ops-dashboard-updater
SERVICE_DIR=/etc/systemd/system

echo "==> Installing update script to ${INSTALL_DIR}"
mkdir -p "${INSTALL_DIR}"
# 0750: executable by root only.
install -m 0750 -o root -g root \
  "${SOURCE_DIR}/update.sh" \
  "${INSTALL_DIR}/update.sh"

echo "==> Installing systemd units"
for unit in ops-dashboard-updater.service ops-dashboard-updater.timer; do
  install -m 0644 -o root -g root \
    "${SOURCE_DIR}/${unit}" \
    "${SERVICE_DIR}/${unit}"
done
# Make systemd pick up the new/changed unit files.
systemctl daemon-reload

echo ""
echo "==> Done. To enable automatic scheduled updates:"
echo " systemctl enable --now ops-dashboard-updater.timer"
echo ""
echo " To run a manual update now:"
echo " systemctl start ops-dashboard-updater.service"
echo " # or directly:"
echo " ${INSTALL_DIR}/update.sh"

View file

@ -0,0 +1,14 @@
[Unit]
Description=Self-update ops-dashboard (oneshot, triggered by timer or SSH)
# The update script fetches from a git remote, so it needs a configured
# network, not just the network stack being started. network-online.target is
# the synchronization point for that; Wants= pulls it in, After= orders on it.
Wants=network-online.target
After=network-online.target docker.service
[Service]
# oneshot: the unit is "activating" while the script runs and finishes when
# the script exits; a non-zero exit marks the unit as failed.
Type=oneshot
User=root
ExecStart=/opt/ops-dashboard-updater/update.sh
# Script output goes to the journal under the identifier below.
StandardOutput=journal
StandardError=journal
SyslogIdentifier=ops-dashboard-update
[Install]
# NOTE(review): enabling this unit directly makes it run on every boot via
# multi-user.target. The timer unit is the usual trigger — confirm this
# section is intended.
WantedBy=multi-user.target

View file

@ -0,0 +1,11 @@
# Timer for the ops-dashboard self-update.
# No Unit= is set, so this timer activates the unit with the same base name
# (ops-dashboard-updater.service).
[Unit]
Description=Scheduled self-update for ops-dashboard (optional)
[Timer]
# Check for updates every day at 03:00 local time.
# Disable this timer if you prefer manual-only updates via SSH.
OnCalendar=*-*-* 03:00:00
# Catch up on a run that was missed while the timer was inactive
# (e.g. the host was powered off at 03:00).
Persistent=true
[Install]
# Started together with the other timers once enabled.
WantedBy=timers.target

View file

@ -0,0 +1,56 @@
#!/usr/bin/env bash
# ops-dashboard self-updater.
# Meant to be run as root, either over SSH or from the systemd oneshot service.
# Never trigger it from the dashboard UI: the container that serves the UI is
# the one this script restarts.
set -euo pipefail

# Git checkout that is fetched and hard-reset to origin/main.
REPO_DIR='/srv/ops/repos/ops-dashboard'
# Compose file handed to `docker compose -f`.
COMPOSE_FILE='/srv/scrum4me/compose/docker-compose.yml'
# Compose service that gets built and recreated.
SERVICE='ops-dashboard'
# Identifier used when log lines are sent to the journal.
LOG_TAG='ops-dashboard-update'
#######################################
# Emit one UTC-timestamped log line.
# Globals:   LOG_TAG (read), JOURNAL_STREAM (read, set by systemd)
# Arguments: message words; joined with spaces
# Outputs:   exactly one "[<timestamp>] <message>" line on stdout
# Returns:   0 (mirroring to the journal is best-effort)
#######################################
log() {
  local line
  line="[$(date -u +%FT%TZ)] $*"
  printf '%s\n' "$line"
  # When systemd runs the script, stdout is already connected to the journal
  # (JOURNAL_STREAM is set), so mirroring would duplicate every line. Only
  # mirror for interactive/SSH runs, and never fail the update over logging.
  if [[ -z "${JOURNAL_STREAM:-}" ]] && command -v systemd-cat >/dev/null 2>&1; then
    printf '%s\n' "$line" | systemd-cat -t "$LOG_TAG" -p info 2>/dev/null || true
  fi
}
#######################################
# Report a fatal error and terminate the script.
# Arguments: message words; joined with spaces
# Outputs:   "[<UTC timestamp>] ERROR: <message>" on stderr
# Returns:   never returns; exits with status 1
#######################################
die() {
  printf '[%s] ERROR: %s\n' "$(date -u +%FT%TZ)" "$*" >&2
  exit 1
}
# ── 1. Pull latest code ────────────────────────────────────────────────────────
log "Pulling latest code from origin..."
git -C "$REPO_DIR" fetch --prune origin
CURRENT=$(git -C "$REPO_DIR" rev-parse HEAD)
# Hard reset: any local modification in the checkout is discarded.
git -C "$REPO_DIR" reset --hard origin/main
NEW=$(git -C "$REPO_DIR" rev-parse HEAD)
if [[ "$CURRENT" == "$NEW" ]]; then
  log "Already up-to-date at $NEW — nothing to rebuild."
  exit 0
fi
log "Updated $CURRENT → $NEW"

# The up-to-date check above compares the checkout's HEAD with origin/main.
# If a later step fails while HEAD already sits on $NEW, every following run
# would report "Already up-to-date" and never retry. On any non-zero exit,
# move the checkout back to $CURRENT so the next run attempts the update again.
restore_checkout_on_failure() {
  local rc=$?
  if (( rc != 0 )); then
    echo "[$(date -u +%FT%TZ)] ERROR: update to $NEW failed (exit $rc); restoring checkout to $CURRENT so the next run retries." >&2
    git -C "$REPO_DIR" reset --hard "$CURRENT" >&2 || true
  fi
}
trap restore_checkout_on_failure EXIT

# ── 2. Build new image ─────────────────────────────────────────────────────────
log "Building Docker image..."
docker compose -f "$COMPOSE_FILE" build "$SERVICE"

# ── 3. Restart container ───────────────────────────────────────────────────────
log "Restarting $SERVICE with new image..."
docker compose -f "$COMPOSE_FILE" up -d --force-recreate "$SERVICE"

# ── 4. Smoke test ──────────────────────────────────────────────────────────────
# Poll up to 12 times, sleeping 5 s between attempts.
log "Waiting for container to become healthy..."
for (( i = 1; i <= 12; i++ )); do
  # Ask compose for the container ID so this works whether or not the compose
  # file sets container_name; fall back to the service name as the container
  # name if compose reports nothing. Only the first ID is used.
  CONTAINER=$(docker compose -f "$COMPOSE_FILE" ps -q "$SERVICE" 2>/dev/null || true)
  CONTAINER=${CONTAINER%%$'\n'*}
  CONTAINER=${CONTAINER:-$SERVICE}

  # Empty STATUS means inspect failed or the container has no health state.
  STATUS=$(docker inspect --format='{{.State.Health.Status}}' "$CONTAINER" 2>/dev/null || true)
  if [[ "$STATUS" == "healthy" ]]; then
    log "Container is healthy after ${i}×5 s."
    break
  fi
  # Fallback: accept running if no HEALTHCHECK is defined
  RUNNING=$(docker inspect --format='{{.State.Running}}' "$CONTAINER" 2>/dev/null || echo false)
  if [[ "$RUNNING" == "true" && -z "$STATUS" ]]; then
    log "Container running (no HEALTHCHECK defined)."
    break
  fi
  if [[ $i -eq 12 ]]; then
    die "Container did not become healthy within 60 s. Check: docker logs $SERVICE"
  fi
  sleep 5
done
log "Update complete — $SERVICE is running commit $NEW."