feat(ops): self-update script, systemd units, README install guide, recovery runbook
- deploy/ops-dashboard-updater/update.sh: git pull → docker build → force-recreate → smoke-test
- deploy/ops-dashboard-updater/install.sh: installs script + systemd units to host
- ops-dashboard-updater.service / .timer: oneshot + daily 03:00 scheduled trigger
- README.md: Installation and Configuration sections (env files, ops-agent, updater)
- docs/runbooks/recovery.md: agent-crash, DB corruption/restore, container failure, cert expiry

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
09050d5ce7
commit
caeb5f3306
6 changed files with 361 additions and 0 deletions
33
deploy/ops-dashboard-updater/install.sh
Normal file
33
deploy/ops-dashboard-updater/install.sh
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env bash
# Install the ops-dashboard self-update script and systemd units.
#
# Copies update.sh to INSTALL_DIR and the .service/.timer units to SERVICE_DIR,
# then reloads systemd. Nothing is enabled or started; the closing message
# prints the commands for that.
#
# Run as root from within the repo.
set -euo pipefail

# Fail early with a clear message instead of a permission error halfway through.
if ((EUID != 0)); then
  echo "ERROR: install.sh must be run as root." >&2
  exit 1
fi

# Repo root, resolved from this script's location (deploy/ops-dashboard-updater/).
REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
readonly REPO_DIR
readonly SRC_DIR="${REPO_DIR}/deploy/ops-dashboard-updater"
readonly INSTALL_DIR=/opt/ops-dashboard-updater
readonly SERVICE_DIR=/etc/systemd/system

# Check every source file up front so a partial checkout cannot leave a
# half-installed updater behind.
for src_name in update.sh ops-dashboard-updater.service ops-dashboard-updater.timer; do
  if [[ ! -f "${SRC_DIR}/${src_name}" ]]; then
    echo "ERROR: missing source file ${SRC_DIR}/${src_name}" >&2
    exit 1
  fi
done

echo "==> Installing update script to ${INSTALL_DIR}"
# install -d gives the directory a fixed owner and mode regardless of umask.
install -d -m 0755 -o root -g root "${INSTALL_DIR}"
install -m 0750 -o root -g root \
  "${SRC_DIR}/update.sh" \
  "${INSTALL_DIR}/update.sh"

echo "==> Installing systemd units"
install -m 0644 -o root -g root \
  "${SRC_DIR}/ops-dashboard-updater.service" \
  "${SERVICE_DIR}/ops-dashboard-updater.service"
install -m 0644 -o root -g root \
  "${SRC_DIR}/ops-dashboard-updater.timer" \
  "${SERVICE_DIR}/ops-dashboard-updater.timer"

# Make systemd pick up the new or changed unit files.
systemctl daemon-reload

echo ""
echo "==> Done. To enable automatic scheduled updates:"
echo " systemctl enable --now ops-dashboard-updater.timer"
echo ""
echo " To run a manual update now:"
echo " systemctl start ops-dashboard-updater.service"
echo " # or directly:"
echo " ${INSTALL_DIR}/update.sh"
|
||||
14
deploy/ops-dashboard-updater/ops-dashboard-updater.service
Normal file
14
deploy/ops-dashboard-updater/ops-dashboard-updater.service
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Oneshot unit that runs the ops-dashboard self-update script once per start.
# Start it manually (systemctl start) or let ops-dashboard-updater.timer do it.
[Unit]
Description=Self-update ops-dashboard (oneshot, triggered by timer or SSH)
# update.sh fetches from the git remote, so wait for a configured network
# (network-online.target) rather than just network.target, which only says
# the network stack has been started.
Wants=network-online.target
After=network-online.target docker.service

[Service]
Type=oneshot
User=root
ExecStart=/opt/ops-dashboard-updater/update.sh
# Script output goes to the journal under the identifier below:
#   journalctl -t ops-dashboard-update
StandardOutput=journal
StandardError=journal
SyslogIdentifier=ops-dashboard-update

[Install]
# NOTE(review): enabling this service directly (instead of the timer) pulls it
# into multi-user.target, i.e. an update runs on every boot. Scheduled updates
# only need the timer enabled — confirm this section is wanted.
WantedBy=multi-user.target
|
||||
11
deploy/ops-dashboard-updater/ops-dashboard-updater.timer
Normal file
11
deploy/ops-dashboard-updater/ops-dashboard-updater.timer
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Timer that starts ops-dashboard-updater.service (the unit of the same name)
# on a daily schedule. Optional: leave it disabled for manual-only updates.
[Unit]
Description=Scheduled self-update for ops-dashboard (optional)

[Timer]
# Fire once a day at 03:00 in the host's local time zone.
# Disable this timer if you prefer manual-only updates via SSH.
OnCalendar=*-*-* 03:00:00
# Catch up after boot if the host was off when the last run was due.
Persistent=true

[Install]
WantedBy=timers.target
|
||||
56
deploy/ops-dashboard-updater/update.sh
Normal file
56
deploy/ops-dashboard-updater/update.sh
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env bash
# Self-update script for ops-dashboard.
#
# Steps: fetch and hard-reset the checkout to origin/main, rebuild the image
# with Docker Compose, force-recreate the service, then poll the container
# until it is up.
#
# Run as root via SSH or through the ops-dashboard-updater.service oneshot unit.
# Do NOT invoke this through the UI — it restarts the container serving the UI.
set -euo pipefail

# Fixed deployment layout; readonly so nothing later in the script can
# accidentally repoint a destructive git/docker command.
readonly REPO_DIR=/srv/ops/repos/ops-dashboard                  # git checkout that is reset to origin/main
readonly COMPOSE_FILE=/srv/scrum4me/compose/docker-compose.yml  # Compose file passed to every `docker compose -f`
readonly SERVICE=ops-dashboard                                  # Compose service to build and recreate
readonly LOG_TAG=ops-dashboard-update                           # journal identifier used by log()
|
||||
|
||||
#######################################
# Print a UTC-timestamped message and, when useful, mirror it to the journal.
# Globals:   LOG_TAG (read) - journal identifier; defaults to
#            ops-dashboard-update if unset
#            JOURNAL_STREAM (read) - set by systemd when stdout/stderr is
#            already connected to the journal
# Arguments: $* - message text
# Outputs:   exactly one line on stdout
# Returns:   0 (journal forwarding is best-effort)
#######################################
log() {
  local line
  line="[$(date -u +%FT%TZ)] $*"
  printf '%s\n' "$line"
  # Under the systemd unit stdout already lands in the journal, so forwarding
  # again would duplicate every line. Forward only for interactive/SSH runs.
  if [[ -z "${JOURNAL_STREAM:-}" ]] && command -v systemd-cat >/dev/null 2>&1; then
    # Best-effort: a missing or unreachable journald must not abort the update.
    printf '%s\n' "$line" \
      | systemd-cat -t "${LOG_TAG:-ops-dashboard-update}" -p info 2>/dev/null || true
  fi
}
|
||||
# Report a fatal error: write a UTC-timestamped "ERROR:" line to stderr,
# then terminate the script with exit status 1.
die() {
  printf '[%s] ERROR: %s\n' "$(date -u +%FT%TZ)" "$*" >&2
  exit 1
}
|
||||
|
||||
# ── 1. Pull latest code ────────────────────────────────────────────────────────
# STATE_FILE holds the commit of the last deployment that passed the smoke
# test. The up-to-date check compares against it instead of the pre-pull HEAD:
# otherwise a run that reset the checkout and then failed to build or start
# would make every later run report "already up-to-date" and never deploy.
STATE_FILE=/var/lib/ops-dashboard-updater/deployed-commit

log "Pulling latest code from origin..."
git -C "$REPO_DIR" fetch --prune origin
CURRENT=$(git -C "$REPO_DIR" rev-parse HEAD)

if [[ -s "$STATE_FILE" ]]; then
  DEPLOYED=$(<"$STATE_FILE")
else
  # No record yet: assume the current checkout is what is deployed, and
  # persist that before the tree is touched so a failure below is retried.
  DEPLOYED=$CURRENT
  mkdir -p "${STATE_FILE%/*}"
  printf '%s\n' "$DEPLOYED" >"$STATE_FILE"
fi

# Discards any local changes in the checkout; the remote branch is authoritative.
git -C "$REPO_DIR" reset --hard origin/main
NEW=$(git -C "$REPO_DIR" rev-parse HEAD)

if [[ "$DEPLOYED" == "$NEW" ]]; then
  log "Already up-to-date at $NEW — nothing to rebuild."
  exit 0
fi
log "Updated $DEPLOYED → $NEW"

# ── 2. Build new image ─────────────────────────────────────────────────────────
log "Building Docker image..."
docker compose -f "$COMPOSE_FILE" build "$SERVICE"

# ── 3. Restart container ───────────────────────────────────────────────────────
log "Restarting $SERVICE with new image..."
docker compose -f "$COMPOSE_FILE" up -d --force-recreate "$SERVICE"

# ── 4. Smoke test ──────────────────────────────────────────────────────────────
log "Waiting for container to become healthy..."
HEALTH_TIMEOUT=60 # seconds to wait in total
POLL_INTERVAL=5   # seconds between checks
READY=false
CONTAINER=$SERVICE
for ((WAITED = 0; WAITED <= HEALTH_TIMEOUT; WAITED += POLL_INTERVAL)); do
  # Resolve the container through Compose: its name only equals the service
  # name when container_name is set. Fall back to the service name if the
  # lookup yields nothing.
  CONTAINER=$(docker compose -f "$COMPOSE_FILE" ps -a -q "$SERVICE" 2>/dev/null || true)
  CONTAINER=${CONTAINER%%$'\n'*} # first ID if the service has several containers
  CONTAINER=${CONTAINER:-$SERVICE}

  # Prints nothing (instead of a template error) when no HEALTHCHECK exists.
  STATUS=$(docker inspect \
    --format='{{if .State.Health}}{{.State.Health.Status}}{{end}}' \
    "$CONTAINER" 2>/dev/null || true)
  if [[ "$STATUS" == "healthy" ]]; then
    log "Container is healthy after ${WAITED} s."
    READY=true
    break
  fi

  # Fallback: accept running if no HEALTHCHECK is defined
  RUNNING=$(docker inspect --format='{{.State.Running}}' "$CONTAINER" 2>/dev/null || echo false)
  if [[ "$RUNNING" == "true" && -z "$STATUS" ]]; then
    log "Container running (no HEALTHCHECK defined)."
    READY=true
    break
  fi

  # No sleep after the final check, so the total wait equals HEALTH_TIMEOUT.
  if ((WAITED < HEALTH_TIMEOUT)); then
    sleep "$POLL_INTERVAL"
  fi
done

if [[ "$READY" != "true" ]]; then
  die "Container did not become healthy within ${HEALTH_TIMEOUT} s. Check: docker logs $CONTAINER"
fi

# Record the deployed commit only after the smoke test has passed.
printf '%s\n' "$NEW" >"$STATE_FILE"
log "Update complete — $SERVICE is running commit $NEW."
|
||||
Loading…
Add table
Add a link
Reference in a new issue