Ops-dashboard/ops-agent/commands.yml.example
Scrum4Me Agent 4dd0490afc feat(backup): add ops-db backup commands, flow, and systemd timer
Adds pg_dump_ops_db, list_ops_backups, and cleanup_ops_backups to the
agent command whitelist. Includes a backup_ops_db flow YAML (dump +
30-day retention), and a systemd service/timer for daily automated
backups at 02:00.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 20:07:14 +02:00

238 lines
8 KiB
Text

# Whitelist of allowed commands for ops-agent.
# Copy to /etc/ops-agent/commands.yml on the host.
# Restart ops-agent after changes.
#
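# Schema per command:
#   cmd: required — command + static args as an array; executed directly
#     (no implicit shell, no interpolation)
#   cwd: optional — working directory for the subprocess
#   cwd_pattern: optional — working directory as a glob/pattern (resolved at
#     runtime); in this file the caller passes the path as the first argument
#     and it must start with the pattern, e.g. /srv/
#   args:
#     allowed: optional — whitelist of argument values accepted from the caller.
#       If absent or empty, the command takes no extra arguments.
#   preconditions: optional — named checks that must pass before the command
#     runs (e.g. git_status_clean)
#   stdin_from_body: optional — when true, the command reads its stdin from the
#     caller's request (presumably the request body — confirm against the agent)
#   description: optional — human-readable description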
commands:
docker_ps:
  # Read-only listing; no args.allowed list, so the caller cannot add arguments.
  cmd:
    - docker
    - ps
    - --format
    - table
  description: 'List running Docker containers'
git_status:
  # Read-only. Per the description, the caller passes the repository path as
  # the first argument and it must start with the cwd_pattern prefix.
  cmd:
    - git
    - status
    - --short
    - --branch
  cwd_pattern: '/srv/'
  description: 'Git status with branch info (first arg = repo path, must start with /srv/)'
git_log_ahead:
  # `@{upstream}..HEAD` selects commits reachable from HEAD but not from the
  # current branch's configured upstream, i.e. what a push would send.
  cmd:
    - git
    - log
    - '@{upstream}..HEAD'
    - --oneline
  cwd_pattern: '/srv/'
  description: 'Local commits not yet pushed (first arg = repo path)'
git_diff:
  # Diffing against HEAD covers both staged and unstaged changes.
  cmd:
    - git
    - diff
    - HEAD
  cwd_pattern: '/srv/'
  description: 'Uncommitted diff against HEAD (first arg = repo path)'
git_fetch:
  # Updates remote-tracking refs only; the working tree is not touched.
  # Without --all, `git fetch` contacts a single remote (the current branch's
  # upstream remote, falling back to origin), so the description says so.
  cmd: ["git", "fetch", "--quiet"]
  cwd_pattern: "/srv/"
  description: "Fetch the default remote silently (first arg = repo path)"
systemctl_status:
  # Read-only. Only the unit names listed under args.allowed are accepted
  # from the caller.
  cmd:
    - systemctl
    - status
    - --no-pager
    - '-l'
  args:
    allowed: [scrum4me-web, ops-agent, caddy, docker, nginx, postgresql]
  description: 'Show systemctl status for an allowed service'
journalctl_recent:
  # The static args end with -u, which needs a unit name as its value;
  # presumably the caller-supplied argument is appended there (confirm
  # against the agent's argument handling).
  cmd:
    - journalctl
    - --since
    - '1 hour ago'
    - '-n'
    - '100'
    - --no-pager
    - '-u'
  args:
    allowed: [scrum4me-web, ops-agent, caddy, docker, nginx, postgresql]
  description: 'Last 100 journal lines from the past hour for an allowed service'
caddy_show_config:
  # `caddy fmt` without --overwrite prints the formatted file to stdout and
  # leaves the file itself untouched.
  # Points at the same Caddyfile that caddy_validate, caddy_reload and
  # caddy_write_config operate on, so the config shown is the config managed.
  cmd: ["caddy", "fmt", "/srv/scrum4me/caddy/Caddyfile"]
  description: "Print the formatted Caddy config"
caddy_list_certs:
  # Emits one CERTFILE:<path> ... CERTEND record per certificate file.
  # `[ -f "$f" ] || continue` skips the unexpanded pattern when the glob
  # matches nothing. The folded scalar joins the lines below with single
  # spaces into one shell command.
  cmd:
    - sh
    - -c
    - >-
      for f in /data/caddy/certificates/*/*.crt;
      do [ -f "$f" ] || continue;
      echo "CERTFILE:$f";
      openssl x509 -noout -subject -issuer -dates -in "$f" 2>&1;
      echo "CERTEND";
      done
  description: 'List TLS cert info (subject, issuer, validity dates) from Caddy certificate store'
# ── Destructive / write commands ──────────────────────────────────────────
docker_compose_restart:
  # Runs in the compose project directory; the caller picks one of the
  # whitelisted service names.
  cmd:
    - docker
    - compose
    - restart
  cwd: '/srv/scrum4me/compose'
  args:
    allowed: [scrum4me-web, worker-idea, ops-dashboard, caddy, postgres]
  description: 'Restart a docker compose service (ops-agent user must be in the docker group)'
docker_compose_stop:
  # Same service whitelist as docker_compose_restart.
  cmd:
    - docker
    - compose
    - stop
  cwd: '/srv/scrum4me/compose'
  args:
    allowed: [scrum4me-web, worker-idea, ops-dashboard, caddy, postgres]
  description: 'Stop a docker compose service'
docker_compose_build:
  # Only the three application services are whitelisted for building.
  cmd:
    - docker
    - compose
    - build
  cwd: '/srv/scrum4me/compose'
  args:
    allowed: [scrum4me-web, worker-idea, ops-dashboard]
  description: 'Build a docker compose service image'
docker_compose_up:
  # -d detaches, so the command returns once the service has been started.
  cmd:
    - docker
    - compose
    - up
    - '-d'
  cwd: '/srv/scrum4me/compose'
  args:
    allowed: [scrum4me-web, worker-idea, ops-dashboard]
  description: 'Start or recreate a docker compose service in detached mode'
docker_compose_up_recreate:
  # --force-recreate replaces the container even when its configuration and
  # image reference are unchanged.
  cmd:
    - docker
    - compose
    - up
    - '-d'
    - --force-recreate
  cwd: '/srv/scrum4me/compose'
  args:
    allowed: [scrum4me-web, worker-idea, ops-dashboard]
  description: 'Force-recreate a docker compose service (picks up a rebuilt image)'
git_pull:
  # --ff-only makes the pull fail instead of creating a merge commit when the
  # branches have diverged. The git_status_clean precondition is what refuses
  # the pull on a dirty working tree (see description).
  cmd:
    - git
    - pull
    - --ff-only
  cwd_pattern: '/srv/'
  preconditions: [git_status_clean]
  description: 'Fast-forward pull — refused when working tree is dirty'
systemctl_restart:
  # Requires /etc/sudoers.d/ops-agent (see deploy/ops-agent/sudoers).
  # systemctl is invoked by absolute path under sudo.
  cmd:
    - sudo
    - /usr/bin/systemctl
    - restart
  args:
    allowed: [scrum4me-web, ops-agent, caddy]
  description: 'Restart an allowed systemd service via sudo'
caddy_validate:
  # Checks the on-disk Caddyfile only; the running server is not affected.
  cmd:
    - caddy
    - validate
    - --config
    - /srv/scrum4me/caddy/Caddyfile
  description: 'Validate /srv/scrum4me/caddy/Caddyfile without reloading'
caddy_reload:
  # Applies the on-disk Caddyfile to the running Caddy instance.
  cmd:
    - caddy
    - reload
    - --config
    - /srv/scrum4me/caddy/Caddyfile
  description: 'Reload Caddy with /srv/scrum4me/caddy/Caddyfile'
caddy_write_config:
  # Stdin is written to Caddyfile.new first and only renamed over the live
  # file if that write succeeded (&&); mv is atomic on the same filesystem.
  # The ops-agent user must own /srv/scrum4me/caddy/.
  # The folded scalar joins the two lines below with a single space.
  cmd:
    - sh
    - -c
    - >-
      cat > /srv/scrum4me/caddy/Caddyfile.new &&
      mv /srv/scrum4me/caddy/Caddyfile.new /srv/scrum4me/caddy/Caddyfile
  stdin_from_body: true
  description: 'Atomically replace /srv/scrum4me/caddy/Caddyfile (write stdin to .new, then mv)'
# ── Smoke tests / health checks ───────────────────────────────────────────
curl_smoke_scrum4me_web:
  # -s silences progress output; -f makes curl exit non-zero on HTTP >= 400.
  # Redirects (3xx) still exit 0 because -L is not given, so the description
  # states the >= 400 rule rather than "non-2xx".
  cmd: ["curl", "-sf", "--max-time", "10", "https://scrum4me.com"]
  description: "HTTP smoke test — fails (non-zero) if the site is unreachable or returns an HTTP status >= 400"
docker_compose_ps_worker:
  # `docker compose ps` exits 0 even when the filter matches no container, so
  # the listing alone can never fail this check. The first call keeps the
  # human-readable table in the output; the second prints only container IDs
  # (-q) and `grep -q .` turns "no IDs" into a non-zero exit status.
  cmd:
    - sh
    - -c
    - >-
      docker compose ps --filter status=running worker-idea;
      docker compose ps -q --filter status=running worker-idea | grep -q .
  cwd: "/srv/scrum4me/compose"
  description: "Verify worker-idea container is in the running state"
wait_for_health_worker:
  # Polls the log every 3 seconds; `timeout 60` ends the loop with a non-zero
  # exit if the marker has not appeared within 60 seconds.
  # NOTE(review): grep scans the whole file, so a "pre-flight passed" line left
  # over from an earlier run satisfies the check immediately — confirm the log
  # is reset when the worker restarts.
  cmd:
    - sh
    - -c
    - >-
      timeout 60 sh -c
      'until grep -q "pre-flight passed" /var/log/agent/current 2>/dev/null;
      do sleep 3;
      done && echo "pre-flight passed"'
  description: 'Wait up to 60s for MCP worker pre-flight check (/var/log/agent/current)'
# ── Scrum4Me web deployment steps ────────────────────────────────────────
npm_ci:
  # Clean install from the lockfile in the Scrum4Me checkout.
  # NOTE(review): `npm ci` also installs devDependencies unless
  # NODE_ENV=production or --omit=dev is in effect — confirm that
  # "production dependencies" in the description matches the agent's env.
  cmd:
    - npm
    - ci
  cwd: '/srv/scrum4me/repos/Scrum4Me'
  description: 'Install production dependencies for Scrum4Me web (npm ci)'
prisma_migrate_deploy:
  # Runs the Prisma CLI via npx from the Scrum4Me checkout.
  cmd:
    - npx
    - prisma
    - migrate
    - deploy
  cwd: '/srv/scrum4me/repos/Scrum4Me'
  description: 'Apply pending Prisma migrations for Scrum4Me web'
npm_run_build:
  # Runs the package's "build" script from the Scrum4Me checkout.
  cmd:
    - npm
    - run
    - build
  cwd: '/srv/scrum4me/repos/Scrum4Me'
  description: 'Build the Scrum4Me web application (next build)'
curl_smoke_scrum4me_thuis:
  # Captures only the HTTP status code (body discarded), echoes it, and exits
  # 0 only for 200 or 401. curl prints 000 when the request itself fails, so
  # an unreachable host also fails the check.
  # The folded scalar joins the lines below with single spaces.
  cmd:
    - sh
    - -c
    - >-
      code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 15
      https://thuis.jp-visser.nl/api/products);
      echo "HTTP $code";
      [ "$code" = "200" ] || [ "$code" = "401" ]
  description: 'Smoke test: /api/products must return 200 or 401'
# ── Ops-dashboard database backup ────────────────────────────────────────
pg_dump_ops_db:
  # Writes a custom-format (-Fc) dump of ops_dashboard to a timestamped file.
  # set -eu aborts on the first failing step, so a failed pg_dump can no
  # longer be reported as "Backup written" with exit status 0.
  # The dump goes to "<name>.partial" first and is renamed only on success;
  # the EXIT trap removes a leftover partial file. The ".partial" suffix does
  # not match '*.dump', so incomplete dumps never show up as backups.
  # NOTE(review): pg_dump is run without -U, so it connects with the default
  # role inside the container — confirm the container is set up for that.
  cmd:
    - sh
    - -c
    - |
      set -eu
      mkdir -p /srv/ops/backups
      FNAME="/srv/ops/backups/ops_db_$(date +%Y%m%d_%H%M).dump"
      TMP="$FNAME.partial"
      trap 'rm -f "$TMP"' EXIT
      docker exec postgres pg_dump -Fc ops_dashboard > "$TMP"
      mv "$TMP" "$FNAME"
      echo "Backup written: $FNAME"
      ls -lh "$FNAME"
  description: "Dump ops_dashboard DB via docker exec postgres to /srv/ops/backups/"
list_ops_backups:
  # Prints "<filename>\t<size in bytes>" per dump. `sort -r` on names that
  # embed a YYYYmmdd_HHMM timestamp yields newest-first order. Errors from
  # find are discarded and `|| true` keeps the exit status at 0, so a missing
  # directory produces an empty listing rather than a failure.
  # The folded scalar joins the lines below with single spaces.
  cmd:
    - sh
    - -c
    - >-
      find /srv/ops/backups -maxdepth 1 -name '*.dump' -printf '%f\t%s\n' 2>/dev/null
      | sort -r
      || true
  description: 'List ops_dashboard backup files (filename TAB size_bytes, newest-first)'
cleanup_ops_backups:
  # Retention sweep for /srv/ops/backups.
  # -maxdepth 1 and -type f limit deletion to regular files directly inside
  # the backup directory, the same scope list_ops_backups reports; without
  # them the sweep would descend into subdirectories.
  # -mtime +30 counts whole 24-hour periods, so a file is removed once it is
  # more than 30 full days old. -print after -delete lists only the files
  # that were actually removed.
  cmd:
    - find
    - /srv/ops/backups
    - -maxdepth
    - "1"
    - -type
    - "f"
    - -name
    - "*.dump"
    - -mtime
    - "+30"
    - -delete
    - -print
  description: "Delete ops_dashboard backup files older than 30 days"