- entrypoint.sh: chown → chmod a+rwX → fail-fast met diagnostiek voor AGENT_STATE_DIR en AGENT_LOG_DIR. Lost stille state.json permission denied op QNAP-share op (NAS-ACL blokkeert chown vanuit container). - bin/log-cleanup.sh: nieuwe hard-delete >2d (env-tunable) naast de conservatievere rotate-logs.sh (gzip 24u, delete 30d). - run-agent.sh: roept log-cleanup.sh aan bij startup en elke iteratie. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
123 lines
4.4 KiB
Bash
123 lines
4.4 KiB
Bash
#!/usr/bin/env bash
|
|
# entrypoint.sh — container startup
#
# Responsibilities:
#   1. Guarantee writable directories on the bind-mounts (UID/GID matching)
#   2. Start the health-server as a background process
#   3. gosu to the agent user and start the daemon loop
#
# Runs as root until step 3 — root is needed up to that point to set up the
# bind-mounts correctly when the share was created with different
# ownership.
|
|
|
|
set -euo pipefail

# log MESSAGE...
#   Print one diagnostic line to stderr, prefixed with "[entrypoint]".
log() { printf '[entrypoint] %s\n' "$*" >&2; }

# Runtime configuration. A value that is already present (and non-empty) in
# the container environment wins; otherwise the default below is assigned.
: "${AGENT_UID:=1000}"
: "${AGENT_GID:=1000}"
: "${AGENT_STATE_DIR:=/var/run/agent}"
: "${AGENT_LOG_DIR:=/var/log/agent}"
: "${AGENT_REPO_CACHE:=/var/cache/repos}"
: "${AGENT_HEALTH_PORT:=8080}"

# `:=` only sets a shell variable; a default chosen here would otherwise be
# invisible to child processes. Export so that every program started from
# this script sees the same effective values this script acts on.
export AGENT_UID AGENT_GID AGENT_STATE_DIR AGENT_LOG_DIR \
  AGENT_REPO_CACHE AGENT_HEALTH_PORT
|
|
|
|
# ----- 0. preflight: /var/cache mount type + writability -----------------

# _fs_type PATH
#   Print the filesystem type name of PATH as reported by `stat -f`, or
#   "unknown" when it cannot be determined. Always returns 0, so a missing
#   path or an unsupported stat option cannot abort the script through
#   `set -e` / `pipefail` before a diagnostic has been logged.
_fs_type() {
  local path="$1"
  local fstype=""
  # Preferred form first; fall back to parsing the "Type:" field of the
  # default `stat -f` output when the -c format option is not accepted.
  fstype=$(stat -f -c %T "$path" 2>/dev/null) \
    || fstype=$(stat -f "$path" 2>/dev/null | awk '/Type:/{print $NF}') \
    || fstype=""
  printf '%s\n' "${fstype:-unknown}"
}

_cache_fs=$(_fs_type /var/cache)
if [ "$_cache_fs" = "tmpfs" ]; then
  log "FATAL: /var/cache is tmpfs (likely missing bind-mount). Fix docker-compose.yml en doe \`compose up -d --force-recreate\`."
  exit 1
fi
# Real write probe, executed as the user running this script. This also
# catches a /var/cache that does not exist at all.
if ! touch /var/cache/.write-test 2>/dev/null; then
  log "FATAL: /var/cache niet writable als user $(id -u)."
  exit 1
fi
rm -f /var/cache/.write-test
log "/var/cache OK (fs=${_cache_fs})"

# Lighter, warning-only check for the log and state mounts. The configured
# directories are inspected (falling back to the stock paths when the
# variables are unset) so that an overridden location is the one checked.
_logdir_fs=$(_fs_type "${AGENT_LOG_DIR:-/var/log/agent}")
if [ "$_logdir_fs" = "tmpfs" ]; then
  log "WARN: ${AGENT_LOG_DIR:-/var/log/agent} is tmpfs — overleeft geen container-herstart."
fi
_statedir_fs=$(_fs_type "${AGENT_STATE_DIR:-/var/run/agent}")
if [ "$_statedir_fs" = "tmpfs" ]; then
  log "WARN: ${AGENT_STATE_DIR:-/var/run/agent} is tmpfs — overleeft geen container-herstart."
fi
|
|
|
|
# ----- 1. directories on bind-mounts ------------------------------------
log "ensuring directories on bind-mounts"

# ensure_writable DIR
#   Guarantee that DIR exists and passes `test -w` when run as the agent
#   user. Escalation: chown -> chmod a+rwX -> fail fast with diagnostics.
#   Needed because bind-mounts from the NAS share override the chown done
#   in the Dockerfile, and QNAP ACLs often block a second chown.
#   Returns 0 as soon as DIR is writable; otherwise logs a fix recipe for
#   the operator and exits the whole script with status 1.
ensure_writable() {
  local target="$1"
  local attempt
  local owner_mode

  mkdir -p "$target"

  for attempt in chown chmod; do
    # Both repairs are best-effort: a refusal shows up in the write check
    # below, not in the tool's exit status.
    if [ "$attempt" = chown ]; then
      chown "${AGENT_UID}:${AGENT_GID}" "$target" 2>/dev/null || true
    else
      chmod a+rwX "$target" 2>/dev/null || true
    fi

    gosu agent test -w "$target" || continue

    if [ "$attempt" = chown ]; then
      log "$target OK (writable als UID=${AGENT_UID})"
    else
      log "WARN: $target was niet writable — chmod a+rwX toegepast"
    fi
    return 0
  done

  # Neither repair helped: report the current ownership/mode and stop.
  owner_mode=$(stat -c '%U:%G %a' "$target" 2>/dev/null || echo '<onbekend>')
  log "FATAL: $target niet writable als UID=${AGENT_UID}"
  log " huidige stat: ${owner_mode}"
  log " fix op de NAS-host:"
  log " chown -R ${AGENT_UID}:${AGENT_GID} <host-pad voor $target>"
  log " of: chmod -R 0775 <host-pad voor $target>"
  log " (zie docker-compose.yml volumes-mapping voor het host-pad)"
  exit 1
}
|
|
|
|
# The state and log directories are mandatory: ensure_writable stops the
# container start when either of them cannot be made writable.
for _required_dir in "${AGENT_STATE_DIR}" "${AGENT_LOG_DIR}"; do
  ensure_writable "$_required_dir"
done

# Sub-directories and cache paths are not critical enough to fail fast, so
# the chown is best-effort. Writability of /var/cache itself was already
# checked earlier in this script.
_aux_dirs=(
  "${AGENT_LOG_DIR}/runs"
  "${AGENT_LOG_DIR}/jobs"
  "${AGENT_REPO_CACHE}"
  /var/cache/npm
  /var/cache/pnpm
)
mkdir -p "${_aux_dirs[@]}"
chown "${AGENT_UID}:${AGENT_GID}" "${_aux_dirs[@]}" 2>/dev/null || true
|
|
|
|
# ----- 2. health-server in the background -------------------------------
log "starting health-server on :${AGENT_HEALTH_PORT}"
# The redirection is set up by this shell before gosu runs; node itself is
# started as the agent user and left running in the background.
gosu agent node /opt/agent/bin/health-server.js \
  > "${AGENT_LOG_DIR}/health-server.log" 2>&1 &
HEALTH_PID=$!
log "health-server pid=${HEALTH_PID}"

# Initial state: starting.
# The target path is expanded here and handed to the child shell as $1.
# Expanding ${AGENT_STATE_DIR} inside the single-quoted child script only
# works when the variable is exported; with an unexported value the child
# sees it empty and the write goes to /state.json instead.
# The file is still written by the agent user, so ownership matches what
# the later agent processes need.
gosu agent /bin/bash -c 'cat > "$1"' _ "${AGENT_STATE_DIR}/state.json" <<EOF
{
"status": "starting",
"startedAt": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"lastBatchAt": null,
"lastBatchExit": null,
"consecutiveFailures": 0
}
EOF
|
|
|
|
# ----- 3. drop privileges, bootstrap repos, start daemon-loop -----------
# repo-bootstrap.sh is executed as the agent user, not as root, so the
# repos it clones are owned by agent and end up under
# ~agent/Projects/<name>. According to the original note, that is where
# scrum4me-mcp's resolveRepoRoot looks through its convention fallback.
log "dropping to agent user and bootstrapping repos"
# A failing bootstrap is reported but does not stop the container start.
if ! gosu agent /opt/agent/bin/repo-bootstrap.sh \
  >> "${AGENT_LOG_DIR}/repo-bootstrap.log" 2>&1; then
  log "WARN: repo-bootstrap returned non-zero (continuing)"
fi

log "starting run-agent.sh"
# exec replaces this shell with the daemon loop, running as agent.
exec gosu agent /opt/agent/bin/run-agent.sh
|