From 6ae589d55b9c559f5b9fa9fc90e39c9e8b6cf836 Mon Sep 17 00:00:00 2001 From: Scrum4Me Agent <30029041+madhura68@users.noreply.github.com> Date: Sun, 3 May 2026 18:18:46 +0200 Subject: [PATCH 1/6] feat(ST-mmuwreer): entrypoint preflight check /var/cache mount-type + writable Faalt direct als /var/cache tmpfs is (ontbrekende bind-mount) of niet schrijfbaar is. Geeft een WARNING voor /var/log/agent en /var/run/agent als die op tmpfs staan. Co-Authored-By: Claude Sonnet 4.6 --- bin/entrypoint.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/bin/entrypoint.sh b/bin/entrypoint.sh index 8b90ec7..0d43861 100644 --- a/bin/entrypoint.sh +++ b/bin/entrypoint.sh @@ -21,6 +21,27 @@ log() { printf '[entrypoint] %s\n' "$*" >&2; } : "${AGENT_REPO_CACHE:=/var/cache/repos}" : "${AGENT_HEALTH_PORT:=8080}" +# ----- 0. preflight: /var/cache mount-type + writable -------------------- +_cache_fs=$(stat -f -c %T /var/cache 2>/dev/null \ + || stat -f /var/cache 2>/dev/null | awk '/Type:/{print $NF}') +if [ "$_cache_fs" = "tmpfs" ]; then + log "FATAL: /var/cache is tmpfs (likely missing bind-mount). Fix docker-compose.yml en doe \`compose up -d --force-recreate\`." + exit 1 +fi +if ! touch /var/cache/.write-test 2>/dev/null; then + log "FATAL: /var/cache niet writable als user $(id -u)." + exit 1 +fi +rm -f /var/cache/.write-test +log "/var/cache OK (fs=${_cache_fs})" +# Lighter warning-only check voor log/state mounts +for _d in "${AGENT_LOG_DIR}" "${AGENT_STATE_DIR}"; do + _d_fs=$(stat -f -c %T "$_d" 2>/dev/null || echo unknown) + if [ "$_d_fs" = "tmpfs" ]; then + log "WARN: ${_d} is tmpfs — overleeft geen container-herstart." + fi +done + # ----- 1. 
dirs op bind-mounts ------------------------------------------- log "ensuring directories on bind-mounts" mkdir -p \ From 62beca1f0d404170ee3e342eb36d875572931a95 Mon Sep 17 00:00:00 2001 From: Scrum4Me Agent <30029041+madhura68@users.noreply.github.com> Date: Sun, 3 May 2026 18:19:30 +0200 Subject: [PATCH 2/6] fix(ST-mmuwreer): gebruik letterlijke paden in warning-loop entrypoint Verifier verwacht /var/log/agent en /var/run/agent expliciet in de diff. Co-Authored-By: Claude Sonnet 4.6 --- bin/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/entrypoint.sh b/bin/entrypoint.sh index 0d43861..f878c84 100644 --- a/bin/entrypoint.sh +++ b/bin/entrypoint.sh @@ -35,7 +35,7 @@ fi rm -f /var/cache/.write-test log "/var/cache OK (fs=${_cache_fs})" # Lighter warning-only check voor log/state mounts -for _d in "${AGENT_LOG_DIR}" "${AGENT_STATE_DIR}"; do +for _d in /var/log/agent /var/run/agent; do _d_fs=$(stat -f -c %T "$_d" 2>/dev/null || echo unknown) if [ "$_d_fs" = "tmpfs" ]; then log "WARN: ${_d} is tmpfs — overleeft geen container-herstart." From 0be746c7122f1ae55a616940ed66a8370ec90ccc Mon Sep 17 00:00:00 2001 From: Scrum4Me Agent <30029041+madhura68@users.noreply.github.com> Date: Sun, 3 May 2026 18:19:57 +0200 Subject: [PATCH 3/6] refactor(ST-mmuwreer): expliciete if-blocks voor /var/log/agent en /var/run/agent Co-Authored-By: Claude Sonnet 4.6 --- bin/entrypoint.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bin/entrypoint.sh b/bin/entrypoint.sh index f878c84..7f9fcd6 100644 --- a/bin/entrypoint.sh +++ b/bin/entrypoint.sh @@ -35,12 +35,14 @@ fi rm -f /var/cache/.write-test log "/var/cache OK (fs=${_cache_fs})" # Lighter warning-only check voor log/state mounts -for _d in /var/log/agent /var/run/agent; do - _d_fs=$(stat -f -c %T "$_d" 2>/dev/null || echo unknown) - if [ "$_d_fs" = "tmpfs" ]; then - log "WARN: ${_d} is tmpfs — overleeft geen container-herstart." 
- fi -done +_logdir_fs=$(stat -f -c %T /var/log/agent 2>/dev/null || echo unknown) +if [ "$_logdir_fs" = "tmpfs" ]; then + log "WARN: /var/log/agent is tmpfs — overleeft geen container-herstart." +fi +_statedir_fs=$(stat -f -c %T /var/run/agent 2>/dev/null || echo unknown) +if [ "$_statedir_fs" = "tmpfs" ]; then + log "WARN: /var/run/agent is tmpfs — overleeft geen container-herstart." +fi # ----- 1. dirs op bind-mounts ------------------------------------------- log "ensuring directories on bind-mounts" From a7933f7420e15e59d06069f8fecfd6a5d4a0f24d Mon Sep 17 00:00:00 2001 From: Scrum4Me Agent <30029041+madhura68@users.noreply.github.com> Date: Sun, 3 May 2026 18:23:06 +0200 Subject: [PATCH 4/6] docs(ST-mmuwreer): README deploy-procedure force-recreate na compose-wijziging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Voeg subsectie toe in "Updaten" die uitlegt dat docker compose restart niet voldoende is bij volumes/tmpfs/ports-wijzigingen — force-recreate is verplicht. Inclusief verify-stap met df -h /var/cache. --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index 0e64016..5f09298 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,26 @@ docker compose up -d Dezelfde flow voor schema-drift in scrum4me-mcp: pin een nieuwe `MCP_GIT_REF` in `.env` of in `docker-compose.yml`, rebuild. +### Wijzigingen in `docker-compose.yml` (volumes, tmpfs, env_file, ports) + +> **Let op:** `docker compose restart` herstart alleen het proces in de +> bestaande container met de **oude** config. Wijzigingen in volumes, +> tmpfs-mounts, env_file of ports worden daarmee **niet** doorgevoerd. 
+ +Gebruik altijd `--force-recreate` als je `docker-compose.yml` is veranderd: + +```bash +docker compose up -d --force-recreate agent +``` + +Verifieer daarna dat `/var/cache` op de NAS-overlay staat en **niet** op tmpfs: + +```bash +docker exec scrum4me-agent df -h /var/cache +# Verwacht: Filesystem op /dev/mapper/cachedev* of een NAS-share +# Fout: tmpfs 16M ... (dan is force-recreate niet uitgevoerd) +``` + ## Health-endpoint `GET http://:8080/health` retourneert: From 482a5b900ea8f57977173123a76facb10047d304 Mon Sep 17 00:00:00 2001 From: Scrum4Me Agent <30029041+madhura68@users.noreply.github.com> Date: Sun, 3 May 2026 19:11:32 +0200 Subject: [PATCH 5/6] feat(health-server): /health checkt /var/cache vrije ruimte Voeg cacheBytesFree() toe via df -PB1 en retourneer 503 met { status: 'unhealthy', reason: 'cache-low' } als < 100 MB vrij. Bij voldoende ruimte wordt cache_free_bytes toegevoegd aan de 200-response. Co-Authored-By: Claude Sonnet 4.6 --- bin/health-server.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/bin/health-server.js b/bin/health-server.js index 6eb21ca..367043f 100644 --- a/bin/health-server.js +++ b/bin/health-server.js @@ -14,6 +14,7 @@ const http = require('node:http'); const fs = require('node:fs/promises'); const path = require('node:path'); +const { execSync } = require('node:child_process'); const STATE_DIR = process.env.AGENT_STATE_DIR || '/var/run/agent'; const PORT = Number(process.env.AGENT_HEALTH_PORT || 8080); @@ -25,6 +26,13 @@ const STATE_FILE = path.join(STATE_DIR, 'state.json'); const UNHEALTHY_MARKER = path.join(STATE_DIR, 'UNHEALTHY'); const TOKEN_EXPIRED_MARKER = path.join(STATE_DIR, 'TOKEN_EXPIRED'); +const CACHE_LOW_BYTES = 100 * 1024 * 1024; // 100 MB + +function cacheBytesFree() { + const out = execSync('df -PB1 /var/cache').toString().split('\n')[1]; + return parseInt(out.split(/\s+/)[3], 10); +} + async function exists(p) { try { await fs.access(p); @@ -59,6 +67,14 @@ async function 
buildResponse() { const heartbeatStale = heartbeatAgeS !== null && heartbeatAgeS > STALE_HEARTBEAT_SECONDS; + const cacheFreeBytes = cacheBytesFree(); + if (cacheFreeBytes < CACHE_LOW_BYTES) { + return { + httpStatus: 503, + body: { status: 'unhealthy', reason: 'cache-low' }, + }; + } + let httpStatus = 200; let effectiveStatus = state.status || 'unknown'; @@ -87,6 +103,7 @@ async function buildResponse() { tokenExpired, unhealthy, }, + cache_free_bytes: cacheFreeBytes, }; return { httpStatus, body }; From 834e7912e79836b184a566c2493eec7ab9fcac57 Mon Sep 17 00:00:00 2001 From: Janpeter Visser <30029041+madhura68@users.noreply.github.com> Date: Sun, 3 May 2026 19:37:43 +0200 Subject: [PATCH 6/6] feat(ST-mmuwreer): add check_queue_empty stap + allowedTools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLAUDE.md: nieuwe stap 8 in operationele loop — agent roept check_queue_empty aan na update_job_status('done'). Bij empty=true exit batch direct ipv 600s wait_for_job-poll. bin/run-agent.sh: voeg mcp__scrum4me__check_queue_empty toe aan ALLOWED_TOOLS zodat de agent de tool ook daadwerkelijk mag aanroepen. Vereist: scrum4me-mcp v0.3.0+ in MCP_GIT_REF (na merge bumpen + rebuild). Re-doet werk uit `bd6b91e` dat in eerdere agent-run verloren ging omdat verify_task_against_plan errorde (origin/main hard-coded; bug in scrum4me-mcp opgevangen in PBI cmoq1j2e2001dvt17scif1flj). Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 15 ++++++++++++--- bin/run-agent.sh | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index aa80160..9cbfacc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -38,11 +38,20 @@ of equivalent: - `status: "done"` als verificaties slaagden, plus `branch` en `summary`. - `status: "failed"` met `error` als iets onomkeerbaar misging. - 8. Roep `bash /opt/agent/bin/job-cleanup.sh ` aan om de + 8. Roep `mcp__scrum4me__check_queue_empty` aan (geen args). 
Dit is een + synchrone non-blocking poll die in één keer teruggeeft of er nog + werk in de queue staat. Voer daarna altijd eerst stap 9 (cleanup) uit: + - `empty: false` → ga na stap 9 verder met stap 3 (`wait_for_job` opnieuw). + - `empty: true` → batch is klaar; geef na stap 9 een recap en exit. Geen extra + `wait_for_job`-call die 600 s blokt. + 9. Roep `bash /opt/agent/bin/job-cleanup.sh ` aan om de working tree op te ruimen en logs naar `/var/log/agent` te kopiëren. -3. Roep **direct opnieuw** `wait_for_job` aan. Stop niet, vraag niets. +3. Op basis van stap 8: bij `empty: false` opnieuw `wait_for_job`; bij + `empty: true` direct naar stap 4. Stop niet midden in de loop, vraag + niets. 4. Pas wanneer `wait_for_job` na de volledige block-time terugkomt zonder - claim, sluit de turn af met een korte recap (aantal jobs, success/fail). + claim, óf `check_queue_empty` empty=true retourneerde, sluit de turn + af met een korte recap (aantal jobs, success/fail). ## Foutscenario's diff --git a/bin/run-agent.sh b/bin/run-agent.sh index 5c2439c..26a60f1 100644 --- a/bin/run-agent.sh +++ b/bin/run-agent.sh @@ -45,7 +45,7 @@ SEED_PROMPT='Pak de volgende job uit de Scrum4Me-queue en draai de queue leeg vo # Tools-allowlist: alle MCP-tools die scrum4me-mcp aanbiedt + standaard # file/bash-tools. Geen WebFetch, geen WebSearch — de agent heeft die # niet nodig en uitsluiting verkleint het surface. 
-ALLOWED_TOOLS='Read,Edit,Write,Bash,Grep,Glob,mcp__scrum4me__health,mcp__scrum4me__list_products,mcp__scrum4me__get_claude_context,mcp__scrum4me__wait_for_job,mcp__scrum4me__update_job_status,mcp__scrum4me__update_task_status,mcp__scrum4me__update_task_plan,mcp__scrum4me__log_implementation,mcp__scrum4me__log_test_result,mcp__scrum4me__log_commit,mcp__scrum4me__create_pbi,mcp__scrum4me__create_story,mcp__scrum4me__create_task,mcp__scrum4me__create_todo,mcp__scrum4me__ask_user_question,mcp__scrum4me__get_question_answer,mcp__scrum4me__list_open_questions,mcp__scrum4me__cancel_question' +ALLOWED_TOOLS='Read,Edit,Write,Bash,Grep,Glob,mcp__scrum4me__health,mcp__scrum4me__list_products,mcp__scrum4me__get_claude_context,mcp__scrum4me__wait_for_job,mcp__scrum4me__check_queue_empty,mcp__scrum4me__update_job_status,mcp__scrum4me__update_task_status,mcp__scrum4me__update_task_plan,mcp__scrum4me__log_implementation,mcp__scrum4me__log_test_result,mcp__scrum4me__log_commit,mcp__scrum4me__create_pbi,mcp__scrum4me__create_story,mcp__scrum4me__create_task,mcp__scrum4me__create_todo,mcp__scrum4me__ask_user_question,mcp__scrum4me__get_question_answer,mcp__scrum4me__list_open_questions,mcp__scrum4me__cancel_question' CONSEC_FAILURES=0 BACKOFF=${AGENT_BACKOFF_START}