diff --git a/.env.example b/.env.example index d35bff2..3e92f97 100644 --- a/.env.example +++ b/.env.example @@ -110,9 +110,3 @@ AGENT_BACKOFF_MAX=300 AGENT_LOG_GZIP_AFTER_HOURS=24 # Hoeveel dagen ge-gzipte logs bewaren voor we ze verwijderen. AGENT_LOG_DELETE_AFTER_DAYS=30 - -# Claude CLI --output-format. Default 'stream-json' streamt de volledige -# event-stream (tool-calls, berichten) live naar de run-log; 'text' geeft -# alleen Claude's eind-samenvatting (terser, maar geen live-meekijken). -# stream-json maakt de run-log JSONL — gebruik jq of een viewer. -AGENT_CLAUDE_OUTPUT_FORMAT=stream-json diff --git a/bin/log-cleanup.sh b/bin/log-cleanup.sh index 5a4ad85..dbd8a0d 100755 --- a/bin/log-cleanup.sh +++ b/bin/log-cleanup.sh @@ -18,9 +18,4 @@ find "${AGENT_LOG_DIR}" -type f \ \( -name '*.log' -o -name '*.log.gz' -o -name '*.txt' -o -name '*.json' \) \ -mtime "+${AGENT_LOG_HARD_DELETE_DAYS}" -delete 2>/dev/null || true -# Prune dangling per-job symlinks: jobs/.log -> runs/.log waarvan -# het doel door rotatie is gegzipt of verwijderd. De -type f hierboven raakt -# symlinks niet, dus broken links worden hier expliciet opgeruimd (-xtype l). -find "${AGENT_LOG_DIR}/jobs" -maxdepth 1 -xtype l -delete 2>/dev/null || true - find "${AGENT_LOG_DIR}/jobs" -mindepth 1 -type d -empty -delete 2>/dev/null || true diff --git a/bin/run-agent.sh b/bin/run-agent.sh index 6b3dd32..c67213a 100644 --- a/bin/run-agent.sh +++ b/bin/run-agent.sh @@ -68,9 +68,7 @@ while true; do # claimt zelf via tryClaimJob, leest JobConfig (PBI-67), bouwt de # juiste Claude CLI-args, spawnt 'claude', wacht, sluit af. set +e - # RUN_LOG laat run-one-job.ts een jobs/.log symlink leggen naar - # dit run-log, zodat de output van een job op job-id vindbaar is. - RUN_LOG="${run_log}" tsx /opt/agent/bin/run-one-job.ts > "${run_log}" 2>&1 + tsx /opt/agent/bin/run-one-job.ts > "${run_log}" 2>&1 exit_code=$? set -e @@ -80,12 +78,8 @@ while true; do # Token-expiry detectie: run-one-job.ts retourneert exit 3 wanneer het # bekende auth-error-strings in Claude's output ziet. We checken óók de # log-tekst voor het geval een ander pad het patroon raakt (bv. Prisma- - # connection-error met OAuth-expired in error-body) — maar alléén bij een - # niet-nul exit. Het run-log bevat de volledige stream-json output (incl. - # tool-results én run-one-job's eigen "TOKEN_EXPIRED detected"-logregel), - # dus een geslaagde job die toevallig "401 unauthorized" in z'n output - # heeft mag de grep-fallback niet triggeren. - if [[ "$exit_code" -eq 3 ]] || { [[ "$exit_code" -ne 0 ]] && grep -qE '(invalid_api_key|authentication.*failed|401.*unauthor|OAuth.*expired)' "${run_log}"; }; then + # connection-error met OAuth-expired in error-body). + if [[ "$exit_code" -eq 3 ]] || grep -qE '(invalid_api_key|authentication.*failed|401.*unauthor|OAuth.*expired)' "${run_log}"; then log "AUTH FAILURE detected (exit=$exit_code or pattern in log) — marking TOKEN_EXPIRED" touch "${AGENT_STATE_DIR}/TOKEN_EXPIRED" write_state "$(jq -n \ diff --git a/bin/run-one-job.ts b/bin/run-one-job.ts index f9cc879..3da7cf9 100644 --- a/bin/run-one-job.ts +++ b/bin/run-one-job.ts @@ -22,8 +22,7 @@ // 3 = TOKEN_EXPIRED detected → run-agent.sh schrijft TOKEN_EXPIRED marker import { spawn, spawnSync } from 'node:child_process' -import { mkdirSync, rmSync, symlinkSync, writeFileSync } from 'node:fs' -import { basename, join } from 'node:path' +import { mkdirSync, rmSync, writeFileSync } from 'node:fs' import { Client as PgClient } from 'pg' @@ -197,25 +196,6 @@ async function main(): Promise { log(`claimed job_id=${jobId}`) - // Per-job log: symlink jobs/.log -> the runs/.log of - // this iteration. runs/ files are timestamp-named, so without this a job's - // output is only findable by grepping. run-agent.sh passes the run-log - // path via RUN_LOG. Relative target so it survives the host bind-mount. - // Best-effort — never fail the job over a log convenience. Dangling links - // (after the runs/ file is gzipped/deleted) are pruned by log-cleanup.sh. - const runLog = process.env.RUN_LOG - if (runLog) { - try { - const jobsDir = join(process.env.AGENT_LOG_DIR ?? '/var/log/agent', 'jobs') - mkdirSync(jobsDir, { recursive: true }) - const linkPath = join(jobsDir, `${jobId}.log`) - rmSync(linkPath, { force: true }) - symlinkSync(join('..', 'runs', basename(runLog)), linkPath) - } catch (err) { - log(`per-job log symlink skipped for ${jobId}: ${(err as Error).message}`) - } - } - // 3. Resolve full context. let ctx: Awaited> = null try { @@ -292,13 +272,6 @@ async function main(): Promise { // 7. Build CLI args. const promptText = getKindPromptText(ctx.kind).replace('$PAYLOAD_PATH', payloadPath) - // --output-format is configureerbaar via env. Default 'stream-json' geeft - // de volledige event-stream (elke tool-call, elk bericht) live in de - // run-log, i.p.v. alleen Claude's eind-samenvatting. stream-json vereist - // --verbose in print-mode. Zet AGENT_CLAUDE_OUTPUT_FORMAT=text terug voor - // de oude terse output. TOKEN_EXPIRED-detectie werkt ongewijzigd: de - // auth-error-strings staan ook binnen de JSON-events. - const outputFormat = process.env.AGENT_CLAUDE_OUTPUT_FORMAT ?? 'stream-json' const args: string[] = [ '-p', promptText, @@ -313,9 +286,8 @@ async function main(): Promise { '--add-dir', '/opt/agent', '--output-format', - outputFormat, + 'text', ] - if (outputFormat === 'stream-json') args.push('--verbose') if (effort) args.push('--effort', effort) const cwd = worktreePath ?? '/opt/agent' @@ -383,21 +355,13 @@ async function main(): Promise { `duration_ms=${durationMs} wall_clock_seconds=${Math.round(durationMs / 1000)}`, ) - // 10. Token-expiry detection — alleen als Claude zelf non-zero eindigde. - // stdoutBuf bevat de volledige stream-json output incl. álle tool-results, - // dus de auth-error-strings kunnen ook agent-werk-content zijn (een doc - // over 401-handling gelezen, een endpoint getest). Een echte credential- - // fout laat 'claude' non-zero exiten; een geslaagde run (exit 0) is per - // definitie geen token-expiry. Zonder deze gate legt zulke content de - // worker onterecht plat (run-agent.sh → TOKEN_EXPIRED marker + sleep). + // 10. Token-expiry detection. let tokenExpired = false - if (exitCode !== 0) { - for (const pat of TOKEN_EXPIRY_PATTERNS) { - if (pat.test(stdoutBuf)) { - tokenExpired = true - log(`TOKEN_EXPIRED detected pattern="${pat.source}" exiting code=3`) - break - } + for (const pat of TOKEN_EXPIRY_PATTERNS) { + if (pat.test(stdoutBuf)) { + tokenExpired = true + log(`TOKEN_EXPIRED detected pattern="${pat.source}" exiting code=3`) + break } }