feat(logs): per-job log-symlink jobs/<job_id>.log -> runs/<ts>.log (IDEA-063)

Run-logs in /var/log/agent/runs/ zijn timestamp-named, dus de output van
een specifieke job was alleen via grep te vinden. De map jobs/ bestond al
maar werd niet gevuld.

- run-agent.sh: geeft het run-log-pad door als RUN_LOG env-var aan
  run-one-job.ts.
- run-one-job.ts: legt direct na de claim een symlink
  jobs/<job_id>.log -> ../runs/<ts>.log. Relatief pad (overleeft de
  host bind-mount), best-effort (laat de job nooit falen om een log-gemak).
- log-cleanup.sh: ruimt dangling per-job symlinks op met `find -xtype l`
  — nodig omdat rotate-logs.sh het doel na 24u gzipt (.log -> .log.gz)
  of na 30d verwijdert, waardoor de link dangling wordt, en omdat de
  bestaande `-type f` cleanup symlinks niet raakt.

Functioneel geverifieerd: symlink resolveert, dangling-prune werkt,
`-type f` negeert de symlink (geen voortijdige delete). run-one-job.ts
parseert schoon (node --check + type-strip).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Janpeter Visser 2026-05-14 19:22:40 +02:00
parent 7ec32c8def
commit 0b5a044ea5
3 changed files with 29 additions and 2 deletions

View file

@@ -18,4 +18,9 @@ find "${AGENT_LOG_DIR}" -type f \
\( -name '*.log' -o -name '*.log.gz' -o -name '*.txt' -o -name '*.json' \) \
-mtime "+${AGENT_LOG_HARD_DELETE_DAYS}" -delete 2>/dev/null || true
# Prune dangling per-job symlinks: jobs/<job_id>.log -> runs/<ts>.log waarvan
# het doel door rotatie is gegzipt of verwijderd. De -type f hierboven raakt
# symlinks niet, dus broken links worden hier expliciet opgeruimd (-xtype l).
find "${AGENT_LOG_DIR}/jobs" -maxdepth 1 -xtype l -delete 2>/dev/null || true
find "${AGENT_LOG_DIR}/jobs" -mindepth 1 -type d -empty -delete 2>/dev/null || true

View file

@@ -68,7 +68,9 @@ while true; do
# claimt zelf via tryClaimJob, leest JobConfig (PBI-67), bouwt de
# juiste Claude CLI-args, spawnt 'claude', wacht, sluit af.
set +e
tsx /opt/agent/bin/run-one-job.ts > "${run_log}" 2>&1
# RUN_LOG laat run-one-job.ts een jobs/<job_id>.log symlink leggen naar
# dit run-log, zodat de output van een job op job-id vindbaar is.
RUN_LOG="${run_log}" tsx /opt/agent/bin/run-one-job.ts > "${run_log}" 2>&1
exit_code=$?
set -e

View file

@@ -22,7 +22,8 @@
// 3 = TOKEN_EXPIRED detected → run-agent.sh schrijft TOKEN_EXPIRED marker
import { spawn, spawnSync } from 'node:child_process'
import { mkdirSync, rmSync, writeFileSync } from 'node:fs'
import { mkdirSync, rmSync, symlinkSync, writeFileSync } from 'node:fs'
import { basename, join } from 'node:path'
import { Client as PgClient } from 'pg'
@@ -196,6 +197,25 @@ async function main(): Promise<number> {
log(`claimed job_id=${jobId}`)
// Per-job log: symlink jobs/<jobId>.log -> the runs/<timestamp>.log of
// this iteration. runs/ files are timestamp-named, so without this a job's
// output is only findable by grepping. run-agent.sh passes the run-log
// path via RUN_LOG. Relative target so it survives the host bind-mount.
// Best-effort — never fail the job over a log convenience. Dangling links
// (after the runs/ file is gzipped/deleted) are pruned by log-cleanup.sh.
const runLog = process.env.RUN_LOG
if (runLog) {
try {
const jobsDir = join(process.env.AGENT_LOG_DIR ?? '/var/log/agent', 'jobs')
mkdirSync(jobsDir, { recursive: true })
const linkPath = join(jobsDir, `${jobId}.log`)
rmSync(linkPath, { force: true })
symlinkSync(join('..', 'runs', basename(runLog)), linkPath)
} catch (err) {
log(`per-job log symlink skipped for ${jobId}: ${(err as Error).message}`)
}
}
// 3. Resolve full context.
let ctx: Awaited<ReturnType<typeof getFullJobContext>> = null
try {