fix(runner): registreer worker-presence + 10s heartbeat in run-one-job
Tot nu toe schreef de NAS-runner nooit naar `claude_workers`, waardoor de UI de worker als offline toonde ondanks gezonde container-health. Direct na `getAuth()` doen we nu een UPSERT via `registerWorker` en starten we een 10s heartbeat die `last_seen_at` vers houdt tijdens quota-backoff, LISTEN-wait, claude-spawn en cleanup. De heartbeat stopt via try/finally op elk exit-pad. Bewust geen `unregisterWorker`: tussen iteraties zou dat UI-flicker geven, en abnormale exits worden door de UI's eigen 60s-prune opgevangen.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
38c0e5f103
commit
e8c4518abb
1 changed files with 221 additions and 198 deletions
|
|
@ -38,6 +38,8 @@ import {
|
|||
import { releaseLocksOnTerminal } from '/opt/scrum4me-mcp/src/git/job-locks.js'
|
||||
import { mapBudgetToEffort } from '/opt/scrum4me-mcp/src/lib/job-config.js'
|
||||
import { getKindPromptText } from '/opt/scrum4me-mcp/src/lib/kind-prompts.js'
|
||||
import { registerWorker } from '/opt/scrum4me-mcp/src/presence/worker.js'
|
||||
import { startHeartbeat } from '/opt/scrum4me-mcp/src/presence/heartbeat.js'
|
||||
|
||||
// ----- logging --------------------------------------------------------
|
||||
const log = (msg: string) =>
|
||||
|
|
@ -49,6 +51,7 @@ const logError = (msg: string) =>
|
|||
const WAIT_DEADLINE_SECONDS = 270 // comfortably within wait_for_job's MAX_WAIT_SECONDS
|
||||
const POLL_INTERVAL_MS = 5000
|
||||
const HEARTBEAT_INTERVAL_MS = 60_000 // NOTE(review): not the worker-presence interval; its consumer is not visible in this hunk — confirm
|
||||
const WORKER_HEARTBEAT_INTERVAL_MS = 10_000 // 10 s: intervalMs handed to startHeartbeat for worker presence
|
||||
const MCP_CONFIG = '/opt/agent/mcp-config.json'
|
||||
const QUOTA_PROBE_PATH = '/opt/agent/bin/worker-quota-probe.sh'
|
||||
const QUOTA_BACKOFF_CAP_MS = 30 * 60 * 1000 // 30 minutes, expressed in milliseconds
|
||||
|
|
@ -152,6 +155,23 @@ async function main(): Promise<number> {
|
|||
const { userId, tokenId } = await getAuth()
|
||||
log(`auth ok user_id=${userId} token_id=${tokenId}`)
|
||||
|
||||
// Worker presence — UI leest claude_workers.last_seen_at.
|
||||
// UPSERT zodat de rij tussen iteraties blijft bestaan (geen flicker),
|
||||
// en heartbeat houdt last_seen_at vers tijdens quota-backoff,
|
||||
// LISTEN-wait, claude-spawn, en cleanup. Niet unregisteren bij exit:
|
||||
// de UI prunet zelf rijen ouder dan 60s.
|
||||
try {
|
||||
await registerWorker({ userId, tokenId })
|
||||
} catch (err) {
|
||||
logError(`registerWorker failed (non-fatal): ${(err as Error).message}`)
|
||||
}
|
||||
const workerHeartbeat = startHeartbeat({
|
||||
userId,
|
||||
tokenId,
|
||||
intervalMs: WORKER_HEARTBEAT_INTERVAL_MS,
|
||||
})
|
||||
|
||||
try {
|
||||
// 1. Quota probe (gate vóór elke claim).
|
||||
try {
|
||||
await quotaProbe(userId)
|
||||
|
|
@ -371,6 +391,9 @@ async function main(): Promise<number> {
|
|||
|
||||
if (tokenExpired) return 3
|
||||
return exitCode ?? 1
|
||||
} finally {
|
||||
workerHeartbeat.stop()
|
||||
}
|
||||
}
|
||||
|
||||
// ----- entry ----------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue