From de6bbd4edd29d79ff4657caa7bf7fd33d3d1e449 Mon Sep 17 00:00:00 2001 From: Madhura68 Date: Thu, 7 May 2026 12:27:48 +0200 Subject: [PATCH 1/6] PBI-50 F2-T1: claim-filter kind-based + lease_until persisten Schema-sync vanaf Scrum4Me (PBI-50 F1): - PrStrategy.SPRINT_BATCH, ClaudeJobKind.SPRINT_IMPLEMENTATION - enum SprintTaskExecutionStatus, model SprintTaskExecution - ClaudeJob.lease_until + status_lease_until index - SprintRun.previous_run_id (self-relation) tryClaimJob in src/tools/wait-for-job.ts: - WHERE-clause refactor naar kind-based discriminatie. NULL-checks vervangen door expliciete `cj.kind IN (...)`. SPRINT_IMPLEMENTATION en TASK_IMPLEMENTATION vereisen beide actieve SprintRun (QUEUED/RUNNING). Idea-kinds blijven standalone claimable. - UPDATE op claim zet `lease_until = NOW() + INTERVAL '5 minutes'`. Tests: 19 wait-for-job tests groen. Co-Authored-By: Claude Opus 4.7 (1M context) --- prisma/schema.prisma | 152 +++++++++++++++++++++++++------------- src/tools/wait-for-job.ts | 28 ++++--- 2 files changed, 118 insertions(+), 62 deletions(-) diff --git a/prisma/schema.prisma b/prisma/schema.prisma index dce449e..9766b6f 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -87,6 +87,7 @@ enum SprintRunStatus { enum PrStrategy { SPRINT STORY + SPRINT_BATCH } enum IdeaStatus { @@ -105,6 +106,15 @@ enum ClaudeJobKind { IDEA_GRILL IDEA_MAKE_PLAN PLAN_CHAT + SPRINT_IMPLEMENTATION +} + +enum SprintTaskExecutionStatus { + PENDING + RUNNING + DONE + FAILED + SKIPPED } enum IdeaLogType { @@ -299,24 +309,27 @@ model Sprint { } model SprintRun { - id String @id @default(cuid()) - sprint Sprint @relation(fields: [sprint_id], references: [id], onDelete: Cascade) - sprint_id String - started_by User @relation("SprintRunStartedBy", fields: [started_by_id], references: [id]) - started_by_id String - status SprintRunStatus @default(QUEUED) - pr_strategy PrStrategy - branch String? - pr_url String? - started_at DateTime? - finished_at DateTime? - failure_reason String? - failed_task Task? @relation("SprintRunFailedTask", fields: [failed_task_id], references: [id], onDelete: SetNull) - failed_task_id String? - pause_context Json? - created_at DateTime @default(now()) - updated_at DateTime @updatedAt - jobs ClaudeJob[] + id String @id @default(cuid()) + sprint Sprint @relation(fields: [sprint_id], references: [id], onDelete: Cascade) + sprint_id String + started_by User @relation("SprintRunStartedBy", fields: [started_by_id], references: [id]) + started_by_id String + status SprintRunStatus @default(QUEUED) + pr_strategy PrStrategy + branch String? + pr_url String? + started_at DateTime? + finished_at DateTime? + failure_reason String? + failed_task Task? @relation("SprintRunFailedTask", fields: [failed_task_id], references: [id], onDelete: SetNull) + failed_task_id String? + pause_context Json? + previous_run_id String? @unique + previous_run SprintRun? @relation("SprintRunChain", fields: [previous_run_id], references: [id], onDelete: SetNull) + next_run SprintRun? @relation("SprintRunChain") + created_at DateTime @default(now()) + updated_at DateTime @updatedAt + jobs ClaudeJob[] @@index([sprint_id, status]) @@index([started_by_id, status]) @@ -324,32 +337,33 @@ model SprintRun { } model Task { - id String @id @default(cuid()) - story Story @relation(fields: [story_id], references: [id], onDelete: Cascade) - story_id String - product Product @relation(fields: [product_id], references: [id], onDelete: Cascade) - product_id String - sprint Sprint? @relation(fields: [sprint_id], references: [id]) - sprint_id String? - code String @db.VarChar(30) - title String - description String? - implementation_plan String? - priority Int - sort_order Float - status TaskStatus @default(TO_DO) - verify_only Boolean @default(false) - verify_required VerifyRequired @default(ALIGNED_OR_PARTIAL) + id String @id @default(cuid()) + story Story @relation(fields: [story_id], references: [id], onDelete: Cascade) + story_id String + product Product @relation(fields: [product_id], references: [id], onDelete: Cascade) + product_id String + sprint Sprint? @relation(fields: [sprint_id], references: [id]) + sprint_id String? + code String @db.VarChar(30) + title String + description String? + implementation_plan String? + priority Int + sort_order Float + status TaskStatus @default(TO_DO) + verify_only Boolean @default(false) + verify_required VerifyRequired @default(ALIGNED_OR_PARTIAL) // Override product.repo_url for branch/worktree/push purposes. Set when // a task targets a different repo than its parent product (e.g. an // MCP-server task tracked under the main product's PBI). Falls back to // product.repo_url when null. - repo_url String? - created_at DateTime @default(now()) - updated_at DateTime @updatedAt - claude_questions ClaudeQuestion[] - claude_jobs ClaudeJob[] - sprint_run_failures SprintRun[] @relation("SprintRunFailedTask") + repo_url String? + created_at DateTime @default(now()) + updated_at DateTime @updatedAt + claude_questions ClaudeQuestion[] + claude_jobs ClaudeJob[] + sprint_run_failures SprintRun[] @relation("SprintRunFailedTask") + sprint_task_executions SprintTaskExecution[] @@unique([product_id, code]) @@index([story_id, priority, sort_order]) @@ -359,20 +373,20 @@ model Task { } model ClaudeJob { - id String @id @default(cuid()) - user User @relation(fields: [user_id], references: [id], onDelete: Cascade) + id String @id @default(cuid()) + user User @relation(fields: [user_id], references: [id], onDelete: Cascade) user_id String - product Product @relation(fields: [product_id], references: [id], onDelete: Cascade) + product Product @relation(fields: [product_id], references: [id], onDelete: Cascade) product_id String - task Task? @relation(fields: [task_id], references: [id], onDelete: Cascade) + task Task? @relation(fields: [task_id], references: [id], onDelete: Cascade) task_id String? - idea Idea? @relation(fields: [idea_id], references: [id], onDelete: Cascade) + idea Idea? @relation(fields: [idea_id], references: [id], onDelete: Cascade) idea_id String? - sprint_run SprintRun? @relation(fields: [sprint_run_id], references: [id], onDelete: SetNull) + sprint_run SprintRun? @relation(fields: [sprint_run_id], references: [id], onDelete: SetNull) sprint_run_id String? - kind ClaudeJobKind @default(TASK_IMPLEMENTATION) - status ClaudeJobStatus @default(QUEUED) - claimed_by_token ApiToken? @relation(fields: [claimed_by_token_id], references: [id], onDelete: SetNull) + kind ClaudeJobKind @default(TASK_IMPLEMENTATION) + status ClaudeJobStatus @default(QUEUED) + claimed_by_token ApiToken? @relation(fields: [claimed_by_token_id], references: [id], onDelete: SetNull) claimed_by_token_id String? claimed_at DateTime? started_at DateTime? @@ -391,9 +405,11 @@ model ClaudeJob { pr_url String? summary String? error String? - retry_count Int @default(0) - created_at DateTime @default(now()) - updated_at DateTime @updatedAt + retry_count Int @default(0) + lease_until DateTime? + task_executions SprintTaskExecution[] @relation("SprintJobExecutions") + created_at DateTime @default(now()) + updated_at DateTime @updatedAt @@index([user_id, status]) @@index([task_id, status]) @@ -401,9 +417,41 @@ model ClaudeJob { @@index([sprint_run_id, status]) @@index([status, claimed_at]) @@index([status, finished_at]) + @@index([status, lease_until]) @@map("claude_jobs") } +// PBI-50: frozen scope-snapshot per SPRINT_IMPLEMENTATION-claim. Bij claim +// wordt voor elke TO_DO-task in scope één PENDING-record gemaakt met +// implementation_plan + verify_required gesnapshot. Worker en gate werken +// uitsluitend op deze rows; latere wijzigingen aan Task hebben geen +// invloed op de lopende batch. +model SprintTaskExecution { + id String @id @default(cuid()) + sprint_job ClaudeJob @relation("SprintJobExecutions", fields: [sprint_job_id], references: [id], onDelete: Cascade) + sprint_job_id String + task Task @relation(fields: [task_id], references: [id], onDelete: Cascade) + task_id String + order Int + plan_snapshot String @db.Text + verify_required_snapshot VerifyRequired + verify_only_snapshot Boolean @default(false) + base_sha String? + head_sha String? + status SprintTaskExecutionStatus @default(PENDING) + verify_result VerifyResult? + verify_summary String? @db.Text + skip_reason String? @db.Text + started_at DateTime? + finished_at DateTime? + created_at DateTime @default(now()) + updated_at DateTime @updatedAt + + @@unique([sprint_job_id, task_id]) + @@index([sprint_job_id, order]) + @@map("sprint_task_executions") +} + model ModelPrice { id String @id @default(cuid()) model_id String @unique diff --git a/src/tools/wait-for-job.ts b/src/tools/wait-for-job.ts index 5741ec5..d05e9f8 100644 --- a/src/tools/wait-for-job.ts +++ b/src/tools/wait-for-job.ts @@ -308,12 +308,15 @@ export async function tryClaimJob( ): Promise { // Atomic claim in a single transaction — also captures plan_snapshot from task. // - // Sprint-flow filter (PBI-46): - // Idea-jobs (task_id IS NULL) blijven onafhankelijk claimable. - // Task-jobs zijn alleen claimable wanneer ze aan een actieve SprintRun - // hangen (status QUEUED of RUNNING). Legacy task-jobs zonder sprint_run_id - // en jobs in PAUSED/FAILED/CANCELLED/DONE SprintRuns worden overgeslagen. + // PBI-50: claim-filter discrimineert via cj.kind: + // - IDEA_GRILL/IDEA_MAKE_PLAN/PLAN_CHAT: standalone idea-jobs. + // - TASK_IMPLEMENTATION/SPRINT_IMPLEMENTATION: alleen via actieve SprintRun + // (status QUEUED of RUNNING). Legacy task-jobs zonder sprint_run_id en + // jobs in PAUSED/FAILED/CANCELLED/DONE SprintRuns worden overgeslagen. // Bij eerste claim van een nog QUEUED SprintRun → status RUNNING. + // + // PBI-50 lease: lease_until = NOW() + 5min op claim. resetStaleClaimedJobs + // reset bij verlopen lease. const rows = await prisma.$transaction(async (tx) => { const found = productId ? await tx.$queryRaw< @@ -327,8 +330,10 @@ export async function tryClaimJob( AND cj.product_id = ${productId} AND cj.status = 'QUEUED' AND ( - cj.task_id IS NULL - OR (cj.sprint_run_id IS NOT NULL AND sr.status IN ('QUEUED', 'RUNNING')) + cj.kind IN ('IDEA_GRILL', 'IDEA_MAKE_PLAN', 'PLAN_CHAT') + OR (cj.kind IN ('TASK_IMPLEMENTATION', 'SPRINT_IMPLEMENTATION') + AND cj.sprint_run_id IS NOT NULL + AND sr.status IN ('QUEUED', 'RUNNING')) ) ORDER BY cj.created_at ASC LIMIT 1 @@ -344,8 +349,10 @@ export async function tryClaimJob( WHERE cj.user_id = ${userId} AND cj.status = 'QUEUED' AND ( - cj.task_id IS NULL - OR (cj.sprint_run_id IS NOT NULL AND sr.status IN ('QUEUED', 'RUNNING')) + cj.kind IN ('IDEA_GRILL', 'IDEA_MAKE_PLAN', 'PLAN_CHAT') + OR (cj.kind IN ('TASK_IMPLEMENTATION', 'SPRINT_IMPLEMENTATION') + AND cj.sprint_run_id IS NOT NULL + AND sr.status IN ('QUEUED', 'RUNNING')) ) ORDER BY cj.created_at ASC LIMIT 1 @@ -362,7 +369,8 @@ export async function tryClaimJob( SET status = 'CLAIMED', claimed_by_token_id = ${tokenId}, claimed_at = NOW(), - plan_snapshot = ${snapshot} + plan_snapshot = ${snapshot}, + lease_until = NOW() + INTERVAL '5 minutes' WHERE id = ${jobId} ` From 35601e8e4bc976ff8f7cb2885402cfd4700ede3d Mon Sep 17 00:00:00 2001 From: Madhura68 Date: Thu, 7 May 2026 12:33:55 +0200 Subject: [PATCH 2/6] PBI-50 F2-T2/T3: SPRINT_IMPLEMENTATION-pad in getFullJobContext + lease-driven stale-reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F2-T2 — getFullJobContext branche voor `kind === 'SPRINT_IMPLEMENTATION'`: - Fetch sprint_run met deep include (sprint → product + stories → pbi + tasks). - resolveRepoRoot via product; rollbackClaim bij faal. - Branch-resolutie: previous_run_id + branch → reuse (resume-pad), anders verse `feat/sprint-`. createWorktreeForJob met juiste reuseBranch-flag. - Capture base_sha via `git rev-parse HEAD` na worktree-add. - Frozen scope-snapshot: SprintTaskExecution.createMany met plan_snapshot, verify_required_snapshot, verify_only_snapshot per task in scope. Order is PBI→Story→Task. base_sha alleen op task[0] (rest fillt verify-tool). - Update job.branch + job.base_sha + sprint_run.branch in één transactie. - Lookup execution_ids voor response shape. F2-T3 — resetStaleClaimedJobs lease-driven: - WHERE-clause uitgebreid naar `status IN ('CLAIMED','RUNNING')` met OR-clause `lease_until < NOW() OR (lease_until IS NULL AND claimed_at < NOW() - 30min)`. Legacy jobs zonder lease blijven via claimed_at-pad werken; nieuwe jobs via lease_until. - RETURNING uitgebreid met kind, sprint_run_id, branch. - Bij stale FAILED SPRINT_IMPLEMENTATION: push branch (geen mark-ready, geen PR-promotie) zodat werk niet verloren gaat. Vul SprintRun.failure_reason met laatst-RUNNING execution voor diagnose. Imports: getWorktreeRoot uit worktree-paths.js, pushBranchForJob uit push.js. Tests: 31 files, 243 passing. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/tools/wait-for-job.ts | 250 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 237 insertions(+), 13 deletions(-) diff --git a/src/tools/wait-for-job.ts b/src/tools/wait-for-job.ts index d05e9f8..1323e50 100644 --- a/src/tools/wait-for-job.ts +++ b/src/tools/wait-for-job.ts @@ -15,7 +15,9 @@ const execFileP = promisify(execFile) import { requireWriteAccess } from '../auth.js' import { toolJson, toolError, withToolErrors } from '../errors.js' import { createWorktreeForJob } from '../git/worktree.js' +import { getWorktreeRoot } from '../git/worktree-paths.js' import { setupProductWorktrees, releaseLocksOnTerminal } from '../git/job-locks.js' +import { pushBranchForJob } from '../git/push.js' /** Parse `https://github.com//(.git)?` → ``. */ export function repoNameFromUrl(repoUrl: string | null | undefined): string | null { @@ -225,45 +227,96 @@ const inputSchema = z.object({ const STALE_ERROR_MSG = 'agent did not complete job within 2 attempts' export async function resetStaleClaimedJobs(userId: string): Promise { - // Jobs that exceeded the retry limit → FAILED - const failedRows = await prisma.$queryRaw< - Array<{ id: string; task_id: string; product_id: string }> - >` + // PBI-50: lease-driven stale-detection. Jobs in CLAIMED of RUNNING met + // verlopen lease_until (default 5 min, verlengd door job_heartbeat) worden + // gereset. Legacy jobs zonder lease_until vallen terug op de oude + // claimed_at + 30-min-regel. + type StaleRow = { + id: string + task_id: string | null + product_id: string + kind: string + sprint_run_id: string | null + branch: string | null + } + + const failedRows = await prisma.$queryRaw` UPDATE claude_jobs SET status = 'FAILED', finished_at = NOW(), error = ${STALE_ERROR_MSG} WHERE user_id = ${userId} - AND status = 'CLAIMED' - AND claimed_at < NOW() - INTERVAL '30 minutes' + AND status IN ('CLAIMED', 'RUNNING') AND retry_count >= 2 - RETURNING id, task_id, product_id + AND ( + lease_until < NOW() + OR (lease_until IS NULL AND claimed_at < NOW() - INTERVAL '30 minutes') + ) + RETURNING id, task_id, product_id, kind::text AS kind, sprint_run_id, branch ` - // Jobs under the retry limit → back to QUEUED, increment retry_count const requeuedRows = await prisma.$queryRaw< - Array<{ id: string; task_id: string; product_id: string; retry_count: number }> + (StaleRow & { retry_count: number })[] >` UPDATE claude_jobs SET status = 'QUEUED', claimed_by_token_id = NULL, claimed_at = NULL, plan_snapshot = NULL, + lease_until = NULL, retry_count = retry_count + 1 WHERE user_id = ${userId} - AND status = 'CLAIMED' - AND claimed_at < NOW() - INTERVAL '30 minutes' + AND status IN ('CLAIMED', 'RUNNING') AND retry_count < 2 - RETURNING id, task_id, product_id, retry_count + AND ( + lease_until < NOW() + OR (lease_until IS NULL AND claimed_at < NOW() - INTERVAL '30 minutes') + ) + RETURNING id, task_id, product_id, kind::text AS kind, sprint_run_id, branch, retry_count ` if (failedRows.length === 0 && requeuedRows.length === 0) return // PBI-9: release any product-worktree locks held by these stale jobs. - // No-op for jobs without registered locks (TASK_IMPLEMENTATION). for (const j of failedRows) await releaseLocksOnTerminal(j.id) for (const j of requeuedRows) await releaseLocksOnTerminal(j.id) + // PBI-50: voor stale FAILED SPRINT_IMPLEMENTATION jobs — push de branch + // zodat het werk niet verloren gaat (geen mark-ready / PR-promotie), + // en zet SprintRun.failure_reason met een verwijzing naar de laatst + // RUNNING execution voor diagnose. + for (const j of failedRows.filter((r) => r.kind === 'SPRINT_IMPLEMENTATION')) { + if (j.branch && j.product_id) { + const repoRoot = await resolveRepoRoot(j.product_id).catch(() => null) + if (repoRoot) { + const worktreeDir = getWorktreeRoot() + const worktreePath = path.join(worktreeDir, j.id) + try { + await pushBranchForJob({ worktreePath, branchName: j.branch }) + } catch (err) { + console.warn(`[stale-reset] push failed for stale sprint-job ${j.id}:`, err) + } + } + } + if (j.sprint_run_id) { + const lastRunning = await prisma.sprintTaskExecution.findFirst({ + where: { sprint_job_id: j.id, status: 'RUNNING' }, + orderBy: { order: 'desc' }, + select: { order: true, task_id: true }, + }) + const reasonSuffix = lastRunning + ? `, last execution: order ${lastRunning.order} task ${lastRunning.task_id}` + : '' + await prisma.sprintRun.update({ + where: { id: j.sprint_run_id }, + data: { + status: 'FAILED', + failure_reason: `stale: lease verlopen${reasonSuffix}`, + }, + }) + } + } + // Notify UI via SSE for each transition (best-effort) try { const pg = new Client({ connectionString: process.env.DATABASE_URL }) @@ -490,6 +543,177 @@ async function getFullJobContext(jobId: string) { } } + // PBI-50: SPRINT_IMPLEMENTATION — single-session sprint runner. + // Eén ClaudeJob per SprintRun handelt sequentieel alle TO_DO-tasks af. + // Bij claim: maak frozen scope-snapshot via SprintTaskExecution-rows, + // resolve worktree (verse branch of hergebruikt via previous_run_id), + // capture base_sha. Worker werkt uitsluitend op deze frozen snapshot. + if (job.kind === 'SPRINT_IMPLEMENTATION') { + if (!job.sprint_run_id) { + await rollbackClaim(job.id) + return null + } + const sprintRun = await prisma.sprintRun.findUnique({ + where: { id: job.sprint_run_id }, + include: { + sprint: { + include: { + product: true, + stories: { + where: { status: { not: 'DONE' } }, + include: { + pbi: { + select: { id: true, code: true, title: true, priority: true, sort_order: true, status: true }, + }, + tasks: { + where: { status: 'TO_DO' }, + orderBy: [{ priority: 'asc' }, { sort_order: 'asc' }], + }, + }, + orderBy: [{ priority: 'asc' }, { sort_order: 'asc' }], + }, + }, + }, + }, + }) + if (!sprintRun) { + await rollbackClaim(job.id) + return null + } + + const repoRoot = await resolveRepoRoot(sprintRun.sprint.product_id) + if (!repoRoot) { + await rollbackClaim(job.id) + return null + } + + // Branch resolution: previous_run_id + branch → reuse; anders verse. + const isResume = !!(sprintRun.previous_run_id && sprintRun.branch) + const branchName = isResume + ? sprintRun.branch! + : `feat/sprint-${job.sprint_run_id.slice(-8)}` + + let worktreePath: string + let baseSha: string + try { + const wt = await createWorktreeForJob({ + repoRoot, + jobId: job.id, + branchName, + reuseBranch: isResume, + }) + worktreePath = wt.worktreePath + + const { stdout: headSha } = await execFileP('git', ['rev-parse', 'HEAD'], { + cwd: worktreePath, + }) + baseSha = headSha.trim() + } catch (err) { + console.warn(`[wait-for-job] sprint-worktree setup failed for ${job.id}:`, err) + await rollbackClaim(job.id) + return null + } + + // Verzamel ordered tasks in flat list, behoud volgorde + const orderedTasks = sprintRun.sprint.stories.flatMap((s) => + s.tasks.map((t) => ({ ...t, story_pbi_id: s.pbi.id })), + ) + + // Persist branch + base_sha + scope-snapshot in één transactie + await prisma.$transaction([ + prisma.claudeJob.update({ + where: { id: job.id }, + data: { branch: branchName, base_sha: baseSha }, + }), + prisma.sprintTaskExecution.createMany({ + data: orderedTasks.map((t, idx) => ({ + sprint_job_id: job.id, + task_id: t.id, + order: idx, + plan_snapshot: t.implementation_plan ?? '', + verify_required_snapshot: t.verify_required, + verify_only_snapshot: t.verify_only, + base_sha: idx === 0 ? baseSha : null, + status: 'PENDING' as const, + })), + }), + prisma.sprintRun.update({ + where: { id: job.sprint_run_id }, + data: { branch: branchName }, + }), + ]) + + // Lookup execution_ids in volgorde voor de response + const executions = await prisma.sprintTaskExecution.findMany({ + where: { sprint_job_id: job.id }, + orderBy: { order: 'asc' }, + select: { id: true, task_id: true, order: true, base_sha: true }, + }) + const execIdByTaskId = new Map(executions.map((e) => [e.task_id, e.id])) + + // Dedupe PBIs uit de stories (één PBI kan meerdere stories hebben) + const pbiMap = new Map() + for (const s of sprintRun.sprint.stories) pbiMap.set(s.pbi.id, s.pbi) + + return { + job_id: job.id, + kind: job.kind, + status: 'claimed', + sprint: { + id: sprintRun.sprint.id, + sprint_goal: sprintRun.sprint.sprint_goal, + status: sprintRun.sprint.status, + }, + sprint_run: { + id: sprintRun.id, + pr_strategy: sprintRun.pr_strategy, + branch: branchName, + previous_run_id: sprintRun.previous_run_id, + }, + product: { + id: sprintRun.sprint.product.id, + name: sprintRun.sprint.product.name, + repo_url: sprintRun.sprint.product.repo_url, + definition_of_done: sprintRun.sprint.product.definition_of_done, + auto_pr: sprintRun.sprint.product.auto_pr, + }, + pbis: Array.from(pbiMap.values()).map((p) => ({ + id: p.id, + code: p.code, + title: p.title, + priority: p.priority, + sort_order: p.sort_order, + status: p.status, + })), + stories: sprintRun.sprint.stories.map((s) => ({ + id: s.id, + code: s.code, + title: s.title, + pbi_id: s.pbi_id, + priority: s.priority, + sort_order: s.sort_order, + status: s.status, + })), + task_executions: orderedTasks.map((t, idx) => ({ + execution_id: execIdByTaskId.get(t.id)!, + task_id: t.id, + code: t.code, + title: t.title, + story_id: t.story_id, + order: idx, + plan_snapshot: t.implementation_plan ?? '', + verify_required: t.verify_required, + verify_only: t.verify_only, + base_sha: idx === 0 ? baseSha : null, + })), + worktree_path: worktreePath, + branch_name: branchName, + repo_url: sprintRun.sprint.product.repo_url, + base_sha: baseSha, + heartbeat_interval_seconds: 60, + } + } + // TASK_IMPLEMENTATION (default) — bestaande gedrag onaangetast. const { task } = job if (!task) return null From 25ab68073a0cc32109965c0efca20dd39d5f01d8 Mon Sep 17 00:00:00 2001 From: Madhura68 Date: Thu, 7 May 2026 12:40:18 +0200 Subject: [PATCH 3/6] PBI-50 F3: nieuwe MCP-tools voor SPRINT_IMPLEMENTATION-flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vier nieuwe tools + propagateStatusUpwards uitbreiding: T1 — verify_sprint_task (src/tools/verify-sprint-task.ts): Execution-aware verify met frozen plan_snapshot. Input: execution_id + worktree_path + optionele summary (voor PARTIAL/DIVERGENT-rationale). Vult base_sha dynamisch voor task[1..N] op basis van vorige DONE-execution's head_sha. Schrijft verify_result + verify_summary op execution-row. Returns { result, reasoning, base_sha, allowed_for_done, reason? } — allowed_for_done via standaard checkVerifyGate met snapshot-velden. T2 — update_task_execution (src/tools/update-task-execution.ts): Lifecycle-tool voor SprintTaskExecution: PENDING/RUNNING/DONE/FAILED/SKIPPED + base_sha/head_sha/skip_reason. Idempotent. Token-check via execution.sprint_job.claimed_by_token_id. started_at/finished_at automatisch. T3 — job_heartbeat (src/tools/job-heartbeat.ts): Verlengt ClaudeJob.lease_until met 5 min via atomic conditional UPDATE (token-check + status-check in WHERE). Voor SPRINT-jobs: response bevat sprint_run_status + sprint_run_pause_reason zodat worker op UI-side cancel of MERGE_CONFLICT-pause kan breken zonder extra query. T4 — update_task_status sprint_run_id-arg + token-coupling (src/tools/update-task-status.ts): Optionele sprint_run_id-arg voor expliciete cascade. Validaties: SprintRun bestaat + actief, task in deze sprint, current token heeft een actieve ClaudeJob in deze run geclaimd (403 anders). Response uitgebreid met sprint_run_status_change. T5 — propagateStatusUpwards sprintRunId-param (src/lib/tasks-status-update.ts): Optionele sprintRunId-parameter. Resolve-volgorde: expliciete arg → ClaudeJob.task_id-lookup → Story → Sprint → SprintRun.findFirst({active}). De derde fallback dekt SPRINT_IMPLEMENTATION (geen task_id-koppeling) én handmatige task-statuswijzigingen via UI. cancelExceptJobId voor sibling-cancel; null voor SPRINT-job betekent geen siblings te cancellen. src/index.ts: drie nieuwe tools geregistreerd. Tests: 31 files, 243 passing (geen tests voor nieuwe tools nog — F5). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/index.ts | 8 ++ src/lib/tasks-status-update.ts | 58 +++++++++-- src/tools/job-heartbeat.ts | 81 ++++++++++++++++ src/tools/update-task-execution.ts | 110 +++++++++++++++++++++ src/tools/update-task-status.ts | 79 +++++++++++++-- src/tools/verify-sprint-task.ts | 151 +++++++++++++++++++++++++++++ 6 files changed, 471 insertions(+), 16 deletions(-) create mode 100644 src/tools/job-heartbeat.ts create mode 100644 src/tools/update-task-execution.ts create mode 100644 src/tools/verify-sprint-task.ts diff --git a/src/index.ts b/src/index.ts index d05900c..2938c70 100644 --- a/src/index.ts +++ b/src/index.ts @@ -29,6 +29,10 @@ import { registerUpdateIdeaPlanMdTool } from './tools/update-idea-plan-md.js' import { registerLogIdeaDecisionTool } from './tools/log-idea-decision.js' import { registerGetWorkerSettingsTool } from './tools/get-worker-settings.js' import { registerWorkerHeartbeatTool } from './tools/worker-heartbeat.js' +// PBI-50: SPRINT_IMPLEMENTATION-tools +import { registerVerifySprintTaskTool } from './tools/verify-sprint-task.js' +import { registerUpdateTaskExecutionTool } from './tools/update-task-execution.js' +import { registerJobHeartbeatTool } from './tools/job-heartbeat.js' import { registerImplementNextStoryPrompt } from './prompts/implement-next-story.js' import { getAuth } from './auth.js' import { registerWorker } from './presence/worker.js' @@ -92,6 +96,10 @@ async function main() { // M13: worker quota-gate tools registerGetWorkerSettingsTool(server) registerWorkerHeartbeatTool(server) + // PBI-50: SPRINT_IMPLEMENTATION-tools + registerVerifySprintTaskTool(server) + registerUpdateTaskExecutionTool(server) + registerJobHeartbeatTool(server) registerImplementNextStoryPrompt(server) // Presence bootstrap MUST run before server.connect — the stdio transport diff --git a/src/lib/tasks-status-update.ts b/src/lib/tasks-status-update.ts index 3549f3d..64e2ac6 100644 --- a/src/lib/tasks-status-update.ts +++ b/src/lib/tasks-status-update.ts @@ -38,6 +38,11 @@ export async function propagateStatusUpwards( taskId: string, newStatus: TaskStatus, client?: Prisma.TransactionClient, + // PBI-50: optionele expliciete sprint_run_id voor SPRINT_IMPLEMENTATION + // (waar geen ClaudeJob.task_id-koppeling bestaat). Wanneer afwezig valt + // de helper terug op de lookup via ClaudeJob.task_id, met als laatste + // fallback Story → Sprint → SprintRun.findFirst({ status: active }). + sprintRunId?: string, ): Promise { const run = async (tx: Prisma.TransactionClient): Promise => { const task = await tx.task.update({ @@ -151,18 +156,43 @@ export async function propagateStatusUpwards( } } - // SprintRun herevalueren — via ClaudeJob.sprint_run_id van deze task + // SprintRun herevalueren. Resolve sprint_run_id in volgorde: + // 1. Expliciete sprintRunId-arg (PBI-50: SPRINT_IMPLEMENTATION-pad). + // 2. ClaudeJob.task_id-lookup (PER_TASK-flow). + // 3. Story → Sprint → SprintRun.findFirst({ status: active }) (geen + // task-job, bv. handmatige task-statuswijziging via UI). let sprintRunChanged = false if (nextSprintStatus === 'FAILED' || nextSprintStatus === 'COMPLETED') { - const job = await tx.claudeJob.findFirst({ - where: { task_id: taskId, sprint_run_id: { not: null } }, - orderBy: { created_at: 'desc' }, - select: { id: true, sprint_run_id: true }, - }) + let resolvedRunId: string | null = sprintRunId ?? null + let cancelExceptJobId: string | null = null - if (job?.sprint_run_id) { + if (!resolvedRunId) { + const job = await tx.claudeJob.findFirst({ + where: { task_id: taskId, sprint_run_id: { not: null } }, + orderBy: { created_at: 'desc' }, + select: { id: true, sprint_run_id: true }, + }) + if (job?.sprint_run_id) { + resolvedRunId = job.sprint_run_id + cancelExceptJobId = job.id + } + } + + if (!resolvedRunId && story.sprint_id) { + const activeRun = await tx.sprintRun.findFirst({ + where: { + sprint_id: story.sprint_id, + status: { in: ['QUEUED', 'RUNNING', 'PAUSED'] }, + }, + orderBy: { created_at: 'desc' }, + select: { id: true }, + }) + if (activeRun) resolvedRunId = activeRun.id + } + + if (resolvedRunId) { const sprintRun = await tx.sprintRun.findUnique({ - where: { id: job.sprint_run_id }, + where: { id: resolvedRunId }, select: { id: true, status: true }, }) if ( @@ -180,11 +210,16 @@ export async function propagateStatusUpwards( failed_task_id: taskId, }, }) + // Cancel sibling-jobs binnen dezelfde SprintRun behalve de + // huidige task-job (als die er is). Voor SPRINT_IMPLEMENTATION + // is cancelExceptJobId null en hebben we geen siblings om te + // cancellen — de SPRINT-job zelf blijft actief en de worker + // detecteert dit via job_heartbeat. await tx.claudeJob.updateMany({ where: { sprint_run_id: sprintRun.id, status: { in: ['QUEUED', 'CLAIMED', 'RUNNING'] }, - id: { not: job.id }, + ...(cancelExceptJobId ? { id: { not: cancelExceptJobId } } : {}), }, data: { status: 'CANCELLED', @@ -230,14 +265,16 @@ export interface UpdateTaskStatusResult { task: PropagationResult['task'] storyStatusChange: StoryStatusChange storyId: string + sprintRunChanged: boolean } export async function updateTaskStatusWithStoryPromotion( taskId: string, newStatus: TaskStatus, client?: Prisma.TransactionClient, + sprintRunId?: string, ): Promise { - const result = await propagateStatusUpwards(taskId, newStatus, client) + const result = await propagateStatusUpwards(taskId, newStatus, client, sprintRunId) let storyStatusChange: StoryStatusChange = null if (result.storyChanged) { storyStatusChange = newStatus === 'DONE' ? 'promoted' : 'demoted' @@ -246,5 +283,6 @@ export async function updateTaskStatusWithStoryPromotion( task: result.task, storyStatusChange, storyId: result.storyId, + sprintRunChanged: result.sprintRunChanged, } } diff --git a/src/tools/job-heartbeat.ts b/src/tools/job-heartbeat.ts new file mode 100644 index 0000000..36c42a2 --- /dev/null +++ b/src/tools/job-heartbeat.ts @@ -0,0 +1,81 @@ +// PBI-50 F3-T3: job_heartbeat +// +// Verlengt ClaudeJob.lease_until met 5 min zodat resetStaleClaimedJobs een +// long-running job (bv. SPRINT_IMPLEMENTATION over 30+ min) niet ten onrechte +// als stale markt. Worker draait een achtergrond-loop elke 60s. +// +// Voor SPRINT-jobs: respons bevat sprint_run_status zodat de worker zijn +// loop kan breken bij ≠ RUNNING (bv. UI-side cancel of MERGE_CONFLICT-pause). + +import { z } from 'zod' +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' +import { prisma } from '../prisma.js' +import { requireWriteAccess } from '../auth.js' +import { toolError, toolJson, withToolErrors } from '../errors.js' + +const inputSchema = z.object({ + job_id: z.string().min(1), +}) + +export function registerJobHeartbeatTool(server: McpServer) { + server.registerTool( + 'job_heartbeat', + { + title: 'Job heartbeat', + description: + 'Extend the lease on a CLAIMED/RUNNING job by 5 minutes. Token must own the job. ' + + 'For SPRINT_IMPLEMENTATION jobs: response includes sprint_run_status so the worker ' + + 'can break its task-loop on UI-side cancel/pause without an extra query. ' + + 'Worker should call this every ~60s during long-running batches. ' + + 'Forbidden for demo accounts.', + inputSchema, + }, + async ({ job_id }) => + withToolErrors(async () => { + const auth = await requireWriteAccess() + + // Atomic conditional UPDATE so a non-owner / non-active job is rejected + // without a separate read. + const updated = await prisma.$queryRaw< + Array<{ id: string; lease_until: Date; kind: string; sprint_run_id: string | null }> + >` + UPDATE claude_jobs + SET lease_until = NOW() + INTERVAL '5 minutes' + WHERE id = ${job_id} + AND claimed_by_token_id = ${auth.tokenId} + AND status IN ('CLAIMED', 'RUNNING') + RETURNING id, lease_until, kind::text AS kind, sprint_run_id + ` + if (updated.length === 0) { + return toolError( + `Job ${job_id} not found, not claimed by your token, or in terminal state`, + ) + } + const row = updated[0] + + let sprint_run_status: string | null = null + let sprint_run_pause_reason: string | null = null + if (row.kind === 'SPRINT_IMPLEMENTATION' && row.sprint_run_id) { + const sprintRun = await prisma.sprintRun.findUnique({ + where: { id: row.sprint_run_id }, + select: { status: true, pause_context: true }, + }) + sprint_run_status = sprintRun?.status ?? null + // Extract pause_reason from pause_context Json (best-effort) + const ctx = sprintRun?.pause_context as + | { pause_reason?: string } + | null + | undefined + sprint_run_pause_reason = ctx?.pause_reason ?? null + } + + return toolJson({ + ok: true, + job_id: row.id, + lease_until: row.lease_until.toISOString(), + sprint_run_status, + sprint_run_pause_reason, + }) + }), + ) +} diff --git a/src/tools/update-task-execution.ts b/src/tools/update-task-execution.ts new file mode 100644 index 0000000..8b3213a --- /dev/null +++ b/src/tools/update-task-execution.ts @@ -0,0 +1,110 @@ +// PBI-50 F3-T2: update_task_execution +// +// SPRINT_IMPLEMENTATION-flow lifecycle-tool. Worker roept dit aan voor elke +// task in de batch om de SprintTaskExecution-row te muteren: +// PENDING → RUNNING → DONE/FAILED/SKIPPED +// Idempotent: dezelfde call kan veilig herhaald worden. + +import { z } from 'zod' +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' +import { prisma } from '../prisma.js' +import { requireWriteAccess } from '../auth.js' +import { toolError, toolJson, withToolErrors } from '../errors.js' + +const inputSchema = z.object({ + execution_id: z.string().min(1), + status: z.enum(['PENDING', 'RUNNING', 'DONE', 'FAILED', 'SKIPPED']), + base_sha: z.string().optional(), + head_sha: z.string().optional(), + skip_reason: z.string().max(2000).optional(), +}) + +export function registerUpdateTaskExecutionTool(server: McpServer) { + server.registerTool( + 'update_task_execution', + { + title: 'Update SprintTaskExecution status', + description: + 'Mutate a SprintTaskExecution row in a SPRINT_IMPLEMENTATION batch. ' + + 'Status: PENDING|RUNNING|DONE|FAILED|SKIPPED. Worker calls this for each ' + + 'task transition. Token must own the parent SPRINT_IMPLEMENTATION ClaudeJob. ' + + 'Idempotent — safe to retry. Schrijft started_at (RUNNING) en finished_at ' + + '(DONE/FAILED/SKIPPED). Forbidden for demo accounts.', + inputSchema, + }, + async ({ execution_id, status, base_sha, head_sha, skip_reason }) => + withToolErrors(async () => { + const auth = await requireWriteAccess() + + const execution = await prisma.sprintTaskExecution.findUnique({ + where: { id: execution_id }, + select: { + id: true, + sprint_job_id: true, + sprint_job: { + select: { claimed_by_token_id: true, status: true, kind: true }, + }, + }, + }) + if (!execution) { + return toolError(`SprintTaskExecution ${execution_id} not found`) + } + if (execution.sprint_job.kind !== 'SPRINT_IMPLEMENTATION') { + return toolError( + `Execution ${execution_id} hangs at job kind ${execution.sprint_job.kind}, expected SPRINT_IMPLEMENTATION`, + ) + } + if (execution.sprint_job.claimed_by_token_id !== auth.tokenId) { + return toolError( + `Forbidden: token does not own SPRINT_IMPLEMENTATION job for execution ${execution_id}`, + ) + } + if ( + execution.sprint_job.status !== 'CLAIMED' && + execution.sprint_job.status !== 'RUNNING' + ) { + return toolError( + `Sprint job is in terminal state ${execution.sprint_job.status}`, + ) + } + + const now = new Date() + const updated = await prisma.sprintTaskExecution.update({ + where: { id: execution_id }, + data: { + status, + ...(base_sha !== undefined ? { base_sha } : {}), + ...(head_sha !== undefined ? { head_sha } : {}), + ...(skip_reason !== undefined ? { skip_reason } : {}), + ...(status === 'RUNNING' ? { started_at: now } : {}), + ...(status === 'DONE' || status === 'FAILED' || status === 'SKIPPED' + ? { finished_at: now } + : {}), + }, + select: { + id: true, + status: true, + base_sha: true, + head_sha: true, + verify_result: true, + verify_summary: true, + skip_reason: true, + started_at: true, + finished_at: true, + }, + }) + + return toolJson({ + execution_id: updated.id, + status: updated.status, + base_sha: updated.base_sha, + head_sha: updated.head_sha, + verify_result: updated.verify_result, + verify_summary: updated.verify_summary, + skip_reason: updated.skip_reason, + started_at: updated.started_at?.toISOString() ?? null, + finished_at: updated.finished_at?.toISOString() ?? null, + }) + }), + ) +} diff --git a/src/tools/update-task-status.ts b/src/tools/update-task-status.ts index d3756ce..8ac8463 100644 --- a/src/tools/update-task-status.ts +++ b/src/tools/update-task-status.ts @@ -1,5 +1,6 @@ import { z } from 'zod' import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' +import { prisma } from '../prisma.js' import { requireWriteAccess } from '../auth.js' import { userCanAccessTask } from '../access.js' import { toolError, toolJson, withToolErrors } from '../errors.js' @@ -9,6 +10,10 @@ import { updateTaskStatusWithStoryPromotion } from '../lib/tasks-status-update.j const inputSchema = z.object({ task_id: z.string().min(1), status: z.enum(TASK_STATUS_API_VALUES as [string, ...string[]]), + // PBI-50: optionele sprint_run_id voor SPRINT_IMPLEMENTATION-flow. + // Wanneer aanwezig: server valideert dat task in deze sprint zit, run + // actief is, en de huidige token een ClaudeJob in deze run heeft geclaimt. + sprint_run_id: z.string().min(1).optional(), }) export function registerUpdateTaskStatusTool(server: McpServer) { @@ -17,11 +22,14 @@ export function registerUpdateTaskStatusTool(server: McpServer) { { title: 'Update task status', description: - 'Set the status of a task. Allowed values: todo, in_progress, review, done. ' + + 'Set the status of a task. Allowed values: todo, in_progress, review, done, failed. ' + + 'Optional sprint_run_id binds the update to a SPRINT_IMPLEMENTATION run for ' + + 'cascade-propagation; the server validates that the task belongs to the sprint ' + + 'and that the calling token has claimed a job in that run. ' + 'Forbidden for demo accounts.', inputSchema, }, - async ({ task_id, status }) => + async ({ task_id, status, sprint_run_id }) => withToolErrors(async () => { const auth = await requireWriteAccess() const dbStatus = taskStatusFromApi(status) @@ -31,15 +39,74 @@ export function registerUpdateTaskStatusTool(server: McpServer) { if (!(await userCanAccessTask(task_id, auth.userId))) { return toolError(`Task ${task_id} not found or not accessible`) } - const { task, storyStatusChange } = await updateTaskStatusWithStoryPromotion( - task_id, - dbStatus, - ) + + // PBI-50: validate explicit sprint_run_id binding. + if (sprint_run_id) { + const sprintRun = await prisma.sprintRun.findUnique({ + where: { id: sprint_run_id }, + select: { id: true, status: true, sprint_id: true }, + }) + if (!sprintRun) { + return toolError(`SprintRun ${sprint_run_id} not found`) + } + if ( + sprintRun.status !== 'QUEUED' && + sprintRun.status !== 'RUNNING' && + sprintRun.status !== 'PAUSED' + ) { + return toolError( + `SprintRun ${sprint_run_id} is in terminal state ${sprintRun.status}; cannot update task status against it`, + ) + } + + // Task moet in deze sprint zitten + const task = await prisma.task.findUnique({ + where: { id: task_id }, + select: { story: { select: { sprint_id: true } } }, + }) + if (!task || task.story.sprint_id !== sprintRun.sprint_id) { + return toolError( + `Task ${task_id} is not in sprint ${sprintRun.sprint_id} (sprint_run ${sprint_run_id})`, + ) + } + + // Token-coupling: huidige token moet een actieve ClaudeJob in deze + // SprintRun hebben geclaimt (typisch de SPRINT_IMPLEMENTATION-job). + const tokenJob = await prisma.claudeJob.findFirst({ + where: { + sprint_run_id, + claimed_by_token_id: auth.tokenId, + status: { in: ['CLAIMED', 'RUNNING'] }, + }, + select: { id: true }, + }) + if (!tokenJob) { + return toolError( + `Forbidden: current token has no active claim in sprint_run ${sprint_run_id}`, + ) + } + } + + const { task, storyStatusChange, sprintRunChanged } = + await updateTaskStatusWithStoryPromotion(task_id, dbStatus, undefined, sprint_run_id) + + // Voor SPRINT-flow: stuur expliciete sprint_run_status mee zodat + // worker zijn loop kan breken bij FAILED/PAUSED zonder extra query. + let sprintRunStatusChange: string | null = null + if (sprintRunChanged && sprint_run_id) { + const updated = await prisma.sprintRun.findUnique({ + where: { id: sprint_run_id }, + select: { status: true }, + }) + sprintRunStatusChange = updated?.status ?? null + } + return toolJson({ id: task.id, status: taskStatusToApi(task.status), implementation_plan: task.implementation_plan, story_status_change: storyStatusChange, + sprint_run_status_change: sprintRunStatusChange, }) }), ) diff --git a/src/tools/verify-sprint-task.ts b/src/tools/verify-sprint-task.ts new file mode 100644 index 0000000..fbd62d2 --- /dev/null +++ b/src/tools/verify-sprint-task.ts @@ -0,0 +1,151 @@ +// PBI-50 F3-T1: verify_sprint_task +// +// Execution-aware verify-tool voor SPRINT_IMPLEMENTATION-flow. +// Verschilt van verify_task_against_plan in: +// - input via execution_id (niet task_id) +// - base_sha komt uit SprintTaskExecution.base_sha; voor task[1..N] zonder +// base_sha vult de tool dynamisch met head_sha van vorige DONE-execution +// - plan_snapshot komt uit execution.plan_snapshot (frozen op claim-tijd) +// - resultaat opgeslagen op execution-row, niet op ClaudeJob.verify_result +// - response geeft allowed_for_done direct mee + +import { execFile } from 'node:child_process' +import { promisify } from 'node:util' +import { z } from 'zod' +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' +import { prisma } from '../prisma.js' +import { requireWriteAccess } from '../auth.js' +import { toolError, toolJson, withToolErrors } from '../errors.js' +import { classifyDiffAgainstPlan } from '../verify/classify.js' +import { checkVerifyGate } from './update-job-status.js' + +const exec = promisify(execFile) + +const inputSchema = z.object({ + execution_id: z.string().min(1), + worktree_path: z.string().min(1), + summary: z.string().max(2000).optional(), +}) + +export function registerVerifySprintTaskTool(server: McpServer) { + server.registerTool( + 'verify_sprint_task', + { + title: 'Verify SprintTaskExecution against frozen plan', + description: + 'Run `git diff ...HEAD` in the worktree and classify against the ' + + 'frozen plan_snapshot of this SprintTaskExecution. Returns ALIGNED|PARTIAL|EMPTY|' + + 'DIVERGENT plus reasoning + allowed_for_done (computed via the standard verify-gate ' + + 'with the execution\'s frozen verify_required/verify_only). ' + + 'For task[1..N] zonder base_sha vult de tool die in op basis van de head_sha van de ' + + 'vorige DONE-execution. Optional summary is opgeslagen voor PARTIAL/DIVERGENT-rationale ' + + 'en gebruikt door de gate. ' + + 'Call this BEFORE update_task_execution(DONE) for each task in the sprint batch. ' + + 'Forbidden for demo accounts.', + inputSchema, + annotations: { readOnlyHint: false }, + }, + async ({ execution_id, worktree_path, summary }) => + withToolErrors(async () => { + const auth = await requireWriteAccess() + + const execution = await prisma.sprintTaskExecution.findUnique({ + where: { id: execution_id }, + select: { + id: true, + sprint_job_id: true, + order: true, + base_sha: true, + plan_snapshot: true, + verify_required_snapshot: true, + verify_only_snapshot: true, + sprint_job: { + select: { claimed_by_token_id: true, status: true, kind: true }, + }, + }, + }) + if (!execution) { + return toolError(`SprintTaskExecution ${execution_id} not found`) + } + if (execution.sprint_job.kind !== 'SPRINT_IMPLEMENTATION') { + return toolError( + `Execution ${execution_id} hangs at job kind ${execution.sprint_job.kind}, expected SPRINT_IMPLEMENTATION`, + ) + } + if (execution.sprint_job.claimed_by_token_id !== auth.tokenId) { + return toolError( + `Forbidden: token does not own SPRINT_IMPLEMENTATION job for execution ${execution_id}`, + ) + } + + // Resolve base_sha. Voor task[0] is dit gevuld bij claim. Voor + // task[1..N] wordt dit dynamisch gevuld op basis van de vorige + // DONE-execution's head_sha. Persist na fill zodat herhaalde calls + // dezelfde base gebruiken. + let baseSha = execution.base_sha + if (!baseSha) { + const previousDone = await prisma.sprintTaskExecution.findFirst({ + where: { + sprint_job_id: execution.sprint_job_id, + order: { lt: execution.order }, + status: 'DONE', + head_sha: { not: null }, + }, + orderBy: { order: 'desc' }, + select: { head_sha: true }, + }) + if (!previousDone?.head_sha) { + return toolError( + `MISSING_BASE_SHA: execution ${execution_id} has no base_sha and no previous DONE-execution with head_sha. Did you skip update_task_execution(DONE) on a prior task?`, + ) + } + baseSha = previousDone.head_sha + await prisma.sprintTaskExecution.update({ + where: { id: execution_id }, + data: { base_sha: baseSha }, + }) + } + + let diff: string + try { + const { stdout } = await exec('git', ['diff', `${baseSha}...HEAD`], { + cwd: worktree_path, + }) + diff = stdout + } catch (err) { + return toolError( + `git diff failed in worktree (${worktree_path}): ${(err as Error).message ?? 'unknown error'}`, + ) + } + + const { result, reasoning } = classifyDiffAgainstPlan({ + diff, + plan: execution.plan_snapshot, + }) + + await prisma.sprintTaskExecution.update({ + where: { id: execution_id }, + data: { + verify_result: result, + ...(summary !== undefined ? { verify_summary: summary } : {}), + }, + }) + + const gate = checkVerifyGate( + result, + execution.verify_only_snapshot, + execution.verify_required_snapshot, + summary, + ) + + return toolJson({ + execution_id: execution.id, + result: result.toLowerCase() as 'aligned' | 'partial' | 'empty' | 'divergent', + reasoning, + base_sha: baseSha, + allowed_for_done: gate.allowed, + reason: gate.allowed ? null : gate.error, + }) + }), + ) +} From 876a7ad5d94271f32a44f7b34a4004cbea71b235 Mon Sep 17 00:00:00 2001 From: Madhura68 Date: Thu, 7 May 2026 12:48:04 +0200 Subject: [PATCH 4/6] PBI-50 F4: SPRINT_IMPLEMENTATION DONE/FAILED-paden + quota-pause MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - checkSprintVerifyGate: aggregate verify-gate via SprintTaskExecution. Per row: DONE → checkVerifyGate met snapshot-velden, SKIPPED → alleen toegestaan bij verify_required=ANY, FAILED/PENDING/RUNNING → blocker. Toolerror met opsomming bij faal. - finalizeSprintRunOnDone: idempotent SprintRun → DONE wanneer alle stories DONE/FAILED zijn. - maybeCreateSprintBatchPr: één draft-PR per sprint met sprint_goal als title. Hergebruikt bestaande PR via SprintRunChain bij resume. - DONE-pad: na update markPullRequestReady wanneer SprintRun DONE. - FAILED-pad: detect QUOTA_PAUSE: prefix → SprintRun PAUSED met pause_context (resume-instructions + last-completed-task); anders → FAILED met failure_reason + failed_task_id (uit error-string). - cancelPbiOnFailure overslaan voor SPRINT-jobs (geen task_id). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/tools/update-job-status.ts | 273 +++++++++++++++++++++++++++++++++ 1 file changed, 273 insertions(+) diff --git a/src/tools/update-job-status.ts b/src/tools/update-job-status.ts index 9f60006..d03336e 100644 --- a/src/tools/update-job-status.ts +++ b/src/tools/update-job-status.ts @@ -225,6 +225,106 @@ export function checkVerifyGate( return { allowed: true } } +// PBI-50 F4-T1: aggregate verify-gate voor SPRINT_IMPLEMENTATION DONE. +// Bron: alleen SprintTaskExecution-rows voor deze job. Per row: +// DONE → checkVerifyGate met snapshot-velden (gate per row) +// SKIPPED → alleen toegestaan als verify_required_snapshot === 'ANY' +// FAILED/PENDING/RUNNING → blocker (sprint mag niet DONE met openstaand werk) +// Bij overall pass → { allowed: true }; anders error met opsomming. +export async function checkSprintVerifyGate( + sprintJobId: string, +): Promise<{ allowed: true } | { allowed: false; error: string }> { + const executions = await prisma.sprintTaskExecution.findMany({ + where: { sprint_job_id: sprintJobId }, + orderBy: { order: 'asc' }, + select: { + id: true, + task_id: true, + order: true, + status: true, + verify_result: true, + verify_summary: true, + verify_required_snapshot: true, + verify_only_snapshot: true, + task: { select: { code: true, title: true } }, + }, + }) + if (executions.length === 0) { + return { + allowed: false, + error: + 'Sprint-job heeft geen SprintTaskExecution-rows. ' + + 'Dit duidt op een claim-bug; reclaim de sprint.', + } + } + + const blockers: string[] = [] + for (const exec of executions) { + const taskLabel = `${exec.task.code}: ${exec.task.title}` + if (exec.status === 'PENDING' || exec.status === 'RUNNING') { + blockers.push(`[${exec.status}] ${taskLabel} — onafgemaakt werk`) + continue + } + if (exec.status === 'FAILED') { + blockers.push(`[FAILED] ${taskLabel}`) + continue + } + if (exec.status === 'SKIPPED') { + if (exec.verify_required_snapshot !== 'ANY') { + blockers.push( + `[SKIPPED] ${taskLabel} — alleen toegestaan bij verify_required=ANY`, + ) + } + continue + } + // DONE: per-row gate + const gate = checkVerifyGate( + exec.verify_result, + exec.verify_only_snapshot, + exec.verify_required_snapshot, + exec.verify_summary ?? undefined, + ) + if (!gate.allowed) { + blockers.push(`[DONE-gate] ${taskLabel}: ${gate.error}`) + } + } + + if (blockers.length === 0) return { allowed: true } + return { + allowed: false, + error: + `Sprint kan niet DONE — ${blockers.length} task(s) blokkeren:\n` + + blockers.map((b) => ` - ${b}`).join('\n'), + } +} + +// PBI-50 F4-T2: idempotent SprintRun-finalisering. +// Invariant: alleen aanroepen wanneer alle stories in de sprint status +// DONE/FAILED/CANCELLED hebben. Effect: SprintRun.status → DONE + +// finished_at = NOW(). Idempotent — bij al-DONE: no-op. +export async function finalizeSprintRunOnDone(sprintRunId: string): Promise { + const sprintRun = await prisma.sprintRun.findUnique({ + where: { id: sprintRunId }, + select: { id: true, status: true, sprint_id: true }, + }) + if (!sprintRun) return + if (sprintRun.status === 'DONE') return // idempotent + + // Check alle stories in deze sprint zijn klaar + const openStories = await prisma.story.count({ + where: { + sprint_id: sprintRun.sprint_id, + status: { notIn: ['DONE', 'FAILED'] }, + }, + }) + if (openStories > 0) return // nog werk over — niet finaliseren + + await prisma.sprintRun.update({ + where: { id: sprintRunId }, + data: { status: 'DONE', finished_at: new Date() }, + }) +} + const DB_STATUS_MAP = { running: 'RUNNING', done: 'DONE', @@ -332,6 +432,68 @@ export async function maybeCreateAutoPr(opts: { return null } +// PBI-50 F4-T2: SPRINT_BATCH PR-flow. Eén draft-PR voor de hele sprint, +// title = sprint.sprint_goal. Mens reviewt + mergt zelf — geen auto-merge. +// Lijkt op de SPRINT-mode van maybeCreateAutoPr maar zonder task-context. +export async function maybeCreateSprintBatchPr(opts: { + jobId: string + productId: string + worktreePath: string + branchName: string + summary: string | undefined +}): Promise { + const { jobId, productId, worktreePath, branchName, summary } = opts + + const product = await prisma.product.findUnique({ + where: { id: productId }, + select: { auto_pr: true }, + }) + if (!product?.auto_pr) return null + + const job = await prisma.claudeJob.findUnique({ + where: { id: jobId }, + select: { + sprint_run_id: true, + sprint_run: { + select: { id: true, sprint: { select: { sprint_goal: true } } }, + }, + }, + }) + if (!job?.sprint_run) return null + + // Resume-pad: oude SprintRun heeft mogelijk al een PR via vorige run-job. + // Lookup via SprintRunChain (previous_run_id) of via sibling-SPRINT-job. + const previousRun = await prisma.sprintRun.findUnique({ + where: { id: job.sprint_run.id }, + select: { previous_run_id: true }, + }) + if (previousRun?.previous_run_id) { + const prevPr = await prisma.claudeJob.findFirst({ + where: { sprint_run_id: previousRun.previous_run_id, pr_url: { not: null } }, + select: { pr_url: true }, + }) + if (prevPr?.pr_url) return prevPr.pr_url + } + + const goal = job.sprint_run.sprint.sprint_goal + const sprintTitle = `Sprint: ${goal}`.slice(0, 200) + const body = summary + ? `${summary}\n\n---\n\n*Draft PR voor sprint-batch \`${job.sprint_run.id}\` (single-session). Wordt ready-for-review zodra alle tasks DONE zijn.*` + : `*Draft PR voor sprint-batch \`${job.sprint_run.id}\` (single-session). Wordt ready-for-review zodra alle tasks DONE zijn.*` + + const result = await createPullRequest({ + worktreePath, + branchName, + title: sprintTitle, + body, + draft: true, + enableAutoMerge: false, + }) + if ('url' in result) return result.url + console.warn(`[update_job_status] sprint-batch draft-PR skipped for job ${jobId}:`, result.error) + return null +} + export function registerUpdateJobStatusTool(server: McpServer) { server.registerTool( 'update_job_status', @@ -379,6 +541,7 @@ export function registerUpdateJobStatusTool(server: McpServer) { product_id: true, task_id: true, idea_id: true, + sprint_run_id: true, kind: true, verify_result: true, task: { select: { verify_only: true, verify_required: true } }, @@ -419,6 +582,19 @@ export function registerUpdateJobStatusTool(server: McpServer) { actualStatus = 'done' // pushedAt blijft undefined, branch/error overrides ook skipWorktreeCleanup = true + } else if (job.kind === 'SPRINT_IMPLEMENTATION') { + // PBI-50 F4-T2: aggregate verify-gate via SprintTaskExecution-rows. + // Geen single-task verify_result op de SPRINT-job zelf. + const gate = await checkSprintVerifyGate(job_id) + if (!gate.allowed) return toolError(gate.error) + + const plan = await prepareDoneUpdate(job_id, branch) + actualStatus = plan.dbStatus === 'DONE' ? 'done' : 'failed' + pushedAt = plan.pushedAt + if (plan.branchOverride !== undefined) branchToWrite = plan.branchOverride + if (plan.errorOverride !== undefined) errorToWrite = plan.errorOverride + skipWorktreeCleanup = plan.skipWorktreeCleanup + headShaToWrite = plan.headSha } else { const gate = checkVerifyGate( job.verify_result ?? null, @@ -440,6 +616,7 @@ export function registerUpdateJobStatusTool(server: McpServer) { // Auto-PR: best-effort, only when push actually happened. // M12: idee-jobs hebben geen task_id en geen branch — skip auto-PR. + // PBI-50: SPRINT_IMPLEMENTATION krijgt een eigen PR-flow (sprint-goal als title). let prUrl: string | null = null if ( actualStatus === 'done' && @@ -460,6 +637,23 @@ export function registerUpdateJobStatusTool(server: McpServer) { console.warn(`[update_job_status] auto-PR error for job ${job_id}:`, err) return null }) + } else if ( + actualStatus === 'done' && + pushedAt && + branchToWrite && + job.kind === 'SPRINT_IMPLEMENTATION' + ) { + const worktreeDir = getWorktreeRoot() + prUrl = await maybeCreateSprintBatchPr({ + jobId: job_id, + productId: job.product_id, + worktreePath: path.join(worktreeDir, job_id), + branchName: branchToWrite, + summary, + }).catch((err) => { + console.warn(`[update_job_status] sprint-batch PR error for job ${job_id}:`, err) + return null + }) } const dbStatus = DB_STATUS_MAP[actualStatus as keyof typeof DB_STATUS_MAP] @@ -493,6 +687,7 @@ export function registerUpdateJobStatusTool(server: McpServer) { error: true, started_at: true, finished_at: true, + head_sha: true, }, }) @@ -694,10 +889,88 @@ export function registerUpdateJobStatusTool(server: McpServer) { // cancel all queued/claimed/running siblings under the same PBI and // undo any pushed commits (close open PRs / open revert-PRs for // already-merged ones). Idempotent + non-blocking — never throws. + // PBI-50: SPRINT_IMPLEMENTATION SKIPS this — cascade naar tasks/stories/ + // PBIs is al gebeurd via per-task update_task_status('failed')-calls + // van de worker. Sprint-job heeft geen task_id; cancelPbi-flow past niet. if (actualStatus === 'failed' && job.kind === 'TASK_IMPLEMENTATION' && job.task_id) { await cancelPbiOnFailure(job_id) } + // PBI-50 F4-T2: SPRINT_IMPLEMENTATION DONE → finalize SprintRun. + if ( + actualStatus === 'done' && + job.kind === 'SPRINT_IMPLEMENTATION' && + job.sprint_run_id + ) { + try { + await finalizeSprintRunOnDone(job.sprint_run_id) + // Mark draft-PR ready-for-review als de SprintRun nu DONE is + const finalRun = await prisma.sprintRun.findUnique({ + where: { id: job.sprint_run_id }, + select: { status: true }, + }) + if (finalRun?.status === 'DONE' && updated.pr_url) { + try { + const ready = await markPullRequestReady({ prUrl: updated.pr_url }) + if ('error' in ready) { + console.warn( + `[update_job_status] sprint-batch markPullRequestReady failed for ${updated.pr_url}: ${ready.error}`, + ) + } + } catch (err) { + console.warn(`[update_job_status] sprint-batch markPullRequestReady error:`, err) + } + } + } catch (err) { + console.warn(`[update_job_status] finalizeSprintRunOnDone error:`, err) + } + } + + // PBI-50 F4-T3: SPRINT_IMPLEMENTATION FAILED → + // - Detect QUOTA_PAUSE: error-prefix → PAUSED met pause_context. + // - Anders: vul SprintRun.failure_reason + failed_task_id (uit error). + if (actualStatus === 'failed' && job.kind === 'SPRINT_IMPLEMENTATION' && job.sprint_run_id) { + const isQuotaPause = (errorToWrite ?? '').startsWith('QUOTA_PAUSE:') + if (isQuotaPause) { + // Vind laatst-DONE execution voor pause-context + const lastDone = await prisma.sprintTaskExecution.findFirst({ + where: { sprint_job_id: job_id, status: 'DONE' }, + orderBy: { order: 'desc' }, + select: { id: true, order: true, task_id: true }, + }) + await prisma.sprintRun.update({ + where: { id: job.sprint_run_id }, + data: { + status: 'PAUSED', + pause_context: { + pause_reason: 'QUOTA_DEPLETED', + paused_at: new Date().toISOString(), + resume_instructions: + 'Wacht tot quota is gereset, dan resume de SprintRun via de UI. Een nieuwe SprintRun wordt gemaakt met previous_run_id en branch hergebruik.', + last_completed_execution_id: lastDone?.id ?? null, + last_completed_order: lastDone?.order ?? null, + last_completed_task_id: lastDone?.task_id ?? null, + pr_url: updated.pr_url ?? null, + pr_head_sha: updated.head_sha ?? null, + conflict_files: [], + claude_question_id: '', + } as any, + }, + }) + } else { + const failedTaskId = (errorToWrite ?? '').match(/task[:\s]+([a-z0-9]+)/i)?.[1] ?? null + await prisma.sprintRun.update({ + where: { id: job.sprint_run_id }, + data: { + status: 'FAILED', + failure_reason: errorToWrite?.slice(0, 500) ?? null, + failed_task_id: failedTaskId, + finished_at: new Date(), + }, + }) + } + } + // PBI-9: release product-worktree locks on terminal transitions. // No-op for jobs without registered locks (i.e. TASK_IMPLEMENTATION). if (actualStatus === 'done' || actualStatus === 'failed') { From b80264c26ceb4cdd592cab5d987e4507f919fc67 Mon Sep 17 00:00:00 2001 From: Madhura68 Date: Thu, 7 May 2026 12:53:04 +0200 Subject: [PATCH 5/6] PBI-50 F5: tests voor SPRINT_IMPLEMENTATION-tools - update-job-status-sprint-gate: checkSprintVerifyGate per-row blockers, SKIPPED-policy, finalizeSprintRunOnDone idempotentie. - update-task-execution: token-coupling, lifecycle (RUNNING zet started_at, DONE/FAILED/SKIPPED zet finished_at), skip_reason. - job-heartbeat: token-mismatch error, non-SPRINT vs SPRINT response-shape, tolerantie voor pause_context=null. - verify-sprint-task: PARTIAL+summary gate-pass, PARTIAL zonder summary gate-fail, DIVERGENT met ALIGNED gate-fail, base_sha auto-fill via vorige DONE execution head_sha + persistence, MISSING_BASE_SHA error. Co-Authored-By: Claude Opus 4.7 (1M context) --- __tests__/job-heartbeat.test.ts | 137 +++++++++++ .../update-job-status-sprint-gate.test.ts | 192 ++++++++++++++++ __tests__/update-task-execution.test.ts | 199 ++++++++++++++++ __tests__/verify-sprint-task.test.ts | 216 ++++++++++++++++++ 4 files changed, 744 insertions(+) create mode 100644 __tests__/job-heartbeat.test.ts create mode 100644 __tests__/update-job-status-sprint-gate.test.ts create mode 100644 __tests__/update-task-execution.test.ts create mode 100644 __tests__/verify-sprint-task.test.ts diff --git a/__tests__/job-heartbeat.test.ts b/__tests__/job-heartbeat.test.ts new file mode 100644 index 0000000..896f317 --- /dev/null +++ b/__tests__/job-heartbeat.test.ts @@ -0,0 +1,137 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' + +vi.mock('../src/prisma.js', () => ({ + prisma: { + $queryRaw: vi.fn(), + sprintRun: { findUnique: vi.fn() }, + }, +})) + +vi.mock('../src/auth.js', async (importOriginal) => { + const original = await importOriginal() + return { ...original, requireWriteAccess: vi.fn() } +}) + +import { prisma } from '../src/prisma.js' +import { requireWriteAccess } from '../src/auth.js' +import { registerJobHeartbeatTool } from '../src/tools/job-heartbeat.js' +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' + +const mockPrisma = prisma as unknown as { + $queryRaw: ReturnType + sprintRun: { findUnique: ReturnType } +} +const mockAuth = requireWriteAccess as ReturnType + +const TOKEN_ID = 'tok-owner' + +function makeServer() { + let handler: (args: Record) => Promise + const server = { + registerTool: vi.fn((_name: string, _meta: unknown, fn: typeof handler) => { + handler = fn + }), + call: (args: Record) => handler(args), + } + registerJobHeartbeatTool(server as unknown as McpServer) + return server +} + +beforeEach(() => { + vi.clearAllMocks() + mockAuth.mockResolvedValue({ + userId: 'u-1', + tokenId: TOKEN_ID, + username: 'agent', + isDemo: false, + }) +}) + +describe('job_heartbeat', () => { + it('returns 403-style error when no row matched (token mismatch / terminal)', async () => { + mockPrisma.$queryRaw.mockResolvedValue([]) + const server = makeServer() + const result = (await server.call({ job_id: 'job-x' })) as { + content: { text: string }[] + isError?: boolean + } + expect(result.isError).toBe(true) + expect(result.content[0].text).toMatch(/not found|terminal|claimed_by/i) + }) + + it('non-SPRINT job returns ok + lease_until without sprint fields', async () => { + const lease = new Date() + mockPrisma.$queryRaw.mockResolvedValue([ + { + id: 'job-1', + lease_until: lease, + kind: 'TASK_IMPLEMENTATION', + sprint_run_id: null, + }, + ]) + const server = makeServer() + const result = (await server.call({ job_id: 'job-1' })) as { + content: { text: string }[] + } + const body = JSON.parse(result.content[0].text) + expect(body).toEqual({ + ok: true, + job_id: 'job-1', + lease_until: lease.toISOString(), + sprint_run_status: null, + sprint_run_pause_reason: null, + }) + expect(mockPrisma.sprintRun.findUnique).not.toHaveBeenCalled() + }) + + it('SPRINT job returns sprint_run_status from sibling lookup', async () => { + const lease = new Date() + mockPrisma.$queryRaw.mockResolvedValue([ + { + id: 'job-2', + lease_until: lease, + kind: 'SPRINT_IMPLEMENTATION', + sprint_run_id: 'sr-1', + }, + ]) + mockPrisma.sprintRun.findUnique.mockResolvedValue({ + status: 'PAUSED', + pause_context: { pause_reason: 'QUOTA_DEPLETED' }, + }) + + const server = makeServer() + const result = (await server.call({ job_id: 'job-2' })) as { + content: { text: string }[] + } + const body = JSON.parse(result.content[0].text) + expect(body).toMatchObject({ + ok: true, + sprint_run_status: 'PAUSED', + sprint_run_pause_reason: 'QUOTA_DEPLETED', + }) + }) + + it('SPRINT job tolerates missing pause_context', async () => { + const lease = new Date() + mockPrisma.$queryRaw.mockResolvedValue([ + { + id: 'job-3', + lease_until: lease, + kind: 'SPRINT_IMPLEMENTATION', + sprint_run_id: 'sr-2', + }, + ]) + mockPrisma.sprintRun.findUnique.mockResolvedValue({ + status: 'RUNNING', + pause_context: null, + }) + + const server = makeServer() + const result = (await server.call({ job_id: 'job-3' })) as { + content: { text: string }[] + } + const body = JSON.parse(result.content[0].text) + expect(body.sprint_run_status).toBe('RUNNING') + expect(body.sprint_run_pause_reason).toBeNull() + }) +}) diff --git a/__tests__/update-job-status-sprint-gate.test.ts b/__tests__/update-job-status-sprint-gate.test.ts new file mode 100644 index 0000000..e96b94a --- /dev/null +++ b/__tests__/update-job-status-sprint-gate.test.ts @@ -0,0 +1,192 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' + +vi.mock('../src/prisma.js', () => ({ + prisma: { + sprintTaskExecution: { + findMany: vi.fn(), + }, + sprintRun: { + findUnique: vi.fn(), + update: vi.fn(), + }, + story: { + count: vi.fn(), + }, + }, +})) + +import { prisma } from '../src/prisma.js' +import { + checkSprintVerifyGate, + finalizeSprintRunOnDone, +} from '../src/tools/update-job-status.js' + +type MockedPrisma = { + sprintTaskExecution: { findMany: ReturnType } + sprintRun: { + findUnique: ReturnType + update: ReturnType + } + story: { count: ReturnType } +} + +const mocked = prisma as unknown as MockedPrisma + +const LONG_SUMMARY = 'Refactor touched extra files for type narrowing.' + +function execRow(overrides: Record) { + return { + id: 'exec-' + Math.random().toString(36).slice(2, 8), + task_id: 't1', + order: 0, + status: 'DONE', + verify_result: 'ALIGNED', + verify_summary: null, + verify_required_snapshot: 'ALIGNED_OR_PARTIAL', + verify_only_snapshot: false, + task: { code: 'TASK-1', title: 'Sample task' }, + ...overrides, + } +} + +describe('checkSprintVerifyGate', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('rejects when no executions exist (claim-bug)', async () => { + mocked.sprintTaskExecution.findMany.mockResolvedValue([]) + const r = await checkSprintVerifyGate('job-x') + expect(r.allowed).toBe(false) + if (!r.allowed) expect(r.error).toMatch(/geen SprintTaskExecution-rows/i) + }) + + it('blocks PENDING/RUNNING executions', async () => { + mocked.sprintTaskExecution.findMany.mockResolvedValue([ + execRow({ status: 'PENDING' }), + execRow({ status: 'RUNNING' }), + ]) + const r = await checkSprintVerifyGate('job-x') + expect(r.allowed).toBe(false) + if (!r.allowed) { + expect(r.error).toMatch(/PENDING/) + expect(r.error).toMatch(/RUNNING/) + } + }) + + it('blocks FAILED executions', async () => { + mocked.sprintTaskExecution.findMany.mockResolvedValue([ + execRow({ status: 'FAILED' }), + ]) + const r = await checkSprintVerifyGate('job-x') + expect(r.allowed).toBe(false) + if (!r.allowed) expect(r.error).toMatch(/FAILED/) + }) + + it('blocks SKIPPED unless verify_required_snapshot=ANY', async () => { + mocked.sprintTaskExecution.findMany.mockResolvedValue([ + execRow({ status: 'SKIPPED', verify_required_snapshot: 'ALIGNED' }), + ]) + const r = await checkSprintVerifyGate('job-x') + expect(r.allowed).toBe(false) + if (!r.allowed) expect(r.error).toMatch(/SKIPPED/) + }) + + it('allows SKIPPED when verify_required_snapshot=ANY', async () => { + mocked.sprintTaskExecution.findMany.mockResolvedValue([ + execRow({ status: 'SKIPPED', verify_required_snapshot: 'ANY' }), + ]) + expect((await checkSprintVerifyGate('job-x')).allowed).toBe(true) + }) + + it('runs per-row gate for DONE executions', async () => { + // PARTIAL zonder summary onder ALIGNED_OR_PARTIAL → blocker + mocked.sprintTaskExecution.findMany.mockResolvedValue([ + execRow({ + status: 'DONE', + verify_result: 'PARTIAL', + verify_summary: null, + verify_required_snapshot: 'ALIGNED_OR_PARTIAL', + }), + ]) + const r = await checkSprintVerifyGate('job-x') + expect(r.allowed).toBe(false) + if (!r.allowed) expect(r.error).toMatch(/DONE-gate/) + }) + + it('passes when all DONE rows pass per-row gate', async () => { + mocked.sprintTaskExecution.findMany.mockResolvedValue([ + execRow({ verify_result: 'ALIGNED' }), + execRow({ + verify_result: 'PARTIAL', + verify_summary: LONG_SUMMARY, + verify_required_snapshot: 'ALIGNED_OR_PARTIAL', + }), + ]) + expect((await checkSprintVerifyGate('job-x')).allowed).toBe(true) + }) + + it('aggregates multiple blockers in one error message', async () => { + mocked.sprintTaskExecution.findMany.mockResolvedValue([ + execRow({ status: 'FAILED', task: { code: 'A', title: 'a' } }), + execRow({ status: 'PENDING', task: { code: 'B', title: 'b' } }), + ]) + const r = await checkSprintVerifyGate('job-x') + expect(r.allowed).toBe(false) + if (!r.allowed) { + expect(r.error).toMatch(/2 task\(s\) blokkeren/) + expect(r.error).toMatch(/A: a/) + expect(r.error).toMatch(/B: b/) + } + }) +}) + +describe('finalizeSprintRunOnDone', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('no-op when SprintRun already DONE (idempotent)', async () => { + mocked.sprintRun.findUnique.mockResolvedValue({ + id: 'sr-1', + status: 'DONE', + sprint_id: 's1', + }) + await finalizeSprintRunOnDone('sr-1') + expect(mocked.sprintRun.update).not.toHaveBeenCalled() + }) + + it('no-op when SprintRun does not exist', async () => { + mocked.sprintRun.findUnique.mockResolvedValue(null) + await finalizeSprintRunOnDone('sr-x') + expect(mocked.sprintRun.update).not.toHaveBeenCalled() + }) + + it('no-op when stories still open', async () => { + mocked.sprintRun.findUnique.mockResolvedValue({ + id: 'sr-1', + status: 'RUNNING', + sprint_id: 's1', + }) + mocked.story.count.mockResolvedValue(2) + await finalizeSprintRunOnDone('sr-1') + expect(mocked.sprintRun.update).not.toHaveBeenCalled() + }) + + it('sets SprintRun → DONE when all stories DONE/FAILED', async () => { + mocked.sprintRun.findUnique.mockResolvedValue({ + id: 'sr-1', + status: 'RUNNING', + sprint_id: 's1', + }) + mocked.story.count.mockResolvedValue(0) + await finalizeSprintRunOnDone('sr-1') + expect(mocked.sprintRun.update).toHaveBeenCalledWith({ + where: { id: 'sr-1' }, + data: expect.objectContaining({ + status: 'DONE', + finished_at: expect.any(Date), + }), + }) + }) +}) diff --git a/__tests__/update-task-execution.test.ts b/__tests__/update-task-execution.test.ts new file mode 100644 index 0000000..a893650 --- /dev/null +++ b/__tests__/update-task-execution.test.ts @@ -0,0 +1,199 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' + +vi.mock('../src/prisma.js', () => ({ + prisma: { + sprintTaskExecution: { + findUnique: vi.fn(), + update: vi.fn(), + }, + }, +})) + +vi.mock('../src/auth.js', async (importOriginal) => { + const original = await importOriginal() + return { ...original, requireWriteAccess: vi.fn() } +}) + +import { prisma } from '../src/prisma.js' +import { requireWriteAccess } from '../src/auth.js' +import { registerUpdateTaskExecutionTool } from '../src/tools/update-task-execution.js' +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' + +const mockPrisma = prisma as unknown as { + sprintTaskExecution: { + findUnique: ReturnType + update: ReturnType + } +} +const mockAuth = requireWriteAccess as ReturnType + +const TOKEN_ID = 'tok-owner' + +function makeServer() { + let handler: (args: Record) => Promise + const server = { + registerTool: vi.fn((_name: string, _meta: unknown, fn: typeof handler) => { + handler = fn + }), + call: (args: Record) => handler(args), + } + registerUpdateTaskExecutionTool(server as unknown as McpServer) + return server +} + +function execRecord(overrides: Record = {}) { + return { + id: 'exec-1', + sprint_job_id: 'job-1', + sprint_job: { + claimed_by_token_id: TOKEN_ID, + status: 'CLAIMED', + kind: 'SPRINT_IMPLEMENTATION', + }, + ...overrides, + } +} + +beforeEach(() => { + vi.clearAllMocks() + mockAuth.mockResolvedValue({ + userId: 'u-1', + tokenId: TOKEN_ID, + username: 'agent', + isDemo: false, + }) +}) + +describe('update_task_execution', () => { + it('rejects when execution not found', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue(null) + const server = makeServer() + const result = (await server.call({ + execution_id: 'missing', + status: 'RUNNING', + })) as { content: { text: string }[]; isError?: boolean } + expect(result.isError).toBe(true) + expect(result.content[0].text).toMatch(/not found/i) + }) + + it('rejects wrong job-kind', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue( + execRecord({ + sprint_job: { claimed_by_token_id: TOKEN_ID, status: 'CLAIMED', kind: 'TASK_IMPLEMENTATION' }, + }), + ) + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + status: 'RUNNING', + })) as { content: { text: string }[]; isError?: boolean } + expect(result.isError).toBe(true) + expect(result.content[0].text).toMatch(/SPRINT_IMPLEMENTATION/) + }) + + it('rejects when token does not own the job', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue( + execRecord({ + sprint_job: { claimed_by_token_id: 'other-token', status: 'CLAIMED', kind: 'SPRINT_IMPLEMENTATION' }, + }), + ) + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + status: 'RUNNING', + })) as { content: { text: string }[]; isError?: boolean } + expect(result.isError).toBe(true) + expect(result.content[0].text).toMatch(/Forbidden/) + }) + + it('rejects when job is in terminal state', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue( + execRecord({ + sprint_job: { claimed_by_token_id: TOKEN_ID, status: 'DONE', kind: 'SPRINT_IMPLEMENTATION' }, + }), + ) + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + status: 'DONE', + })) as { content: { text: string }[]; isError?: boolean } + expect(result.isError).toBe(true) + expect(result.content[0].text).toMatch(/terminal/) + }) + + it('writes started_at on RUNNING', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue(execRecord()) + mockPrisma.sprintTaskExecution.update.mockResolvedValue({ + id: 'exec-1', + status: 'RUNNING', + base_sha: null, + head_sha: null, + verify_result: null, + verify_summary: null, + skip_reason: null, + started_at: new Date(), + finished_at: null, + }) + + const server = makeServer() + await server.call({ execution_id: 'exec-1', status: 'RUNNING' }) + + const updateCall = mockPrisma.sprintTaskExecution.update.mock.calls[0][0] + expect(updateCall.data.status).toBe('RUNNING') + expect(updateCall.data.started_at).toBeInstanceOf(Date) + expect(updateCall.data.finished_at).toBeUndefined() + }) + + it('writes finished_at on DONE/FAILED/SKIPPED', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue(execRecord()) + mockPrisma.sprintTaskExecution.update.mockResolvedValue({ + id: 'exec-1', + status: 'DONE', + base_sha: 'sha-base', + head_sha: 'sha-head', + verify_result: null, + verify_summary: null, + skip_reason: null, + started_at: new Date(), + finished_at: new Date(), + }) + + const server = makeServer() + await server.call({ + execution_id: 'exec-1', + status: 'DONE', + head_sha: 'sha-head', + }) + + const updateCall = mockPrisma.sprintTaskExecution.update.mock.calls[0][0] + expect(updateCall.data.status).toBe('DONE') + expect(updateCall.data.finished_at).toBeInstanceOf(Date) + expect(updateCall.data.head_sha).toBe('sha-head') + }) + + it('persists skip_reason on SKIPPED', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue(execRecord()) + mockPrisma.sprintTaskExecution.update.mockResolvedValue({ + id: 'exec-1', + status: 'SKIPPED', + base_sha: null, + head_sha: null, + verify_result: null, + verify_summary: null, + skip_reason: 'no-op task', + started_at: null, + finished_at: new Date(), + }) + + const server = makeServer() + await server.call({ + execution_id: 'exec-1', + status: 'SKIPPED', + skip_reason: 'no-op task', + }) + + const updateCall = mockPrisma.sprintTaskExecution.update.mock.calls[0][0] + expect(updateCall.data.skip_reason).toBe('no-op task') + expect(updateCall.data.finished_at).toBeInstanceOf(Date) + }) +}) diff --git a/__tests__/verify-sprint-task.test.ts b/__tests__/verify-sprint-task.test.ts new file mode 100644 index 0000000..77bbc1b --- /dev/null +++ b/__tests__/verify-sprint-task.test.ts @@ -0,0 +1,216 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' + +vi.mock('../src/prisma.js', () => ({ + prisma: { + sprintTaskExecution: { + findUnique: vi.fn(), + findFirst: vi.fn(), + update: vi.fn(), + }, + }, +})) + +vi.mock('../src/auth.js', async (importOriginal) => { + const original = await importOriginal() + return { ...original, requireWriteAccess: vi.fn() } +}) + +vi.mock('../src/verify/classify.js', () => ({ + classifyDiffAgainstPlan: vi.fn(), +})) + +vi.mock('node:child_process', () => ({ + execFile: vi.fn(), +})) + +import { prisma } from '../src/prisma.js' +import { requireWriteAccess } from '../src/auth.js' +import { classifyDiffAgainstPlan } from '../src/verify/classify.js' +import { execFile } from 'node:child_process' +import { registerVerifySprintTaskTool } from '../src/tools/verify-sprint-task.js' +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' + +const mockPrisma = prisma as unknown as { + sprintTaskExecution: { + findUnique: ReturnType + findFirst: ReturnType + update: ReturnType + } +} +const mockAuth = requireWriteAccess as ReturnType +const mockClassify = classifyDiffAgainstPlan as ReturnType +const mockExecFile = execFile as unknown as ReturnType + +const TOKEN_ID = 'tok-owner' + +function makeServer() { + let handler: (args: Record) => Promise + const server = { + registerTool: vi.fn((_name: string, _meta: unknown, fn: typeof handler) => { + handler = fn + }), + call: (args: Record) => handler(args), + } + registerVerifySprintTaskTool(server as unknown as McpServer) + return server +} + +function stubGitDiff(stdout: string) { + // promisify(execFile) calls (cmd, args, opts, cb) + mockExecFile.mockImplementation( + ( + _cmd: string, + _args: string[], + _opts: unknown, + cb: (err: null, result: { stdout: string; stderr: string }) => void, + ) => { + cb(null, { stdout, stderr: '' }) + }, + ) +} + +function execRecord(overrides: Record = {}) { + return { + id: 'exec-1', + sprint_job_id: 'job-1', + order: 0, + base_sha: 'sha-base', + plan_snapshot: 'frozen plan', + verify_required_snapshot: 'ALIGNED_OR_PARTIAL', + verify_only_snapshot: false, + sprint_job: { + claimed_by_token_id: TOKEN_ID, + status: 'CLAIMED', + kind: 'SPRINT_IMPLEMENTATION', + }, + ...overrides, + } +} + +beforeEach(() => { + vi.clearAllMocks() + mockAuth.mockResolvedValue({ + userId: 'u-1', + tokenId: TOKEN_ID, + username: 'agent', + isDemo: false, + }) +}) + +describe('verify_sprint_task', () => { + it('rejects when execution not found', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue(null) + const server = makeServer() + const result = (await server.call({ + execution_id: 'missing', + worktree_path: '/tmp/wt', + })) as { content: { text: string }[]; isError?: boolean } + expect(result.isError).toBe(true) + expect(result.content[0].text).toMatch(/not found/i) + }) + + it('rejects wrong token', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue( + execRecord({ + sprint_job: { claimed_by_token_id: 'other', status: 'CLAIMED', kind: 'SPRINT_IMPLEMENTATION' }, + }), + ) + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + worktree_path: '/tmp/wt', + })) as { content: { text: string }[]; isError?: boolean } + expect(result.isError).toBe(true) + expect(result.content[0].text).toMatch(/Forbidden/) + }) + + it('PARTIAL with summary returns allowed_for_done=true under ALIGNED_OR_PARTIAL', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue(execRecord()) + stubGitDiff('diff --git a/x b/x\n+ change\n') + mockClassify.mockReturnValue({ result: 'PARTIAL', reasoning: 'extra files' }) + + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + worktree_path: '/tmp/wt', + summary: 'Refactor touched extra files for type narrowing.', + })) as { content: { text: string }[] } + const body = JSON.parse(result.content[0].text) + expect(body.result).toBe('partial') + expect(body.allowed_for_done).toBe(true) + expect(body.reason).toBeNull() + }) + + it('PARTIAL without summary returns allowed_for_done=false', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue(execRecord()) + stubGitDiff('diff --git a/x b/x\n') + mockClassify.mockReturnValue({ result: 'PARTIAL', reasoning: 'r' }) + + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + worktree_path: '/tmp/wt', + })) as { content: { text: string }[] } + const body = JSON.parse(result.content[0].text) + expect(body.result).toBe('partial') + expect(body.allowed_for_done).toBe(false) + expect(body.reason).toMatch(/summary/i) + }) + + it('DIVERGENT with strict ALIGNED returns allowed_for_done=false', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue( + execRecord({ verify_required_snapshot: 'ALIGNED' }), + ) + stubGitDiff('diff --git a/x b/x\n') + mockClassify.mockReturnValue({ result: 'DIVERGENT', reasoning: 'r' }) + + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + worktree_path: '/tmp/wt', + summary: 'Long enough summary describing the deviation rationale clearly.', + })) as { content: { text: string }[] } + const body = JSON.parse(result.content[0].text) + expect(body.allowed_for_done).toBe(false) + expect(body.reason).toMatch(/ALIGNED/) + }) + + it('auto-fills base_sha from previous DONE execution head_sha', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue( + execRecord({ order: 1, base_sha: null }), + ) + mockPrisma.sprintTaskExecution.findFirst.mockResolvedValue({ + head_sha: 'prev-head-sha', + }) + stubGitDiff('diff\n') + mockClassify.mockReturnValue({ result: 'ALIGNED', reasoning: 'ok' }) + + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + worktree_path: '/tmp/wt', + })) as { content: { text: string }[] } + const body = JSON.parse(result.content[0].text) + expect(body.base_sha).toBe('prev-head-sha') + + // Persisted back to row + const updateCalls = mockPrisma.sprintTaskExecution.update.mock.calls + const baseShaPersist = updateCalls.find((c) => c[0].data.base_sha === 'prev-head-sha') + expect(baseShaPersist).toBeDefined() + }) + + it('errors when base_sha cannot be derived (no prior DONE)', async () => { + mockPrisma.sprintTaskExecution.findUnique.mockResolvedValue( + execRecord({ order: 2, base_sha: null }), + ) + mockPrisma.sprintTaskExecution.findFirst.mockResolvedValue(null) + + const server = makeServer() + const result = (await server.call({ + execution_id: 'exec-1', + worktree_path: '/tmp/wt', + })) as { content: { text: string }[]; isError?: boolean } + expect(result.isError).toBe(true) + expect(result.content[0].text).toMatch(/MISSING_BASE_SHA/) + }) +}) From 98786f763f3e1ceff61b3bdb1ab7702308c68a76 Mon Sep 17 00:00:00 2001 From: Madhura68 Date: Thu, 7 May 2026 12:56:22 +0200 Subject: [PATCH 6/6] =?UTF-8?q?PBI-50=20F5:=20README=20=E2=80=94=20verify?= =?UTF-8?q?=5Fsprint=5Ftask,=20update=5Ftask=5Fexecution,=20job=5Fheartbea?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drie nieuwe tools voor SPRINT_IMPLEMENTATION-flow toegevoegd aan tool-tabel. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index af91dbd..fb20e38 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,9 @@ activity and create todos via native tool calls instead of curl. | `check_queue_empty` | Synchronous, non-blocking count of active jobs (QUEUED/CLAIMED/RUNNING); optional `product_id` scope | no | | `set_pbi_pr` | Write `pr_url` on a PBI and clear `pr_merged_at`. Idempotent: re-calling overwrites `pr_url` and resets `pr_merged_at` to null | no | | `mark_pbi_pr_merged` | Set `pr_merged_at = now()` on a PBI. Requires `pr_url` to already be set. Idempotent: re-calling overwrites the timestamp | no | +| `verify_sprint_task` | SPRINT_IMPLEMENTATION-flow: compare a `SprintTaskExecution`'s frozen `plan_snapshot` against `git diff ...HEAD`. Returns `verify_result` + `allowed_for_done`. For `task[1..N]` zonder base_sha vult de tool die in op basis van de head_sha van de vorige DONE-execution | yes (read-only) | +| `update_task_execution` | SPRINT_IMPLEMENTATION-flow: mutate `SprintTaskExecution.status` (PENDING/RUNNING/DONE/FAILED/SKIPPED). Token must own the parent SPRINT-job. Idempotent | no | +| `job_heartbeat` | Extend `claude_jobs.lease_until` by 5 min. For SPRINT-jobs: response includes `sprint_run_status` + `sprint_run_pause_reason` so the worker can break its task-loop on UI-side cancel/pause | no | Demo accounts may read but writes return `PERMISSION_DENIED`.