From 1fe6ccf6095f06516bdb0c7ecfdde594e2e9a3ec Mon Sep 17 00:00:00 2001 From: Janpeter Visser Date: Sat, 2 May 2026 17:55:06 +0200 Subject: [PATCH] =?UTF-8?q?feat(gate):=20verify=5Frequired=20levels=20?= =?UTF-8?q?=E2=80=94=20ALIGNED/ALIGNED=5FOR=5FPARTIAL/ANY=20(#16)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sluit story 'Verify-gate uitbreiden' in PBI 'Agent verify-flow hardening' af. The previous gate weighed only EMPTY: any PARTIAL or DIVERGENT verify slipped through. The Insights batch (2 May 2026) showed why that's weak — agent-jobs claiming DONE while only delivering helpers, not the requested UI components, with verify=DIVERGENT/PARTIAL accepted. New decision matrix: null → reject (run verify_task_against_plan) EMPTY + !verify_only → reject EMPTY + verify_only → allowed ALIGNED → always allowed PARTIAL/DIVERGENT required=ALIGNED → reject (strict task) required=ALIGNED_OR_PARTIAL (default) → allowed only if summary ≥20 chars (acknowledge drift) required=ANY → allowed (refactor escape hatch) `update_job_status('done')` now reads `task.verify_required` from the DB (field added in Scrum4Me PR #53) and passes it + `summary` to the gate. Tool description updated with the new rules. Vendor submodule synced to pick up the schema enum. Tests: 129/129 (was 120 + 9 new combinatorial gate tests). Co-authored-by: Claude Opus 4.7 (1M context) --- __tests__/update-job-status-gate.test.ts | 78 ++++++++++++++++++------ prisma/schema.prisma | 30 ++++++--- src/tools/update-job-status.ts | 54 +++++++++++++++- vendor/scrum4me | 2 +- 4 files changed, 131 insertions(+), 33 deletions(-) diff --git a/__tests__/update-job-status-gate.test.ts b/__tests__/update-job-status-gate.test.ts index 4a2b07e..ce181cb 100644 --- a/__tests__/update-job-status-gate.test.ts +++ b/__tests__/update-job-status-gate.test.ts @@ -1,39 +1,79 @@ import { describe, it, expect } from 'vitest' import { checkVerifyGate } from '../src/tools/update-job-status.js' +const LONG_SUMMARY = 'Refactor touched extra files for type narrowing.' + describe('checkVerifyGate', () => { - it('rejects when verify_result is null — agent must verify first', () => { + it('rejects when verify_result is null', () => { const r = checkVerifyGate(null, false) expect(r.allowed).toBe(false) if (!r.allowed) expect(r.error).toMatch(/verify_task_against_plan/i) }) - it('rejects when verify_result is EMPTY and task is not verify_only', () => { + it('rejects EMPTY when task is not verify_only', () => { const r = checkVerifyGate('EMPTY', false) expect(r.allowed).toBe(false) - if (!r.allowed) { - expect(r.error).toMatch(/EMPTY/i) - expect(r.error).toMatch(/verify_only/i) - } + if (!r.allowed) expect(r.error).toMatch(/EMPTY/i) }) - it('allows when verify_result is EMPTY and task IS verify_only', () => { - const r = checkVerifyGate('EMPTY', true) - expect(r.allowed).toBe(true) + it('allows EMPTY when task is verify_only', () => { + expect(checkVerifyGate('EMPTY', true).allowed).toBe(true) }) - it('allows when verify_result is ALIGNED', () => { - const r = checkVerifyGate('ALIGNED', false) - expect(r.allowed).toBe(true) + it('always allows ALIGNED', () => { + expect(checkVerifyGate('ALIGNED', false, 'ALIGNED').allowed).toBe(true) + expect(checkVerifyGate('ALIGNED', false, 'ALIGNED_OR_PARTIAL').allowed).toBe(true) + expect(checkVerifyGate('ALIGNED', false, 'ANY').allowed).toBe(true) }) - it('allows when verify_result is PARTIAL', () => { + describe('verify_required=ALIGNED (strict)', () => { + it('rejects PARTIAL', () => { + const r = checkVerifyGate('PARTIAL', false, 'ALIGNED', LONG_SUMMARY) + expect(r.allowed).toBe(false) + if (!r.allowed) expect(r.error).toMatch(/ALIGNED/) + }) + it('rejects DIVERGENT', () => { + const r = checkVerifyGate('DIVERGENT', false, 'ALIGNED', LONG_SUMMARY) + expect(r.allowed).toBe(false) + }) + }) + + describe('verify_required=ALIGNED_OR_PARTIAL (default — needs summary on drift)', () => { + it('rejects PARTIAL without summary', () => { + const r = checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', undefined) + expect(r.allowed).toBe(false) + if (!r.allowed) expect(r.error).toMatch(/summary/i) + }) + it('rejects PARTIAL with too-short summary', () => { + const r = checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', 'short') + expect(r.allowed).toBe(false) + }) + it('allows PARTIAL with long summary', () => { + expect(checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', LONG_SUMMARY).allowed).toBe(true) + }) + it('rejects DIVERGENT without summary', () => { + expect(checkVerifyGate('DIVERGENT', false, 'ALIGNED_OR_PARTIAL', undefined).allowed).toBe(false) + }) + it('allows DIVERGENT with long summary', () => { + expect(checkVerifyGate('DIVERGENT', false, 'ALIGNED_OR_PARTIAL', LONG_SUMMARY).allowed).toBe(true) + }) + }) + + describe('verify_required=ANY (refactor escape hatch)', () => { + it('allows PARTIAL without summary', () => { + expect(checkVerifyGate('PARTIAL', false, 'ANY').allowed).toBe(true) + }) + it('allows DIVERGENT without summary', () => { + expect(checkVerifyGate('DIVERGENT', false, 'ANY').allowed).toBe(true) + }) + it('still rejects EMPTY (verify_only takes precedence)', () => { + expect(checkVerifyGate('EMPTY', false, 'ANY').allowed).toBe(false) + }) + }) + + it('default verify_required=ALIGNED_OR_PARTIAL when omitted', () => { + // No third arg → falls back to ALIGNED_OR_PARTIAL → PARTIAL needs summary const r = checkVerifyGate('PARTIAL', false) - expect(r.allowed).toBe(true) - }) - - it('allows when verify_result is DIVERGENT', () => { - const r = checkVerifyGate('DIVERGENT', false) - expect(r.allowed).toBe(true) + expect(r.allowed).toBe(false) }) }) diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 19ddd81..ee5beb6 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -34,6 +34,19 @@ enum ClaudeJobStatus { CANCELLED } +enum VerifyResult { + ALIGNED + PARTIAL + EMPTY + DIVERGENT +} + +enum VerifyRequired { + ALIGNED + ALIGNED_OR_PARTIAL + ANY +} + enum TaskStatus { TO_DO IN_PROGRESS @@ -57,13 +70,6 @@ enum SprintStatus { COMPLETED } -enum VerifyResult { - ALIGNED - PARTIAL - EMPTY - DIVERGENT -} - model User { id String @id @default(cuid()) username String @unique @@ -219,6 +225,8 @@ model Sprint { product_id String sprint_goal String status SprintStatus @default(ACTIVE) + start_date DateTime? @db.Date + end_date DateTime? @db.Date created_at DateTime @default(now()) completed_at DateTime? stories Story[] @@ -240,8 +248,9 @@ model Task { priority Int sort_order Float status TaskStatus @default(TO_DO) - verify_only Boolean @default(false) - created_at DateTime @default(now()) + verify_only Boolean @default(false) + verify_required VerifyRequired @default(ALIGNED_OR_PARTIAL) + created_at DateTime @default(now()) updated_at DateTime @updatedAt claude_questions ClaudeQuestion[] claude_jobs ClaudeJob[] @@ -266,12 +275,12 @@ model ClaudeJob { started_at DateTime? finished_at DateTime? pushed_at DateTime? + verify_result VerifyResult? plan_snapshot String? branch String? pr_url String? summary String? error String? - verify_result VerifyResult? retry_count Int @default(0) created_at DateTime @default(now()) updated_at DateTime @updatedAt @@ -279,6 +288,7 @@ model ClaudeJob { @@index([user_id, status]) @@index([task_id, status]) @@index([status, claimed_at]) + @@index([status, finished_at]) @@map("claude_jobs") } diff --git a/src/tools/update-job-status.ts b/src/tools/update-job-status.ts index 4833c6c..73ac0d8 100644 --- a/src/tools/update-job-status.ts +++ b/src/tools/update-job-status.ts @@ -122,9 +122,29 @@ export async function prepareDoneUpdate( } } +export type VerifyRequired = 'ALIGNED' | 'ALIGNED_OR_PARTIAL' | 'ANY' + +const SUMMARY_MIN_LENGTH = 20 + +/** + * Validate whether a CLAIMED/RUNNING job can transition to DONE based on its + * verify_result + the task's verify_required level. + * + * Decision matrix: + * verifyResult=null → reject (run verify_task_against_plan first) + * EMPTY + !verify_only → reject + * EMPTY + verify_only → allowed + * ALIGNED → always allowed + * PARTIAL/DIVERGENT + * required=ALIGNED → reject (strict task) + * required=ALIGNED_OR_PARTIAL → require non-empty summary explaining drift + * required=ANY → allowed (refactor/multi-file edit) + */ export function checkVerifyGate( verifyResult: string | null, verifyOnly: boolean, + verifyRequired: VerifyRequired = 'ALIGNED_OR_PARTIAL', + summary: string | undefined = undefined, ): { allowed: true } | { allowed: false; error: string } { if (verifyResult === null) { return { @@ -132,7 +152,8 @@ export function checkVerifyGate( error: 'Roep eerst verify_task_against_plan aan voordat je DONE markeert.', } } - if (verifyResult === 'EMPTY' && !verifyOnly) { + if (verifyResult === 'EMPTY') { + if (verifyOnly) return { allowed: true } return { allowed: false, error: @@ -140,6 +161,28 @@ export function checkVerifyGate( 'Markeer de task als verify_only of pas de implementatie aan.', } } + if (verifyResult === 'ALIGNED') return { allowed: true } + + // PARTIAL or DIVERGENT + if (verifyRequired === 'ANY') return { allowed: true } + if (verifyRequired === 'ALIGNED') { + return { + allowed: false, + error: + `Plan vereist ALIGNED maar verify gaf ${verifyResult}. ` + + `Pas de implementatie aan zodat alle plan-paden zijn afgedekt, ` + + `of stel verify_required in op ALIGNED_OR_PARTIAL/ANY.`, + } + } + // verifyRequired === 'ALIGNED_OR_PARTIAL': vereist summary + if (!summary || summary.trim().length < SUMMARY_MIN_LENGTH) { + return { + allowed: false, + error: + `Verify gaf ${verifyResult}. Geef een summary (≥${SUMMARY_MIN_LENGTH} chars) die uitlegt ` + + `waarom de implementatie afwijkt van het plan, of stel verify_required in op ANY.`, + } + } return { allowed: true } } @@ -218,7 +261,10 @@ export function registerUpdateJobStatusTool(server: McpServer) { 'running (start), done (finished), failed (error). ' + 'The Bearer token must match the token that claimed the job. ' + 'Before marking done: call verify_task_against_plan first — done is rejected when ' + - 'verify_result is null or EMPTY (unless task.verify_only is true). ' + + 'verify_result is null, EMPTY (unless task.verify_only is true), or when the verify level ' + + 'doesn’t meet task.verify_required: ALIGNED-only is strict; ALIGNED_OR_PARTIAL accepts ' + + 'PARTIAL/DIVERGENT but requires a non-empty summary (≥20 chars) explaining the drift; ANY ' + + 'accepts everything. ' + 'Automatically emits an SSE event so the Scrum4Me UI updates in real time. ' + 'Response includes next_action: when wait_for_job_again, immediately call wait_for_job again. When queue_empty, the agent batch is done.', inputSchema, @@ -238,7 +284,7 @@ export function registerUpdateJobStatusTool(server: McpServer) { product_id: true, task_id: true, verify_result: true, - task: { select: { verify_only: true } }, + task: { select: { verify_only: true, verify_required: true } }, }, }) @@ -261,6 +307,8 @@ export function registerUpdateJobStatusTool(server: McpServer) { const gate = checkVerifyGate( job.verify_result ?? null, job.task?.verify_only ?? false, + (job.task?.verify_required ?? 'ALIGNED_OR_PARTIAL') as VerifyRequired, + summary, ) if (!gate.allowed) return toolError(gate.error) diff --git a/vendor/scrum4me b/vendor/scrum4me index 794f7af..e02c6ff 160000 --- a/vendor/scrum4me +++ b/vendor/scrum4me @@ -1 +1 @@ -Subproject commit 794f7afd2edfef63f468ef89fe28826a3b611d17 +Subproject commit e02c6ff9d9eef142cd72011d46f565a10e4b23ac