feat(gate): verify_required levels — ALIGNED/ALIGNED_OR_PARTIAL/ANY

Sluit story 'Verify-gate uitbreiden' in PBI 'Agent verify-flow hardening' af.

The previous gate weighed only EMPTY: any PARTIAL or DIVERGENT verify
slipped through. The Insights batch (2 May 2026) showed why that's
weak — agent-jobs claiming DONE while only delivering helpers, not
the requested UI components, with verify=DIVERGENT/PARTIAL accepted.

New decision matrix:

  null                       → reject (run verify_task_against_plan)
  EMPTY  + !verify_only      → reject
  EMPTY  + verify_only       → allowed
  ALIGNED                    → always allowed
  PARTIAL/DIVERGENT
    required=ALIGNED         → reject (strict task)
    required=ALIGNED_OR_PARTIAL (default) → allowed only if summary
                                            ≥20 chars (acknowledge drift)
    required=ANY             → allowed (refactor escape hatch)

`update_job_status('done')` now reads `task.verify_required` from the DB
(field added in Scrum4Me PR #53) and passes it + `summary` to the gate.
Tool description updated with the new rules.

Vendor submodule synced to pick up the schema enum.

Tests: 129/129 (was 120 + 9 new combinatorial gate tests).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Janpeter Visser 2026-05-02 17:52:49 +02:00
parent 0bcca15235
commit f11081920d
4 changed files with 131 additions and 33 deletions

View file

@ -1,39 +1,79 @@
import { describe, it, expect } from 'vitest'
import { checkVerifyGate } from '../src/tools/update-job-status.js'
const LONG_SUMMARY = 'Refactor touched extra files for type narrowing.'
describe('checkVerifyGate', () => {
it('rejects when verify_result is null — agent must verify first', () => {
it('rejects when verify_result is null', () => {
const r = checkVerifyGate(null, false)
expect(r.allowed).toBe(false)
if (!r.allowed) expect(r.error).toMatch(/verify_task_against_plan/i)
})
it('rejects when verify_result is EMPTY and task is not verify_only', () => {
it('rejects EMPTY when task is not verify_only', () => {
const r = checkVerifyGate('EMPTY', false)
expect(r.allowed).toBe(false)
if (!r.allowed) {
expect(r.error).toMatch(/EMPTY/i)
expect(r.error).toMatch(/verify_only/i)
}
if (!r.allowed) expect(r.error).toMatch(/EMPTY/i)
})
it('allows when verify_result is EMPTY and task IS verify_only', () => {
const r = checkVerifyGate('EMPTY', true)
expect(r.allowed).toBe(true)
it('allows EMPTY when task is verify_only', () => {
expect(checkVerifyGate('EMPTY', true).allowed).toBe(true)
})
it('allows when verify_result is ALIGNED', () => {
const r = checkVerifyGate('ALIGNED', false)
expect(r.allowed).toBe(true)
it('always allows ALIGNED', () => {
expect(checkVerifyGate('ALIGNED', false, 'ALIGNED').allowed).toBe(true)
expect(checkVerifyGate('ALIGNED', false, 'ALIGNED_OR_PARTIAL').allowed).toBe(true)
expect(checkVerifyGate('ALIGNED', false, 'ANY').allowed).toBe(true)
})
it('allows when verify_result is PARTIAL', () => {
describe('verify_required=ALIGNED (strict)', () => {
it('rejects PARTIAL', () => {
const r = checkVerifyGate('PARTIAL', false, 'ALIGNED', LONG_SUMMARY)
expect(r.allowed).toBe(false)
if (!r.allowed) expect(r.error).toMatch(/ALIGNED/)
})
it('rejects DIVERGENT', () => {
const r = checkVerifyGate('DIVERGENT', false, 'ALIGNED', LONG_SUMMARY)
expect(r.allowed).toBe(false)
})
})
describe('verify_required=ALIGNED_OR_PARTIAL (default — needs summary on drift)', () => {
it('rejects PARTIAL without summary', () => {
const r = checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', undefined)
expect(r.allowed).toBe(false)
if (!r.allowed) expect(r.error).toMatch(/summary/i)
})
it('rejects PARTIAL with too-short summary', () => {
const r = checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', 'short')
expect(r.allowed).toBe(false)
})
it('allows PARTIAL with long summary', () => {
expect(checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', LONG_SUMMARY).allowed).toBe(true)
})
it('rejects DIVERGENT without summary', () => {
expect(checkVerifyGate('DIVERGENT', false, 'ALIGNED_OR_PARTIAL', undefined).allowed).toBe(false)
})
it('allows DIVERGENT with long summary', () => {
expect(checkVerifyGate('DIVERGENT', false, 'ALIGNED_OR_PARTIAL', LONG_SUMMARY).allowed).toBe(true)
})
})
describe('verify_required=ANY (refactor escape hatch)', () => {
it('allows PARTIAL without summary', () => {
expect(checkVerifyGate('PARTIAL', false, 'ANY').allowed).toBe(true)
})
it('allows DIVERGENT without summary', () => {
expect(checkVerifyGate('DIVERGENT', false, 'ANY').allowed).toBe(true)
})
it('still rejects EMPTY (verify_only takes precedence)', () => {
expect(checkVerifyGate('EMPTY', false, 'ANY').allowed).toBe(false)
})
})
it('default verify_required=ALIGNED_OR_PARTIAL when omitted', () => {
// No third arg → falls back to ALIGNED_OR_PARTIAL → PARTIAL needs summary
const r = checkVerifyGate('PARTIAL', false)
expect(r.allowed).toBe(true)
})
it('allows when verify_result is DIVERGENT', () => {
const r = checkVerifyGate('DIVERGENT', false)
expect(r.allowed).toBe(true)
expect(r.allowed).toBe(false)
})
})

View file

@ -34,6 +34,19 @@ enum ClaudeJobStatus {
CANCELLED
}
enum VerifyResult {
ALIGNED
PARTIAL
EMPTY
DIVERGENT
}
enum VerifyRequired {
ALIGNED
ALIGNED_OR_PARTIAL
ANY
}
enum TaskStatus {
TO_DO
IN_PROGRESS
@ -57,13 +70,6 @@ enum SprintStatus {
COMPLETED
}
enum VerifyResult {
ALIGNED
PARTIAL
EMPTY
DIVERGENT
}
model User {
id String @id @default(cuid())
username String @unique
@ -219,6 +225,8 @@ model Sprint {
product_id String
sprint_goal String
status SprintStatus @default(ACTIVE)
start_date DateTime? @db.Date
end_date DateTime? @db.Date
created_at DateTime @default(now())
completed_at DateTime?
stories Story[]
@ -240,8 +248,9 @@ model Task {
priority Int
sort_order Float
status TaskStatus @default(TO_DO)
verify_only Boolean @default(false)
created_at DateTime @default(now())
verify_only Boolean @default(false)
verify_required VerifyRequired @default(ALIGNED_OR_PARTIAL)
created_at DateTime @default(now())
updated_at DateTime @updatedAt
claude_questions ClaudeQuestion[]
claude_jobs ClaudeJob[]
@ -266,12 +275,12 @@ model ClaudeJob {
started_at DateTime?
finished_at DateTime?
pushed_at DateTime?
verify_result VerifyResult?
plan_snapshot String?
branch String?
pr_url String?
summary String?
error String?
verify_result VerifyResult?
retry_count Int @default(0)
created_at DateTime @default(now())
updated_at DateTime @updatedAt
@ -279,6 +288,7 @@ model ClaudeJob {
@@index([user_id, status])
@@index([task_id, status])
@@index([status, claimed_at])
@@index([status, finished_at])
@@map("claude_jobs")
}

View file

@ -122,9 +122,29 @@ export async function prepareDoneUpdate(
}
}
export type VerifyRequired = 'ALIGNED' | 'ALIGNED_OR_PARTIAL' | 'ANY'
const SUMMARY_MIN_LENGTH = 20
/**
* Validate whether a CLAIMED/RUNNING job can transition to DONE based on its
* verify_result + the task's verify_required level.
*
* Decision matrix:
* verifyResult=null reject (run verify_task_against_plan first)
* EMPTY + !verify_only reject
* EMPTY + verify_only allowed
* ALIGNED always allowed
* PARTIAL/DIVERGENT
* required=ALIGNED reject (strict task)
* required=ALIGNED_OR_PARTIAL require non-empty summary explaining drift
* required=ANY allowed (refactor/multi-file edit)
*/
export function checkVerifyGate(
verifyResult: string | null,
verifyOnly: boolean,
verifyRequired: VerifyRequired = 'ALIGNED_OR_PARTIAL',
summary: string | undefined = undefined,
): { allowed: true } | { allowed: false; error: string } {
if (verifyResult === null) {
return {
@ -132,7 +152,8 @@ export function checkVerifyGate(
error: 'Roep eerst verify_task_against_plan aan voordat je DONE markeert.',
}
}
if (verifyResult === 'EMPTY' && !verifyOnly) {
if (verifyResult === 'EMPTY') {
if (verifyOnly) return { allowed: true }
return {
allowed: false,
error:
@ -140,6 +161,28 @@ export function checkVerifyGate(
'Markeer de task als verify_only of pas de implementatie aan.',
}
}
if (verifyResult === 'ALIGNED') return { allowed: true }
// PARTIAL or DIVERGENT
if (verifyRequired === 'ANY') return { allowed: true }
if (verifyRequired === 'ALIGNED') {
return {
allowed: false,
error:
`Plan vereist ALIGNED maar verify gaf ${verifyResult}. ` +
`Pas de implementatie aan zodat alle plan-paden zijn afgedekt, ` +
`of stel verify_required in op ALIGNED_OR_PARTIAL/ANY.`,
}
}
// verifyRequired === 'ALIGNED_OR_PARTIAL': vereist summary
if (!summary || summary.trim().length < SUMMARY_MIN_LENGTH) {
return {
allowed: false,
error:
`Verify gaf ${verifyResult}. Geef een summary (≥${SUMMARY_MIN_LENGTH} chars) die uitlegt ` +
`waarom de implementatie afwijkt van het plan, of stel verify_required in op ANY.`,
}
}
return { allowed: true }
}
@ -218,7 +261,10 @@ export function registerUpdateJobStatusTool(server: McpServer) {
'running (start), done (finished), failed (error). ' +
'The Bearer token must match the token that claimed the job. ' +
'Before marking done: call verify_task_against_plan first — done is rejected when ' +
'verify_result is null or EMPTY (unless task.verify_only is true). ' +
'verify_result is null, EMPTY (unless task.verify_only is true), or when the verify level ' +
'doesnt meet task.verify_required: ALIGNED-only is strict; ALIGNED_OR_PARTIAL accepts ' +
'PARTIAL/DIVERGENT but requires a non-empty summary (≥20 chars) explaining the drift; ANY ' +
'accepts everything. ' +
'Automatically emits an SSE event so the Scrum4Me UI updates in real time. ' +
'Response includes next_action: when wait_for_job_again, immediately call wait_for_job again. When queue_empty, the agent batch is done.',
inputSchema,
@ -238,7 +284,7 @@ export function registerUpdateJobStatusTool(server: McpServer) {
product_id: true,
task_id: true,
verify_result: true,
task: { select: { verify_only: true } },
task: { select: { verify_only: true, verify_required: true } },
},
})
@ -261,6 +307,8 @@ export function registerUpdateJobStatusTool(server: McpServer) {
const gate = checkVerifyGate(
job.verify_result ?? null,
job.task?.verify_only ?? false,
(job.task?.verify_required ?? 'ALIGNED_OR_PARTIAL') as VerifyRequired,
summary,
)
if (!gate.allowed) return toolError(gate.error)

2
vendor/scrum4me vendored

@ -1 +1 @@
Subproject commit 794f7afd2edfef63f468ef89fe28826a3b611d17
Subproject commit e02c6ff9d9eef142cd72011d46f565a10e4b23ac