feat(gate): verify_required levels — ALIGNED/ALIGNED_OR_PARTIAL/ANY
Sluit story 'Verify-gate uitbreiden' in PBI 'Agent verify-flow hardening' af.
The previous gate weighed only EMPTY: any PARTIAL or DIVERGENT verify
slipped through. The Insights batch (2 May 2026) showed why that's
weak — agent-jobs claiming DONE while only delivering helpers, not
the requested UI components, with verify=DIVERGENT/PARTIAL accepted.
New decision matrix:
null → reject (run verify_task_against_plan)
EMPTY + !verify_only → reject
EMPTY + verify_only → allowed
ALIGNED → always allowed
PARTIAL/DIVERGENT
required=ALIGNED → reject (strict task)
required=ALIGNED_OR_PARTIAL (default) → allowed only if summary
≥20 chars (acknowledge drift)
required=ANY → allowed (refactor escape hatch)
`update_job_status('done')` now reads `task.verify_required` from the DB
(field added in Scrum4Me PR #53) and passes it + `summary` to the gate.
Tool description updated with the new rules.
Vendor submodule synced to pick up the schema enum.
Tests: 129/129 (was 120 + 9 new combinatorial gate tests).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0bcca15235
commit
f11081920d
4 changed files with 131 additions and 33 deletions
|
|
@ -1,39 +1,79 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
import { checkVerifyGate } from '../src/tools/update-job-status.js'
|
||||
|
||||
const LONG_SUMMARY = 'Refactor touched extra files for type narrowing.'
|
||||
|
||||
describe('checkVerifyGate', () => {
|
||||
it('rejects when verify_result is null — agent must verify first', () => {
|
||||
it('rejects when verify_result is null', () => {
|
||||
const r = checkVerifyGate(null, false)
|
||||
expect(r.allowed).toBe(false)
|
||||
if (!r.allowed) expect(r.error).toMatch(/verify_task_against_plan/i)
|
||||
})
|
||||
|
||||
it('rejects when verify_result is EMPTY and task is not verify_only', () => {
|
||||
it('rejects EMPTY when task is not verify_only', () => {
|
||||
const r = checkVerifyGate('EMPTY', false)
|
||||
expect(r.allowed).toBe(false)
|
||||
if (!r.allowed) {
|
||||
expect(r.error).toMatch(/EMPTY/i)
|
||||
expect(r.error).toMatch(/verify_only/i)
|
||||
}
|
||||
if (!r.allowed) expect(r.error).toMatch(/EMPTY/i)
|
||||
})
|
||||
|
||||
it('allows when verify_result is EMPTY and task IS verify_only', () => {
|
||||
const r = checkVerifyGate('EMPTY', true)
|
||||
expect(r.allowed).toBe(true)
|
||||
it('allows EMPTY when task is verify_only', () => {
|
||||
expect(checkVerifyGate('EMPTY', true).allowed).toBe(true)
|
||||
})
|
||||
|
||||
it('allows when verify_result is ALIGNED', () => {
|
||||
const r = checkVerifyGate('ALIGNED', false)
|
||||
expect(r.allowed).toBe(true)
|
||||
it('always allows ALIGNED', () => {
|
||||
expect(checkVerifyGate('ALIGNED', false, 'ALIGNED').allowed).toBe(true)
|
||||
expect(checkVerifyGate('ALIGNED', false, 'ALIGNED_OR_PARTIAL').allowed).toBe(true)
|
||||
expect(checkVerifyGate('ALIGNED', false, 'ANY').allowed).toBe(true)
|
||||
})
|
||||
|
||||
it('allows when verify_result is PARTIAL', () => {
|
||||
describe('verify_required=ALIGNED (strict)', () => {
|
||||
it('rejects PARTIAL', () => {
|
||||
const r = checkVerifyGate('PARTIAL', false, 'ALIGNED', LONG_SUMMARY)
|
||||
expect(r.allowed).toBe(false)
|
||||
if (!r.allowed) expect(r.error).toMatch(/ALIGNED/)
|
||||
})
|
||||
it('rejects DIVERGENT', () => {
|
||||
const r = checkVerifyGate('DIVERGENT', false, 'ALIGNED', LONG_SUMMARY)
|
||||
expect(r.allowed).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
describe('verify_required=ALIGNED_OR_PARTIAL (default — needs summary on drift)', () => {
|
||||
it('rejects PARTIAL without summary', () => {
|
||||
const r = checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', undefined)
|
||||
expect(r.allowed).toBe(false)
|
||||
if (!r.allowed) expect(r.error).toMatch(/summary/i)
|
||||
})
|
||||
it('rejects PARTIAL with too-short summary', () => {
|
||||
const r = checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', 'short')
|
||||
expect(r.allowed).toBe(false)
|
||||
})
|
||||
it('allows PARTIAL with long summary', () => {
|
||||
expect(checkVerifyGate('PARTIAL', false, 'ALIGNED_OR_PARTIAL', LONG_SUMMARY).allowed).toBe(true)
|
||||
})
|
||||
it('rejects DIVERGENT without summary', () => {
|
||||
expect(checkVerifyGate('DIVERGENT', false, 'ALIGNED_OR_PARTIAL', undefined).allowed).toBe(false)
|
||||
})
|
||||
it('allows DIVERGENT with long summary', () => {
|
||||
expect(checkVerifyGate('DIVERGENT', false, 'ALIGNED_OR_PARTIAL', LONG_SUMMARY).allowed).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('verify_required=ANY (refactor escape hatch)', () => {
|
||||
it('allows PARTIAL without summary', () => {
|
||||
expect(checkVerifyGate('PARTIAL', false, 'ANY').allowed).toBe(true)
|
||||
})
|
||||
it('allows DIVERGENT without summary', () => {
|
||||
expect(checkVerifyGate('DIVERGENT', false, 'ANY').allowed).toBe(true)
|
||||
})
|
||||
it('still rejects EMPTY (verify_only takes precedence)', () => {
|
||||
expect(checkVerifyGate('EMPTY', false, 'ANY').allowed).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
it('default verify_required=ALIGNED_OR_PARTIAL when omitted', () => {
|
||||
// No third arg → falls back to ALIGNED_OR_PARTIAL → PARTIAL needs summary
|
||||
const r = checkVerifyGate('PARTIAL', false)
|
||||
expect(r.allowed).toBe(true)
|
||||
})
|
||||
|
||||
it('allows when verify_result is DIVERGENT', () => {
|
||||
const r = checkVerifyGate('DIVERGENT', false)
|
||||
expect(r.allowed).toBe(true)
|
||||
expect(r.allowed).toBe(false)
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -34,6 +34,19 @@ enum ClaudeJobStatus {
|
|||
CANCELLED
|
||||
}
|
||||
|
||||
enum VerifyResult {
|
||||
ALIGNED
|
||||
PARTIAL
|
||||
EMPTY
|
||||
DIVERGENT
|
||||
}
|
||||
|
||||
enum VerifyRequired {
|
||||
ALIGNED
|
||||
ALIGNED_OR_PARTIAL
|
||||
ANY
|
||||
}
|
||||
|
||||
enum TaskStatus {
|
||||
TO_DO
|
||||
IN_PROGRESS
|
||||
|
|
@ -57,13 +70,6 @@ enum SprintStatus {
|
|||
COMPLETED
|
||||
}
|
||||
|
||||
enum VerifyResult {
|
||||
ALIGNED
|
||||
PARTIAL
|
||||
EMPTY
|
||||
DIVERGENT
|
||||
}
|
||||
|
||||
model User {
|
||||
id String @id @default(cuid())
|
||||
username String @unique
|
||||
|
|
@ -219,6 +225,8 @@ model Sprint {
|
|||
product_id String
|
||||
sprint_goal String
|
||||
status SprintStatus @default(ACTIVE)
|
||||
start_date DateTime? @db.Date
|
||||
end_date DateTime? @db.Date
|
||||
created_at DateTime @default(now())
|
||||
completed_at DateTime?
|
||||
stories Story[]
|
||||
|
|
@ -240,8 +248,9 @@ model Task {
|
|||
priority Int
|
||||
sort_order Float
|
||||
status TaskStatus @default(TO_DO)
|
||||
verify_only Boolean @default(false)
|
||||
created_at DateTime @default(now())
|
||||
verify_only Boolean @default(false)
|
||||
verify_required VerifyRequired @default(ALIGNED_OR_PARTIAL)
|
||||
created_at DateTime @default(now())
|
||||
updated_at DateTime @updatedAt
|
||||
claude_questions ClaudeQuestion[]
|
||||
claude_jobs ClaudeJob[]
|
||||
|
|
@ -266,12 +275,12 @@ model ClaudeJob {
|
|||
started_at DateTime?
|
||||
finished_at DateTime?
|
||||
pushed_at DateTime?
|
||||
verify_result VerifyResult?
|
||||
plan_snapshot String?
|
||||
branch String?
|
||||
pr_url String?
|
||||
summary String?
|
||||
error String?
|
||||
verify_result VerifyResult?
|
||||
retry_count Int @default(0)
|
||||
created_at DateTime @default(now())
|
||||
updated_at DateTime @updatedAt
|
||||
|
|
@ -279,6 +288,7 @@ model ClaudeJob {
|
|||
@@index([user_id, status])
|
||||
@@index([task_id, status])
|
||||
@@index([status, claimed_at])
|
||||
@@index([status, finished_at])
|
||||
@@map("claude_jobs")
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -122,9 +122,29 @@ export async function prepareDoneUpdate(
|
|||
}
|
||||
}
|
||||
|
||||
export type VerifyRequired = 'ALIGNED' | 'ALIGNED_OR_PARTIAL' | 'ANY'
|
||||
|
||||
const SUMMARY_MIN_LENGTH = 20
|
||||
|
||||
/**
|
||||
* Validate whether a CLAIMED/RUNNING job can transition to DONE based on its
|
||||
* verify_result + the task's verify_required level.
|
||||
*
|
||||
* Decision matrix:
|
||||
* verifyResult=null → reject (run verify_task_against_plan first)
|
||||
* EMPTY + !verify_only → reject
|
||||
* EMPTY + verify_only → allowed
|
||||
* ALIGNED → always allowed
|
||||
* PARTIAL/DIVERGENT
|
||||
* required=ALIGNED → reject (strict task)
|
||||
* required=ALIGNED_OR_PARTIAL → require non-empty summary explaining drift
|
||||
* required=ANY → allowed (refactor/multi-file edit)
|
||||
*/
|
||||
export function checkVerifyGate(
|
||||
verifyResult: string | null,
|
||||
verifyOnly: boolean,
|
||||
verifyRequired: VerifyRequired = 'ALIGNED_OR_PARTIAL',
|
||||
summary: string | undefined = undefined,
|
||||
): { allowed: true } | { allowed: false; error: string } {
|
||||
if (verifyResult === null) {
|
||||
return {
|
||||
|
|
@ -132,7 +152,8 @@ export function checkVerifyGate(
|
|||
error: 'Roep eerst verify_task_against_plan aan voordat je DONE markeert.',
|
||||
}
|
||||
}
|
||||
if (verifyResult === 'EMPTY' && !verifyOnly) {
|
||||
if (verifyResult === 'EMPTY') {
|
||||
if (verifyOnly) return { allowed: true }
|
||||
return {
|
||||
allowed: false,
|
||||
error:
|
||||
|
|
@ -140,6 +161,28 @@ export function checkVerifyGate(
|
|||
'Markeer de task als verify_only of pas de implementatie aan.',
|
||||
}
|
||||
}
|
||||
if (verifyResult === 'ALIGNED') return { allowed: true }
|
||||
|
||||
// PARTIAL or DIVERGENT
|
||||
if (verifyRequired === 'ANY') return { allowed: true }
|
||||
if (verifyRequired === 'ALIGNED') {
|
||||
return {
|
||||
allowed: false,
|
||||
error:
|
||||
`Plan vereist ALIGNED maar verify gaf ${verifyResult}. ` +
|
||||
`Pas de implementatie aan zodat alle plan-paden zijn afgedekt, ` +
|
||||
`of stel verify_required in op ALIGNED_OR_PARTIAL/ANY.`,
|
||||
}
|
||||
}
|
||||
// verifyRequired === 'ALIGNED_OR_PARTIAL': vereist summary
|
||||
if (!summary || summary.trim().length < SUMMARY_MIN_LENGTH) {
|
||||
return {
|
||||
allowed: false,
|
||||
error:
|
||||
`Verify gaf ${verifyResult}. Geef een summary (≥${SUMMARY_MIN_LENGTH} chars) die uitlegt ` +
|
||||
`waarom de implementatie afwijkt van het plan, of stel verify_required in op ANY.`,
|
||||
}
|
||||
}
|
||||
return { allowed: true }
|
||||
}
|
||||
|
||||
|
|
@ -218,7 +261,10 @@ export function registerUpdateJobStatusTool(server: McpServer) {
|
|||
'running (start), done (finished), failed (error). ' +
|
||||
'The Bearer token must match the token that claimed the job. ' +
|
||||
'Before marking done: call verify_task_against_plan first — done is rejected when ' +
|
||||
'verify_result is null or EMPTY (unless task.verify_only is true). ' +
|
||||
'verify_result is null, EMPTY (unless task.verify_only is true), or when the verify level ' +
|
||||
'doesn’t meet task.verify_required: ALIGNED-only is strict; ALIGNED_OR_PARTIAL accepts ' +
|
||||
'PARTIAL/DIVERGENT but requires a non-empty summary (≥20 chars) explaining the drift; ANY ' +
|
||||
'accepts everything. ' +
|
||||
'Automatically emits an SSE event so the Scrum4Me UI updates in real time. ' +
|
||||
'Response includes next_action: when wait_for_job_again, immediately call wait_for_job again. When queue_empty, the agent batch is done.',
|
||||
inputSchema,
|
||||
|
|
@ -238,7 +284,7 @@ export function registerUpdateJobStatusTool(server: McpServer) {
|
|||
product_id: true,
|
||||
task_id: true,
|
||||
verify_result: true,
|
||||
task: { select: { verify_only: true } },
|
||||
task: { select: { verify_only: true, verify_required: true } },
|
||||
},
|
||||
})
|
||||
|
||||
|
|
@ -261,6 +307,8 @@ export function registerUpdateJobStatusTool(server: McpServer) {
|
|||
const gate = checkVerifyGate(
|
||||
job.verify_result ?? null,
|
||||
job.task?.verify_only ?? false,
|
||||
(job.task?.verify_required ?? 'ALIGNED_OR_PARTIAL') as VerifyRequired,
|
||||
summary,
|
||||
)
|
||||
if (!gate.allowed) return toolError(gate.error)
|
||||
|
||||
|
|
|
|||
2
vendor/scrum4me
vendored
2
vendor/scrum4me
vendored
|
|
@ -1 +1 @@
|
|||
Subproject commit 794f7afd2edfef63f468ef89fe28826a3b611d17
|
||||
Subproject commit e02c6ff9d9eef142cd72011d46f565a10e4b23ac
|
||||
Loading…
Add table
Add a link
Reference in a new issue