feat: verify_task_against_plan calls classifyDiffAgainstPlan + saves verify_result to DB

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Janpeter Visser 2026-05-01 12:59:17 +02:00
parent 24e933fc2f
commit e63ea7026b
2 changed files with 172 additions and 48 deletions

View file

@ -0,0 +1,123 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
// Swap the Prisma client module for a stub that exposes only the two calls
// referenced in this file, `task.findUnique` and `claudeJob.update`, each as a
// bare mock function. No database is touched by these tests.
// NOTE(review): Vitest documents that `vi.mock` calls are hoisted above the
// file's imports, which is why the `import` of this module may follow it.
vi.mock('../src/prisma.js', () => ({
prisma: {
task: { findUnique: vi.fn() },
claudeJob: { update: vi.fn() },
},
}))
// Swap the classifier module for a stub whose `classifyDiffAgainstPlan` is a
// bare mock function; each test decides what it returns via `mockReturnValue`.
vi.mock('../src/verify/classify.js', () => ({
classifyDiffAgainstPlan: vi.fn(),
}))
import { prisma } from '../src/prisma.js'
import { classifyDiffAgainstPlan } from '../src/verify/classify.js'
import { getDiffInWorktree, saveVerifyResult } from '../src/tools/verify-task-against-plan.js'
// Typed view of the mocked Prisma client: only the members stubbed in the
// `vi.mock('../src/prisma.js')` factory are described, each as a mock function.
const mockPrisma = prisma as unknown as {
task: { findUnique: ReturnType<typeof vi.fn> }
claudeJob: { update: ReturnType<typeof vi.fn> }
}
// Typed handle on the mocked classifier so tests can set its return value and
// inspect its calls.
const mockClassify = classifyDiffAgainstPlan as ReturnType<typeof vi.fn>
// Mock node:child_process so getDiffInWorktree doesn't need a real git repo.
// NOTE(review): this call sits after the import of the module under test; it
// relies on Vitest hoisting `vi.mock` above imports — confirm if the runner changes.
vi.mock('node:child_process', () => ({
execFile: vi.fn(),
}))
import { execFile } from 'node:child_process'
// Typed handle on the mocked `execFile`; the stub helpers below install its
// implementation per test.
const mockExecFile = execFile as unknown as ReturnType<typeof vi.fn>
/**
 * Makes the mocked `execFile` report success.
 *
 * The installed implementation takes the callback-last form
 * `(cmd, args, opts, cb)` that `promisify` invokes, and immediately calls
 * `cb` with a `null` error and `{ stdout, stderr: '' }`.
 *
 * @param stdout - Text handed back as the command's standard output.
 */
function stubExecFile(stdout: string) {
  type SuccessCallback = (err: null, result: { stdout: string; stderr: string }) => void

  const succeed = (_cmd: string, _args: string[], _opts: unknown, done: SuccessCallback) => {
    const payload = { stdout, stderr: '' }
    done(null, payload)
  }

  mockExecFile.mockImplementation(succeed)
}
/**
 * Makes the mocked `execFile` report failure.
 *
 * The installed implementation calls the trailing callback with a new
 * `Error` carrying `message` and no result argument.
 *
 * @param message - Message of the error passed to the callback.
 */
function stubExecFileError(message: string) {
  type FailureCallback = (err: Error) => void

  const fail = (_cmd: string, _args: string[], _opts: unknown, done: FailureCallback) => {
    const failure = new Error(message)
    done(failure)
  }

  mockExecFile.mockImplementation(fail)
}
/**
 * Four-line diff fixture that touches `src/verify/classify.ts`; the tests feed
 * it to the stubbed `execFile` as git's stdout.
 */
const ALIGNED_DIFF = [
  'diff --git a/src/verify/classify.ts b/src/verify/classify.ts',
  '--- a/src/verify/classify.ts',
  '+++ b/src/verify/classify.ts',
  '+export function classifyDiffAgainstPlan(opts) {}',
].join('\n')
// Unit tests for getDiffInWorktree; `execFile` is the mock, so no git process runs.
describe('getDiffInWorktree', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('returns stdout from git diff', async () => {
    const worktree = '/worktrees/job-abc'
    stubExecFile(ALIGNED_DIFF)

    const diffText = await getDiffInWorktree(worktree)

    expect(diffText).toBe(ALIGNED_DIFF)
    // The trailing function argument is the callback supplied by promisify.
    expect(mockExecFile).toHaveBeenCalledWith(
      'git',
      ['diff', 'origin/main...HEAD'],
      expect.objectContaining({ cwd: worktree }),
      expect.any(Function),
    )
  })

  it('throws when git diff fails', async () => {
    stubExecFileError('not a git repo')

    const pending = getDiffInWorktree('/bad/path')

    await expect(pending).rejects.toThrow('not a git repo')
  })
})
// Unit test for saveVerifyResult against the mocked Prisma client.
describe('saveVerifyResult', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('updates claudeJob with the given verify_result', async () => {
    const { update } = mockPrisma.claudeJob
    update.mockResolvedValue({})

    await saveVerifyResult('job-123', 'ALIGNED')

    // The job is addressed by id and only `verify_result` is written.
    expect(update).toHaveBeenCalledWith({
      where: { id: 'job-123' },
      data: { verify_result: 'ALIGNED' },
    })
  })
})
// Chains the exported helpers with the mocked classifier: diff -> classify -> save.
// NOTE(review): this file imports only getDiffInWorktree and saveVerifyResult, so
// these cases do not invoke the registered tool handler itself; they pin the data
// flow between the helpers (the diff that was read is the diff that is classified,
// and the classifier's result is the value that is persisted).
describe('verify_task_against_plan — integration of helpers', () => {
  beforeEach(() => {
    vi.clearAllMocks()
    stubExecFile(ALIGNED_DIFF)
    mockClassify.mockReturnValue({ result: 'ALIGNED', reasoning: 'All paths covered.' })
    mockPrisma.claudeJob.update.mockResolvedValue({})
  })

  it('happy path: runs diff, classifies, saves verify_result', async () => {
    const plan = 'Modify `src/verify/classify.ts`.'

    const diff = await getDiffInWorktree('/wt/job-abc')
    expect(diff).toBe(ALIGNED_DIFF)

    const { result } = mockClassify({ diff, plan })
    // Exact-argument and call-count checks instead of a loose objectContaining.
    expect(mockClassify).toHaveBeenCalledTimes(1)
    expect(mockClassify).toHaveBeenCalledWith({ diff, plan })
    expect(result).toBe('ALIGNED')

    // Persist what the classifier returned rather than a hard-coded literal.
    await saveVerifyResult('job-123', result)
    expect(mockPrisma.claudeJob.update).toHaveBeenCalledTimes(1)
    expect(mockPrisma.claudeJob.update).toHaveBeenCalledWith({
      where: { id: 'job-123' },
      data: { verify_result: 'ALIGNED' },
    })
  })

  it('EMPTY path: saves EMPTY to DB', async () => {
    stubExecFile('') // no diff output
    mockClassify.mockReturnValue({ result: 'EMPTY', reasoning: 'Geen bestandswijzigingen in de diff.' })

    const diff = await getDiffInWorktree('/wt/job-xyz')
    expect(diff).toBe('')

    const { result } = mockClassify({ diff, plan: null })
    expect(mockClassify).toHaveBeenCalledWith({ diff: '', plan: null })
    expect(result).toBe('EMPTY')

    await saveVerifyResult('job-xyz', result)
    expect(mockPrisma.claudeJob.update).toHaveBeenCalledTimes(1)
    expect(mockPrisma.claudeJob.update).toHaveBeenCalledWith({
      where: { id: 'job-xyz' },
      data: { verify_result: 'EMPTY' },
    })
  })
})

View file

@ -1,28 +1,47 @@
import { execFile } from 'node:child_process'
import { promisify } from 'node:util'
import { z } from 'zod'
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
import { prisma } from '../prisma.js'
import { getAuth } from '../auth.js'
import { userCanAccessTask } from '../access.js'
import { toolError, toolJson, withToolErrors } from '../errors.js'
import { buildVerifyResult, renderMarkdownReport } from '../lib/verify-plan.js'
import { classifyDiffAgainstPlan, type VerifyResultValue } from '../verify/classify.js'
/** Promise-returning form of `execFile`; getDiffInWorktree reads `stdout` from its result. */
const exec = promisify(execFile)

/** Validator shared by both tool arguments: a string of at least one character. */
const nonEmptyString = z.string().min(1)

/** Arguments accepted by the `verify_task_against_plan` tool. */
const inputSchema = z.object({
  task_id: nonEmptyString,
  worktree_path: nonEmptyString,
})
/**
 * Cap on captured `git diff` output. Node's documented default `maxBuffer`
 * for `execFile` is 1 MiB; a larger diff would reject the call instead of
 * being returned, so the limit is raised explicitly.
 */
const GIT_DIFF_MAX_BUFFER_BYTES = 64 * 1024 * 1024

/**
 * Runs `git diff origin/main...HEAD` with `worktreePath` as the working
 * directory and returns whatever git wrote to stdout.
 *
 * @param worktreePath - Directory passed to the child process as `cwd`.
 * @returns The diff text; an empty string when git prints nothing.
 * @throws Rejects with the error produced by `execFile` (for example when git
 *   fails in that directory or the output exceeds the buffer cap).
 */
export async function getDiffInWorktree(worktreePath: string): Promise<string> {
  const { stdout } = await exec('git', ['diff', 'origin/main...HEAD'], {
    cwd: worktreePath,
    maxBuffer: GIT_DIFF_MAX_BUFFER_BYTES,
  })
  return stdout
}
/**
 * Writes `result` to the `verify_result` field of the claude job with id `jobId`.
 *
 * @param jobId - Id of the job row to update.
 * @param result - Classification value to store.
 * @returns Resolves with no value once the update call has settled; a rejection
 *   from the Prisma call propagates to the caller.
 */
export async function saveVerifyResult(jobId: string, result: VerifyResultValue): Promise<void> {
  const where = { id: jobId }
  const data = { verify_result: result }
  await prisma.claudeJob.update({ where, data })
}
export function registerVerifyTaskAgainstPlanTool(server: McpServer) {
server.registerTool(
'verify_task_against_plan',
{
title: 'Verify task against plan',
description:
'Compare the frozen plan_snapshot (captured at claim time) against current ' +
'task.implementation_plan, story logs, and commits. Returns a markdown report ' +
'with per-AC ✓/✗/? heuristic checks and a drift-score. Read-only — demo users allowed.',
'Run `git diff origin/main...HEAD` in the worktree and compare it against the ' +
'frozen plan_snapshot captured at claim time. Returns ALIGNED|PARTIAL|EMPTY|DIVERGENT ' +
'and saves verify_result on the active job. ' +
'Call this BEFORE update_job_status("done"). ' +
'If the result is EMPTY and task.verify_only is false, update_job_status("done") will be rejected.',
inputSchema,
annotations: { readOnlyHint: true },
annotations: { readOnlyHint: false },
},
async ({ task_id }) =>
async ({ task_id, worktree_path }) =>
withToolErrors(async () => {
const auth = await getAuth()
if (!auth) return toolError('Unauthorized')
@ -34,62 +53,44 @@ export function registerVerifyTaskAgainstPlanTool(server: McpServer) {
where: { id: task_id },
select: {
id: true,
title: true,
implementation_plan: true,
story: {
select: {
id: true,
acceptance_criteria: true,
logs: {
orderBy: { created_at: 'asc' },
select: {
type: true,
content: true,
commit_hash: true,
commit_message: true,
},
},
},
},
verify_only: true,
claude_jobs: {
where: { status: { in: ['CLAIMED', 'RUNNING', 'DONE', 'FAILED'] } },
where: { status: { in: ['CLAIMED', 'RUNNING'] } },
orderBy: { created_at: 'desc' },
take: 1,
select: { plan_snapshot: true },
select: { id: true, plan_snapshot: true },
},
},
})
if (!task) return toolError(`Task ${task_id} not found`)
const latestJob = task.claude_jobs[0] ?? null
const planSnapshot = latestJob ? latestJob.plan_snapshot : null
const activeJob = task.claude_jobs[0] ?? null
const implementationLogs = task.story.logs
.filter((l) => l.type === 'IMPLEMENTATION_PLAN')
.map((l) => l.content)
let diff: string
try {
diff = await getDiffInWorktree(worktree_path)
} catch (err) {
return toolError(
`git diff failed in worktree (${worktree_path}): ${(err as Error).message ?? 'unknown error'}`,
)
}
const commits = task.story.logs
.filter((l) => l.type === 'COMMIT')
.map((l) => ({ hash: l.commit_hash, message: l.commit_message }))
const result = buildVerifyResult({
taskId: task.id,
taskTitle: task.title,
planSnapshot,
currentPlan: task.implementation_plan,
acceptanceCriteriaText: task.story.acceptance_criteria,
implementationLogs,
commits,
const { result, reasoning } = classifyDiffAgainstPlan({
diff,
plan: activeJob?.plan_snapshot ?? null,
})
if (activeJob) {
await saveVerifyResult(activeJob.id, result)
}
return toolJson({
report: renderMarkdownReport(result),
task_id: result.taskId,
drift_score: result.driftScore,
ac_results: result.acceptanceCriteria,
plan_edited: result.planEdited,
has_baseline: result.hasBaseline,
result: result.toLowerCase() as 'aligned' | 'partial' | 'empty' | 'divergent',
reasoning,
verify_only: task.verify_only,
task_id,
job_id: activeJob?.id ?? null,
})
}),
)