feat: verify_task_against_plan calls classifyDiffAgainstPlan + saves verify_result to DB
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
24e933fc2f
commit
e63ea7026b
2 changed files with 172 additions and 48 deletions
123
__tests__/verify-task-against-plan.test.ts
Normal file
123
__tests__/verify-task-against-plan.test.ts
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
|
||||
vi.mock('../src/prisma.js', () => ({
|
||||
prisma: {
|
||||
task: { findUnique: vi.fn() },
|
||||
claudeJob: { update: vi.fn() },
|
||||
},
|
||||
}))
|
||||
|
||||
vi.mock('../src/verify/classify.js', () => ({
|
||||
classifyDiffAgainstPlan: vi.fn(),
|
||||
}))
|
||||
|
||||
import { prisma } from '../src/prisma.js'
|
||||
import { classifyDiffAgainstPlan } from '../src/verify/classify.js'
|
||||
import { getDiffInWorktree, saveVerifyResult } from '../src/tools/verify-task-against-plan.js'
|
||||
|
||||
const mockPrisma = prisma as unknown as {
|
||||
task: { findUnique: ReturnType<typeof vi.fn> }
|
||||
claudeJob: { update: ReturnType<typeof vi.fn> }
|
||||
}
|
||||
const mockClassify = classifyDiffAgainstPlan as ReturnType<typeof vi.fn>
|
||||
|
||||
// Mock node:child_process so getDiffInWorktree doesn't need a real git repo
|
||||
vi.mock('node:child_process', () => ({
|
||||
execFile: vi.fn(),
|
||||
}))
|
||||
|
||||
import { execFile } from 'node:child_process'
|
||||
const mockExecFile = execFile as unknown as ReturnType<typeof vi.fn>
|
||||
|
||||
/**
 * Makes the mocked `execFile` report success.
 *
 * `promisify` drives `execFile` in callback form — `(cmd, args, opts, cb)` — so
 * the stub ignores the first three arguments and completes the callback with a
 * `{ stdout, stderr }` result whose `stderr` is always empty.
 *
 * @param stdout Text the fake `git` invocation should produce on stdout.
 */
function stubExecFile(stdout: string) {
  type SuccessCallback = (err: null, result: { stdout: string; stderr: string }) => void

  const succeed = (_cmd: string, _args: string[], _opts: unknown, done: SuccessCallback) => {
    const output = { stdout, stderr: '' }
    done(null, output)
  }

  mockExecFile.mockImplementation(succeed)
}
|
||||
|
||||
/**
 * Makes the mocked `execFile` report failure.
 *
 * The callback is completed with a fresh `Error` carrying `message` and no
 * result argument, which is how a failed child process surfaces to `promisify`.
 *
 * @param message Message of the error handed to the callback.
 */
function stubExecFileError(message: string) {
  type FailureCallback = (err: Error) => void

  const fail = (_cmd: string, _args: string[], _opts: unknown, done: FailureCallback) => {
    done(new Error(message))
  }

  mockExecFile.mockImplementation(fail)
}
|
||||
|
||||
const ALIGNED_DIFF = `diff --git a/src/verify/classify.ts b/src/verify/classify.ts
|
||||
--- a/src/verify/classify.ts
|
||||
+++ b/src/verify/classify.ts
|
||||
+export function classifyDiffAgainstPlan(opts) {}`
|
||||
|
||||
// getDiffInWorktree against the mocked execFile: the success case checks both
// the returned text and the exact git invocation; the failure case checks that
// the callback error surfaces as a rejection.
describe('getDiffInWorktree', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('returns stdout from git diff', async () => {
    const worktree = '/worktrees/job-abc'
    stubExecFile(ALIGNED_DIFF)

    const diff = await getDiffInWorktree(worktree)

    expect(diff).toBe(ALIGNED_DIFF)

    const expectedArgs = ['diff', 'origin/main...HEAD']
    const expectedOptions = expect.objectContaining({ cwd: worktree })
    expect(mockExecFile).toHaveBeenCalledWith('git', expectedArgs, expectedOptions, expect.any(Function))
  })

  it('throws when git diff fails', async () => {
    stubExecFileError('not a git repo')

    const pending = getDiffInWorktree('/bad/path')

    await expect(pending).rejects.toThrow('not a git repo')
  })
})
|
||||
|
||||
// saveVerifyResult must forward the job id and result to prisma.claudeJob.update
// in exactly the { where, data } shape asserted below.
describe('saveVerifyResult', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('updates claudeJob with the given verify_result', async () => {
    const updateMock = mockPrisma.claudeJob.update
    updateMock.mockResolvedValue({})

    await saveVerifyResult('job-123', 'ALIGNED')

    const expectedCall = {
      where: { id: 'job-123' },
      data: { verify_result: 'ALIGNED' },
    }
    expect(updateMock).toHaveBeenCalledWith(expectedCall)
  })
})
|
||||
|
||||
// Chains the exported helpers in the order the tool uses them: diff -> classify -> save.
// NOTE: the classifier is the mock itself and is invoked directly here; the
// registered tool handler is not exercised by this suite.
describe('verify_task_against_plan — integration of helpers', () => {
  beforeEach(() => {
    vi.clearAllMocks()

    // Defaults for every test; individual tests override what they need.
    stubExecFile(ALIGNED_DIFF)
    mockClassify.mockReturnValue({ result: 'ALIGNED', reasoning: 'All paths covered.' })
    mockPrisma.claudeJob.update.mockResolvedValue({})
  })

  it('happy path: runs diff, classifies, saves verify_result', async () => {
    // Simulate: getDiffInWorktree + classifyDiffAgainstPlan + saveVerifyResult
    const diff = await getDiffInWorktree('/wt/job-abc')

    const classifyInput = { diff, plan: 'Modify `src/verify/classify.ts`.' }
    mockClassify(classifyInput)
    expect(mockClassify).toHaveBeenCalledWith(expect.objectContaining({ diff }))

    await saveVerifyResult('job-123', 'ALIGNED')

    const expectedUpdate = {
      where: { id: 'job-123' },
      data: { verify_result: 'ALIGNED' },
    }
    expect(mockPrisma.claudeJob.update).toHaveBeenCalledWith(expectedUpdate)
  })

  it('EMPTY path: saves EMPTY to DB', async () => {
    // No diff output, and the classifier is told to answer EMPTY.
    stubExecFile('')
    mockClassify.mockReturnValue({ result: 'EMPTY', reasoning: 'Geen bestandswijzigingen in de diff.' })

    const diff = await getDiffInWorktree('/wt/job-xyz')
    const classification = mockClassify({ diff, plan: null })
    expect(classification.result).toBe('EMPTY')

    await saveVerifyResult('job-xyz', 'EMPTY')

    const expectedUpdate = {
      where: { id: 'job-xyz' },
      data: { verify_result: 'EMPTY' },
    }
    expect(mockPrisma.claudeJob.update).toHaveBeenCalledWith(expectedUpdate)
  })
})
|
||||
|
|
@ -1,28 +1,47 @@
|
|||
import { execFile } from 'node:child_process'
|
||||
import { promisify } from 'node:util'
|
||||
import { z } from 'zod'
|
||||
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
||||
import { prisma } from '../prisma.js'
|
||||
import { getAuth } from '../auth.js'
|
||||
import { userCanAccessTask } from '../access.js'
|
||||
import { toolError, toolJson, withToolErrors } from '../errors.js'
|
||||
import { buildVerifyResult, renderMarkdownReport } from '../lib/verify-plan.js'
|
||||
import { classifyDiffAgainstPlan, type VerifyResultValue } from '../verify/classify.js'
|
||||
|
||||
const exec = promisify(execFile)
|
||||
|
||||
const inputSchema = z.object({
|
||||
task_id: z.string().min(1),
|
||||
worktree_path: z.string().min(1),
|
||||
})
|
||||
|
||||
/**
 * Upper bound on captured `git diff` output, in bytes.
 *
 * Node's `execFile` defaults `maxBuffer` to 1 MiB and fails the call when the
 * child writes more than that, which would turn any large-but-valid diff into a
 * "git diff failed" error. 64 MiB leaves ample headroom while still bounding memory.
 */
const GIT_DIFF_MAX_BUFFER_BYTES = 64 * 1024 * 1024

/**
 * Runs `git diff origin/main...HEAD` inside a worktree and returns the raw diff text.
 *
 * The three-dot range shows the changes on `HEAD` since its merge base with
 * `origin/main`.
 *
 * @param worktreePath Directory used as the working directory for the git process.
 * @returns The stdout of `git diff`; an empty string when there are no changes.
 * @throws Rejects with the underlying `execFile` error when git cannot be run,
 *   exits unsuccessfully (for example, the path is not a git repository), or
 *   produces more than {@link GIT_DIFF_MAX_BUFFER_BYTES} of output.
 */
export async function getDiffInWorktree(worktreePath: string): Promise<string> {
  const { stdout } = await exec('git', ['diff', 'origin/main...HEAD'], {
    cwd: worktreePath,
    maxBuffer: GIT_DIFF_MAX_BUFFER_BYTES,
  })
  return stdout
}
|
||||
|
||||
/**
 * Persists a verification outcome on a Claude job.
 *
 * Issues a single `prisma.claudeJob.update` that sets `verify_result` on the
 * row whose `id` equals `jobId`. Any rejection from Prisma propagates to the caller.
 *
 * @param jobId Id of the job row to update.
 * @param result Classification value to store in `verify_result`.
 */
export async function saveVerifyResult(jobId: string, result: VerifyResultValue): Promise<void> {
  const where = { id: jobId }
  const data = { verify_result: result }

  await prisma.claudeJob.update({ where, data })
}
|
||||
|
||||
export function registerVerifyTaskAgainstPlanTool(server: McpServer) {
|
||||
server.registerTool(
|
||||
'verify_task_against_plan',
|
||||
{
|
||||
title: 'Verify task against plan',
|
||||
description:
|
||||
'Compare the frozen plan_snapshot (captured at claim time) against current ' +
|
||||
'task.implementation_plan, story logs, and commits. Returns a markdown report ' +
|
||||
'with per-AC ✓/✗/? heuristic checks and a drift-score. Read-only — demo users allowed.',
|
||||
'Run `git diff origin/main...HEAD` in the worktree and compare it against the ' +
|
||||
'frozen plan_snapshot captured at claim time. Returns ALIGNED|PARTIAL|EMPTY|DIVERGENT ' +
|
||||
'and saves verify_result on the active job. ' +
|
||||
'Call this BEFORE update_job_status("done"). ' +
|
||||
'If the result is EMPTY and task.verify_only is false, update_job_status("done") will be rejected.',
|
||||
inputSchema,
|
||||
annotations: { readOnlyHint: true },
|
||||
annotations: { readOnlyHint: false },
|
||||
},
|
||||
async ({ task_id }) =>
|
||||
async ({ task_id, worktree_path }) =>
|
||||
withToolErrors(async () => {
|
||||
const auth = await getAuth()
|
||||
if (!auth) return toolError('Unauthorized')
|
||||
|
|
@ -34,62 +53,44 @@ export function registerVerifyTaskAgainstPlanTool(server: McpServer) {
|
|||
where: { id: task_id },
|
||||
select: {
|
||||
id: true,
|
||||
title: true,
|
||||
implementation_plan: true,
|
||||
story: {
|
||||
select: {
|
||||
id: true,
|
||||
acceptance_criteria: true,
|
||||
logs: {
|
||||
orderBy: { created_at: 'asc' },
|
||||
select: {
|
||||
type: true,
|
||||
content: true,
|
||||
commit_hash: true,
|
||||
commit_message: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
verify_only: true,
|
||||
claude_jobs: {
|
||||
where: { status: { in: ['CLAIMED', 'RUNNING', 'DONE', 'FAILED'] } },
|
||||
where: { status: { in: ['CLAIMED', 'RUNNING'] } },
|
||||
orderBy: { created_at: 'desc' },
|
||||
take: 1,
|
||||
select: { plan_snapshot: true },
|
||||
select: { id: true, plan_snapshot: true },
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
if (!task) return toolError(`Task ${task_id} not found`)
|
||||
|
||||
const latestJob = task.claude_jobs[0] ?? null
|
||||
const planSnapshot = latestJob ? latestJob.plan_snapshot : null
|
||||
const activeJob = task.claude_jobs[0] ?? null
|
||||
|
||||
const implementationLogs = task.story.logs
|
||||
.filter((l) => l.type === 'IMPLEMENTATION_PLAN')
|
||||
.map((l) => l.content)
|
||||
let diff: string
|
||||
try {
|
||||
diff = await getDiffInWorktree(worktree_path)
|
||||
} catch (err) {
|
||||
return toolError(
|
||||
`git diff failed in worktree (${worktree_path}): ${(err as Error).message ?? 'unknown error'}`,
|
||||
)
|
||||
}
|
||||
|
||||
const commits = task.story.logs
|
||||
.filter((l) => l.type === 'COMMIT')
|
||||
.map((l) => ({ hash: l.commit_hash, message: l.commit_message }))
|
||||
|
||||
const result = buildVerifyResult({
|
||||
taskId: task.id,
|
||||
taskTitle: task.title,
|
||||
planSnapshot,
|
||||
currentPlan: task.implementation_plan,
|
||||
acceptanceCriteriaText: task.story.acceptance_criteria,
|
||||
implementationLogs,
|
||||
commits,
|
||||
const { result, reasoning } = classifyDiffAgainstPlan({
|
||||
diff,
|
||||
plan: activeJob?.plan_snapshot ?? null,
|
||||
})
|
||||
|
||||
if (activeJob) {
|
||||
await saveVerifyResult(activeJob.id, result)
|
||||
}
|
||||
|
||||
return toolJson({
|
||||
report: renderMarkdownReport(result),
|
||||
task_id: result.taskId,
|
||||
drift_score: result.driftScore,
|
||||
ac_results: result.acceptanceCriteria,
|
||||
plan_edited: result.planEdited,
|
||||
has_baseline: result.hasBaseline,
|
||||
result: result.toLowerCase() as 'aligned' | 'partial' | 'empty' | 'divergent',
|
||||
reasoning,
|
||||
verify_only: task.verify_only,
|
||||
task_id,
|
||||
job_id: activeJob?.id ?? null,
|
||||
})
|
||||
}),
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue