feat: verify_task_against_plan calls classifyDiffAgainstPlan + saves verify_result to DB
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
24e933fc2f
commit
e63ea7026b
2 changed files with 172 additions and 48 deletions
123
__tests__/verify-task-against-plan.test.ts
Normal file
123
__tests__/verify-task-against-plan.test.ts
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||||
|
|
||||||
|
vi.mock('../src/prisma.js', () => ({
|
||||||
|
prisma: {
|
||||||
|
task: { findUnique: vi.fn() },
|
||||||
|
claudeJob: { update: vi.fn() },
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('../src/verify/classify.js', () => ({
|
||||||
|
classifyDiffAgainstPlan: vi.fn(),
|
||||||
|
}))
|
||||||
|
|
||||||
|
import { prisma } from '../src/prisma.js'
|
||||||
|
import { classifyDiffAgainstPlan } from '../src/verify/classify.js'
|
||||||
|
import { getDiffInWorktree, saveVerifyResult } from '../src/tools/verify-task-against-plan.js'
|
||||||
|
|
||||||
|
const mockPrisma = prisma as unknown as {
|
||||||
|
task: { findUnique: ReturnType<typeof vi.fn> }
|
||||||
|
claudeJob: { update: ReturnType<typeof vi.fn> }
|
||||||
|
}
|
||||||
|
const mockClassify = classifyDiffAgainstPlan as ReturnType<typeof vi.fn>
|
||||||
|
|
||||||
|
// Mock node:child_process so getDiffInWorktree doesn't need a real git repo
|
||||||
|
vi.mock('node:child_process', () => ({
|
||||||
|
execFile: vi.fn(),
|
||||||
|
}))
|
||||||
|
|
||||||
|
import { execFile } from 'node:child_process'
|
||||||
|
const mockExecFile = execFile as unknown as ReturnType<typeof vi.fn>
|
||||||
|
|
||||||
|
/**
 * Makes the mocked `execFile` report success with the given stdout.
 *
 * The code under test wraps `execFile` with `promisify`, which invokes it in
 * callback form with a Node-style callback appended as the final argument.
 * The stub answers that callback with a `{ stdout, stderr }` object so the
 * awaited value can be destructured by the caller.
 *
 * The callback is taken from the last argument instead of a fixed position,
 * so the stub keeps working (rather than leaving the promise pending) if the
 * call site omits the options object.
 *
 * @param stdout Text the fake command should emit on stdout.
 */
function stubExecFile(stdout: string) {
  mockExecFile.mockImplementation((...args: unknown[]) => {
    const callback = args[args.length - 1]
    if (typeof callback !== 'function') {
      // Fail loudly instead of hanging the awaiting test.
      throw new TypeError('execFile mock expected a trailing callback')
    }
    callback(null, { stdout, stderr: '' })
  })
}
|
||||||
|
|
||||||
|
/**
 * Makes the mocked `execFile` fail with an `Error` carrying `message`.
 *
 * The Node-style callback is taken from the last argument instead of a fixed
 * position, so the failure is still delivered (rather than leaving the
 * promise pending) if the call site omits the options object.
 *
 * @param message Message of the error handed to the callback.
 */
function stubExecFileError(message: string) {
  mockExecFile.mockImplementation((...args: unknown[]) => {
    const callback = args[args.length - 1]
    if (typeof callback !== 'function') {
      // Fail loudly instead of hanging the awaiting test.
      throw new TypeError('execFile mock expected a trailing callback')
    }
    callback(new Error(message))
  })
}
|
||||||
|
|
||||||
|
const ALIGNED_DIFF = `diff --git a/src/verify/classify.ts b/src/verify/classify.ts
|
||||||
|
--- a/src/verify/classify.ts
|
||||||
|
+++ b/src/verify/classify.ts
|
||||||
|
+export function classifyDiffAgainstPlan(opts) {}`
|
||||||
|
|
||||||
|
// Unit tests for getDiffInWorktree, run against the mocked execFile.
describe('getDiffInWorktree', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('returns stdout from git diff', async () => {
    const worktree = '/worktrees/job-abc'
    stubExecFile(ALIGNED_DIFF)

    const output = await getDiffInWorktree(worktree)

    expect(output).toBe(ALIGNED_DIFF)
    // The diff must be taken against origin/main from inside the worktree.
    expect(mockExecFile).toHaveBeenCalledWith(
      'git',
      ['diff', 'origin/main...HEAD'],
      expect.objectContaining({ cwd: worktree }),
      expect.any(Function),
    )
  })

  it('throws when git diff fails', async () => {
    stubExecFileError('not a git repo')

    const pending = getDiffInWorktree('/bad/path')

    await expect(pending).rejects.toThrow('not a git repo')
  })
})
|
||||||
|
|
||||||
|
// Unit test for saveVerifyResult, run against the mocked prisma client.
describe('saveVerifyResult', () => {
  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('updates claudeJob with the given verify_result', async () => {
    const { update } = mockPrisma.claudeJob
    update.mockResolvedValue({})

    await saveVerifyResult('job-123', 'ALIGNED')

    const expectedArgs = {
      where: { id: 'job-123' },
      data: { verify_result: 'ALIGNED' },
    }
    expect(update).toHaveBeenCalledWith(expectedArgs)
  })
})
|
||||||
|
|
||||||
|
// Chains the two exported helpers with the mocked classifier: diff -> classify -> save.
//
// NOTE(review): these tests drive the helpers by hand; they do not invoke the
// handler registered by registerVerifyTaskAgainstPlanTool, so auth, task lookup
// and the active-job guard are not covered here.
describe('verify_task_against_plan — integration of helpers', () => {
  beforeEach(() => {
    vi.clearAllMocks()
    stubExecFile(ALIGNED_DIFF)
    mockClassify.mockReturnValue({ result: 'ALIGNED', reasoning: 'All paths covered.' })
    mockPrisma.claudeJob.update.mockResolvedValue({})
  })

  it('happy path: runs diff, classifies, saves verify_result', async () => {
    const plan = 'Modify `src/verify/classify.ts`.'

    const diff = await getDiffInWorktree('/wt/job-abc')
    expect(diff).toBe(ALIGNED_DIFF)

    const { result } = mockClassify({ diff, plan })
    expect(mockClassify).toHaveBeenCalledWith(expect.objectContaining({ diff }))
    expect(result).toBe('ALIGNED')

    // Persist the classifier's verdict rather than a hard-coded literal, so a
    // break anywhere in the diff -> classify -> save chain fails this test.
    await saveVerifyResult('job-123', result)
    expect(mockPrisma.claudeJob.update).toHaveBeenCalledWith({
      where: { id: 'job-123' },
      data: { verify_result: 'ALIGNED' },
    })
  })

  it('EMPTY path: saves EMPTY to DB', async () => {
    stubExecFile('') // no diff output
    mockClassify.mockReturnValue({ result: 'EMPTY', reasoning: 'Geen bestandswijzigingen in de diff.' })

    const diff = await getDiffInWorktree('/wt/job-xyz')
    // The empty stdout must reach the classifier unchanged.
    expect(diff).toBe('')

    const { result } = mockClassify({ diff, plan: null })
    expect(mockClassify).toHaveBeenCalledWith({ diff: '', plan: null })
    expect(result).toBe('EMPTY')

    await saveVerifyResult('job-xyz', result)
    expect(mockPrisma.claudeJob.update).toHaveBeenCalledWith({
      where: { id: 'job-xyz' },
      data: { verify_result: 'EMPTY' },
    })
  })
})
|
||||||
|
|
@ -1,28 +1,47 @@
|
||||||
|
import { execFile } from 'node:child_process'
|
||||||
|
import { promisify } from 'node:util'
|
||||||
import { z } from 'zod'
|
import { z } from 'zod'
|
||||||
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
||||||
import { prisma } from '../prisma.js'
|
import { prisma } from '../prisma.js'
|
||||||
import { getAuth } from '../auth.js'
|
import { getAuth } from '../auth.js'
|
||||||
import { userCanAccessTask } from '../access.js'
|
import { userCanAccessTask } from '../access.js'
|
||||||
import { toolError, toolJson, withToolErrors } from '../errors.js'
|
import { toolError, toolJson, withToolErrors } from '../errors.js'
|
||||||
import { buildVerifyResult, renderMarkdownReport } from '../lib/verify-plan.js'
|
import { classifyDiffAgainstPlan, type VerifyResultValue } from '../verify/classify.js'
|
||||||
|
|
||||||
|
const exec = promisify(execFile)
|
||||||
|
|
||||||
const inputSchema = z.object({
|
const inputSchema = z.object({
|
||||||
task_id: z.string().min(1),
|
task_id: z.string().min(1),
|
||||||
|
worktree_path: z.string().min(1),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
/**
 * Upper bound on captured `git diff` output, in bytes. Node's `execFile`
 * default is 1 MiB, which a moderately large branch diff can exceed; when it
 * does the child is terminated and the call rejects.
 */
const GIT_DIFF_MAX_BUFFER_BYTES = 64 * 1024 * 1024

/**
 * Runs `git diff origin/main...HEAD` inside the given worktree and returns
 * its stdout.
 *
 * @param worktreePath Directory used as the working directory for `git`.
 * @returns The diff text as printed by git (an empty string when git prints nothing).
 * @throws Rejects with the error from `execFile` when git cannot be run,
 *   exits non-zero, or produces more output than the buffer limit.
 */
export async function getDiffInWorktree(worktreePath: string): Promise<string> {
  const { stdout } = await exec('git', ['diff', 'origin/main...HEAD'], {
    cwd: worktreePath,
    maxBuffer: GIT_DIFF_MAX_BUFFER_BYTES,
  })
  return stdout
}
|
||||||
|
|
||||||
|
/**
 * Stores a verification verdict on a Claude job row.
 *
 * @param jobId Id of the `claudeJob` record to update.
 * @param result Verdict written to the record's `verify_result` field.
 */
export async function saveVerifyResult(jobId: string, result: VerifyResultValue): Promise<void> {
  const where = { id: jobId }
  const data = { verify_result: result }
  await prisma.claudeJob.update({ where, data })
}
|
||||||
|
|
||||||
export function registerVerifyTaskAgainstPlanTool(server: McpServer) {
|
export function registerVerifyTaskAgainstPlanTool(server: McpServer) {
|
||||||
server.registerTool(
|
server.registerTool(
|
||||||
'verify_task_against_plan',
|
'verify_task_against_plan',
|
||||||
{
|
{
|
||||||
title: 'Verify task against plan',
|
title: 'Verify task against plan',
|
||||||
description:
|
description:
|
||||||
'Compare the frozen plan_snapshot (captured at claim time) against current ' +
|
'Run `git diff origin/main...HEAD` in the worktree and compare it against the ' +
|
||||||
'task.implementation_plan, story logs, and commits. Returns a markdown report ' +
|
'frozen plan_snapshot captured at claim time. Returns ALIGNED|PARTIAL|EMPTY|DIVERGENT ' +
|
||||||
'with per-AC ✓/✗/? heuristic checks and a drift-score. Read-only — demo users allowed.',
|
'and saves verify_result on the active job. ' +
|
||||||
|
'Call this BEFORE update_job_status("done"). ' +
|
||||||
|
'If the result is EMPTY and task.verify_only is false, update_job_status("done") will be rejected.',
|
||||||
inputSchema,
|
inputSchema,
|
||||||
annotations: { readOnlyHint: true },
|
annotations: { readOnlyHint: false },
|
||||||
},
|
},
|
||||||
async ({ task_id }) =>
|
async ({ task_id, worktree_path }) =>
|
||||||
withToolErrors(async () => {
|
withToolErrors(async () => {
|
||||||
const auth = await getAuth()
|
const auth = await getAuth()
|
||||||
if (!auth) return toolError('Unauthorized')
|
if (!auth) return toolError('Unauthorized')
|
||||||
|
|
@ -34,62 +53,44 @@ export function registerVerifyTaskAgainstPlanTool(server: McpServer) {
|
||||||
where: { id: task_id },
|
where: { id: task_id },
|
||||||
select: {
|
select: {
|
||||||
id: true,
|
id: true,
|
||||||
title: true,
|
verify_only: true,
|
||||||
implementation_plan: true,
|
|
||||||
story: {
|
|
||||||
select: {
|
|
||||||
id: true,
|
|
||||||
acceptance_criteria: true,
|
|
||||||
logs: {
|
|
||||||
orderBy: { created_at: 'asc' },
|
|
||||||
select: {
|
|
||||||
type: true,
|
|
||||||
content: true,
|
|
||||||
commit_hash: true,
|
|
||||||
commit_message: true,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
claude_jobs: {
|
claude_jobs: {
|
||||||
where: { status: { in: ['CLAIMED', 'RUNNING', 'DONE', 'FAILED'] } },
|
where: { status: { in: ['CLAIMED', 'RUNNING'] } },
|
||||||
orderBy: { created_at: 'desc' },
|
orderBy: { created_at: 'desc' },
|
||||||
take: 1,
|
take: 1,
|
||||||
select: { plan_snapshot: true },
|
select: { id: true, plan_snapshot: true },
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!task) return toolError(`Task ${task_id} not found`)
|
if (!task) return toolError(`Task ${task_id} not found`)
|
||||||
|
|
||||||
const latestJob = task.claude_jobs[0] ?? null
|
const activeJob = task.claude_jobs[0] ?? null
|
||||||
const planSnapshot = latestJob ? latestJob.plan_snapshot : null
|
|
||||||
|
|
||||||
const implementationLogs = task.story.logs
|
let diff: string
|
||||||
.filter((l) => l.type === 'IMPLEMENTATION_PLAN')
|
try {
|
||||||
.map((l) => l.content)
|
diff = await getDiffInWorktree(worktree_path)
|
||||||
|
} catch (err) {
|
||||||
|
return toolError(
|
||||||
|
`git diff failed in worktree (${worktree_path}): ${(err as Error).message ?? 'unknown error'}`,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
const commits = task.story.logs
|
const { result, reasoning } = classifyDiffAgainstPlan({
|
||||||
.filter((l) => l.type === 'COMMIT')
|
diff,
|
||||||
.map((l) => ({ hash: l.commit_hash, message: l.commit_message }))
|
plan: activeJob?.plan_snapshot ?? null,
|
||||||
|
|
||||||
const result = buildVerifyResult({
|
|
||||||
taskId: task.id,
|
|
||||||
taskTitle: task.title,
|
|
||||||
planSnapshot,
|
|
||||||
currentPlan: task.implementation_plan,
|
|
||||||
acceptanceCriteriaText: task.story.acceptance_criteria,
|
|
||||||
implementationLogs,
|
|
||||||
commits,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if (activeJob) {
|
||||||
|
await saveVerifyResult(activeJob.id, result)
|
||||||
|
}
|
||||||
|
|
||||||
return toolJson({
|
return toolJson({
|
||||||
report: renderMarkdownReport(result),
|
result: result.toLowerCase() as 'aligned' | 'partial' | 'empty' | 'divergent',
|
||||||
task_id: result.taskId,
|
reasoning,
|
||||||
drift_score: result.driftScore,
|
verify_only: task.verify_only,
|
||||||
ac_results: result.acceptanceCriteria,
|
task_id,
|
||||||
plan_edited: result.planEdited,
|
job_id: activeJob?.id ?? null,
|
||||||
has_baseline: result.hasBaseline,
|
|
||||||
})
|
})
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue