feat: verify_task_against_plan MCP tool

Read-only tool that compares the frozen plan_snapshot against the current task.implementation_plan, story logs, and commits. Returns a markdown report with a per-AC ✓/✗/? keyword heuristic, a drift-score, and a plan diff. Demo users are allowed (readOnlyHint: true).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-30 19:36:37 +02:00 · 2026-04-30 19:36:37 +02:00 · f51b7a6178
commit f51b7a6178
parent de7803a370
3 changed files with 276 additions and 0 deletions
--- a/src/index.ts
+++ b/src/index.ts
@ -19,6 +19,7 @@ import { registerListOpenQuestionsTool } from './tools/list-open-questions.js'
 import { registerCancelQuestionTool } from './tools/cancel-question.js'
 import { registerWaitForJobTool } from './tools/wait-for-job.js'
 import { registerUpdateJobStatusTool } from './tools/update-job-status.js'
+import { registerVerifyTaskAgainstPlanTool } from './tools/verify-task-against-plan.js'
 import { registerImplementNextStoryPrompt } from './prompts/implement-next-story.js'

 const VERSION = '0.1.0'
@ -51,6 +52,7 @@ async function main() {
  registerCancelQuestionTool(server)
  registerWaitForJobTool(server)
  registerUpdateJobStatusTool(server)
+  registerVerifyTaskAgainstPlanTool(server)
  registerImplementNextStoryPrompt(server)

  const transport = new StdioServerTransport()
--- a/src/lib/verify-plan.ts
+++ b/src/lib/verify-plan.ts
@ -0,0 +1,178 @@
+// Core logic for verify_task_against_plan: diff, AC heuristic, drift score.
+
+/**
+ * Splits an acceptance-criteria text into one entry per non-empty line.
+ *
+ * A leading list marker (`-`, `•` or `*`) and/or a leading `N.` number is
+ * removed from every line. Each line is trimmed before the markers are
+ * stripped, so indented (nested) list items are normalised as well.
+ *
+ * @param text Raw acceptance-criteria text; `null` or an empty string yields `[]`.
+ * @returns The criteria in their original order, without list markers.
+ */
+export function parseAcceptanceCriteria(text: string | null): string[] {
+  if (!text) return []
+  return text
+    .split('\n')
+    .map((line) =>
+      line
+        // Trim first: the marker patterns below are anchored at the line start.
+        .trim()
+        .replace(/^[-•*]\s*/, '')
+        .replace(/^\d+\.\s*/, '')
+        .trim(),
+    )
+    .filter((line) => line.length > 0)
+}
+
+/**
+ * Extracts the distinctive terms of a text for the acceptance-criteria heuristic.
+ *
+ * The text is split on whitespace and on `, ; : ( ) [ ] { }`. A token is kept
+ * (lower-cased, de-duplicated, in first-seen order) when it is one of:
+ * - path-like: contains `.` or `/` (file names, paths);
+ * - capitalised: starts with an upper-case letter followed by a lower-case
+ *   letter and is longer than 4 characters;
+ * - long: longer than 6 characters.
+ *
+ * Tokens without any letter or digit are ignored, and trailing punctuation
+ * (including a sentence-ending `.`) is stripped before classification.
+ *
+ * @param text Free text, typically a single acceptance criterion.
+ * @returns Unique lower-cased keywords; may be empty.
+ */
+export function extractKeywords(text: string): string[] {
+  const raw = text.split(/[\s,;:()[\]{}]+/)
+  const keywords: Set<string> = new Set()
+  for (const word of raw) {
+    // Leading dots are kept (`.env`, `./src`), but trailing dots are sentence
+    // punctuation: keeping them would turn "login." into a path-like keyword
+    // that never matches "login" in the corpus.
+    const clean = word.replace(/^[^\w./\-]+|[^\w/\-]+$/g, '')
+    // Separator-only tokens such as "/" or "----------" carry no signal.
+    if (!/[A-Za-z0-9]/.test(clean)) continue
+    if (clean.includes('.') || clean.includes('/')) {
+      keywords.add(clean.toLowerCase()) // filenames / paths
+    } else if (/^[A-Z][a-z]/.test(clean) && clean.length > 4) {
+      keywords.add(clean.toLowerCase()) // capitalised identifiers
+    } else if (clean.length > 6) {
+      keywords.add(clean.toLowerCase()) // long words
+    }
+  }
+  return [...keywords]
+}
+
+/**
+ * Rates one acceptance criterion against the collected evidence text.
+ *
+ * The criterion's keywords (see `extractKeywords`) are looked up as
+ * case-insensitive substrings of `corpus`.
+ *
+ * @param acText Text of the acceptance criterion.
+ * @param corpus Text to search in.
+ * @returns `'✓'` when at least half of the keywords occur, `'✗'` when none
+ *   occur, and `'?'` when fewer than half occur or the criterion yields no
+ *   keywords at all.
+ */
+export function checkACStatus(acText: string, corpus: string): '✓' | '✗' | '?' {
+  const terms = extractKeywords(acText)
+  // Nothing to look for: the heuristic cannot decide either way.
+  if (terms.length === 0) return '?'
+
+  const haystack = corpus.toLowerCase()
+  let hits = 0
+  for (const term of terms) {
+    if (haystack.includes(term)) hits += 1
+  }
+
+  if (hits === 0) return '✗'
+  return hits / terms.length >= 0.5 ? '✓' : '?'
+}
+
+/**
+ * Computes the score shown as "drift-score" in the report.
+ *
+ * The value is the share of results whose status is `'✓'`, as a percentage
+ * rounded to the nearest integer, so a higher score means more criteria
+ * were matched.
+ *
+ * @param results Per-criterion verdicts.
+ * @returns An integer from 0 to 100; 0 when `results` is empty.
+ */
+export function computeDriftScore(results: Array<{ status: '✓' | '✗' | '?' }>): number {
+  const total = results.length
+  if (total === 0) return 0
+
+  let passCount = 0
+  for (const { status } of results) {
+    if (status === '✓') passCount += 1
+  }
+  return Math.round((passCount / total) * 100)
+}
+
+/**
+ * Produces a minimal line-based diff between two texts.
+ *
+ * Lines are matched with a longest-common-subsequence table, so inserting or
+ * deleting a single line does not mark every following line as changed (a
+ * purely positional comparison would). Unchanged lines are omitted from the
+ * output; removed lines are prefixed with `- ` and added lines with `+ `.
+ *
+ * @param snapshot The baseline text.
+ * @param current The text to compare against the baseline.
+ * @returns The changed lines joined by `\n`, or `null` when the texts are equal.
+ */
+export function lineDiff(snapshot: string, current: string): string | null {
+  if (snapshot === current) return null
+  const oldLines = snapshot.split('\n')
+  const newLines = current.split('\n')
+
+  // Skip the common prefix and suffix: they can never be part of the diff and
+  // trimming them keeps the LCS table small for typical small edits.
+  let start = 0
+  while (start < oldLines.length && start < newLines.length && oldLines[start] === newLines[start]) {
+    start++
+  }
+  let oldEnd = oldLines.length
+  let newEnd = newLines.length
+  while (oldEnd > start && newEnd > start && oldLines[oldEnd - 1] === newLines[newEnd - 1]) {
+    oldEnd--
+    newEnd--
+  }
+  const removed = oldLines.slice(start, oldEnd)
+  const added = newLines.slice(start, newEnd)
+
+  const out: string[] = []
+  const rows = removed.length
+  const cols = added.length
+
+  // Guard against quadratic memory on very large inputs: report the whole
+  // changed region as replaced instead of building the table.
+  const maxCells = 4000000
+  if (rows * cols > maxCells) {
+    for (const line of removed) out.push(`- ${line}`)
+    for (const line of added) out.push(`+ ${line}`)
+    return out.length > 0 ? out.join('\n') : null
+  }
+
+  // table[i * width + j] = LCS length of removed[i..] and added[j..].
+  const width = cols + 1
+  const table = new Uint32Array((rows + 1) * width)
+  const cell = (i: number, j: number): number => table[i * width + j] ?? 0
+  for (let i = rows - 1; i >= 0; i--) {
+    for (let j = cols - 1; j >= 0; j--) {
+      table[i * width + j] =
+        removed[i] === added[j] ? cell(i + 1, j + 1) + 1 : Math.max(cell(i + 1, j), cell(i, j + 1))
+    }
+  }
+
+  // Walk the table; on ties emit the removal first so that a replaced line
+  // reads as "- old" followed by "+ new".
+  let i = 0
+  let j = 0
+  while (i < rows && j < cols) {
+    if (removed[i] === added[j]) {
+      i++
+      j++
+    } else if (cell(i + 1, j) >= cell(i, j + 1)) {
+      out.push(`- ${removed[i]}`)
+      i++
+    } else {
+      out.push(`+ ${added[j]}`)
+      j++
+    }
+  }
+  for (; i < rows; i++) out.push(`- ${removed[i]}`)
+  for (; j < cols; j++) out.push(`+ ${added[j]}`)
+
+  return out.length > 0 ? out.join('\n') : null
+}
+
+/** Heuristic verdict for a single acceptance criterion. */
+export interface ACResult {
+  /** Text of the criterion, as populated by `buildVerifyResult`. */
+  text: string
+  /** Verdict symbol: one of `✓`, `✗` or `?` (see `checkACStatus`). */
+  status: '✓' | '✗' | '?'
+}
+
+/** Outcome of `buildVerifyResult`; the input of `renderMarkdownReport`. */
+export interface VerifyResult {
+  /** Id of the verified task (passed through unchanged). */
+  taskId: string
+  /** Title of the verified task (passed through unchanged). */
+  taskTitle: string
+  /** `true` when `planSnapshot` is not `null`. */
+  hasBaseline: boolean
+  /** Baseline plan text, or `null` when there is no baseline. */
+  planSnapshot: string | null
+  /** Current plan text of the task, or `null` when it has none. */
+  currentPlan: string | null
+  /** `true` when a baseline exists and differs from the current plan (a `null` current plan counts as `''`). */
+  planEdited: boolean
+  /** Output of `lineDiff(planSnapshot, currentPlan)` when `planEdited` is set, otherwise `null`. */
+  planDiff: string | null
+  /** Log texts that are searched for acceptance-criteria keywords. */
+  implementationLogs: string[]
+  /** Logged commits; hash and message may each be missing. */
+  commits: Array<{ hash: string | null; message: string | null }>
+  /** One verdict per parsed acceptance criterion. */
+  acceptanceCriteria: ACResult[]
+  /** Result of `computeDriftScore`: rounded percentage of criteria with status `✓` (0 when there are none). */
+  driftScore: number
+}
+
+/**
+ * Assembles the verification result for one task from already-loaded data.
+ *
+ * - A baseline exists when `planSnapshot` is not `null`; the plan counts as
+ *   edited when that baseline differs from the current plan (`null` is
+ *   compared as `''`), in which case a line diff is attached.
+ * - Every acceptance criterion is rated against a corpus made of the current
+ *   plan, all implementation logs and one `"<hash> <message>"` line per commit.
+ * - The drift score is derived from those ratings via `computeDriftScore`.
+ *
+ * @param opts Task data; `implementationLogs` and `commits` are returned as-is.
+ * @returns The populated {@link VerifyResult}.
+ */
+export function buildVerifyResult(opts: {
+  taskId: string
+  taskTitle: string
+  planSnapshot: string | null
+  currentPlan: string | null
+  acceptanceCriteriaText: string | null
+  implementationLogs: string[]
+  commits: Array<{ hash: string | null; message: string | null }>
+}): VerifyResult {
+  const snapshot = opts.planSnapshot
+  const currentText = opts.currentPlan ?? ''
+
+  // Only a real baseline can be "edited"; the diff is computed in that case only.
+  let planEdited = false
+  let planDiff: string | null = null
+  if (snapshot !== null && snapshot !== currentText) {
+    planEdited = true
+    planDiff = lineDiff(snapshot, currentText)
+  }
+
+  // Evidence text in which the acceptance-criteria keywords are searched.
+  const commitLines = opts.commits.map((commit) => `${commit.hash ?? ''} ${commit.message ?? ''}`)
+  const corpus = [currentText, ...opts.implementationLogs, ...commitLines].join('\n')
+
+  const acceptanceCriteria: ACResult[] = []
+  for (const text of parseAcceptanceCriteria(opts.acceptanceCriteriaText)) {
+    acceptanceCriteria.push({ text, status: checkACStatus(text, corpus) })
+  }
+
+  return {
+    taskId: opts.taskId,
+    taskTitle: opts.taskTitle,
+    hasBaseline: snapshot !== null,
+    planSnapshot: snapshot,
+    currentPlan: opts.currentPlan,
+    planEdited,
+    planDiff,
+    implementationLogs: opts.implementationLogs,
+    commits: opts.commits,
+    acceptanceCriteria,
+    driftScore: computeDriftScore(acceptanceCriteria),
+  }
+}
+
+/** Maximum number of characters of a plan shown in the report's "Plan" section. */
+const SNIPPET_MAX_LENGTH = 120
+
+/**
+ * Renders a plan as a single-line preview for use inside a markdown bullet.
+ * Whitespace runs (including newlines) are collapsed to one space so a
+ * multi-line plan cannot break the list; a missing or blank plan is shown
+ * as `(leeg)`.
+ */
+function planSnippet(plan: string | null): string {
+  const flat = (plan ?? '').replace(/\s+/g, ' ').trim()
+  if (flat.length === 0) return '(leeg)'
+  return flat.length > SNIPPET_MAX_LENGTH ? flat.slice(0, SNIPPET_MAX_LENGTH) + '…' : flat
+}
+
+/**
+ * Renders a {@link VerifyResult} as a markdown report.
+ *
+ * Sections, in order: a title with task title and id; "Plan" (snapshot and
+ * current plan previews, edited flag, optional fenced diff); "AC-checks"
+ * (pass count, drift score and one bullet per criterion); "Realisatie"
+ * (number of implementation logs and the logged commits); and a closing
+ * disclaimer that the report is heuristic.
+ *
+ * @param r The verification result to render.
+ * @returns The report as a single string with `\n` line breaks.
+ */
+export function renderMarkdownReport(r: VerifyResult): string {
+  const lines: string[] = []
+
+  lines.push(`# Verify task: ${r.taskTitle} (\`${r.taskId}\`)`, '')
+
+  lines.push('## Plan')
+  if (!r.hasBaseline) {
+    lines.push('- Snapshot: **no baseline** (job was claimed before snapshot feature)')
+  } else {
+    lines.push(`- Snapshot: ${planSnippet(r.planSnapshot)}`)
+  }
+  lines.push(`- Current: ${planSnippet(r.currentPlan)}`)
+  lines.push(`- Edited onderweg: **${r.planEdited ? 'yes' : 'no'}**`)
+
+  if (r.planEdited && r.planDiff) {
+    lines.push('', '### Diff (snapshot → current)', '```diff', r.planDiff, '```')
+  }
+  lines.push('')
+
+  const passed = r.acceptanceCriteria.filter((a) => a.status === '✓').length
+  const total = r.acceptanceCriteria.length
+  lines.push(`## AC-checks (${passed}/${total} ✓ — drift-score ${r.driftScore}%)`)
+  if (total === 0) {
+    lines.push('_Geen acceptance criteria op de parent story._')
+  } else {
+    for (const ac of r.acceptanceCriteria) {
+      lines.push(`- ${ac.status} ${ac.text}`)
+    }
+  }
+  lines.push('')
+
+  lines.push('## Realisatie')
+  const logCount = r.implementationLogs.length
+  lines.push(`- ${logCount} log_implementation-entr${logCount === 1 ? 'y' : 'ies'}`)
+  if (r.commits.length === 0) {
+    lines.push('- Geen commits gelogd')
+  } else {
+    for (const c of r.commits) {
+      lines.push(`- commit \`${c.hash ?? '?'}\` — ${c.message ?? '(geen bericht)'}`)
+    }
+  }
+  lines.push('')
+
+  lines.push('---')
+  lines.push('⚠️ Heuristiek-rapport — handmatige PR-review blijft nodig')
+
+  return lines.join('\n')
+}
--- a/src/tools/verify-task-against-plan.ts
+++ b/src/tools/verify-task-against-plan.ts
@ -0,0 +1,96 @@
+import { z } from 'zod'
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
+import { prisma } from '../prisma.js'
+import { getAuth } from '../auth.js'
+import { userCanAccessTask } from '../access.js'
+import { toolError, toolJson, withToolErrors } from '../errors.js'
+import { buildVerifyResult, renderMarkdownReport } from '../lib/verify-plan.js'
+
+// Input of verify_task_against_plan: the id of the task to verify, as a
+// non-empty string.
+const inputSchema = z.object({
+  task_id: z.string().min(1),
+})
+
+/**
+ * Registers the `verify_task_against_plan` tool on the given MCP server.
+ *
+ * The handler:
+ * 1. requires an authenticated user that may access the task;
+ * 2. loads the task with its current plan, the parent story's acceptance
+ *    criteria and logs (oldest first), and the `plan_snapshot` of the most
+ *    recently created job whose status is CLAIMED, RUNNING, DONE or FAILED;
+ * 3. feeds `IMPLEMENTATION_PLAN` log contents and `COMMIT` log entries into
+ *    `buildVerifyResult`;
+ * 4. returns the rendered markdown report together with the structured
+ *    fields as JSON.
+ *
+ * Failures of the auth, access or lookup steps are returned as tool errors.
+ *
+ * @param server MCP server to register the tool on.
+ */
+export function registerVerifyTaskAgainstPlanTool(server: McpServer) {
+  server.registerTool(
+    'verify_task_against_plan',
+    {
+      title: 'Verify task against plan',
+      description:
+        'Compare the frozen plan_snapshot (captured at claim time) against current ' +
+        'task.implementation_plan, story logs, and commits. Returns a markdown report ' +
+        'with per-AC ✓/✗/? heuristic checks and a drift-score. Read-only — demo users allowed.',
+      inputSchema,
+      annotations: { readOnlyHint: true },
+    },
+    async ({ task_id }) =>
+      withToolErrors(async () => {
+        const auth = await getAuth()
+        if (!auth) return toolError('Unauthorized')
+
+        const allowed = await userCanAccessTask(task_id, auth.userId)
+        if (!allowed) {
+          return toolError(`Task ${task_id} not found or not accessible`)
+        }
+
+        const task = await prisma.task.findUnique({
+          where: { id: task_id },
+          select: {
+            id: true,
+            title: true,
+            implementation_plan: true,
+            story: {
+              select: {
+                id: true,
+                acceptance_criteria: true,
+                logs: {
+                  orderBy: { created_at: 'asc' },
+                  select: {
+                    type: true,
+                    content: true,
+                    commit_hash: true,
+                    commit_message: true,
+                  },
+                },
+              },
+            },
+            claude_jobs: {
+              where: { status: { in: ['CLAIMED', 'RUNNING', 'DONE', 'FAILED'] } },
+              orderBy: { created_at: 'desc' },
+              take: 1,
+              select: { plan_snapshot: true },
+            },
+          },
+        })
+        if (!task) return toolError(`Task ${task_id} not found`)
+
+        // The query returns at most one job (newest first); no job means no baseline.
+        const planSnapshot = task.claude_jobs[0]?.plan_snapshot ?? null
+
+        // Split the story logs into plan texts and commit records in one pass,
+        // keeping the query's chronological order within each list.
+        const implementationLogs: string[] = []
+        const commits: Array<{ hash: string | null; message: string | null }> = []
+        for (const log of task.story.logs) {
+          if (log.type === 'IMPLEMENTATION_PLAN') {
+            implementationLogs.push(log.content)
+          } else if (log.type === 'COMMIT') {
+            commits.push({ hash: log.commit_hash, message: log.commit_message })
+          }
+        }
+
+        const result = buildVerifyResult({
+          taskId: task.id,
+          taskTitle: task.title,
+          planSnapshot,
+          currentPlan: task.implementation_plan,
+          acceptanceCriteriaText: task.story.acceptance_criteria,
+          implementationLogs,
+          commits,
+        })
+        const { taskId, driftScore, acceptanceCriteria, planEdited, hasBaseline } = result
+
+        return toolJson({
+          report: renderMarkdownReport(result),
+          task_id: taskId,
+          drift_score: driftScore,
+          ac_results: acceptanceCriteria,
+          plan_edited: planEdited,
+          has_baseline: hasBaseline,
+        })
+      }),
+  )
+}