Compare commits

...
Sign in to create a new pull request.

2 commits

Author SHA1 Message Date
Janpeter Visser
fba2d67796
fix(update_job_status): status-gedreven lifecycle-timestamps (#51)
Een job kon CLAIMED -> done/failed/skipped gaan zonder ooit `running` te
rapporteren, waardoor started_at NULL bleef terwijl finished_at wel gezet
werd. Dat brak de invariant claimed_at <= started_at <= finished_at en
elke duur-analyse.

Nieuwe pure helper resolveJobTimestamps zet de lifecycle-timestamps
set-once op basis van de status: started_at wordt gebackfild bij een
terminale overgang, claimed_at defensief gevuld als die ontbreekt. De
running-tak is nu set-once i.p.v. bij elke call overschrijven.

Co-authored-by: Madhura68 <ID+Madhura68@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 23:21:44 +02:00
Janpeter Visser
51fc65e715
fix(update_idea_plan_reviewed): nooit stilzwijgend goedkeuren (IDEA-066) (#50)
De status-logica sprak z'n eigen tool-beschrijving tegen. De code deed:
  approved  -> PLAN_REVIEWED
  rejected  -> PLAN_REVIEW_FAILED
  else      -> PLAN_REVIEWED   // "Default to approved if not specified"

Een review die 'pending' (needs manual approval) of helemaal geen
approval_status teruggaf, markeerde het idee dus als PLAN_REVIEWED
(goedgekeurd) — precies omgekeerd aan wat de beschrijving belooft.

Fix: alleen een expliciete approval_status='approved' brengt het idee
naar PLAN_REVIEWED; 'rejected', 'pending' én een weggelaten
approval_status gaan allemaal naar PLAN_REVIEW_FAILED (mens beslist).
Nooit stilzwijgend goedkeuren.

Verder:
- Handler geextraheerd naar handleUpdateIdeaPlanReviewed + inputSchema
  geexporteerd, conform het create-sprint/update-sprint-patroon, zodat
  de logica zonder McpServer-wrapper testbaar is.
- Tool-beschrijving + header-comment aangescherpt zodat code en docs
  niet meer divergeren.
- Nieuw test-bestand: 6 tests (approved/rejected/pending/omitted
  status-transitie, not-found, log-persistentie).

Build groen, 379 tests groen.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 19:46:31 +02:00
4 changed files with 313 additions and 58 deletions

View file

@ -0,0 +1,140 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
vi.mock('../src/prisma.js', () => ({
prisma: {
idea: { update: vi.fn() },
ideaLog: { create: vi.fn() },
$transaction: vi.fn(),
},
}))
vi.mock('../src/auth.js', () => ({
requireWriteAccess: vi.fn(),
PermissionDeniedError: class PermissionDeniedError extends Error {
constructor(message = 'Demo accounts cannot perform write operations') {
super(message)
this.name = 'PermissionDeniedError'
}
},
}))
vi.mock('../src/access.js', () => ({
userOwnsIdea: vi.fn(),
}))
import { prisma } from '../src/prisma.js'
import { requireWriteAccess } from '../src/auth.js'
import { userOwnsIdea } from '../src/access.js'
import { handleUpdateIdeaPlanReviewed } from '../src/tools/update-idea-plan-reviewed.js'
const mockPrisma = prisma as unknown as {
idea: { update: ReturnType<typeof vi.fn> }
ideaLog: { create: ReturnType<typeof vi.fn> }
$transaction: ReturnType<typeof vi.fn>
}
const mockRequireWriteAccess = requireWriteAccess as ReturnType<typeof vi.fn>
const mockUserOwnsIdea = userOwnsIdea as ReturnType<typeof vi.fn>
const IDEA_ID = 'idea-1'
const USER_ID = 'user-1'
const REVIEW_LOG = {
rounds: [{ score: 88 }],
convergence: { stable_at_round: 2 },
approval: { status: 'approved' },
}
beforeEach(() => {
vi.clearAllMocks()
mockRequireWriteAccess.mockResolvedValue({
userId: USER_ID,
tokenId: 'tok-1',
username: 'alice',
isDemo: false,
})
mockUserOwnsIdea.mockResolvedValue(true)
// $transaction returns the array of its two operations' results; the handler
// only reads result[0] (the idea.update result).
mockPrisma.$transaction.mockImplementation(async () => [
{ id: IDEA_ID, status: 'PLACEHOLDER', code: 'IDEA-1' },
{},
])
})
function parseResult(result: Awaited<ReturnType<typeof handleUpdateIdeaPlanReviewed>>) {
const text = result.content?.[0]?.type === 'text' ? result.content[0].text : ''
try {
return JSON.parse(text)
} catch {
return text
}
}
// The handler builds `data.status` inside the idea.update call passed to
// $transaction. We capture it by inspecting the prisma.idea.update mock args.
function statusPassedToUpdate(): string | undefined {
const call = mockPrisma.idea.update.mock.calls[0]
return call?.[0]?.data?.status
}
describe('handleUpdateIdeaPlanReviewed — status transition', () => {
it('approval_status="approved" → PLAN_REVIEWED', async () => {
await handleUpdateIdeaPlanReviewed({
idea_id: IDEA_ID,
review_log: REVIEW_LOG,
approval_status: 'approved',
})
expect(statusPassedToUpdate()).toBe('PLAN_REVIEWED')
})
it('approval_status="rejected" → PLAN_REVIEW_FAILED', async () => {
await handleUpdateIdeaPlanReviewed({
idea_id: IDEA_ID,
review_log: REVIEW_LOG,
approval_status: 'rejected',
})
expect(statusPassedToUpdate()).toBe('PLAN_REVIEW_FAILED')
})
it('approval_status="pending" → PLAN_REVIEW_FAILED (needs manual approval, never silently approved)', async () => {
await handleUpdateIdeaPlanReviewed({
idea_id: IDEA_ID,
review_log: REVIEW_LOG,
approval_status: 'pending',
})
expect(statusPassedToUpdate()).toBe('PLAN_REVIEW_FAILED')
})
it('omitted approval_status → PLAN_REVIEW_FAILED (safe default, not PLAN_REVIEWED)', async () => {
await handleUpdateIdeaPlanReviewed({
idea_id: IDEA_ID,
review_log: REVIEW_LOG,
})
expect(statusPassedToUpdate()).toBe('PLAN_REVIEW_FAILED')
})
it('returns "Idea not found" when the user does not own the idea', async () => {
mockUserOwnsIdea.mockResolvedValue(false)
const result = await handleUpdateIdeaPlanReviewed({
idea_id: IDEA_ID,
review_log: REVIEW_LOG,
approval_status: 'approved',
})
expect(parseResult(result)).toContain('Idea not found')
expect(mockPrisma.idea.update).not.toHaveBeenCalled()
})
it('persists review_log + reviewed_at and logs a PLAN_REVIEW_RESULT entry', async () => {
await handleUpdateIdeaPlanReviewed({
idea_id: IDEA_ID,
review_log: REVIEW_LOG,
approval_status: 'approved',
})
const updateArg = mockPrisma.idea.update.mock.calls[0]?.[0]
expect(updateArg?.data?.plan_review_log).toEqual(REVIEW_LOG)
expect(updateArg?.data?.reviewed_at).toBeInstanceOf(Date)
const logArg = mockPrisma.ideaLog.create.mock.calls[0]?.[0]
expect(logArg?.data?.type).toBe('PLAN_REVIEW_RESULT')
expect(logArg?.data?.idea_id).toBe(IDEA_ID)
})
})

View file

@ -0,0 +1,74 @@
// Unit-tests voor resolveJobTimestamps — de status-gedreven timestamp-helper
// van update_job_status. Pure functie, geen mocks (zoals update-job-status-gate).
import { describe, it, expect } from 'vitest'
import { resolveJobTimestamps } from '../src/tools/update-job-status.js'
const NOW = new Date('2026-05-14T12:00:00.000Z')
const EARLIER = new Date('2026-05-14T11:00:00.000Z')
describe('resolveJobTimestamps', () => {
describe('running', () => {
it('sets started_at when not yet set, no finished_at', () => {
const r = resolveJobTimestamps('running', { claimed_at: EARLIER, started_at: null }, NOW)
expect(r.started_at).toBe(NOW)
expect(r.finished_at).toBeUndefined()
expect(r.claimed_at).toBeUndefined()
})
it('is set-once: does not re-stamp started_at when already set', () => {
const r = resolveJobTimestamps('running', { claimed_at: EARLIER, started_at: EARLIER }, NOW)
expect(r.started_at).toBeUndefined()
expect(r.finished_at).toBeUndefined()
expect(r.claimed_at).toBeUndefined()
})
})
describe('terminal transitions (done/failed/skipped)', () => {
it.each(['done', 'failed', 'skipped'] as const)(
'backfills started_at and sets finished_at for %s when started_at is null',
(status) => {
const r = resolveJobTimestamps(status, { claimed_at: EARLIER, started_at: null }, NOW)
expect(r.started_at).toBe(NOW)
expect(r.finished_at).toBe(NOW)
expect(r.claimed_at).toBeUndefined()
},
)
it('only sets finished_at when started_at is already set', () => {
const r = resolveJobTimestamps('done', { claimed_at: EARLIER, started_at: EARLIER }, NOW)
expect(r.started_at).toBeUndefined()
expect(r.finished_at).toBe(NOW)
expect(r.claimed_at).toBeUndefined()
})
})
describe('claimed_at backfill', () => {
it.each(['running', 'done', 'failed', 'skipped'] as const)(
'backfills claimed_at for %s when it is null',
(status) => {
const r = resolveJobTimestamps(status, { claimed_at: null, started_at: null }, NOW)
expect(r.claimed_at).toBe(NOW)
},
)
it('never returns claimed_at when it is already set', () => {
const r = resolveJobTimestamps('done', { claimed_at: EARLIER, started_at: EARLIER }, NOW)
expect(r.claimed_at).toBeUndefined()
})
})
it('returns only finished_at when all timestamps are already set and status is terminal', () => {
const r = resolveJobTimestamps('failed', { claimed_at: EARLIER, started_at: EARLIER }, NOW)
expect(r).toEqual({ finished_at: NOW })
})
it('defaults now to a fresh Date when omitted', () => {
const before = Date.now()
const r = resolveJobTimestamps('running', { claimed_at: EARLIER, started_at: null })
const after = Date.now()
expect(r.started_at).toBeInstanceOf(Date)
expect(r.started_at!.getTime()).toBeGreaterThanOrEqual(before)
expect(r.started_at!.getTime()).toBeLessThanOrEqual(after)
})
})

View file

@ -1,6 +1,8 @@
// MCP-tool: writes the review-log result after a IDEA_REVIEW_PLAN grill-job
// and transitions the idea.status to PLAN_REVIEWED (on success) or
// PLAN_REVIEW_FAILED (on failure).
// MCP-tool: writes the review-log result after an IDEA_REVIEW_PLAN job and
// transitions idea.status. Only an explicit approval_status='approved' moves
// the idea to PLAN_REVIEWED; anything else (rejected, pending, or omitted)
// goes to PLAN_REVIEW_FAILED — a human must then decide. The tool never
// silently approves.
//
// Called by the worker as the final step of a review-plan session.
@ -12,7 +14,7 @@ import { requireWriteAccess } from '../auth.js'
import { userOwnsIdea } from '../access.js'
import { toolError, toolJson, withToolErrors } from '../errors.js'
const inputSchema = z.object({
export const inputSchema = z.object({
idea_id: z.string().min(1),
review_log: z.object({}).passthrough(), // Full ReviewLog from orchestrator (JSON object)
approval_status: z
@ -20,64 +22,72 @@ const inputSchema = z.object({
.optional(),
})
export async function handleUpdateIdeaPlanReviewed(
{ idea_id, review_log, approval_status }: z.infer<typeof inputSchema>,
) {
return withToolErrors(async () => {
const auth = await requireWriteAccess()
if (!(await userOwnsIdea(idea_id, auth.userId))) {
return toolError('Idea not found')
}
// Alleen een expliciete 'approved' brengt het idee naar PLAN_REVIEWED.
// 'rejected', 'pending' én een weggelaten approval_status betekenen
// allemaal "niet auto-goedgekeurd — mens moet beslissen" en gaan naar
// PLAN_REVIEW_FAILED. Nooit stilzwijgend goedkeuren (de vorige
// `: 'PLAN_REVIEWED'`-default deed dat wel bij pending/undefined).
const nextStatus =
approval_status === 'approved' ? 'PLAN_REVIEWED' : 'PLAN_REVIEW_FAILED'
// Log summary metrics from review_log
const logSummary = buildReviewLogSummary(review_log)
const result = await prisma.$transaction([
prisma.idea.update({
where: { id: idea_id },
data: {
plan_review_log: review_log as any,
reviewed_at: new Date(),
status: nextStatus,
},
select: { id: true, status: true, code: true },
}),
prisma.ideaLog.create({
data: {
idea_id,
type: 'PLAN_REVIEW_RESULT',
content: logSummary.summary,
metadata: {
approval_status,
convergence_status: logSummary.convergence_status,
final_score: logSummary.final_score,
rounds_completed: logSummary.rounds_completed,
},
},
}),
])
return toolJson({
ok: true,
idea: result[0],
review_log_summary: logSummary,
})
})
}
export function registerUpdateIdeaPlanReviewedTool(server: McpServer) {
server.registerTool(
'update_idea_plan_reviewed',
{
title: 'Mark plan as reviewed',
description:
'Save review-log after plan review cycle and transition idea.status to PLAN_REVIEWED (if approved) or PLAN_REVIEW_FAILED (if rejected/pending requires manual approval). Forbidden for demo accounts.',
'Save review-log after a plan review cycle and transition idea.status. ' +
'Only approval_status="approved" → PLAN_REVIEWED; "rejected", "pending", ' +
'or an omitted approval_status → PLAN_REVIEW_FAILED (needs manual ' +
'approval — never silently approved). Forbidden for demo accounts.',
inputSchema,
},
async ({ idea_id, review_log, approval_status }) =>
withToolErrors(async () => {
const auth = await requireWriteAccess()
if (!(await userOwnsIdea(idea_id, auth.userId))) {
return toolError('Idea not found')
}
// Determine target status based on approval
const nextStatus =
approval_status === 'approved'
? 'PLAN_REVIEWED'
: approval_status === 'rejected'
? 'PLAN_REVIEW_FAILED'
: 'PLAN_REVIEWED' // Default to approved if not specified
// Log summary metrics from review_log
const logSummary = buildReviewLogSummary(review_log)
const result = await prisma.$transaction([
prisma.idea.update({
where: { id: idea_id },
data: {
plan_review_log: review_log as any,
reviewed_at: new Date(),
status: nextStatus,
},
select: { id: true, status: true, code: true },
}),
prisma.ideaLog.create({
data: {
idea_id,
type: 'PLAN_REVIEW_RESULT',
content: logSummary.summary,
metadata: {
approval_status,
convergence_status: logSummary.convergence_status,
final_score: logSummary.final_score,
rounds_completed: logSummary.rounds_completed,
},
},
}),
])
return toolJson({
ok: true,
idea: result[0],
review_log_summary: logSummary,
})
}),
handleUpdateIdeaPlanReviewed,
)
}

View file

@ -390,6 +390,32 @@ export function resolveNextAction(
return queueCount > 0 ? 'wait_for_job_again' : 'queue_empty'
}
export type JobTimestampUpdate = {
claimed_at?: Date
started_at?: Date
finished_at?: Date
}
// Bepaalt welke lifecycle-timestamps update_job_status schrijft bij een
// status-overgang. Set-once (backfill alleen als nu null) houdt de invariant
// claimed_at ≤ started_at ≤ finished_at: een job die CLAIMED → done gaat
// zonder `running`-rapport krijgt alsnog een started_at, en claimed_at
// (normaal door wait_for_job bij claim gezet) wordt nooit overschreven.
export function resolveJobTimestamps(
status: 'running' | 'done' | 'failed' | 'skipped',
current: { claimed_at: Date | null; started_at: Date | null },
now: Date = new Date(),
): JobTimestampUpdate {
const isTerminal = status === 'done' || status === 'failed' || status === 'skipped'
const update: JobTimestampUpdate = {}
if (current.claimed_at == null) update.claimed_at = now
if (current.started_at == null && (status === 'running' || isTerminal)) {
update.started_at = now
}
if (isTerminal) update.finished_at = now
return update
}
export async function maybeCreateAutoPr(opts: {
jobId: string
productId: string
@ -569,6 +595,8 @@ export function registerUpdateJobStatusTool(server: McpServer) {
'Report progress on a claimed ClaudeJob. Allowed transitions from CLAIMED/RUNNING: ' +
'running (start), done (finished), failed (error), skipped (no-op exit). ' +
'The Bearer token must match the token that claimed the job. ' +
'Stamps started_at on running and finished_at on done/failed/skipped, and backfills ' +
'claimed_at/started_at when missing so claimed_at ≤ started_at ≤ finished_at always holds. ' +
'Before marking done: call verify_task_against_plan first — done is rejected when ' +
'verify_result is null, EMPTY (unless task.verify_only is true), or when the verify level ' +
'doesnt meet task.verify_required: ALIGNED-only is strict; ALIGNED_OR_PARTIAL accepts ' +
@ -608,6 +636,8 @@ export function registerUpdateJobStatusTool(server: McpServer) {
select: {
id: true,
status: true,
claimed_at: true,
started_at: true,
claimed_by_token_id: true,
user_id: true,
product_id: true,
@ -751,10 +781,11 @@ export function registerUpdateJobStatusTool(server: McpServer) {
where: { id: job_id },
data: {
status: dbStatus,
...(actualStatus === 'running' ? { started_at: now } : {}),
...(actualStatus === 'done' || actualStatus === 'failed' || actualStatus === 'skipped'
? { finished_at: now }
: {}),
...resolveJobTimestamps(
actualStatus,
{ claimed_at: job.claimed_at, started_at: job.started_at },
now,
),
...(branchToWrite !== undefined ? { branch: branchToWrite } : {}),
...(pushedAt !== undefined ? { pushed_at: pushedAt } : {}),
...(summary !== undefined ? { summary } : {}),