PBI-57: 'skipped' no-op exit + cascade preserves original error
When verify_task_against_plan returns EMPTY because the requested changes
already live in origin/main (parallel work, earlier PR, race between
siblings), the worker had no clean exit: update_job_status only accepted
running|done|failed. 'failed' triggered the PBI fail-cascade which then
overwrote the error column with 'cancelled_by_self' and cancelled all
sibling tasks of the PBI — see Scrum4Me job cmovkur8 / T-695 for the
reference incident.
This change introduces a fourth status and tightens the cascade:
ST-1273 — 'skipped' exit in update_job_status (T-706 + T-707)
- src/tools/update-job-status.ts: status enum + DB_STATUS_MAP +
resolveNextAction now include 'skipped'. cleanupWorktreeForTerminalStatus
signature widened to ('done'|'failed'|'skipped'); SKIPPED uses keepBranch
semantics identical to FAILED (no push, no branch keep). New input guard:
'skipped' is only valid for TASK_IMPLEMENTATION jobs and requires a
non-empty error (≥10 chars) explaining the reason — it bypasses the
verify-gate, the auto-PR, the SprintRun finalize/fail paths and the
PBI fail-cascade. Locks are still released on terminal exit.
- Tool description spells out when to pick 'skipped' so MCP clients see it.
- New __tests__/update-job-status-skipped.test.ts: resolveNextAction with
'skipped' (wait_for_job_again / queue_empty), and cleanupWorktreeForTerminalStatus
with status='skipped' (keepBranch=false even with a branch reported,
defers cleanup with active siblings).
ST-1274 — cascade ignores SKIPPED + appends trace (T-708 + T-709)
- src/cancel/pbi-cascade.ts: runCascade reads job.status, returns EMPTY
when status === 'SKIPPED' (no sibling cancel). Trace persistence now
reads the current error first and writes `${original}\n---\n${trace}`
(truncated at 1900 chars), so the original failure cause is preserved
for forensics instead of being overwritten.
- New cases in __tests__/cancel-pbi-cascade.test.ts: SKIPPED entry-guard
(no findMany / updateMany / update), original error preserved with
trace appended after '---', trace-only fallback when no original
error, 1900-char truncation keeps the head of the original.
All 282 scrum4me-mcp tests pass; tsc build clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8ffb680a1a
commit
458b7a7d45
4 changed files with 212 additions and 9 deletions
|
|
@ -285,4 +285,66 @@ describe('cancelPbiOnFailure', () => {
|
|||
|
||||
expect(out.warnings.some((w) => w.includes('boom'))).toBe(true)
|
||||
})
|
||||
|
||||
it('no-ops when failed job has status SKIPPED (no-op exit, niet een echte fail)', async () => {
|
||||
mockPrisma.claudeJob.findUnique.mockResolvedValue({ ...FAILED_JOB, status: 'SKIPPED' })
|
||||
|
||||
const out = await cancelPbiOnFailure('job-failed')
|
||||
|
||||
expect(out.cancelled_job_ids).toEqual([])
|
||||
expect(mockPrisma.claudeJob.findMany).not.toHaveBeenCalled()
|
||||
expect(mockPrisma.claudeJob.updateMany).not.toHaveBeenCalled()
|
||||
expect(mockPrisma.claudeJob.update).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('appends the cascade trace to an existing error (preserves original cause)', async () => {
|
||||
// findUnique wordt twee keer aangeroepen: eerst voor failedJob (status FAILED + originele error),
|
||||
// daarna door de append-trace om de huidige error te lezen vóór update.
|
||||
mockPrisma.claudeJob.findUnique
|
||||
.mockResolvedValueOnce({ ...FAILED_JOB, status: 'FAILED' })
|
||||
.mockResolvedValueOnce({ error: 'timeout: agent died after 5min' })
|
||||
mockPrisma.claudeJob.findMany.mockResolvedValue([])
|
||||
|
||||
await cancelPbiOnFailure('job-failed')
|
||||
|
||||
expect(mockPrisma.claudeJob.update).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
where: { id: 'job-failed' },
|
||||
data: expect.objectContaining({
|
||||
error: expect.stringMatching(/timeout: agent died after 5min[\s\S]*---[\s\S]*cancelled_by_self/),
|
||||
}),
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
it('falls back to trace-only when there is no existing error', async () => {
|
||||
mockPrisma.claudeJob.findUnique
|
||||
.mockResolvedValueOnce({ ...FAILED_JOB, status: 'FAILED' })
|
||||
.mockResolvedValueOnce({ error: null })
|
||||
mockPrisma.claudeJob.findMany.mockResolvedValue([])
|
||||
|
||||
await cancelPbiOnFailure('job-failed')
|
||||
|
||||
const updateCall = mockPrisma.claudeJob.update.mock.calls[0]?.[0] as
|
||||
| { data: { error: string } }
|
||||
| undefined
|
||||
expect(updateCall?.data.error).toMatch(/^cancelled_by_self/)
|
||||
expect(updateCall?.data.error).not.toContain('---')
|
||||
})
|
||||
|
||||
it('truncates the merged error at 1900 chars while preserving the head of the original', async () => {
|
||||
const longOriginal = 'X'.repeat(1800)
|
||||
mockPrisma.claudeJob.findUnique
|
||||
.mockResolvedValueOnce({ ...FAILED_JOB, status: 'FAILED' })
|
||||
.mockResolvedValueOnce({ error: longOriginal })
|
||||
mockPrisma.claudeJob.findMany.mockResolvedValue([])
|
||||
|
||||
await cancelPbiOnFailure('job-failed')
|
||||
|
||||
const updateCall = mockPrisma.claudeJob.update.mock.calls[0]?.[0] as
|
||||
| { data: { error: string } }
|
||||
| undefined
|
||||
expect(updateCall?.data.error.length).toBeLessThanOrEqual(1900)
|
||||
expect(updateCall?.data.error.startsWith('X')).toBe(true)
|
||||
})
|
||||
})
|
||||
|
|
|
|||
95
__tests__/update-job-status-skipped.test.ts
Normal file
95
__tests__/update-job-status-skipped.test.ts
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
// Unit-tests voor de no-op SKIPPED exit-route in update_job_status (PBI-57 ST-1273).
|
||||
// Volle handler-integratie wordt niet hier getest — die hangt aan tientallen
|
||||
// MCP/Prisma-mocks. Wel testen we de geëxporteerde helpers die expliciet
|
||||
// SKIPPED-aware zijn gemaakt: resolveNextAction en cleanupWorktreeForTerminalStatus.
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
|
||||
vi.mock('../src/prisma.js', () => ({
|
||||
prisma: {
|
||||
claudeJob: { findUnique: vi.fn(), count: vi.fn() },
|
||||
},
|
||||
}))
|
||||
|
||||
vi.mock('../src/git/worktree.js', () => ({
|
||||
removeWorktreeForJob: vi.fn(),
|
||||
}))
|
||||
|
||||
vi.mock('../src/tools/wait-for-job.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../src/tools/wait-for-job.js')>()
|
||||
return {
|
||||
...original,
|
||||
resolveRepoRoot: vi.fn(),
|
||||
}
|
||||
})
|
||||
|
||||
import { prisma } from '../src/prisma.js'
|
||||
import { removeWorktreeForJob } from '../src/git/worktree.js'
|
||||
import { resolveRepoRoot } from '../src/tools/wait-for-job.js'
|
||||
import {
|
||||
cleanupWorktreeForTerminalStatus,
|
||||
resolveNextAction,
|
||||
} from '../src/tools/update-job-status.js'
|
||||
|
||||
const mockRemove = removeWorktreeForJob as ReturnType<typeof vi.fn>
|
||||
const mockResolve = resolveRepoRoot as ReturnType<typeof vi.fn>
|
||||
const mockPrisma = prisma as unknown as {
|
||||
claudeJob: {
|
||||
findUnique: ReturnType<typeof vi.fn>
|
||||
count: ReturnType<typeof vi.fn>
|
||||
}
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks()
|
||||
mockPrisma.claudeJob.findUnique.mockResolvedValue({ task: { story_id: 'story-default' } })
|
||||
mockPrisma.claudeJob.count.mockResolvedValue(0)
|
||||
})
|
||||
|
||||
describe('resolveNextAction — skipped pad', () => {
|
||||
it('returns wait_for_job_again when queue has jobs after skipped', () => {
|
||||
expect(resolveNextAction(2, 'skipped')).toBe('wait_for_job_again')
|
||||
})
|
||||
|
||||
it('returns queue_empty when queue is empty after skipped', () => {
|
||||
expect(resolveNextAction(0, 'skipped')).toBe('queue_empty')
|
||||
})
|
||||
})
|
||||
|
||||
describe('cleanupWorktreeForTerminalStatus — skipped pad', () => {
|
||||
it('calls removeWorktreeForJob with keepBranch=false when skipped (no push happened)', async () => {
|
||||
mockResolve.mockResolvedValue('/repos/my-project')
|
||||
mockRemove.mockResolvedValue({ removed: true })
|
||||
|
||||
await cleanupWorktreeForTerminalStatus('prod-001', 'job-skip', 'skipped', undefined)
|
||||
|
||||
expect(mockRemove).toHaveBeenCalledWith({
|
||||
repoRoot: '/repos/my-project',
|
||||
jobId: 'job-skip',
|
||||
keepBranch: false,
|
||||
})
|
||||
})
|
||||
|
||||
it('keeps keepBranch=false when skipped even if a branch is reported', async () => {
|
||||
mockResolve.mockResolvedValue('/repos/my-project')
|
||||
mockRemove.mockResolvedValue({ removed: true })
|
||||
|
||||
await cleanupWorktreeForTerminalStatus('prod-001', 'job-skip', 'skipped', 'feat/job-skip')
|
||||
|
||||
expect(mockRemove).toHaveBeenCalledWith({
|
||||
repoRoot: '/repos/my-project',
|
||||
jobId: 'job-skip',
|
||||
keepBranch: false,
|
||||
})
|
||||
})
|
||||
|
||||
it('defers cleanup when sibling jobs in same story are still active (skipped path)', async () => {
|
||||
mockResolve.mockResolvedValue('/repos/my-project')
|
||||
mockPrisma.claudeJob.findUnique.mockResolvedValue({ task: { story_id: 'story-shared' } })
|
||||
mockPrisma.claudeJob.count.mockResolvedValue(1)
|
||||
|
||||
await cleanupWorktreeForTerminalStatus('prod-001', 'job-skip', 'skipped', undefined)
|
||||
|
||||
expect(mockRemove).not.toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
|
@ -47,6 +47,7 @@ async function runCascade(failedJobId: string): Promise<CascadeOutcome> {
|
|||
select: {
|
||||
id: true,
|
||||
kind: true,
|
||||
status: true,
|
||||
product_id: true,
|
||||
task_id: true,
|
||||
branch: true,
|
||||
|
|
@ -65,6 +66,8 @@ async function runCascade(failedJobId: string): Promise<CascadeOutcome> {
|
|||
|
||||
if (!failedJob) return EMPTY
|
||||
if (failedJob.kind !== 'TASK_IMPLEMENTATION') return EMPTY
|
||||
// SKIPPED is een no-op exit (zie update_job_status). Geen cascade naar siblings.
|
||||
if (failedJob.status === 'SKIPPED') return EMPTY
|
||||
const pbi = failedJob.task?.story?.pbi
|
||||
if (!pbi) return EMPTY
|
||||
|
||||
|
|
@ -194,12 +197,21 @@ async function runCascade(failedJobId: string): Promise<CascadeOutcome> {
|
|||
|
||||
// 4. Persist a trace on the failed-job's error field so the operator can
|
||||
// follow up. Use a structured one-liner to keep the column readable.
|
||||
// Append to the existing error (separated by '\n---\n') so the original
|
||||
// failure reason is preserved instead of being overwritten by the trace.
|
||||
const trace = formatTrace(outcome)
|
||||
if (trace) {
|
||||
try {
|
||||
const fresh = await prisma.claudeJob.findUnique({
|
||||
where: { id: failedJobId },
|
||||
select: { error: true },
|
||||
})
|
||||
const merged = fresh?.error
|
||||
? `${fresh.error}\n---\n${trace}`.slice(0, 1900)
|
||||
: trace.slice(0, 1900)
|
||||
await prisma.claudeJob.update({
|
||||
where: { id: failedJobId },
|
||||
data: { error: trace.slice(0, 1900) },
|
||||
data: { error: merged },
|
||||
})
|
||||
} catch (err) {
|
||||
console.warn(`[pbi-cascade] failed to persist trace for ${failedJobId}:`, err)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,12 @@
|
|||
// update_job_status — agent rapporteert voortgang: running | done | failed.
|
||||
// update_job_status — agent rapporteert voortgang: running | done | failed | skipped.
|
||||
// Auth: Bearer-token moet matchen claimed_by_token_id van de job.
|
||||
// Triggert automatisch een SSE-event naar de UI via pg_notify.
|
||||
//
|
||||
// 'skipped' is de no-op exit voor TASK_IMPLEMENTATION jobs waar verify_task_against_plan
|
||||
// EMPTY oplevert omdat de wijzigingen al in origin/main staan (parallel werk, eerdere
|
||||
// PR, race tussen siblings). Geen verify-gate, geen PR, geen cascade. De worker moet
|
||||
// de bijbehorende task apart op DONE zetten via update_task_status als de inhoudelijke
|
||||
// vereisten al zijn voldaan.
|
||||
|
||||
import { z } from 'zod'
|
||||
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
||||
|
|
@ -38,7 +44,7 @@ async function fetchConflictFiles(prUrl: string): Promise<string[]> {
|
|||
|
||||
const inputSchema = z.object({
|
||||
job_id: z.string().min(1),
|
||||
status: z.enum(['running', 'done', 'failed']),
|
||||
status: z.enum(['running', 'done', 'failed', 'skipped']),
|
||||
branch: z.string().min(1).optional(),
|
||||
summary: z.string().max(1_000).optional(),
|
||||
error: z.string().max(2_000).optional(),
|
||||
|
|
@ -52,7 +58,7 @@ const inputSchema = z.object({
|
|||
export async function cleanupWorktreeForTerminalStatus(
|
||||
productId: string,
|
||||
jobId: string,
|
||||
status: 'done' | 'failed',
|
||||
status: 'done' | 'failed' | 'skipped',
|
||||
branch: string | undefined,
|
||||
): Promise<void> {
|
||||
const repoRoot = await resolveRepoRoot(productId)
|
||||
|
|
@ -329,11 +335,12 @@ const DB_STATUS_MAP = {
|
|||
running: 'RUNNING',
|
||||
done: 'DONE',
|
||||
failed: 'FAILED',
|
||||
skipped: 'SKIPPED',
|
||||
} as const
|
||||
|
||||
export function resolveNextAction(
|
||||
queueCount: number,
|
||||
status: 'running' | 'done' | 'failed',
|
||||
status: 'running' | 'done' | 'failed' | 'skipped',
|
||||
): 'wait_for_job_again' | 'queue_empty' | 'idle' {
|
||||
if (status === 'running') return 'idle'
|
||||
return queueCount > 0 ? 'wait_for_job_again' : 'queue_empty'
|
||||
|
|
@ -501,13 +508,18 @@ export function registerUpdateJobStatusTool(server: McpServer) {
|
|||
title: 'Update job status',
|
||||
description:
|
||||
'Report progress on a claimed ClaudeJob. Allowed transitions from CLAIMED/RUNNING: ' +
|
||||
'running (start), done (finished), failed (error). ' +
|
||||
'running (start), done (finished), failed (error), skipped (no-op exit). ' +
|
||||
'The Bearer token must match the token that claimed the job. ' +
|
||||
'Before marking done: call verify_task_against_plan first — done is rejected when ' +
|
||||
'verify_result is null, EMPTY (unless task.verify_only is true), or when the verify level ' +
|
||||
'doesn’t meet task.verify_required: ALIGNED-only is strict; ALIGNED_OR_PARTIAL accepts ' +
|
||||
'PARTIAL/DIVERGENT but requires a non-empty summary (≥20 chars) explaining the drift; ANY ' +
|
||||
'accepts everything. ' +
|
||||
"Use 'skipped' for TASK_IMPLEMENTATION when verify_task_against_plan returns EMPTY because " +
|
||||
'the requested changes are already present in origin/main (parallel work, earlier PR, race ' +
|
||||
"between siblings). 'skipped' requires a non-empty error (≥10 chars) describing the reason " +
|
||||
"(e.g. 'no_op_changes_already_in_main') and skips the verify-gate, auto-PR and PBI fail-cascade. " +
|
||||
'Mark the underlying task DONE separately via update_task_status if its requirements are met. ' +
|
||||
'Automatically emits an SSE event so the Scrum4Me UI updates in real time. ' +
|
||||
'Optionally accepts token-usage fields (model_id + input/output/cache_read/cache_write tokens) ' +
|
||||
'for cost tracking — typically populated by a PostToolUse hook from the local Claude Code transcript, ' +
|
||||
|
|
@ -565,6 +577,23 @@ export function registerUpdateJobStatusTool(server: McpServer) {
|
|||
return toolError(`Job is already in terminal state: ${job.status.toLowerCase()}`)
|
||||
}
|
||||
|
||||
// 'skipped' = no-op exit. Only valid for TASK_IMPLEMENTATION (verify=EMPTY
|
||||
// patroon) en vereist een non-empty error met ≥10 chars uitleg, zoals
|
||||
// 'no_op_changes_already_in_main'. Geen verify-gate, geen PR, geen
|
||||
// PBI fail-cascade, geen propagation naar task/story/PBI.
|
||||
if (status === 'skipped') {
|
||||
if (job.kind !== 'TASK_IMPLEMENTATION') {
|
||||
return toolError(
|
||||
`'skipped' is alleen toegestaan voor TASK_IMPLEMENTATION (kind=${job.kind})`,
|
||||
)
|
||||
}
|
||||
if (!error || error.trim().length < 10) {
|
||||
return toolError(
|
||||
"'skipped' vereist non-empty error met reden (≥10 chars), bv. 'no_op_changes_already_in_main'",
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// For DONE: push first, adjust DB status based on result
|
||||
let actualStatus = status
|
||||
let pushedAt: Date | undefined
|
||||
|
|
@ -663,7 +692,9 @@ export function registerUpdateJobStatusTool(server: McpServer) {
|
|||
data: {
|
||||
status: dbStatus,
|
||||
...(actualStatus === 'running' ? { started_at: now } : {}),
|
||||
...(actualStatus === 'done' || actualStatus === 'failed' ? { finished_at: now } : {}),
|
||||
...(actualStatus === 'done' || actualStatus === 'failed' || actualStatus === 'skipped'
|
||||
? { finished_at: now }
|
||||
: {}),
|
||||
...(branchToWrite !== undefined ? { branch: branchToWrite } : {}),
|
||||
...(pushedAt !== undefined ? { pushed_at: pushedAt } : {}),
|
||||
...(summary !== undefined ? { summary } : {}),
|
||||
|
|
@ -881,7 +912,10 @@ export function registerUpdateJobStatusTool(server: McpServer) {
|
|||
}
|
||||
|
||||
// Best-effort worktree cleanup on terminal transitions (skip if push failed — worktree preserved)
|
||||
if ((actualStatus === 'done' || actualStatus === 'failed') && !skipWorktreeCleanup) {
|
||||
if (
|
||||
(actualStatus === 'done' || actualStatus === 'failed' || actualStatus === 'skipped') &&
|
||||
!skipWorktreeCleanup
|
||||
) {
|
||||
await cleanupWorktreeForTerminalStatus(job.product_id, job_id, actualStatus, branchToWrite)
|
||||
}
|
||||
|
||||
|
|
@ -973,7 +1007,7 @@ export function registerUpdateJobStatusTool(server: McpServer) {
|
|||
|
||||
// PBI-9: release product-worktree locks on terminal transitions.
|
||||
// No-op for jobs without registered locks (i.e. TASK_IMPLEMENTATION).
|
||||
if (actualStatus === 'done' || actualStatus === 'failed') {
|
||||
if (actualStatus === 'done' || actualStatus === 'failed' || actualStatus === 'skipped') {
|
||||
await releaseLocksOnTerminal(job_id)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue