From 70e58f8b28b6c19ff5cb375bfec45cfdaa08be1b Mon Sep 17 00:00:00 2001 From: Madhura68 Date: Wed, 6 May 2026 10:08:31 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20PBI=20fail-cascade=20=E2=80=94=20cancel?= =?UTF-8?q?=20siblings=20+=20undo=20commits?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wanneer een TASK_IMPLEMENTATION-job FAILED wordt, cancelt cancelPbiOnFailure alle queued/claimed/running siblings binnen dezelfde PBI (over alle stories heen) en draait gepushte commits ongedaan: - Open PR → gh pr close --delete-branch (PR-close + remote-branch-delete in één). - Gemergde PR → revert-PR via git revert -m 1 <merge-sha> in een tijdelijke worktree, gepusht naar revert/<branch>-<job-id>, gh pr create zonder auto-merge (een mens reviewt). - Branch zonder PR → best-effort git push origin --delete <branch>. Race-protectie: update_job_status weigert nu een statuswijziging op een job die al CANCELLED is met een specifieke JOB_CANCELLED-error, zodat een parallelle worker zijn lokale werk weggooit in plaats van een DONE te forceren. Idempotent — een tweede cascade voor dezelfde PBI is een no-op. Non-blocking — alle fouten worden warnings in de trace in het error-veld van de oorspronkelijke failed job; de cascade throwt nooit naar de caller. Niet in scope: per-product opt-out, sprint-niveau cascade, idea-job cascade. 11 nieuwe vitest-cases dekken DB-cascade, branch-grouping, open/merged/no-PR paden, repo-root-mismatch en de never-throws-garantie. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 10 + __tests__/cancel-pbi-cascade.test.ts | 288 +++++++++++++++++++++++++++ src/cancel/pbi-cascade.ts | 237 ++++++++++++++++++++++ src/git/pr.ts | 170 ++++++++++++++++ src/git/push.ts | 24 +++ src/tools/update-job-status.ts | 18 ++ 6 files changed, 747 insertions(+) create mode 100644 __tests__/cancel-pbi-cascade.test.ts create mode 100644 src/cancel/pbi-cascade.ts diff --git a/CLAUDE.md b/CLAUDE.md index 2fcf0ba..83fe087 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,6 +26,16 @@ MCP server that exposes the Scrum4Me dev-flow as native tools for Claude Code. A story with 3 sub-tasks lands as **1 branch** with 3 commits and **1 PR** (assuming `auto_pr=true`). Sibling sub-tasks share the same `pr_url` — `maybeCreateAutoPr` reuses an existing PR from a sibling job instead of opening duplicates. Story-level PR title (`: `) so the GitHub view reads as one logical change rather than per-task fragments. +### PBI fail-cascade + +When a `TASK_IMPLEMENTATION` job ends in `FAILED`, `cancelPbiOnFailure` (`src/cancel/pbi-cascade.ts`) cancels every queued/claimed/running sibling under the **same PBI** (across all stories) and undoes already-pushed commits: + +- **Open PR** → `gh pr close --delete-branch` with a cascade-comment. +- **Merged PR** → revert-PR opened against the base branch via `git revert -m 1 <merge-sha>`. **No** auto-merge on the revert PR — review by hand. +- **Branch without PR** → best-effort `git push origin --delete <branch>`. + +A trace (cancelled-job count, closed/reverted PRs, deleted branches, warning count) is written to the original failed job's `error` column. Race-protection: if a parallel worker calls `update_job_status` on a job that the cascade already set to `CANCELLED`, the call is rejected with a `JOB_CANCELLED` error so the agent discards local work and calls `wait_for_job` again. The cascade is idempotent and never throws — failures are collected as warnings, and the trace on the failed job records how many occurred. 
+ ### Required configuration Set env var per product: diff --git a/__tests__/cancel-pbi-cascade.test.ts b/__tests__/cancel-pbi-cascade.test.ts new file mode 100644 index 0000000..8b55688 --- /dev/null +++ b/__tests__/cancel-pbi-cascade.test.ts @@ -0,0 +1,288 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' + +vi.mock('../src/prisma.js', () => ({ + prisma: { + claudeJob: { + findUnique: vi.fn(), + findMany: vi.fn(), + updateMany: vi.fn(), + update: vi.fn(), + }, + }, +})) + +vi.mock('../src/tools/wait-for-job.js', async (importOriginal) => { + const original = await importOriginal() + return { ...original, resolveRepoRoot: vi.fn() } +}) + +vi.mock('../src/git/worktree.js', () => ({ + removeWorktreeForJob: vi.fn(), +})) + +vi.mock('../src/git/pr.js', () => ({ + closePullRequest: vi.fn(), + getPullRequestState: vi.fn(), + createRevertPullRequest: vi.fn(), +})) + +vi.mock('../src/git/push.js', () => ({ + deleteRemoteBranch: vi.fn(), +})) + +import { prisma } from '../src/prisma.js' +import { resolveRepoRoot } from '../src/tools/wait-for-job.js' +import { removeWorktreeForJob } from '../src/git/worktree.js' +import { + closePullRequest, + getPullRequestState, + createRevertPullRequest, +} from '../src/git/pr.js' +import { deleteRemoteBranch } from '../src/git/push.js' +import { cancelPbiOnFailure } from '../src/cancel/pbi-cascade.js' + +const mockPrisma = prisma as unknown as { + claudeJob: { + findUnique: ReturnType + findMany: ReturnType + updateMany: ReturnType + update: ReturnType + } +} +const mockResolveRepoRoot = resolveRepoRoot as ReturnType +const mockRemoveWorktree = removeWorktreeForJob as ReturnType +const mockClosePr = closePullRequest as ReturnType +const mockGetPrState = getPullRequestState as ReturnType +const mockCreateRevertPr = createRevertPullRequest as ReturnType +const mockDeleteBranch = deleteRemoteBranch as ReturnType + +beforeEach(() => { + vi.clearAllMocks() + mockPrisma.claudeJob.update.mockResolvedValue({}) + 
mockPrisma.claudeJob.updateMany.mockResolvedValue({ count: 0 }) + mockResolveRepoRoot.mockResolvedValue('/repos/proj') + mockRemoveWorktree.mockResolvedValue(undefined) + // Sensible defaults so an un-stubbed branch in a test doesn't throw on + // `result.deleted` / `result.ok` access. Tests that care override these. + mockDeleteBranch.mockResolvedValue({ deleted: true }) + mockClosePr.mockResolvedValue({ ok: true }) +}) + +const FAILED_JOB = { + id: 'job-failed', + kind: 'TASK_IMPLEMENTATION', + product_id: 'prod-1', + task_id: 'task-failed', + branch: 'feat/story-aaaabbbb', + pr_url: null, + task: { story: { pbi: { id: 'pbi-1', code: 'PBI-7' } } }, +} + +describe('cancelPbiOnFailure', () => { + it('no-ops for non-TASK_IMPLEMENTATION jobs', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue({ ...FAILED_JOB, kind: 'IDEA_GRILL' }) + + const out = await cancelPbiOnFailure('job-failed') + + expect(out.cancelled_job_ids).toEqual([]) + expect(mockPrisma.claudeJob.findMany).not.toHaveBeenCalled() + expect(mockPrisma.claudeJob.updateMany).not.toHaveBeenCalled() + }) + + it('no-ops when failed job has no PBI parent', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue({ + ...FAILED_JOB, + task: null, + }) + const out = await cancelPbiOnFailure('job-failed') + expect(out).toEqual({ + cancelled_job_ids: [], + closed_prs: [], + reverted_prs: [], + deleted_branches: [], + warnings: [], + }) + }) + + it('cancels eligible siblings and writes a trace to the failed job', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue(FAILED_JOB) + mockPrisma.claudeJob.findMany.mockResolvedValue([ + { id: 'job-sib1', branch: 'feat/story-aaaabbbb', pr_url: null, status: 'QUEUED', task_id: 't2' }, + { id: 'job-sib2', branch: 'feat/story-ccccdddd', pr_url: null, status: 'CLAIMED', task_id: 't3' }, + ]) + + const out = await cancelPbiOnFailure('job-failed') + + expect(mockPrisma.claudeJob.updateMany).toHaveBeenCalledWith( + expect.objectContaining({ + 
where: { id: { in: ['job-sib1', 'job-sib2'] } }, + data: expect.objectContaining({ + status: 'CANCELLED', + error: 'cancelled_by_pbi_failure', + }), + }), + ) + expect(out.cancelled_job_ids).toEqual(['job-sib1', 'job-sib2']) + expect(mockPrisma.claudeJob.update).toHaveBeenCalledWith( + expect.objectContaining({ + where: { id: 'job-failed' }, + data: expect.objectContaining({ error: expect.stringContaining('cancelled_by_self') }), + }), + ) + }) + + it('idempotent: empty eligible set means no updateMany call', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue(FAILED_JOB) + mockPrisma.claudeJob.findMany.mockResolvedValue([]) + + await cancelPbiOnFailure('job-failed') + + expect(mockPrisma.claudeJob.updateMany).not.toHaveBeenCalled() + }) + + it('closes an open PR with the cascade comment', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue({ + ...FAILED_JOB, + pr_url: 'https://github.com/o/r/pull/1', + }) + mockPrisma.claudeJob.findMany.mockResolvedValue([]) + mockGetPrState.mockResolvedValue({ + state: 'OPEN', + mergeCommit: null, + baseRefName: 'main', + title: 'feat: x', + }) + mockClosePr.mockResolvedValue({ ok: true }) + + const out = await cancelPbiOnFailure('job-failed') + + expect(mockClosePr).toHaveBeenCalledWith( + expect.objectContaining({ + prUrl: 'https://github.com/o/r/pull/1', + comment: expect.stringContaining('PBI PBI-7'), + }), + ) + expect(out.closed_prs).toEqual(['https://github.com/o/r/pull/1']) + }) + + it('creates a revert-PR when an affected PR is already merged', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue({ + ...FAILED_JOB, + pr_url: 'https://github.com/o/r/pull/9', + }) + mockPrisma.claudeJob.findMany.mockResolvedValue([]) + mockGetPrState.mockResolvedValue({ + state: 'MERGED', + mergeCommit: 'abc123def', + baseRefName: 'main', + title: 'feat: shipped', + }) + mockCreateRevertPr.mockResolvedValue({ url: 'https://github.com/o/r/pull/10' }) + + const out = await 
cancelPbiOnFailure('job-failed') + + expect(mockCreateRevertPr).toHaveBeenCalledWith( + expect.objectContaining({ + repoRoot: '/repos/proj', + mergeSha: 'abc123def', + baseRef: 'main', + originalTitle: 'feat: shipped', + originalBranch: 'feat/story-aaaabbbb', + jobId: 'job-failed', + pbiCode: 'PBI-7', + }), + ) + expect(out.reverted_prs).toEqual([ + { original: 'https://github.com/o/r/pull/9', revertPr: 'https://github.com/o/r/pull/10' }, + ]) + expect(mockClosePr).not.toHaveBeenCalled() + }) + + it('skips revert when no repo root is configured + emits a warning', async () => { + mockResolveRepoRoot.mockResolvedValue(null) + mockPrisma.claudeJob.findUnique.mockResolvedValue({ + ...FAILED_JOB, + pr_url: 'https://github.com/o/r/pull/9', + }) + mockPrisma.claudeJob.findMany.mockResolvedValue([]) + mockGetPrState.mockResolvedValue({ + state: 'MERGED', + mergeCommit: 'abc', + baseRefName: 'main', + title: 'x', + }) + + const out = await cancelPbiOnFailure('job-failed') + + expect(mockCreateRevertPr).not.toHaveBeenCalled() + expect(out.warnings.some((w) => /no repo root/i.test(w))).toBe(true) + }) + + it('deletes a remote branch when there is no PR for it', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue({ + ...FAILED_JOB, + pr_url: null, + }) + mockPrisma.claudeJob.findMany.mockResolvedValue([]) + mockDeleteBranch.mockResolvedValue({ deleted: true }) + + const out = await cancelPbiOnFailure('job-failed') + + expect(mockDeleteBranch).toHaveBeenCalledWith({ + repoRoot: '/repos/proj', + branch: 'feat/story-aaaabbbb', + }) + expect(out.deleted_branches).toEqual(['feat/story-aaaabbbb']) + }) + + it('groups siblings sharing a branch so the PR is only closed once', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue({ + ...FAILED_JOB, + branch: 'feat/story-shared', + pr_url: 'https://github.com/o/r/pull/1', + }) + mockPrisma.claudeJob.findMany.mockResolvedValue([ + { + id: 'job-sib', + branch: 'feat/story-shared', + pr_url: 
'https://github.com/o/r/pull/1', + status: 'QUEUED', + task_id: 't2', + }, + ]) + mockGetPrState.mockResolvedValue({ + state: 'OPEN', + mergeCommit: null, + baseRefName: 'main', + title: 't', + }) + mockClosePr.mockResolvedValue({ ok: true }) + + await cancelPbiOnFailure('job-failed') + + expect(mockClosePr).toHaveBeenCalledTimes(1) + }) + + it('removes worktrees of cancelled siblings', async () => { + mockPrisma.claudeJob.findUnique.mockResolvedValue(FAILED_JOB) + mockPrisma.claudeJob.findMany.mockResolvedValue([ + { id: 'job-sib1', branch: null, pr_url: null, status: 'QUEUED', task_id: 't2' }, + ]) + + await cancelPbiOnFailure('job-failed') + + expect(mockRemoveWorktree).toHaveBeenCalledWith({ + repoRoot: '/repos/proj', + jobId: 'job-sib1', + keepBranch: false, + }) + }) + + it('never throws — wraps unexpected errors into warnings', async () => { + mockPrisma.claudeJob.findUnique.mockRejectedValue(new Error('boom')) + + const out = await cancelPbiOnFailure('job-failed') + + expect(out.warnings.some((w) => w.includes('boom'))).toBe(true) + }) +}) diff --git a/src/cancel/pbi-cascade.ts b/src/cancel/pbi-cascade.ts new file mode 100644 index 0000000..05a014f --- /dev/null +++ b/src/cancel/pbi-cascade.ts @@ -0,0 +1,237 @@ +// PBI fail-cascade — wanneer een TASK_IMPLEMENTATION-job FAILED wordt, +// cancellen we alle queued/claimed/running siblings binnen dezelfde PBI +// en draaien we eerder gepushte commits ongedaan via PR-close of een +// auto-revert-PR. Idempotent en non-blocking: elke fout wordt gelogd in +// het error-veld van de oorspronkelijke failed-job en stopt de cascade niet. 
+ +import { prisma } from '../prisma.js' +import { resolveRepoRoot } from '../tools/wait-for-job.js' +import { removeWorktreeForJob } from '../git/worktree.js' +import { + closePullRequest, + createRevertPullRequest, + getPullRequestState, +} from '../git/pr.js' +import { deleteRemoteBranch } from '../git/push.js' + +export type CascadeOutcome = { + cancelled_job_ids: string[] + closed_prs: string[] + reverted_prs: { original: string; revertPr: string }[] + deleted_branches: string[] + warnings: string[] +} + +const EMPTY: CascadeOutcome = { + cancelled_job_ids: [], + closed_prs: [], + reverted_prs: [], + deleted_branches: [], + warnings: [], +} + +// Public entry. Always returns; never throws. +export async function cancelPbiOnFailure(failedJobId: string): Promise { + try { + return await runCascade(failedJobId) + } catch (err) { + console.warn(`[pbi-cascade] unexpected error for failedJob=${failedJobId}:`, err) + return { ...EMPTY, warnings: [`unexpected: ${(err as Error).message}`] } + } +} + +async function runCascade(failedJobId: string): Promise { + const failedJob = await prisma.claudeJob.findUnique({ + where: { id: failedJobId }, + select: { + id: true, + kind: true, + product_id: true, + task_id: true, + branch: true, + pr_url: true, + task: { + select: { + story: { + select: { + pbi: { select: { id: true, code: true } }, + }, + }, + }, + }, + }, + }) + + if (!failedJob) return EMPTY + if (failedJob.kind !== 'TASK_IMPLEMENTATION') return EMPTY + const pbi = failedJob.task?.story?.pbi + if (!pbi) return EMPTY + + // 1. Atomic cascade: select + updateMany. Race-window between SELECT + // and UPDATE is harmless because the cascade is idempotent — a second + // invocation simply finds zero rows. 
+ const eligible = await prisma.claudeJob.findMany({ + where: { + id: { not: failedJobId }, + status: { in: ['QUEUED', 'CLAIMED', 'RUNNING'] }, + task: { story: { pbi_id: pbi.id } }, + }, + select: { id: true, branch: true, pr_url: true, status: true, task_id: true }, + }) + + if (eligible.length > 0) { + await prisma.claudeJob.updateMany({ + where: { id: { in: eligible.map((j) => j.id) } }, + data: { + status: 'CANCELLED', + finished_at: new Date(), + error: 'cancelled_by_pbi_failure', + }, + }) + } + + const outcome: CascadeOutcome = { + cancelled_job_ids: eligible.map((j) => j.id), + closed_prs: [], + reverted_prs: [], + deleted_branches: [], + warnings: [], + } + + // 2. Group affected jobs (cascade-set ∪ failed) by branch to avoid + // closing the same PR twice for siblings sharing a story-branch. + const branchSet = new Map() + const all = [...eligible, { branch: failedJob.branch, pr_url: failedJob.pr_url }] + for (const j of all) { + if (!j.branch) continue + const existing = branchSet.get(j.branch) + // Prefer a non-null pr_url if any sibling has one. + if (!existing) { + branchSet.set(j.branch, { prUrl: j.pr_url ?? null }) + } else if (!existing.prUrl && j.pr_url) { + branchSet.set(j.branch, { prUrl: j.pr_url }) + } + } + + const repoRoot = await resolveRepoRoot(failedJob.product_id) + const cascadeComment = `PBI ${pbi.code ?? pbi.id} cascaded fail — see job ${failedJobId}` + + for (const [branch, { prUrl }] of branchSet) { + if (prUrl) { + const info = await getPullRequestState({ prUrl, cwd: repoRoot ?? undefined }) + if ('error' in info) { + outcome.warnings.push(`gh pr view ${prUrl}: ${info.error}`) + continue + } + if (info.state === 'CLOSED') { + // Already closed; nothing to do for the PR. Branch may still exist. + if (repoRoot) await tryDeleteBranch(repoRoot, branch, outcome) + continue + } + if (info.state === 'OPEN') { + const closed = await closePullRequest({ + prUrl, + comment: cascadeComment, + cwd: repoRoot ?? 
undefined, + }) + if ('error' in closed) { + outcome.warnings.push(`close ${prUrl}: ${closed.error}`) + } else { + outcome.closed_prs.push(prUrl) + } + continue + } + if (info.state === 'MERGED') { + if (!repoRoot) { + outcome.warnings.push( + `merged PR ${prUrl} not reverted: no repo root configured for product ${failedJob.product_id}`, + ) + continue + } + if (!info.mergeCommit) { + outcome.warnings.push(`merged PR ${prUrl} has no mergeCommit — skipping revert`) + continue + } + const revert = await createRevertPullRequest({ + repoRoot, + mergeSha: info.mergeCommit, + baseRef: info.baseRefName, + originalTitle: info.title, + originalBranch: branch, + jobId: failedJobId, + pbiCode: pbi.code, + }) + if ('error' in revert) { + outcome.warnings.push(`revert ${prUrl}: ${revert.error}`) + } else { + outcome.reverted_prs.push({ original: prUrl, revertPr: revert.url }) + } + continue + } + } else { + // Branch without PR: best-effort delete on remote. + if (repoRoot) await tryDeleteBranch(repoRoot, branch, outcome) + } + } + + // 3. Worktree cleanup for every cancelled job (and the failed job itself + // is handled elsewhere by cleanupWorktreeForTerminalStatus). For + // cancelled jobs we always discard the branch locally — they did not + // succeed. + if (repoRoot) { + for (const j of eligible) { + try { + await removeWorktreeForJob({ repoRoot, jobId: j.id, keepBranch: false }) + } catch (err) { + outcome.warnings.push(`worktree cleanup for ${j.id}: ${(err as Error).message}`) + } + } + } + + // 4. Persist a trace on the failed-job's error field so the operator can + // follow up. Use a structured one-liner to keep the column readable. 
+ const trace = formatTrace(outcome) + if (trace) { + try { + await prisma.claudeJob.update({ + where: { id: failedJobId }, + data: { error: trace.slice(0, 1900) }, + }) + } catch (err) { + console.warn(`[pbi-cascade] failed to persist trace for ${failedJobId}:`, err) + } + } + + return outcome +} + +async function tryDeleteBranch( + repoRoot: string, + branch: string, + outcome: CascadeOutcome, +): Promise { + const result = await deleteRemoteBranch({ repoRoot, branch }) + if (result.deleted) { + outcome.deleted_branches.push(branch) + return + } + if (result.reason === 'not-found') { + // Already gone — silent no-op. + return + } + outcome.warnings.push( + `delete-branch ${branch} (${result.reason}): ${result.stderr.slice(0, 120)}`, + ) +} + +function formatTrace(o: CascadeOutcome): string { + const parts: string[] = ['cancelled_by_self'] + if (o.cancelled_job_ids.length) parts.push(`siblings_cancelled=${o.cancelled_job_ids.length}`) + if (o.closed_prs.length) parts.push(`closed=${o.closed_prs.join(',')}`) + if (o.reverted_prs.length) { + parts.push(`reverted=${o.reverted_prs.map((r) => `${r.original}->${r.revertPr}`).join(';')}`) + } + if (o.deleted_branches.length) parts.push(`branches_deleted=${o.deleted_branches.join(',')}`) + if (o.warnings.length) parts.push(`warnings=${o.warnings.length}`) + return parts.join('; ') +} diff --git a/src/git/pr.ts b/src/git/pr.ts index ffc0554..f30ac8e 100644 --- a/src/git/pr.ts +++ b/src/git/pr.ts @@ -1,5 +1,7 @@ import { execFile } from 'node:child_process' import { promisify } from 'node:util' +import * as path from 'node:path' +import * as os from 'node:os' const exec = promisify(execFile) @@ -53,3 +55,171 @@ export async function createPullRequest(opts: { return { url } } + +export type PrState = 'OPEN' | 'MERGED' | 'CLOSED' + +export type PrInfo = { + state: PrState + mergeCommit: string | null + baseRefName: string + title: string +} + +export async function getPullRequestState(opts: { + prUrl: string + cwd?: string 
+}): Promise { + const { prUrl } = opts + try { + const { stdout } = await exec( + 'gh', + ['pr', 'view', prUrl, '--json', 'state,mergeCommit,baseRefName,title'], + opts.cwd ? { cwd: opts.cwd } : {}, + ) + const parsed = JSON.parse(stdout) as { + state: string + mergeCommit: { oid: string } | null + baseRefName: string + title: string + } + const state = parsed.state.toUpperCase() as PrState + if (state !== 'OPEN' && state !== 'MERGED' && state !== 'CLOSED') { + return { error: `unexpected PR state: ${parsed.state}` } + } + return { + state, + mergeCommit: parsed.mergeCommit?.oid ?? null, + baseRefName: parsed.baseRefName, + title: parsed.title, + } + } catch (err) { + const msg = (err as { stderr?: string }).stderr ?? (err as Error).message ?? '' + return { error: `gh pr view failed: ${msg.slice(0, 300)}` } + } +} + +export async function closePullRequest(opts: { + prUrl: string + comment: string + cwd?: string +}): Promise<{ ok: true } | { error: string }> { + try { + await exec( + 'gh', + ['pr', 'close', opts.prUrl, '--delete-branch', '--comment', opts.comment], + opts.cwd ? { cwd: opts.cwd } : {}, + ) + return { ok: true } + } catch (err) { + const msg = (err as { stderr?: string }).stderr ?? (err as Error).message ?? '' + return { error: `gh pr close failed: ${msg.slice(0, 300)}` } + } +} + +// Creates a revert-PR for a merged PR. Uses an isolated worktree so it +// never touches the user's main checkout. Returns the new PR URL or an +// error string. The revert PR is opened WITHOUT auto-merge — the user +// must review + merge it manually so an unintended cascade can be undone. 
+export async function createRevertPullRequest(opts: { + repoRoot: string + mergeSha: string + baseRef: string + originalTitle: string + originalBranch: string + jobId: string + pbiCode: string | null +}): Promise<{ url: string } | { error: string }> { + const { + repoRoot, + mergeSha, + baseRef, + originalTitle, + originalBranch, + jobId, + pbiCode, + } = opts + + const worktreeDir = + process.env.SCRUM4ME_AGENT_WORKTREE_DIR ?? path.join(os.homedir(), '.scrum4me-agent-worktrees') + const wtPath = path.join(worktreeDir, `revert-${jobId}`) + const revertBranch = `revert/${originalBranch}-${jobId.slice(-8)}` + + const run = async (cmd: string, args: string[], cwd: string) => { + await exec(cmd, args, { cwd }) + } + + // Cleanup helper, best-effort + const cleanup = async () => { + try { + await exec('git', ['worktree', 'remove', '--force', wtPath], { cwd: repoRoot }) + } catch { + // ignore — worktree may not exist if creation failed + } + } + + try { + await run('git', ['fetch', 'origin', baseRef, mergeSha], repoRoot) + await run('git', ['worktree', 'add', '-b', revertBranch, wtPath, `origin/${baseRef}`], repoRoot) + + try { + await run('git', ['revert', '-m', '1', mergeSha, '--no-edit'], wtPath) + } catch (err) { + await cleanup() + const msg = (err as { stderr?: string }).stderr ?? (err as Error).message ?? '' + if (/conflict/i.test(msg)) { + return { error: `git revert conflicts on ${mergeSha}: ${msg.slice(0, 200)}` } + } + return { error: `git revert failed: ${msg.slice(0, 200)}` } + } + + await run('git', ['push', '-u', 'origin', revertBranch], wtPath) + + const pbiTag = pbiCode ? 
`PBI ${pbiCode}` : 'PBI' + const title = `Revert: ${originalTitle}` + const body = [ + `Auto-revert by Scrum4Me agent.`, + ``, + `Reason: ${pbiTag} failed (cascade from job \`${jobId}\`).`, + `Reverts merge commit \`${mergeSha}\`.`, + ``, + `**Review carefully before merging** — auto-merge is intentionally NOT enabled on revert PRs.`, + ].join('\n') + + let prUrl: string + try { + const { stdout } = await exec( + 'gh', + [ + 'pr', + 'create', + '--base', + baseRef, + '--head', + revertBranch, + '--title', + title, + '--body', + body, + ], + { cwd: wtPath }, + ) + const lines = stdout.trim().split('\n').filter(Boolean) + prUrl = lines[lines.length - 1]?.trim() ?? '' + if (!prUrl.startsWith('http')) { + await cleanup() + return { error: `gh pr create produced unexpected output: ${stdout.slice(0, 200)}` } + } + } catch (err) { + await cleanup() + const msg = (err as { stderr?: string }).stderr ?? (err as Error).message ?? '' + return { error: `gh pr create (revert) failed: ${msg.slice(0, 300)}` } + } + + await cleanup() + return { url: prUrl } + } catch (err) { + await cleanup() + const msg = (err as { stderr?: string }).stderr ?? (err as Error).message ?? '' + return { error: `revert worktree setup failed: ${msg.slice(0, 300)}` } + } +} diff --git a/src/git/push.ts b/src/git/push.ts index 6003dc3..9c2bbdc 100644 --- a/src/git/push.ts +++ b/src/git/push.ts @@ -51,3 +51,27 @@ export async function pushBranchForJob(opts: { return { pushed: false, reason: 'unknown', stderr } } } + +export type DeleteRemoteResult = + | { deleted: true } + | { deleted: false; reason: 'not-found' | 'no-credentials' | 'unknown'; stderr: string } + +export async function deleteRemoteBranch(opts: { + repoRoot: string + branch: string +}): Promise { + const { repoRoot, branch } = opts + try { + await exec('git', ['push', 'origin', '--delete', branch], { cwd: repoRoot }) + return { deleted: true } + } catch (err) { + const stderr = (err as { stderr?: string }).stderr ?? (err as Error).message ?? 
'' + if (/remote ref does not exist|unable to delete .* remote ref does not exist/i.test(stderr)) { + return { deleted: false, reason: 'not-found', stderr } + } + if (/Authentication failed|could not read Username/i.test(stderr)) { + return { deleted: false, reason: 'no-credentials', stderr } + } + return { deleted: false, reason: 'unknown', stderr } + } +} diff --git a/src/tools/update-job-status.ts b/src/tools/update-job-status.ts index 6f21c4a..5a25579 100644 --- a/src/tools/update-job-status.ts +++ b/src/tools/update-job-status.ts @@ -14,6 +14,7 @@ import { removeWorktreeForJob } from '../git/worktree.js' import { resolveRepoRoot } from './wait-for-job.js' import { pushBranchForJob } from '../git/push.js' import { createPullRequest } from '../git/pr.js' +import { cancelPbiOnFailure } from '../cancel/pbi-cascade.js' const inputSchema = z.object({ job_id: z.string().min(1), @@ -313,6 +314,15 @@ export function registerUpdateJobStatusTool(server: McpServer) { if (job.claimed_by_token_id !== tokenId) { return toolError('PERMISSION_DENIED: This job was not claimed by your token') } + if (job.status === 'CANCELLED') { + // PBI fail-cascade got here first. The agent must abandon any + // local work and call wait_for_job again instead of forcing this + // job into DONE/FAILED. + return toolError( + 'JOB_CANCELLED: This job was cancelled by the PBI fail-cascade. 
' + + 'Discard your local changes and call wait_for_job for the next item.', + ) + } if (!['CLAIMED', 'RUNNING'].includes(job.status)) { return toolError(`Job is already in terminal state: ${job.status.toLowerCase()}`) } @@ -471,6 +481,14 @@ export function registerUpdateJobStatusTool(server: McpServer) { await cleanupWorktreeForTerminalStatus(job.product_id, job_id, actualStatus, branchToWrite) } + // PBI fail-cascade: when a TASK_IMPLEMENTATION job ends in FAILED, + // cancel all queued/claimed/running siblings under the same PBI and + // undo any pushed commits (close open PRs / open revert-PRs for + // already-merged ones). Idempotent + non-blocking — never throws. + if (actualStatus === 'failed' && job.kind === 'TASK_IMPLEMENTATION' && job.task_id) { + await cancelPbiOnFailure(job_id) + } + const queueCount = await prisma.claudeJob.count({ where: { user_id: userId, status: 'QUEUED' }, })