diff --git a/.env.example b/.env.example index 542d6b8..07a27ea 100644 --- a/.env.example +++ b/.env.example @@ -7,5 +7,3 @@ OPS_AGENT_URL="http://127.0.0.1:3099" REPO_PATHS="/srv/scrum4me/repos/scrum4me,/srv/ops/repos/ops-dashboard" # Comma-separated list of systemd unit names to show on the /systemd page (must match commands.yml allowed list) SYSTEMD_UNITS="scrum4me-web,ops-agent" -# Worker run-logs directory inside the container (read-only bind mount; see docker-compose.yml) -WORKER_LOGS_DIR="/var/worker-logs/idea" diff --git a/app/api/worker-logs/[name]/route.ts b/app/api/worker-logs/[name]/route.ts deleted file mode 100644 index c0b8f28..0000000 --- a/app/api/worker-logs/[name]/route.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { NextRequest } from 'next/server' -import { getCurrentUser } from '@/lib/session' -import { readRunLog, WorkerLogError } from '@/lib/worker-logs' -import { parseRunLog } from '@/lib/parse-worker-log' - -export const dynamic = 'force-dynamic' - -// GET /api/worker-logs/.log — full parsed timeline for one run-log. -export async function GET( - _request: NextRequest, - { params }: { params: Promise<{ name: string }> }, -) { - const user = await getCurrentUser() - if (!user) { - return Response.json({ error: 'unauthorized' }, { status: 401 }) - } - - const { name: rawName } = await params - const name = decodeURIComponent(rawName) - - try { - const raw = await readRunLog(name) - return Response.json(parseRunLog(raw, name)) - } catch (err) { - if (err instanceof WorkerLogError) { - const status = err.code === 'invalid' ? 400 : err.code === 'not-found' ? 404 : 500 - return Response.json({ error: err.message }, { status }) - } - const message = err instanceof Error ? err.message : 'failed to read worker log' - return Response.json({ error: message }, { status: 500 }) - } -} diff --git a/app/api/worker-logs/route.ts b/app/api/worker-logs/route.ts deleted file mode 100644 index 96c85d3..0000000 --- a/app/api/worker-logs/route.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { NextRequest } from 'next/server' -import { getCurrentUser } from '@/lib/session' -import { listRunLogs } from '@/lib/worker-logs' - -export const dynamic = 'force-dynamic' - -// GET /api/worker-logs?limit=10 — newest-first run-log summaries for the table. -export async function GET(request: NextRequest) { - const user = await getCurrentUser() - if (!user) { - return Response.json({ error: 'unauthorized' }, { status: 401 }) - } - - const limitParam = request.nextUrl.searchParams.get('limit') - const limit = limitParam ? Number(limitParam) : 10 - - try { - const logs = await listRunLogs(limit) - return Response.json({ logs }) - } catch (err) { - // Surfaces a missing bind mount legibly (e.g. WORKER_LOGS_DIR not mounted). - const message = err instanceof Error ? 
err.message : 'failed to list worker logs' - return Response.json({ error: message }, { status: 500 }) - } -} diff --git a/app/caddy/_components/caddy-codemirror.tsx b/app/caddy/_components/caddy-codemirror.tsx deleted file mode 100644 index 15b010c..0000000 --- a/app/caddy/_components/caddy-codemirror.tsx +++ /dev/null @@ -1,25 +0,0 @@ -'use client' -import CodeMirror from '@uiw/react-codemirror' -import { caddyfileLanguage } from '@/lib/codemirror/caddyfile-mode' -import { EditorView } from '@codemirror/view' - -type Props = { - value: string - onChange: (next: string) => void - readOnly?: boolean -} - -export default function CaddyCodeMirror({ value, onChange, readOnly }: Props) { - return ( - - ) -} diff --git a/app/caddy/_components/caddy-editor.tsx b/app/caddy/_components/caddy-editor.tsx index 6d5f0d1..14a4427 100644 --- a/app/caddy/_components/caddy-editor.tsx +++ b/app/caddy/_components/caddy-editor.tsx @@ -1,21 +1,11 @@ 'use client' import { useCallback, useEffect, useState } from 'react' -import dynamic from 'next/dynamic' import Link from 'next/link' import { useFlowRun } from '@/hooks/useFlowRun' import ConfirmDialog from '@/components/ConfirmDialog' import StreamingTerminal from '@/components/StreamingTerminal' -const CaddyCodeMirror = dynamic(() => import('./caddy-codemirror'), { - ssr: false, - loading: () => ( -
- Loading editor… -
- ), -}) - type Phase = 'edit' | 'writing' | 'validating' | 'validated' | 'saving' | 'saved' type DialogPending = 'validate' | 'save' | null @@ -116,13 +106,17 @@ export default function CaddyEditor({ initialContent, initialError }: Props) { )} - { - setContent(next) + onChange={(e) => { + setContent(e.target.value) + // Reset validated state if user edits after validation if (phase === 'validated' || phase === 'saved') setPhase('edit') }} readOnly={isActive} + rows={24} + spellCheck={false} + className="w-full rounded-lg border border-border bg-zinc-950 p-4 font-mono text-xs text-zinc-100 focus:outline-none focus:ring-1 focus:ring-ring resize-y disabled:opacity-50" /> diff --git a/app/caddy/page.tsx b/app/caddy/page.tsx index ff20c4b..eb8d580 100644 --- a/app/caddy/page.tsx +++ b/app/caddy/page.tsx @@ -1,7 +1,6 @@ import { redirect } from 'next/navigation' import Link from 'next/link' -import { createHighlighter, type Highlighter } from 'shiki' -import caddyfileGrammar from '@/lib/grammars/caddyfile.json' +import { codeToHtml } from 'shiki' import { getCurrentUser } from '@/lib/session' import { execAgent } from '@/lib/agent-client' import { parseCertList, type CertInfo } from '@/lib/parse-caddy' @@ -9,18 +8,6 @@ import CaddyView from './_components/caddy-view' export const dynamic = 'force-dynamic' -let highlighterPromise: Promise | null = null -function getHighlighter() { - if (!highlighterPromise) { - highlighterPromise = createHighlighter({ - themes: ['github-dark'], - // eslint-disable-next-line @typescript-eslint/no-explicit-any - langs: [caddyfileGrammar as any], - }) - } - return highlighterPromise -} - export default async function CaddyPage() { const user = await getCurrentUser() if (!user) redirect('/login') @@ -29,8 +16,7 @@ export default async function CaddyPage() { let configError: string | null = null try { const raw = await execAgent('caddy_show_config') - const highlighter = await getHighlighter() - configHtml = highlighter.codeToHtml(raw || '# (empty)', { + configHtml = await codeToHtml(raw || '# (empty)', { lang: 'caddyfile', theme: 'github-dark', }) diff --git a/app/flows/page.tsx b/app/flows/page.tsx index e2790a7..05848fe 100644 --- a/app/flows/page.tsx +++ b/app/flows/page.tsx @@ -5,11 +5,6 @@ import { getCurrentUser } from '@/lib/session' export const dynamic = 'force-dynamic' const FLOWS = [ - { - href: '/flows/redeploy-all', - title: 'Redeploy All', - desc: 'Volledige stack-redeploy: scrum4me-web + MCP-worker (cache-busted)', - }, { href: '/flows/update-scrum4me-web', title: 'Update Scrum4Me website', @@ -20,11 +15,6 @@ const FLOWS = [ title: 'Update Caddy config', desc: 'Reload Caddy met nieuwe Caddyfile + cert renewal check', }, - { - href: '/flows/server-backup', - title: 'Server backup', - desc: 'pg_dumpall + restic naar NAS én B2 — handmatige backup of restore-test', - }, ] export default async function FlowsIndex() { diff --git a/app/flows/redeploy-all/_components/flow-panel.tsx b/app/flows/redeploy-all/_components/flow-panel.tsx deleted file mode 100644 index d81ea26..0000000 --- a/app/flows/redeploy-all/_components/flow-panel.tsx +++ /dev/null @@ -1,135 +0,0 @@ -'use client' - -import { useState, useCallback } from 'react' -import Link from 'next/link' -import { useFlowRun } from '@/hooks/useFlowRun' -import StreamingTerminal from '@/components/StreamingTerminal' -import ConfirmDialog from '@/components/ConfirmDialog' - -const FLOW_KEY = 'redeploy_all' - -const STEPS = [ - 'git status Scrum4Me (show current state)', - 'git fetch Scrum4Me (fetch remote 
refs)', - 'git log (commits ahead of upstream)', - 'git pull --ff-only Scrum4Me (aborts if dirty)', - 'npm ci (install dependencies)', - 'prisma migrate deploy (apply migrations)', - 'npm run build (build application)', - 'systemctl restart scrum4me-web', - 'smoke test: curl /api/products (expect 200 or 401)', - 'git status scrum4me-docker (show current state)', - 'git fetch scrum4me-docker (fetch remote refs)', - 'git pull --ff-only scrum4me-docker (aborts if dirty)', - 'git pull --ff-only scrum4me-mcp (lokale sync)', - 'rebuild worker image — cache-busted MCP clone', - 'docker compose up -d --force-recreate worker-idea', - 'wait for worker pre-flight to pass', -] - -export default function FlowPanel() { - const [pendingDryRun, setPendingDryRun] = useState(null) - const [completedFlowRunId, setCompletedFlowRunId] = useState(null) - - const handleComplete = useCallback((flowRunId: string) => { - setCompletedFlowRunId(flowRunId) - }, []) - - const flowRun = useFlowRun(handleComplete) - - const handleConfirm = useCallback(() => { - if (pendingDryRun === null) return - const dryRun = pendingDryRun - setPendingDryRun(null) - setCompletedFlowRunId(null) - flowRun.startFlow(FLOW_KEY, dryRun) - }, [pendingDryRun, flowRun]) - - const handleReset = useCallback(() => { - flowRun.reset() - setCompletedFlowRunId(null) - }, [flowRun]) - - return ( -
-
-
-

- Full stack redeploy: first the main app (scrum4me-web: pull, - migrate, build, restart), then the MCP worker (cache-busted image - rebuild so the new scrum4me-mcp code is picked up). -

-

- repos: Scrum4Me · scrum4me-docker · scrum4me-mcp -

-
-
    - {STEPS.map((step, i) => ( -
- {i + 1}. - {step} -
- ))} -
-
- -
- - - {flowRun.status !== 'idle' && flowRun.status !== 'running' && ( - - )} -
- - {flowRun.status !== 'idle' && ( -
-
- Output - {completedFlowRunId && ( - - View in audit log → - - )} -
- -
- )} - - ` ${i + 1}. ${s}`).join('\n')}` - } - onConfirm={handleConfirm} - onCancel={() => setPendingDryRun(null)} - /> -
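The ConfirmDialog body above is assembled by numbering STEPS with the ` ${i + 1}. ${s}` template. A standalone sketch of that join (STEPS abbreviated to the first two entries; the `flow:` header mirrors the confirmBody strings used by the server-backup panel later in this diff):

```ts
// Build the dry-run confirmation text the dialog shows before a run.
const STEPS = [
  'git status Scrum4Me (show current state)',
  'git fetch Scrum4Me (fetch remote refs)',
]

const confirmBody = `flow: redeploy_all\n\nSteps:\n${STEPS.map(
  (s, i) => ` ${i + 1}. ${s}`,
).join('\n')}`

console.log(confirmBody)
// flow: redeploy_all
//
// Steps:
//  1. git status Scrum4Me (show current state)
//  2. git fetch Scrum4Me (fetch remote refs)
```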
- ) -} diff --git a/app/flows/redeploy-all/page.tsx b/app/flows/redeploy-all/page.tsx deleted file mode 100644 index 8971cbb..0000000 --- a/app/flows/redeploy-all/page.tsx +++ /dev/null @@ -1,27 +0,0 @@ -import Link from 'next/link' -import { redirect } from 'next/navigation' -import { getCurrentUser } from '@/lib/session' -import FlowPanel from './_components/flow-panel' - -export const dynamic = 'force-dynamic' - -export default async function RedeployAllPage() { - const user = await getCurrentUser() - if (!user) redirect('/login') - - return ( -
-
-
- - ← Home - - / -

Redeploy All

-
- - -
-
- ) -} diff --git a/app/flows/server-backup/_components/flow-panel.tsx b/app/flows/server-backup/_components/flow-panel.tsx deleted file mode 100644 index e61c25a..0000000 --- a/app/flows/server-backup/_components/flow-panel.tsx +++ /dev/null @@ -1,178 +0,0 @@ -'use client' - -import { useState, useCallback } from 'react' -import Link from 'next/link' -import { useFlowRun } from '@/hooks/useFlowRun' -import StreamingTerminal from '@/components/StreamingTerminal' -import ConfirmDialog from '@/components/ConfirmDialog' - -// One panel runs either flow; we switch step-list + description based on the -// currently active or last-triggered action. Wording mirrors the existing -// /settings/backups → server-backup-section so the two entry points stay -// consistent. The actual work in both flows runs out-of-band via systemd -// (server-backup.service) — the ops-agent flow just kicks it off and tails -// the resulting log / status file. - -type Kind = 'backup' | 'restore' - -type FlowSpec = { - flowKey: string - buttonLabel: string - shortDescription: string - steps: string[] - confirmTitle: string - confirmBody: string -} - -const FLOWS: Record = { - backup: { - flowKey: 'server_backup_full', - buttonLabel: 'Backup now', - shortDescription: - 'Volledige server-backup: pg_dumpall van alle databases + restic snapshot naar NAS én Backblaze B2 (Object Lock). Draait dagelijks via timer; deze knop triggert handmatig.', - steps: [ - 'trigger_server_backup (systemctl start server-backup.service)', - 'tail_backup_log_today (live log mee-stream)', - 'read_backup_status (lees status.json met repo-totalen + duur)', - ], - confirmTitle: 'Trigger server backup', - confirmBody: - 'flow: server_backup_full\n\nSteps:\n 1. trigger_server_backup (systemctl start server-backup.service)\n 2. tail_backup_log_today\n 3. read_backup_status\n\nThe actual work happens in systemd; this flow kicks it off and tails the log.', - }, - restore: { - flowKey: 'server_backup_restore_test', - buttonLabel: 'Run restore test', - shortDescription: - 'Non-destructieve restore-test: haalt de laatste snapshot uit de NAS-repo terug naar /tmp/restore-test en verifieert dat kritieke files er zijn. Raakt niets in de live stack.', - steps: [ - 'trigger_restore_test (restore latest NAS snapshot to /tmp/restore-test/)', - 'read_backup_status (lees assertions + per-file outcome)', - ], - confirmTitle: 'Run restore test (NAS)', - confirmBody: - 'flow: server_backup_restore_test\n\nSteps:\n 1. trigger_restore_test (restore latest NAS snapshot to /tmp/restore-test/)\n 2. read_backup_status\n\nNon-destructive — restores into /tmp only and asserts critical files exist.', - }, -} - -export default function FlowPanel() { - // `displayKind` drives the steps/description card; updated optimistically - // when the user presses a button so the displayed flow matches the pending - // confirm. `activeKind` only flips once the flow actually starts. 
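A minimal sketch of the transitions those three pieces of state encode, distilled from the handlers below (the reducer framing is mine, not code from the repo):

```ts
type Kind = 'backup' | 'restore'

type PanelState = {
  displayKind: Kind        // which spec card is shown
  pendingKind: Kind | null // confirm dialog open for this flow
  activeKind: Kind | null  // flow that is actually running
}

// Button press: show the spec and stage the confirm (handleClickKind).
const click = (s: PanelState, kind: Kind): PanelState =>
  ({ ...s, displayKind: kind, pendingKind: kind })

// Confirm: only now does activeKind flip (handleConfirm).
const confirm = (s: PanelState): PanelState =>
  s.pendingKind === null ? s : { ...s, pendingKind: null, activeKind: s.pendingKind }

// Reset after the run finishes (handleReset).
const reset = (s: PanelState): PanelState =>
  ({ ...s, pendingKind: null, activeKind: null })

let state: PanelState = { displayKind: 'backup', pendingKind: null, activeKind: null }
state = confirm(click(state, 'restore'))
console.log(state) // { displayKind: 'restore', pendingKind: null, activeKind: 'restore' }
```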
- const [displayKind, setDisplayKind] = useState('backup') - const [pendingKind, setPendingKind] = useState(null) - const [activeKind, setActiveKind] = useState(null) - const [completedFlowRunId, setCompletedFlowRunId] = useState(null) - - const handleComplete = useCallback((flowRunId: string) => { - setCompletedFlowRunId(flowRunId) - }, []) - - const flowRun = useFlowRun(handleComplete) - - const handleClickKind = useCallback((kind: Kind) => { - setDisplayKind(kind) - setPendingKind(kind) - }, []) - - const handleConfirm = useCallback(() => { - if (pendingKind === null) return - const kind = pendingKind - setPendingKind(null) - setCompletedFlowRunId(null) - setActiveKind(kind) - flowRun.startFlow(FLOWS[kind].flowKey, false) - }, [pendingKind, flowRun]) - - const handleReset = useCallback(() => { - flowRun.reset() - setCompletedFlowRunId(null) - setActiveKind(null) - }, [flowRun]) - - const spec = FLOWS[displayKind] - - return ( -
-
-
-

{spec.shortDescription}

-

- flow: {spec.flowKey} -

-
-
    - {spec.steps.map((step, i) => ( -
- {i + 1}. - {step} -
- ))} -
-
- -
- - - {flowRun.status !== 'idle' && flowRun.status !== 'running' && ( - - )} -
- - {flowRun.status !== 'idle' && ( -
-
- - Output{activeKind ? ` — ${FLOWS[activeKind].buttonLabel}` : ''} - - {completedFlowRunId && ( - - View in audit log → - - )} -
- -
- )} - - setPendingKind(null)} - /> -
- ) -} diff --git a/app/flows/server-backup/page.tsx b/app/flows/server-backup/page.tsx deleted file mode 100644 index 7568e16..0000000 --- a/app/flows/server-backup/page.tsx +++ /dev/null @@ -1,31 +0,0 @@ -import Link from 'next/link' -import { redirect } from 'next/navigation' -import { getCurrentUser } from '@/lib/session' -import FlowPanel from './_components/flow-panel' - -export const dynamic = 'force-dynamic' - -export default async function ServerBackupPage() { - const user = await getCurrentUser() - if (!user) redirect('/login') - - return ( -
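Every page in this diff opens with the same two-line session guard. A hypothetical helper that would collapse the repetition, using only imports these pages already have (requireUser is my name, not a function in the repo):

```ts
import { redirect } from 'next/navigation'
import { getCurrentUser } from '@/lib/session'

// redirect() throws, so callers past this line always have a user.
export async function requireUser() {
  const user = await getCurrentUser()
  if (!user) redirect('/login')
  return user
}
```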
-
-
- - ← Home - - / - - Flows - - / -

Server backup

-
- - -
-
- ) -} diff --git a/app/settings/backups/_components/backups-panel.tsx b/app/settings/backups/_components/backups-panel.tsx index bf0a2ef..b2707d9 100644 --- a/app/settings/backups/_components/backups-panel.tsx +++ b/app/settings/backups/_components/backups-panel.tsx @@ -1,52 +1,171 @@ 'use client' +import { useState, useCallback } from 'react' +import Link from 'next/link' +import { useFlowRun } from '@/hooks/useFlowRun' +import StreamingTerminal from '@/components/StreamingTerminal' +import ConfirmDialog from '@/components/ConfirmDialog' import type { BackupFile } from '../page' -import type { - BackupStatusEnvelope, - ResticSnapshot, - ResticStats, -} from '../_lib/types' -import DatabaseBackupsSection from './database-backups-section' -import ServerBackupSection from './server-backup-section' + +function formatSize(bytes: number): string { + if (bytes === 0) return '—' + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB` + return `${(bytes / (1024 * 1024)).toFixed(1)} MB` +} type Props = { backups: BackupFile[] listError: string | null - envelope: BackupStatusEnvelope - nasSnapshots: ResticSnapshot[] - b2Snapshots: ResticSnapshot[] - nasStats: ResticStats | null - b2Stats: ResticStats | null - serverBackupErrors: { - status?: string - nasSnapshots?: string - b2Snapshots?: string - nasStats?: string - b2Stats?: string - } } -export default function BackupsPanel({ - backups, - listError, - envelope, - nasSnapshots, - b2Snapshots, - nasStats, - b2Stats, - serverBackupErrors, -}: Props) { +export default function BackupsPanel({ backups, listError }: Props) { + const [pending, setPending] = useState(false) + const [completedFlowRunId, setCompletedFlowRunId] = useState(null) + + const handleComplete = useCallback((flowRunId: string) => { + setCompletedFlowRunId(flowRunId) + }, []) + + const flowRun = useFlowRun(handleComplete) + + const handleConfirm = useCallback(() => { + setPending(false) + setCompletedFlowRunId(null) + flowRun.startFlow('backup_ops_db', false) + }, [flowRun]) + + const handleReset = useCallback(() => { + flowRun.reset() + setCompletedFlowRunId(null) + }, [flowRun]) + return ( -
- -
- + {/* Description */} +
+

+ Backs up the ops_dashboard database using{' '} + pg_dump. Dumps are stored in{' '} + /srv/ops/backups/ and retained for 30 days. + For automated daily backups, enable the systemd timer:{' '} + deploy/ops-agent/ops-db-backup.timer. +

+ +
    +
+ 1. + pg_dump ops_dashboard → /srv/ops/backups/ops_db_YYYYMMDD_HHMM.dump +
+
+ 2. + cleanup: delete backup files older than 30 days +
+
+
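The formatSize helper defined at the top of this panel switches from KB to MB at 1 MiB. Copied verbatim so the sample values run on their own:

```ts
function formatSize(bytes: number): string {
  if (bytes === 0) return '—'
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`
}

console.log(formatSize(0))          // '—'
console.log(formatSize(512 * 1024)) // '512 KB'
console.log(formatSize(3_250_000))  // '3.1 MB' (3_250_000 / 1_048_576 ≈ 3.1)
```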
+ + {/* Action buttons */} +
+ + {flowRun.status !== 'idle' && flowRun.status !== 'running' && ( + + )} +
+ + {/* Terminal output */} + {flowRun.status !== 'idle' && ( +
+
+ Output + {completedFlowRunId && ( + + View in audit log → + + )} +
+ + {flowRun.status === 'done' && ( +

+ Reload this page to see the updated backup list. +

+ )} +
+ )} + + {/* Backup list */} +
+

Existing backups

+ + {listError && ( +
+ Could not list backups: {listError} +
+ )} + + {!listError && backups.length === 0 && ( +
+ No backups found in /srv/ops/backups/ +
+ )} + + {!listError && backups.length > 0 && ( +
+ + + + + + + + + + {backups.map((b, i) => ( + + + + + + ))} + +
+ Timestamp + File + Size
{b.label}{b.name} + {formatSize(b.sizeBytes)} +
+
+ )} + +

+ Backups older than 30 days are removed automatically by the cleanup step. +

+
+ + {/* Confirm dialog */} + setPending(false)} />
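For reference, the shape of the useFlowRun hook as inferred from its call sites across this diff (the real implementation lives in @/hooks/useFlowRun; the 'error' status is an assumption, since the panels only check 'idle', 'running', and 'done'):

```ts
type FlowRunStatus = 'idle' | 'running' | 'done' | 'error'

interface FlowRun {
  status: FlowRunStatus
  // Kicks off a named flow; dryRun mirrors the boolean passed by the panels.
  startFlow(flowKey: string, dryRun: boolean): void
  reset(): void
}

// onComplete receives the flowRunId used for the "View in audit log" link.
declare function useFlowRun(onComplete: (flowRunId: string) => void): FlowRun
```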
) diff --git a/app/settings/backups/_components/database-backups-section.tsx b/app/settings/backups/_components/database-backups-section.tsx deleted file mode 100644 index 81a0285..0000000 --- a/app/settings/backups/_components/database-backups-section.tsx +++ /dev/null @@ -1,172 +0,0 @@ -'use client' - -import { useCallback, useState } from 'react' -import Link from 'next/link' -import { useFlowRun } from '@/hooks/useFlowRun' -import StreamingTerminal from '@/components/StreamingTerminal' -import ConfirmDialog from '@/components/ConfirmDialog' -import type { BackupFile } from '../page' - -function formatSize(bytes: number): string { - if (bytes === 0) return '—' - if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB` - return `${(bytes / (1024 * 1024)).toFixed(1)} MB` -} - -type Props = { - backups: BackupFile[] - listError: string | null -} - -export default function DatabaseBackupsSection({ backups, listError }: Props) { - const [pending, setPending] = useState(false) - const [completedFlowRunId, setCompletedFlowRunId] = useState(null) - - const handleComplete = useCallback((flowRunId: string) => { - setCompletedFlowRunId(flowRunId) - }, []) - - const flowRun = useFlowRun(handleComplete) - - const handleConfirm = useCallback(() => { - setPending(false) - setCompletedFlowRunId(null) - flowRun.startFlow('backup_ops_db', false) - }, [flowRun]) - - const handleReset = useCallback(() => { - flowRun.reset() - setCompletedFlowRunId(null) - }, [flowRun]) - - return ( -
-
-

Database backups

- flow: backup_ops_db -
- -
-

- Backs up the ops_dashboard database using{' '} - pg_dump. Dumps are stored in{' '} - /srv/ops/backups/ and retained for 30 days. - For automated daily backups, enable the systemd timer:{' '} - deploy/ops-agent/ops-db-backup.timer. -

- -
    -
- 1. - pg_dump ops_dashboard → /srv/ops/backups/ops_db_YYYYMMDD_HHMM.dump -
-
- 2. - cleanup: delete backup files older than 30 days -
-
-
- -
- - {flowRun.status !== 'idle' && flowRun.status !== 'running' && ( - - )} -
- - {flowRun.status !== 'idle' && ( -
-
- Output - {completedFlowRunId && ( - - View in audit log → - - )} -
- - {flowRun.status === 'done' && ( -

- Reload this page to see the updated backup list. -

- )} -
- )} - -
-

Existing backups

- - {listError && ( -
- Could not list backups: {listError} -
- )} - - {!listError && backups.length === 0 && ( -
- No backups found in /srv/ops/backups/ -
- )} - - {!listError && backups.length > 0 && ( -
- - - - - - - - - - {backups.map((b, i) => ( - - - - - - ))} - -
- Timestamp - File - Size
{b.label}{b.name} - {formatSize(b.sizeBytes)} -
-
- )} - -

- Backups older than 30 days are removed automatically by the cleanup step. -

-
- - setPending(false)} - /> -
- ) -} diff --git a/app/settings/backups/_components/server-backup-section.tsx b/app/settings/backups/_components/server-backup-section.tsx deleted file mode 100644 index bd64cb9..0000000 --- a/app/settings/backups/_components/server-backup-section.tsx +++ /dev/null @@ -1,447 +0,0 @@ -'use client' - -import { useCallback, useState } from 'react' -import Link from 'next/link' -import { useFlowRun } from '@/hooks/useFlowRun' -import StreamingTerminal from '@/components/StreamingTerminal' -import ConfirmDialog from '@/components/ConfirmDialog' -import type { - BackupPhase, - BackupStatus, - BackupStatusEnvelope, - OverallStatus, - PhaseStatus, - ResticSnapshot, - ResticStats, -} from '../_lib/types' - -type Props = { - envelope: BackupStatusEnvelope - nasSnapshots: ResticSnapshot[] - b2Snapshots: ResticSnapshot[] - nasStats: ResticStats | null - b2Stats: ResticStats | null - errors: { - status?: string - nasSnapshots?: string - b2Snapshots?: string - nasStats?: string - b2Stats?: string - } -} - -type ActiveFlow = 'backup' | 'restore' | null - -function formatBytes(bytes: number | null | undefined): string { - if (bytes == null) return '—' - if (bytes < 1024) return `${bytes} B` - if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB` - if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB` - return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB` -} - -function formatDuration(seconds: number | null | undefined): string { - if (seconds == null || seconds === 0) return '—' - if (seconds < 60) return `${seconds}s` - if (seconds < 3600) return `${Math.floor(seconds / 60)}m ${seconds % 60}s` - const h = Math.floor(seconds / 3600) - const m = Math.floor((seconds % 3600) / 60) - return `${h}h ${m}m` -} - -function formatTimestamp(iso: string | null | undefined): string { - if (!iso) return '—' - try { - const d = new Date(iso) - if (Number.isNaN(d.getTime())) return iso - const yyyy = d.getFullYear() - const mm = String(d.getMonth() + 1).padStart(2, '0') - const dd = String(d.getDate()).padStart(2, '0') - const hh = String(d.getHours()).padStart(2, '0') - const mi = String(d.getMinutes()).padStart(2, '0') - return `${yyyy}-${mm}-${dd} ${hh}:${mi}` - } catch { - return iso - } -} - -function overallBadgeClass(status: OverallStatus): string { - switch (status) { - case 'success': - return 'bg-green-500/15 text-green-500 border-green-500/30' - case 'partial_failure': - return 'bg-amber-500/15 text-amber-500 border-amber-500/30' - case 'failed': - return 'bg-destructive/15 text-destructive border-destructive/30' - default: - return 'bg-muted/50 text-muted-foreground border-border' - } -} - -function phaseIcon(status: PhaseStatus): { glyph: string; color: string } { - switch (status) { - case 'success': - return { glyph: '✓', color: 'text-green-500' } - case 'skipped': - return { glyph: '–', color: 'text-muted-foreground' } - case 'degraded': - return { glyph: '!', color: 'text-amber-500' } - case 'failed': - return { glyph: '✗', color: 'text-destructive' } - case 'pending': - default: - return { glyph: '○', color: 'text-muted-foreground/50' } - } -} - -function phaseDurationSeconds(phase: BackupPhase): number | null { - if (!phase.startedAt || !phase.completedAt) return null - const start = new Date(phase.startedAt).getTime() - const end = new Date(phase.completedAt).getTime() - if (Number.isNaN(start) || Number.isNaN(end)) return null - return Math.max(0, Math.round((end - start) / 1000)) -} - -function StatusCard({ status }: { status: BackupStatus | null }) { - 
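The per-phase duration badge is derived client-side from two ISO timestamps. A self-contained check of that math, using the example run from the README later in this diff (started 03:30:00, completed 03:48:21, duration_seconds 1101):

```ts
// formatDuration condensed from the helper above (null/zero guard dropped).
function formatDuration(seconds: number): string {
  if (seconds < 60) return `${seconds}s`
  if (seconds < 3600) return `${Math.floor(seconds / 60)}m ${seconds % 60}s`
  return `${Math.floor(seconds / 3600)}h ${Math.floor((seconds % 3600) / 60)}m`
}

const start = new Date('2026-05-15T03:30:00+02:00').getTime()
const end = new Date('2026-05-15T03:48:21+02:00').getTime()
console.log(formatDuration(Math.round((end - start) / 1000))) // '18m 21s'
```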
if (!status) { - return ( -
- No backup run recorded yet. Trigger one with the "Backup now" button below. -
- ) - } - return ( -
-
-
- - {status.overallStatus.replace('_', ' ')} - - - Last run {formatTimestamp(status.completedAt)} on{' '} - {status.host || '—'} - -
- - duration {formatDuration(status.durationSeconds)} - -
-
- {status.phases.map((p) => { - const icon = phaseIcon(p.status) - const dur = phaseDurationSeconds(p) - return ( -
- {icon.glyph} -
- {p.name} - - {p.status} - {dur != null ? ` · ${formatDuration(dur)}` : ''} - -
-
- ) - })} -
-
- ) -} - -function StatsBlock({ stats, label, error }: { stats: ResticStats | null; label: string; error?: string }) { - if (error) { - return ( -
- {label}: {error} -
- ) - } - if (!stats) { - return ( -
- {label}: no stats yet -
- ) - } - const dedup = - stats.dedupRatio != null && Number.isFinite(stats.dedupRatio) - ? `${stats.dedupRatio.toFixed(2)}×` - : '—' - return ( -
-
- - {label} - - - {stats.snapshotsCount} snapshot{stats.snapshotsCount === 1 ? '' : 's'} - -
-
-
restore size
-
{formatBytes(stats.restoreSizeBytes)}
-
raw data
-
{formatBytes(stats.rawDataBytes)}
-
dedup ratio
-
{dedup}
-
-
- ) -} - -function SnapshotsTable({ - snapshots, - label, - error, -}: { - snapshots: ResticSnapshot[] - label: string - error?: string -}) { - return ( -
-
-

{label}

- {snapshots.length} shown -
- {error ? ( -
- {error} -
- ) : snapshots.length === 0 ? ( -
- No snapshots in this repo yet. -
- ) : ( -
- - - - - - - - - - - {snapshots.map((s, i) => ( - - - - - - - ))} - -
- Time - ID - Tags - Files / size added -
{formatTimestamp(s.time)}{s.shortId} - {s.tags.join(', ') || '—'} - - {s.summary?.files_new != null - ? `${s.summary.files_new} new · ${formatBytes(s.summary.data_added ?? 0)}` - : '—'} -
-
- )} -
- ) -} - -export default function ServerBackupSection({ - envelope, - nasSnapshots, - b2Snapshots, - nasStats, - b2Stats, - errors, -}: Props) { - const [pending, setPending] = useState(null) - const [completedFlowRunId, setCompletedFlowRunId] = useState(null) - const [activeFlow, setActiveFlow] = useState(null) - - const handleComplete = useCallback((flowRunId: string) => { - setCompletedFlowRunId(flowRunId) - }, []) - - const flowRun = useFlowRun(handleComplete) - - const startFlow = useCallback( - (kind: 'backup' | 'restore') => { - setPending(null) - setCompletedFlowRunId(null) - setActiveFlow(kind) - flowRun.startFlow( - kind === 'backup' ? 'server_backup_full' : 'server_backup_restore_test', - false, - ) - }, - [flowRun], - ) - - const handleReset = useCallback(() => { - flowRun.reset() - setCompletedFlowRunId(null) - setActiveFlow(null) - }, [flowRun]) - - return ( -
-
-

Server backup (restic)

- flows: server_backup_full · restore_test -
- -
-

- Daily server-wide backup at 03:30: pg_dumpall + - Forgejo dump, then restic to NAS (local) and Backblaze B2{' '} - (offsite, Object Lock). Authoritative restore sources are the database dumps; live datadirs - are excluded. See{' '} - - docs/runbooks/server-backup.md - {' '} - for the full procedure. -

-
- - - {errors.status && ( -
- Could not read backup status: {errors.status} -
- )} - -
- - -
- -
- - - {flowRun.status !== 'idle' && flowRun.status !== 'running' && ( - - )} -
- - {flowRun.status !== 'idle' && ( -
-
- - Output {activeFlow ? `(${activeFlow === 'backup' ? 'backup' : 'restore test'})` : ''} - - {completedFlowRunId && ( - - View in audit log → - - )} -
- - {flowRun.status === 'done' && ( -

- Reload this page to see the updated status, snapshots, and stats. -

- )} -
- )} - -
- - -
- - {envelope.lastRestoreTest && ( -
-
-

Last restore test

- - {envelope.lastRestoreTest.overallStatus.replace('_', ' ')} - -
-

- {formatTimestamp(envelope.lastRestoreTest.completedAt)} · repo{' '} - {envelope.lastRestoreTest.repo} · snapshot{' '} - - {envelope.lastRestoreTest.snapshotId?.slice(0, 8) ?? '—'} - {' '} - · {envelope.lastRestoreTest.assertions.length} assertions -

- {envelope.lastRestoreTest.assertions.some((a) => a.status !== 'ok') && ( -
    - {envelope.lastRestoreTest.assertions - .filter((a) => a.status !== 'ok') - .map((a) => ( -
- {a.status === 'missing' ? '✗ missing' : '! empty'} · {a.path} -
- ))} -
- )} -
- )} - - startFlow('backup')} - onCancel={() => setPending(null)} - /> - startFlow('restore')} - onCancel={() => setPending(null)} - /> -
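Only failing assertions are rendered above; 'ok' rows are hidden. A small sketch of that filter with the label format from the JSX (the sample paths are invented for illustration):

```ts
type Assertion = { path: string; status: 'ok' | 'empty' | 'missing'; bytes: number }

function failures(assertions: Assertion[]): string[] {
  return assertions
    .filter((a) => a.status !== 'ok')
    .map((a) => `${a.status === 'missing' ? '✗ missing' : '! empty'} · ${a.path}`)
}

console.log(failures([
  { path: '/tmp/restore-test/etc/hostname', status: 'ok', bytes: 12 },
  { path: '/tmp/restore-test/dump.sql', status: 'missing', bytes: 0 },
])) // [ '✗ missing · /tmp/restore-test/dump.sql' ]
```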
- ) -} diff --git a/app/settings/backups/_lib/parse.ts b/app/settings/backups/_lib/parse.ts deleted file mode 100644 index 70d23fe..0000000 --- a/app/settings/backups/_lib/parse.ts +++ /dev/null @@ -1,191 +0,0 @@ -import type { - BackupPhase, - BackupStatus, - BackupStatusEnvelope, - OverallStatus, - PhaseStatus, - ResticSnapshot, - ResticStats, - RestoreTestAssertion, - RestoreTestStatus, -} from './types' - -const PHASE_ORDER = [ - 'postgres_dump', - 'forgejo_dump', - 'forgejo_db_dump', - 'restic_nas', - 'restic_b2', - 'forget_nas', - 'check_nas', - 'check_b2', -] as const - -function isRecord(v: unknown): v is Record { - return typeof v === 'object' && v !== null && !Array.isArray(v) -} - -function asString(v: unknown): string | null { - return typeof v === 'string' ? v : null -} - -function asNumber(v: unknown): number | null { - return typeof v === 'number' && Number.isFinite(v) ? v : null -} - -function asPhaseStatus(v: unknown): PhaseStatus { - if ( - v === 'success' || - v === 'skipped' || - v === 'degraded' || - v === 'failed' || - v === 'pending' - ) { - return v - } - return 'pending' -} - -function asOverallStatus(v: unknown): OverallStatus { - if (v === 'success' || v === 'partial_failure' || v === 'failed') return v - return 'unknown' -} - -function parsePhase(name: string, raw: unknown): BackupPhase { - if (!isRecord(raw)) { - return { - name, - status: 'pending', - exitCode: null, - startedAt: null, - completedAt: null, - error: null, - } - } - return { - name, - status: asPhaseStatus(raw.status), - exitCode: asNumber(raw.exit_code), - startedAt: asString(raw.started_at), - completedAt: asString(raw.completed_at), - error: asString(raw.error), - snapshotId: asString(raw.snapshot_id) ?? undefined, - filesNew: asNumber(raw.files_new), - dataAddedBytes: asNumber(raw.data_added_bytes), - outputFile: asString(raw.output_file) ?? undefined, - bytes: asNumber(raw.bytes), - } -} - -function parseBackupStatus(raw: unknown): BackupStatus | null { - if (!isRecord(raw)) return null - const phasesRaw = isRecord(raw.phases) ? raw.phases : {} - const phases = PHASE_ORDER.map((name) => parsePhase(name, phasesRaw[name])) - return { - schemaVersion: asNumber(raw.schema_version) ?? 1, - overallStatus: asOverallStatus(raw.overall_status), - startedAt: asString(raw.started_at) ?? '', - completedAt: asString(raw.completed_at) ?? '', - durationSeconds: asNumber(raw.duration_seconds) ?? 0, - host: asString(raw.host) ?? '', - phases, - } -} - -function parseRestoreTestAssertion(raw: unknown): RestoreTestAssertion | null { - if (!isRecord(raw)) return null - const status = raw.status - if (status !== 'ok' && status !== 'empty' && status !== 'missing') return null - return { - path: asString(raw.path) ?? '', - status, - bytes: asNumber(raw.bytes) ?? 0, - } -} - -function parseRestoreTestStatus(raw: unknown): RestoreTestStatus | null { - if (!isRecord(raw)) return null - const assertionsRaw = Array.isArray(raw.assertions) ? raw.assertions : [] - const assertions: RestoreTestAssertion[] = [] - for (const a of assertionsRaw) { - const parsed = parseRestoreTestAssertion(a) - if (parsed) assertions.push(parsed) - } - return { - schemaVersion: asNumber(raw.schema_version) ?? 1, - overallStatus: asOverallStatus(raw.overall_status), - startedAt: asString(raw.started_at) ?? '', - completedAt: asString(raw.completed_at) ?? '', - durationSeconds: asNumber(raw.duration_seconds) ?? 0, - repo: asString(raw.repo) ?? 
'', - snapshotId: asString(raw.snapshot_id), - restoreExitCode: asNumber(raw.restore_exit_code), - target: asString(raw.target) ?? undefined, - assertions, - error: asString(raw.error) ?? undefined, - } -} - -export function parseStatusEnvelope(output: string): BackupStatusEnvelope { - try { - const trimmed = output.trim() - if (!trimmed) return { lastRun: null, lastRestoreTest: null } - const parsed: unknown = JSON.parse(trimmed) - if (!isRecord(parsed)) return { lastRun: null, lastRestoreTest: null } - return { - lastRun: parseBackupStatus(parsed.last_run), - lastRestoreTest: parseRestoreTestStatus(parsed.last_restore_test), - } - } catch { - return { lastRun: null, lastRestoreTest: null } - } -} - -export function parseResticSnapshots(output: string, repo: 'nas' | 'b2'): ResticSnapshot[] { - try { - const trimmed = output.trim() - if (!trimmed) return [] - const parsed: unknown = JSON.parse(trimmed) - if (!Array.isArray(parsed)) return [] - const result: ResticSnapshot[] = [] - for (const s of parsed) { - if (!isRecord(s)) continue - const id = asString(s.id) - if (!id) continue - const shortId = asString(s.short_id) ?? id.slice(0, 8) - const time = asString(s.time) ?? '' - const hostname = asString(s.hostname) ?? '' - const tags = Array.isArray(s.tags) - ? s.tags.filter((t): t is string => typeof t === 'string') - : [] - const paths = Array.isArray(s.paths) - ? s.paths.filter((p): p is string => typeof p === 'string') - : [] - const summary = isRecord(s.summary) ? (s.summary as ResticSnapshot['summary']) : null - result.push({ id, shortId, time, hostname, tags, paths, repo, summary }) - } - return result - } catch { - return [] - } -} - -export function parseResticStats(output: string, repo: 'nas' | 'b2'): ResticStats | null { - try { - const trimmed = output.trim() - if (!trimmed) return null - const parsed: unknown = JSON.parse(trimmed) - if (!isRecord(parsed)) return null - return { - repo, - snapshotsCount: asNumber(parsed.snapshots_count) ?? 
0, - restoreSizeBytes: asNumber(parsed.restore_size_bytes), - restoreSizeFiles: asNumber(parsed.restore_size_files), - rawDataBytes: asNumber(parsed.raw_data_bytes), - rawBlobCount: asNumber(parsed.raw_blob_count), - dedupRatio: asNumber(parsed.dedup_ratio), - } - } catch { - return null - } -} diff --git a/app/settings/backups/_lib/types.ts b/app/settings/backups/_lib/types.ts deleted file mode 100644 index 5b768f0..0000000 --- a/app/settings/backups/_lib/types.ts +++ /dev/null @@ -1,78 +0,0 @@ -export type PhaseStatus = 'success' | 'skipped' | 'degraded' | 'failed' | 'pending' -export type OverallStatus = 'success' | 'partial_failure' | 'failed' | 'unknown' - -export interface BackupPhase { - name: string - status: PhaseStatus - exitCode: number | null - startedAt: string | null - completedAt: string | null - error: string | null - snapshotId?: string - filesNew?: number | null - dataAddedBytes?: number | null - outputFile?: string - bytes?: number | null -} - -export interface BackupStatus { - schemaVersion: number - overallStatus: OverallStatus - startedAt: string - completedAt: string - durationSeconds: number - host: string - phases: BackupPhase[] -} - -export interface RestoreTestAssertion { - path: string - status: 'ok' | 'empty' | 'missing' - bytes: number -} - -export interface RestoreTestStatus { - schemaVersion: number - overallStatus: OverallStatus - startedAt: string - completedAt: string - durationSeconds: number - repo: string - snapshotId: string | null - restoreExitCode: number | null - target?: string - assertions: RestoreTestAssertion[] - error?: string -} - -export interface BackupStatusEnvelope { - lastRun: BackupStatus | null - lastRestoreTest: RestoreTestStatus | null -} - -export interface ResticSnapshot { - id: string - shortId: string - time: string - hostname: string - tags: string[] - paths: string[] - repo: 'nas' | 'b2' - summary?: { - files_new?: number - files_changed?: number - data_added?: number - total_files_processed?: number - total_bytes_processed?: number - } | null -} - -export interface ResticStats { - repo: 'nas' | 'b2' - snapshotsCount: number - restoreSizeBytes: number | null - restoreSizeFiles: number | null - rawDataBytes: number | null - rawBlobCount: number | null - dedupRatio: number | null -} diff --git a/app/settings/backups/page.tsx b/app/settings/backups/page.tsx index 3a17103..89f72c7 100644 --- a/app/settings/backups/page.tsx +++ b/app/settings/backups/page.tsx @@ -3,16 +3,6 @@ import { redirect } from 'next/navigation' import { getCurrentUser } from '@/lib/session' import { execAgent } from '@/lib/agent-client' import BackupsPanel from './_components/backups-panel' -import { - parseResticSnapshots, - parseResticStats, - parseStatusEnvelope, -} from './_lib/parse' -import type { - BackupStatusEnvelope, - ResticSnapshot, - ResticStats, -} from './_lib/types' export const dynamic = 'force-dynamic' @@ -37,74 +27,23 @@ function parseBackupList(output: string): BackupFile[] { .filter((b) => b.name) } -function errorMessage(err: unknown): string { - return err instanceof Error ? 
err.message : 'agent call failed' -} - -async function tryExec(command: string): Promise<{ output: string | null; error: string | null }> { - try { - const output = await execAgent(command) - return { output, error: null } - } catch (err) { - return { output: null, error: errorMessage(err) } - } -} - export default async function BackupsPage() { const user = await getCurrentUser() if (!user) redirect('/login') - // Run all agent calls in parallel; per-call error isolation so one failure - // does not blank the entire page. - const [ - backupListResult, - statusResult, - nasSnapshotsResult, - b2SnapshotsResult, - nasStatsResult, - b2StatsResult, - ] = await Promise.all([ - tryExec('list_ops_backups'), - tryExec('read_backup_status'), - tryExec('restic_snapshots_nas'), - tryExec('restic_snapshots_b2'), - tryExec('restic_stats_nas'), - tryExec('restic_stats_b2'), - ]) + let backups: BackupFile[] = [] + let listError: string | null = null - const backups: BackupFile[] = backupListResult.output - ? parseBackupList(backupListResult.output) - : [] - const listError = backupListResult.error - - const envelope: BackupStatusEnvelope = statusResult.output - ? parseStatusEnvelope(statusResult.output) - : { lastRun: null, lastRestoreTest: null } - - const nasSnapshots: ResticSnapshot[] = nasSnapshotsResult.output - ? parseResticSnapshots(nasSnapshotsResult.output, 'nas') - : [] - const b2Snapshots: ResticSnapshot[] = b2SnapshotsResult.output - ? parseResticSnapshots(b2SnapshotsResult.output, 'b2') - : [] - const nasStats: ResticStats | null = nasStatsResult.output - ? parseResticStats(nasStatsResult.output, 'nas') - : null - const b2Stats: ResticStats | null = b2StatsResult.output - ? parseResticStats(b2StatsResult.output, 'b2') - : null - - const serverBackupErrors = { - status: statusResult.error ?? undefined, - nasSnapshots: nasSnapshotsResult.error ?? undefined, - b2Snapshots: b2SnapshotsResult.error ?? undefined, - nasStats: nasStatsResult.error ?? undefined, - b2Stats: b2StatsResult.error ?? undefined, + try { + const output = await execAgent('list_ops_backups') + backups = parseBackupList(output) + } catch (err) { + listError = err instanceof Error ? err.message : 'failed to list backups' } return (
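For reference, the per-call isolation this page is dropping is easy to keep around: each agent call resolves to an { output, error } pair, so one failing command cannot reject the whole Promise.all. A sketch built from the removed tryExec (command names are from commands.yml):

```ts
import { execAgent } from '@/lib/agent-client'

async function tryExec(command: string): Promise<{ output: string | null; error: string | null }> {
  try {
    return { output: await execAgent(command), error: null }
  } catch (err) {
    return { output: null, error: err instanceof Error ? err.message : 'agent call failed' }
  }
}

// One failure surfaces as an error string; the other section still renders.
async function loadPanelData() {
  const [list, status] = await Promise.all([
    tryExec('list_ops_backups'),
    tryExec('read_backup_status'),
  ])
  return { list, status }
}
```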
-
+
← Home @@ -113,16 +52,7 @@ export default async function BackupsPage() {

Backups

- +
) diff --git a/app/worker-logs/_components/run-log-detail.tsx b/app/worker-logs/_components/run-log-detail.tsx deleted file mode 100644 index 299310d..0000000 --- a/app/worker-logs/_components/run-log-detail.tsx +++ /dev/null @@ -1,291 +0,0 @@ -'use client' - -import { useCallback, useEffect, useState, type ReactElement } from 'react' -import type { LogEvent, MetaTag, ParsedRunLog } from '@/lib/parse-worker-log' -import { cn, formatDuration } from '@/lib/utils' - -async function fetchDetail(fileName: string): Promise { - const res = await fetch(`/api/worker-logs/${encodeURIComponent(fileName)}`, { cache: 'no-store' }) - const body = await res.json().catch(() => ({})) - if (!res.ok) throw new Error(body?.error ?? `request failed (${res.status})`) - return body as ParsedRunLog -} - -const META_TAG_STYLES: Record = { - claim: 'text-muted-foreground', - auth: 'text-muted-foreground', - quota: 'text-muted-foreground', - 'no-job': 'text-muted-foreground', - claimed: 'text-blue-600 dark:text-blue-400', - worktree: 'text-muted-foreground', - config: 'text-blue-600 dark:text-blue-400', - payload: 'text-muted-foreground', - spawn: 'text-blue-600 dark:text-blue-400', - 'claude-done': 'text-blue-600 dark:text-blue-400', - cleanup: 'text-muted-foreground', - exit: 'text-muted-foreground', - error: 'text-destructive', - 'token-expired': 'text-destructive', - timeout: 'text-muted-foreground', - other: 'text-muted-foreground', -} - -function timeOnly(ts: string | null): string { - if (!ts) return '' - const d = new Date(ts) - return isNaN(d.getTime()) ? '' : d.toLocaleTimeString() -} - -function inputPreview(input: string): string { - const oneLine = input.replace(/\s+/g, ' ').trim() - return oneLine.length > 100 ? `${oneLine.slice(0, 100)}…` : oneLine -} - -function TruncNote({ chars }: { chars?: number }) { - return ( -
- — truncated{chars != null ? ` (${chars} chars total)` : ''} -
- ) -} - -function EventBlock({ event }: { event: LogEvent }): ReactElement { - switch (event.kind) { - case 'meta': - return ( -
- {timeOnly(event.ts)} - - {event.tag} - - {event.text} -
- ) - - case 'system-init': - return ( -
-
Session started
-
-
- model {event.model} -
-
- permission {event.permissionMode} -
-
- claude v{event.version || '?'} -
-
- {event.cwd && ( -
- cwd: {event.cwd} -
- )} - {(event.tools.length > 0 || event.mcpServers.length > 0) && ( -
- - {event.tools.length} tools · {event.mcpServers.length} MCP-server(s) - -
- {event.mcpServers.length > 0 &&
mcp: {event.mcpServers.join(', ')}
} -
{event.tools.join(', ')}
-
-
- )} -
- ) - - case 'assistant-text': - return ( -
-
{event.text}
- {event.truncated && } -
- ) - - case 'thinking': - return ( -
- - thinking… - -
- {event.text} - {event.truncated && } -
-
- ) - - case 'tool-call': - return ( -
- - - ▸ {event.name} - - {inputPreview(event.input)} - - - -
-            {event.input}
-          
- {event.truncated && } -
- ) - - case 'tool-result': - return ( -
- - - {event.isError ? '✕ result (error)' : '◂ result'} - {event.fullLength} chars - - -
-            {event.body || '(body omitted, timeline truncated)'}
-          
- {event.truncated && } -
- ) - - case 'rate-limit': - return ( -
- - rate limit: {event.status} - -
- ) - - case 'result': - return ( -
-
- Result: {event.subtype} - {event.durationMs != null && ( - {formatDuration(event.durationMs)} - )} - {event.numTurns != null && ( - {event.numTurns} turns - )} - {event.totalCostUsd != null && ( - ${event.totalCostUsd.toFixed(2)} - )} -
- {event.resultText && ( -
- {event.resultText} -
- )} - {event.resultTruncated && } -
- ) - - case 'raw': - return ( -
- {event.text} -
- ) - } -} - -export default function RunLogDetail({ fileName }: { fileName: string }) { - const [data, setData] = useState(null) - const [error, setError] = useState(null) - const [loading, setLoading] = useState(true) - - const load = useCallback(async () => { - try { - const d = await fetchDetail(fileName) - setData(d) - setError(null) - } catch (err) { - setError(err instanceof Error ? err.message : 'kon log niet laden') - } finally { - setLoading(false) - } - }, [fileName]) - - useEffect(() => { - setLoading(true) - setData(null) - setError(null) - load() - }, [load]) - - // Keep refreshing while the run is still in progress. - useEffect(() => { - if (!data?.inProgress) return - const id = setInterval(load, 5000) - return () => clearInterval(id) - }, [data?.inProgress, load]) - - if (loading) { - return
loading log…
- } - if (error) { - return ( -
- {error} -
- ) - } - if (!data) return null - - const { summary, events } = data - - return ( -
-
- {summary.fileName} - {summary.jobId && job {summary.jobId}} - {summary.model && {summary.model}} - {summary.permissionMode && {summary.permissionMode}} - {summary.durationMs != null && {formatDuration(summary.durationMs)}} - {data.inProgress && ( - ● running… - )} - {data.responseTruncated && ( - timeline ingekort (zeer grote log) - )} -
- - {summary.errorSummary && ( -
- {summary.errorSummary} -
- )} - -
- {events.length === 0 ? ( -
no events
- ) : ( - events.map((event, i) => ) - )} -
-
- ) -} diff --git a/app/worker-logs/_components/worker-logs-view.tsx b/app/worker-logs/_components/worker-logs-view.tsx deleted file mode 100644 index be2d6cd..0000000 --- a/app/worker-logs/_components/worker-logs-view.tsx +++ /dev/null @@ -1,202 +0,0 @@ -'use client' - -import { Fragment, useCallback, useEffect, useState } from 'react' -import type { RunLogSummary, RunStatus } from '@/lib/parse-worker-log' -import { cn, formatDuration, relativeTime } from '@/lib/utils' -import RunLogDetail from './run-log-detail' - -const LIMIT_OPTIONS = [10, 25, 50, 100] -const COLUMN_COUNT = 7 - -const STATUS_STYLES: Record = { - idle: { - badge: 'bg-zinc-100 text-zinc-600 dark:bg-zinc-800 dark:text-zinc-400', - dot: 'bg-zinc-400 dark:bg-zinc-500', - }, - running: { - badge: 'bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400', - dot: 'bg-amber-500 dark:bg-amber-400', - }, - success: { - badge: 'bg-green-100 text-green-800 dark:bg-green-900/30 dark:text-green-400', - dot: 'bg-green-500 dark:bg-green-400', - }, - error: { - badge: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400', - dot: 'bg-red-500 dark:bg-red-400', - }, - 'token-expired': { - badge: 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400', - dot: 'bg-red-500 dark:bg-red-400', - }, - unknown: { - badge: 'bg-zinc-100 text-zinc-600 dark:bg-zinc-800 dark:text-zinc-400', - dot: 'bg-zinc-400 dark:bg-zinc-500', - }, -} - -export function StatusBadge({ status }: { status: RunStatus }) { - const s = STATUS_STYLES[status] - return ( - - - {status} - - ) -} - -async function fetchLogs(limit: number): Promise { - const res = await fetch(`/api/worker-logs?limit=${limit}`, { cache: 'no-store' }) - const body = await res.json().catch(() => ({})) - if (!res.ok) throw new Error(body?.error ?? `request failed (${res.status})`) - return (body.logs ?? []) as RunLogSummary[] -} - -type Props = { - initialLogs: RunLogSummary[] - initialError: string | null -} - -export default function WorkerLogsView({ initialLogs, initialError }: Props) { - const [logs, setLogs] = useState(initialLogs) - const [limit, setLimit] = useState(10) - const [selected, setSelected] = useState(null) - const [error, setError] = useState(initialError) - const [refreshing, setRefreshing] = useState(false) - const [lastUpdated, setLastUpdated] = useState(new Date()) - - const refresh = useCallback(async () => { - setRefreshing(true) - try { - const data = await fetchLogs(limit) - setLogs(data) - setError(null) - setLastUpdated(new Date()) - } catch (err) { - setError(err instanceof Error ? err.message : 'refresh failed') - } finally { - setRefreshing(false) - } - }, [limit]) - - useEffect(() => { - refresh() - const id = setInterval(refresh, 10000) - return () => clearInterval(id) - }, [refresh]) - - return ( -
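Both deleted views poll: the table refreshes every 10 s unconditionally, and the detail view re-fetches every 5 s while data.inProgress is true. The shared shape as a hypothetical hook (my factoring, not code from the repo):

```ts
import { useEffect } from 'react'

// Run refresh every `ms` while `active`; clear the interval on unmount
// or when `active` flips to false.
function usePolling(refresh: () => void, active: boolean, ms: number): void {
  useEffect(() => {
    if (!active) return
    const id = setInterval(refresh, ms)
    return () => clearInterval(id)
  }, [refresh, active, ms])
}

// usePolling(load, data?.inProgress === true, 5000)  // detail view
// usePolling(refresh, true, 10000)                   // table view
```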
-
-
- show - {LIMIT_OPTIONS.map((opt) => ( - - ))} - {refreshing && ( - refreshing… - )} -
- - updated {lastUpdated.toLocaleTimeString()} · auto-refreshes every 10s - -
- - {error && ( -
- {error} -
- )} - -
- - - - - - - - - - - - - - {logs.length === 0 && !error ? ( - - - - ) : ( - logs.map((log) => { - const isSelected = selected === log.fileName - return ( - - setSelected(isSelected ? null : log.fileName)} - title={log.errorSummary ?? undefined} - className={cn( - 'cursor-pointer border-b border-border transition-colors', - isSelected ? 'bg-muted/50' : 'hover:bg-muted/30', - )} - > - - - - - - - - - {isSelected && ( - - - - )} - - ) - }) - )} - -
Started Status Job Model Turns Duration Cost
- No worker runs found -
- {log.startedAt ? ( - - {relativeTime(new Date(log.startedAt))} - - ) : ( - {log.runId} - )} - - - - {log.jobId ? `…${log.jobId.slice(-8)}` : '—'} - {log.model ?? '—'} - {log.numTurns ?? '—'} - - {log.durationMs != null ? formatDuration(log.durationMs) : '—'} - - {log.totalCostUsd != null ? `$${log.totalCostUsd.toFixed(2)}` : '—'} -
- -
-
-
- ) -} diff --git a/app/worker-logs/page.tsx b/app/worker-logs/page.tsx deleted file mode 100644 index 48edf2a..0000000 --- a/app/worker-logs/page.tsx +++ /dev/null @@ -1,34 +0,0 @@ -import { redirect } from 'next/navigation' -import { getCurrentUser } from '@/lib/session' -import { listRunLogs } from '@/lib/worker-logs' -import type { RunLogSummary } from '@/lib/parse-worker-log' -import WorkerLogsView from './_components/worker-logs-view' - -export const dynamic = 'force-dynamic' - -export default async function WorkerLogsPage() { - const user = await getCurrentUser() - if (!user) redirect('/login') - - let initialLogs: RunLogSummary[] = [] - let initialError: string | null = null - try { - initialLogs = await listRunLogs(10) - } catch (err) { - initialError = err instanceof Error ? err.message : 'Failed to read worker logs' - } - - return ( -
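The deleted list route converted ?limit= with a bare Number() and passed it to listRunLogs. Should it return, a hedged sketch of validating the parameter against the options the UI exposed (LIMIT_OPTIONS is verbatim from the deleted view; the clamp is my addition):

```ts
const LIMIT_OPTIONS = [10, 25, 50, 100]

function parseLimit(raw: string | null): number {
  const n = Number(raw)
  return LIMIT_OPTIONS.includes(n) ? n : 10
}

console.log(parseLimit('25'))  // 25
console.log(parseLimit('abc')) // 10 (NaN falls back)
console.log(parseLimit(null))  // 10 (Number(null) is 0, not an option)
```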
-
-
-

Worker Logs

-

- Recent runs of the Scrum4Me worker; click a row for the detailed timeline -

-
- -
-
- ) -} diff --git a/components/AppNav.tsx b/components/AppNav.tsx index 3347739..6d848b5 100644 --- a/components/AppNav.tsx +++ b/components/AppNav.tsx @@ -12,7 +12,6 @@ const NAV_ITEMS = [ { href: '/caddy', label: 'Caddy' }, { href: '/flows', label: 'Flows' }, { href: '/audit', label: 'Audit' }, - { href: '/worker-logs', label: 'Worker Logs' }, { href: '/settings', label: 'Settings' }, ] diff --git a/deploy/ops-agent/sudoers b/deploy/ops-agent/sudoers index 4927d1e..93c5646 100644 --- a/deploy/ops-agent/sudoers +++ b/deploy/ops-agent/sudoers @@ -1,19 +1,9 @@ # /etc/sudoers.d/ops-agent -# NOPASSWD for explicit invocations by the ops-agent service account. -# Only the service names + wrapper scripts whitelisted in commands.yml are listed here. +# NOPASSWD for explicit systemctl restart invocations by the ops-agent service account. +# Only the service names whitelisted in commands.yml are listed here. # Installed by deploy/ops-agent/setup.sh. ops-agent ALL=(root) NOPASSWD: \ /usr/bin/systemctl restart scrum4me-web, \ /usr/bin/systemctl restart ops-agent, \ - /usr/bin/systemctl restart caddy, \ - /srv/backups/scripts/wrappers/read-status.sh, \ - /srv/backups/scripts/wrappers/restic-snapshots.sh nas, \ - /srv/backups/scripts/wrappers/restic-snapshots.sh b2, \ - /srv/backups/scripts/wrappers/restic-stats.sh nas, \ - /srv/backups/scripts/wrappers/restic-stats.sh b2, \ - /srv/backups/scripts/wrappers/restic-check.sh nas, \ - /srv/backups/scripts/wrappers/restic-check.sh b2, \ - /srv/backups/scripts/wrappers/trigger-backup.sh, \ - /srv/backups/scripts/wrappers/trigger-restore-test.sh nas, \ - /srv/backups/scripts/wrappers/trigger-restore-test.sh b2 + /usr/bin/systemctl restart caddy diff --git a/deploy/server-backup/README.md b/deploy/server-backup/README.md deleted file mode 100644 index d2b0c50..0000000 --- a/deploy/server-backup/README.md +++ /dev/null @@ -1,141 +0,0 @@ -# Server backup — deploy artefacten - -Dagelijkse server-brede backup met restic naar **NAS** (lokaal) en **Backblaze B2** (offsite, Object Lock). Inclusief structured statusfile die de ops-dashboard kan lezen. - -De volledige beschrijving — voorwaarden, B2 keys, Object Lock, Forgejo-restore-test, integriteits-schedule — staat in [`docs/runbooks/server-backup.md`](../../docs/runbooks/server-backup.md). - -## Bestanden - -| Bestand | Doel | Plek op host | -|---|---|---| -| `server-backup.sh` | hoofd-script (phase-based, flock, statusfile) | `/srv/backups/scripts/server-backup.sh` | -| `restore-test.sh` | restore latest snapshot + check critical files | `/srv/backups/scripts/restore-test.sh` | -| `server-backup.service` | systemd oneshot | `/etc/systemd/system/server-backup.service` | -| `server-backup.timer` | daily 03:30 + 10 min jitter | `/etc/systemd/system/server-backup.timer` | -| `restic-backup.env.example` | env-template (repos, B2 keys, Forgejo) | kopiëren naar `/etc/restic-backup.env` | - -Bovendien aan te maken (niet in deze repo, omdat het secrets zijn): - -- `/etc/restic-backup.password` — alleen het restic-wachtwoord (mode `0400 root:root`). - -## Snelle installatie (zie runbook voor alle context) - -```bash -# 1. Tools en directories -sudo apt update && sudo apt install -y restic jq - -sudo mkdir -p /srv/backups/scripts /srv/backups/logs /srv/backups/status \ - /var/backups/databases -sudo chmod 0750 /srv/backups/logs /srv/backups/status - -# 2. 
Scripts plaatsen -sudo cp deploy/server-backup/server-backup.sh /srv/backups/scripts/ -sudo cp deploy/server-backup/restore-test.sh /srv/backups/scripts/ -sudo chmod 0750 /srv/backups/scripts/*.sh -sudo chown root:root /srv/backups/scripts/*.sh - -# 3. Env + password -sudo cp deploy/server-backup/restic-backup.env.example /etc/restic-backup.env -sudo chmod 0600 /etc/restic-backup.env -sudo chown root:root /etc/restic-backup.env -# Genereer wachtwoord — bewaar dit OOK in je password manager. -sudo sh -c 'openssl rand -hex 24 > /etc/restic-backup.password' -sudo chmod 0400 /etc/restic-backup.password - -# 4. Vul /etc/restic-backup.env (RESTIC_REPO_NAS, RESTIC_REPO_B2, -# B2_ACCOUNT_ID, B2_ACCOUNT_KEY, FORGEJO_*). Zie runbook deel A+B. - -# 5. Repos initialiseren (zie runbook deel C voor Object Lock + key-capabilities) -sudo -E bash -c 'set -a; . /etc/restic-backup.env; set +a; \ - export RESTIC_PASSWORD_FILE=/etc/restic-backup.password; \ - restic -r "$RESTIC_REPO_NAS" init && \ - restic -r "$RESTIC_REPO_B2" init' - -# 6. Systemd -sudo cp deploy/server-backup/server-backup.service /etc/systemd/system/ -sudo cp deploy/server-backup/server-backup.timer /etc/systemd/system/ -sudo systemctl daemon-reload -sudo systemctl enable --now server-backup.timer -systemctl list-timers | grep server-backup - -# 7. Eerste run handmatig (volgen via journalctl) -sudo systemctl start server-backup.service -journalctl -u server-backup.service -f -``` - -### Ops-agent wiring (na stap 1-7) - -Voor de **/flows/server-backup**-pagina en **/settings/backups** in het dashboard -moet ops-agent ook weten van de wrappers, commands, flow-YAMLs en de -NOPASSWD-sudoers-regels. Dat doet een idempotent install-script: - -```bash -sudo bash deploy/server-backup/install-flows.sh -``` - -Wat het regelt (en wat het bewust **niet** doet) staat in de header van het -script. Re-run safe; backups van `commands.yml` en `sudoers.d/ops-agent` worden -bewaard met `.bak.`-suffix. Daarna is de UI op -`/flows/server-backup` direct te gebruiken. - -## Verifiëren - -```bash -# Statusfile -sudo jq . /srv/backups/status/last-run.json - -# Snapshots -sudo -E bash -c 'set -a; . /etc/restic-backup.env; set +a; \ - export RESTIC_PASSWORD_FILE=/etc/restic-backup.password; \ - restic -r "$RESTIC_REPO_NAS" snapshots; \ - restic -r "$RESTIC_REPO_B2" snapshots' - -# Restore-test (NAS, niet-destructief — restored naar /tmp/restore-test) -sudo /srv/backups/scripts/restore-test.sh nas -sudo jq . /srv/backups/status/last-restore-test.json -``` - -## Statusfile-schema - -Het script schrijft `/srv/backups/status/last-run.json` na elke run (success of failure), atomisch via temp + `mv`. De ops-dashboard leest deze file via `read_backup_status` (zie `ops-agent/commands.yml.example`). - -```json -{ - "schema_version": 1, - "overall_status": "success | partial_failure | failed", - "started_at": "2026-05-15T03:30:00+02:00", - "completed_at": "2026-05-15T03:48:21+02:00", - "duration_seconds": 1101, - "host": "scrum4me-srv", - "phases": { - "postgres_dump": { "status": "success", "exit_code": 0, "...": "..." }, - "forgejo_dump": { "status": "skipped", "exit_code": 99, "...": "..." 
}, - "forgejo_db_dump": { "status": "skipped", "exit_code": 99 }, - "restic_nas": { "status": "success", "exit_code": 0, "snapshot_id": "abc123" }, - "restic_b2": { "status": "degraded", "exit_code": 3, "error": "1 file unreadable" }, - "forget_nas": { "status": "success", "exit_code": 0 }, - "check_nas": { "status": "success", "exit_code": 0 }, - "check_b2": { "status": "success", "exit_code": 0 } - } -} -``` - -Per phase `status`: - -| status | betekenis | telt mee als | -|---|---|---| -| `success` | exit 0 | success | -| `skipped` | exit 99 — phase niet van toepassing (bv. Forgejo niet geïnstalleerd) | success | -| `degraded` | exit 3 — restic snapshot is gemaakt maar bepaalde files waren onleesbaar | partial_failure | -| `failed` | andere non-zero exit | partial_failure of failed (zie `overall_status`) | -| `pending` | phase niet gerund (script aborted vóór deze phase) | partial_failure | - -`overall_status` regels: - -- **`failed`** als `postgres_dump` faalt (DB-dump is autoritatief), of als **beide** restic repos falen. -- **`partial_failure`** bij enige `failed` of `degraded` phase die niet kritisch is (bv. één restic repo down, of forgejo_dump faalt terwijl postgres lukt). -- **`success`** als geen enkele phase `failed` of `degraded` is. - -## Volgorde tov bestaande `ops-db-backup.timer` - -De bestaande `deploy/ops-agent/ops-db-backup.timer` draait om **02:00** en doet alleen `pg_dump ops_dashboard` naar `/srv/ops/backups/`. Deze nieuwe `server-backup.timer` draait om **03:30** en pickt die map mee in zijn restic-backup. Beide blijven naast elkaar bestaan. diff --git a/deploy/server-backup/install-flows.sh b/deploy/server-backup/install-flows.sh deleted file mode 100755 index b0b094b..0000000 --- a/deploy/server-backup/install-flows.sh +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/env bash -# Idempotent installer that wires the server-backup flow into ops-agent. -# -# What this DOES install: -# 1. /srv/backups/scripts/wrappers/*.sh (wrapper scripts used by ops-agent) -# 2. /etc/ops-agent/flows/server_backup_*.yml (flow YAMLs for full + restore-test) -# 3. /etc/ops-agent/commands.yml (appends backup commands if missing) -# 4. /etc/sudoers.d/ops-agent (appends wrapper allowlist, visudo-validated) -# 5. systemctl restart ops-agent (pick up new commands/flows) -# 6. systemctl enable --now server-backup.timer (daily backup) -# -# What this DOES NOT do (do manually first — see README "Snelle installatie"): -# - Create /etc/restic-backup.env (with NAS path, B2 keys, Forgejo container name) -# - Create /etc/restic-backup.password -# - Initialise the restic repos (NAS + B2) -# - Install /srv/backups/scripts/{server-backup.sh,restore-test.sh} -# - Install /etc/systemd/system/server-backup.{service,timer} -# -# Re-run safe: each step checks for prior state and skips. Backups of mutated -# files (commands.yml, sudoers) are kept with a .bak. suffix. -# -# Usage: -# sudo bash deploy/server-backup/install-flows.sh - -set -euo pipefail - -# Resolve repo root from this script's location, so it works regardless of cwd. -SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" -REPO="$(cd -- "$SCRIPT_DIR/../.." 
&>/dev/null && pwd)" - -WRAPPERS_SRC="$REPO/deploy/server-backup/wrappers" -FLOWS_SRC="$REPO/ops-agent/flows.example" -COMMANDS_SRC="$REPO/ops-agent/commands.yml.example" - -WRAPPERS_DST=/srv/backups/scripts/wrappers -FLOWS_DST=/etc/ops-agent/flows -COMMANDS_DST=/etc/ops-agent/commands.yml -SUDOERS_DST=/etc/sudoers.d/ops-agent - -if [[ $EUID -ne 0 ]]; then - echo "ERROR: run as root (sudo)." >&2 - exit 1 -fi - -step() { echo; echo "── $* ──"; } -ok() { echo " ✓ $*"; } -skip() { echo " · $* (already in place)"; } -note() { echo " ! $*"; } - -# ────────────────────────────────────────────────────────────────────────────── -step "1. Install wrappers to $WRAPPERS_DST" - -mkdir -p "$WRAPPERS_DST" -chown root:root "$WRAPPERS_DST" -chmod 0750 "$WRAPPERS_DST" - -if [[ ! -d "$WRAPPERS_SRC" ]]; then - echo "ERROR: $WRAPPERS_SRC not found — repo state unexpected (expected at $REPO)" >&2 - exit 2 -fi - -for src in "$WRAPPERS_SRC"/*.sh; do - name=$(basename "$src") - dst="$WRAPPERS_DST/$name" - if [[ -f "$dst" ]] && cmp -s "$src" "$dst"; then - skip "$name" - else - install -o root -g root -m 0750 "$src" "$dst" - ok "$name installed" - fi -done - -# ────────────────────────────────────────────────────────────────────────────── -step "2. Install flow YAMLs to $FLOWS_DST" - -mkdir -p "$FLOWS_DST" - -for f in server_backup_full.yml server_backup_restore_test.yml; do - src="$FLOWS_SRC/$f" - dst="$FLOWS_DST/$f" - if [[ ! -f "$src" ]]; then - echo "ERROR: $src missing — repo state unexpected" >&2 - exit 3 - fi - if [[ -f "$dst" ]] && cmp -s "$src" "$dst"; then - skip "$f" - else - install -o root -g root -m 0644 "$src" "$dst" - ok "$f installed" - fi -done - -# ────────────────────────────────────────────────────────────────────────────── -step "3. Append missing commands to $COMMANDS_DST" - -# Commands we want to ensure exist. Names must match the YAML in commands.yml.example. -NEEDED_CMDS=( - trigger_server_backup - trigger_restore_test - tail_backup_log_today - read_backup_status - restic_snapshots_nas - restic_snapshots_b2 - restic_stats_nas - restic_stats_b2 -) - -if [[ ! -f "$COMMANDS_DST" ]]; then - echo "ERROR: $COMMANDS_DST missing — run base ops-agent install first" >&2 - exit 4 -fi - -TS=$(date +%Y%m%d-%H%M%S) -cp -p "$COMMANDS_DST" "${COMMANDS_DST}.bak.${TS}" - -# Check which commands are missing -missing_cmds=() -for cmd in "${NEEDED_CMDS[@]}"; do - if grep -qE "^ ${cmd}:" "$COMMANDS_DST"; then - skip "command $cmd already in commands.yml" - else - missing_cmds+=("$cmd") - fi -done - -if [[ ${#missing_cmds[@]} -eq 0 ]]; then - skip "no commands to add" - rm "${COMMANDS_DST}.bak.${TS}" # no-op edit, drop the backup -else - # Extract each missing command's YAML block from commands.yml.example. - # The block-detection regex MUST include digits — command names like - # restic_snapshots_b2 / restic_stats_b2 contain digits, otherwise the - # following block (e.g. restic_stats_nas) would swallow them. - tmp=$(mktemp) - python3 - "$COMMANDS_SRC" "${missing_cmds[@]}" >> "$tmp" <<'PY' -import sys, re -src_path = sys.argv[1] -wanted = sys.argv[2:] -with open(src_path) as f: - src = f.read() -# Top-level command blocks: each starts at column-2 with ":" line. -# A block ends at the next sibling-key line (same indentation) or EOF. 
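-# Illustration (hypothetical shape, real field contents elided): in a source
-# file like
-#   read_backup_status:
-#     ...
-#   restic_snapshots_b2:
-#     ...
-# the first block spans from "  read_backup_status:" up to, but not
-# including, "  restic_snapshots_b2:" — which is why the character class
-# below must allow digits.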
-pattern = re.compile(r"(^ [a-z0-9_]+:[\s\S]*?)(?=^ [a-z0-9_]+:|\Z)", re.M) -blocks = {} -for m in pattern.finditer(src): - block = m.group(1) - name_match = re.match(r"^ ([a-z0-9_]+):", block) - if name_match: - blocks[name_match.group(1)] = block.rstrip() + "\n" -exit_code = 0 -for cmd in wanted: - if cmd in blocks: - sys.stdout.write("\n" + blocks[cmd]) - else: - sys.stderr.write(f"ERROR: {cmd} not found in {src_path}\n") - exit_code = 1 -sys.exit(exit_code) -PY - - if [[ -s "$tmp" ]]; then - cat "$tmp" >> "$COMMANDS_DST" - rm "$tmp" - ok "appended ${#missing_cmds[@]} commands: ${missing_cmds[*]}" - note "backup at ${COMMANDS_DST}.bak.${TS}" - else - rm "$tmp" - echo "ERROR: extraction produced empty output — aborting" >&2 - exit 5 - fi -fi - -# ────────────────────────────────────────────────────────────────────────────── -step "4. Ensure sudoers allows ops-agent to run wrappers" - -WRAPPER_PATHS=( - /srv/backups/scripts/wrappers/trigger-backup.sh - /srv/backups/scripts/wrappers/trigger-restore-test.sh - /srv/backups/scripts/wrappers/read-status.sh - /srv/backups/scripts/wrappers/restic-snapshots.sh - /srv/backups/scripts/wrappers/restic-stats.sh - /srv/backups/scripts/wrappers/restic-check.sh -) - -# Build proposed sudoers content: existing file + missing wrapper-NOPASSWD lines. -SUDOERS_TMP=$(mktemp /tmp/sudoers-ops-agent.XXXXXX) -chmod 0440 "$SUDOERS_TMP" -cp "$SUDOERS_DST" "$SUDOERS_TMP" - -added_lines=0 -for path in "${WRAPPER_PATHS[@]}"; do - pattern="NOPASSWD:[[:space:]]*${path//\//\\/}\\b" - if grep -qE "$pattern" "$SUDOERS_TMP"; then - skip "$(basename "$path") already in sudoers" - else - echo "ops-agent ALL=(root) NOPASSWD: $path *" >> "$SUDOERS_TMP" - ok "added NOPASSWD for $(basename "$path")" - added_lines=$((added_lines + 1)) - fi -done - -if [[ $added_lines -gt 0 ]]; then - # Validate with visudo before swapping in — bail loud if invalid (prevents lockout). - if visudo -c -f "$SUDOERS_TMP" >/dev/null; then - cp -p "$SUDOERS_DST" "${SUDOERS_DST}.bak.${TS}" - install -o root -g root -m 0440 "$SUDOERS_TMP" "$SUDOERS_DST" - rm "$SUDOERS_TMP" - ok "sudoers updated (visudo-validated); backup at ${SUDOERS_DST}.bak.${TS}" - else - echo "ERROR: visudo validation failed — sudoers not modified" >&2 - echo " check $SUDOERS_TMP" >&2 - exit 6 - fi -else - rm "$SUDOERS_TMP" - skip "sudoers already complete" -fi - -# ────────────────────────────────────────────────────────────────────────────── -step "5. Restart ops-agent (reload commands.yml + flows)" - -systemctl restart ops-agent -sleep 1 -if systemctl is-active --quiet ops-agent; then - ok "ops-agent restarted ($(systemctl show -p ActiveEnterTimestamp ops-agent --value))" -else - echo "ERROR: ops-agent failed to start — check 'journalctl -u ops-agent -n 50'" >&2 - exit 7 -fi - -# ────────────────────────────────────────────────────────────────────────────── -step "6. 
Enable server-backup.timer" - -if systemctl is-enabled --quiet server-backup.timer; then - skip "server-backup.timer already enabled" -else - systemctl enable server-backup.timer - ok "server-backup.timer enabled" -fi - -if systemctl is-active --quiet server-backup.timer; then - skip "server-backup.timer already active" -else - systemctl start server-backup.timer - ok "server-backup.timer started" -fi - -# Show next-firing -echo -note "next scheduled runs:" -systemctl list-timers --no-pager | grep -E "NEXT|server-backup" | head -5 - -# ────────────────────────────────────────────────────────────────────────────── -step "Done" - -echo -echo "Test via the UI:" -echo " /flows/server-backup → click 'Run restore test' (non-destructive)" -echo -echo "Or test via curl on this host:" -echo " TOKEN=\$(cat /etc/ops-agent/secret)" -echo " curl -sS -H \"Authorization: Bearer \$TOKEN\" \\" -echo " -H 'Content-Type: application/json' \\" -echo " -X POST http://127.0.0.1:3099/agent/v1/flow \\" -echo " --data '{\"flow_key\":\"server_backup_restore_test\",\"dry_run\":false}'" diff --git a/deploy/server-backup/restic-backup.env.example b/deploy/server-backup/restic-backup.env.example deleted file mode 100644 index 9031fdf..0000000 --- a/deploy/server-backup/restic-backup.env.example +++ /dev/null @@ -1,44 +0,0 @@ -# Copy to /etc/restic-backup.env on the host. Permissions: 0600 root:root. -# RESTIC_PASSWORD lives in /etc/restic-backup.password (mode 0400 root:root) -# — the backup script sets RESTIC_PASSWORD_FILE from there, so the password -# never appears in the process listing or this env file. - -# ── Restic repositories ──────────────────────────────────────────────────── -# Local NAS path (must be mounted before the timer fires; see runbook). -RESTIC_REPO_NAS=/mnt/nas/backups/restic/scrum4me-srv - -# Backblaze B2 repo, format: b2:: -# Bucket must have Object Lock (Governance) with default retention >= 30 days. -RESTIC_REPO_B2=b2:scrum4me-srv-backup:scrum4me-srv - -# ── Backblaze B2 server key ──────────────────────────────────────────────── -# Capabilities REQUIRED: listBuckets, listFiles, readFiles, writeFiles -# Capabilities FORBIDDEN: deleteFiles, deleteKeys, bypassGovernance -# Create with: -# b2 application-key create \ -# --bucket scrum4me-srv-backup \ -# --name-prefix scrum4me-srv \ -# server-backup-key \ -# listBuckets,listFiles,readFiles,writeFiles -B2_ACCOUNT_ID=REPLACE_WITH_B2_KEY_ID -B2_ACCOUNT_KEY=REPLACE_WITH_B2_APPLICATION_KEY - -# ── Forgejo backup target (optional — set to skip if Forgejo not deployed) ─ -# Container name as it appears in `docker ps`. Set to "" or comment out to -# skip the Forgejo phases entirely. -FORGEJO_CONTAINER=scrum4me-forgejo -# Path to app.ini INSIDE the Forgejo container (used by `forgejo dump -c`). -FORGEJO_CONFIG=/data/gitea/conf/app.ini -# Postgres database name for Forgejo (empty = use SQLite, skip forgejo_db_dump). -FORGEJO_DB_NAME=forgejo -# Postgres container + role for Forgejo's DB (defaults match scrum4me stack). -FORGEJO_DB_CONTAINER=scrum4me-postgres -FORGEJO_DB_USER=scrum4me - -# ── Scrum4Me Postgres (required for postgres_dump phase) ─────────────────── -PG_CONTAINER=scrum4me-postgres -PG_DUMPALL_USER=scrum4me - -# ── Optional bandwidth limit for restic B2 upload (KiB/s; 0 = unlimited) ── -# Translated by the script into `restic --limit-upload "$BACKUP_LIMIT_UPLOAD_KIB"`. 
-# BACKUP_LIMIT_UPLOAD_KIB=5000 diff --git a/deploy/server-backup/restore-test.sh b/deploy/server-backup/restore-test.sh deleted file mode 100644 index 0cc8dae..0000000 --- a/deploy/server-backup/restore-test.sh +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env bash -# Restore the latest restic snapshot to /tmp/restore-test/ and assert that a -# small set of critical files came back intact. Used by the monthly maintenance -# check and by the dashboard's "Restore test" button. -# -# Usage: -# server-backup-restore-test.sh [nas|b2] -# -# Default repo is "nas" (faster, no B2 download fees). - -umask 077 -set -uo pipefail - -REPO_LABEL="${1:-nas}" -RESTORE_DIR="${RESTORE_DIR:-/tmp/restore-test}" -RESTIC_PASSWORD_FILE_PATH="${RESTIC_PASSWORD_FILE_PATH:-/etc/restic-backup.password}" -STATUS_FILE="${STATUS_FILE:-/srv/backups/status/last-restore-test.json}" -STATUS_DIR="$(dirname "$STATUS_FILE")" -STARTED_AT="$(date -Is)" -SECONDS=0 - -# Load env (idempotent: ok if already in environment). -if [ -z "${RESTIC_REPO_NAS:-}" ] && [ -r /etc/restic-backup.env ]; then - # shellcheck disable=SC1091 - set -a; . /etc/restic-backup.env; set +a -fi - -case "$REPO_LABEL" in - nas) REPO="${RESTIC_REPO_NAS:?RESTIC_REPO_NAS not set}" ;; - b2) REPO="${RESTIC_REPO_B2:?RESTIC_REPO_B2 not set}" ;; - *) echo "ERROR: repo label must be 'nas' or 'b2', got '$REPO_LABEL'" >&2; exit 2 ;; -esac - -if [ ! -r "$RESTIC_PASSWORD_FILE_PATH" ]; then - echo "ERROR: restic password file $RESTIC_PASSWORD_FILE_PATH not readable" >&2 - exit 1 -fi -export RESTIC_PASSWORD_FILE="$RESTIC_PASSWORD_FILE_PATH" - -for tool in jq restic; do - command -v "$tool" >/dev/null 2>&1 || { echo "ERROR: '$tool' not on PATH" >&2; exit 1; } -done - -mkdir -p "$STATUS_DIR" -chmod 0750 "$STATUS_DIR" - -echo "════════════════════════════════════════════════════════════════" -echo " Restore test — started $STARTED_AT" -echo " Repo: $REPO_LABEL ($REPO)" -echo " Target: $RESTORE_DIR" -echo "════════════════════════════════════════════════════════════════" - -# Clean previous attempt to keep results unambiguous. -rm -rf "$RESTORE_DIR" -mkdir -p "$RESTORE_DIR" - -# Find latest snapshot id. -SNAPSHOT_ID=$(restic -r "$REPO" snapshots --json --latest 1 2>/dev/null \ - | jq -r '.[0].short_id // .[0].id // empty') - -if [ -z "$SNAPSHOT_ID" ]; then - echo "ERROR: no snapshots found in $REPO_LABEL repo" - jq -n \ - --arg started "$STARTED_AT" \ - --arg completed "$(date -Is)" \ - --argjson duration "$SECONDS" \ - --arg repo "$REPO_LABEL" \ - '{ - schema_version: 1, - overall_status: "failed", - started_at: $started, - completed_at: $completed, - duration_seconds: $duration, - repo: $repo, - snapshot_id: null, - error: "no snapshots in repo", - assertions: [] - }' > "$STATUS_FILE" - chmod 0644 "$STATUS_FILE" - exit 1 -fi - -echo "Restoring snapshot $SNAPSHOT_ID (filtered) …" -# Restore ALLEEN de paden waar we op asserten — een full restore zou disk -# nodig hebben gelijk aan de restore-size van de snapshot (honderden GiB) en -# is voor een correctheids-test onnodig. /tmp is vaak tmpfs of klein — -# vandaar dat een full restore daar onmiddellijk vastloopt op ENOSPC. -# Houd deze lijst gesynchroniseerd met ASSERTION_PATHS hieronder. -RESTORE_RC=0 -restic -r "$REPO" restore "$SNAPSHOT_ID" --target "$RESTORE_DIR" \ - --include /srv/scrum4me/compose/docker-compose.yml \ - --include /srv/scrum4me/caddy/Caddyfile \ - --include /etc/restic-backup.env \ - --include /var/backups/databases \ - || RESTORE_RC=$? 
- -if [ "$RESTORE_RC" -ne 0 ]; then - echo "ERROR: restic restore exited $RESTORE_RC" -fi - -# Assertions: each is a path that MUST exist and be non-empty. -# Adjust to your stack after first run (and update the runbook addendum). -ASSERTION_PATHS=( - "$RESTORE_DIR/srv/scrum4me/compose/docker-compose.yml" - "$RESTORE_DIR/srv/scrum4me/caddy/Caddyfile" - "$RESTORE_DIR/etc/restic-backup.env" -) - -# Latest postgres dump — match the newest file (glob may resolve to zero). -shopt -s nullglob -PG_DUMPS=("$RESTORE_DIR/var/backups/databases/"postgres-*.sql.gz) -shopt -u nullglob -if [ "${#PG_DUMPS[@]}" -gt 0 ]; then - # pick lexicographic last (= newest date, ISO format) - LATEST_PG="${PG_DUMPS[-1]}" - ASSERTION_PATHS+=("$LATEST_PG") -fi - -ASSERTIONS_JSON='[]' -ANY_FAILED=0 -for p in "${ASSERTION_PATHS[@]}"; do - if [ -s "$p" ]; then - status="ok" - bytes=$(stat -c %s "$p") - echo " ✓ $p ($bytes bytes)" - elif [ -e "$p" ]; then - status="empty" - bytes=0 - ANY_FAILED=1 - echo " ✗ $p (exists but empty)" - else - status="missing" - bytes=0 - ANY_FAILED=1 - echo " ✗ $p (missing)" - fi - ASSERTIONS_JSON=$(jq -c \ - --arg path "$p" \ - --arg status "$status" \ - --argjson bytes "$bytes" \ - '. + [{path: $path, status: $status, bytes: $bytes}]' \ - <<< "$ASSERTIONS_JSON") -done - -if [ "$RESTORE_RC" -ne 0 ]; then - OVERALL="failed" -elif [ "$ANY_FAILED" -ne 0 ]; then - OVERALL="partial_failure" -else - OVERALL="success" -fi - -jq -n \ - --arg started "$STARTED_AT" \ - --arg completed "$(date -Is)" \ - --argjson duration "$SECONDS" \ - --arg repo "$REPO_LABEL" \ - --arg snapshot "$SNAPSHOT_ID" \ - --arg overall "$OVERALL" \ - --argjson restore_exit "$RESTORE_RC" \ - --argjson assertions "$ASSERTIONS_JSON" \ - '{ - schema_version: 1, - overall_status: $overall, - started_at: $started, - completed_at: $completed, - duration_seconds: $duration, - repo: $repo, - snapshot_id: $snapshot, - restore_exit_code: $restore_exit, - target: "'"$RESTORE_DIR"'", - assertions: $assertions - }' > "$STATUS_FILE" -chmod 0644 "$STATUS_FILE" - -echo "" -echo "════════════════════════════════════════════════════════════════" -echo " Restore test — finished $(date -Is)" -echo " Overall: $OVERALL" -echo " Status file: $STATUS_FILE" -echo "════════════════════════════════════════════════════════════════" - -case "$OVERALL" in - success) exit 0 ;; - partial_failure) exit 75 ;; - failed|*) exit 1 ;; -esac diff --git a/deploy/server-backup/server-backup.service b/deploy/server-backup/server-backup.service deleted file mode 100644 index a1034a9..0000000 --- a/deploy/server-backup/server-backup.service +++ /dev/null @@ -1,38 +0,0 @@ -[Unit] -Description=Server-wide backup (pg_dumpall + restic to NAS + B2) -Documentation=file:///srv/scrum4me/ops-dashboard/docs/runbooks/server-backup.md -After=network-online.target docker.service -Wants=network-online.target -# NAS-mount moet beschikbaar zijn voordat restic naar de NAS-repo schrijft; -# triggert de cifs automount voor /mnt/nas/backups als die nog niet actief is. -RequiresMountsFor=/mnt/nas/backups - -[Service] -Type=oneshot -EnvironmentFile=/etc/restic-backup.env -ExecStart=/srv/backups/scripts/server-backup.sh -TimeoutStartSec=4h -RuntimeMaxSec=6h -Nice=10 -IOSchedulingClass=best-effort -IOSchedulingPriority=7 -# Sandboxing — backup needs root for /etc + docker exec, but limit the rest. -# /mnt/nas/backups MOET in ReadWritePaths anders kan restic niet naar de -# NAS-repo schrijven door ProtectSystem=strict. 
-ProtectSystem=strict -ReadWritePaths=/var/backups /srv/backups /run /tmp /mnt/nas/backups -ProtectHome=read-only -NoNewPrivileges=yes -PrivateTmp=yes -ProtectKernelTunables=yes -ProtectKernelModules=yes -ProtectControlGroups=yes -StandardOutput=journal -StandardError=journal -SyslogIdentifier=server-backup - -# Exit code semantics from server-backup.sh: -# 0 = success (all phases ok) -# 75 = partial_failure (some non-critical phase failed/degraded) -# 1 = failed (a critical dump phase failed or both restic repos failed) -SuccessExitStatus=75 diff --git a/deploy/server-backup/server-backup.sh b/deploy/server-backup/server-backup.sh deleted file mode 100644 index 0afdb55..0000000 --- a/deploy/server-backup/server-backup.sh +++ /dev/null @@ -1,504 +0,0 @@ -#!/usr/bin/env bash -# Daily server-wide backup: dumps databases, runs restic to NAS + B2, -# writes a structured statusfile that the ops-dashboard can read. -# -# Install: -# cp deploy/server-backup/server-backup.sh /srv/backups/scripts/server-backup.sh -# chmod 0750 /srv/backups/scripts/server-backup.sh -# chown root:root /srv/backups/scripts/server-backup.sh -# -# Requires: bash, jq, flock, restic, docker, gzip. See runbook for setup. - -umask 077 -set -uo pipefail - -# ── Configuration ────────────────────────────────────────────────────────── -STATUS_DIR="${STATUS_DIR:-/srv/backups/status}" -LOG_DIR="${LOG_DIR:-/srv/backups/logs}" -DB_DUMP_DIR="${DB_DUMP_DIR:-/var/backups/databases}" -RESTIC_PASSWORD_FILE_PATH="${RESTIC_PASSWORD_FILE_PATH:-/etc/restic-backup.password}" -LOCKFILE="${LOCKFILE:-/run/server-backup.lock}" -RUN_DATE="$(date +%F)" -STARTED_AT="$(date -Is)" -SECONDS=0 - -# Phase order — must match write_status_json + determine_exit_code expectations. -PHASE_ORDER=( - postgres_dump - forgejo_dump - forgejo_db_dump - restic_nas - restic_b2 - forget_nas - check_nas - check_b2 -) - -declare -A PHASE_STATUS PHASE_EXIT PHASE_START PHASE_END PHASE_ERR PHASE_EXTRA -OVERALL_STATUS="unknown" - -# ── Single-instance lock ─────────────────────────────────────────────────── -exec 9>"$LOCKFILE" || { echo "ERROR: cannot open lockfile $LOCKFILE" >&2; exit 1; } -if ! flock -n 9; then - echo "ERROR: another server-backup is already running (lock $LOCKFILE held)" >&2 - exit 75 -fi - -# ── Env + secret loading ─────────────────────────────────────────────────── -# When invoked via systemd, EnvironmentFile=/etc/restic-backup.env has already -# been loaded. When invoked manually for testing, source it ourselves. -if [ -z "${RESTIC_REPO_NAS:-}" ] && [ -r /etc/restic-backup.env ]; then - # shellcheck disable=SC1091 - set -a; . /etc/restic-backup.env; set +a -fi - -: "${RESTIC_REPO_NAS:?RESTIC_REPO_NAS not set (see /etc/restic-backup.env)}" -: "${RESTIC_REPO_B2:?RESTIC_REPO_B2 not set (see /etc/restic-backup.env)}" - -if [ ! -r "$RESTIC_PASSWORD_FILE_PATH" ]; then - echo "ERROR: restic password file $RESTIC_PASSWORD_FILE_PATH not readable" >&2 - exit 1 -fi -export RESTIC_PASSWORD_FILE="$RESTIC_PASSWORD_FILE_PATH" - -# Required tooling -for tool in jq restic docker gzip flock; do - if ! command -v "$tool" >/dev/null 2>&1; then - echo "ERROR: required tool '$tool' not on PATH" >&2 - exit 1 - fi -done - -# ── Logging ──────────────────────────────────────────────────────────────── -mkdir -p "$LOG_DIR" "$STATUS_DIR" "$DB_DUMP_DIR" -chmod 0750 "$LOG_DIR" "$STATUS_DIR" -LOG_FILE="$LOG_DIR/server-backup-$RUN_DATE.log" -# Mirror everything to LOG_FILE and the journal. 
-exec > >(tee -a "$LOG_FILE") 2>&1 - -echo "════════════════════════════════════════════════════════════════" -echo " Server backup — started $STARTED_AT" -echo " Host: $(hostname)" -echo " NAS repo: $RESTIC_REPO_NAS" -echo " B2 repo: $RESTIC_REPO_B2" -echo "════════════════════════════════════════════════════════════════" - -# ── Phase runner ─────────────────────────────────────────────────────────── -# Runs the function passed as first arg, captures stdout+stderr into a phase -# buffer, records status / exit_code / timestamps / error tail. -run_phase() { - local name="$1"; shift - local phase_buf - phase_buf=$(mktemp -t "backup-phase-${name}.XXXXXX") - - echo "" - echo "─── phase: $name ─── $(date -Is)" - PHASE_START[$name]=$(date -Is) - - local rc=0 - # Run in a sub-shell so set -e inside callees doesn't kill us. - ( - "$@" - ) 2>&1 | tee "$phase_buf" - rc=${PIPESTATUS[0]} - - PHASE_EXIT[$name]=$rc - case "$rc" in - 0) PHASE_STATUS[$name]=success ;; - 3) PHASE_STATUS[$name]=degraded ;; # restic: snapshot created but some files unreadable - 99) PHASE_STATUS[$name]=skipped ;; # our convention for "not applicable" - *) PHASE_STATUS[$name]=failed ;; - esac - - if [ "$rc" -ne 0 ] && [ "$rc" -ne 99 ] && [ -s "$phase_buf" ]; then - # Keep last few non-empty lines as a compact error summary. - PHASE_ERR[$name]=$(tail -n 5 "$phase_buf" | tr '\n' ' ' | head -c 500) - fi - - PHASE_END[$name]=$(date -Is) - rm -f "$phase_buf" - echo "─── end $name (exit=$rc, status=${PHASE_STATUS[$name]})" -} - -# Convention: a phase function returns 99 to mark itself "skipped" — the -# overall outcome treats this as success. -SKIPPED=99 - -# ── Phase 1: pg_dumpall (Scrum4Me Postgres cluster) ──────────────────────── -dump_postgres_all() { - local pg_container="${PG_CONTAINER:-scrum4me-postgres}" - local pg_user="${PG_DUMPALL_USER:-scrum4me}" - - if ! docker ps --format '{{.Names}}' | grep -qx "$pg_container"; then - echo "Postgres container '$pg_container' not running — cannot continue." - return 1 - fi - - local tmp="$DB_DUMP_DIR/.postgres-$RUN_DATE.sql.gz.tmp" - local final="$DB_DUMP_DIR/postgres-$RUN_DATE.sql.gz" - rm -f "$tmp" - - set -o pipefail - docker exec "$pg_container" pg_dumpall -U "$pg_user" --clean --if-exists \ - | gzip -c > "$tmp" - local rc=$? - set +o pipefail - - if [ "$rc" -ne 0 ]; then - rm -f "$tmp" - return "$rc" - fi - - mv "$tmp" "$final" - chmod 0640 "$final" - local bytes - bytes=$(stat -c %s "$final" 2>/dev/null || echo 0) - PHASE_EXTRA[postgres_dump]="output_file=$final;bytes=$bytes" - echo "wrote $final ($bytes bytes)" -} - -# ── Phase 2: Forgejo dump (filesystem + repos) ───────────────────────────── -dump_forgejo() { - local fj="${FORGEJO_CONTAINER:-}" - if [ -z "$fj" ]; then - echo "FORGEJO_CONTAINER unset — skipping Forgejo dump." - return "$SKIPPED" - fi - if ! docker ps --format '{{.Names}}' | grep -qx "$fj"; then - echo "Forgejo container '$fj' not running — skipping." - return "$SKIPPED" - fi - - local config="${FORGEJO_CONFIG:-/data/gitea/conf/app.ini}" - local tmp="$DB_DUMP_DIR/.forgejo-$RUN_DATE.zip.tmp" - local final="$DB_DUMP_DIR/forgejo-$RUN_DATE.zip" - rm -f "$tmp" - - # `forgejo dump -f -` streams the zip to stdout. We run as the `git` user - # inside the container (standard Forgejo image convention). - # - # NB: Forgejo 11.x heeft GEEN `--skip-db` flag (verwijderd na de Gitea-fork); - # de DB komt dus mee in de zip. Onze separate `forgejo_db_dump`-fase blijft - # de autoritatieve restore-bron — de in-zip DB-dump is een redundante kopie. 
- set -o pipefail - docker exec -u git "$fj" forgejo dump -c "$config" --type zip -f - > "$tmp" - local rc=$? - set +o pipefail - - if [ "$rc" -ne 0 ]; then - rm -f "$tmp" - return "$rc" - fi - - mv "$tmp" "$final" - chmod 0640 "$final" - local bytes - bytes=$(stat -c %s "$final" 2>/dev/null || echo 0) - PHASE_EXTRA[forgejo_dump]="output_file=$final;bytes=$bytes" - echo "wrote $final ($bytes bytes)" -} - -# ── Phase 3: Forgejo Postgres DB dump (authoritative for DB restore) ─────── -dump_forgejo_db() { - local db_name="${FORGEJO_DB_NAME:-}" - if [ -z "$db_name" ]; then - echo "FORGEJO_DB_NAME unset — skipping Forgejo DB dump (assume SQLite)." - return "$SKIPPED" - fi - local db_container="${FORGEJO_DB_CONTAINER:-scrum4me-postgres}" - local db_user="${FORGEJO_DB_USER:-scrum4me}" - - if ! docker ps --format '{{.Names}}' | grep -qx "$db_container"; then - echo "DB container '$db_container' not running — skipping Forgejo DB dump." - return "$SKIPPED" - fi - - local tmp="$DB_DUMP_DIR/.forgejo-db-$RUN_DATE.sql.gz.tmp" - local final="$DB_DUMP_DIR/forgejo-db-$RUN_DATE.sql.gz" - rm -f "$tmp" - - set -o pipefail - docker exec "$db_container" pg_dump -U "$db_user" --clean --if-exists "$db_name" \ - | gzip -c > "$tmp" - local rc=$? - set +o pipefail - - if [ "$rc" -ne 0 ]; then - rm -f "$tmp" - return "$rc" - fi - - mv "$tmp" "$final" - chmod 0640 "$final" - local bytes - bytes=$(stat -c %s "$final" 2>/dev/null || echo 0) - PHASE_EXTRA[forgejo_db_dump]="output_file=$final;bytes=$bytes" - echo "wrote $final ($bytes bytes)" -} - -# ── Phases 4 + 5: restic backup to NAS / B2 ──────────────────────────────── -# Live Docker datadirs are excluded — dumps (above) are the authoritative -# restore source for Postgres and Forgejo. -RESTIC_BACKUP_PATHS=( - /etc - /home/janpeter - /root - /opt - /srv - /usr/local/bin - "$DB_DUMP_DIR" - /srv/ops/backups -) -RESTIC_EXCLUDES=( - --exclude='**/node_modules' - --exclude='**/.next/cache' - --exclude='**/.cache' - --exclude='**/.git/objects/pack' - --exclude='/srv/backups/logs' - --exclude='/tmp' - --exclude='/var/tmp' - --exclude='/srv/scrum4me/postgres' # live Postgres datadir — non-authoritative - --exclude='/srv/forgejo/data/git' # live Forgejo git objects — non-authoritative - --exclude='/srv/forgejo/data/lfs' - --exclude='/srv/forgejo/data/queues' -) - -restic_backup_to() { - local repo="$1"; local label="$2" - local extra_args=() - if [ "$label" = "b2" ] && [ -n "${BACKUP_LIMIT_UPLOAD_KIB:-}" ]; then - extra_args+=(--limit-upload "$BACKUP_LIMIT_UPLOAD_KIB") - fi - - # Capture restic JSON output so we can extract the snapshot id. - local json_out - json_out=$(mktemp -t "restic-backup-${label}.XXXXXX.json") - - # --no-scan keeps the lockfile interaction light; --skip-if-unchanged still - # records a snapshot per restic semantics so the dashboard sees a daily entry. - restic -r "$repo" backup \ - --tag scheduled \ - --tag "host=$(hostname)" \ - --json \ - "${extra_args[@]}" \ - "${RESTIC_EXCLUDES[@]}" \ - "${RESTIC_BACKUP_PATHS[@]}" \ - | tee "$json_out" - local rc=${PIPESTATUS[0]} - - # Extract snapshot id from the final summary line (last JSON object of type=summary). 
- local snap - snap=$(jq -rs 'map(select(.message_type=="summary")) | last | .snapshot_id // empty' < "$json_out" 2>/dev/null || true) - local files_new - files_new=$(jq -rs 'map(select(.message_type=="summary")) | last | .files_new // empty' < "$json_out" 2>/dev/null || true) - local data_added - data_added=$(jq -rs 'map(select(.message_type=="summary")) | last | .data_added // empty' < "$json_out" 2>/dev/null || true) - - if [ -n "$snap" ]; then - PHASE_EXTRA["restic_$label"]="snapshot_id=$snap;files_new=${files_new:-0};data_added_bytes=${data_added:-0}" - fi - - rm -f "$json_out" - return "$rc" -} - -# ── Phase 6: prune NAS only (B2 is Object Lock — pruning runs off-server) ── -restic_forget_nas() { - restic -r "$RESTIC_REPO_NAS" forget \ - --keep-daily 7 \ - --keep-weekly 4 \ - --keep-monthly 12 \ - --prune -} - -# ── Phase 7: integrity check (light daily; weekly read-data-subset on Sun) ─ -is_sunday() { - [ "$(date +%u)" = "7" ] -} - -restic_check_nas() { - if is_sunday; then - restic -r "$RESTIC_REPO_NAS" check --read-data-subset=2.5% - else - restic -r "$RESTIC_REPO_NAS" check - fi -} - -restic_check_b2() { - if is_sunday; then - # On B2 a read-data-subset costs bandwidth + B2 download fees. Keep the - # subset tiny on Sundays; deeper checks run monthly off-server. - restic -r "$RESTIC_REPO_B2" check --read-data-subset=1% - else - restic -r "$RESTIC_REPO_B2" check - fi -} - -# ── Statusfile writer ────────────────────────────────────────────────────── -# Builds a structured JSON statusfile in /srv/backups/status/last-run.json -# atomically (write to tmp, then mv). -write_status_json() { - local tmpfile - tmpfile=$(mktemp -t "backup-status.XXXXXX.json") - - # Build the phases object incrementally with jq for safe escaping. - local phases_json='{}' - local name status exit_code started ended err extra - local snapshot_id files_new data_added output_file bytes - for name in "${PHASE_ORDER[@]}"; do - status="${PHASE_STATUS[$name]:-pending}" - exit_code="${PHASE_EXIT[$name]:-}" - started="${PHASE_START[$name]:-}" - ended="${PHASE_END[$name]:-}" - err="${PHASE_ERR[$name]:-}" - extra="${PHASE_EXTRA[$name]:-}" - - snapshot_id="" - files_new="" - data_added="" - output_file="" - bytes="" - if [ -n "$extra" ]; then - # extra is a semicolon-separated list of key=value pairs - local pair key val - IFS=';' read -ra pairs <<< "$extra" - for pair in "${pairs[@]}"; do - key="${pair%%=*}" - val="${pair#*=}" - case "$key" in - snapshot_id) snapshot_id="$val" ;; - files_new) files_new="$val" ;; - data_added_bytes) data_added="$val" ;; - output_file) output_file="$val" ;; - bytes) bytes="$val" ;; - esac - done - fi - - # exit_code as JSON number when present, null otherwise. 
- local exit_arg='null' - if [ -n "$exit_code" ]; then - exit_arg="$exit_code" - fi - - phases_json=$( - jq -c -n \ - --argjson base "$phases_json" \ - --arg name "$name" \ - --arg status "$status" \ - --argjson exit_code "$exit_arg" \ - --arg started "$started" \ - --arg ended "$ended" \ - --arg err "$err" \ - --arg snapshot_id "$snapshot_id" \ - --arg files_new "$files_new" \ - --arg data_added "$data_added" \ - --arg output_file "$output_file" \ - --arg bytes "$bytes" \ - ' - $base + { - ($name): ({ - status: $status, - exit_code: $exit_code, - started_at: (if $started == "" then null else $started end), - completed_at: (if $ended == "" then null else $ended end), - error: (if $err == "" then null else $err end) - } - + (if $snapshot_id != "" then { snapshot_id: $snapshot_id } else {} end) - + (if $files_new != "" then { files_new: ($files_new | tonumber? // null) } else {} end) - + (if $data_added != "" then { data_added_bytes: ($data_added | tonumber? // null) } else {} end) - + (if $output_file != "" then { output_file: $output_file } else {} end) - + (if $bytes != "" then { bytes: ($bytes | tonumber? // null) } else {} end)) - }' - ) - done - - jq -n \ - --arg overall "$OVERALL_STATUS" \ - --arg started "$STARTED_AT" \ - --arg completed "$(date -Is)" \ - --argjson duration "$SECONDS" \ - --arg host "$(hostname)" \ - --argjson phases "$phases_json" \ - '{ - schema_version: 1, - overall_status: $overall, - started_at: $started, - completed_at: $completed, - duration_seconds: $duration, - host: $host, - phases: $phases - }' > "$tmpfile" - - mv "$tmpfile" "$STATUS_DIR/last-run.json" - chmod 0644 "$STATUS_DIR/last-run.json" -} - -# ── Outcome aggregation ──────────────────────────────────────────────────── -# success → exit 0 -# partial_failure → exit 75 (visible but distinguishable from hard failure) -# failed → exit 1 -determine_exit_code() { - local critical_failure=false - local has_failure=false - local has_degraded=false - local name status - - for name in "${PHASE_ORDER[@]}"; do - status="${PHASE_STATUS[$name]:-pending}" - case "$status" in - success|skipped) ;; - degraded) has_degraded=true ;; - failed) - has_failure=true - case "$name" in - postgres_dump) critical_failure=true ;; # losing the DB dump is catastrophic - esac - ;; - esac - done - - # Losing BOTH restic repos is also catastrophic. - if [ "${PHASE_STATUS[restic_nas]:-}" = "failed" ] \ - && [ "${PHASE_STATUS[restic_b2]:-}" = "failed" ]; then - critical_failure=true - fi - - # NB: deze functie wordt direct (niet via $(...)) aangeroepen, anders gaan - # de OVERALL_STATUS-assignments verloren in de subshell — write_status_json - # zou dan "unknown" wegschrijven en de eind-banner idem. 
- if [ "$critical_failure" = true ]; then - OVERALL_STATUS="failed" - EXIT_CODE=1 - elif [ "$has_failure" = true ] || [ "$has_degraded" = true ]; then - OVERALL_STATUS="partial_failure" - EXIT_CODE=75 - else - OVERALL_STATUS="success" - EXIT_CODE=0 - fi -} - -# ── Main sequence ────────────────────────────────────────────────────────── -run_phase postgres_dump dump_postgres_all -run_phase forgejo_dump dump_forgejo -run_phase forgejo_db_dump dump_forgejo_db -run_phase restic_nas restic_backup_to "$RESTIC_REPO_NAS" nas -run_phase restic_b2 restic_backup_to "$RESTIC_REPO_B2" b2 -run_phase forget_nas restic_forget_nas -run_phase check_nas restic_check_nas -run_phase check_b2 restic_check_b2 - -determine_exit_code # sets OVERALL_STATUS + EXIT_CODE in this shell -write_status_json - -echo "" -echo "════════════════════════════════════════════════════════════════" -echo " Server backup — finished $(date -Is)" -echo " Overall status: $OVERALL_STATUS (exit $EXIT_CODE)" -echo " Duration: ${SECONDS}s" -echo " Status file: $STATUS_DIR/last-run.json" -echo " Log file: $LOG_FILE" -echo "════════════════════════════════════════════════════════════════" - -exit "$EXIT_CODE" diff --git a/deploy/server-backup/server-backup.timer b/deploy/server-backup/server-backup.timer deleted file mode 100644 index ea80dc2..0000000 --- a/deploy/server-backup/server-backup.timer +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=Daily server-wide backup (timer) - -[Timer] -# Daily at 03:30 local. After ops-db-backup.timer (02:00) so the ops_dashboard -# pg_dump from /srv/ops/backups/ is fresh when restic picks it up. -OnCalendar=*-*-* 03:30:00 -Persistent=true -RandomizedDelaySec=600 - -[Install] -WantedBy=timers.target diff --git a/deploy/server-backup/wrappers/read-status.sh b/deploy/server-backup/wrappers/read-status.sh deleted file mode 100644 index b1e0081..0000000 --- a/deploy/server-backup/wrappers/read-status.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# Read /srv/backups/status/last-run.json. Returns "{}" if missing, so the -# dashboard can render an "unknown" state instead of erroring. - -set -uo pipefail - -STATUS_FILE="${STATUS_FILE:-/srv/backups/status/last-run.json}" -RESTORE_STATUS_FILE="${RESTORE_STATUS_FILE:-/srv/backups/status/last-restore-test.json}" - -# We emit a small wrapper object with both files so the UI can render the -# server-backup status AND the most recent restore-test status from one call. -last_run='{}' -if [ -r "$STATUS_FILE" ]; then - last_run=$(cat "$STATUS_FILE") -fi - -last_restore='null' -if [ -r "$RESTORE_STATUS_FILE" ]; then - last_restore=$(cat "$RESTORE_STATUS_FILE") -fi - -jq -n \ - --argjson last_run "$last_run" \ - --argjson last_restore "$last_restore" \ - '{ last_run: $last_run, last_restore_test: $last_restore }' diff --git a/deploy/server-backup/wrappers/restic-check.sh b/deploy/server-backup/wrappers/restic-check.sh deleted file mode 100644 index ecd8eb6..0000000 --- a/deploy/server-backup/wrappers/restic-check.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# Run a light restic integrity check on the given repo. -# Usage: restic-check.sh nas|b2 - -set -uo pipefail - -LABEL="${1:-}" -if [ "$LABEL" != "nas" ] && [ "$LABEL" != "b2" ]; then - echo "label must be nas or b2" >&2 - exit 2 -fi - -if [ -z "${RESTIC_REPO_NAS:-}" ] && [ -r /etc/restic-backup.env ]; then - set -a; . 
/etc/restic-backup.env; set +a -fi - -case "$LABEL" in - nas) REPO="${RESTIC_REPO_NAS:?RESTIC_REPO_NAS not set}" ;; - b2) REPO="${RESTIC_REPO_B2:?RESTIC_REPO_B2 not set}" ;; -esac - -export RESTIC_PASSWORD_FILE="${RESTIC_PASSWORD_FILE:-/etc/restic-backup.password}" - -restic -r "$REPO" check diff --git a/deploy/server-backup/wrappers/restic-snapshots.sh b/deploy/server-backup/wrappers/restic-snapshots.sh deleted file mode 100644 index bd6e6a8..0000000 --- a/deploy/server-backup/wrappers/restic-snapshots.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash -# List recent restic snapshots from a labelled repo. Output: JSON array. -# Usage: restic-snapshots.sh nas|b2 - -set -uo pipefail - -LABEL="${1:-}" -if [ "$LABEL" != "nas" ] && [ "$LABEL" != "b2" ]; then - echo '{"error":"label must be nas or b2"}' >&2 - exit 2 -fi - -# Load env (idempotent — systemd already loaded it for service contexts). -if [ -z "${RESTIC_REPO_NAS:-}" ] && [ -r /etc/restic-backup.env ]; then - set -a; . /etc/restic-backup.env; set +a -fi - -case "$LABEL" in - nas) REPO="${RESTIC_REPO_NAS:?RESTIC_REPO_NAS not set}" ;; - b2) REPO="${RESTIC_REPO_B2:?RESTIC_REPO_B2 not set}" ;; -esac - -export RESTIC_PASSWORD_FILE="${RESTIC_PASSWORD_FILE:-/etc/restic-backup.password}" - -# Show last 30 snapshots, newest first, with the fields the UI needs. -restic -r "$REPO" snapshots --json 2>/dev/null \ - | jq --arg repo "$LABEL" ' - sort_by(.time) | reverse | .[0:30] - | map({ - id: .id, - short_id: (.short_id // (.id[0:8])), - time: .time, - hostname: .hostname, - tags: (.tags // []), - paths: (.paths // []), - summary: (.summary // null), - repo: $repo - }) - ' diff --git a/deploy/server-backup/wrappers/restic-stats.sh b/deploy/server-backup/wrappers/restic-stats.sh deleted file mode 100644 index 4eeea6d..0000000 --- a/deploy/server-backup/wrappers/restic-stats.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash -# Repo stats: combines restic stats in two modes plus snapshot count. -# Output: JSON object with restore_size_bytes, raw_data_bytes, dedup_ratio. -# Usage: restic-stats.sh nas|b2 - -set -uo pipefail - -LABEL="${1:-}" -if [ "$LABEL" != "nas" ] && [ "$LABEL" != "b2" ]; then - echo '{"error":"label must be nas or b2"}' >&2 - exit 2 -fi - -if [ -z "${RESTIC_REPO_NAS:-}" ] && [ -r /etc/restic-backup.env ]; then - set -a; . /etc/restic-backup.env; set +a -fi - -case "$LABEL" in - nas) REPO="${RESTIC_REPO_NAS:?RESTIC_REPO_NAS not set}" ;; - b2) REPO="${RESTIC_REPO_B2:?RESTIC_REPO_B2 not set}" ;; -esac - -export RESTIC_PASSWORD_FILE="${RESTIC_PASSWORD_FILE:-/etc/restic-backup.password}" - -# restore-size: total bytes if every file in every snapshot were re-extracted. -restore_json=$(restic -r "$REPO" stats --mode restore-size --json 2>/dev/null || echo '{}') -# raw-data: total unique blob bytes after dedup + compression. -raw_json=$(restic -r "$REPO" stats --mode raw-data --json 2>/dev/null || echo '{}') -# Snapshot count for the same repo. 
-snap_count=$(restic -r "$REPO" snapshots --json 2>/dev/null | jq 'length // 0')
-
-jq -n \
-  --arg repo "$LABEL" \
-  --argjson restore "$restore_json" \
-  --argjson raw "$raw_json" \
-  --argjson snap_count "${snap_count:-0}" \
-  '
-  {
-    repo: $repo,
-    snapshots_count: $snap_count,
-    restore_size_bytes: ($restore.total_size // null),
-    restore_size_files: ($restore.total_file_count // null),
-    raw_data_bytes: ($raw.total_size // null),
-    raw_blob_count: ($raw.total_blob_count // null),
-    dedup_ratio: (
-      if ($restore.total_size != null) and ($raw.total_size != null) and ($raw.total_size > 0)
-      then (($restore.total_size | tonumber) / ($raw.total_size | tonumber))
-      else null
-      end
-    )
-  }'
diff --git a/deploy/server-backup/wrappers/trigger-backup.sh b/deploy/server-backup/wrappers/trigger-backup.sh
deleted file mode 100644
index 6a2ee61..0000000
--- a/deploy/server-backup/wrappers/trigger-backup.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-# Trigger server-backup.service ad-hoc. Refuses if a run is already active
-# (the script itself also flock's, but checking here gives a friendlier error).
-
-set -uo pipefail
-
-UNIT=server-backup.service
-
-active=$(systemctl is-active "$UNIT" 2>/dev/null || true)
-if [ "$active" = "active" ] || [ "$active" = "activating" ]; then
-  echo "ERROR: $UNIT is already $active — refusing to trigger." >&2
-  exit 75
-fi
-
-# Use --no-block so we return immediately; the dashboard will poll via
-# read-status.sh and tail the log to follow progress.
-systemctl start --no-block "$UNIT"
-echo "Triggered $UNIT. Follow with: journalctl -u $UNIT -f"
diff --git a/deploy/server-backup/wrappers/trigger-restore-test.sh b/deploy/server-backup/wrappers/trigger-restore-test.sh
deleted file mode 100644
index d0368b3..0000000
--- a/deploy/server-backup/wrappers/trigger-restore-test.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-# Run a non-destructive restore test against the NAS repo. Streams output to
-# stdout (so the dashboard's StreamingTerminal can render it) and writes the
-# structured result to /srv/backups/status/last-restore-test.json.
-
-set -uo pipefail
-
-REPO_LABEL="${1:-nas}"
-
-if [ ! -x /srv/backups/scripts/restore-test.sh ]; then
-  echo "ERROR: /srv/backups/scripts/restore-test.sh not installed" >&2
-  exit 1
-fi
-
-exec /srv/backups/scripts/restore-test.sh "$REPO_LABEL"
diff --git a/docs/handleiding.md b/docs/handleiding.md
deleted file mode 100644
index d5ceb1a..0000000
--- a/docs/handleiding.md
+++ /dev/null
@@ -1,128 +0,0 @@
-# Manual — Ops Dashboard
-
-For the day-to-day administrator of a single-host server stack (Docker, systemd, Git, Caddy, Postgres). This manual describes *how* to use the app, not *how* it works — for the latter, see [`specs/technical.md`](./specs/technical.md).
-
-## First login
-
-1. Open `https://<host>/` (e.g. `https://ops.jp-visser.nl`).
-2. You are redirected to `/login`. Enter the admin credentials you set during deploy in `SEED_USER_EMAIL` / `SEED_USER_PASSWORD`.
-3. After a successful login you stay signed in for 24 hours via an HttpOnly cookie. After that, log in again.
-
-Forgot your password?
No reset flow in the app — fix it with a SQL update:
-
-```bash
-# Generate a new bcrypt hash for your new password
-docker run --rm -e PW='<new-password>' node:22-alpine sh -c '
-  cd /tmp && npm init -y >/dev/null 2>&1 && npm install --silent bcryptjs >/dev/null 2>&1
-  node -e "console.log(require(\"bcryptjs\").hashSync(process.env.PW, 12))"
-'
-# Paste into psql:
-docker exec -it scrum4me-postgres psql -U scrum4me -d ops_dashboard \
-  -c "UPDATE \"User\" SET pwd_hash = '<hash>' WHERE email = '<email>';"
-```
-
-## Dashboard (home)
-
-Five live status widgets, auto-refresh ~5 s:
-
-| Widget | Shows |
-|---|---|
-| **Docker** | Number of running containers + list |
-| **Git** | Branch and uncommitted status per configured repo path |
-| **systemd** | Service status (active/inactive/failed) per configured unit |
-| **Caddy** | TLS certs with the nearest expiry date (yellow = <30 days) |
-| **Audit** | Most recent flow run with timestamp and exit status |
-
-Click a widget to go to its detail page.
-
-## Modules
-
-### `/docker` — Containers
-
-Table of `docker ps` with auto-refresh. Click a container name for detail (logs, image, ports, status).
-
-Read-only — no start/stop/restart from the UI. For changes: a **Flow** (see below).
-
-### `/git` — Repositories
-
-Per configured path in `REPO_PATHS` (env var): branch, uncommitted files (M/A/D/??), last 3 commits. Click through for the diff viewer.
-
-Read-only — pulls/commits go through a Flow.
-
-### `/systemd` — Services
-
-List of services from `SYSTEMD_UNITS` (env var). Shows status and the most recent log lines. Click through for a full journal tail of that unit.
-
-Restart button: only for units that are whitelisted in `commands.yml` **and** listed with `NOPASSWD` in `sudoers.d/ops-agent`.
-
-### `/caddy` — Reverse proxy & TLS
-
-Shows the active Caddyfile (syntax-highlighted) plus all TLS certs (subject + expiry). The "Edit" button opens an editor — saving **validates** the Caddyfile via `caddy validate` before anything is written.
-
-### `/flows` — Multi-step deployments
-
-Two predefined flows:
-
-- **Update Scrum4Me website** — pull main, build container, restart, smoke-test
-- **Update Caddy config** — write the new Caddyfile, validate, restart Caddy, verify that all hostnames still respond
-
-A flow runs step by step with **dry-run** as the default. After a dry-run you see, per step, what it is going to do. Click "Run" to execute for real. During execution you see live stdout/stderr per step.
-
-### `/audit` — Flow runs
-
-Chronological list of all started flows: start time, duration, exit status, who started it. Click through for the full output (stdout/stderr per step).
-
-### `/settings/backups` — Backups
-
-Postgres backup management:
-
-- List of existing dump files in `/srv/scrum4me/backups`
-- A "Backup now" button creates a dump with a timestamped name
-- Restore runbook (manual steps — no automated restore from the UI, to keep the blast radius small)
-
-## Common tasks
-
-### A container hangs — now what?
-
-1. `/docker` → click the container name → inspect the logs
-2. Need to dig deeper? Open an SSH session and use `docker logs`, `docker exec`, etc. (Not from the UI — that is out of scope.)
-3. Restart needed? Add the container to the `commands.yml` whitelist (on the host) + run it via `/flows`
-
-### Changing the Caddy config
-
-1. `/caddy` → "Edit"
-2. Adjust the Caddyfile in the editor
-3. Save → the app runs `caddy validate` → on success it is written and Caddy restarts
-4. Verify in `/caddy` that the cert overview looks right
-
-> For breaking changes (wrong syntax or a non-existent site): the validate step blocks the save. When in doubt, back up `/srv/scrum4me/caddy/Caddyfile` first.
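-To sanity-check a Caddyfile outside the UI — a sketch, not the app's exact
-invocation (it assumes the container mounts the host file at
-`/etc/caddy/Caddyfile`; adjust the path to your compose file):
-
-```bash
-docker exec scrum4me-caddy caddy validate \
-  --config /etc/caddy/Caddyfile --adapter caddyfile
-```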
-### Merging a sprint via a flow
-
-`/flows/update-scrum4me-web` — pick a branch (default `main`), click dry-run, read what it will do, click "Run". Step output streams live. After success, a smoke test verifies that the homepage returns a 200.
-
-## What you can **not** do from the UI
-
-- SSH access or arbitrary shell commands (only whitelisted commands.yml keys)
-- User management (one admin via seed; multi-user is out of scope)
-- Starting a container with a different image or args (only restarting existing ones)
-- Password reset (requires a SQL update)
-- Forcing a cert manually (Caddy does auto-ACME)
-
-## Logs for incident response
-
-| Component | Log location |
-|---|---|
-| Dashboard app | `docker logs scrum4me-ops-dashboard` |
-| ops-agent | `journalctl -u ops-agent -f` |
-| Caddy | `docker logs scrum4me-caddy` |
-| Postgres | `docker logs scrum4me-postgres` |
-
-The who-did-what-when audit trail lives in the `FlowRun` + `FlowStep` tables in the `ops_dashboard` database, or via `/audit` in the UI.
-
-## Security advice
-
-- Do **not** expose port 3099 (ops-agent) to the outside world. UFW rule scoped to `172.18.0.0/16`. See [`runbooks/post-install.md`](./runbooks/post-install.md).
-- Rotate `OPS_AGENT_SECRET` yearly: new secret in `.env` **and** `/etc/ops-agent/secret`, then restart both.
-- Do not add wildcards to `sudoers.d/ops-agent` — every `systemctl` action must name an explicit service.
-- `commands.yml` is the single source of truth for what the agent may do — anything outside the whitelist cannot be executed by an attacker, even with the secret in hand.
diff --git a/docs/runbooks/server-backup.md b/docs/runbooks/server-backup.md
deleted file mode 100644
index d96c093..0000000
--- a/docs/runbooks/server-backup.md
+++ /dev/null
@@ -1,562 +0,0 @@
-# Server-wide backup (restic + NAS + B2, dashboard-operated)
-
-## Context
-
-`scrum4me-srv` runs a Docker stack (Scrum4Me web, worker-idea, ops-dashboard,
-postgres-17, caddy) plus Forgejo. The current backup coverage — only
-`pg_dump ops_dashboard` to `/srv/ops/backups/` with 30 days of retention on a
-single disk — leaves **everything else** exposed: Scrum4Me data, Forgejo,
-Caddy certs, Docker volumes and `/etc` are gone after fire, theft, ransomware
-or a disk failure.
-
-Goal: make the server **rebuildable** from an encrypted, deduplicated,
-versioned backup with two independent copies — **NAS** locally and
-**Backblaze B2** offsite — operated from the ops dashboard. The existing
-`backup_ops_db` flow keeps running; restic picks up its dump directory.
-
-**Key design choices** (discussed at length in the review under
-`/Users/janpetervisser/Development/Scrum4Me/docs/recommendations/server-backup-plan-review-2026-05-15.md`):
-
-- **B2 Object Lock + a server key without `deleteFiles`** — an attacker with
-  root on the server cannot remove B2 snapshots until the Object Lock
-  retention expires. That is the ransomware protection. Pruning on B2 happens
-  monthly from the laptop with a separate high-capability maintenance key.
-- **Authoritative restore source = dumps, not live data dirs.** The Postgres
-  and Forgejo data directories are explicitly `--exclude`'d from restic;
-  `pg_dumpall` and `forgejo dump` + a separate `pg_dump <db>` are the
-  authoritative sources.
-- **Phase-based script with a structured status file.** One failing phase lets
-  the rest continue; per-phase status / exit code / timestamps / error tail
-  end up in `/srv/backups/status/last-run.json`, which the dashboard reads
-  live.
-- **Single-instance lock** via `flock /run/server-backup.lock` — the UI button
-  and the systemd timer cannot overlap each other.
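-For reference, the guard is a few lines at the top of `server-backup.sh`
-(reproduced here; exit 75 is the same "busy/partial" code used elsewhere):
-
-```bash
-exec 9>/run/server-backup.lock || exit 1
-if ! flock -n 9; then
-  echo "ERROR: another server-backup is already running" >&2
-  exit 75
-fi
-```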
-
-## Prerequisites (verifiably satisfied before execution)
-
-- [ ] bash, jq, restic, docker, gzip, flock on `$PATH` (`apt install restic jq` for the first two — the rest is standard).
-- [ ] The Scrum4Me stack runs in Docker (`docker ps | grep scrum4me-postgres`).
-- [ ] `/srv/scrum4me/compose/docker-compose.yml` exists (otherwise revisit the exclude path in `server-backup.sh`).
-- [ ] The clock is in sync (`timedatectl status`) — backups use ISO timestamps.
-
-## Prerequisites (input needed from the user)
-
-- **NAS mount** — a path such as `/mnt/nas/backups` with enough space (initially ≥ 100 GB; restic deduplicates, so growth is slow after that).
-- **Backblaze B2 account** — credit card registered; creating the bucket requires an operator action.
-- **Restic password** — `openssl rand -hex 24`, stored in your password manager **and** in `/etc/restic-backup.password` on the server. Keep both — if the password is gone, the repos are unreadable.
-- **B2 maintenance key** — stored only in the password manager on your laptop. Not on the server.
-
---
-
-## Part A — Preparation on `scrum4me-srv`
-
-To be executed as `root` on `scrum4me-srv`.
-
-1. **Install tools**
-   ```bash
-   sudo apt update
-   sudo apt install -y restic jq
-   restic version
-   ```
-
-2. **Create directories**
-   ```bash
-   sudo mkdir -p /srv/backups/scripts /srv/backups/logs /srv/backups/status \
-     /var/backups/databases
-   sudo chmod 0750 /srv/backups/logs /srv/backups/status
-   ```
-
-3. **Create the NAS mount** — a new mount at `/mnt/nas/backups` pointing at the
-   `backups` subdir of the existing `ssd` share on the NAS. No new Samba share
-   needed on the NAS — the cifs `prefixpath` option mounts the subpath directly.
-
-   ```bash
-   # 1. Create the subdir on the NAS via the existing ssd mount
-   sudo mkdir -p /mnt/nas/ssd/backups
-
-   # 2. cifs-utils installed? (already the case for the other /mnt/nas shares)
-   dpkg -l | grep -q '^ii cifs-utils' || sudo apt install -y cifs-utils
-
-   # 3. Add the fstab entry — uid/gid=0 + mode 0700/0600 = root-only
-   sudo tee -a /etc/fstab <<'EOF'
-   //192.168.0.155/ssd /mnt/nas/backups cifs credentials=/etc/samba/credentials-nas,uid=0,gid=0,iocharset=utf8,vers=3.0,nofail,_netdev,x-systemd.automount,prefixpath=backups,file_mode=0600,dir_mode=0700 0 0
-   EOF
-
-   # 4. systemd reload + mount
-   sudo systemctl daemon-reload
-   sudo mount /mnt/nas/backups
-   mountpoint -q /mnt/nas/backups && echo "OK" || echo "FAIL"
-   df -h /mnt/nas/backups
-   ```
-
-   `_netdev,x-systemd.automount,nofail` makes the mount come back automatically
-   after a reboot without hanging the boot if the NAS is briefly away. The
-   `RequiresMountsFor=/mnt/nas/backups` in `server-backup.service` additionally
-   triggers the automount for timer runs.
-
-4. **Generate and install the restic password**
-   ```bash
-   sudo sh -c 'openssl rand -hex 24 > /etc/restic-backup.password'
-   sudo chmod 0400 /etc/restic-backup.password
-   sudo chown root:root /etc/restic-backup.password
-   ```
-   **Copy the same string into your password manager** before you continue. A generated password that only exists on the server is not a password — it is a ticking time bomb.
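-A quick self-check that Part A landed correctly — a sketch under the paths
-configured above, not an official step:
-
-```bash
-mountpoint -q /mnt/nas/backups || echo "FAIL: NAS mount missing" >&2
-[ "$(stat -c '%a %U' /etc/restic-backup.password)" = "400 root" ] \
-  || echo "FAIL: password file should be 0400 root:root" >&2
-for t in restic jq; do command -v "$t" >/dev/null || echo "FAIL: $t missing" >&2; done
-```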
-
---
-
-## Part B — Setting up Backblaze B2 (Object Lock + scoped keys)
-
-Goal: a bucket whose **existing** snapshots cannot be deleted by the server, plus two separate keys: one for the server (read/write only) and one for the operator (all rights, used only from the laptop).
-
-1. **Create the bucket** in the Backblaze UI or via the `b2` CLI:
-   - Name: `scrum4me-srv-backup` (or a variant; record it in `/etc/restic-backup.env`).
-   - Privacy: **Private**.
-   - **File Lock: Enabled, Governance mode, default retention = 30 days**. Governance means: a key with `bypassGovernance` can override locks — we grant that capability **only** to the maintenance key.
-   - Lifecycle rules: **none** (lifecycle conflicts with Object Lock).
-   - Encryption: leave server-side encryption on (the B2 default).
-
-2. **Create the server key** (goes into `/etc/restic-backup.env` on the server):
-   ```bash
-   # via the b2 CLI:
-   b2 application-key create \
-     --bucket scrum4me-srv-backup \
-     --name-prefix scrum4me-srv \
-     server-backup-key \
-     listBuckets,listFiles,readFiles,writeFiles
-   ```
-   Save the output (`keyID` + `applicationKey`). Verify in the UI that the key has **no** `deleteFiles`, **no** `deleteKeys` and **no** `bypassGovernance`.
-
-3. **Create the maintenance key** (goes into the password manager on your laptop):
-   ```bash
-   b2 application-key create \
-     --bucket scrum4me-srv-backup \
-     scrum4me-srv-maintenance-key \
-     listBuckets,listFiles,readFiles,writeFiles,deleteFiles,bypassGovernance
-   ```
-   This key **never** touches the server. Use it only for `restic forget --prune` from your laptop (see Part H).
-
-4. **Create `/etc/restic-backup.env`**
-   ```bash
-   sudo cp /srv/scrum4me/ops-dashboard/deploy/server-backup/restic-backup.env.example \
-     /etc/restic-backup.env
-   sudo chmod 0600 /etc/restic-backup.env
-   sudo chown root:root /etc/restic-backup.env
-   sudo nano /etc/restic-backup.env
-   ```
-   Fill in: `RESTIC_REPO_NAS`, `RESTIC_REPO_B2`, `B2_ACCOUNT_ID` (= keyID), `B2_ACCOUNT_KEY` (= applicationKey). The Forgejo fields follow in Part F.
-
-**Threat model**
-
-| Threat | Covered by this design? |
-|---|---|
-| Disk failure / corruption | ✓ NAS + B2 = 2× redundancy |
-| Fire / theft / water damage | ✓ B2 is offsite |
-| Ransomware on the server | ✓ B2 Object Lock — existing snapshots immutable until retention expires |
-| Server compromise (root) | ✓ the server key cannot delete B2 files |
-| Laptop and server compromised simultaneously | ✗ the maintenance key is then also in the attacker's hands — no defence |
-| Backblaze account compromise | ✗ — out of scope; mitigate with 2FA and the audit trail |
-| Loss of the restic password | ✗ — repos unreadable; keep the password in the password manager as well |
-
---
-
-## Part C — Initialising the restic repos
-
-1. **NAS repo init**
-   ```bash
-   sudo -E bash -c '
-     set -a; . /etc/restic-backup.env; set +a
-     export RESTIC_PASSWORD_FILE=/etc/restic-backup.password
-     restic -r "$RESTIC_REPO_NAS" init
-   '
-   ```
-
-2. **B2 repo init**
-   ```bash
-   sudo -E bash -c '
-     set -a; . /etc/restic-backup.env; set +a
-     export RESTIC_PASSWORD_FILE=/etc/restic-backup.password
-     restic -r "$RESTIC_REPO_B2" init
-   '
-   ```
-
-3. **Retention dry-run** — check that the forget policy is not overly aggressive on a first-snapshot-only repo. (On a fresh repo `forget` removes nothing, but this proves that all paths + auth work.)
-   ```bash
-   sudo -E bash -c '
-     set -a; . /etc/restic-backup.env; set +a
-     export RESTIC_PASSWORD_FILE=/etc/restic-backup.password
-     restic -r "$RESTIC_REPO_NAS" forget --keep-daily 7 --keep-weekly 4 --keep-monthly 12 --dry-run
-   '
-   ```
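-Optionally, smoke-test that the scoped server key can actually read the
-freshly initialised B2 repo — a sketch, not a required step (`restic cat
-config` needs only read access):
-
-```bash
-sudo -E bash -c '
-  set -a; . /etc/restic-backup.env; set +a
-  export RESTIC_PASSWORD_FILE=/etc/restic-backup.password
-  restic -r "$RESTIC_REPO_B2" cat config >/dev/null && echo "B2 repo reachable"
-'
-```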
-
---
-
-## Part D — Installing the scripts and systemd units
-
-1. **Copy the scripts**
-   ```bash
-   sudo cp /srv/scrum4me/ops-dashboard/deploy/server-backup/server-backup.sh /srv/backups/scripts/
-   sudo cp /srv/scrum4me/ops-dashboard/deploy/server-backup/restore-test.sh /srv/backups/scripts/
-   sudo chmod 0750 /srv/backups/scripts/*.sh
-   sudo chown root:root /srv/backups/scripts/*.sh
-   ```
-
-2. **Copy the systemd units**
-   ```bash
-   sudo cp /srv/scrum4me/ops-dashboard/deploy/server-backup/server-backup.service /etc/systemd/system/
-   sudo cp /srv/scrum4me/ops-dashboard/deploy/server-backup/server-backup.timer /etc/systemd/system/
-   sudo systemctl daemon-reload
-   sudo systemctl enable --now server-backup.timer
-   ```
-
-3. **Verify the timer**
-   ```bash
-   systemctl list-timers | grep server-backup
-   ```
-   Shows the next run tomorrow at 03:30 (+ a randomized delay of up to 10 min).
-
---
-
-## Part E — First manual run + status-file verification
-
-1. **Trigger**
-   ```bash
-   sudo systemctl start server-backup.service
-   ```
-
-2. **Follow live**
-   ```bash
-   journalctl -u server-backup.service -f
-   ```
-   Expected: 8 phases (postgres_dump, forgejo_dump, forgejo_db_dump, restic_nas, restic_b2, forget_nas, check_nas, check_b2), each with a `─── phase: X ───` start line and a `─── end X (exit=N, status=S)` end line.
-
-3. **Status file** (a machine-checkable variant follows after this list)
-   ```bash
-   sudo jq . /srv/backups/status/last-run.json
-   ```
-   Expected: `overall_status: "success"`, all 5 mandatory phases `success` (Forgejo may be `skipped` if it has not been configured yet).
-
-4. **Snapshots**
-   ```bash
-   sudo -E bash -c '
-     set -a; . /etc/restic-backup.env; set +a
-     export RESTIC_PASSWORD_FILE=/etc/restic-backup.password
-     restic -r "$RESTIC_REPO_NAS" snapshots
-     restic -r "$RESTIC_REPO_B2" snapshots
-   '
-   ```
-   Both show one snapshot with `host=scrum4me-srv` and the `scheduled` tag.
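-The status-file check from step 3, in machine-readable form — handy for a
-cron probe or shell alias (`jq -e` sets the exit code from the result):
-
-```bash
-sudo jq -e '.overall_status == "success"' /srv/backups/status/last-run.json \
-  >/dev/null && echo "last backup: healthy"
-```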
Separate `pg_dump ` — autoritatieve DB-restore-bron (Forgejo docs documenteren bekende import-issues bij DB-inhoud uit `forgejo dump`, daarom `--skip-db`). -3. Live datadirs (`/srv/forgejo/data/git`, `/srv/forgejo/data/lfs`, `/srv/forgejo/data/queues`) worden **niet** door restic gekopieerd — dat zijn live B-Trees waar een file-level kopie inconsistent zou zijn. - -### F5. Restore-test in geïsoleerde compose-stack - -Vóór je de Forgejo-restore echt nodig hebt: test hem een keer. Maak een tijdelijke directory met een verse Forgejo + Postgres, voer de dumps in, draai `forgejo doctor check --all`. - -```bash -# Minimaal restore-test-recept (vul in op basis van je Forgejo-versie) -RESTORE_DIR=/tmp/forgejo-restore-test -mkdir -p "$RESTORE_DIR" -cd "$RESTORE_DIR" - -# 1. compose-stack met blanco Forgejo + Postgres -cat > docker-compose.yml <<'YAML' -services: - forgejo: - image: codeberg.org/forgejo/forgejo: - volumes: [ "./forgejo-data:/data" ] - depends_on: [ db ] - db: - image: postgres:17 - environment: - POSTGRES_USER: forgejo - POSTGRES_PASSWORD: testtest - POSTGRES_DB: forgejo - volumes: [ "./db-data:/var/lib/postgresql/data" ] -YAML - -docker compose up -d - -# 2. DB-dump terugzetten -gunzip < /var/backups/databases/forgejo-db-$(date +%F).sql.gz \ - | docker compose exec -T db psql -U forgejo forgejo - -# 3. Forgejo-dump uitpakken in de data-volume -docker compose stop forgejo -unzip /var/backups/databases/forgejo-$(date +%F).zip -d forgejo-data/ -docker compose start forgejo - -# 4. Health-checks -docker compose exec forgejo forgejo doctor check --all -curl -fsS http://localhost:3000/api/v1/version -``` - -Slaagt `forgejo doctor check --all` en het `/api/v1/version`-endpoint? Dan is je Forgejo-restore werkend. Tear-down: `docker compose down -v && rm -rf "$RESTORE_DIR"`. - ---- - -## Deel G — Restore-procedure in productie - -### G1. Files uit een snapshot terughalen - -```bash -# Snapshot kiezen -sudo -E bash -c ' - set -a; . /etc/restic-backup.env; set +a - export RESTIC_PASSWORD_FILE=/etc/restic-backup.password - restic -r "$RESTIC_REPO_NAS" snapshots -' - -# Restore (latest, alleen /etc — voorbeeld) -sudo -E bash -c ' - set -a; . /etc/restic-backup.env; set +a - export RESTIC_PASSWORD_FILE=/etc/restic-backup.password - restic -r "$RESTIC_REPO_NAS" restore latest --target /tmp/restore --include /etc -' -``` - -### G2. Postgres herstellen (Scrum4Me-cluster) - -```bash -# Stop de apps die met de DB praten -docker compose -f /srv/scrum4me/compose/docker-compose.yml stop scrum4me-web ops-dashboard worker-idea - -# Restore dumpall (drop + recreate alle DBs in de cluster — vandaar --clean --if-exists in de dump) -gunzip < /var/backups/databases/postgres-2026-05-15.sql.gz \ - | docker exec -i scrum4me-postgres psql -U scrum4me - -# Apps weer aan -docker compose -f /srv/scrum4me/compose/docker-compose.yml start scrum4me-web ops-dashboard worker-idea -``` - -Voor partial restore (alleen één database): pak die DB uit de dumpall-tekst met `awk`-/`sed`-block-extractie. Een `pg_dumpall`-dump is platte SQL; `pg_restore` werkt alleen op custom-, tar- of directory-formaat archieven en dus niet op deze tekstdump. Voor alleen `ops_dashboard` is de bestaande [recovery.md](recovery.md) sectie 2a primair. - -### G3. Forgejo herstellen - -Volg [F5](#f5-restore-test-in-geïsoleerde-compose-stack) maar dan met de echte Forgejo-compose-stack en zonder tear-down. Belangrijk: stop de live Forgejo eerst, vervang `/srv/forgejo/data` volledig, restore DB, start Forgejo, `forgejo doctor check --all`. 
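- -Ter aanvulling op de partial-restore-noot in G2 een minimale `awk`-schets die één database uit de dumpall-tekst knipt. Aannames: elke database in de `pg_dumpall`-output begint met een eigen `\connect`-regel, en `ops_dashboard` is hier puur een voorbeeld-doel: - -```bash -# Print vanaf de \connect-regel van de doel-db tot aan de volgende \connect-regel. -# Let op: globals (rollen, en bij --clean de CREATE DATABASE-statements) staan -# vóór het eerste \connect-blok en komen dus niet mee; restore daarom in een -# bestaande database. -gunzip < /var/backups/databases/postgres-2026-05-15.sql.gz \ - | awk '/^\\connect /{ db=$2; gsub(/"/,"",db); p=(db=="ops_dashboard") } p' \ - > /tmp/ops_dashboard-only.sql - -docker exec -i scrum4me-postgres psql -U scrum4me -d ops_dashboard \ - < /tmp/ops_dashboard-only.sql -```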
- ---- - -## Deel H — Maintenance vanaf de laptop (maandelijks) - -Doel: B2-snapshots die ouder zijn dan de retention-policy daadwerkelijk prunen, plus een diepere integriteits-check die op de server te duur zou zijn. - -1. **Voorbereiding** (eenmalig op laptop): - ```bash - brew install restic jq - # Maintenance-key uit password manager - export B2_ACCOUNT_ID= - export B2_ACCOUNT_KEY= - export RESTIC_REPOSITORY=b2:scrum4me-srv-backup:scrum4me-srv - read -rs RESTIC_PASSWORD < /dev/tty # uit password manager - export RESTIC_PASSWORD - ``` - -2. **Prune-check** (eerst dry-run om te zien wat er zou gebeuren): - ```bash - restic forget --keep-daily 7 --keep-weekly 4 --keep-monthly 12 --dry-run - ``` - -3. **Daadwerkelijke prune** (vereist `bypassGovernance` capability — alleen via maintenance-key): - ```bash - restic forget --keep-daily 7 --keep-weekly 4 --keep-monthly 12 --prune - ``` - -4. **Diepere check**: - ```bash - restic check --read-data-subset=10% - ``` - B2-bandbreedte: 10% van een 50 GB repo = 5 GB download, B2-prijs ~ $0.05 (gratis 1 GB/dag). - -5. **Cleanup environment** — sluit de shell of `unset RESTIC_PASSWORD B2_ACCOUNT_*`. - ---- - -## Deel I — Integriteits-schedule (samenvatting) - -| Cadans | Wie | Wat | Waarom | -|---|---|---|---| -| Dagelijks 03:30 | server (systemd timer) | `restic check` op beide repos | snelle metadata-/structure-validatie | -| Wekelijks (zondag) | server (zelfde script) | `restic check --read-data-subset=2.5%` op NAS, `1%` op B2 | sample-based data-integrity | -| Maandelijks | operator (laptop) | `restic check --read-data-subset=10%` + `forget --prune` op B2 | diepere check + prune (B2 server-key heeft geen delete-rechten) | -| Maandelijks | operator (server) | `/srv/backups/scripts/restore-test.sh nas` + handmatige Forgejo-stack-restore (F5) | end-to-end restore-verificatie | - ---- - -## Te wijzigen / nieuw aangemaakte bestanden - -**Op `scrum4me-srv`** (alleen via deploy uit deze repo, geen handmatige edits): - -- `/srv/backups/scripts/server-backup.sh` (uit `deploy/server-backup/`). -- `/srv/backups/scripts/restore-test.sh` (idem). -- `/etc/systemd/system/server-backup.service`, `server-backup.timer` (uit `deploy/server-backup/`). -- `/etc/restic-backup.env` — secrets, niet in repo. -- `/etc/restic-backup.password` — secret, niet in repo. - -**In deze repo (`ops-dashboard`)**, nieuw aangemaakt: - -- `deploy/server-backup/*` — alle deploy-artefacten. -- `docs/runbooks/server-backup.md` — dit document. -- Later (Fase 3+4): `ops-agent/commands.yml.example`-uitbreiding, `ops-agent/flows.example/server_backup_*.yml`, `app/settings/backups/_components/server-backup-section.tsx`. - -**Op de laptop**, in password manager: - -- restic-wachtwoord (identiek aan `/etc/restic-backup.password`). -- B2 maintenance-key (keyID + applicationKey). - ---- - -## Veelvoorkomende fouten - -| Symptoom | Oorzaak | Fix | -|---|---|---| -| `unable to open repository ... no such file or directory` (NAS) | NAS-mount weg na reboot | `mountpoint -q /mnt/nas/backups` — fix `fstab`/`autofs`; herstart `server-backup.service` | -| `unable to open repository ... 
AccessDenied` (B2) | server-key heeft verkeerde capabilities of bucket-prefix | check `b2 application-key list`; capabilities moeten `listBuckets,listFiles,readFiles,writeFiles` zijn, name-prefix moet matchen | -| `Object Lock In Place` bij `forget --prune` op B2 | server probeert ten onrechte B2 te prunen (heeft die capability niet) | het script prunet alleen NAS — als deze fout opduikt, is er handmatig een `restic forget` op B2 gedraaid (dat hoort off-server te gebeuren); gebruik de maintenance-key | -| `restic snapshot tag scheduled` ontbreekt in UI | run heeft `--tag scheduled` niet meegekregen | check script — `restic_backup_to` zet beide tags hardcoded | -| `forgejo dump` faalt met permission denied | container-user niet `git` | pas `dump_forgejo` aan: `docker exec -u ` | -| restic exit code 3 in statusfile | sommige files waren niet leesbaar tijdens snapshot (open file lock) | non-fataal — log toont welke files; meestal logs of sockets; eventueel toevoegen aan `RESTIC_EXCLUDES` | -| `another server-backup is already running` exit 75 | timer en UI-knop tegelijk, of vorige run hangt | `systemctl status server-backup.service`; bij hang: `systemctl kill server-backup.service`, lockfile `/run/server-backup.lock` opruimen | -| `last-run.json` niet geüpdatet | script gecrasht vóór `write_status_json` | `journalctl -u server-backup.service --since=today` — meestal env-file of password-file probleem | -| Postgres-datadir in restic snapshot terug te zien | excludes verkeerd geconfigureerd | check `RESTIC_EXCLUDES` in script — moet `/srv/scrum4me/postgres` bevatten | - ---- - -## Verificatie (end-to-end) - -1. **Eerste run slaagt** — Deel E groen, statusfile `overall_status: success`. -2. **Snapshots zichtbaar** op beide repos via `restic snapshots`. -3. **Restore-test slaagt** — `restore-test.sh nas` → `overall_status: success` in `/srv/backups/status/last-restore-test.json`, alle assertions `ok`. -4. **Forgejo-restore-stack** (F5) — `forgejo doctor check --all` rond zonder errors, `/api/v1/version` antwoordt. -5. **Reboot-test** — server reboot, `systemctl list-timers` toont `server-backup.timer` met next-run gepland; NAS-mount automatisch terug. -6. **Failure-injectie**: - - NAS unmount → script eindigt met `overall_status: partial_failure`, `phases.restic_nas.status: failed`, B2-snapshot wel aanwezig, systemd exit 75. - - B2-key tijdelijk ongeldig → `phases.restic_b2.status: failed`, NAS-snapshot wel, exit 75. - - Beide repos onbereikbaar → `overall_status: failed`, exit 1. -7. **Concurrency** — tweede `systemctl start server-backup.service` tijdens lopende run → exit 75, log toont `another server-backup is already running`. -8. **Maandelijkse maintenance** — eerste keer succesvol uitgevoerd vanaf de laptop, B2 `forget --prune` slaagt zonder Object Lock-fouten. - ---- - -# Addendum — uitvoering 2026-05-15 - -Eerste install op `scrum4me-srv`. Bij aanvang waren alleen `restic` + `jq` -geïnstalleerd — Deel A2-Deel E + restore-test draaiden in deze sessie. - -## Vastgestelde topologie en concrete waarden - -| Plan-placeholder | Werkelijkheid op `scrum4me-srv` | -|---|---| -| Repo-pad in deploy-stappen | `/srv/scrum4me/ops-dashboard/` (runbook had `/srv/ops/repos/...` — bestaat niet op deze server). Ook in `recovery.md` en `deploy/ops-dashboard-updater/update.sh` staan nog `/srv/ops/repos`-verwijzingen — losse cleanup-taak. | -| NAS-mount | `/mnt/nas/backups` via cifs-`prefixpath=backups` op `//192.168.0.155/ssd`. 
**Geen aparte Samba-share aangemaakt** — de subdir `backups/` op de bestaande `ssd`-share is genoeg dankzij `prefixpath`. fstab-regel: `uid=0,gid=0,prefixpath=backups,file_mode=0600,dir_mode=0700,_netdev,x-systemd.automount,nofail`. | -| B2-bucket-naam | **`ScrumForMeSrvBackup`** (PascalCase) — niet de in het plan voorgestelde `scrum4me-srv-backup`. `RESTIC_REPO_B2=b2:ScrumForMeSrvBackup:scrum4me-srv` (case-sensitive). | -| B2-bucket-instellingen | Object Lock = Enabled, Mode = Governance, Default Retention = 30 days. Geen lifecycle rules. | -| B2 server-key capabilities | `listBuckets,listFiles,readFiles,writeFiles` (gemaakt via webportal als "Read and Write" — daar zat de juiste capability-set automatisch in). Geen `deleteFiles`, geen `bypassGovernance`. | -| B2 storage-cap | $10/maand. Bij 16 GB op B2 (zie cijfers onder) is dat $0,10/maand storage — ruim binnen de cap. | -| B2 maintenance-key | **Nog niet aangemaakt** — pas nodig bij eerste maandelijkse prune. Aanmaken vanaf laptop, met `Allow file deletes` en `Allow bypass governance retention` aangevinkt. | -| Forgejo-container | **`scrum4me-forgejo`** (image `codeberg.org/forgejo/forgejo:11`). | -| Forgejo `git`-user | uid 1000, bestaat ✓ — `docker exec -u git scrum4me-forgejo` werkt. | -| Forgejo data-locatie | docker named volume `forgejo_forgejo-data` (NIET `/srv/forgejo/data/...` zoals het runbook nog noemt — die paden bestaan niet maar de excludes zijn no-ops). | -| Forgejo-DB | rol `forgejo`, db `forgejo`, in `scrum4me-postgres`-container (zelfde Postgres als ops_dashboard, scrum4me). | -| Postgres data live | bind-mount `/srv/scrum4me/postgres/` (excluded). | -| restic-password locatie | `/etc/restic-backup.password` (mode 0400, root:root). Óók in passwordmanager onder "restic — scrum4me-srv". | -| systemd-timer | `server-backup.timer` enabled, dagelijks 03:30 + max 10 min randomized delay. | - -## Wijzigingen aan de in commit `ab87c0f` gemergede code - -Door deze sessie heen vier kleine fixes nodig gebleken — allemaal in deze PR: - -- **`server-backup.sh`**: `--skip-db` weggehaald uit `dump_forgejo`. Forgejo - 11.x heeft die flag niet (verwijderd na de Gitea-fork). Output van - `forgejo dump --help`: alleen `--skip-repository|-log|-custom-dir|-lfs-data|-attachment-data|-package-data|-index|-repo-archives`. De DB komt nu mee in de zip — redundant met `forgejo_db_dump`-fase, maar onschuldig. -- **`server-backup.sh`**: subshell-bug in `determine_exit_code` — werd aangeroepen via `EXIT_CODE=$(determine_exit_code)`, dus `OVERALL_STATUS=...` werd in de subshell gezet en lekte niet naar de parent. Resultaat: `last-run.json` schreef altijd `overall_status: "unknown"` en de eind-banner idem. Fix: directe call die zowel `OVERALL_STATUS` als `EXIT_CODE` in de parent-shell zet. -- **`restore-test.sh`**: deed een **full restore** zonder `--include`-filter — probeerde 476 GiB naar `/tmp` (7.6 GB tmpfs) te schrijven, ENOSPC + 3.3M errors + alle assertions "missing". Gefixt met `--include` op alleen de assertion-paden (`/etc/restic-backup.env`, `/srv/scrum4me/{compose/docker-compose.yml,caddy/Caddyfile}`, `/var/backups/databases`). Restore is nu 59 MB in 10s. -- **`server-backup.service`**: `RequiresMountsFor=/mnt/nas/backups` toegevoegd (triggert cifs-automount bij timer-fire), `ReadWritePaths` uitgebreid met `/mnt/nas/backups` (`ProtectSystem=strict` blokkeert anders schrijven naar de NAS-repo), `Documentation=`-URL gecorrigeerd. De pre-existing `RuntimeMaxSec= has no effect with Type=oneshot`-warning is cosmetisch en niet aangepakt. 
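- -Ter illustratie van de `determine_exit_code`-subshell-bug hierboven een geminimaliseerde schets; de functie-bodies zijn fictief, alleen het aanroep-patroon komt uit het script: - -```bash -# Fout: command substitution draait de functie in een subshell, dus de -# toewijzing aan OVERALL_STATUS lekt niet naar de parent-shell. -determine_exit_code() { OVERALL_STATUS="success"; echo 0; } -EXIT_CODE=$(determine_exit_code) -echo "${OVERALL_STATUS:-unknown}" # → "unknown", precies het symptoom in last-run.json - -# Fix: directe aanroep; de functie zet beide variabelen in de parent-shell. -determine_exit_code() { OVERALL_STATUS="success"; EXIT_CODE=0; } -determine_exit_code -echo "$OVERALL_STATUS $EXIT_CODE" # → "success 0" -```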
- -## Onderweg geleerde quirks - -- **`sudo -E` werkt niet op deze sudoers** — geeft warning `preserving the entire environment is not supported`. Niet erg: de scripts sourcen het env-file binnen de sudo'd shell zelf (`sudo bash -c '. /etc/restic-backup.env; ...'`), dus `-E` is overbodig. -- **B2 401-error op `b2_list_buckets` was misleidend** — keys waren prima (`b2_authorize_account` werkte), het probleem was dat `RESTIC_REPO_B2` een andere bucket-naam had dan waar de key voor scoped is. B2 geeft dan 401 i.p.v. 403/404. -- **B2 cap-error verschijnt als `403: Cannot upload files, storage cap exceeded`** — niet 402/payment-related. Cap kan op nul staan voor accounts die nog nooit een bucket vol hadden; verhogen via *Account → Caps & Alerts → Storage Cap*. -- **47× dedup** op de eerste snapshot — vooral door de drie git-repos in `/srv/scrum4me/repos/` plus de ~12k worker-log files in `/srv/scrum4me/worker-logs/idea/runs/` met veel overlap. - -## Eerste-run-cijfers - -``` -NAS: 16 GB op disk (du) - Restore-size: 974 GiB (over 2 snapshots; ~487 GiB per snap) - Raw-data: 20.6 GiB (post-dedup) - On-disk: 15.6 GiB (post-compressie, 1.32x) - Snapshots: 2 (eerste run + post-fix re-run) - -B2: ~16 GB op disk (vergelijkbare dedup + compressie) - Snapshots: 1 (na cap-bump + script-fix) - Storage-kost: ≈ $0,10/maand bij huidige grootte - -Eerste run (forgejo+B2 faalden): 47:42 wall-clock -Tweede run (alles success): ~15 min -Restore-test (na --include fix): 10s, 59 MiB gerestored -Files in snapshot: ~2.1M (1.9M unique blobs) -``` - -## Verificatie-status - -| Plan-stap | Status | -|---|---| -| 1. Eerste run slaagt | ✓ (na 2 attempts; success in run 2 om 15:23) | -| 2. Snapshots zichtbaar | ✓ NAS×2, B2×1 | -| 3. Restore-test slaagt | ✓ 4/4 assertions ok in 10s | -| 4. Forgejo-restore-stack (F5) | ✗ niet uitgevoerd — separate vervolg | -| 5. Reboot-test | ✗ niet uitgevoerd — productie-reboot, los moment | -| 6. Failure-injectie | ✗ niet bewust uitgevoerd; we hebben **wel** organisch failure paths gezien (B2-cap, forgejo `--skip-db`) en die rapporteerden zoals verwacht (exit 75, juiste per-phase-status) | -| 7. Concurrency | ✗ niet getest — `flock`-pad zit in script | -| 8. Maandelijkse maintenance vanaf laptop | — over een maand, met dan-aan-te-maken maintenance-key | - -## Te bewerken bestanden op `scrum4me-srv` - -- `/etc/fstab` — extra cifs-regel voor `/mnt/nas/backups` (zie Deel A3). -- `/etc/restic-backup.env` — secrets, mode 0600 root:root. -- `/etc/restic-backup.password` — mode 0400 root:root, óók in passwordmanager. -- `/etc/systemd/system/server-backup.{service,timer}` — uit de repo's - `deploy/server-backup/`. -- `/srv/backups/{scripts,logs,status}/`, `/var/backups/databases/`. -- `/srv/scrum4me/ops-dashboard/deploy/server-backup/*` — code uit deze PR - (na `git pull` op de server). diff --git a/docs/runbooks/tailscale-setup.md b/docs/runbooks/tailscale-setup.md deleted file mode 100644 index 5e578a5..0000000 --- a/docs/runbooks/tailscale-setup.md +++ /dev/null @@ -1,446 +0,0 @@ -# Ubuntu-omgeving (Postgres + app) via Tailscale bereikbaar maken vanaf de Mac - -## Context - -Er zijn twee Scrum4Me-omgevingen: -- **Omgeving 1** — productie: Vercel + Neon (managed Postgres). -- **Omgeving 2** — nieuw: een eigen Ubuntu-server (`scrum4me-srv`) die de - **volledige Scrum4Me-app (Next.js achter een reverse proxy) + zelf-gehoste - Postgres** gaat draaien. 
- -Het doel: vanaf de Mac (`janpeters-macbook-pro`) omgeving 2 kunnen gebruiken — -voor (1) een DB-client (psql/GUI), (2) de `scrum4me-docker` runner lokaal in -Docker, en (3) lokale dev van de hoofd-Scrum4Me-app. - -Tailscale is al geïnstalleerd en verbonden op beide machines: -- `janpeters-macbook-pro` → `100.73.234.116` -- `scrum4me-srv` → `100.118.195.120` (Linux, SSH aan) - -Wat nog ontbreekt: zowel Postgres als de Next.js-app op de Ubuntu-server -luisteren standaard alleen op `localhost` en zijn nog niet bereikbaar over de -Tailscale-interface. De database zelf is **al volledig ingericht** (schema + -data) — er is geen migratie- of seed-werk nodig, alleen netwerk-, auth- en -connectie-configuratie. - -**Beslissingen (van de gebruiker):** -- App-deploy op Ubuntu: **reverse proxy** (nginx/Caddy) vóór Next.js. -- DB-toegang: **hele tailnet** mag erbij (`100.64.0.0/10`) — bewuste keuze; - later eventueel te versmallen via Tailscale ACLs/groups. -- Postgres-rol: **nog onzeker** — het plan voegt een controle toe en adviseert - een dedicated rol. - -**Canonieke `SCRUM4ME_BASE_URL`:** `http://100.118.195.120` (reverse proxy op -poort 80 op de Tailscale-interface, **plain HTTP**). Tailscale (WireGuard) -verzorgt de transportencryptie binnen de tailnet, dus een tweede TLS-laag is -hier niet nodig. Dit raw-IP-adres resolvet ook vanuit een Docker-container -(geen MagicDNS-afhankelijkheid). HTTPS op de proxy is optionele hardening — zie -de noot onderaan; kies je daarvoor, gebruik dan een hostnaam die óók vanuit -Docker oplost en pas álle URL's hieronder consistent aan. - -**Bevinding uit de codebase:** in `scrum4me-docker` is de DB-koppeling puur -config. Zowel `bin/run-one-job.ts` (regel 30, 115) als de MCP-server -(`mcp-config.json` regel 9-10) lezen `DATABASE_URL` / `DIRECT_URL` uit de -omgeving. `bin/check-tokens.sh` (regel 35-38) doet bovendien een harde -`curl ${SCRUM4ME_BASE_URL}/api/products` — onbereikbaarheid is fataal -(regel 52-57). Er zijn **geen code-wijzigingen** nodig — alleen `.env`. - -## Voorwaarden (aantoonbaar voldaan vóór uitvoering) - -- [ ] Tailscale actief op beide machines (`tailscale status` toont beide nodes) -- [ ] SSH naar scrum4me-srv werkt (`ssh scrum4me-srv echo ok`) -- [ ] DB-schema aanwezig (tabellen + data) — géén migratie nodig - -## Voorwaarden (input van de gebruiker nodig) - -- Postgres-rol + wachtwoord + databasenaam op de Ubuntu-server (de "USER", - "PASS", "DBNAME" hieronder). Niet in de chat delen — alleen lokaal invullen. -- De reverse proxy biedt de app aan op `http://100.118.195.120` (poort 80). - Wijkt dit af, pas dan overal de canonieke URL consistent aan. - ---- - -## Deel A — Ubuntu: Postgres openstellen op de Tailscale-interface - -Uit te voeren op `scrum4me-srv` (via `ssh scrum4me-srv` of `tailscale ssh`). - -1. **Tailscale-IP bevestigen** - ```bash - tailscale status - tailscale ip -4 # verwacht: 100.118.195.120 - ``` - -2. **`listen_addresses` uitbreiden** — Postgres bindt standaard alleen aan - localhost. Vind het configbestand en pas aan: - ```bash - sudo -u postgres psql -c 'SHOW config_file;' # bv. /etc/postgresql/16/main/postgresql.conf - ``` - Zet in dat bestand: - ``` - listen_addresses = 'localhost,100.118.195.120' - ``` - Bewust **niet** `'*'` — zo bindt Postgres alleen aan localhost + het - Tailscale-adres, nooit aan de publieke interface. - - > ⚠️ **Boot-order:** door aan `100.118.195.120` te binden moet `tailscale0` - > al bestaan bij boot. 
Stap A6 maakt de systemd-ordering verplicht — sla A6 - > niet over, anders faalt Postgres na een reboot. - -3. **Rol, auth-methode en grants controleren/instellen** (voorkomt een - login- of permission-fout ná goede netwerkconfig). De rol is nog onzeker, - dus eerst inventariseren: - ```bash - sudo -u postgres psql -c '\du' - sudo -u postgres psql -c 'SHOW password_encryption;' - ``` - **Advies:** gebruik (of maak) een **dedicated runtime-rol** die alleen de - rechten heeft die de app/runner nodig heeft — geen superuser: - ```sql - CREATE ROLE scrum4me_app LOGIN PASSWORD 'lokaal-wachtwoord'; - GRANT CONNECT ON DATABASE DBNAME TO scrum4me_app; - - -- runtime-rechten op het bestaande (gevulde) public-schema: - GRANT USAGE ON SCHEMA public TO scrum4me_app; - GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO scrum4me_app; - GRANT USAGE, SELECT, UPDATE ON ALL SEQUENCES IN SCHEMA public TO scrum4me_app; - - -- zodat ook later toegevoegde tabellen/sequences werken: - ALTER DEFAULT PRIVILEGES IN SCHEMA public - GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO scrum4me_app; - ALTER DEFAULT PRIVILEGES IN SCHEMA public - GRANT USAGE, SELECT, UPDATE ON SEQUENCES TO scrum4me_app; - ``` - **Migraties:** deze runtime-rol krijgt bewust géén DDL-rechten. De DB is al - ingericht, dus in normale operatie draaien er geen migraties via deze rol. - Moet je later vanaf de Mac toch een Prisma-migratie draaien, gebruik dan de - DB-owner-rol (apart wachtwoord), niet `scrum4me_app`. - - **SCRAM-verifier:** stap A4 kiest `scram-sha-256`. Een rol waarvan het - wachtwoord nog als **md5** is opgeslagen kan dan niet inloggen. Forceer een - SCRAM-verifier door het wachtwoord opnieuw te zetten (alleen lokaal op de - server, niet in chat/docs delen): - ```sql - ALTER ROLE scrum4me_app WITH PASSWORD 'lokaal-wachtwoord'; - ``` - -4. **`pg_hba.conf` — toegang vanaf de tailnet toestaan** - ```bash - sudo -u postgres psql -c 'SHOW hba_file;' - ``` - Voeg een regel toe (boven de bestaande `host`-regels). De gebruiker koos - bewust voor toegang vanaf de **hele tailnet**; maak rol en database wel - expliciet i.p.v. `all all`: - ``` - # Scrum4Me clients via Tailscale - host DBNAME scrum4me_app 100.64.0.0/10 scram-sha-256 - ``` - Let op: hiermee mag elke tailnet-node mét geldige credentials verbinden. - Wil je dat later inperken, doe dat via Tailscale ACLs/groups of versmal - het CIDR naar specifieke node-IP's. - -5. **Firewall (defense-in-depth)** — alleen relevant als `ufw` actief is: - ```bash - sudo ufw status - sudo ufw allow in on tailscale0 to any port 5432 proto tcp - ``` - Open 5432 **nooit** generiek (`sudo ufw allow 5432` zonder interface) — - dat zou de DB internet-breed openstellen. - -6. **Boot-order — VERPLICHT.** Postgres bindt aan `100.118.195.120`, een adres - dat pas bestaat nadat `tailscaled` `tailscale0` heeft opgezet. - **Zonder deze override faalt Postgres bij reboot** ("cannot assign requested - address"). Voeg een systemd-override toe: - ```bash - sudo systemctl edit postgresql # of postgresql@-main - ``` - ```ini - [Unit] - After=tailscaled.service - Requires=tailscaled.service - ``` - -7. **Postgres herstarten en verifiëren** - ```bash - sudo systemctl restart postgresql - sudo ss -tlnp | grep 5432 # moet 127.0.0.1:5432 én 100.118.195.120:5432 tonen - ``` - ---- - -## Deel B — Ubuntu: de Scrum4Me-app (reverse proxy) bereikbaar maken op Tailscale - -De runner-tokencheck (`check-tokens.sh`) cURL't `${SCRUM4ME_BASE_URL}/api/products` -en faalt hard als die URL onbereikbaar is. 
De Next.js-app draait achter een -reverse proxy, dus de **proxy** moet op het Tailscale-adres luisteren — niet -alleen op `localhost`. Canoniek: `http://100.118.195.120` (poort 80, plain HTTP). - -1. **Reverse proxy op de Tailscale-interface laten luisteren (poort 80)** - - **nginx:** in het server-block het `listen`-adres aan het Tailscale-IP - binden: - ``` - listen 100.118.195.120:80; - ``` - `sudo nginx -t` → `sudo systemctl reload nginx`. - - **Caddy:** site-adres `http://100.118.195.120:80` in de `Caddyfile`. - - Next.js zelf mag op `127.0.0.1:` blijven; alleen de proxy is - extern bereikbaar. - -2. **Boot-order — VERPLICHT.** Net als Postgres bindt de proxy aan een adres - dat `tailscaled` eerst moet aanmaken. **Zonder deze override faalt nginx/Caddy - bij reboot.** - ```bash - sudo systemctl edit nginx # of caddy - ``` - ```ini - [Unit] - After=tailscaled.service - Requires=tailscaled.service - ``` - -3. **Firewall voor poort 80** — alleen bij actieve `ufw`: - ```bash - sudo ufw allow in on tailscale0 to any port 80 proto tcp - ``` - -4. **Lokaal op de server verifiëren** - ```bash - curl -fsS -H "Authorization: Bearer $SCRUM4ME_TOKEN" \ - http://100.118.195.120/api/products >/dev/null && echo OK - ``` - ---- - -## Deel C — Mac: connectiviteit verifiëren (DB én app) - -1. **Tailscale-bereik** (al bevestigd: `scrum4me-srv` zichtbaar op - `100.118.195.120`): - ```bash - tailscale status - tailscale ping scrum4me-srv - ``` - -2. **MagicDNS check** — kan de Mac de server op hostnaam bereiken? - ```bash - ping -c1 scrum4me-srv - ``` - Zo ja: native macOS-clients mogen `scrum4me-srv` als host gebruiken. - -3. **Postgres — TCP- en psql-test** - ```bash - nc -vz 100.118.195.120 5432 - psql "postgresql://USER:PASS@100.118.195.120:5432/DBNAME?sslmode=disable" -c '\dt' - ``` - -4. **App — vanaf de Mac én vanuit een Docker-container** (Docker Desktop heeft - geen Tailscale-MagicDNS; daarom de canonieke raw-IP-URL): - ```bash - # vanaf de Mac - curl -fsS -H "Authorization: Bearer $SCRUM4ME_TOKEN" \ - http://100.118.195.120/api/products >/dev/null && echo "mac OK" - - # vanuit een container (simuleert de runner) - docker run --rm --env SCRUM4ME_TOKEN alpine sh -lc \ - 'wget -qO- --header "Authorization: Bearer $SCRUM4ME_TOKEN" \ - http://100.118.195.120/api/products >/dev/null && echo "docker OK"' - ``` - Slaagt de container-test niet (geen route naar de tailnet vanuit Docker - Desktop), dan moet Tailscale in/naast de runner-container draaien — apart - uit te zoeken; eerst de directe route testen. - ---- - -## Deel D — De drie consumenten koppelen - -Welke host elke consument gebruikt verschilt — MagicDNS werkt wél native op -macOS, maar niet binnen een Docker-container: - -| Consumer | Host | Reden | -|---|---|---| -| DB-client (psql/TablePlus) native | `scrum4me-srv` | MagicDNS werkt op macOS | -| scrum4me-docker runner (Docker) | `100.118.195.120` | Docker Desktop heeft geen MagicDNS | -| Hoofd-app lokale dev | `scrum4me-srv` | MagicDNS werkt op macOS | - -### 1. DB-client (psql / TablePlus / DBeaver) — native op macOS -Connection-string: -``` -postgresql://USER:PASS@scrum4me-srv:5432/DBNAME?sslmode=disable -``` -GUI-clients: host `scrum4me-srv` (of `100.118.195.120`), poort `5432`, -SSL uit. - -### 2. 
scrum4me-docker runner — bewerk `/Users/janpetervisser/Development/scrum4me-docker/.env` -``` -DATABASE_URL=postgresql://USER:PASS@100.118.195.120:5432/DBNAME?sslmode=disable -DIRECT_URL=postgresql://USER:PASS@100.118.195.120:5432/DBNAME?sslmode=disable -SCRUM4ME_BASE_URL=http://100.118.195.120 -``` -Belangrijk: -- **Gebruik het rauwe Tailscale-IP, niet `scrum4me-srv`.** Docker Desktop- - containers krijgen geen Tailscale-MagicDNS; de hostnaam resolvet niet - binnen de container. -- Laat de Neon-specifieke params (`channel_binding=require`, - `sslmode=verify-full`) weg — die gelden niet voor zelf-gehoste Postgres. -- Zorg dat `SCRUM4ME_TOKEN` een token van **omgeving 2** is — de tokencheck - loopt tegen de Ubuntu-app, niet meer tegen Vercel. - -> `SCRUM4ME_TOKEN` haal je op via de Ubuntu-app: Settings → API Tokens → nieuw -> token aanmaken. Een bestaand Vercel-token werkt **niet** tegen de -> Ubuntu-omgeving. - -### 3. Hoofd-Scrum4Me-app (lokale dev) — bewerk `/Users/janpetervisser/Development/Scrum4Me` -Dit is een **andere repo** dan `scrum4me-docker`. In die repo de `.env.local` -(of `.env`) aanpassen. De app draait native op macOS, dus de MagicDNS-hostnaam -`scrum4me-srv` mag hier wél: -``` -DATABASE_URL=postgresql://USER:PASS@scrum4me-srv:5432/DBNAME?sslmode=disable -DIRECT_URL=postgresql://USER:PASS@scrum4me-srv:5432/DBNAME?sslmode=disable -``` - ---- - -## SSL-keuze - -Aanbeveling: **`sslmode=disable`** voor Postgres en **plain HTTP** voor de app- -URL. Tailscale (WireGuard) versleutelt het transport al end-to-end binnen de -tailnet; een tweede TLS-laag op een zelf-gehoste Postgres of op de proxy levert -hier vooral configuratie-gedoe op. - -Optionele hardening (later, samenhangend uit te voeren): -- TLS-certs op Postgres + `sslmode=require`. -- HTTPS op de reverse proxy. Doe dit dan met een **DNS-naam die ook vanuit - Docker oplost** (raw-IP + cert geeft validatiefouten), en pas `SCRUM4ME_BASE_URL` - én alle verificatie-`curl`s consistent aan naar die `https://`-hostnaam. - -## Twee omgevingen naast elkaar houden - -Omdat omgeving 1 (Neon) blijft bestaan: bewaar twee env-varianten, bv. -`.env.neon` en `.env.ubuntu`, en symlink de actieve naar `.env`: -```bash -ln -sf .env.ubuntu .env # activeer Ubuntu-omgeving -ln -sf .env.neon .env # activeer Neon-omgeving -``` -Lichtgewicht en voorkomt dat je per ongeluk de verkeerde DB raakt. - -## Te wijzigen bestanden - -**Op `scrum4me-srv`:** -- `postgresql.conf` — `listen_addresses`. -- `pg_hba.conf` — tailnet-regel voor `DBNAME` + dedicated rol. -- Postgres-rol — dedicated `scrum4me_app`-rol + grants + `ALTER ROLE ... PASSWORD`. -- nginx/Caddy-config — `listen` op `100.118.195.120:80`. -- systemd-overrides — `After=/Requires=tailscaled.service` voor `postgresql` - en de proxy. -- evt. `ufw`-regels voor poort 5432 en 80 op `tailscale0`. - -**Op de Mac:** -- `/Users/janpetervisser/Development/scrum4me-docker/.env` — `DATABASE_URL`, - `DIRECT_URL`, `SCRUM4ME_BASE_URL`, `SCRUM4ME_TOKEN`. -- `/Users/janpetervisser/Development/Scrum4Me/.env.local` — `DATABASE_URL`, - `DIRECT_URL` (andere repo). -- Géén codewijzigingen in `scrum4me-docker`. - -## Verificatie (end-to-end) - -1. **Netwerk:** `nc -vz 100.118.195.120 5432` vanaf de Mac slaagt. -2. **DB-client:** `psql ".../DBNAME?sslmode=disable" -c '\dt'` toont de - Scrum4Me-tabellen; een test-`INSERT`/`SELECT` bevestigt dat de - `scrum4me_app`-grants kloppen. -3. **App-bereik:** de `curl`/`docker run`-tests uit Deel C-4 geven beide `OK`. -4. 
**Reboot-test:** herstart `scrum4me-srv`; controleer daarna met - `sudo ss -tlnp` dat Postgres én de proxy weer op `100.118.195.120` luisteren, - en herhaal de Mac/Docker-connectiviteitstests. -5. **Runner:** na `.env`-update `docker compose up -d --force-recreate`, - dan `docker compose logs -f` — `check-tokens.sh` moet - "OK: 100.118.195.120:5432 reachable" én "OK: SCRUM4ME_TOKEN works" loggen, - en de daemon-loop moet een job kunnen claimen uit de Ubuntu-DB. -6. **Hoofd-app:** lokale dev-server in `/Users/janpetervisser/Development/Scrum4Me` - start en leest data uit de Ubuntu-DB. - -## Veelvoorkomende fouten - -| Fout | Oorzaak | Fix | -|---|---|---| -| `could not translate host name "scrum4me-srv"` | MagicDNS niet actief (Docker) | Gebruik raw IP `100.118.195.120` | -| `cannot assign requested address` bij Postgres-start | `tailscale0` bestaat nog niet | A6 systemd-override toevoegen | -| `FATAL: password authentication failed` | SCRAM-verifier niet bijgewerkt | `ALTER ROLE scrum4me_app WITH PASSWORD '...'` herhalen | - ---- - -# Addendum — uitvoering Ubuntu-kant 2026-05-14 - -> Deel A + B zijn uitgevoerd. De server bleek een **andere topologie** te hebben -> dan dit plan aannam: Postgres én de reverse proxy draaien als **Docker -> containers**, niet host-geïnstalleerd. Dit addendum beschrijft wat er feitelijk -> is gebeurd. Deel C + D (Mac-kant) staan nog open. - -## Vastgestelde topologie (wijkt af van de aannames) - -| Plan nam aan | Werkelijkheid op `scrum4me-srv` | -|---|---| -| Host-Postgres (`/etc/postgresql/...`, `systemctl postgresql`) | Docker container `scrum4me-postgres` (postgres:17), data-volume `/srv/scrum4me/postgres` | -| Host nginx/Caddy | Docker container `scrum4me-caddy` (caddy:2), al luisterend op `0.0.0.0:80` + `:443` | -| Migratie/seed mogelijk nodig | Bevestigd niet nodig — db `scrum4me` was gevuld | - -Concrete waarden die het plan openliet: -- **Host** = dit ís `scrum4me-srv` (`100.118.195.120`) — Deel A/B dus direct uitgevoerd, niet via SSH. -- **DBNAME** = `scrum4me` -- **Rol** = `scrum4me_app` aangemaakt (non-superuser, DML-only), wachtwoord lokaal gegenereerd via `openssl rand -hex 24`. - -## Deel A — zoals feitelijk uitgevoerd (Docker-variant) - -| Plan-stap | Aanpassing | -|---|---| -| **A2** `listen_addresses` in `postgresql.conf` | **N.v.t.** — de container luistert intern al op `0.0.0.0`. Host-exposure = Docker port-mapping. In `/srv/scrum4me/compose/docker-compose.yml` toegevoegd: `- "100.118.195.120:5432:5432"` náást de bestaande `127.0.0.1:5432:5432`. Specifiek IP i.p.v. `0.0.0.0` — Docker's iptables-DNAT scoped dan op dat IP, publiek blijft dicht. | -| **A3** rol + grants | Identiek SQL, maar uitgevoerd via `docker exec -i scrum4me-postgres psql -U scrum4me -d scrum4me`. Idempotent script (`CREATE ROLE` of `ALTER ROLE ... PASSWORD`). De `ALTER ROLE ... PASSWORD` zet meteen een SCRAM-verifier. **Let op:** `CREATE ROLE` op de gedeelde productie-DB wordt door de auto-mode classifier geblokkeerd — moet via een script dat de gebruiker zelf draait. | -| **A4** `pg_hba.conf` | Bestand zit in het data-volume: host-pad `/srv/scrum4me/postgres/pg_hba.conf` (root-owned, sudo nodig). Regel toegevoegd onderaan (append is veilig — first-match, geen conflict). **Bevinding:** de postgres-image heeft al een catch-all `host all all all scram-sha-256` — onze scoped regel is dáárdoor strikt genomen redundant. Echte bescherming = IP-scoped port-binding + ufw. Catch-all strakker maken = aparte taak (hij draagt de docker-netwerk-clients). 
| -| **A5** ufw | Identiek: `ufw allow in on tailscale0 to any port 5432 proto tcp`. | -| **A6** boot-order | **Niet** `postgresql.service` (bestaat niet) maar `docker.service`. Drop-in `/etc/systemd/system/docker.service.d/tailscale-order.conf`. Bewust `After=tailscaled.service` + **`Wants=`** i.p.v. het door het plan voorgestelde `Requires=` — `Requires` op `docker.service` is fragiel (faalt tailscaled ooit, dan start de hele docker-stack niet). `After=` lost de race op; `Wants=` trekt tailscaled mee zonder hard-fail. | -| **A7** restart + verify | `docker compose up -d postgres` (recreate — `restart` pakt port-wijzigingen niet). `ss -tln` toont nu `127.0.0.1:5432` én `100.118.195.120:5432`. Verificatie met een wegwerp-container: `docker run --rm --network host postgres:17 psql "postgresql://scrum4me_app:...@100.118.195.120:5432/scrum4me?sslmode=disable" -c '\dt'` — `--network host` simuleert exact hoe de Mac het ziet. | - -## Deel B — zoals feitelijk uitgevoerd (Docker-variant) - -| Plan-stap | Aanpassing | -|---|---| -| **B1** proxy op tailscale-interface | **Grotendeels al gedaan** — de Caddy-container publiceert al `0.0.0.0:80`, dus luistert al op `tailscale0`. Alleen een site-block toegevoegd aan `/srv/scrum4me/caddy/Caddyfile`: `100.118.195.120:80 { reverse_proxy 172.18.0.1:3000 }`. | -| **B2** boot-order proxy | **Niet nodig.** Caddy bindt aan `0.0.0.0:80`, niet aan een IP-specifiek adres — er is geen `tailscale0`-race. (Alleen Postgres had de IP-specifieke binding, vandaar dat A6 wél nodig was.) | -| **B3** ufw poort 80 | **Niet nodig.** Poort 80 stond al op `ALLOW IN Anywhere`. | -| **B4** verifiëren | `curl -sI http://100.118.195.120/` → `200 OK`, geen redirect. `/api/products` → 401 (bereikbaar, auth vereist). | - -## Bugs / valkuilen tegengekomen tijdens uitvoering - -1. **Caddy single-file bind-mount wordt stale na een atomic-rename edit.** - `/srv/scrum4me/caddy/Caddyfile` is als enkel bestand ge-bind-mount. Editors - (en de Edit-tooling) schrijven vaak via write-temp + rename = nieuwe inode. - De container blijft naar de oude inode wijzen → `caddy reload` leest de - **oude** content, schijnbaar zonder fout. Symptoom hier: het nieuwe - site-block leek "stil gedropt" door Caddy's adapter, maar de container zág - het block simpelweg niet. - **Fix / regel:** na een Caddyfile-edit `docker compose up -d --force-recreate - caddy` (of `restart`) — **niet** `caddy reload`. De recreate her-bindt de - mount op de nieuwe inode. (Eerder in het project werkte een Caddyfile-edit - wél, juist omdat daar toevallig een `restart` op volgde.) - -2. **`http://` vs `:80` syntax — bleek een rode haring.** - Aanvankelijk leek Caddy's Caddyfile-adapter `http://100.118.195.120` te - droppen. Geïsoleerd getest werkte beide syntaxen prima; het echte probleem - was bug #1 (stale mount). De definitieve regel gebruikt `100.118.195.120:80` - — ondubbelzinnig plain-HTTP-op-poort-80. - -## Verificatie-status - -| Plan verificatie-stap | Status | -|---|---| -| 1. `nc`/TCP naar 5432 | ✓ `psql` als `scrum4me_app` via `100.118.195.120:5432` werkt, ziet tabellen | -| 2. DB-client grants | ✓ `SELECT` op `idea_products` werkt onder `scrum4me_app` | -| 3. App-bereik | ✓ `http://100.118.195.120/` → 200, `/api/products` → 401 | -| 4. **Reboot-test** | ✗ **Nog niet gedaan** — productie-server niet herstart. Handmatig uitvoeren op rustig moment; check daarna `ss -tlnp \| grep 5432`. | -| 5. Runner | — Mac-kant (Deel C/D), nog open | -| 6. 
Hoofd-app lokale dev | — Mac-kant, nog open | - -## Gewijzigde bestanden op `scrum4me-srv` - -- `/srv/scrum4me/compose/docker-compose.yml` — postgres `ports`: extra `100.118.195.120:5432:5432` -- `/srv/scrum4me/caddy/Caddyfile` — site-block `100.118.195.120:80` -- `/srv/scrum4me/postgres/pg_hba.conf` — tailnet-regel (+ `.bak-`) -- `/etc/systemd/system/docker.service.d/tailscale-order.conf` — boot-order drop-in (nieuw) -- ufw — regel `5432/tcp on tailscale0` -- Postgres-rol `scrum4me_app` — aangemaakt met grants op db `scrum4me` diff --git a/docs/specs/functional.md b/docs/specs/functional.md deleted file mode 100644 index ca2512c..0000000 --- a/docs/specs/functional.md +++ /dev/null @@ -1,147 +0,0 @@ -# Functionele specificatie — Ops Dashboard - -## Doel - -Eén web-UI waarmee de eigenaar van een single-host server-stack (Docker + systemd + Git-checkouts + Caddy + Postgres) dezelfde operaties kan uitvoeren die anders in een SSH-terminal gebeuren — met audit-log, herhaalbare flows en minder typefouten. - -**Schaal:** één host, één admin-gebruiker. Multi-host/team is buiten scope. - -## Gebruikers en rollen - -| Rol | Beschrijving | Hoeveelheid | -|---|---|---| -| **admin** | Volle toegang tot alle modules en flows. Single account, geseed via env. | 1 | - -Geen RBAC, geen tenant-isolatie, geen "view-only"-modus. Wie inlogt kan alles. - -## Functionele scope per module - -### Dashboard (`/`) - -5 live status-widgets met auto-refresh ~5s: - -| Widget | Data-bron | Indicator | -|---|---|---| -| Docker | `docker ps --format json` | Count van running containers, lijst (naam + status) | -| Git | `git status --short --branch` per pad in `REPO_PATHS` | Branch + dirty-vlag | -| systemd | `systemctl is-active ` per item in `SYSTEMD_UNITS` | Active / Inactive / Failed | -| Caddy | `caddy admin-cmd certificates` (of equiv. 
shell-output parse) | Aantal certs + dichtstbijzijnde expiry | -| Audit | DB-query op `FlowRun` desc | Laatste run + status | - -**Acceptatie:** -- Widget laadt < 1 s na page-load -- Auto-refresh werkt in achtergrond zonder volledig herrenderen -- Bij fout (agent down, command faalt): widget toont rood errorblok, niet de hele page - -### Auth (`/login`) - -- Email + wachtwoord (single user) -- 5 failed attempts in 1 minuut → 429 rate-limit per IP -- Succesvolle login → session-cookie 24u, HttpOnly, SameSite=strict, Secure (production) -- `/api/auth/logout` invalideert sessie en wist cookie - -### Docker (`/docker`) - -- Lijst running containers (CONTAINER ID, IMAGE, COMMAND, CREATED, STATUS, PORTS, NAMES) -- Auto-refresh elke 5s -- `/docker/[name]` → detail-page met logs (laatste 200 regels), image-info, environment -- **Geen** start/stop/restart vanuit UI — alleen via flows - -### Git (`/git`) - -- Per pad in `REPO_PATHS`: huidige branch, ahead/behind count, modified-files-count, laatste 3 commits -- `/git/[repo]` → diff-viewer voor uncommitted changes + commit-historie laatste 20 - -### systemd (`/systemd`) - -- Per unit in `SYSTEMD_UNITS`: active/inactive/failed, last-changed-timestamp -- `/systemd/[unit]` → laatste 100 journal-regels van die unit, met level-filter -- **Restart-actie**: alleen voor units die expliciet in `sudoers.d/ops-agent` met NOPASSWD staan - -### Caddy (`/caddy`) - -- Toon huidige `/srv/scrum4me/caddy/Caddyfile` met syntax-highlighting -- Toon alle uitgegeven certs (subject, issuer, expiry, dichtstbijzijnde eerst) -- Geel-warning bij expiry < 30 dagen, rood bij < 7 dagen -- `/caddy/edit` → editor met save-knop; save valideert via `caddy validate` voor commit en restart van caddy-container - -### Flows (`/flows`) - -Twee voor-gedefinieerde flows in YAML in `ops-agent/flows.example/`: - -| Flow | Stappen | -|---|---| -| `update_scrum4me_web` | git pull → npm run build → docker compose up -d --build → smoke-test op homepage | -| `update_caddy_config` | write nieuw Caddyfile → caddy validate → docker compose restart caddy → check cert renewal | - -Per flow: -- Dry-run default (toont alleen wat het zou doen) -- "Run"-knop voert echt uit; toont live SSE-stream van stdout/stderr per stap -- Bij stap-fail: stop, markeer FlowRun als `failed`, latere stappen niet uitgevoerd -- Bij success: FlowRun = `success`, totaalduur opgeslagen - -### Audit (`/audit`) - -- Lijst van alle `FlowRun` records, default 50 laatste, sort desc op `started_at` -- Filter op status, datumrange, flow-name -- `/audit/[flow_run_id]` → volledige output per stap, scrollable - -### Settings/Backups (`/settings/backups`) - -- Lijst van `.sql.gz` bestanden in `/srv/scrum4me/backups`, met size + mtime -- "Backup now"-knop → maakt nieuwe dump met `pg_dumpall` voor alle databases -- Restore: **handmatig vanuit terminal** — UI toont alleen de stappen als runbook - -## State-machine flows - -``` - ┌────────┐ - │ pending │ - └────┬────┘ - │ (start request) - ┌────▼────┐ - │ running │ - └────┬────┘ - ┌────┼────┬────────┐ - ▼ ▼ ▼ ▼ - success failed cancelled timeout (>30min) -``` - -`pending` → `running`: bij ontvangst start-request -`running` → `success`: alle stappen exit-code 0 -`running` → `failed`: een stap exit-code ≠ 0 -`running` → `cancelled`: user klikt cancel -`running` → `timeout`: na 30 min nog steeds running (cleanup-job) - -## Hard limits - -- Max 1 actieve flow tegelijk (lock-file in `/var/run/agent/`); 2e start-request → 409 Conflict -- Stdout/stderr per stap geknipt op 64 KB om audit-log niet te 
laten exploderen -- Session-TTL hard 24 uur, geen "remember me" -- Auto-refresh max 1 keer per 5 seconden om agent niet te overbelasten - -## Niet-functionele eisen - -| Eis | Doel | -|---|---| -| First load < 1s | Single-user, lokale Postgres, geen onnodige round-trips | -| Module-page TTI < 2s | Server-side render met direct agent-call, geen client-fetch waterval | -| Audit-trail volledig | Elke flow-start logt user + tijdstempel + args; elke command-execution logt exit + duration | -| Geen geheime data in URLs | Tokens in headers, secrets nooit in query-params | -| CSP strict | `script-src 'self' 'unsafe-inline'`; geen externe CDNs | -| HTTPS-only in productie | Caddy auto-ACME; cookies Secure-flag in prod-mode | - -## Buiten scope - -- Meerdere admins / RBAC -- Meerdere hosts / cluster-management -- Custom container starten (alleen restart bestaande) -- Real-time alerts (geen pager, geen email) -- Externe monitoring-integratie (Grafana/Prometheus/Sentry) -- Wachtwoord-reset-flow / SSO - -## Verwante documenten - -- [Technische specificatie](./technical.md) — hoe het werkt -- [Handleiding](../handleiding.md) — hoe je het gebruikt -- [Post-install runbook](../runbooks/post-install.md) — eerste deploy diff --git a/docs/specs/technical.md b/docs/specs/technical.md deleted file mode 100644 index 33e67cb..0000000 --- a/docs/specs/technical.md +++ /dev/null @@ -1,270 +0,0 @@ -# Technische specificatie — Ops Dashboard - -## Architectuur in één plaatje - -``` -┌────────────────┐ HTTPS ┌──────┐ HTTP ┌─────────────────┐ -│ Browser (jou) ├─────────►│Caddy ├────────►│ ops-dashboard │ -│ │ │ :443 │ │ Next.js 16 :3000│ -└────────────────┘ └──────┘ └────┬────────┬───┘ - │ │ - HMAC HTTP │ │ TCP/SQL - :3099 │ │ - ┌───────────────▼┐ │ - │ ops-agent │ │ - │ Fastify on host│ │ - │ spawn/exec │ │ - └───┬────────────┘ │ - │ │ - ┌───────┴───────┐ ┌───────▼────────┐ - │ Whitelisted │ │ Postgres 17 │ - │ host commands │ │ db=ops_dashb.. │ - │ docker/git/etc │ └────────────────┘ - └───────────────┘ -``` - -Drie processen, één host: - -1. **ops-dashboard** — Next.js app in Docker, op compose-bridge, exposed via Caddy -2. **ops-agent** — Node/Fastify service direct op host (geen container), heeft sudoers + docker.sock access -3. 
**postgres** — Docker container, dezelfde als die Scrum4Me al gebruikt; ops-dashboard heeft eigen DB `ops_dashboard` - -## Stack - -| Laag | Technologie | Versie | Reden | -|---|---|---|---| -| App framework | Next.js | 16.2 (App Router) | RSC server-side fetching matched onze "render with agent data" patroon | -| UI library | React | 19 | Bundled bij Next 16 | -| Styling | Tailwind CSS | 4 | Utility-first; geen custom design system | -| UI primitives | `@base-ui/react` | 1.4 | Headless components, geen Radix-lock-in | -| Code highlighting | shiki | 1.29 | Server-side highlighting in Caddyfile view | -| Database ORM | Prisma | 7.8 (via `@prisma/adapter-pg`) | Same as Scrum4Me; één skill om beide te onderhouden | -| Auth (password) | bcryptjs | 3 | Geen native bindings nodig | -| Session | Custom in `lib/session.ts` | — | Eenvoudig: token in DB, hash in cookie | -| Agent | Fastify | 5 | Lichtgewicht, native SSE-streaming | -| Agent whitelist | js-yaml | 4 | Read-only configfile | - -## Deploy-topologie - -| Component | Locatie | Beheer | -|---|---|---| -| ops-dashboard | Docker container `scrum4me-ops-dashboard`, image `ops-dashboard:latest` | `docker compose` in `/srv/scrum4me/compose/docker-compose.yml` | -| ops-agent | systemd unit `ops-agent.service`, host-binary `/opt/ops-agent/dist/index.js` | systemd, geïnstalleerd via `deploy/ops-agent/setup.sh` | -| Caddyfile-route | Block in `/srv/scrum4me/caddy/Caddyfile` | Handmatig, na add restart Caddy-container | -| Database | Postgres-container `scrum4me-postgres`, db `ops_dashboard` | Hergebruik bestaande container | -| Backups | `/srv/scrum4me/backups/*.sql.gz` | Cron of handmatig via UI | - -Caddy routeert `ops.jp-visser.nl` → service-naam `ops-dashboard:3000` op compose-bridge. - -## Data-model - -``` -User -├── id cuid (string PK) -├── email unique -├── pwd_hash bcrypt $2b$12$... -└── created_at - -Session -├── id cuid (PK) -├── user_id → User -├── token_hash sha256 hex (cookie waarde wordt gehashed opgeslagen) -└── expires_at 24h na create - -FlowRun -├── id cuid (PK) -├── user_id → User -├── flow_name string (bv. "update_scrum4me_web") -├── status enum: pending|running|success|failed|cancelled -├── started_at -├── finished_at nullable -└── (1:N) FlowStep - -FlowStep -├── id cuid (PK) -├── flow_run_id → FlowRun (cascade delete) -├── step_index int -├── name string (zoals in YAML flow-definitie) -├── exit_code int nullable -├── stdout text (max 64KB, geknipt) -├── stderr text (max 64KB, geknipt) -├── started_at -└── finished_at nullable -``` - -Migrations in `prisma/migrations/`. Seed in `prisma/seed.ts` (creëert eerste admin uit `SEED_USER_*`). - -## Auth-flow - -``` -1. Browser GET /login - ← Set-Cookie: csrf_token=; SameSite=strict; httpOnly=false - ← HTML form - -2. Browser POST /api/auth/login - Headers: - Cookie: csrf_token=; ops_session=... - x-csrf-token: ← double-submit CSRF check - Body: { email, password } - -3. Server: - a. proxy.ts CSRF check (cookie==header) - b. /api/auth/login route: - - rate-limit per IP (5/min) - - prisma.user.findUnique({ email }) - - bcrypt.compare(password, user.pwd_hash) - c. Bij succes: - - generateSessionToken (32 bytes hex) - - prisma.session.create({ token_hash: sha256(token), expires_at: now+24h }) - - Set-Cookie ops_session=; HttpOnly; SameSite=strict; Secure (in prod) - -4. Browser GET / - Server: proxy.ts → als geen ops_session cookie → redirect /login - Anders: getCurrentUser() leest cookie, hashed, prisma.session.findUnique({ token_hash }) -``` - -CSRF: double-submit cookie pattern. 
CSP, X-Frame-Options, X-Content-Type-Options, Referrer-Policy via proxy.ts response-headers. - -## Agent-protocol - -Dashboard → agent communicatie via `lib/agent-client.ts`: - -``` -POST http://172.18.0.1:3099/agent/v1/exec -Headers: - Authorization: Bearer - Content-Type: application/json -Body: - { command_key: "docker_ps", args?: string[], stdin?: string } -``` - -Response: SSE stream - -``` -event: stdout -data: {"data": ""} - -event: stderr -data: {"data": ""} - -event: exit -data: {"code": 0} -``` - -Agent server-side flow per call: -1. `req.body.command_key` → lookup in `/etc/ops-agent/commands.yml` -2. Bij hit: spawn `def.cmd[0]` met `def.cmd.slice(1) ++ args` (geen shell, geen interpolatie) -3. Stream stdout/stderr chunks naar SSE -4. Bij `child.close`: write `event: exit`, end response -5. Bij `child.error`: write `event: error`, end response -6. Bij `reply.raw.close` (client-disconnect): `child.kill()` -7. Audit-log naar journalctl: `{audit:true, command_key, args, exit_code, duration_ms}` - -`commands.yml` voorbeeld: - -```yaml -docker_ps: - cmd: ["docker", "ps", "--format", "json"] - description: "List running containers" - -git_status: - cmd: ["git", "status", "--short", "--branch"] - cwd_pattern: true # args[0] = cwd, rest = command args - description: "Git status in a repo" - -systemctl_restart_caddy: - cmd: ["sudo", "/usr/bin/systemctl", "restart", "caddy"] - description: "Restart caddy service" -``` - -Geen `command_key` in whitelist → 403 Forbidden. - -## Flows engine - -YAML-definitie in `ops-agent/flows.example/*.yml`: - -```yaml -name: update_scrum4me_web -description: Pull main, build, restart container, verify -steps: - - name: Pull latest main - command_key: git_pull - args: ["/srv/scrum4me/repos/Scrum4Me", "main"] - precondition: git_status_clean - - name: Build container - command_key: docker_compose_build - args: ["scrum4me-web"] - - name: Restart - command_key: docker_compose_up - args: ["-d", "scrum4me-web"] - - name: Smoke test - command_key: curl_status - args: ["https://scrum4me.jp-visser.nl"] - expect_exit_code: 0 -``` - -Runner (`ops-agent/src/lib/flow-runner.ts`): -- Sequential, fail-fast -- Per stap: check preconditions, spawn, capture stdout/stderr, store in FlowStep -- Bij dry-run: vervang `spawn` door log van `def.cmd ++ args` -- Bij echte run: stream via SSE naar dashboard `/api/flows/run` route - -## Realtime in de UI - -Niet via WebSocket of Server-Sent Events op de dashboard-side. Auto-refresh wordt server-rendered (`export const dynamic = 'force-dynamic'`) met client-side `useEffect(setInterval, 5000)` om `router.refresh()` te triggeren. - -Flow-execution: client opent `EventSource` op `/api/flows/run/[id]` die de SSE van de agent doorstuurt. - -## Configuratie - -Verplicht in `.env`: - -```bash -DATABASE_URL=postgresql://USER:PASS@postgres:5432/ops_dashboard -OPS_AGENT_URL=http://172.18.0.1:3099 -OPS_AGENT_SECRET= -SEED_USER_EMAIL=admin@example.com -SEED_USER_PASSWORD= -``` - -Optioneel: - -```bash -SYSTEMD_UNITS=scrum4me-web,ops-agent # comma-separated -REPO_PATHS=/srv/scrum4me/repos/Scrum4Me,… # comma-separated absolute paths -``` - -Bij start: app valideert dat verplichte env vars gezet zijn; faalt fast met duidelijke error. 
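- -Zo'n fail-fast check is bijvoorbeeld als preflight te schetsen (hypothetisch hulpscript; de app doet dezelfde validatie bij boot, en de vijf verplichte vars komen uit de lijst hierboven): - -```bash -# Controleer dat elke verplichte env var in .env staat én een waarde heeft; -# print alle ontbrekende namen in één keer in plaats van vaag te crashen. -for var in DATABASE_URL OPS_AGENT_URL OPS_AGENT_SECRET SEED_USER_EMAIL SEED_USER_PASSWORD; do - grep -q "^${var}=." .env || echo "ontbreekt of leeg: ${var}" -done -```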
- -## Security-eigenschappen - -| Eigenschap | Implementatie | -|---|---| -| Wachtwoord-hashing | bcrypt 12 rounds | -| Session-cookie | HttpOnly, SameSite=strict, Secure in prod, 24u TTL | -| CSRF | Double-submit cookie pattern, validated in `proxy.ts` voor POSTs | -| CSP | Strict in response headers — geen inline scripts behalve Next.js internals met nonce | -| Agent-auth | HMAC via Bearer-token (`OPS_AGENT_SECRET`) — symmetrisch | -| Command-injection | `spawn(bin, args, {shell: false})` — geen shell-interpolatie ooit | -| Whitelist | `commands.yml` is single source of truth voor wat draaibaar is | -| Sudo | `sudoers.d/ops-agent` met absolute paden + service-namen, geen wildcards | -| Audit | Elke `/agent/v1/exec` call logt naar journalctl met `{audit:true, …}` markeer | -| Rate-limit | Login 5/min/IP; agent per-secret zonder rate-limit (single-user trust) | -| Bind | Agent bindt op `0.0.0.0:3099`; UFW staat alleen `172.18.0.0/16` toe | - -## Niet-functionele eigenschappen - -| Eigenschap | Specificatie | -|---|---| -| Geen multi-tenancy | Eén user-row in DB, app verifieert alleen "is er een geldig session-record"; geen `WHERE user_id = ?` filter (single-tenant) | -| Geen retry/queue | Failed flows blijven failed; user moet handmatig opnieuw klikken | -| Geen migrations-automation | `prisma migrate deploy` is **niet** in de boot-flow; doe je expliciet bij elke deploy | -| Geen graceful shutdown | Container SIGTERM → in-flight requests verloren; geen drain | -| Logging | Stdout/stderr van containers via `docker logs`; agent via `journalctl -u ops-agent`; geen aggregator | - -## Open punten - -- **Echte caddyfile-grammar** (IDEA-061) — nu nginx-fallback -- **Multi-user / RBAC** — buiten scope, mogelijk later -- **Rate-limit op agent** — voor multi-user toekomst nodig -- **Real-time alerts** — momenteel pull-based, push naar Slack/Tailscale-only nog niet diff --git a/lib/codemirror/caddyfile-mode.ts b/lib/codemirror/caddyfile-mode.ts deleted file mode 100644 index 87de6b7..0000000 --- a/lib/codemirror/caddyfile-mode.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { StreamLanguage, type StreamParser } from '@codemirror/language' - -const CADDY_DIRECTIVES = new Set([ - 'reverse_proxy', 'encode', 'file_server', 'handle', 'handle_errors', - 'root', 'header', 'redir', 'rewrite', 'respond', 'route', 'tls', - 'log', 'basicauth', 'request_body', 'try_files', 'php_fastcgi', - 'templates', 'import', 'bind', 'metrics', 'admin', 'auto_https', -]) -const CADDY_GLOBAL = new Set(['email', 'storage', 'order', 'servers', 'log']) - -const parser: StreamParser = { - token(stream) { - if (stream.eatSpace()) return null - if (stream.match(/^#.*/)) return 'comment' - if (stream.match(/^"(?:[^"\\]|\\.)*"/)) return 'string' - if (stream.match(/^@[A-Za-z_][\w-]*/)) return 'variableName' - if (stream.match(/^[{}]/)) return 'brace' - const word = stream.match(/^[A-Za-z_][\w.-]*/) as RegExpMatchArray | null - if (word) { - const w = word[0] - if (CADDY_DIRECTIVES.has(w)) return 'keyword' - if (CADDY_GLOBAL.has(w)) return 'typeName' - return 'variableName' - } - stream.next() - return null - }, - languageData: { commentTokens: { line: '#' } }, -} - -export const caddyfileLanguage = StreamLanguage.define(parser) diff --git a/lib/grammars/caddyfile.json b/lib/grammars/caddyfile.json deleted file mode 100644 index 5d545a1..0000000 --- a/lib/grammars/caddyfile.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "name": "caddyfile", - "scopeName": "source.Caddyfile", - "fileTypes": ["Caddyfile"], - "patterns": [ - { "include": "#comment" 
}, - { "include": "#site-address" }, - { "include": "#named-matcher-def" }, - { "include": "#named-matcher-ref" }, - { "include": "#directive" }, - { "include": "#placeholder" }, - { "include": "#string-double" }, - { "include": "#string-backtick" }, - { "include": "#number" }, - { "include": "#braces" } - ], - "repository": { - "comment": { - "name": "comment.line.number-sign.caddyfile", - "match": "#.*$" - }, - "site-address": { - "name": "entity.name.section.caddyfile", - "match": "^(?:https?://)?[a-zA-Z0-9][a-zA-Z0-9.*-]*(?::[0-9]+)?(?=\\s*(?:\\{|,|$))" - }, - "named-matcher-def": { - "name": "entity.other.attribute-name.caddyfile", - "match": "@[a-zA-Z_][a-zA-Z0-9_-]*(?=\\s)" - }, - "named-matcher-ref": { - "name": "entity.other.attribute-name.caddyfile", - "match": "(?<=\\s)@[a-zA-Z_][a-zA-Z0-9_-]*" - }, - "directive": { - "patterns": [ - { - "name": "keyword.control.caddyfile", - "match": "\\b(reverse_proxy|encode|file_server|handle_errors|handle_path|handle|root|header|request_header|response_header|redir|respond|rewrite|uri|try_files|php_fastcgi|push|templates|basicauth|forward_auth|map|vars|log|tls|bind|import|snippet|abort|error|static_response|acme_server|invoke)\\b" - }, - { - "name": "support.function.caddyfile", - "match": "\\b(on_demand|off|auto|internal|force|strip_prefix|replace|path_regexp|method|host|header_regexp|remote_ip|client_ip|not|query|cookie|expression|path|protocol|vars_regexp|file|jwt|geo_ip)\\b" - }, - { - "name": "keyword.other.option.caddyfile", - "match": "\\b(auto_https|admin|debug|grace_period|shutdown_delay|servers|storage|order|email|acme_ca|acme_ca_root|acme_eab|ocsp_stapling|key_type|cert_issuer|local_certs|skip_install_trust|renew_interval|check_interval|persistent_key|insecure_secrets_log|prefer_wildcard|resolvers|max_size|retention|format|output|level|sampling|include|exclude|dial|upstream|transport|lb_policy|health_uri|health_interval|health_timeout|health_status|health_body|flush_interval|buffer_requests|buffer_responses|max_buffer_size|trusted_proxies|to|from|prefix|replacements|gzip|zstd|br)\\b" - } - ] - }, - "placeholder": { - "name": "variable.other.caddyfile", - "match": "\\{(?:http\\.(?:request|response|vars|regexp|handlers)|tls|env|vars|system|time|rand|counter|uuid|path|query|header|cookie|form|file|dir|args|blocks|labels|err|http)[^}]*\\}" - }, - "string-double": { - "name": "string.quoted.double.caddyfile", - "begin": "\"", - "end": "\"", - "patterns": [ - { - "name": "constant.character.escape.caddyfile", - "match": "\\\\." - }, - { - "name": "variable.other.caddyfile", - "match": "\\{[^}]+\\}" - } - ] - }, - "string-backtick": { - "name": "string.quoted.other.caddyfile", - "begin": "`", - "end": "`" - }, - "number": { - "name": "constant.numeric.caddyfile", - "match": "\\b[0-9]+(?:\\.[0-9]+)?(?:s|ms|m|h|d|kb|mb|gb)?\\b" - }, - "braces": { - "name": "punctuation.section.block.caddyfile", - "match": "[{}]" - } - } -} diff --git a/lib/parse-worker-log.ts b/lib/parse-worker-log.ts deleted file mode 100644 index eb3e175..0000000 --- a/lib/parse-worker-log.ts +++ /dev/null @@ -1,444 +0,0 @@ -// lib/parse-worker-log.ts -// -// Parser for Scrum4Me worker run-logs (/srv/scrum4me/worker-logs/idea/runs/*.log). -// Each file is produced by `tsx run-one-job.ts > run_log 2>&1` and is a mix of -// plain-text `[run-one-job]` annotation lines and Claude Code `stream-json` -// event lines (the worker spawns `claude --output-format stream-json --verbose`). 
-// -// Two entry points: -// summarizeRunLog(raw, fileName) — one cheap line scan, for the table. -// parseRunLog(raw, fileName) — full event timeline, for the detail panel. -// -// Pure module, no dependencies — mirrors lib/parse-docker.ts / lib/parse-systemd.ts. - -export type RunStatus = 'idle' | 'running' | 'success' | 'error' | 'token-expired' | 'unknown' - -export interface RunLogSummary { - fileName: string - runId: string - startedAt: string | null - status: RunStatus - jobId: string | null - model: string | null - permissionMode: string | null - durationMs: number | null - numTurns: number | null - totalCostUsd: number | null - exitCode: number | null - eventCount: number - inProgress: boolean - errorSummary: string | null -} - -export type MetaTag = - | 'claim' - | 'auth' - | 'quota' - | 'no-job' - | 'claimed' - | 'worktree' - | 'config' - | 'payload' - | 'spawn' - | 'claude-done' - | 'cleanup' - | 'exit' - | 'error' - | 'token-expired' - | 'timeout' - | 'other' - -export type LogEvent = - | { kind: 'meta'; ts: string | null; tag: MetaTag; text: string } - | { - kind: 'system-init' - ts: string | null - model: string - permissionMode: string - tools: string[] - mcpServers: string[] - sessionId: string - cwd: string - version: string - } - | { kind: 'assistant-text'; ts: string | null; text: string; truncated: boolean } - | { kind: 'thinking'; ts: string | null; text: string; truncated: boolean } - | { kind: 'tool-call'; ts: string | null; id: string; name: string; input: string; truncated: boolean } - | { - kind: 'tool-result' - ts: string | null - toolUseId: string - isError: boolean - body: string - truncated: boolean - fullLength: number - } - | { kind: 'rate-limit'; ts: string | null; status: string } - | { - kind: 'result' - ts: string | null - subtype: string - isError: boolean - durationMs: number | null - numTurns: number | null - totalCostUsd: number | null - resultText: string - resultTruncated: boolean - } - | { kind: 'raw'; ts: string | null; text: string } - -export interface ParsedRunLog { - summary: RunLogSummary - events: LogEvent[] - inProgress: boolean - responseTruncated: boolean -} - -// Per-item caps keep the detail payload bounded even for ~350 KB raw logs. -const TOOL_RESULT_CAP = 8 * 1024 -const TEXT_CAP = 16 * 1024 -const TOOL_INPUT_CAP = 4 * 1024 -const RESPONSE_CAP = 1_500_000 - -const META_RE = /^(\S+)\s+\[run-one-job\]\s+(.*)$/ - -function cap(s: string, max: number): { text: string; truncated: boolean } { - if (s.length <= max) return { text: s, truncated: false } - return { text: s.slice(0, max), truncated: true } -} - -/** Strip the `.log` / `.log.gz` suffix — the run id is the timestamp filename. */ -export function runIdFromFileName(fileName: string): string { - return fileName.replace(/\.log(\.gz)?$/, '') -} - -/** run-agent.sh names each file `$(date -u +%Y%m%dT%H%M%SZ).log`, so the name is the start time. 
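A (made-up) example: 20250101T120000Z.log → runId 20250101T120000Z → startedAt 2025-01-01T12:00:00Z.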
*/ -function startedAtFromRunId(runId: string): string | null { - const m = runId.match(/^(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})Z$/) - if (!m) return null - return `${m[1]}-${m[2]}-${m[3]}T${m[4]}:${m[5]}:${m[6]}Z` -} - -function classifyMeta(msg: string): MetaTag { - if (msg.startsWith('claim attempt')) return 'claim' - if (msg.startsWith('auth ok')) return 'auth' - if (msg.startsWith('quota probe')) return 'quota' - if (msg.startsWith('no job claimed')) return 'no-job' - if (msg.startsWith('claimed job_id=')) return 'claimed' - if (msg.startsWith('worktree path=')) return 'worktree' - if (msg.startsWith('config ')) return 'config' - if (msg.startsWith('payload written')) return 'payload' - if (msg.startsWith('spawn claude')) return 'spawn' - if (msg.startsWith('claude done')) return 'claude-done' - if (msg.startsWith('cleanup')) return 'cleanup' - if (msg.startsWith('exit code=')) return 'exit' - if (msg.startsWith('ERROR')) return 'error' - if (msg.startsWith('TOKEN_EXPIRED detected')) return 'token-expired' - if (msg.startsWith('claim timeout')) return 'timeout' - return 'other' -} - -/** Cheap single-pass summary for the table — at most one JSON.parse (the result line). */ -export function summarizeRunLog(raw: string, fileName: string): RunLogSummary { - const runId = runIdFromFileName(fileName) - const lines = raw.split('\n') - - let jobId: string | null = null - let model: string | null = null - let permissionMode: string | null = null - let claudeExit: number | null = null - let runExit: number | null = null - let durationMs: number | null = null - let numTurns: number | null = null - let totalCostUsd: number | null = null - let eventCount = 0 - let hasResult = false - let resultIsError = false - let resultSubtype: string | null = null - let tokenExpired = false - let hasErrorLine = false - let firstErrorMsg: string | null = null - - for (const line of lines) { - if (!line) continue - const m = line.match(META_RE) - if (m) { - const msg = m[2] - if (msg.startsWith('claimed job_id=')) { - jobId = msg.slice('claimed job_id='.length).trim() || jobId - } else if (msg.startsWith('config ')) { - model = /\bmodel=(\S+)/.exec(msg)?.[1] ?? model - permissionMode = /\bpermission_mode=(\S+)/.exec(msg)?.[1] ?? permissionMode - } else if (msg.startsWith('claude done')) { - const e = /\bexit_code=(-?\d+)/.exec(msg) - if (e) claudeExit = Number(e[1]) - const d = /\bduration_ms=(\d+)/.exec(msg) - if (d) durationMs = Number(d[1]) - } else if (msg.startsWith('exit code=')) { - const e = /exit code=(-?\d+)/.exec(msg) - if (e) runExit = Number(e[1]) - } else if (msg.startsWith('TOKEN_EXPIRED detected')) { - tokenExpired = true - } else if (msg.startsWith('ERROR')) { - hasErrorLine = true - if (!firstErrorMsg) firstErrorMsg = msg.replace(/^ERROR\s*/, '').slice(0, 300) - } - continue - } - const trimmed = line.trimStart() - if (trimmed.startsWith('{')) { - eventCount++ - if (!hasResult && trimmed.startsWith('{"type":"result"')) { - try { - const obj = JSON.parse(trimmed) - hasResult = true - resultIsError = !!obj.is_error - resultSubtype = typeof obj.subtype === 'string' ? obj.subtype : null - if (typeof obj.num_turns === 'number') numTurns = obj.num_turns - if (typeof obj.total_cost_usd === 'number') totalCostUsd = obj.total_cost_usd - if (durationMs == null && typeof obj.duration_ms === 'number') durationMs = obj.duration_ms - } catch { - // malformed result line — ignore - } - } - } - } - - const exitCode = claudeExit ?? 
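/* prefer the exit code claude itself reported ('claude done exit_code=');
   the wrapper's 'exit code=' line is the fallback */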
runExit - const terminal = runExit != null || hasResult || hasErrorLine || tokenExpired - const inProgress = !terminal - - let status: RunStatus - if (tokenExpired) { - status = 'token-expired' - } else if (jobId) { - if (inProgress) { - status = 'running' - } else if ( - resultIsError || - hasErrorLine || - (claudeExit != null && claudeExit !== 0) || - (runExit != null && runExit !== 0) - ) { - status = 'error' - } else { - status = 'success' - } - } else { - // No job was claimed this iteration — the worker was idle / waiting. - status = 'idle' - } - - let errorSummary: string | null = null - if (status === 'error' || status === 'token-expired') { - errorSummary = - firstErrorMsg ?? - (tokenExpired ? 'TOKEN_EXPIRED detected in output' : null) ?? - (resultIsError ? `result: ${resultSubtype ?? 'error'}` : null) ?? - (exitCode != null ? `exit code ${exitCode}` : null) - } - - return { - fileName, - runId, - startedAt: startedAtFromRunId(runId), - status, - jobId, - model, - permissionMode, - durationMs, - numTurns, - totalCostUsd, - exitCode, - eventCount, - inProgress, - errorSummary, - } -} - -function normalizeContent(content: unknown): string { - if (typeof content === 'string') return content - if (Array.isArray(content)) { - return content - .map((b) => { - if (typeof b === 'string') return b - if (b && typeof b === 'object' && typeof (b as { text?: unknown }).text === 'string') { - return (b as { text: string }).text - } - return JSON.stringify(b) - }) - .join('\n') - } - if (content == null) return '' - return JSON.stringify(content) -} - -/* eslint-disable @typescript-eslint/no-explicit-any -- stream-json events are genuinely dynamic */ -function pushJsonEvent(events: LogEvent[], obj: any): void { - const type = obj?.type - const ts: string | null = typeof obj?.timestamp === 'string' ? obj.timestamp : null - - if (type === 'system') { - const mcp = Array.isArray(obj.mcp_servers) - ? obj.mcp_servers.map((s: any) => (typeof s?.name === 'string' ? s.name : String(s))) - : [] - events.push({ - kind: 'system-init', - ts, - model: typeof obj.model === 'string' ? obj.model : '—', - permissionMode: typeof obj.permissionMode === 'string' ? obj.permissionMode : '—', - tools: Array.isArray(obj.tools) ? obj.tools.filter((t: unknown) => typeof t === 'string') : [], - mcpServers: mcp, - sessionId: typeof obj.session_id === 'string' ? obj.session_id : '', - cwd: typeof obj.cwd === 'string' ? obj.cwd : '', - version: typeof obj.claude_code_version === 'string' ? obj.claude_code_version : '', - }) - return - } - - if (type === 'rate_limit_event') { - events.push({ - kind: 'rate-limit', - ts, - status: typeof obj.rate_limit_info?.status === 'string' ? 
obj.rate_limit_info.status : 'unknown', - }) - return - } - - if (type === 'assistant') { - const content = obj?.message?.content - if (Array.isArray(content)) { - for (const block of content) { - if (block?.type === 'text' && typeof block.text === 'string') { - const c = cap(block.text, TEXT_CAP) - events.push({ kind: 'assistant-text', ts, text: c.text, truncated: c.truncated }) - } else if (block?.type === 'thinking' && typeof block.thinking === 'string') { - const c = cap(block.thinking, TEXT_CAP) - events.push({ kind: 'thinking', ts, text: c.text, truncated: c.truncated }) - } else if (block?.type === 'tool_use') { - let inputStr: string - try { - inputStr = JSON.stringify(block.input, null, 2) - } catch { - inputStr = String(block.input) - } - const c = cap(inputStr, TOOL_INPUT_CAP) - events.push({ - kind: 'tool-call', - ts, - id: typeof block.id === 'string' ? block.id : '', - name: typeof block.name === 'string' ? block.name : 'tool', - input: c.text, - truncated: c.truncated, - }) - } - } - } - return - } - - if (type === 'user') { - const content = obj?.message?.content - if (Array.isArray(content)) { - for (const block of content) { - if (block?.type === 'tool_result') { - const body = normalizeContent(block.content) - const c = cap(body, TOOL_RESULT_CAP) - events.push({ - kind: 'tool-result', - ts, - toolUseId: typeof block.tool_use_id === 'string' ? block.tool_use_id : '', - isError: !!block.is_error, - body: c.text, - truncated: c.truncated, - fullLength: body.length, - }) - } - } - } - return - } - - if (type === 'result') { - const c = cap(typeof obj.result === 'string' ? obj.result : '', TEXT_CAP) - events.push({ - kind: 'result', - ts, - subtype: typeof obj.subtype === 'string' ? obj.subtype : 'unknown', - isError: !!obj.is_error, - durationMs: typeof obj.duration_ms === 'number' ? obj.duration_ms : null, - numTurns: typeof obj.num_turns === 'number' ? obj.num_turns : null, - totalCostUsd: typeof obj.total_cost_usd === 'number' ? obj.total_cost_usd : null, - resultText: c.text, - resultTruncated: c.truncated, - }) - return - } - - // Unknown event type — keep a compact raw note so nothing is silently dropped. - events.push({ kind: 'raw', ts, text: cap(`${type ?? 'event'}: ${JSON.stringify(obj)}`, 2048).text }) -} -/* eslint-enable @typescript-eslint/no-explicit-any */ - -function estimateSize(e: LogEvent): number { - switch (e.kind) { - case 'assistant-text': - case 'thinking': - case 'raw': - return e.text.length - case 'tool-call': - return e.input.length - case 'tool-result': - return e.body.length - case 'result': - return e.resultText.length - default: - return 64 - } -} - -/** Bound the whole payload — drop tool-result bodies oldest-first if still too large. */ -function enforceResponseCap(events: LogEvent[]): boolean { - let total = 0 - for (const e of events) total += estimateSize(e) - if (total <= RESPONSE_CAP) return false - for (const e of events) { - if (total <= RESPONSE_CAP) break - if (e.kind === 'tool-result' && e.body) { - total -= e.body.length - e.body = '' - e.truncated = true - } - } - return true -} - -/** Full event timeline for the detail panel. 
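Meta lines become 'meta' events, stream-json lines become typed events, and anything unparseable is kept as a 'raw' event so nothing is silently dropped.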
*/ -export function parseRunLog(raw: string, fileName: string): ParsedRunLog { - const summary = summarizeRunLog(raw, fileName) - const events: LogEvent[] = [] - - for (const line of raw.split('\n')) { - if (!line.trim()) continue - const m = line.match(META_RE) - if (m) { - events.push({ kind: 'meta', ts: m[1], tag: classifyMeta(m[2]), text: m[2] }) - continue - } - const trimmed = line.trimStart() - if (trimmed.startsWith('{')) { - try { - pushJsonEvent(events, JSON.parse(trimmed)) - } catch { - // partial / malformed JSON line (e.g. a log read mid-write) — keep it raw - events.push({ kind: 'raw', ts: null, text: cap(line, TOOL_RESULT_CAP).text }) - } - continue - } - // Non-JSON, non-meta noise (e.g. a bare `Warning: ...` from claude). - events.push({ kind: 'raw', ts: null, text: cap(line, TOOL_RESULT_CAP).text }) - } - - const responseTruncated = enforceResponseCap(events) - return { summary, events, inProgress: summary.inProgress, responseTruncated } -} diff --git a/lib/utils.ts b/lib/utils.ts index af3e365..49c77ca 100644 --- a/lib/utils.ts +++ b/lib/utils.ts @@ -14,15 +14,3 @@ export function relativeTime(date: Date): string { if (hours < 24) return `${hours}u geleden` return `${Math.floor(hours / 24)}d geleden` } - -/** Human-readable duration from a millisecond count. */ -export function formatDuration(ms: number): string { - if (ms < 1000) return `${ms}ms` - const totalSec = Math.round(ms / 1000) - if (totalSec < 60) return `${totalSec}s` - const minutes = Math.floor(totalSec / 60) - const seconds = totalSec % 60 - if (minutes < 60) return `${minutes}m ${seconds}s` - const hours = Math.floor(minutes / 60) - return `${hours}u ${minutes % 60}m` -} diff --git a/lib/worker-logs.ts b/lib/worker-logs.ts deleted file mode 100644 index 0c4e003..0000000 --- a/lib/worker-logs.ts +++ /dev/null @@ -1,116 +0,0 @@ -// lib/worker-logs.ts -// -// Server-only filesystem access to the worker run-logs. The directory is -// mounted read-only into the ops-dashboard container (see docker-compose.yml: -// `/srv/scrum4me/worker-logs:/var/worker-logs:ro`). Path configurable via the -// WORKER_LOGS_DIR env var. -// -// Only imported by server components and route handlers — never by a -// 'use client' file. - -import 'server-only' -import { readdir, readFile } from 'node:fs/promises' -import { gunzipSync } from 'node:zlib' -import { join, resolve } from 'node:path' -import { summarizeRunLog, type RunLogSummary } from './parse-worker-log' - -const WORKER_LOGS_DIR = process.env.WORKER_LOGS_DIR ?? '/var/worker-logs/idea' -const RUNS_DIR = join(WORKER_LOGS_DIR, 'runs') - -/** Selectable row counts for the table. */ -export const LIMIT_OPTIONS = [10, 25, 50, 100] as const -const DEFAULT_LIMIT = 10 - -// Filenames are `$(date -u +%Y%m%dT%H%M%SZ).log` — no slashes, no dots beyond -// the literal suffix, so this regex alone rules out path traversal. -const NAME_RE = /^\d{8}T\d{6}Z\.log(\.gz)?$/ - -export type WorkerLogErrorCode = 'invalid' | 'not-found' | 'unavailable' - -export class WorkerLogError extends Error { - readonly code: WorkerLogErrorCode - constructor(message: string, code: WorkerLogErrorCode) { - super(message) - this.name = 'WorkerLogError' - this.code = code - } -} - -/** Clamp an arbitrary requested limit down to the largest allowed option. 
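For example, 7 falls back to the default 10, 30 clamps down to 25, and 9999 caps at 100.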
*/
-export function clampLimit(n: number): number {
-  if (!Number.isFinite(n)) return DEFAULT_LIMIT
-  let chosen: number = DEFAULT_LIMIT
-  for (const opt of LIMIT_OPTIONS) {
-    if (n >= opt) chosen = opt
-  }
-  return chosen
-}
-
-export function isValidLogName(name: string): boolean {
-  return NAME_RE.test(name)
-}
-
-function resolveLogPath(name: string): string {
-  if (!isValidLogName(name)) {
-    throw new WorkerLogError(`invalid log name: ${name}`, 'invalid')
-  }
-  const base = resolve(RUNS_DIR)
-  const full = resolve(base, name)
-  // Defense-in-depth: the regex already forbids traversal, but confirm anyway.
-  if (full !== join(base, name)) {
-    throw new WorkerLogError(`path escapes worker logs dir: ${name}`, 'invalid')
-  }
-  return full
-}
-
-async function readLogFile(name: string): Promise<string> {
-  const full = resolveLogPath(name)
-  if (name.endsWith('.gz')) {
-    const buf = await readFile(full)
-    return gunzipSync(buf).toString('utf8')
-  }
-  return readFile(full, 'utf8')
-}
-
-/** Newest-first summaries for the table. Sorts by filename, slices, then reads. */
-export async function listRunLogs(limit: number): Promise<RunLogSummary[]> {
-  const n = clampLimit(limit)
-
-  let entries: string[]
-  try {
-    entries = await readdir(RUNS_DIR)
-  } catch (err) {
-    throw new WorkerLogError(
-      `cannot read worker logs dir ${RUNS_DIR}: ${(err as Error).message}`,
-      'unavailable',
-    )
-  }
-
-  // Filename is `YYYYMMDDTHHMMSSZ` — lexicographic order == chronological order.
-  // Sort + slice BEFORE touching file content (the dir holds ~12k files).
-  const names = entries.filter(isValidLogName).sort().reverse().slice(0, n)
-
-  return Promise.all(
-    names.map(async (name) => {
-      try {
-        return summarizeRunLog(await readLogFile(name), name)
-      } catch {
-        // A single unreadable / mid-rotation file must not break the table.
-        return { ...summarizeRunLog('', name), status: 'unknown' as const, inProgress: false }
-      }
-    }),
-  )
-}
-
-/** Raw contents of one run-log (gunzipped if needed). */
-export async function readRunLog(name: string): Promise<string> {
-  try {
-    return await readLogFile(name)
-  } catch (err) {
-    if (err instanceof WorkerLogError) throw err
-    if ((err as NodeJS.ErrnoException).code === 'ENOENT') {
-      throw new WorkerLogError(`log not found: ${name}`, 'not-found')
-    }
-    throw new WorkerLogError(`cannot read log ${name}: ${(err as Error).message}`, 'unavailable')
-  }
-}
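For reference, a hypothetical sketch of a server-side consumer of the two deleted modules above (the `'server-only'` import means this could only run inside the Next.js server build; the function name is invented):

```ts
// Hypothetical consumer of lib/worker-logs.ts + lib/parse-worker-log.ts.
import { listRunLogs, readRunLog, clampLimit } from '@/lib/worker-logs'
import { parseRunLog } from '@/lib/parse-worker-log'

export async function latestRunOverview() {
  // clampLimit snaps any requested size onto LIMIT_OPTIONS (7 → 10, 30 → 25).
  const summaries = await listRunLogs(clampLimit(25))
  const rows = summaries.map((s) => `${s.runId}  ${s.status}  job=${s.jobId ?? '-'}`)
  if (summaries.length === 0) return { rows, newestEventCount: 0 }

  // Full timeline for the newest run; payload size is bounded by the parser.
  const newest = summaries[0]
  const parsed = parseRunLog(await readRunLog(newest.fileName), newest.fileName)
  return { rows, newestEventCount: parsed.events.length }
}
```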
diff --git a/ops-agent/commands.yml.example b/ops-agent/commands.yml.example
index 7100079..4c7cd2f 100644
--- a/ops-agent/commands.yml.example
+++ b/ops-agent/commands.yml.example
@@ -107,20 +107,6 @@ commands:
         - ops-dashboard
     description: "Build a docker compose service image"
 
-  docker_compose_build_worker_fresh:
-    # The worker-idea Dockerfile clones scrum4me-mcp from GitHub in a separate
-    # layer. A plain docker compose build reuses that layer as long as
-    # MCP_GIT_REF stays the same (= always 'main'), so new MCP commits are
-    # NOT picked up. MCP_CACHE_BUST with a fresh timestamp invalidates the
-    # clone layer. sh -c is needed to evaluate $(date) (no shell injection:
-    # fixed string, no external input).
-    cmd:
-      - sh
-      - -c
-      - "docker compose build --build-arg MCP_CACHE_BUST=$(date +%s) worker-idea"
-    cwd: "/srv/scrum4me/compose"
-    description: "Rebuild worker-idea image, busting the scrum4me-mcp clone cache so the latest MCP code is pulled"
-
   docker_compose_up:
     cmd: ["docker", "compose", "up", "-d"]
     cwd: "/srv/scrum4me/compose"
@@ -250,51 +236,3 @@ commands:
         - -delete
         - -print
     description: "Delete ops_dashboard backup files older than 30 days"
-
-  # ── Server-wide backup (restic + NAS + B2) ────────────────────────────────
-  # All wrappers live under /srv/backups/scripts/wrappers/ and read
-  # /etc/restic-backup.env (mode 0600 root:root) which the ops-agent user
-  # cannot read directly — hence the sudo prefix. See deploy/ops-agent/sudoers
-  # for the corresponding NOPASSWD entries.
-
-  read_backup_status:
-    cmd: ["sudo", "-n", "/srv/backups/scripts/wrappers/read-status.sh"]
-    description: "Read /srv/backups/status/last-run.json + last-restore-test.json (JSON)"
-
-  restic_snapshots_nas:
-    cmd: ["sudo", "-n", "/srv/backups/scripts/wrappers/restic-snapshots.sh", "nas"]
-    description: "Restic snapshots from the NAS repo (JSON array, newest first)"
-
-  restic_snapshots_b2:
-    cmd: ["sudo", "-n", "/srv/backups/scripts/wrappers/restic-snapshots.sh", "b2"]
-    description: "Restic snapshots from the B2 repo (JSON array, newest first)"
-
-  restic_stats_nas:
-    cmd: ["sudo", "-n", "/srv/backups/scripts/wrappers/restic-stats.sh", "nas"]
-    description: "Restic stats for the NAS repo (restore-size + raw-data + dedup ratio)"
-
-  restic_stats_b2:
-    cmd: ["sudo", "-n", "/srv/backups/scripts/wrappers/restic-stats.sh", "b2"]
-    description: "Restic stats for the B2 repo (restore-size + raw-data + dedup ratio)"
-
-  list_backup_logs:
-    cmd:
-      - sh
-      - -c
-      - "ls -lt /srv/backups/logs/*.log 2>/dev/null | head -10 || echo 'no logs yet'"
-    description: "List the 10 most recent server-backup logs"
-
-  tail_backup_log_today:
-    cmd:
-      - sh
-      - -c
-      - "f=/srv/backups/logs/server-backup-$(date +%F).log; [ -f \"$f\" ] && tail -200 \"$f\" || echo 'no log for today'"
-    description: "Tail the last 200 lines of today's server-backup log"
-
-  trigger_server_backup:
-    cmd: ["sudo", "-n", "/srv/backups/scripts/wrappers/trigger-backup.sh"]
-    description: "Trigger server-backup.service ad-hoc (refuses if already running)"
-
-  trigger_restore_test:
-    cmd: ["sudo", "-n", "/srv/backups/scripts/wrappers/trigger-restore-test.sh", "nas"]
-    description: "Run restore-test.sh against the NAS repo (non-destructive, writes /tmp/restore-test/)"
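The whitelist above pairs with the `spawn(bin, args, {shell: false})` rule from the security table. A minimal sketch of that pattern (the entry shape and function name are assumptions; the real loader and exec route live in ops-agent and are not shown here):

```ts
import { spawn } from 'node:child_process'

// Assumed shape of one parsed commands.yml entry.
type CommandSpec = { cmd: string[]; cwd?: string; description?: string }

// Only keys present in the parsed YAML can run; argv is passed as an array
// with shell: false, so nothing is ever interpreted by a shell.
export function execWhitelisted(commands: Record<string, CommandSpec>, key: string) {
  const spec = commands[key]
  if (!spec) throw new Error(`unknown command key: ${key}`)
  const [bin, ...args] = spec.cmd
  return spawn(bin, args, { cwd: spec.cwd, shell: false })
}
```

The `sh -c` entries are the deliberate exception: the shell is itself the whitelisted binary, with a fixed string argument and no caller input.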
diff --git a/ops-agent/flows.example/redeploy_all.yml b/ops-agent/flows.example/redeploy_all.yml
deleted file mode 100644
index 4a3ee3e..0000000
--- a/ops-agent/flows.example/redeploy_all.yml
+++ /dev/null
@@ -1,86 +0,0 @@
-# Full redeploy of the Scrum4Me stack: all three repos in one flow.
-# Copy to /etc/ops-agent/flows/redeploy_all.yml on the host.
-#
-# This is the combined procedure: first the main app (scrum4me-web), then the
-# worker (the scrum4me-docker image with a fresh scrum4me-mcp clone).
-# Equivalent to update_scrum4me_web.yml followed by update_mcp_worker.yml,
-# but as one atomic flow with an audit trail.
-#
-# Ordering rationale:
-# - Web first: the DB migration (step 6) is additive and non-breaking, so it
-#   is safe while the old worker is still running.
-# - Worker after that: the new MCP code may depend on the new DB columns/enums
-#   from the web migration.
-#
-# Steps:
-#   1-9.   scrum4me-web: status, fetch, log-ahead, pull, npm ci, migrate,
-#          build, restart service, smoke-test
-#   10-16. worker: status + fetch + pull scrum4me-docker, pull scrum4me-mcp,
-#          cache-busted image rebuild, container recreate, health-wait
-#
-# Note: the worker rebuild MUST use docker_compose_build_worker_fresh, not
-# docker_compose_build; otherwise the scrum4me-mcp clone layer stays cached
-# and new MCP code is missed.
-
-name: Redeploy All
-description: Full stack redeploy — scrum4me-web (pull/migrate/build/restart) followed by the MCP worker (cache-busted image rebuild)
-steps:
-  # --- scrum4me-web -------------------------------------------------------
-  - command_key: git_status
-    args: ["/srv/scrum4me/repos/Scrum4Me"]
-    on_failure: continue
-
-  - command_key: git_fetch
-    args: ["/srv/scrum4me/repos/Scrum4Me"]
-    on_failure: abort
-
-  - command_key: git_log_ahead
-    args: ["/srv/scrum4me/repos/Scrum4Me"]
-    on_failure: continue
-
-  - command_key: git_pull
-    args: ["/srv/scrum4me/repos/Scrum4Me"]
-    on_failure: abort
-
-  - command_key: npm_ci
-    on_failure: abort
-
-  - command_key: prisma_migrate_deploy
-    on_failure: abort
-
-  - command_key: npm_run_build
-    on_failure: abort
-
-  - command_key: systemctl_restart
-    args: ["scrum4me-web"]
-    on_failure: abort
-
-  - command_key: curl_smoke_scrum4me_thuis
-    on_failure: continue
-
-  # --- MCP-worker ---------------------------------------------------------
-  - command_key: git_status
-    args: ["/srv/scrum4me/repos/scrum4me-docker"]
-    on_failure: continue
-
-  - command_key: git_fetch
-    args: ["/srv/scrum4me/repos/scrum4me-docker"]
-    on_failure: abort
-
-  - command_key: git_pull
-    args: ["/srv/scrum4me/repos/scrum4me-docker"]
-    on_failure: abort
-
-  - command_key: git_pull
-    args: ["/srv/scrum4me/repos/scrum4me-mcp"]
-    on_failure: continue
-
-  - command_key: docker_compose_build_worker_fresh
-    on_failure: abort
-
-  - command_key: docker_compose_up_recreate
-    args: ["worker-idea"]
-    on_failure: abort
-
-  - command_key: wait_for_health_worker
-    on_failure: continue
diff --git a/ops-agent/flows.example/server_backup_full.yml b/ops-agent/flows.example/server_backup_full.yml
deleted file mode 100644
index 9beb0f9..0000000
--- a/ops-agent/flows.example/server_backup_full.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-# Trigger a full server-wide backup (pg_dumpall + restic to NAS + B2).
-# Runs out-of-band via systemd; this flow just kicks it off and then tails
-# today's log + reads the structured status file so the dashboard can render
-# progress and the final result.
-#
-# Copy to /etc/ops-agent/flows/server_backup_full.yml on the host.
-# Triggered manually via /settings/backups → "Backup now" or by the daily
-# server-backup.timer (which runs server-backup.service directly, skipping
-# this flow).
-
-name: Server backup (full)
-description: Daily full server backup — pg_dumpall + restic to NAS + B2 (Object Lock)
-steps:
-  - command_key: trigger_server_backup
-    on_failure: abort
-
-  - command_key: tail_backup_log_today
-    on_failure: continue
-
-  - command_key: read_backup_status
-    on_failure: continue
diff --git a/ops-agent/flows.example/server_backup_restore_test.yml b/ops-agent/flows.example/server_backup_restore_test.yml
deleted file mode 100644
index 1ed5b31..0000000
--- a/ops-agent/flows.example/server_backup_restore_test.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-# Run a non-destructive restore test against the NAS repo. Restores the latest
-# snapshot to /tmp/restore-test/ and asserts that critical files came back
-# intact. Used to verify backups periodically without touching the live stack.
-#
-# Copy to /etc/ops-agent/flows/server_backup_restore_test.yml on the host.
-
-name: Server backup — restore test
-description: Restore latest snapshot to /tmp/restore-test and assert critical files
-steps:
-  - command_key: trigger_restore_test
-    on_failure: continue
-
-  - command_key: read_backup_status
-    on_failure: continue
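The `on_failure` field is what drives the runner's control flow in all of these files. A minimal sketch of the assumed semantics ('abort' stops the flow, 'continue' moves past a failed step); the real runner in ops-agent is not part of this diff:

```ts
type FlowStep = { command_key: string; args?: string[]; on_failure: 'abort' | 'continue' }

// exec is assumed to resolve with the step's exit code.
export async function runFlow(
  steps: FlowStep[],
  exec: (key: string, args: string[]) => Promise<number>,
): Promise<void> {
  for (const step of steps) {
    const code = await exec(step.command_key, step.args ?? [])
    if (code !== 0 && step.on_failure === 'abort') {
      throw new Error(`flow aborted at ${step.command_key} (exit ${code})`)
    }
    // on_failure: continue — the failure is recorded in the audit trail,
    // but the flow proceeds to the next step.
  }
}
```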
diff --git a/ops-agent/flows.example/update_mcp_worker.yml b/ops-agent/flows.example/update_mcp_worker.yml
index ef8784d..56fdb82 100644
--- a/ops-agent/flows.example/update_mcp_worker.yml
+++ b/ops-agent/flows.example/update_mcp_worker.yml
@@ -2,21 +2,15 @@
 # Copy to /etc/ops-agent/flows/update_mcp_worker.yml on the host.
 #
 # Steps:
-# 1. Show current git status of scrum4me-docker (informational)
-# 2. Fetch remote refs for scrum4me-docker
-# 3. Fast-forward pull scrum4me-docker (aborts if working tree is dirty)
-# 4. Fast-forward pull scrum4me-mcp: sync of the local repo. The image clones
-#    MCP itself from GitHub, so this is local reference only; on_failure:
-#    continue so that a dirty mcp tree does not block the deploy.
-# 5. Rebuild the worker image WITH cache bust. A plain build reuses the
-#    scrum4me-mcp clone layer (MCP_GIT_REF stays 'main'), so new MCP commits
-#    are missed. docker_compose_build_worker_fresh forces a fresh clone via
-#    MCP_CACHE_BUST.
-# 6. Recreate the container (force-recreate picks up the new image)
-# 7. Wait for worker pre-flight to pass (checks /var/log/agent/current)
+# 1. Show current git status (informational)
+# 2. Fetch remote refs
+# 3. Fast-forward pull (aborts if working tree is dirty)
+# 4. Rebuild the Docker image
+# 5. Recreate the container in detached mode (force-recreate picks up new image)
+# 6. Wait for worker pre-flight to pass (checks /var/log/agent/current)
 
 name: Update MCP Worker
-description: Pull latest code, rebuild the worker image with a fresh scrum4me-mcp clone, and recreate the worker container
+description: Pull latest code, rebuild Docker image, and restart the MCP worker service
 steps:
   - command_key: git_status
     args: ["/srv/scrum4me/repos/scrum4me-docker"]
@@ -30,11 +24,8 @@ steps:
     args: ["/srv/scrum4me/repos/scrum4me-docker"]
     on_failure: abort
 
-  - command_key: git_pull
-    args: ["/srv/scrum4me/repos/scrum4me-mcp"]
-    on_failure: continue
-
-  - command_key: docker_compose_build_worker_fresh
+  - command_key: docker_compose_build
+    args: ["worker-idea"]
     on_failure: abort
 
   - command_key: docker_compose_up_recreate
diff --git a/ops-agent/src/routes/exec.ts b/ops-agent/src/routes/exec.ts
index d017246..9a393ec 100644
--- a/ops-agent/src/routes/exec.ts
+++ b/ops-agent/src/routes/exec.ts
@@ -108,33 +108,20 @@ export async function execRoutes(app: FastifyInstance): Promise<void> {
       sendEvent('stderr', chunk.toString());
     });
 
-    // Keep the route handler open until the child is done. Without this, the
-    // async function returns immediately, Fastify finalizes the reply, and
-    // that triggers `req.raw.on('close')` → `child.kill()` before the child
-    // has had a chance to do anything.
-    await new Promise<void>((resolve) => {
-      let settled = false
-      const finish = () => {
-        if (settled) return
-        settled = true
-        resolve()
-      }
-      child.on('close', (code) => {
-        auditLog(command_key, args, code, Date.now() - startedAt)
-        reply.raw.write(`event: exit\ndata: ${JSON.stringify({ code })}\n\n`)
-        reply.raw.end()
-        finish()
-      })
-      child.on('error', (err) => {
-        auditLog(command_key, args, null, Date.now() - startedAt)
-        reply.raw.write(`event: error\ndata: ${JSON.stringify({ message: err.message })}\n\n`)
-        reply.raw.end()
-        finish()
-      })
-      // Detect client disconnect via the response stream (not the request
-      // stream; that one already fires right after the request body parse).
-      reply.raw.on('close', () => {
-        if (!settled) child.kill()
-      })
-    })
+    child.on('close', (code) => {
+      auditLog(command_key, args, code, Date.now() - startedAt);
+      reply.raw.write(`event: exit\ndata: ${JSON.stringify({ code })}\n\n`);
+      reply.raw.end();
+    });
+
+    child.on('error', (err) => {
+      auditLog(command_key, args, null, Date.now() - startedAt);
+      reply.raw.write(`event: error\ndata: ${JSON.stringify({ message: err.message })}\n\n`);
+      reply.raw.end();
+    });
+
+    req.raw.on('close', () => {
+      child.kill();
+    });
   });
 }
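Both branches of this hunk hand-roll the Server-Sent Events wire format. For reference, a sketch of the framing they emit (the helper name is invented; `sendEvent` earlier in the handler presumably produces the same shape):

```ts
// One SSE frame: a named event plus a JSON payload, terminated by a blank line.
function sseFrame(event: string, data: unknown): string {
  return `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`
}

// e.g. sseFrame('exit', { code: 0 }) === 'event: exit\ndata: {"code":0}\n\n'
```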
diff --git a/package-lock.json b/package-lock.json
index 0eb4437..51b6a01 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,19 +9,13 @@
         "version": "0.1.0",
         "dependencies": {
           "@base-ui/react": "^1.4.1",
-          "@codemirror/language": "^6.12.3",
-          "@codemirror/legacy-modes": "^6.5.2",
-          "@codemirror/state": "^6.6.0",
-          "@codemirror/view": "^6.42.1",
           "@prisma/adapter-pg": "^7.8.0",
           "@prisma/client": "^7.8.0",
           "@types/bcryptjs": "^2.4.6",
           "@types/pg": "^8.20.0",
-          "@uiw/react-codemirror": "^4.25.9",
           "bcryptjs": "^3.0.3",
           "class-variance-authority": "^0.7.1",
           "clsx": "^2.1.1",
-          "codemirror": "^6.0.2",
           "lucide-react": "^1.14.0",
           "next": "16.2.6",
           "pg": "^8.20.0",
@@ -557,108 +551,6 @@
         }
       }
     },
-    "node_modules/@codemirror/autocomplete": {
-      "version": "6.20.2",
-      "resolved": "https://registry.npmjs.org/@codemirror/autocomplete/-/autocomplete-6.20.2.tgz",
-      "integrity": "sha512-G5FPkgIiLjOgZMjqVjvuKQ1rGPtHogLldJr33eFJdVLtmwY+giGrlv/ewljLz6b9BSQLkjxuwBc6g6omDM+YxQ==",
-      "license": "MIT",
-      "dependencies": {
-        "@codemirror/language": "^6.0.0",
-        "@codemirror/state": "^6.0.0",
-        "@codemirror/view": "^6.17.0",
-        "@lezer/common": "^1.0.0"
-      }
-    },
-    "node_modules/@codemirror/commands": {
-      "version": "6.10.3",
-      "resolved": "https://registry.npmjs.org/@codemirror/commands/-/commands-6.10.3.tgz",
-      "integrity": "sha512-JFRiqhKu+bvSkDLI+rUhJwSxQxYb759W5GBezE8Uc8mHLqC9aV/9aTC7yJSqCtB3F00pylrLCwnyS91Ap5ej4Q==",
-      "license": "MIT",
-      "dependencies": {
-        "@codemirror/language": "^6.0.0",
-        "@codemirror/state": "^6.6.0",
-        "@codemirror/view": "^6.27.0",
-        "@lezer/common": "^1.1.0"
-      }
-    },
-    "node_modules/@codemirror/language": {
-      "version": "6.12.3",
-      "resolved": "https://registry.npmjs.org/@codemirror/language/-/language-6.12.3.tgz",
-      "integrity": "sha512-QwCZW6Tt1siP37Jet9Tb02Zs81TQt6qQrZR2H+eGMcFsL1zMrk2/b9CLC7/9ieP1fjIUMgviLWMmgiHoJrj+ZA==",
-      "license": "MIT",
-      "dependencies": {
-        "@codemirror/state": "^6.0.0",
-        "@codemirror/view": "^6.23.0",
-        "@lezer/common": "^1.5.0",
-        "@lezer/highlight": "^1.0.0",
-        "@lezer/lr": "^1.0.0",
-        "style-mod": "^4.0.0"
-      }
-    },
-    "node_modules/@codemirror/legacy-modes": {
-      "version": "6.5.2",
-      "resolved": "https://registry.npmjs.org/@codemirror/legacy-modes/-/legacy-modes-6.5.2.tgz",
-      "integrity":
"sha512-/jJbwSTazlQEDOQw2FJ8LEEKVS72pU0lx6oM54kGpL8t/NJ2Jda3CZ4pcltiKTdqYSRk3ug1B3pil1gsjA6+8Q==", - "license": "MIT", - "dependencies": { - "@codemirror/language": "^6.0.0" - } - }, - "node_modules/@codemirror/lint": { - "version": "6.9.6", - "resolved": "https://registry.npmjs.org/@codemirror/lint/-/lint-6.9.6.tgz", - "integrity": "sha512-6Kp7r6XfCi/D/5sdXieMfg9pJU1bUEx96WITuLU6ESaKizCz0QHFMjY/TaFSbigDdEAIgi93itLBIUETP4oK+A==", - "license": "MIT", - "dependencies": { - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.42.0", - "crelt": "^1.0.5" - } - }, - "node_modules/@codemirror/search": { - "version": "6.7.0", - "resolved": "https://registry.npmjs.org/@codemirror/search/-/search-6.7.0.tgz", - "integrity": "sha512-ZvGm99wc/s2cITtMT15LFdn8aH/aS+V+DqyGq/N5ZlV5vWtH+nILvC2nw0zX7ByNoHHDZ2IxxdW38O0tc5nVHg==", - "license": "MIT", - "dependencies": { - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.37.0", - "crelt": "^1.0.5" - } - }, - "node_modules/@codemirror/state": { - "version": "6.6.0", - "resolved": "https://registry.npmjs.org/@codemirror/state/-/state-6.6.0.tgz", - "integrity": "sha512-4nbvra5R5EtiCzr9BTHiTLc+MLXK2QGiAVYMyi8PkQd3SR+6ixar/Q/01Fa21TBIDOZXgeWV4WppsQolSreAPQ==", - "license": "MIT", - "dependencies": { - "@marijn/find-cluster-break": "^1.0.0" - } - }, - "node_modules/@codemirror/theme-one-dark": { - "version": "6.1.3", - "resolved": "https://registry.npmjs.org/@codemirror/theme-one-dark/-/theme-one-dark-6.1.3.tgz", - "integrity": "sha512-NzBdIvEJmx6fjeremiGp3t/okrLPYT0d9orIc7AFun8oZcRk58aejkqhv6spnz4MLAevrKNPMQYXEWMg4s+sKA==", - "license": "MIT", - "dependencies": { - "@codemirror/language": "^6.0.0", - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.0.0", - "@lezer/highlight": "^1.0.0" - } - }, - "node_modules/@codemirror/view": { - "version": "6.42.1", - "resolved": "https://registry.npmjs.org/@codemirror/view/-/view-6.42.1.tgz", - "integrity": "sha512-ToN3oFc0nsxNUYVF5P0ztLgbC4UPPjPtA9aKYhkOKQaZASpOUo6ISXyQLP66ctVwlDc+j6Jv0uK5IFALkiXztg==", - "license": "MIT", - "dependencies": { - "@codemirror/state": "^6.6.0", - "crelt": "^1.0.6", - "style-mod": "^4.1.0", - "w3c-keyname": "^2.2.4" - } - }, "node_modules/@cspotcode/source-map-support": { "version": "0.8.1", "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", @@ -1514,36 +1406,6 @@ "integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==", "license": "MIT" }, - "node_modules/@lezer/common": { - "version": "1.5.2", - "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.5.2.tgz", - "integrity": "sha512-sxQE460fPZyU3sdc8lafxiPwJHBzZRy/udNFynGQky1SePYBdhkBl1kOagA9uT3pxR8K09bOrmTUqA9wb/PjSQ==", - "license": "MIT" - }, - "node_modules/@lezer/highlight": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.3.tgz", - "integrity": "sha512-qXdH7UqTvGfdVBINrgKhDsVTJTxactNNxLk7+UMwZhU13lMHaOBlJe9Vqp907ya56Y3+ed2tlqzys7jDkTmW0g==", - "license": "MIT", - "dependencies": { - "@lezer/common": "^1.3.0" - } - }, - "node_modules/@lezer/lr": { - "version": "1.4.10", - "resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.10.tgz", - "integrity": "sha512-rnCpTIBafOx4mRp43xOxDJbFipJm/c0cia/V5TiGlhmMa+wsSdoGmUN3w5Bqrks/09Q/D4tNAmWaT8p6NRi77A==", - "license": "MIT", - "dependencies": { - "@lezer/common": "^1.0.0" - } - }, - "node_modules/@marijn/find-cluster-break": { - "version": "1.0.2", - "resolved": 
"https://registry.npmjs.org/@marijn/find-cluster-break/-/find-cluster-break-1.0.2.tgz", - "integrity": "sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==", - "license": "MIT" - }, "node_modules/@modelcontextprotocol/sdk": { "version": "1.29.0", "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.29.0.tgz", @@ -2700,59 +2562,6 @@ "integrity": "sha512-lrpDziQipxCEeK5kWxvljWYhUvOiB2A9izZd9B2AFarYAkqZshb4lPbRs7zKEic6eGtH8V/2qJW+dPp9OtF6bw==", "license": "MIT" }, - "node_modules/@uiw/codemirror-extensions-basic-setup": { - "version": "4.25.9", - "resolved": "https://registry.npmjs.org/@uiw/codemirror-extensions-basic-setup/-/codemirror-extensions-basic-setup-4.25.9.tgz", - "integrity": "sha512-QFAqr+pu6lDmNpAlecODcF49TlsrZ0bj15zPzfhiqSDl+Um3EsDLFLppixC7kFLn+rdDM2LTvVjn5CPvefpRgw==", - "license": "MIT", - "dependencies": { - "@codemirror/autocomplete": "^6.0.0", - "@codemirror/commands": "^6.0.0", - "@codemirror/language": "^6.0.0", - "@codemirror/lint": "^6.0.0", - "@codemirror/search": "^6.0.0", - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.0.0" - }, - "funding": { - "url": "https://jaywcjlove.github.io/#/sponsor" - }, - "peerDependencies": { - "@codemirror/autocomplete": ">=6.0.0", - "@codemirror/commands": ">=6.0.0", - "@codemirror/language": ">=6.0.0", - "@codemirror/lint": ">=6.0.0", - "@codemirror/search": ">=6.0.0", - "@codemirror/state": ">=6.0.0", - "@codemirror/view": ">=6.0.0" - } - }, - "node_modules/@uiw/react-codemirror": { - "version": "4.25.9", - "resolved": "https://registry.npmjs.org/@uiw/react-codemirror/-/react-codemirror-4.25.9.tgz", - "integrity": "sha512-HftqCBUYShAOH0pGi1CHP8vfm5L8fQ3+0j0VI6lQD6QpK+UBu3J7nxfEN5O/BXMilMNf9ZyFJRvRcuMMOLHMng==", - "license": "MIT", - "dependencies": { - "@babel/runtime": "^7.18.6", - "@codemirror/commands": "^6.1.0", - "@codemirror/state": "^6.1.1", - "@codemirror/theme-one-dark": "^6.0.0", - "@uiw/codemirror-extensions-basic-setup": "4.25.9", - "codemirror": "^6.0.0" - }, - "funding": { - "url": "https://jaywcjlove.github.io/#/sponsor" - }, - "peerDependencies": { - "@babel/runtime": ">=7.11.0", - "@codemirror/state": ">=6.0.0", - "@codemirror/theme-one-dark": ">=6.0.0", - "@codemirror/view": ">=6.0.0", - "codemirror": ">=6.0.0", - "react": ">=17.0.0", - "react-dom": ">=17.0.0" - } - }, "node_modules/@ungap/structured-clone": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.3.1.tgz", @@ -3321,21 +3130,6 @@ "integrity": "sha512-Oofo0pq3IKnsFtuHqSF7TqBfr71aeyZDVJ0HpmqB7FBM2qEigL0iPONSCZSO9pE9dZTAxANe5XHG9Uy0YMv8cg==", "license": "MIT" }, - "node_modules/codemirror": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/codemirror/-/codemirror-6.0.2.tgz", - "integrity": "sha512-VhydHotNW5w1UGK0Qj96BwSk/Zqbp9WbnyK2W/eVMv4QyF41INRGpjUhFJY7/uDNuudSc33a/PKr4iDqRduvHw==", - "license": "MIT", - "dependencies": { - "@codemirror/autocomplete": "^6.0.0", - "@codemirror/commands": "^6.0.0", - "@codemirror/language": "^6.0.0", - "@codemirror/lint": "^6.0.0", - "@codemirror/search": "^6.0.0", - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.0.0" - } - }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -3475,12 +3269,6 @@ "dev": true, "license": "MIT" }, - "node_modules/crelt": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/crelt/-/crelt-1.0.6.tgz", - "integrity": 
"sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==", - "license": "MIT" - }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -7165,12 +6953,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/style-mod": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/style-mod/-/style-mod-4.1.3.tgz", - "integrity": "sha512-i/n8VsZydrugj3Iuzll8+x/00GH2vnYsk1eomD8QiRrSAeW6ItbCQDtfXCeJHd0iwiNagqjQkvpvREEPtW3IoQ==", - "license": "MIT" - }, "node_modules/styled-jsx": { "version": "5.1.6", "resolved": "https://registry.npmjs.org/styled-jsx/-/styled-jsx-5.1.6.tgz", @@ -7665,12 +7447,6 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/w3c-keyname": { - "version": "2.2.8", - "resolved": "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.8.tgz", - "integrity": "sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==", - "license": "MIT" - }, "node_modules/web-streams-polyfill": { "version": "3.3.3", "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", diff --git a/package.json b/package.json index 2770f59..a9c9481 100644 --- a/package.json +++ b/package.json @@ -13,19 +13,13 @@ }, "dependencies": { "@base-ui/react": "^1.4.1", - "@codemirror/language": "^6.12.3", - "@codemirror/legacy-modes": "^6.5.2", - "@codemirror/state": "^6.6.0", - "@codemirror/view": "^6.42.1", "@prisma/adapter-pg": "^7.8.0", "@prisma/client": "^7.8.0", "@types/bcryptjs": "^2.4.6", "@types/pg": "^8.20.0", - "@uiw/react-codemirror": "^4.25.9", "bcryptjs": "^3.0.3", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "codemirror": "^6.0.2", "lucide-react": "^1.14.0", "next": "16.2.6", "pg": "^8.20.0",