Ops-dashboard/app/settings/backups/_components/server-backup-section.tsx
Madhura68 ab87c0fada feat(server-backup): restic dual-repo backup (NAS + B2) with dashboard UI
Adds a server-wide backup capability beyond the existing ops_dashboard
pg_dump flow:

- Daily systemd timer (03:30) runs pg_dumpall + Forgejo dump, then restic
  to a local NAS repo and an offsite Backblaze B2 repo with Object Lock.
  Phase-based script with single-instance flock, structured statusfile,
  systemd hardening, and live-datadir excludes (Postgres / Forgejo) so
  the dumps stay authoritative.
- Ops-agent gets nine new read-only/trigger commands (snapshots, stats,
  status, logs, plus two triggers) backed by sudoers-whitelisted wrapper
  scripts that source /etc/restic-backup.env so the agent never sees the
  restic password or B2 keys.
- Two new flows (server_backup_full, server_backup_restore_test) drive
  the dashboard's "Backup now" and "Restore test" buttons.
- /settings/backups gains a Server backup section with overall + per-phase
  status, NAS / B2 snapshot tables, restore-size / raw-data / dedup-ratio
  stats, and the last restore-test result. The existing pg_dump section
  is preserved unchanged.
- Runbook docs/runbooks/server-backup.md follows the tailscale-setup
  pattern (plan + addendum) and covers B2 Object Lock + scoped keys,
  Forgejo subplan with isolated restore-test stack, the off-server
  maintenance flow for B2 prune, and the integrity-check schedule.

Code-only change — installation on scrum4me-srv follows the runbook.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 13:03:00 +02:00

447 lines
16 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use client'
import { useCallback, useState } from 'react'
import Link from 'next/link'
import { useFlowRun } from '@/hooks/useFlowRun'
import StreamingTerminal from '@/components/StreamingTerminal'
import ConfirmDialog from '@/components/ConfirmDialog'
import type {
BackupPhase,
BackupStatus,
BackupStatusEnvelope,
OverallStatus,
PhaseStatus,
ResticSnapshot,
ResticStats,
} from '../_lib/types'
/** Props accepted by `ServerBackupSection`. */
type Props = {
  // Status envelope; the section reads `lastRun` and `lastRestoreTest` from it.
  envelope: BackupStatusEnvelope
  // Snapshot rows for the "NAS snapshots" table.
  nasSnapshots: ResticSnapshot[]
  // Snapshot rows for the "B2 snapshots" table.
  b2Snapshots: ResticSnapshot[]
  // Stats for the "NAS repo" block; null renders a "no stats yet" placeholder.
  nasStats: ResticStats | null
  // Stats for the "B2 repo" block; null renders a "no stats yet" placeholder.
  b2Stats: ResticStats | null
  // Optional per-source error messages. When a field is set, the matching part
  // of the section renders that message (for stats and snapshots it replaces the data).
  errors: {
    status?: string
    nasSnapshots?: string
    b2Snapshots?: string
    nasStats?: string
    b2Stats?: string
  }
}
// Which flow a piece of UI state refers to: 'backup' is started as the
// `server_backup_full` flow, 'restore' as `server_backup_restore_test`;
// null means none.
type ActiveFlow = 'backup' | 'restore' | null
// Binary unit steps above plain bytes, each paired with the number of decimals shown.
const BYTE_UNIT_STEPS = [
  ['KB', 0],
  ['MB', 1],
  ['GB', 2],
] as const

/**
 * Formats a byte count for display using 1024-based units.
 *
 * @param bytes - Size in bytes; may be null/undefined when unknown.
 * @returns '—' for missing, non-finite or negative input; otherwise the value
 *   in B (as given), KB (0 decimals), MB (1 decimal), GB or TB (2 decimals).
 */
function formatBytes(bytes: number | null | undefined): string {
  // NaN/Infinity/negative sizes are not meaningful; show the placeholder
  // instead of strings such as "NaN GB".
  if (bytes == null || !Number.isFinite(bytes) || bytes < 0) return '—'
  if (bytes < 1024) return `${bytes} B`

  let value = bytes / 1024
  for (const [suffix, digits] of BYTE_UNIT_STEPS) {
    if (value < 1024) return `${value.toFixed(digits)} ${suffix}`
    value /= 1024
  }
  // Anything at or above 1024 GB is reported in TB so large repositories stay readable.
  return `${value.toFixed(2)} TB`
}
/**
 * Formats a duration in seconds as a short human-readable string.
 *
 * @param seconds - Duration in seconds; may be null/undefined when unknown.
 * @returns '—' for missing, zero, negative or non-finite input; otherwise
 *   `Ns`, `Nm Ns` or `Nh Nm` depending on magnitude.
 */
function formatDuration(seconds: number | null | undefined): string {
  if (seconds == null || !Number.isFinite(seconds) || seconds <= 0) return '—'
  // Round first so fractional input cannot leak float noise into the output
  // (e.g. "1m 15.600000000000001s") and so 59.7s rolls over to "1m 0s".
  const total = Math.round(seconds)
  if (total < 60) return `${total}s`
  if (total < 3600) return `${Math.floor(total / 60)}m ${total % 60}s`
  const hours = Math.floor(total / 3600)
  const minutes = Math.floor((total % 3600) / 60)
  return `${hours}h ${minutes}m`
}
/**
 * Renders an ISO timestamp as `YYYY-MM-DD HH:mm` using the local time zone
 * of the environment the code runs in.
 *
 * @param iso - Timestamp string; may be null/undefined/empty.
 * @returns '—' for empty input, the input itself when it cannot be parsed,
 *   otherwise the formatted local date and time.
 */
function formatTimestamp(iso: string | null | undefined): string {
  if (!iso) return '—'

  const parsed = new Date(iso)
  if (Number.isNaN(parsed.getTime())) return iso

  const twoDigits = (n: number): string => String(n).padStart(2, '0')
  const datePart = [
    parsed.getFullYear(),
    twoDigits(parsed.getMonth() + 1), // getMonth() is zero-based
    twoDigits(parsed.getDate()),
  ].join('-')
  const timePart = [parsed.getHours(), parsed.getMinutes()].map(twoDigits).join(':')
  return `${datePart} ${timePart}`
}
/**
 * Maps an overall run status to the Tailwind classes used for its badge.
 * Green for success, amber for partial failure, destructive for failure,
 * and a muted style for any other value.
 */
function overallBadgeClass(status: OverallStatus): string {
  if (status === 'success') {
    return 'bg-green-500/15 text-green-500 border-green-500/30'
  }
  if (status === 'partial_failure') {
    return 'bg-amber-500/15 text-amber-500 border-amber-500/30'
  }
  if (status === 'failed') {
    return 'bg-destructive/15 text-destructive border-destructive/30'
  }
  return 'bg-muted/50 text-muted-foreground border-border'
}
/**
 * Picks the glyph and text-colour class shown next to a backup phase.
 *
 * @param status - Status of a single phase.
 * @returns A glyph plus a Tailwind colour class. 'pending' and any value not
 *   listed explicitly share the hollow-circle fallback.
 */
function phaseIcon(status: PhaseStatus): { glyph: string; color: string } {
  switch (status) {
    case 'success':
      return { glyph: '✓', color: 'text-green-500' }
    case 'skipped':
      // Previously an empty string, which left skipped phases without any icon
      // while every other status had one.
      return { glyph: '⊘', color: 'text-muted-foreground' }
    case 'degraded':
      return { glyph: '!', color: 'text-amber-500' }
    case 'failed':
      return { glyph: '✗', color: 'text-destructive' }
    case 'pending':
    default:
      return { glyph: '○', color: 'text-muted-foreground/50' }
  }
}
/**
 * Computes how long a phase ran, in whole seconds.
 *
 * @returns null when either timestamp is missing or unparseable; otherwise the
 *   rounded difference between `completedAt` and `startedAt`, never below 0.
 */
function phaseDurationSeconds(phase: BackupPhase): number | null {
  const { startedAt, completedAt } = phase
  if (!startedAt || !completedAt) return null

  const startMs = new Date(startedAt).getTime()
  const endMs = new Date(completedAt).getTime()
  if (Number.isNaN(startMs) || Number.isNaN(endMs)) return null

  const elapsed = Math.round((endMs - startMs) / 1000)
  // Clamp so an end time earlier than the start never yields a negative duration.
  return elapsed > 0 ? elapsed : 0
}
/**
 * Card summarising a backup run: overall status badge, completion time, host,
 * total duration, and one tile per phase. Renders a placeholder message when
 * no run is available.
 */
function StatusCard({ status }: { status: BackupStatus | null }) {
  if (!status) {
    return (
      <div className="rounded-lg border border-border px-4 py-3 text-sm text-muted-foreground">
        No backup run recorded yet. Trigger one with the &quot;Backup now&quot; button below.
      </div>
    )
  }

  const badgeClassName = `inline-flex items-center gap-1.5 rounded-md border px-2 py-0.5 text-xs font-medium uppercase tracking-wide ${overallBadgeClass(status.overallStatus)}`
  const badgeLabel = status.overallStatus.replace('_', ' ')

  // One tile per phase; the tooltip carries the phase error when there is one,
  // otherwise the plain status.
  const phaseTiles = status.phases.map((phase) => {
    const { glyph, color } = phaseIcon(phase.status)
    const elapsed = phaseDurationSeconds(phase)
    const elapsedSuffix = elapsed != null ? ` · ${formatDuration(elapsed)}` : ''
    return (
      <div
        key={phase.name}
        className="flex items-center gap-2 rounded-md border border-border/60 bg-muted/20 px-2 py-1.5"
        title={phase.error ?? phase.status}
      >
        <span className={`font-mono text-sm ${color}`}>{glyph}</span>
        <div className="flex flex-col leading-tight min-w-0">
          <span className="truncate text-xs font-medium">{phase.name}</span>
          <span className="text-[10px] text-muted-foreground">
            {phase.status}
            {elapsedSuffix}
          </span>
        </div>
      </div>
    )
  })

  return (
    <div className="rounded-lg border border-border p-4 space-y-3">
      <div className="flex items-center justify-between flex-wrap gap-2">
        <div className="flex items-center gap-3">
          <span className={badgeClassName}>{badgeLabel}</span>
          <span className="text-sm text-muted-foreground">
            Last run {formatTimestamp(status.completedAt)} on{' '}
            <code className="font-mono text-xs">{status.host || '—'}</code>
          </span>
        </div>
        <span className="text-xs text-muted-foreground">
          duration {formatDuration(status.durationSeconds)}
        </span>
      </div>
      <div className="grid grid-cols-2 gap-1 sm:grid-cols-4">{phaseTiles}</div>
    </div>
  )
}
/**
 * Compact stats panel for one restic repository.
 * An error message takes precedence over everything; missing stats render a
 * placeholder; otherwise snapshot count, restore size, raw data size and the
 * dedup ratio are shown.
 */
function StatsBlock({ stats, label, error }: { stats: ResticStats | null; label: string; error?: string }) {
  if (error) {
    return (
      <div className="rounded-lg border border-destructive/50 bg-destructive/10 p-3 text-xs text-destructive">
        {label}: {error}
      </div>
    )
  }

  if (!stats) {
    return (
      <div className="rounded-lg border border-border p-3 text-xs text-muted-foreground">
        {label}: no stats yet
      </div>
    )
  }

  const { snapshotsCount, restoreSizeBytes, rawDataBytes, dedupRatio } = stats
  const pluralSuffix = snapshotsCount === 1 ? '' : 's'

  // The ratio is only printed when it is an actual finite number.
  let dedupLabel = '—'
  if (dedupRatio != null && Number.isFinite(dedupRatio)) {
    dedupLabel = `${dedupRatio.toFixed(2)}×`
  }

  return (
    <div className="rounded-lg border border-border p-3 space-y-1.5">
      <div className="flex items-center justify-between">
        <span className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
          {label}
        </span>
        <span className="text-xs text-muted-foreground">
          {snapshotsCount} snapshot{pluralSuffix}
        </span>
      </div>
      <dl className="grid grid-cols-2 gap-x-3 gap-y-0.5 text-xs font-mono">
        <dt className="text-muted-foreground">restore size</dt>
        <dd className="text-right">{formatBytes(restoreSizeBytes)}</dd>
        <dt className="text-muted-foreground">raw data</dt>
        <dd className="text-right">{formatBytes(rawDataBytes)}</dd>
        <dt className="text-muted-foreground">dedup ratio</dt>
        <dd className="text-right">{dedupLabel}</dd>
      </dl>
    </div>
  )
}
/**
 * Titled list of restic snapshots for one repository.
 * Below the heading it shows, in order of precedence: the error message, an
 * empty-state notice, or a table with time, short id, tags and the
 * files/size-added summary of each snapshot.
 */
function SnapshotsTable({
  snapshots,
  label,
  error,
}: {
  snapshots: ResticSnapshot[]
  label: string
  error?: string
}) {
  // Classes shared by every header cell; alignment is prepended per column.
  const headCell = 'px-3 py-2 font-medium text-muted-foreground'

  const renderBody = () => {
    if (error) {
      return (
        <div className="rounded-lg border border-destructive/50 bg-destructive/10 p-3 text-xs text-destructive">
          {error}
        </div>
      )
    }

    if (snapshots.length === 0) {
      return (
        <div className="rounded-lg border border-border px-4 py-6 text-xs text-muted-foreground text-center">
          No snapshots in this repo yet.
        </div>
      )
    }

    const rows = snapshots.map((snap, index) => {
      const summary = snap.summary
      const addedText =
        summary?.files_new != null
          ? `${summary.files_new} new · ${formatBytes(summary.data_added ?? 0)}`
          : '—'
      // Odd rows get a faint background for zebra striping.
      const stripe = index % 2 === 0 ? '' : 'bg-muted/10'
      return (
        <tr key={snap.id} className={stripe}>
          <td className="px-3 py-1.5 text-muted-foreground">{formatTimestamp(snap.time)}</td>
          <td className="px-3 py-1.5">{snap.shortId}</td>
          <td className="px-3 py-1.5 text-muted-foreground truncate max-w-[12rem]">
            {snap.tags.join(', ') || '—'}
          </td>
          <td className="px-3 py-1.5 text-right text-muted-foreground">{addedText}</td>
        </tr>
      )
    })

    return (
      <div className="rounded-lg border border-border overflow-hidden">
        <table className="w-full text-xs font-mono">
          <thead>
            <tr className="border-b border-border bg-muted/30">
              <th className={`text-left ${headCell}`}>Time</th>
              <th className={`text-left ${headCell}`}>ID</th>
              <th className={`text-left ${headCell}`}>Tags</th>
              <th className={`text-right ${headCell}`}>
                Files / size added
              </th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    )
  }

  return (
    <div className="space-y-2">
      <div className="flex items-center justify-between">
        <h3 className="text-sm font-semibold">{label}</h3>
        <span className="text-xs text-muted-foreground">{snapshots.length} shown</span>
      </div>
      {renderBody()}
    </div>
  )
}
/**
 * "Server backup (restic)" section of the backups settings page.
 *
 * Shows the last run's status card, per-repo stats, the "Backup now" and
 * "Run restore test" buttons (each behind a confirmation dialog), the
 * streaming output of the flow started from here, the NAS / B2 snapshot
 * tables and, when present, the last restore-test result.
 *
 * Data arrives entirely through props; this component only starts flows via
 * `useFlowRun` and does not refetch anything itself.
 */
export default function ServerBackupSection({
  envelope,
  nasSnapshots,
  b2Snapshots,
  nasStats,
  b2Stats,
  errors,
}: Props) {
  // Which confirmation dialog is currently open (null = none).
  const [pending, setPending] = useState<ActiveFlow>(null)
  // Flow-run id handed to the useFlowRun callback; enables the audit-log link.
  const [completedFlowRunId, setCompletedFlowRunId] = useState<string | null>(null)
  // Flow most recently started from this section; only used to label the output panel.
  const [activeFlow, setActiveFlow] = useState<ActiveFlow>(null)

  // Passed to useFlowRun — presumably invoked once the run finishes (the hook
  // is defined elsewhere; confirm there).
  const handleComplete = useCallback((flowRunId: string) => {
    setCompletedFlowRunId(flowRunId)
  }, [])
  const flowRun = useFlowRun(handleComplete)

  // Closes the dialog, clears the previous run's audit link and starts the flow.
  const startFlow = useCallback(
    (kind: 'backup' | 'restore') => {
      setPending(null)
      setCompletedFlowRunId(null)
      setActiveFlow(kind)
      flowRun.startFlow(
        kind === 'backup' ? 'server_backup_full' : 'server_backup_restore_test',
        // The meaning of this second argument is defined by useFlowRun (not visible here).
        false,
      )
    },
    [flowRun],
  )

  // Returns the output panel and related state to the idle look.
  const handleReset = useCallback(() => {
    flowRun.reset()
    setCompletedFlowRunId(null)
    setActiveFlow(null)
  }, [flowRun])

  const isRunning = flowRun.status === 'running'
  // When the status could not be read and there is no run to show, the card's
  // "No backup run recorded yet" text would contradict the error below it, so
  // only the error is rendered in that case.
  const showStatusCard = envelope.lastRun != null || !errors.status

  const restoreTest = envelope.lastRestoreTest
  const failedAssertions = restoreTest
    ? restoreTest.assertions.filter((a) => a.status !== 'ok')
    : []

  return (
    <section className="space-y-6">
      <div className="flex items-baseline justify-between">
        <h2 className="text-lg font-semibold tracking-tight">Server backup (restic)</h2>
        <span className="text-xs text-muted-foreground">flows: server_backup_full · restore_test</span>
      </div>
      <div className="rounded-lg border border-border p-5 space-y-3">
        <p className="text-sm text-muted-foreground">
          Daily server-wide backup at 03:30: <code className="font-mono text-xs">pg_dumpall</code> +
          Forgejo dump, then restic to <strong>NAS</strong> (local) and <strong>Backblaze B2</strong>{' '}
          (offsite, Object Lock). Authoritative restore sources are the database dumps; live datadirs
          are excluded. See{' '}
          <Link
            href="https://github.com/Madhura68/Ops-dashboard/blob/main/docs/runbooks/server-backup.md"
            className="underline hover:text-foreground"
          >
            docs/runbooks/server-backup.md
          </Link>{' '}
          for the full procedure.
        </p>
      </div>
      {showStatusCard && <StatusCard status={envelope.lastRun} />}
      {errors.status && (
        <div className="rounded-lg border border-amber-500/50 bg-amber-500/10 p-3 text-xs text-amber-500">
          Could not read backup status: {errors.status}
        </div>
      )}
      <div className="grid gap-3 md:grid-cols-2">
        <StatsBlock stats={nasStats} label="NAS repo" error={errors.nasStats} />
        <StatsBlock stats={b2Stats} label="B2 repo" error={errors.b2Stats} />
      </div>
      <div className="flex items-center gap-3 flex-wrap">
        {/* Explicit type="button": the HTML default is "submit", which would
            submit a form if this section is ever rendered inside one. */}
        <button
          type="button"
          onClick={() => setPending('backup')}
          disabled={isRunning}
          className="rounded-lg bg-foreground text-background px-4 py-2 text-sm font-medium hover:opacity-90 disabled:opacity-50 transition-opacity"
        >
          Backup now
        </button>
        <button
          type="button"
          onClick={() => setPending('restore')}
          disabled={isRunning}
          className="rounded-lg border border-border px-4 py-2 text-sm font-medium hover:bg-muted/50 disabled:opacity-50 transition-colors"
        >
          Run restore test
        </button>
        {/* Reset is offered only once a run has ended (neither idle nor running). */}
        {flowRun.status !== 'idle' && !isRunning && (
          <button
            type="button"
            onClick={handleReset}
            className="text-xs text-muted-foreground hover:text-foreground transition-colors"
          >
            Reset
          </button>
        )}
      </div>
      {flowRun.status !== 'idle' && (
        <div className="space-y-2">
          <div className="flex items-center justify-between">
            <span className="text-sm font-medium">
              Output {activeFlow ? `(${activeFlow === 'backup' ? 'backup' : 'restore test'})` : ''}
            </span>
            {completedFlowRunId && (
              <Link
                href={`/audit/${completedFlowRunId}`}
                className="text-xs text-muted-foreground hover:text-foreground transition-colors"
              >
                View in audit log
              </Link>
            )}
          </div>
          <StreamingTerminal
            lines={flowRun.lines}
            status={flowRun.status}
            error={flowRun.error}
          />
          {flowRun.status === 'done' && (
            <p className="text-xs text-muted-foreground">
              Reload this page to see the updated status, snapshots, and stats.
            </p>
          )}
        </div>
      )}
      <div className="grid gap-6 lg:grid-cols-2">
        <SnapshotsTable
          snapshots={nasSnapshots}
          label="NAS snapshots"
          error={errors.nasSnapshots}
        />
        <SnapshotsTable
          snapshots={b2Snapshots}
          label="B2 snapshots"
          error={errors.b2Snapshots}
        />
      </div>
      {restoreTest && (
        <div className="rounded-lg border border-border p-4 space-y-2">
          <div className="flex items-center justify-between flex-wrap gap-2">
            <h3 className="text-sm font-semibold">Last restore test</h3>
            <span
              className={`inline-flex items-center rounded-md border px-2 py-0.5 text-xs font-medium uppercase tracking-wide ${overallBadgeClass(restoreTest.overallStatus)}`}
            >
              {restoreTest.overallStatus.replace('_', ' ')}
            </span>
          </div>
          <p className="text-xs text-muted-foreground">
            {formatTimestamp(restoreTest.completedAt)} · repo{' '}
            <code className="font-mono">{restoreTest.repo}</code> · snapshot{' '}
            <code className="font-mono">
              {restoreTest.snapshotId?.slice(0, 8) ?? '—'}
            </code>{' '}
            · {restoreTest.assertions.length} assertions
          </p>
          {/* Only assertions that did not pass are listed. */}
          {failedAssertions.length > 0 && (
            <ul className="space-y-0.5">
              {failedAssertions.map((a) => (
                <li key={a.path} className="text-xs font-mono text-amber-500">
                  {a.status === 'missing' ? '✗ missing' : '! empty'} · {a.path}
                </li>
              ))}
            </ul>
          )}
        </div>
      )}
      <ConfirmDialog
        open={pending === 'backup'}
        title="Trigger server backup"
        commandPreview={
          'flow: server_backup_full\n\nSteps:\n  1. trigger_server_backup (systemctl start server-backup.service)\n  2. tail_backup_log_today\n  3. read_backup_status\n\nThe actual work happens in systemd; this flow kicks it off and tails the log.'
        }
        onConfirm={() => startFlow('backup')}
        onCancel={() => setPending(null)}
      />
      <ConfirmDialog
        open={pending === 'restore'}
        title="Run restore test (NAS)"
        commandPreview={
          'flow: server_backup_restore_test\n\nSteps:\n  1. trigger_restore_test (restore latest NAS snapshot to /tmp/restore-test/)\n  2. read_backup_status\n\nNon-destructive — restores into /tmp only and asserts critical files exist.'
        }
        onConfirm={() => startFlow('restore')}
        onCancel={() => setPending(null)}
      />
    </section>
  )
}