Adds a server-wide backup capability beyond the existing ops_dashboard pg_dump flow:

- Daily systemd timer (03:30) runs pg_dumpall + Forgejo dump, then restic to a local NAS repo and an offsite Backblaze B2 repo with Object Lock. Phase-based script with single-instance flock, structured statusfile, systemd hardening, and live-datadir excludes (Postgres / Forgejo) so the dumps stay authoritative.
- Ops-agent gets nine new read-only/trigger commands (snapshots, stats, status, logs, plus two triggers) backed by sudoers-whitelisted wrapper scripts that source /etc/restic-backup.env so the agent never sees the restic password or B2 keys.
- Two new flows (server_backup_full, server_backup_restore_test) drive the dashboard's "Backup now" and "Restore test" buttons.
- /settings/backups gains a Server backup section with overall + per-phase status, NAS / B2 snapshot tables, restore-size / raw-data / dedup-ratio stats, and the last restore-test result. The existing pg_dump section is preserved unchanged.
- Runbook docs/runbooks/server-backup.md follows the tailscale-setup pattern (plan + addendum) and covers B2 Object Lock + scoped keys, Forgejo subplan with isolated restore-test stack, the off-server maintenance flow for B2 prune, and the integrity-check schedule.

Code-only change — installation on scrum4me-srv follows the runbook.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
447 lines
16 KiB
TypeScript
447 lines
16 KiB
TypeScript
'use client'
|
||
|
||
import { useCallback, useState } from 'react'
|
||
import Link from 'next/link'
|
||
import { useFlowRun } from '@/hooks/useFlowRun'
|
||
import StreamingTerminal from '@/components/StreamingTerminal'
|
||
import ConfirmDialog from '@/components/ConfirmDialog'
|
||
import type {
|
||
BackupPhase,
|
||
BackupStatus,
|
||
BackupStatusEnvelope,
|
||
OverallStatus,
|
||
PhaseStatus,
|
||
ResticSnapshot,
|
||
ResticStats,
|
||
} from '../_lib/types'
|
||
|
||
/** Props accepted by the server-backup section of the backups settings page. */
type Props = {
  /** Status envelope; its `lastRun` and `lastRestoreTest` entries are rendered below. */
  envelope: BackupStatusEnvelope
  /** Snapshots listed in the "NAS snapshots" table. */
  nasSnapshots: ResticSnapshot[]
  /** Snapshots listed in the "B2 snapshots" table. */
  b2Snapshots: ResticSnapshot[]
  /** Stats for the NAS repo, or `null` when none are available. */
  nasStats: ResticStats | null
  /** Stats for the B2 repo, or `null` when none are available. */
  b2Stats: ResticStats | null
  /**
   * Optional error message per data source. When a message is present the
   * matching block shows it instead of (or, for `status`, next to) its data.
   */
  errors: Partial<
    Record<'status' | 'nasSnapshots' | 'b2Snapshots' | 'nasStats' | 'b2Stats', string>
  >
}
|
||
|
||
/**
 * Identifies one of the two flows this section can start. `null` means
 * "none" (no confirm dialog open / no flow output being shown).
 */
type ActiveFlow =
  | 'backup'
  | 'restore'
  | null
|
||
|
||
/** 1024-based units above plain bytes, paired with the decimals shown for each. */
const BYTE_UNIT_STEPS: ReadonlyArray<readonly [suffix: string, decimals: number]> = [
  ['KB', 0],
  ['MB', 1],
  ['GB', 2],
]

/**
 * Formats a byte count as a short human-readable string using 1024-based
 * steps: `B`, `KB` (no decimals), `MB` (1 decimal), `GB` (2 decimals).
 *
 * @param bytes Size in bytes; `null`/`undefined` mean "unknown".
 * @returns The formatted size, or `'—'` when the value is missing or not a
 *   finite number.
 */
function formatBytes(bytes: number | null | undefined): string {
  if (bytes == null || !Number.isFinite(bytes)) return '—'
  if (Math.abs(bytes) < 1024) return `${bytes} B`

  let scaled = bytes
  let text = ''
  for (const [suffix, decimals] of BYTE_UNIT_STEPS) {
    scaled /= 1024
    const rounded = scaled.toFixed(decimals)
    text = `${rounded} ${suffix}`
    // Pick the unit based on the *rounded* value: 1048575 bytes rounds to
    // "1024 KB", which must roll over and be shown as "1.0 MB" instead.
    if (Math.abs(Number(rounded)) < 1024) break
  }
  // Falling out of the loop leaves the largest unit (GB) in `text`.
  return text
}
|
||
|
||
/**
 * Formats a duration in seconds as `Ns`, `Nm Ns` or `Nh Nm`.
 *
 * The input is rounded to whole seconds first, so fractional values never
 * leak floating-point noise into the label (e.g. `61.1` becomes `1m 1s`).
 *
 * @param seconds Duration in seconds; `null`/`undefined` mean "unknown".
 * @returns `'—'` for missing, zero, negative or non-finite input, `'<1s'` for
 *   positive durations that round down to zero, otherwise the formatted label.
 */
function formatDuration(seconds: number | null | undefined): string {
  if (seconds == null || !Number.isFinite(seconds) || seconds <= 0) return '—'

  const total = Math.round(seconds)
  if (total === 0) return '<1s'
  if (total < 60) return `${total}s`

  const minutes = Math.floor(total / 60)
  if (minutes < 60) return `${minutes}m ${total % 60}s`

  return `${Math.floor(minutes / 60)}h ${minutes % 60}m`
}
|
||
|
||
/**
 * Formats a date string as `YYYY-MM-DD HH:mm` using the local-time getters
 * of `Date`, i.e. in the timezone of whatever runtime executes it.
 *
 * NOTE(review): if this component is pre-rendered on a server whose timezone
 * differs from the browser's, the two renders may disagree — confirm.
 *
 * @param iso Date string to format; `null`/`undefined`/`''` mean "unknown".
 * @returns `'—'` for missing input, the input unchanged when it cannot be
 *   parsed as a date, otherwise the formatted timestamp.
 */
function formatTimestamp(iso: string | null | undefined): string {
  if (!iso) return '—'

  // Parsing a string never throws; an unparsable value yields an invalid
  // Date whose getTime() is NaN, so no try/catch is needed.
  const parsed = new Date(iso)
  if (Number.isNaN(parsed.getTime())) return iso

  const pad = (part: number): string => String(part).padStart(2, '0')
  const day = `${parsed.getFullYear()}-${pad(parsed.getMonth() + 1)}-${pad(parsed.getDate())}`
  return `${day} ${pad(parsed.getHours())}:${pad(parsed.getMinutes())}`
}
|
||
|
||
/** Badge classes for the overall statuses that have a dedicated colour. */
const OVERALL_BADGE_CLASSES: ReadonlyMap<string, string> = new Map([
  ['success', 'bg-green-500/15 text-green-500 border-green-500/30'],
  ['partial_failure', 'bg-amber-500/15 text-amber-500 border-amber-500/30'],
  ['failed', 'bg-destructive/15 text-destructive border-destructive/30'],
])

/** Neutral badge classes used for every other status. */
const OVERALL_BADGE_NEUTRAL = 'bg-muted/50 text-muted-foreground border-border'

/**
 * Maps an overall status to the CSS classes of its badge.
 *
 * @param status Overall status of a backup run or restore test.
 * @returns Green classes for `success`, amber for `partial_failure`,
 *   destructive for `failed`, and muted classes for anything else.
 */
function overallBadgeClass(status: OverallStatus): string {
  return OVERALL_BADGE_CLASSES.get(status) ?? OVERALL_BADGE_NEUTRAL
}
|
||
|
||
/**
 * Picks the glyph and text-colour class that represent a phase status.
 *
 * @param status Status of a single backup phase.
 * @returns A fresh `{ glyph, color }` pair; `pending` and any unrecognised
 *   status share the dimmed hollow-circle icon.
 */
function phaseIcon(status: PhaseStatus): { glyph: string; color: string } {
  if (status === 'success') return { glyph: '✓', color: 'text-green-500' }
  if (status === 'skipped') return { glyph: '–', color: 'text-muted-foreground' }
  if (status === 'degraded') return { glyph: '!', color: 'text-amber-500' }
  if (status === 'failed') return { glyph: '✗', color: 'text-destructive' }

  // 'pending' and everything else.
  return { glyph: '○', color: 'text-muted-foreground/50' }
}
|
||
|
||
/**
 * Computes how long a phase ran, in whole seconds.
 *
 * @param phase Phase whose `startedAt` / `completedAt` timestamps are read.
 * @returns `null` when either timestamp is missing or unparsable; otherwise
 *   the rounded difference in seconds, clamped so it is never negative.
 */
function phaseDurationSeconds(phase: BackupPhase): number | null {
  const { startedAt, completedAt } = phase
  if (!(startedAt && completedAt)) return null

  const startMs = new Date(startedAt).getTime()
  const endMs = new Date(completedAt).getTime()

  // getTime() is either a finite number or NaN, so the difference is NaN
  // exactly when at least one of the two timestamps failed to parse.
  const elapsedMs = endMs - startMs
  if (Number.isNaN(elapsedMs)) return null

  const wholeSeconds = Math.round(elapsedMs / 1000)
  return wholeSeconds > 0 ? wholeSeconds : 0
}
|
||
|
||
/**
 * Card summarising the most recent backup run: overall status badge,
 * completion time, host, total duration and one tile per phase.
 *
 * @param status The last recorded run, or `null` when none exists yet, in
 *   which case a hint pointing at the "Backup now" button is shown instead.
 */
function StatusCard({ status }: { status: BackupStatus | null }) {
  if (!status) {
    return (
      <div className="rounded-lg border border-border px-4 py-3 text-sm text-muted-foreground">
        No backup run recorded yet. Trigger one with the "Backup now" button below.
      </div>
    )
  }

  // A string pattern would only replace the first underscore; use a global
  // regex so every underscore in the status becomes a space.
  const overallLabel = status.overallStatus.replace(/_/g, ' ')

  return (
    <div className="rounded-lg border border-border p-4 space-y-3">
      <div className="flex items-center justify-between flex-wrap gap-2">
        <div className="flex items-center gap-3">
          <span
            className={`inline-flex items-center gap-1.5 rounded-md border px-2 py-0.5 text-xs font-medium uppercase tracking-wide ${overallBadgeClass(status.overallStatus)}`}
          >
            {overallLabel}
          </span>
          <span className="text-sm text-muted-foreground">
            Last run {formatTimestamp(status.completedAt)} on{' '}
            <code className="font-mono text-xs">{status.host || '—'}</code>
          </span>
        </div>
        <span className="text-xs text-muted-foreground">
          duration {formatDuration(status.durationSeconds)}
        </span>
      </div>
      <div className="grid grid-cols-2 gap-1 sm:grid-cols-4">
        {status.phases.map((p) => {
          const icon = phaseIcon(p.status)
          const dur = phaseDurationSeconds(p)
          // formatDuration renders 0 as '—'; skip the suffix for a zero-length
          // phase instead of showing a dangling " · —" after the status.
          const durationSuffix = dur ? ` · ${formatDuration(dur)}` : ''
          return (
            <div
              key={p.name}
              className="flex items-center gap-2 rounded-md border border-border/60 bg-muted/20 px-2 py-1.5"
              title={p.error ?? p.status}
            >
              <span className={`font-mono text-sm ${icon.color}`}>{icon.glyph}</span>
              <div className="flex flex-col leading-tight min-w-0">
                <span className="truncate text-xs font-medium">{p.name}</span>
                <span className="text-[10px] text-muted-foreground">
                  {p.status}
                  {durationSuffix}
                </span>
              </div>
            </div>
          )
        })}
      </div>
    </div>
  )
}
|
||
|
||
/**
 * Compact stats panel for one restic repository.
 *
 * Shows, in order of precedence: the error message when `error` is set, a
 * "no stats yet" note when `stats` is `null`, or the snapshot count with
 * restore size, raw data size and dedup ratio.
 *
 * @param stats Repository stats, or `null` when none are available.
 * @param label Heading shown for the repository.
 * @param error Optional error message that replaces the stats.
 */
function StatsBlock({ stats, label, error }: { stats: ResticStats | null; label: string; error?: string }) {
  if (error) {
    return (
      <div className="rounded-lg border border-destructive/50 bg-destructive/10 p-3 text-xs text-destructive">
        {label}: {error}
      </div>
    )
  }

  if (!stats) {
    return (
      <div className="rounded-lg border border-border p-3 text-xs text-muted-foreground">
        {label}: no stats yet
      </div>
    )
  }

  const count = stats.snapshotsCount
  const pluralSuffix = count === 1 ? '' : 's'

  // Only a finite ratio is worth printing; anything else becomes a dash.
  const ratio = stats.dedupRatio
  const dedupText = ratio == null || !Number.isFinite(ratio) ? '—' : `${ratio.toFixed(2)}×`

  return (
    <div className="rounded-lg border border-border p-3 space-y-1.5">
      <div className="flex items-center justify-between">
        <span className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
          {label}
        </span>
        <span className="text-xs text-muted-foreground">
          {count} snapshot{pluralSuffix}
        </span>
      </div>
      <dl className="grid grid-cols-2 gap-x-3 gap-y-0.5 text-xs font-mono">
        <dt className="text-muted-foreground">restore size</dt>
        <dd className="text-right">{formatBytes(stats.restoreSizeBytes)}</dd>
        <dt className="text-muted-foreground">raw data</dt>
        <dd className="text-right">{formatBytes(stats.rawDataBytes)}</dd>
        <dt className="text-muted-foreground">dedup ratio</dt>
        <dd className="text-right">{dedupText}</dd>
      </dl>
    </div>
  )
}
|
||
|
||
/**
 * Titled table of restic snapshots for one repository.
 *
 * Under the heading (which always shows how many snapshots were passed in)
 * it renders the error message when `error` is set, an empty-state note when
 * there are no snapshots, or one row per snapshot with time, short id, tags
 * and the "files new / data added" summary.
 *
 * @param snapshots Snapshots to list.
 * @param label Heading for the table.
 * @param error Optional error message that replaces the table.
 */
function SnapshotsTable({
  snapshots,
  label,
  error,
}: {
  snapshots: ResticSnapshot[]
  label: string
  error?: string
}) {
  const headerCell = 'px-3 py-2 font-medium text-muted-foreground'

  // Error wins over the empty state, which wins over the table.
  const renderBody = () => {
    if (error) {
      return (
        <div className="rounded-lg border border-destructive/50 bg-destructive/10 p-3 text-xs text-destructive">
          {error}
        </div>
      )
    }

    if (snapshots.length === 0) {
      return (
        <div className="rounded-lg border border-border px-4 py-6 text-xs text-muted-foreground text-center">
          No snapshots in this repo yet.
        </div>
      )
    }

    return (
      <div className="rounded-lg border border-border overflow-hidden">
        <table className="w-full text-xs font-mono">
          <thead>
            <tr className="border-b border-border bg-muted/30">
              <th className={`text-left ${headerCell}`}>Time</th>
              <th className={`text-left ${headerCell}`}>ID</th>
              <th className={`text-left ${headerCell}`}>Tags</th>
              <th className={`text-right ${headerCell}`}>
                Files / size added
              </th>
            </tr>
          </thead>
          <tbody>
            {snapshots.map((snap, rowIndex) => {
              const summary = snap.summary
              // A summary without a file count is shown as a dash.
              const addedText =
                summary?.files_new == null
                  ? '—'
                  : `${summary.files_new} new · ${formatBytes(summary.data_added ?? 0)}`
              return (
                <tr key={snap.id} className={rowIndex % 2 === 1 ? 'bg-muted/10' : ''}>
                  <td className="px-3 py-1.5 text-muted-foreground">{formatTimestamp(snap.time)}</td>
                  <td className="px-3 py-1.5">{snap.shortId}</td>
                  <td className="px-3 py-1.5 text-muted-foreground truncate max-w-[12rem]">
                    {snap.tags.join(', ') || '—'}
                  </td>
                  <td className="px-3 py-1.5 text-right text-muted-foreground">
                    {addedText}
                  </td>
                </tr>
              )
            })}
          </tbody>
        </table>
      </div>
    )
  }

  return (
    <div className="space-y-2">
      <div className="flex items-center justify-between">
        <h3 className="text-sm font-semibold">{label}</h3>
        <span className="text-xs text-muted-foreground">{snapshots.length} shown</span>
      </div>
      {renderBody()}
    </div>
  )
}
|
||
|
||
/**
 * Card for the most recent restore test: status badge, completion time,
 * repo, shortened snapshot id, assertion count, and a list of every
 * assertion whose status is not `ok`.
 */
function LastRestoreTestCard({
  result,
}: {
  result: NonNullable<BackupStatusEnvelope['lastRestoreTest']>
}) {
  // Filter once and reuse the result for both the "any failures?" check and
  // the list itself.
  const failing = result.assertions.filter((a) => a.status !== 'ok')

  return (
    <div className="rounded-lg border border-border p-4 space-y-2">
      <div className="flex items-center justify-between flex-wrap gap-2">
        <h3 className="text-sm font-semibold">Last restore test</h3>
        <span
          className={`inline-flex items-center rounded-md border px-2 py-0.5 text-xs font-medium uppercase tracking-wide ${overallBadgeClass(result.overallStatus)}`}
        >
          {/* Global regex: a string pattern would only replace the first underscore. */}
          {result.overallStatus.replace(/_/g, ' ')}
        </span>
      </div>
      <p className="text-xs text-muted-foreground">
        {formatTimestamp(result.completedAt)} · repo{' '}
        <code className="font-mono">{result.repo}</code> · snapshot{' '}
        <code className="font-mono">
          {result.snapshotId?.slice(0, 8) ?? '—'}
        </code>{' '}
        · {result.assertions.length} assertions
      </p>
      {failing.length > 0 && (
        <ul className="space-y-0.5">
          {failing.map((a) => (
            <li key={a.path} className="text-xs font-mono text-amber-500">
              {a.status === 'missing' ? '✗ missing' : '! empty'} · {a.path}
            </li>
          ))}
        </ul>
      )}
    </div>
  )
}

/**
 * "Server backup (restic)" section of the backups settings page.
 *
 * Renders the last-run status card, per-repo stats, the "Backup now" and
 * "Run restore test" buttons (each gated by a confirm dialog), the streamed
 * output of the running flow with a link to its audit entry once complete,
 * the NAS / B2 snapshot tables and the last restore-test result.
 *
 * The data props are only read here; after a flow finishes the user is told
 * to reload the page to see updated values.
 */
export default function ServerBackupSection({
  envelope,
  nasSnapshots,
  b2Snapshots,
  nasStats,
  b2Stats,
  errors,
}: Props) {
  // Which confirm dialog is open (null = none).
  const [pending, setPending] = useState<ActiveFlow>(null)
  // Id reported by the flow-run hook on completion; enables the audit link.
  const [completedFlowRunId, setCompletedFlowRunId] = useState<string | null>(null)
  // Which flow the terminal output belongs to, for the "Output (...)" label.
  const [activeFlow, setActiveFlow] = useState<ActiveFlow>(null)

  const handleComplete = useCallback((flowRunId: string) => {
    setCompletedFlowRunId(flowRunId)
  }, [])

  const flowRun = useFlowRun(handleComplete)

  const startFlow = useCallback(
    (kind: 'backup' | 'restore') => {
      // Close the dialog and drop the previous run's audit link before starting.
      setPending(null)
      setCompletedFlowRunId(null)
      setActiveFlow(kind)
      flowRun.startFlow(
        kind === 'backup' ? 'server_backup_full' : 'server_backup_restore_test',
        false,
      )
    },
    [flowRun],
  )

  const handleReset = useCallback(() => {
    flowRun.reset()
    setCompletedFlowRunId(null)
    setActiveFlow(null)
  }, [flowRun])

  const isRunning = flowRun.status === 'running'
  const hasOutput = flowRun.status !== 'idle'

  return (
    <section className="space-y-6">
      <div className="flex items-baseline justify-between">
        <h2 className="text-lg font-semibold tracking-tight">Server backup (restic)</h2>
        <span className="text-xs text-muted-foreground">flows: server_backup_full · restore_test</span>
      </div>

      <div className="rounded-lg border border-border p-5 space-y-3">
        <p className="text-sm text-muted-foreground">
          Daily server-wide backup at 03:30: <code className="font-mono text-xs">pg_dumpall</code> +
          Forgejo dump, then restic to <strong>NAS</strong> (local) and <strong>Backblaze B2</strong>{' '}
          (offsite, Object Lock). Authoritative restore sources are the database dumps; live datadirs
          are excluded. See{' '}
          <Link
            href="https://github.com/Madhura68/Ops-dashboard/blob/main/docs/runbooks/server-backup.md"
            className="underline hover:text-foreground"
          >
            docs/runbooks/server-backup.md
          </Link>{' '}
          for the full procedure.
        </p>
      </div>

      <StatusCard status={envelope.lastRun} />
      {errors.status && (
        <div className="rounded-lg border border-amber-500/50 bg-amber-500/10 p-3 text-xs text-amber-500">
          Could not read backup status: {errors.status}
        </div>
      )}

      <div className="grid gap-3 md:grid-cols-2">
        <StatsBlock stats={nasStats} label="NAS repo" error={errors.nasStats} />
        <StatsBlock stats={b2Stats} label="B2 repo" error={errors.b2Stats} />
      </div>

      {/* Explicit type="button" so these never act as submit buttons if the
          section is ever placed inside a form. */}
      <div className="flex items-center gap-3 flex-wrap">
        <button
          type="button"
          onClick={() => setPending('backup')}
          disabled={isRunning}
          className="rounded-lg bg-foreground text-background px-4 py-2 text-sm font-medium hover:opacity-90 disabled:opacity-50 transition-opacity"
        >
          Backup now
        </button>
        <button
          type="button"
          onClick={() => setPending('restore')}
          disabled={isRunning}
          className="rounded-lg border border-border px-4 py-2 text-sm font-medium hover:bg-muted/50 disabled:opacity-50 transition-colors"
        >
          Run restore test
        </button>
        {hasOutput && !isRunning && (
          <button
            type="button"
            onClick={handleReset}
            className="text-xs text-muted-foreground hover:text-foreground transition-colors"
          >
            Reset
          </button>
        )}
      </div>

      {hasOutput && (
        <div className="space-y-2">
          <div className="flex items-center justify-between">
            <span className="text-sm font-medium">
              Output {activeFlow ? `(${activeFlow === 'backup' ? 'backup' : 'restore test'})` : ''}
            </span>
            {completedFlowRunId && (
              <Link
                href={`/audit/${completedFlowRunId}`}
                className="text-xs text-muted-foreground hover:text-foreground transition-colors"
              >
                View in audit log →
              </Link>
            )}
          </div>
          <StreamingTerminal
            lines={flowRun.lines}
            status={flowRun.status}
            error={flowRun.error}
          />
          {flowRun.status === 'done' && (
            <p className="text-xs text-muted-foreground">
              Reload this page to see the updated status, snapshots, and stats.
            </p>
          )}
        </div>
      )}

      <div className="grid gap-6 lg:grid-cols-2">
        <SnapshotsTable
          snapshots={nasSnapshots}
          label="NAS snapshots"
          error={errors.nasSnapshots}
        />
        <SnapshotsTable
          snapshots={b2Snapshots}
          label="B2 snapshots"
          error={errors.b2Snapshots}
        />
      </div>

      {envelope.lastRestoreTest && <LastRestoreTestCard result={envelope.lastRestoreTest} />}

      <ConfirmDialog
        open={pending === 'backup'}
        title="Trigger server backup"
        commandPreview={
          'flow: server_backup_full\n\nSteps:\n 1. trigger_server_backup (systemctl start server-backup.service)\n 2. tail_backup_log_today\n 3. read_backup_status\n\nThe actual work happens in systemd; this flow kicks it off and tails the log.'
        }
        onConfirm={() => startFlow('backup')}
        onCancel={() => setPending(null)}
      />
      <ConfirmDialog
        open={pending === 'restore'}
        title="Run restore test (NAS)"
        commandPreview={
          'flow: server_backup_restore_test\n\nSteps:\n 1. trigger_restore_test (restore latest NAS snapshot to /tmp/restore-test/)\n 2. read_backup_status\n\nNon-destructive — restores into /tmp only and asserts critical files exist.'
        }
        onConfirm={() => startFlow('restore')}
        onCancel={() => setPending(null)}
      />
    </section>
  )
}
|