Three SSE-routes (solo, backlog, notifications) each create a long-running pg.Client that LISTENs on scrum4me_changes. On abrupt close (Fast Refresh, browser refresh, Vercel function recycle) the pgClient.end()-await sometimes hangs silently, leaving the underlying socket connected to Postgres. The connection stays in 'idle' on Neon's side and after ~10-20 reconnects the connection-pool fills up — new SSE connects fail with ERR_INCOMPLETE_CHUNKED_ENCODING in the browser. Fix: shared `closePgClientSafely` helper that races client.end() against a 2 s timeout; on timeout it force-destroys the underlying socket so the OS releases the FD and Postgres notices the disconnect. Validated by direct DB inspection: 18 stale 'idle LISTEN'-connections were piled up before the fix; after manual pg_terminate_backend cleanup the SSE-stream stabilised. This change makes the pile-up impossible going forward. - new lib/realtime/pg-client-cleanup.ts - 3 routes use the helper instead of bare `await pgClient.end()` - 3 unit tests for the helper (timely-end, hang-falls-back-to-destroy, end-rejection-is-swallowed) Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
55 lines
1.9 KiB
TypeScript
55 lines
1.9 KiB
TypeScript
// Robust pg.Client cleanup for SSE-routes that hold a long-running LISTEN-
// connection. Without this helper, `pgClient.end()` can hang silently when
// the underlying socket is in a weird state (Fast Refresh, abrupt browser
// close, Vercel function recycle), leaving the connection in 'idle' on the
// Postgres server. After ~10-20 reconnects the Neon connection-pool fills
// up and new SSE-connections fail with ERR_INCOMPLETE_CHUNKED_ENCODING.
//
// Strategy: race `pgClient.end()` against a short timeout; if the timeout
// wins, force-destroy the underlying socket so the OS releases the FD and
// Neon notices the disconnect.
|
|
|
|
import type { Client } from 'pg'
|
|
|
|
/**
 * Upper bound, in milliseconds, on how long `closePgClientSafely` waits for
 * `client.end()` to settle before it stops waiting and destroys the socket.
 */
const END_TIMEOUT_MS = 2000
|
|
|
|
/**
 * Minimal structural view of the part of a pg client that the cleanup helper
 * reaches into: `client.connection.stream.destroy(err?)`.
 *
 * Every member is optional because the helper gets at it through a cast
 * rather than through the client's declared type, so each level is probed
 * with optional chaining before use.
 *
 * NOTE(review): presumably `stream` is the socket pg keeps internally; this
 * is not visible from here, so re-check the property path when upgrading pg.
 */
interface PgClientWithStream {
  connection?: {
    stream?: {
      destroy?: (err?: Error) => void
    }
  }
}
|
|
|
|
/**
 * Closes a pg client without letting a hung `end()` keep the connection open.
 *
 * Steps:
 * 1. Replace all 'notification' and 'error' listeners with a single no-op
 *    'error' listener, so late events from the dying connection neither
 *    re-trigger the caller's cleanup nor surface as unhandled errors.
 * 2. Wait for `client.end()`, but for at most `END_TIMEOUT_MS`.
 * 3. If `end()` did not complete cleanly (timed out, rejected, or threw),
 *    destroy `client.connection.stream` as a best-effort fallback.
 *
 * The returned promise never rejects: failures from `end()` and from the
 * forced destroy are swallowed, and only reported via `console.warn` outside
 * production.
 *
 * @param client - The pg client to shut down.
 * @param label - Caller identifier; used as the warning prefix and in the
 *   message of the error passed to the forced socket destroy.
 * @returns A promise that resolves once the client has ended or the fallback
 *   destroy has been attempted.
 */
export async function closePgClientSafely(
  client: Client,
  label: string,
): Promise<void> {
  // Drop notification/error handlers so a late event from the dying
  // connection cannot trigger downstream cleanup again.
  client.removeAllListeners('notification')
  client.removeAllListeners('error')
  client.on('error', () => {
    // Swallow: connection is being torn down on purpose.
  })

  let timer: ReturnType<typeof setTimeout> | undefined
  const timedOut = new Promise<'timeout'>((resolve) => {
    timer = setTimeout(() => resolve('timeout'), END_TIMEOUT_MS)
  })

  // Wrapped in an async function so that a synchronous throw from `end()`
  // becomes a rejection and is handled exactly like an asynchronous failure.
  const ended = (async () => {
    await client.end()
    return 'ended' as const
  })()

  let outcome: 'ended' | 'timeout' | 'error'
  let failure: unknown
  try {
    outcome = await Promise.race([ended, timedOut])
  } catch (err: unknown) {
    outcome = 'error'
    failure = err
  } finally {
    // Always disarm the timer so it cannot keep the process alive or fire
    // after this function has returned.
    if (timer !== undefined) clearTimeout(timer)
  }

  if (outcome === 'ended') return

  if (process.env.NODE_ENV !== 'production') {
    if (outcome === 'timeout') {
      console.warn(`[${label}] pgClient.end() did not finish in ${END_TIMEOUT_MS}ms — forcing socket destroy`)
    } else {
      // Report a rejection as a failure, not as a timeout, and keep the cause.
      console.warn(`[${label}] pgClient.end() failed — forcing socket destroy`, failure)
    }
  }

  // The stream is not part of the client's declared type, hence the cast;
  // every level is optional so a missing internal simply makes this a no-op.
  const stream = (client as unknown as PgClientWithStream).connection?.stream
  try {
    stream?.destroy?.(new Error(`forced socket destroy from ${label}`))
  } catch {
    // best-effort
  }
}
|