Ops-dashboard/lib/parse-worker-log.ts

// lib/parse-worker-log.ts
//
// Parser for Scrum4Me worker run-logs (/srv/scrum4me/worker-logs/idea/runs/*.log).
// Each file is produced by `tsx run-one-job.ts > run_log 2>&1` and is a mix of
// plain-text `[run-one-job]` annotation lines and Claude Code `stream-json`
// event lines (the worker spawns `claude --output-format stream-json --verbose`).
//
// Two entry points:
//   summarizeRunLog(raw, fileName) — one cheap line scan, for the table.
//   parseRunLog(raw, fileName)     — full event timeline, for the detail panel.
//
// Pure module, no dependencies — mirrors lib/parse-docker.ts / lib/parse-systemd.ts.

/**
 * Overall state of one worker run, as computed by `summarizeRunLog`.
 *
 * `'idle'` is only ever reported for runs in which no job was claimed;
 * `'running'`, `'success'` and `'error'` describe runs that did. `'token-expired'`
 * takes precedence over everything else. `'unknown'` is part of the type but is
 * not produced by any code in this module.
 */
export type RunStatus = 'idle' | 'running' | 'success' | 'error' | 'token-expired' | 'unknown'

/** One table row describing a run log, produced by `summarizeRunLog`. */
export interface RunLogSummary {
  /** Log file name exactly as passed in by the caller. */
  fileName: string
  /** `fileName` with a trailing `.log` / `.log.gz` removed. */
  runId: string
  /** Start time derived from `runId` when it has the `YYYYMMDDTHHMMSSZ` shape, else null. */
  startedAt: string | null
  status: RunStatus
  /** Value following `claimed job_id=` on the annotation line, or null if no job was claimed. */
  jobId: string | null
  /** `model=` value from the `config ` annotation line. */
  model: string | null
  /** `permission_mode=` value from the `config ` annotation line. */
  permissionMode: string | null
  /** `duration_ms=` from the `claude done` line; otherwise the result event's `duration_ms`. */
  durationMs: number | null
  /** `num_turns` from the result event. */
  numTurns: number | null
  /** `total_cost_usd` from the result event. */
  totalCostUsd: number | null
  /** `exit_code=` from the `claude done` line if present, otherwise the `exit code=` line. */
  exitCode: number | null
  /** Number of non-annotation lines whose first non-whitespace character is `{`. */
  eventCount: number
  /** True while no terminal marker (exit line, result event, ERROR line, token expiry) has been seen. */
  inProgress: boolean
  /** Short failure description; only populated when `status` is `'error'` or `'token-expired'`. */
  errorSummary: string | null
}

/**
 * Category of a `[run-one-job]` annotation line. `classifyMeta` picks the tag
 * from the prefix of the message text; anything it does not recognise is
 * tagged `'other'`.
 */
export type MetaTag =
  | 'claim'
  | 'auth'
  | 'quota'
  | 'no-job'
  | 'claimed'
  | 'worktree'
  | 'config'
  | 'payload'
  | 'spawn'
  | 'claude-done'
  | 'cleanup'
  | 'exit'
  | 'error'
  | 'token-expired'
  | 'timeout'
  | 'other'

/**
 * One entry in the detail-panel timeline, discriminated by `kind`.
 *
 * For events built from stream-json objects, `ts` is the object's `timestamp`
 * string when it has one and null otherwise. Fields named `truncated` /
 * `resultTruncated` are set when the accompanying text was cut to a per-item cap.
 */
export type LogEvent =
  // A `[run-one-job]` annotation line; `ts` is the leading token of the line, `text` the message.
  | { kind: 'meta'; ts: string | null; tag: MetaTag; text: string }
  // Built from a stream-json `system` event. Missing model / permissionMode are shown as '—'.
  | {
      kind: 'system-init'
      ts: string | null
      model: string
      permissionMode: string
      tools: string[]
      mcpServers: string[]
      sessionId: string
      cwd: string
      version: string
    }
  // A `text` content block of an `assistant` event.
  | { kind: 'assistant-text'; ts: string | null; text: string; truncated: boolean }
  // A `thinking` content block of an `assistant` event.
  | { kind: 'thinking'; ts: string | null; text: string; truncated: boolean }
  // A `tool_use` content block of an `assistant` event; `input` is pretty-printed JSON.
  | { kind: 'tool-call'; ts: string | null; id: string; name: string; input: string; truncated: boolean }
  // A `tool_result` content block of a `user` event. `fullLength` is the body length before capping.
  | {
      kind: 'tool-result'
      ts: string | null
      toolUseId: string
      isError: boolean
      body: string
      truncated: boolean
      fullLength: number
    }
  // A `rate_limit_event`; `status` is 'unknown' when the event carries no status string.
  | { kind: 'rate-limit'; ts: string | null; status: string }
  // The stream-json `result` event.
  | {
      kind: 'result'
      ts: string | null
      subtype: string
      isError: boolean
      durationMs: number | null
      numTurns: number | null
      totalCostUsd: number | null
      resultText: string
      resultTruncated: boolean
    }
  // Anything else: unrecognised event types, unparseable JSON lines, and other plain-text lines.
  | { kind: 'raw'; ts: string | null; text: string }

/** Result of `parseRunLog`: the table summary plus the full event timeline. */
export interface ParsedRunLog {
  summary: RunLogSummary
  /** Timeline entries in the order their lines appear in the log. */
  events: LogEvent[]
  /** Copy of `summary.inProgress`. */
  inProgress: boolean
  /** True when the estimated payload size exceeded the overall response cap. */
  responseTruncated: boolean
}

// Per-item caps keep the detail payload bounded even for ~350 KB raw logs.
// All caps are compared against `String.length`, i.e. UTF-16 code units, not bytes.
// Tool-result bodies, and raw (non-JSON / unparseable) lines.
const TOOL_RESULT_CAP = 8 * 1024
// Assistant text, thinking text and the final result text.
const TEXT_CAP = 16 * 1024
// Pretty-printed tool-call input.
const TOOL_INPUT_CAP = 4 * 1024
// Ceiling on the summed estimated size of all events in one parsed log.
const RESPONSE_CAP = 1_500_000

// `<first token> [run-one-job] <message>` — group 1 is the leading token
// (used as the event timestamp), group 2 the message text.
const META_RE = /^(\S+)\s+\[run-one-job\]\s+(.*)$/

/**
 * Truncate `s` to at most `max` UTF-16 code units.
 *
 * The cut never lands in the middle of a surrogate pair: when the last kept
 * unit would be a high surrogate whose low surrogate is being cut off, that
 * unit is dropped too, so the returned text may be one unit shorter than `max`
 * but never ends in a lone surrogate produced by the truncation.
 *
 * @param s   Text to bound.
 * @param max Maximum length in UTF-16 code units.
 * @returns The (possibly shortened) text and whether anything was cut.
 */
function cap(s: string, max: number): { text: string; truncated: boolean } {
  if (s.length <= max) return { text: s, truncated: false }
  let end = max
  if (end > 0) {
    const last = s.charCodeAt(end - 1)
    const next = s.charCodeAt(end)
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff
    if (splitsPair) end -= 1
  }
  return { text: s.slice(0, end), truncated: true }
}

/**
 * Derive the run id from a log file name by dropping a trailing `.log.gz` or
 * `.log`. Names without either suffix are returned unchanged.
 */
export function runIdFromFileName(fileName: string): string {
  const suffixes = ['.log.gz', '.log']
  for (const suffix of suffixes) {
    if (fileName.endsWith(suffix)) {
      return fileName.slice(0, fileName.length - suffix.length)
    }
  }
  return fileName
}

/**
 * Turn a compact `YYYYMMDDTHHMMSSZ` run id into an ISO-8601 style timestamp
 * (`YYYY-MM-DDTHH:MM:SSZ`). run-agent.sh names each file
 * `$(date -u +%Y%m%dT%H%M%SZ).log`, so the run id doubles as the start time.
 * Returns null when the id does not have that exact shape.
 */
function startedAtFromRunId(runId: string): string | null {
  const parts = /^(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})Z$/.exec(runId)
  if (parts === null) {
    return null
  }
  const [, year, month, day, hour, minute, second] = parts
  const date = [year, month, day].join('-')
  const time = [hour, minute, second].join(':')
  return `${date}T${time}Z`
}

// Message prefix → tag. No prefix is itself a prefix of another, so at most one
// entry can match a given message and the order here is not significant.
const META_PREFIX_TAGS: ReadonlyArray<readonly [string, MetaTag]> = [
  ['claim attempt', 'claim'],
  ['auth ok', 'auth'],
  ['quota probe', 'quota'],
  ['no job claimed', 'no-job'],
  ['claimed job_id=', 'claimed'],
  ['worktree path=', 'worktree'],
  ['config ', 'config'],
  ['payload written', 'payload'],
  ['spawn claude', 'spawn'],
  ['claude done', 'claude-done'],
  ['cleanup', 'cleanup'],
  ['exit code=', 'exit'],
  ['ERROR', 'error'],
  ['TOKEN_EXPIRED detected', 'token-expired'],
  ['claim timeout', 'timeout'],
]

/**
 * Categorise the message part of a `[run-one-job]` annotation line by its
 * (case-sensitive) prefix. Unrecognised messages are tagged `'other'`.
 */
function classifyMeta(msg: string): MetaTag {
  for (const [prefix, tag] of META_PREFIX_TAGS) {
    if (msg.startsWith(prefix)) return tag
  }
  return 'other'
}

/**
 * Cheap single-pass summary for the table — at most one JSON.parse (the result line).
 *
 * Scans `raw` line by line. `[run-one-job]` annotation lines supply the job id,
 * model / permission mode, exit codes and duration. Every other line whose
 * first non-whitespace character is `{` is counted as an event, and only the
 * first one starting with `{"type":"result"` is actually parsed; if that line
 * is malformed it is ignored.
 *
 * Status resolution, in priority order:
 *   1. `token-expired` — a `TOKEN_EXPIRED detected` annotation was seen.
 *   2. A job was claimed: `running` until a terminal marker appears, then
 *      `error` if the result is flagged as an error, an `ERROR` annotation was
 *      logged, or either exit code is non-zero; otherwise `success`.
 *   3. No job was claimed: `error` if an `ERROR` annotation was logged (so a
 *      failure before the claim is surfaced rather than shown as idle),
 *      otherwise `idle`.
 *
 * @param raw      Full text of the run log.
 * @param fileName Log file name; the run id and start time are derived from it.
 * @returns The summary row for this log.
 */
export function summarizeRunLog(raw: string, fileName: string): RunLogSummary {
  const runId = runIdFromFileName(fileName)
  const lines = raw.split('\n')

  let jobId: string | null = null
  let model: string | null = null
  let permissionMode: string | null = null
  let claudeExit: number | null = null
  let runExit: number | null = null
  let durationMs: number | null = null
  let numTurns: number | null = null
  let totalCostUsd: number | null = null
  let eventCount = 0
  let hasResult = false
  let resultIsError = false
  let resultSubtype: string | null = null
  let tokenExpired = false
  let hasErrorLine = false
  let firstErrorMsg: string | null = null

  for (const line of lines) {
    if (!line) continue
    const m = line.match(META_RE)
    if (m) {
      const msg = m[2]
      if (msg.startsWith('claimed job_id=')) {
        jobId = msg.slice('claimed job_id='.length).trim() || jobId
      } else if (msg.startsWith('config ')) {
        model = /\bmodel=(\S+)/.exec(msg)?.[1] ?? model
        permissionMode = /\bpermission_mode=(\S+)/.exec(msg)?.[1] ?? permissionMode
      } else if (msg.startsWith('claude done')) {
        const e = /\bexit_code=(-?\d+)/.exec(msg)
        if (e) claudeExit = Number(e[1])
        const d = /\bduration_ms=(\d+)/.exec(msg)
        if (d) durationMs = Number(d[1])
      } else if (msg.startsWith('exit code=')) {
        const e = /exit code=(-?\d+)/.exec(msg)
        if (e) runExit = Number(e[1])
      } else if (msg.startsWith('TOKEN_EXPIRED detected')) {
        tokenExpired = true
      } else if (msg.startsWith('ERROR')) {
        hasErrorLine = true
        // Keep only the first error, bounded, as the headline for the table.
        if (!firstErrorMsg) firstErrorMsg = msg.replace(/^ERROR\s*/, '').slice(0, 300)
      }
      continue
    }
    const trimmed = line.trimStart()
    if (trimmed.startsWith('{')) {
      eventCount++
      // Prefix test first so that only the result line pays for a JSON.parse.
      if (!hasResult && trimmed.startsWith('{"type":"result"')) {
        try {
          const obj = JSON.parse(trimmed)
          hasResult = true
          resultIsError = !!obj.is_error
          resultSubtype = typeof obj.subtype === 'string' ? obj.subtype : null
          if (typeof obj.num_turns === 'number') numTurns = obj.num_turns
          if (typeof obj.total_cost_usd === 'number') totalCostUsd = obj.total_cost_usd
          if (durationMs == null && typeof obj.duration_ms === 'number') durationMs = obj.duration_ms
        } catch {
          // malformed result line — ignore
        }
      }
    }
  }

  // The exit code on the `claude done` line wins over the `exit code=` line.
  const exitCode = claudeExit ?? runExit
  const terminal = runExit != null || hasResult || hasErrorLine || tokenExpired
  const inProgress = !terminal

  const failed =
    resultIsError ||
    hasErrorLine ||
    (claudeExit != null && claudeExit !== 0) ||
    (runExit != null && runExit !== 0)

  let status: RunStatus
  if (tokenExpired) {
    status = 'token-expired'
  } else if (jobId) {
    if (inProgress) {
      status = 'running'
    } else if (failed) {
      status = 'error'
    } else {
      status = 'success'
    }
  } else if (hasErrorLine) {
    // An ERROR annotation before any job was claimed is a failed iteration,
    // not an idle one — report it so `errorSummary` carries the message.
    status = 'error'
  } else {
    // No job was claimed this iteration — the worker was idle / waiting.
    status = 'idle'
  }

  let errorSummary: string | null = null
  if (status === 'error' || status === 'token-expired') {
    errorSummary =
      firstErrorMsg ??
      (tokenExpired ? 'TOKEN_EXPIRED detected in output' : null) ??
      (resultIsError ? `result: ${resultSubtype ?? 'error'}` : null) ??
      (exitCode != null ? `exit code ${exitCode}` : null)
  }

  return {
    fileName,
    runId,
    startedAt: startedAtFromRunId(runId),
    status,
    jobId,
    model,
    permissionMode,
    durationMs,
    numTurns,
    totalCostUsd,
    exitCode,
    eventCount,
    inProgress,
    errorSummary,
  }
}

/**
 * Flatten a `tool_result` content value into a single display string.
 *
 * - strings are returned as-is;
 * - null / undefined become the empty string;
 * - arrays are rendered item by item and joined with newlines: string items
 *   verbatim, objects with a string `text` property as that text, anything
 *   else as its JSON form;
 * - every other value is rendered as JSON.
 */
function normalizeContent(content: unknown): string {
  if (content == null) return ''
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return JSON.stringify(content)

  const parts: string[] = []
  for (const item of content) {
    if (typeof item === 'string') {
      parts.push(item)
      continue
    }
    const maybeText: unknown =
      item !== null && typeof item === 'object' ? (item as { text?: unknown }).text : undefined
    parts.push(typeof maybeText === 'string' ? maybeText : JSON.stringify(item))
  }
  return parts.join('\n')
}

/* eslint-disable @typescript-eslint/no-explicit-any -- stream-json events are genuinely dynamic */
/**
 * Translate one parsed stream-json object into zero or more timeline events,
 * appended to `events` in place.
 *
 * - `system` (init, or no subtype) → one `system-init` event
 * - `rate_limit_event`             → one `rate-limit` event
 * - `assistant`                    → one event per text / thinking / tool_use block
 * - `user`                         → one `tool-result` event per tool_result block
 * - `result`                       → one `result` event
 * - anything else                  → one compact `raw` event
 *
 * Every field is type-checked before use and replaced by a placeholder when it
 * is missing or has the wrong type, so a sparse or unexpected object never
 * makes this function throw.
 *
 * @param events Timeline to append to.
 * @param obj    The parsed JSON value of one log line.
 */
function pushJsonEvent(events: LogEvent[], obj: any): void {
  const type = obj?.type
  const ts: string | null = typeof obj?.timestamp === 'string' ? obj.timestamp : null

  // Only the session-init message carries model / tools / cwd. Other system
  // subtypes fall through to the raw fallback instead of being rendered as an
  // init card full of placeholders.
  if (type === 'system' && (obj.subtype == null || obj.subtype === 'init')) {
    const mcp: string[] = Array.isArray(obj.mcp_servers)
      ? obj.mcp_servers.map((s: any) => {
          if (typeof s?.name === 'string') return s.name
          if (typeof s === 'string') return s
          // Nameless entry: show its JSON rather than "[object Object]".
          const json: string | undefined = JSON.stringify(s)
          return json ?? String(s)
        })
      : []
    events.push({
      kind: 'system-init',
      ts,
      model: typeof obj.model === 'string' ? obj.model : '—',
      permissionMode: typeof obj.permissionMode === 'string' ? obj.permissionMode : '—',
      tools: Array.isArray(obj.tools) ? obj.tools.filter((t: unknown) => typeof t === 'string') : [],
      mcpServers: mcp,
      sessionId: typeof obj.session_id === 'string' ? obj.session_id : '',
      cwd: typeof obj.cwd === 'string' ? obj.cwd : '',
      version: typeof obj.claude_code_version === 'string' ? obj.claude_code_version : '',
    })
    return
  }

  if (type === 'rate_limit_event') {
    events.push({
      kind: 'rate-limit',
      ts,
      status: typeof obj.rate_limit_info?.status === 'string' ? obj.rate_limit_info.status : 'unknown',
    })
    return
  }

  if (type === 'assistant') {
    const content = obj?.message?.content
    if (Array.isArray(content)) {
      for (const block of content) {
        if (block?.type === 'text' && typeof block.text === 'string') {
          const c = cap(block.text, TEXT_CAP)
          events.push({ kind: 'assistant-text', ts, text: c.text, truncated: c.truncated })
        } else if (block?.type === 'thinking' && typeof block.thinking === 'string') {
          const c = cap(block.thinking, TEXT_CAP)
          events.push({ kind: 'thinking', ts, text: c.text, truncated: c.truncated })
        } else if (block?.type === 'tool_use') {
          let inputStr: string
          try {
            // JSON.stringify yields undefined (not a string) when `input` is
            // absent; fall back to '' so the cap below cannot throw.
            const json: string | undefined = JSON.stringify(block.input, null, 2)
            inputStr = json ?? ''
          } catch {
            inputStr = String(block.input)
          }
          const c = cap(inputStr, TOOL_INPUT_CAP)
          events.push({
            kind: 'tool-call',
            ts,
            id: typeof block.id === 'string' ? block.id : '',
            name: typeof block.name === 'string' ? block.name : 'tool',
            input: c.text,
            truncated: c.truncated,
          })
        }
      }
    }
    return
  }

  if (type === 'user') {
    const content = obj?.message?.content
    if (Array.isArray(content)) {
      for (const block of content) {
        if (block?.type === 'tool_result') {
          const body = normalizeContent(block.content)
          const c = cap(body, TOOL_RESULT_CAP)
          events.push({
            kind: 'tool-result',
            ts,
            toolUseId: typeof block.tool_use_id === 'string' ? block.tool_use_id : '',
            isError: !!block.is_error,
            body: c.text,
            truncated: c.truncated,
            // Pre-cap length, so the UI can tell how much was cut.
            fullLength: body.length,
          })
        }
      }
    }
    return
  }

  if (type === 'result') {
    const c = cap(typeof obj.result === 'string' ? obj.result : '', TEXT_CAP)
    events.push({
      kind: 'result',
      ts,
      subtype: typeof obj.subtype === 'string' ? obj.subtype : 'unknown',
      isError: !!obj.is_error,
      durationMs: typeof obj.duration_ms === 'number' ? obj.duration_ms : null,
      numTurns: typeof obj.num_turns === 'number' ? obj.num_turns : null,
      totalCostUsd: typeof obj.total_cost_usd === 'number' ? obj.total_cost_usd : null,
      resultText: c.text,
      resultTruncated: c.truncated,
    })
    return
  }

  // Unknown event type — keep a compact raw note so nothing is silently dropped.
  events.push({ kind: 'raw', ts, text: cap(`${type ?? 'event'}: ${JSON.stringify(obj)}`, 2048).text })
}
/* eslint-enable @typescript-eslint/no-explicit-any */

/**
 * Rough payload weight of one event: the length of its main text field for the
 * kinds that carry free-form text, and a flat 64 for every other kind.
 */
function estimateSize(e: LogEvent): number {
  if (e.kind === 'tool-result') return e.body.length
  if (e.kind === 'tool-call') return e.input.length
  if (e.kind === 'result') return e.resultText.length
  if (e.kind === 'assistant-text' || e.kind === 'thinking' || e.kind === 'raw') {
    return e.text.length
  }
  // meta, system-init, rate-limit: small, fixed-ish records.
  return 64
}

/**
 * Keep the overall payload under RESPONSE_CAP.
 *
 * When the summed estimated size of `events` exceeds the cap, tool-result
 * bodies are blanked in timeline order (oldest first) — each one marked as
 * truncated — until the total fits or no bodies are left. Events are mutated
 * in place.
 *
 * @returns true when the total was over the cap (whether or not blanking
 *          brought it back under), false when nothing had to be touched.
 */
function enforceResponseCap(events: LogEvent[]): boolean {
  let remaining = events.reduce((sum, ev) => sum + estimateSize(ev), 0)
  if (remaining <= RESPONSE_CAP) {
    return false
  }

  for (const ev of events) {
    if (remaining <= RESPONSE_CAP) break
    if (ev.kind !== 'tool-result' || ev.body === '') continue
    remaining -= ev.body.length
    ev.body = ''
    ev.truncated = true
  }
  return true
}

/**
 * Build the full event timeline for the detail panel.
 *
 * Each non-blank line becomes one or more events: `[run-one-job]` annotation
 * lines turn into `meta` events, lines that look like JSON objects are decoded
 * into stream-json events, and everything else is kept as a capped `raw`
 * event. The summary is computed from the same text, and the overall payload
 * is bounded before returning.
 */
export function parseRunLog(raw: string, fileName: string): ParsedRunLog {
  const summary = summarizeRunLog(raw, fileName)
  const events: LogEvent[] = []

  const pushRaw = (line: string): void => {
    events.push({ kind: 'raw', ts: null, text: cap(line, TOOL_RESULT_CAP).text })
  }

  raw.split('\n').forEach((line) => {
    if (line.trim() === '') return

    const meta = META_RE.exec(line)
    if (meta !== null) {
      const message = meta[2]
      events.push({ kind: 'meta', ts: meta[1], tag: classifyMeta(message), text: message })
      return
    }

    const candidate = line.trimStart()
    if (!candidate.startsWith('{')) {
      // Non-JSON, non-meta noise (e.g. a bare `Warning: ...` from claude).
      pushRaw(line)
      return
    }

    try {
      pushJsonEvent(events, JSON.parse(candidate))
    } catch {
      // partial / malformed JSON line (e.g. a log read mid-write) — keep it raw
      pushRaw(line)
    }
  })

  return {
    summary,
    events,
    inProgress: summary.inProgress,
    responseTruncated: enforceResponseCap(events),
  }
}