#!/usr/bin/env node
// Scrum4Me/scripts/check-doc-links.mjs
/**
 * Doc-link checker: walks docs/ (and README.md, CLAUDE.md, AGENTS.md),
 * extracts relative markdown links, and verifies that every target file
 * (and optional #anchor) actually exists.
 *
 * Exits 0 if all links are valid, 1 if any are broken.
 */

import { existsSync, readFileSync, readdirSync, statSync } from 'fs';
import { dirname, extname, relative, resolve } from 'path';
import { fileURLToPath } from 'url';

// Absolute path of the directory that contains this script (ES modules have
// no built-in `__dirname`, so it is derived from `import.meta.url`).
const __dirname = dirname(fileURLToPath(import.meta.url));

// Parent of the script's directory; every scanned path is resolved from here.
const ROOT = resolve(__dirname, '..');

// Archived documentation lives here. Its links are preserved exactly as they
// were originally written, even though the targets have since been moved or
// deleted, so these directories are left out of link-checking on purpose.
const EXCLUDE_DIRS = new Set([resolve(ROOT, 'docs', 'old')]);

/**
 * Recursively gather every `.md` file below a directory.
 *
 * A directory listed in EXCLUDE_DIRS contributes nothing (and is not
 * descended into). Entries are visited in the order `readdirSync` returns
 * them, with a subdirectory's files appearing at the subdirectory's position.
 *
 * @param {string} dir - Absolute path of the directory to walk.
 * @returns {string[]} Absolute paths of the markdown files found.
 */
function collectMd(dir) {
  if (EXCLUDE_DIRS.has(dir)) {
    return [];
  }
  return readdirSync(dir).flatMap((name) => {
    const entryPath = resolve(dir, name);
    if (statSync(entryPath).isDirectory()) {
      return collectMd(entryPath);
    }
    return extname(name) === '.md' ? [entryPath] : [];
  });
}

/**
 * Convert heading text to a GitHub-style anchor slug.
 *
 * Follows GitHub's documented rules for section links: surrounding
 * whitespace is dropped, letters are lower-cased, each space becomes a
 * hyphen, and remaining punctuation/symbols/whitespace are removed.
 *
 * Consequences worth knowing:
 *  - Non-ASCII letters and digits are preserved ("Überblick" -> "überblick").
 *  - Spaces are NOT collapsed, so "Foo & Bar" -> "foo--bar".
 *  - A stripped leading symbol leaves its following space behind, so
 *    "🚀 Quick start" -> "-quick-start".
 *
 * @param {string} text - Heading text without the leading `#` markers.
 * @returns {string} The anchor slug (no leading `#`).
 */
function toSlug(text) {
  return text
    // Trim BEFORE stripping: whitespace exposed by a removed symbol must
    // survive so it can turn into a hyphen.
    .trim()
    .toLowerCase()
    // Keep Unicode letters, combining marks, digits, `_`, space and `-`.
    .replace(/[^\p{L}\p{M}\p{N}_ -]/gu, '')
    // One hyphen per space; runs of spaces yield runs of hyphens.
    .replace(/ /g, '-');
}

/**
 * Collect the anchor slugs of all ATX headings (`#` .. `######`) in a
 * markdown file.
 *
 * - Lines inside fenced code blocks (``` or ~~~) are ignored, so shell
 *   comments such as `# install` are not mistaken for headings.
 * - Headings may be indented by up to three spaces.
 * - An optional closing hash sequence (`## Title ##`) is dropped.
 * - Repeated headings get the numeric suffixes GitHub assigns: the first
 *   occurrence keeps the plain slug, later ones become `slug-1`, `slug-2`, …
 *
 * Setext headings (underlined with `===` / `---`) are not recognised.
 *
 * @param {string} filePath - Path of the markdown file to read.
 * @returns {Set<string>} Every anchor slug the file exposes.
 */
function headingSlugs(filePath) {
  const content = readFileSync(filePath, 'utf8');
  const slugs = new Set();
  // Marker (e.g. "```" or "~~~~") that opened the current fenced block,
  // or null when we are outside of any fence.
  let openFence = null;

  for (const line of content.split(/\r?\n/)) {
    if (openFence !== null) {
      // A fence closes only on a bare marker of the same character that is
      // at least as long as the opening one; everything else is code.
      const closing = line.match(/^ {0,3}(`{3,}|~{3,})\s*$/);
      if (closing && closing[1][0] === openFence[0] && closing[1].length >= openFence.length) {
        openFence = null;
      }
      continue;
    }

    const opening = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (opening) {
      openFence = opening[1];
      continue;
    }

    const heading = line.match(/^ {0,3}#{1,6}\s+(.+)/);
    if (!heading) continue;

    // Strip an optional closing sequence of hashes ("## Title ##").
    const text = heading[1].replace(/\s+#+\s*$/, '');
    const base = toSlug(text);

    // De-duplicate the way GitHub does: append -1, -2, … until unused.
    let slug = base;
    for (let n = 1; slugs.has(slug); n += 1) {
      slug = `${base}-${n}`;
    }
    slugs.add(slug);
  }
  return slugs;
}

// Match `[label](url)` where url may contain one level of balanced parens
// (e.g. Next.js route groups like `app/(app)/...`). The alternation consumes
// one plain character per iteration (no nested `+`), so an unterminated or
// doubly-nested link fails in linear time instead of backtracking
// exponentially.
const LINK_RE = /\[(?:[^\]]*)\]\(((?:[^()]|\([^()]*\))+)\)/g;

// Target extensions for which a `#fragment` refers to a markdown heading.
const MARKDOWN_EXTS = new Set(['.md', '.markdown', '.mdx']);

/**
 * Reduce the raw text between a link's parentheses to its destination:
 * unwraps the `<path with spaces>` form and drops a trailing quoted title
 * (`path "title"` / `path 'title'`).
 *
 * @param {string} raw - Text captured between `(` and `)`.
 * @returns {string} The link destination.
 */
function linkDestination(raw) {
  const trimmed = raw.trim();
  const angled = trimmed.match(/^<([^<>]*)>/);
  if (angled) return angled[1];
  return trimmed.replace(/\s+(?:"[^"]*"|'[^']*')$/, '');
}

/**
 * Resolve a link path against a directory and return the existing target.
 * The path is tried literally first, then percent-decoded (`%20` -> space).
 *
 * @param {string} baseDir - Directory of the file containing the link.
 * @param {string} pathPart - Link destination without its `#fragment`.
 * @returns {string|null} Absolute path of the target, or null if missing.
 */
function resolveLinkTarget(baseDir, pathPart) {
  const literal = resolve(baseDir, pathPart);
  if (existsSync(literal)) return literal;
  try {
    const decoded = resolve(baseDir, decodeURIComponent(pathPart));
    return existsSync(decoded) ? decoded : null;
  } catch (err) {
    // A malformed escape (e.g. a lone `%`) simply cannot be decoded; the
    // literal path was already tried, so the target is missing.
    if (err instanceof URIError) return null;
    throw err;
  }
}

/**
 * Check every inline markdown link (and image) in one file.
 *
 * Skipped: same-file anchors (`#x`), anything with a URI scheme (`https:`,
 * `mailto:`, `tel:`, …) and protocol-relative `//host` links. For the rest,
 * the target must exist on disk relative to the file; when the link carries
 * a `#fragment` and the target is a markdown file, the fragment must match
 * one of that file's heading slugs. Fragments on directories or
 * non-markdown files (e.g. `foo.ts#L10`) are not validated.
 *
 * @param {string} filePath - Absolute path of the markdown file to scan.
 * @returns {{file: string, link: string, reason: string}[]} One entry per
 *   broken link; `link` is the text exactly as written in the file.
 */
function checkFile(filePath) {
  const content = readFileSync(filePath, 'utf8');
  const baseDir = dirname(filePath);
  // Heading slugs per target, so a file linked many times is read once.
  const slugCache = new Map();
  const failures = [];

  // matchAll iterates over a private copy of the regex, so the shared
  // `/g` pattern's lastIndex is never left in a dirty state.
  for (const [, raw] of content.matchAll(LINK_RE)) {
    const dest = linkDestination(raw);
    const isExternal = dest.startsWith('//') || /^[a-z][a-z\d+.-]*:/i.test(dest);
    if (dest === '' || dest.startsWith('#') || isExternal) continue;

    const hashAt = dest.indexOf('#');
    const pathPart = hashAt === -1 ? dest : dest.slice(0, hashAt);
    const anchor = hashAt === -1 ? '' : dest.slice(hashAt + 1);

    const target = resolveLinkTarget(baseDir, pathPart);
    if (target === null) {
      failures.push({ file: filePath, link: raw, reason: 'file not found' });
      continue;
    }

    if (!anchor) continue;
    // Heading anchors only exist in markdown files; reading a directory
    // would throw, and source files use non-heading fragments like `#L10`.
    const isMarkdownFile =
      statSync(target).isFile() && MARKDOWN_EXTS.has(extname(target).toLowerCase());
    if (!isMarkdownFile) continue;

    let slugs = slugCache.get(target);
    if (slugs === undefined) {
      slugs = headingSlugs(target);
      slugCache.set(target, slugs);
    }
    if (!slugs.has(anchor)) {
      failures.push({ file: filePath, link: raw, reason: `anchor #${anchor} not found` });
    }
  }
  return failures;
}

// Entry points to scan: the docs/ tree plus the top-level markdown files.
// Entries that do not exist are silently skipped.
const roots = [
  resolve(ROOT, 'docs'),
  resolve(ROOT, 'README.md'),
  resolve(ROOT, 'CLAUDE.md'),
  resolve(ROOT, 'AGENTS.md'),
];

// Expand the roots into a flat list of markdown files: directories are
// walked recursively, plain files are taken as-is.
const files = roots
  .filter((root) => existsSync(root))
  .flatMap((root) => (statSync(root).isDirectory() ? collectMd(root) : [root]));

// Every broken link across all files, in file order.
const allFailures = files.flatMap((file) => checkFile(file));

if (allFailures.length === 0) {
  console.log(`✓ All doc links valid (${files.length} files checked)`);
} else {
  console.error(`\n✗ Broken doc links (${allFailures.length}):\n`);
  for (const { file, link, reason } of allFailures) {
    // path.relative copes with either path separator, so reported paths
    // are repo-relative on Windows as well as POSIX.
    const rel = relative(ROOT, file);
    console.error(`  ${rel}\n    → ${link}  (${reason})`);
  }
  console.error('');
}

// Exit status: 0 when every link is valid, 1 otherwise. Setting exitCode
// (rather than calling process.exit) lets pending console output flush
// before the process ends on its own.
process.exitCode = allFailures.length === 0 ? 0 : 1;