islandflow/.agents/skills/impeccable/scripts/live-manual-edit-evidence.mjs

#!/usr/bin/env node
/**
 * Collect evidence for pending live copy edits.
 *
 * This module intentionally does not edit source files and does not choose a
 * winner. It gathers staged browser edits, rendered context, framework source
 * hints, and likely source candidates so the AI copy-edit batch runner can make
 * source changes with full repo context.
 */

import fs from 'node:fs';
import path from 'node:path';
import { isGeneratedFile } from './is-generated.mjs';
import { readBuffer, getBufferPath } from './live-manual-edits-buffer.mjs';

const EVIDENCE_VERSION = 1;
const TEXT_EXTENSIONS = new Set(['.html', '.jsx', '.tsx', '.vue', '.svelte', '.astro', '.js', '.mjs', '.ts']);
const SEARCH_DIRS = ['src', 'app', 'pages', 'components', 'public', 'views', 'templates', 'site', 'lib', 'data'];
const STRONG_LITERAL_MATCH_LIMIT = 8;
const WEAK_LITERAL_MATCH_LIMIT = 4;
const OBJECT_KEY_MATCH_LIMIT = 8;
const LOCATOR_MATCH_LIMIT = 4;
const CONTEXT_MATCH_LIMIT = 8;
const CONTEXT_MATCH_PER_HINT = 2;
const SKIP_DIRS = new Set([
  'node_modules',
  '.git',
  '.impeccable',
  '.astro',
  '.next',
  '.nuxt',
  '.svelte-kit',
  'dist',
  'build',
  'out',
  'coverage',
]);

export function buildManualEditEvidence({ cwd = process.cwd(), pageUrl = null } = {}) {
  const buffer = readBuffer(cwd);
  const entries = pageUrl
    ? buffer.entries.filter((entry) => entry.pageUrl === pageUrl)
    : buffer.entries;
  const opCount = countOps(entries);

  if (opCount === 0) {
    return {
      pageUrl,
      count: 0,
      entries: [],
      ops: [],
      candidates: [],
    };
  }

  const searchFiles = collectSearchFiles(cwd);
  const ops = flattenOps(entries);
  const candidates = ops.map((op) => buildCandidatesForOp(op, cwd, searchFiles));
  return {
    version: EVIDENCE_VERSION,
    pageUrl: pageUrl || null,
    count: opCount,
    entries,
    ops,
    context: {
      cwd,
      bufferPath: path.relative(cwd, getBufferPath(cwd)),
      totalEntries: entries.length,
      totalOps: opCount,
    },
    candidates,
  };
}

function countOps(entries) {
  let count = 0;
  for (const entry of entries) count += Array.isArray(entry.ops) ? entry.ops.length : 0;
  return count;
}

function flattenOps(entries) {
  const out = [];
  for (const entry of entries) {
    const contextHintsByRef = buildContextHintsByRef(entry);
    for (const op of entry.ops || []) {
      out.push({
        entryId: entry.id,
        pageUrl: entry.pageUrl,
        ref: op.ref,
        contextRef: op.contextRef || null,
        tag: op.tag,
        elementId: op.elementId || null,
        classes: Array.isArray(op.classes) ? op.classes : [],
        originalText: op.originalText,
        newText: op.newText,
        deleted: op.deleted === true,
        sourceHint: op.sourceHint || null,
        leaf: op.leaf || null,
        nearbyEditableTexts: Array.isArray(op.nearbyEditableTexts) ? op.nearbyEditableTexts : [],
        container: op.container || null,
        contextHints: contextHintsByRef.get(op.ref) || [],
      });
    }
  }
  return out;
}

function buildContextHintsByRef(entry) {
  const map = new Map();
  for (const op of entry.ops || []) {
    const hints = new Set();
    const add = (value) => {
      const text = normalizeText(decodeBasicHtml(String(value || '')));
      if (text.length < 3 || text.length > 160) return;
      if (text === normalizeText(op.originalText) || text === normalizeText(op.newText)) return;
      hints.add(text);
    };

    for (const item of op.nearbyEditableTexts || []) {
      add(typeof item === 'string' ? item : item?.text);
    }
    const outer = typeof entry.element?.outerHTML === 'string' ? entry.element.outerHTML : '';
    for (const match of outer.matchAll(/data-impeccable-original-text="([^"]*)"/g)) add(match[1]);
    if (typeof entry.element?.textContent === 'string') {
      for (const chunk of entry.element.textContent.split(/\s{2,}|\n|\t/)) add(chunk);
    }
    map.set(op.ref, [...hints].slice(0, 16));
  }
  return map;
}

function buildCandidatesForOp(op, cwd, searchFiles) {
  const originalText = String(op.originalText || '');
  const contextNeedles = op.contextHints || [];
  return {
    entryId: op.entryId,
    ref: op.ref,
    originalText,
    sourceHint: analyzeSourceHint(op, cwd),
    textMatches: originalText ? findLiteralMatches(searchFiles, originalText, { max: literalMatchLimit(originalText) }) : [],
    objectKeyMatches: originalText ? findObjectKeyMatches(searchFiles, originalText, { max: OBJECT_KEY_MATCH_LIMIT }) : [],
    locatorMatches: findLocatorMatches(searchFiles, op, { max: LOCATOR_MATCH_LIMIT }),
    contextTextMatches: findContextMatches(searchFiles, contextNeedles, { maxPerHint: CONTEXT_MATCH_PER_HINT, max: CONTEXT_MATCH_LIMIT }),
  };
}

function literalMatchLimit(text) {
  return isWeakSourceNeedle(text) ? WEAK_LITERAL_MATCH_LIMIT : STRONG_LITERAL_MATCH_LIMIT;
}

function isWeakSourceNeedle(text) {
  const normalized = normalizeText(text);
  return normalized.length < 4 || /^[\d.,+\-%\s]+$/.test(normalized);
}

function analyzeSourceHint(op, cwd) {
  const hint = normalizeSourceHint(op.sourceHint);
  if (!hint.file) return null;
  const file = path.resolve(cwd, hint.file);
  const relativeFile = path.relative(cwd, file);
  if (!isPathInsideOrEqual(cwd, file)) {
    return { ...hint, status: 'outside_cwd', relativeFile: hint.file };
  }
  if (!fs.existsSync(file)) {
    return { ...hint, status: 'file_missing', relativeFile };
  }
  if (isGeneratedFile(file, { cwd })) {
    return { ...hint, status: 'generated', relativeFile };
  }

  const content = fs.readFileSync(file, 'utf-8');
  const lines = content.split('\n');
  const line = hint.line || 1;
  const start = Math.max(0, line - 4);
  const end = Math.min(lines.length, line + 3);
  const windowText = lines.slice(start, end).join('\n');
  const containsOriginalText = typeof op.originalText === 'string' && windowText.includes(op.originalText);
  return {
    ...hint,
    status: containsOriginalText ? 'ok' : 'text_not_found_near_hint',
    relativeFile,
    excerpt: lines.slice(start, end).map((text, index) => ({
      line: start + index + 1,
      text: text.slice(0, 240),
    })),
  };
}

function normalizeSourceHint(hint) {
  if (!hint || typeof hint !== 'object') return {};
  let line = Number.isFinite(Number(hint.line)) ? Number(hint.line) : null;
  let column = Number.isFinite(Number(hint.column)) ? Number(hint.column) : null;
  if ((!line || !column) && typeof hint.loc === 'string') {
    const match = hint.loc.match(/^(\d+)(?::(\d+))?/);
    if (match) {
      line = Number(match[1]);
      if (match[2]) column = Number(match[2]);
    }
  }
  return {
    file: typeof hint.file === 'string' ? hint.file : '',
    loc: typeof hint.loc === 'string' ? hint.loc : '',
    line,
    column,
  };
}

function collectSearchFiles(cwd) {
  const out = [];
  const seenDirs = new Set();
  const seenFiles = new Set();
  for (const dir of SEARCH_DIRS) {
    scanDir(path.join(cwd, dir), cwd, seenDirs, seenFiles, out, 0);
  }
  scanRootFiles(cwd, seenFiles, out);
  return out;
}

function scanDir(dir, cwd, seenDirs, seenFiles, out, depth) {
  if (depth > 7 || !fs.existsSync(dir)) return;
  let realDir;
  try { realDir = fs.realpathSync(dir); } catch { return; }
  if (seenDirs.has(realDir)) return;
  seenDirs.add(realDir);

  let entries;
  try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return; }
  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      if (SKIP_DIRS.has(entry.name)) continue;
      scanDir(fullPath, cwd, seenDirs, seenFiles, out, depth + 1);
      continue;
    }
    if (!entry.isFile() || !TEXT_EXTENSIONS.has(path.extname(entry.name).toLowerCase())) continue;
    maybeAddSearchFile(fullPath, cwd, seenFiles, out);
  }
}

function scanRootFiles(cwd, seenFiles, out) {
  let entries;
  try { entries = fs.readdirSync(cwd, { withFileTypes: true }); } catch { return; }
  for (const entry of entries) {
    if (!entry.isFile() || !TEXT_EXTENSIONS.has(path.extname(entry.name).toLowerCase())) continue;
    maybeAddSearchFile(path.join(cwd, entry.name), cwd, seenFiles, out);
  }
}

function maybeAddSearchFile(file, cwd, seenFiles, out) {
  let realFile;
  try { realFile = fs.realpathSync(file); } catch { return; }
  if (seenFiles.has(realFile)) return;
  seenFiles.add(realFile);
  if (isGeneratedFile(file, { cwd })) return;
  let content;
  try { content = fs.readFileSync(file, 'utf-8'); } catch { return; }
  out.push({ file, relativeFile: path.relative(cwd, file), content, lines: content.split('\n') });
}

function findLiteralMatches(searchFiles, needle, { max }) {
  return findMatches(searchFiles, needle, { kind: 'text', max });
}

function findObjectKeyMatches(searchFiles, text, { max }) {
  const re = new RegExp('(["\\\'`])' + escapeRegExp(text) + '\\1(?=\\s*:)', 'g');
  const out = [];
  for (const file of searchFiles) {
    for (const match of file.content.matchAll(re)) {
      out.push(matchForIndex(file, match.index, 'object_key', text));
      if (out.length >= max) return out;
    }
  }
  return out;
}

function findLocatorMatches(searchFiles, op, { max }) {
  const needles = [];
  if (op.elementId) needles.push({ kind: 'id', needle: op.elementId });
  for (const cls of op.classes || []) {
    if (cls) needles.push({ kind: 'class', needle: cls });
  }
  if (op.tag) needles.push({ kind: 'tag', needle: '<' + op.tag });

  const out = [];
  const seen = new Set();
  for (const { kind, needle } of needles) {
    for (const match of findMatches(searchFiles, needle, { kind, max })) {
      const key = match.file + ':' + match.line + ':' + kind + ':' + needle;
      if (seen.has(key)) continue;
      seen.add(key);
      out.push({ ...match, needle });
      if (out.length >= max) return out;
    }
  }
  return out;
}

function findContextMatches(searchFiles, hints, { maxPerHint, max }) {
  const out = [];
  const seen = new Set();
  for (const hint of hints || []) {
    for (const match of findMatches(searchFiles, hint, { kind: 'context', max: maxPerHint })) {
      const key = match.file + ':' + match.line + ':' + hint;
      if (seen.has(key)) continue;
      seen.add(key);
      out.push({ ...match, needle: hint });
      if (out.length >= max) return out;
    }
  }
  return out;
}

function findMatches(searchFiles, needle, { kind, max }) {
  const text = String(needle || '');
  if (!text) return [];
  const out = [];
  for (const file of searchFiles) {
    let index = 0;
    while (out.length < max) {
      index = file.content.indexOf(text, index);
      if (index === -1) break;
      out.push(matchForIndex(file, index, kind, text));
      index += Math.max(1, text.length);
    }
    if (out.length >= max) break;
  }
  return out;
}

function matchForIndex(file, index, kind, needle) {
  const line = file.content.slice(0, index).split('\n').length;
  const lineText = file.lines[line - 1] || '';
  return {
    kind,
    file: file.relativeFile,
    line,
    needle,
    excerpt: lineText.trim().slice(0, 240),
  };
}

function isPathInsideOrEqual(cwd, file) {
  const rel = path.relative(path.resolve(cwd), path.resolve(file));
  return rel === '' || (!rel.startsWith('..') && !path.isAbsolute(rel));
}

function normalizeText(value) {
  return String(value || '').replace(/\s+/g, ' ').trim();
}

function decodeBasicHtml(value) {
  return value
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>');
}

function escapeRegExp(value) {
  return String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}