363 lines
11 KiB
JavaScript
363 lines
11 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Collect evidence for pending live copy edits.
 *
 * This module intentionally does not edit source files and does not choose a
 * winner. It gathers staged browser edits, rendered context, framework source
 * hints, and likely source candidates so the AI copy-edit batch runner can make
 * source changes with full repo context.
 */
|
|
|
|
import fs from 'node:fs';
|
|
import path from 'node:path';
|
|
import { isGeneratedFile } from './is-generated.mjs';
|
|
import { readBuffer, getBufferPath } from './live-manual-edits-buffer.mjs';
|
|
|
|
// Version number stamped onto the non-empty evidence payload.
const EVIDENCE_VERSION = 1;

// Only files with one of these (lower-cased) extensions are read for searching.
const TEXT_EXTENSIONS = new Set([
  '.html',
  '.jsx',
  '.tsx',
  '.vue',
  '.svelte',
  '.astro',
  '.js',
  '.mjs',
  '.ts',
]);

// Directories under the project root that are scanned recursively.
const SEARCH_DIRS = [
  'src',
  'app',
  'pages',
  'components',
  'public',
  'views',
  'templates',
  'site',
  'lib',
  'data',
];

// Caps on how many matches of each kind are reported per op.
const STRONG_LITERAL_MATCH_LIMIT = 8;
const WEAK_LITERAL_MATCH_LIMIT = 4;
const OBJECT_KEY_MATCH_LIMIT = 8;
const LOCATOR_MATCH_LIMIT = 4;
const CONTEXT_MATCH_LIMIT = 8;
const CONTEXT_MATCH_PER_HINT = 2;

// Directory names that are never descended into while scanning.
const SKIP_DIRS = new Set([
  'node_modules',
  '.git',
  '.impeccable',
  '.astro',
  '.next',
  '.nuxt',
  '.svelte-kit',
  'dist',
  'build',
  'out',
  'coverage',
]);
|
|
|
|
/**
 * Build the evidence payload for the manual edits currently held in the buffer.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] Project root; defaults to `process.cwd()`.
 * @param {string|null} [options.pageUrl] When truthy, only buffer entries whose
 *   `pageUrl` is strictly equal to this value are considered.
 * @returns {object} A minimal payload (`count: 0`, empty lists) when the
 *   selected entries carry no ops; otherwise a versioned payload with the
 *   entries, the flattened ops, a context summary, and one candidate record
 *   per op.
 */
export function buildManualEditEvidence({ cwd = process.cwd(), pageUrl = null } = {}) {
  const { entries: bufferedEntries } = readBuffer(cwd);
  const entries = pageUrl
    ? bufferedEntries.filter((candidate) => candidate.pageUrl === pageUrl)
    : bufferedEntries;

  const opCount = countOps(entries);
  if (opCount === 0) {
    return { pageUrl, count: 0, entries: [], ops: [], candidates: [] };
  }

  // The searchable files are read once and shared by every op.
  const searchFiles = collectSearchFiles(cwd);
  const ops = flattenOps(entries);
  const candidates = [];
  for (const op of ops) {
    candidates.push(buildCandidatesForOp(op, cwd, searchFiles));
  }

  const context = {
    cwd,
    bufferPath: path.relative(cwd, getBufferPath(cwd)),
    totalEntries: entries.length,
    totalOps: opCount,
  };

  return {
    version: EVIDENCE_VERSION,
    pageUrl: pageUrl || null,
    count: opCount,
    entries,
    ops,
    context,
    candidates,
  };
}
|
|
|
|
/**
 * Sum the number of ops across all entries.
 *
 * @param {Iterable<object>} entries Buffer entries.
 * @returns {number} Total op count; entries whose `ops` is not an array add 0.
 */
function countOps(entries) {
  let total = 0;
  for (const { ops } of entries) {
    if (!Array.isArray(ops)) continue;
    total += ops.length;
  }
  return total;
}
|
|
|
|
/**
 * Flatten every entry's ops into one list of normalized op records.
 *
 * Each record carries its entry's id and page URL, the op's own fields with
 * missing optional values replaced by `null` or `[]`, and the context hints
 * computed for the op's `ref`.
 *
 * @param {Iterable<object>} entries Buffer entries.
 * @returns {object[]} One record per op, in entry order then op order.
 */
function flattenOps(entries) {
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);
  const flattened = [];

  for (const entry of entries) {
    const hintsByRef = buildContextHintsByRef(entry);

    for (const op of entry.ops || []) {
      const record = {
        entryId: entry.id,
        pageUrl: entry.pageUrl,
        ref: op.ref,
        contextRef: op.contextRef || null,
        tag: op.tag,
        elementId: op.elementId || null,
        classes: listOrEmpty(op.classes),
        originalText: op.originalText,
        newText: op.newText,
        deleted: op.deleted === true,
        sourceHint: op.sourceHint || null,
        leaf: op.leaf || null,
        nearbyEditableTexts: listOrEmpty(op.nearbyEditableTexts),
        container: op.container || null,
        contextHints: hintsByRef.get(op.ref) || [],
      };
      flattened.push(record);
    }
  }

  return flattened;
}
|
|
|
|
/**
 * Collect, per op ref, short pieces of surrounding text usable as search hints.
 *
 * Hint sources, in order: the op's `nearbyEditableTexts`, every
 * `data-impeccable-original-text` attribute value found in the entry element's
 * `outerHTML`, and chunks of the element's `textContent` split on newlines,
 * tabs, or runs of two or more whitespace characters. Each value is
 * entity-decoded and whitespace-normalized; values shorter than 3 or longer
 * than 160 characters, or equal to the op's own original or new text, are
 * dropped. At most 16 distinct hints are kept per op.
 *
 * @param {object} entry Buffer entry with optional `ops` and `element`.
 * @returns {Map<*, string[]>} Hints keyed by `op.ref`.
 */
function buildContextHintsByRef(entry) {
  const hintsByRef = new Map();

  for (const op of entry.ops || []) {
    const collected = new Set();

    const consider = (raw) => {
      const candidate = normalizeText(decodeBasicHtml(String(raw || '')));
      const hasUsableLength = candidate.length >= 3 && candidate.length <= 160;
      if (!hasUsableLength) return;
      // The edited text itself is not context.
      const echoesEdit =
        candidate === normalizeText(op.originalText) || candidate === normalizeText(op.newText);
      if (echoesEdit) return;
      collected.add(candidate);
    };

    for (const nearby of op.nearbyEditableTexts || []) {
      const nearbyText = typeof nearby === 'string' ? nearby : nearby?.text;
      consider(nearbyText);
    }

    const element = entry.element;
    const outerHtml = typeof element?.outerHTML === 'string' ? element.outerHTML : '';
    for (const [, attributeValue] of outerHtml.matchAll(/data-impeccable-original-text="([^"]*)"/g)) {
      consider(attributeValue);
    }

    if (typeof element?.textContent === 'string') {
      element.textContent.split(/\s{2,}|\n|\t/).forEach((chunk) => consider(chunk));
    }

    hintsByRef.set(op.ref, Array.from(collected).slice(0, 16));
  }

  return hintsByRef;
}
|
|
|
|
/**
 * Gather every kind of source candidate for a single flattened op.
 *
 * @param {object} op Flattened op record.
 * @param {string} cwd Project root.
 * @param {object[]} searchFiles Pre-read searchable files.
 * @returns {object} The op's identity plus source-hint analysis, literal text
 *   matches, object-key matches, locator matches, and context-text matches.
 *   Literal and object-key searches are skipped when the original text is empty.
 */
function buildCandidatesForOp(op, cwd, searchFiles) {
  const originalText = String(op.originalText || '');
  const contextNeedles = op.contextHints || [];
  const hasOriginalText = Boolean(originalText);

  const sourceHint = analyzeSourceHint(op, cwd);

  const textMatches = hasOriginalText
    ? findLiteralMatches(searchFiles, originalText, { max: literalMatchLimit(originalText) })
    : [];

  const objectKeyMatches = hasOriginalText
    ? findObjectKeyMatches(searchFiles, originalText, { max: OBJECT_KEY_MATCH_LIMIT })
    : [];

  const locatorMatches = findLocatorMatches(searchFiles, op, { max: LOCATOR_MATCH_LIMIT });

  const contextTextMatches = findContextMatches(searchFiles, contextNeedles, {
    maxPerHint: CONTEXT_MATCH_PER_HINT,
    max: CONTEXT_MATCH_LIMIT,
  });

  return {
    entryId: op.entryId,
    ref: op.ref,
    originalText,
    sourceHint,
    textMatches,
    objectKeyMatches,
    locatorMatches,
    contextTextMatches,
  };
}
|
|
|
|
/**
 * Choose the literal-match cap for a needle: weak needles get the smaller cap.
 *
 * @param {string} text Needle text.
 * @returns {number} WEAK_LITERAL_MATCH_LIMIT or STRONG_LITERAL_MATCH_LIMIT.
 */
function literalMatchLimit(text) {
  if (isWeakSourceNeedle(text)) {
    return WEAK_LITERAL_MATCH_LIMIT;
  }
  return STRONG_LITERAL_MATCH_LIMIT;
}
|
|
|
|
/**
 * Decide whether a needle is too generic to be a strong search term.
 *
 * @param {string} text Needle text.
 * @returns {boolean} True when the normalized text has fewer than 4 characters
 *   or consists only of digits, `.`, `,`, `+`, `-`, `%`, and whitespace.
 */
function isWeakSourceNeedle(text) {
  const compact = normalizeText(text);
  if (compact.length < 4) return true;
  return /^[\d.,+\-%\s]+$/.test(compact);
}
|
|
|
|
/**
 * Check an op's framework source hint against the file system.
 *
 * @param {object} op Flattened op; reads `op.sourceHint` and `op.originalText`.
 * @param {string} cwd Project root the hint's file is resolved against.
 * @returns {object|null} `null` when the hint names no file. Otherwise the
 *   normalized hint plus `relativeFile` and a `status`:
 *   - `outside_cwd`: the resolved path is not inside `cwd`;
 *   - `file_missing`: the path does not exist;
 *   - `generated`: `isGeneratedFile` flags the file;
 *   - `file_unreadable`: the path exists but cannot be read as a file
 *     (for example it is a directory or access is denied);
 *   - `ok` / `text_not_found_near_hint`: whether `op.originalText` occurs in
 *     the excerpt window. These two statuses also include `excerpt`, the
 *     window's lines as `{ line, text }` with text capped at 240 characters.
 */
function analyzeSourceHint(op, cwd) {
  const hint = normalizeSourceHint(op.sourceHint);
  if (!hint.file) return null;

  const file = path.resolve(cwd, hint.file);
  const relativeFile = path.relative(cwd, file);

  if (!isPathInsideOrEqual(cwd, file)) {
    return { ...hint, status: 'outside_cwd', relativeFile: hint.file };
  }
  if (!fs.existsSync(file)) {
    return { ...hint, status: 'file_missing', relativeFile };
  }
  if (isGeneratedFile(file, { cwd })) {
    return { ...hint, status: 'generated', relativeFile };
  }

  // existsSync is also true for directories and unreadable files. A failed
  // read is reported as a status so one bad hint cannot abort the whole
  // evidence build.
  let content;
  try {
    content = fs.readFileSync(file, 'utf-8');
  } catch {
    return { ...hint, status: 'file_unreadable', relativeFile };
  }

  const lines = content.split('\n');
  const line = hint.line || 1;
  // Window covers the hinted line plus three lines on either side
  // (`start`/`end` are 0-based slice bounds; `line` is 1-based).
  const start = Math.max(0, line - 4);
  const end = Math.min(lines.length, line + 3);
  const windowLines = lines.slice(start, end);
  const containsOriginalText =
    typeof op.originalText === 'string' && windowLines.join('\n').includes(op.originalText);

  return {
    ...hint,
    status: containsOriginalText ? 'ok' : 'text_not_found_near_hint',
    relativeFile,
    excerpt: windowLines.map((text, index) => ({
      line: start + index + 1,
      text: text.slice(0, 240),
    })),
  };
}
|
|
|
|
/**
 * Normalize a raw source hint into `{ file, loc, line, column }`.
 *
 * `line` and `column` are taken from the hint when they are numbers or
 * non-blank numeric strings. Anything else (null, undefined, '', booleans,
 * objects) counts as missing and yields `null`, rather than being coerced to
 * `0` by `Number()`. Values still missing (or zero) are then filled from a
 * leading `"line"` or `"line:column"` prefix of `hint.loc`. A non-zero value
 * given explicitly is never overwritten by `loc`.
 *
 * @param {*} hint Raw hint; anything that is not a non-null object yields `{}`.
 * @returns {{file?: string, loc?: string, line?: number|null, column?: number|null}}
 *   `file` and `loc` default to `''` when they are not strings.
 */
function normalizeSourceHint(hint) {
  if (!hint || typeof hint !== 'object') return {};

  const toFiniteNumber = (value) => {
    const isNumericInput =
      typeof value === 'number' || (typeof value === 'string' && value.trim() !== '');
    if (!isNumericInput) return null;
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  };

  let line = toFiniteNumber(hint.line);
  let column = toFiniteNumber(hint.column);

  if ((!line || !column) && typeof hint.loc === 'string') {
    const match = hint.loc.match(/^(\d+)(?::(\d+))?/);
    if (match) {
      // Fill only what is missing; explicit values win over `loc`.
      if (!line) line = Number(match[1]);
      if (!column && match[2]) column = Number(match[2]);
    }
  }

  return {
    file: typeof hint.file === 'string' ? hint.file : '',
    loc: typeof hint.loc === 'string' ? hint.loc : '',
    line,
    column,
  };
}
|
|
|
|
/**
 * Read every searchable file: first the SEARCH_DIRS trees under `cwd`, then
 * the files that sit directly in `cwd`.
 *
 * @param {string} cwd Project root.
 * @returns {object[]} File records appended by `maybeAddSearchFile`.
 */
function collectSearchFiles(cwd) {
  const files = [];
  const visitedDirs = new Set();
  const visitedFiles = new Set();

  SEARCH_DIRS.forEach((dirName) => {
    scanDir(path.join(cwd, dirName), cwd, visitedDirs, visitedFiles, files, 0);
  });
  scanRootFiles(cwd, visitedFiles, files);

  return files;
}
|
|
|
|
/**
 * Recursively add the searchable files below `dir` to `out`.
 *
 * Stops when `depth` exceeds 7, when `dir` does not exist, when its real path
 * was already visited, or when it cannot be resolved or listed. Child
 * directories named in SKIP_DIRS are not entered. Only regular files whose
 * lower-cased extension is in TEXT_EXTENSIONS are offered to
 * `maybeAddSearchFile`.
 *
 * @param {string} dir Directory to scan.
 * @param {string} cwd Project root.
 * @param {Set<string>} seenDirs Real paths of directories already scanned (mutated).
 * @param {Set<string>} seenFiles Real paths of files already considered (mutated).
 * @param {object[]} out Accumulator of file records (mutated).
 * @param {number} depth Current recursion depth; the top-level call passes 0.
 */
function scanDir(dir, cwd, seenDirs, seenFiles, out, depth) {
  const tooDeep = depth > 7;
  if (tooDeep || !fs.existsSync(dir)) return;

  let canonicalDir;
  try {
    canonicalDir = fs.realpathSync(dir);
  } catch {
    return;
  }
  if (seenDirs.has(canonicalDir)) return;
  seenDirs.add(canonicalDir);

  let children;
  try {
    children = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  for (const child of children) {
    const childPath = path.join(dir, child.name);

    if (child.isDirectory()) {
      if (!SKIP_DIRS.has(child.name)) {
        scanDir(childPath, cwd, seenDirs, seenFiles, out, depth + 1);
      }
      continue;
    }

    const extension = path.extname(child.name).toLowerCase();
    if (child.isFile() && TEXT_EXTENSIONS.has(extension)) {
      maybeAddSearchFile(childPath, cwd, seenFiles, out);
    }
  }
}
|
|
|
|
/**
 * Add the searchable files that sit directly in `cwd` (no recursion).
 *
 * @param {string} cwd Project root.
 * @param {Set<string>} seenFiles Real paths of files already considered (mutated).
 * @param {object[]} out Accumulator of file records (mutated).
 */
function scanRootFiles(cwd, seenFiles, out) {
  let rootEntries;
  try {
    rootEntries = fs.readdirSync(cwd, { withFileTypes: true });
  } catch {
    // An unlistable root simply contributes no files.
    return;
  }

  const isSearchable = (entry) =>
    entry.isFile() && TEXT_EXTENSIONS.has(path.extname(entry.name).toLowerCase());

  for (const entry of rootEntries.filter(isSearchable)) {
    maybeAddSearchFile(path.join(cwd, entry.name), cwd, seenFiles, out);
  }
}
|
|
|
|
/**
 * Read `file` and append a search record for it, unless it was already
 * considered, is flagged as generated, or cannot be resolved or read.
 *
 * @param {string} file Path of the file to add.
 * @param {string} cwd Project root, used for the relative path.
 * @param {Set<string>} seenFiles Real paths already considered (mutated).
 * @param {object[]} out Accumulator; receives
 *   `{ file, relativeFile, content, lines }`.
 */
function maybeAddSearchFile(file, cwd, seenFiles, out) {
  // Run an fs call, mapping any thrown error to `undefined`.
  const attempt = (action) => {
    try {
      return action();
    } catch {
      return undefined;
    }
  };

  const canonicalFile = attempt(() => fs.realpathSync(file));
  if (canonicalFile === undefined) return;
  if (seenFiles.has(canonicalFile)) return;
  // Recorded before the generated check, so a skipped file is not retried.
  seenFiles.add(canonicalFile);

  if (isGeneratedFile(file, { cwd })) return;

  const content = attempt(() => fs.readFileSync(file, 'utf-8'));
  if (content === undefined) return;

  const record = {
    file,
    relativeFile: path.relative(cwd, file),
    content,
    lines: content.split('\n'),
  };
  out.push(record);
}
|
|
|
|
/**
 * Find literal occurrences of `needle`, reported with kind `'text'`.
 *
 * @param {object[]} searchFiles Pre-read searchable files.
 * @param {string} needle Text to look for.
 * @param {{max: number}} options Maximum number of matches to return.
 * @returns {object[]} Match records from `findMatches`.
 */
function findLiteralMatches(searchFiles, needle, { max }) {
  const options = { kind: 'text', max };
  return findMatches(searchFiles, needle, options);
}
|
|
|
|
/**
 * Find places where `text` appears as a quoted key: wrapped in a matching pair
 * of single, double, or backtick quotes and followed by optional whitespace
 * and a colon.
 *
 * @param {object[]} searchFiles Pre-read searchable files.
 * @param {string} text Key text to look for (regex-escaped before use).
 * @param {{max: number}} options Searching stops once this many matches exist.
 * @returns {object[]} Match records with kind `'object_key'`.
 */
function findObjectKeyMatches(searchFiles, text, { max }) {
  const keyPattern = new RegExp('(["\\\'`])' + escapeRegExp(text) + '\\1(?=\\s*:)', 'g');
  const found = [];

  for (const file of searchFiles) {
    for (const { index } of file.content.matchAll(keyPattern)) {
      found.push(matchForIndex(file, index, 'object_key', text));
      if (found.length >= max) {
        return found;
      }
    }
  }

  return found;
}
|
|
|
|
/**
 * Find source lines that mention the op's DOM locators: its element id, each
 * of its class names, and its opening tag (`<` + tag), searched in that order.
 *
 * @param {object[]} searchFiles Pre-read searchable files.
 * @param {object} op Flattened op; reads `elementId`, `classes`, and `tag`.
 * @param {{max: number}} options Cap used for each locator's search and for
 *   the combined result.
 * @returns {object[]} Match records de-duplicated by file, line, kind, and needle.
 */
function findLocatorMatches(searchFiles, op, { max }) {
  const locators = [
    ...(op.elementId ? [{ kind: 'id', needle: op.elementId }] : []),
    ...[...(op.classes || [])]
      .filter(Boolean)
      .map((className) => ({ kind: 'class', needle: className })),
    ...(op.tag ? [{ kind: 'tag', needle: '<' + op.tag }] : []),
  ];

  const results = [];
  const emitted = new Set();

  for (const { kind, needle } of locators) {
    const hits = findMatches(searchFiles, needle, { kind, max });
    for (const hit of hits) {
      const dedupeKey = hit.file + ':' + hit.line + ':' + kind + ':' + needle;
      if (emitted.has(dedupeKey)) continue;
      emitted.add(dedupeKey);
      results.push({ ...hit, needle });
      if (results.length >= max) return results;
    }
  }

  return results;
}
|
|
|
|
/**
 * Find source lines containing any of the context hints.
 *
 * @param {object[]} searchFiles Pre-read searchable files.
 * @param {string[]} hints Context hint texts; a falsy value is treated as empty.
 * @param {{maxPerHint: number, max: number}} options Per-hint search cap and
 *   overall result cap.
 * @returns {object[]} Match records with kind `'context'`, de-duplicated by
 *   file, line, and hint.
 */
function findContextMatches(searchFiles, hints, { maxPerHint, max }) {
  const results = [];
  const emitted = new Set();

  for (const hint of hints || []) {
    const hits = findMatches(searchFiles, hint, { kind: 'context', max: maxPerHint });
    for (const hit of hits) {
      const dedupeKey = hit.file + ':' + hit.line + ':' + hint;
      if (emitted.has(dedupeKey)) continue;
      emitted.add(dedupeKey);
      results.push({ ...hit, needle: hint });
      if (results.length >= max) return results;
    }
  }

  return results;
}
|
|
|
|
/**
 * Find non-overlapping literal occurrences of `needle` across the files.
 *
 * @param {object[]} searchFiles Pre-read searchable files, searched in order.
 * @param {*} needle Text to look for; falsy values yield no matches.
 * @param {{kind: string, max: number}} options Kind label for the records and
 *   the maximum number of matches to collect.
 * @returns {object[]} Match records built by `matchForIndex`.
 */
function findMatches(searchFiles, needle, { kind, max }) {
  const text = String(needle || '');
  if (text === '') return [];

  const found = [];
  // Skip past each hit so occurrences never overlap.
  const step = Math.max(1, text.length);

  for (const file of searchFiles) {
    const { content } = file;
    for (
      let at = content.indexOf(text, 0);
      at !== -1 && found.length < max;
      at = content.indexOf(text, at + step)
    ) {
      found.push(matchForIndex(file, at, kind, text));
    }
    if (found.length >= max) break;
  }

  return found;
}
|
|
|
|
/**
 * Build a match record for a character offset inside a file.
 *
 * @param {{relativeFile: string, content: string, lines: string[]}} file File record.
 * @param {number} index Character offset of the match in `file.content`.
 * @param {string} kind Kind label to store on the record.
 * @param {string} needle The text that matched.
 * @returns {{kind: string, file: string, line: number, needle: string, excerpt: string}}
 *   `line` is 1-based; `excerpt` is the trimmed line, capped at 240 characters.
 */
function matchForIndex(file, index, kind, needle) {
  // The 1-based line number is one more than the newlines before the offset.
  const before = file.content.slice(0, index);
  const newlineCount = (before.match(/\n/g) || []).length;
  const line = newlineCount + 1;

  const sourceLine = file.lines[line - 1] || '';
  const excerpt = sourceLine.trim().slice(0, 240);

  return { kind, file: file.relativeFile, line, needle, excerpt };
}
|
|
|
|
/**
 * Report whether `file` is `cwd` itself or lies somewhere below it.
 *
 * The check is purely lexical (both paths are resolved, not realpath'd).
 *
 * @param {string} cwd Directory that must contain the file.
 * @param {string} file Path to test.
 * @returns {boolean} True when the relative path from `cwd` to `file` does not
 *   climb out of `cwd`.
 */
function isPathInsideOrEqual(cwd, file) {
  const rel = path.relative(path.resolve(cwd), path.resolve(file));
  if (rel === '') return true;
  // path.relative yields an absolute path when there is no relative route
  // (for example a different drive on Windows).
  if (path.isAbsolute(rel)) return false;
  // Only a leading ".." *segment* escapes the root; a name that merely starts
  // with two dots (e.g. "..cache") is still inside.
  const escapesRoot = rel === '..' || rel.startsWith(`..${path.sep}`);
  return !escapesRoot;
}
|
|
|
|
/**
 * Collapse every run of whitespace to a single space and strip both ends.
 *
 * @param {*} value Text to normalize; any falsy value is treated as ''.
 * @returns {string} The normalized text.
 */
function normalizeText(value) {
  const raw = String(value || '');
  const trimmed = raw.trim();
  return trimmed.replace(/\s+/g, ' ');
}
|
|
|
|
/**
 * Decode the basic HTML entities `&quot;`, `&#39;`, `&apos;`, `&amp;`, `&lt;`
 * and `&gt;` into their characters.
 *
 * All entities are replaced in one pass, so text produced by decoding is never
 * decoded again (`&amp;lt;` becomes `&lt;`, not `<`). Other entities are left
 * untouched.
 *
 * @param {string} value Text that may contain HTML entities.
 * @returns {string} The decoded text.
 */
function decodeBasicHtml(value) {
  const entityToChar = {
    '&quot;': '"',
    '&#39;': "'",
    '&apos;': "'",
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
  };
  return value.replace(/&(?:quot|#39|apos|amp|lt|gt);/g, (entity) => entityToChar[entity]);
}
|
|
|
|
/**
 * Escape regular-expression metacharacters so the text matches literally
 * when embedded in a RegExp source.
 *
 * @param {*} value Text to escape; converted with `String()`.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value) {
  const source = String(value);
  return source.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
|