Files
get-shit-done/tests/planner-language-regression.test.cjs
Tom Boucher f425bf9142 enhancement(planner): replace time-based reasoning with context-cost sizing and add multi-source coverage audit (#2091) (#2092) (#2114)
Replace minutes-based task sizing with context-window percentage sizing.
Add planner_authority_limits section prohibiting difficulty-based scope
decisions. Expand decision coverage matrix to multi-source audit covering
GOAL, REQ, RESEARCH, and CONTEXT artifacts. Add Source Audit gap handling
to plan-phase orchestrator (step 9c). Update plan-checker to detect
time/complexity language in scope reduction scans. Add 374 CI regression
tests preventing prohibited language from leaking back into artifacts.

Closes #2091
Closes #2092

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-11 10:26:27 -04:00

334 lines
13 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use strict';
/**
* Planner Language Regression Tests (#2091, #2092)
*
* Prevents time-based reasoning and complexity-as-scope-justification
* from leaking back into planning artifacts via future PRs.
*
* These tests scan agent definitions, workflow files, and references
* for prohibited patterns that import human-world constraints into
* an AI execution context where those constraints do not exist.
*/
const { test, describe } = require('node:test');
const assert = require('node:assert/strict');
const fs = require('fs');
const path = require('path');
const ROOT = path.join(__dirname, '..');
const AGENTS_DIR = path.join(ROOT, 'agents');
const WORKFLOWS_DIR = path.join(ROOT, 'get-shit-done', 'workflows');
const REFERENCES_DIR = path.join(ROOT, 'get-shit-done', 'references');
const TEMPLATES_DIR = path.join(ROOT, 'get-shit-done', 'templates');
/**
* Collect all .md files from a directory (non-recursive).
*/
function mdFiles(dir) {
if (!fs.existsSync(dir)) return [];
return fs.readdirSync(dir)
.filter(f => f.endsWith('.md'))
.map(f => ({ name: f, path: path.join(dir, f) }));
}
/**
* Collect all .md files recursively.
*/
function mdFilesRecursive(dir) {
if (!fs.existsSync(dir)) return [];
const results = [];
for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
const full = path.join(dir, entry.name);
if (entry.isDirectory()) {
results.push(...mdFilesRecursive(full));
} else if (entry.name.endsWith('.md')) {
results.push({ name: entry.name, path: full });
}
}
return results;
}
/**
* Files that define planning behavior — agents, workflows, references.
* These are the files where time-based and complexity-based scope
* reasoning must never appear.
*/
const PLANNING_FILES = [
...mdFiles(AGENTS_DIR),
...mdFiles(WORKFLOWS_DIR),
...mdFiles(REFERENCES_DIR),
...mdFilesRecursive(TEMPLATES_DIR),
];
// -- Prohibited patterns --
/**
* Time-based task sizing patterns.
* Matches "15-60 minutes", "X minutes Claude execution time", etc.
* Does NOT match operational timeouts ("timeout: 5 minutes"),
* API docs examples ("100 requests per 15 minutes"),
* or human-readable timeout descriptions in workflow execution steps.
*/
const TIME_SIZING_PATTERNS = [
// "N-M minutes" in task sizing context (not timeout context)
/each task[:\s]*\*?\*?\d+[-]\d+\s*min/i,
// "minutes Claude execution time" or "minutes execution time"
/minutes?\s+(claude\s+)?execution\s+time/i,
// Duration-based sizing table rows: "< 15 min", "15-60 min", "> 60 min"
/[<>]\s*\d+\s*min\s*\|/i,
];
/**
* Complexity-as-scope-justification patterns.
* Matches "too complex to implement", "challenging feature", etc.
* Does NOT match legitimate uses like:
* - "complex domains" in research/discovery context (describing what to research)
* - "non-trivial" in verification context (confirming substantive code exists)
* - "challenging" in user-profiling context (quoting user reactions)
*/
const COMPLEXITY_SCOPE_PATTERNS = [
// "too complex to" — always a scope-reduction justification
/too\s+complex\s+to/i,
// "too difficult" — always a scope-reduction justification
/too\s+difficult/i,
// "is too complex for" — scope justification (e.g. "Phase X is too complex for")
/is\s+too\s+complex\s+for/i,
];
/**
* Files allowed to contain certain patterns because they document
* the prohibition itself, or use the terms in non-scope-reduction context.
*/
const ALLOWLIST = {
// Plan-checker scans FOR these patterns — it's a detection list, not usage
'gsd-plan-checker.md': ['complexity_scope', 'time_sizing'],
// Planner defines the prohibition and the authority limits — uses terms to explain what NOT to do
'gsd-planner.md': ['complexity_scope'],
// Debugger uses "30+ minutes" as anti-pattern detection, not task sizing
'gsd-debugger.md': ['time_sizing'],
// Doc-writer uses "15 minutes" in API rate limit example, "2 minutes" for doc quality
'gsd-doc-writer.md': ['time_sizing'],
// Discovery-phase uses time for level descriptions (operational, not scope)
'discovery-phase.md': ['time_sizing'],
// Explore uses "~30 seconds" as operational estimate
'explore.md': ['time_sizing'],
// Review uses "up to 5 minutes" for CodeRabbit timeout
'review.md': ['time_sizing'],
// Fast uses "under 2 minutes wall time" as operational constraint
'fast.md': ['time_sizing'],
// Execute-phase uses "timeout: 5 minutes" for test runner
'execute-phase.md': ['time_sizing'],
// Verify-phase uses "timeout: 5 minutes" for test runner
'verify-phase.md': ['time_sizing'],
// Map-codebase documents subagent_timeout
'map-codebase.md': ['time_sizing'],
// Help documents CodeRabbit timing
'help.md': ['time_sizing'],
};
function isAllowlisted(fileName, category) {
const entry = ALLOWLIST[fileName];
return entry && entry.includes(category);
}
// -- Tests --
describe('Planner language regression — time-based task sizing (#2092)', () => {
for (const file of PLANNING_FILES) {
test(`${file.name} must not use time-based task sizing`, () => {
if (isAllowlisted(file.name, 'time_sizing')) return;
const content = fs.readFileSync(file.path, 'utf-8');
for (const pattern of TIME_SIZING_PATTERNS) {
const match = content.match(pattern);
assert.ok(
!match,
[
`${file.name} contains time-based task sizing: "${match?.[0]}"`,
'Task sizing must use context-window percentage, not time units.',
'See issue #2092 for rationale.',
].join('\n')
);
}
});
}
});
describe('Planner language regression — complexity-as-scope-justification (#2092)', () => {
for (const file of PLANNING_FILES) {
test(`${file.name} must not use complexity to justify scope reduction`, () => {
if (isAllowlisted(file.name, 'complexity_scope')) return;
const content = fs.readFileSync(file.path, 'utf-8');
for (const pattern of COMPLEXITY_SCOPE_PATTERNS) {
const match = content.match(pattern);
assert.ok(
!match,
[
`${file.name} contains complexity-as-scope-justification: "${match?.[0]}"`,
'Scope decisions must be based on context cost, missing information,',
'or dependency conflicts — not perceived difficulty.',
'See issue #2092 for rationale.',
].join('\n')
);
}
});
}
});
describe('gsd-planner.md — required structural sections (#2091, #2092)', () => {
let plannerContent;
test('planner file exists and is readable', () => {
const plannerPath = path.join(AGENTS_DIR, 'gsd-planner.md');
assert.ok(fs.existsSync(plannerPath), 'agents/gsd-planner.md must exist');
plannerContent = fs.readFileSync(plannerPath, 'utf-8');
});
test('contains <planner_authority_limits> section', () => {
assert.ok(
plannerContent.includes('<planner_authority_limits>'),
'gsd-planner.md must contain a <planner_authority_limits> section defining what the planner cannot decide'
);
});
test('authority limits prohibit difficulty-based scope decisions', () => {
assert.ok(
plannerContent.includes('The planner has no authority to'),
'planner_authority_limits must explicitly state what the planner cannot decide'
);
});
test('authority limits list three legitimate split reasons: context cost, missing info, dependency', () => {
assert.ok(
plannerContent.includes('Context cost') || plannerContent.includes('context cost'),
'authority limits must list context cost as a legitimate split reason'
);
assert.ok(
plannerContent.includes('Missing information') || plannerContent.includes('missing information'),
'authority limits must list missing information as a legitimate split reason'
);
assert.ok(
plannerContent.includes('Dependency conflict') || plannerContent.includes('dependency conflict'),
'authority limits must list dependency conflict as a legitimate split reason'
);
});
test('task sizing uses context percentage, not time units', () => {
assert.ok(
plannerContent.includes('context consumption') || plannerContent.includes('context cost'),
'task sizing must reference context consumption, not time'
);
assert.ok(
!(/each task[:\s]*\*?\*?\d+[-]\d+\s*min/i.test(plannerContent)),
'task sizing must not use minutes as sizing unit'
);
});
test('contains multi-source coverage audit (not just D-XX decisions)', () => {
assert.ok(
plannerContent.includes('Multi-Source Coverage Audit') ||
plannerContent.includes('multi-source coverage audit'),
'gsd-planner.md must contain a multi-source coverage audit, not just D-XX decision matrix'
);
});
test('coverage audit includes all four source types: GOAL, REQ, RESEARCH, CONTEXT', () => {
// The planner file or its referenced planner-source-audit.md must define all four types.
// The inline compact version uses **GOAL**, **REQ**, **RESEARCH**, **CONTEXT**.
const refPath = path.join(ROOT, 'get-shit-done', 'references', 'planner-source-audit.md');
const combined = plannerContent + (fs.existsSync(refPath) ? fs.readFileSync(refPath, 'utf-8') : '');
const hasGoal = combined.includes('**GOAL**');
const hasReq = combined.includes('**REQ**');
const hasResearch = combined.includes('**RESEARCH**');
const hasContext = combined.includes('**CONTEXT**');
assert.ok(hasGoal, 'coverage audit must include GOAL source type (ROADMAP.md phase goal)');
assert.ok(hasReq, 'coverage audit must include REQ source type (REQUIREMENTS.md)');
assert.ok(hasResearch, 'coverage audit must include RESEARCH source type (RESEARCH.md)');
assert.ok(hasContext, 'coverage audit must include CONTEXT source type (CONTEXT.md decisions)');
});
test('coverage audit defines MISSING item handling with developer escalation', () => {
assert.ok(
plannerContent.includes('Source Audit: Unplanned Items Found') ||
plannerContent.includes('MISSING'),
'coverage audit must define handling for MISSING items'
);
assert.ok(
plannerContent.includes('Awaiting developer decision') ||
plannerContent.includes('developer confirmation'),
'MISSING items must escalate to developer, not be silently dropped'
);
});
});
describe('plan-phase.md — source audit orchestration (#2091)', () => {
let workflowContent;
test('plan-phase workflow exists and is readable', () => {
const workflowPath = path.join(WORKFLOWS_DIR, 'plan-phase.md');
assert.ok(fs.existsSync(workflowPath), 'workflows/plan-phase.md must exist');
workflowContent = fs.readFileSync(workflowPath, 'utf-8');
});
test('step 9 handles Source Audit return from planner', () => {
assert.ok(
workflowContent.includes('Source Audit: Unplanned Items Found'),
'plan-phase.md step 9 must handle the Source Audit return from the planner'
);
});
test('step 9c exists for source audit gap handling', () => {
assert.ok(
workflowContent.includes('9c') && workflowContent.includes('Source Audit'),
'plan-phase.md must have a step 9c for handling source audit gaps'
);
});
test('step 9b does not use "too complex" language', () => {
// Extract just step 9b content (between "## 9b" and "## 9c" or "## 10")
const step9bMatch = workflowContent.match(/## 9b\.([\s\S]*?)(?=## 9c|## 10)/);
if (step9bMatch) {
const step9b = step9bMatch[1];
assert.ok(
!step9b.includes('too complex'),
'step 9b must not use "too complex" — use context budget language instead'
);
}
});
test('phase split recommendation uses context budget framing', () => {
assert.ok(
workflowContent.includes('context budget') || workflowContent.includes('context cost'),
'phase split recommendation must be framed in terms of context budget, not complexity'
);
});
});
describe('gsd-plan-checker.md — scope reduction detection includes time/complexity (#2092)', () => {
let checkerContent;
test('plan-checker exists and is readable', () => {
const checkerPath = path.join(AGENTS_DIR, 'gsd-plan-checker.md');
assert.ok(fs.existsSync(checkerPath), 'agents/gsd-plan-checker.md must exist');
checkerContent = fs.readFileSync(checkerPath, 'utf-8');
});
test('scope reduction scan includes complexity-based justification patterns', () => {
assert.ok(
checkerContent.includes('too complex') || checkerContent.includes('too difficult'),
'plan-checker scope reduction scan must detect complexity-based justification language'
);
});
test('scope reduction scan includes time-based justification patterns', () => {
assert.ok(
checkerContent.includes('would take') || checkerContent.includes('hours') || checkerContent.includes('minutes'),
'plan-checker scope reduction scan must detect time-based justification language'
);
});
});