From c5e77c88094ac8794cb4e35afe495e2d82c2e422 Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Fri, 17 Apr 2026 10:47:08 -0400 Subject: [PATCH] feat(agents): enforce size budget + extract duplicated boilerplate (#2361) (#2362) Adds tiered agent-size-budget test to prevent unbounded growth in agent definitions, which are loaded verbatim into context on every subagent dispatch. Extracts two duplicated blocks (mandatory-initial-read, project-skills-discovery) to shared references under get-shit-done/references/ and migrates the 5 top agents (planner, executor, debugger, verifier, phase-researcher) to @file includes. Also fixes two broken relative @planner-source-audit.md references in gsd-planner.md that silently disabled the planner's source audit discipline. Closes #2361 Co-authored-by: Claude Opus 4.7 --- CHANGELOG.md | 7 ++ agents/gsd-debugger.md | 14 +-- agents/gsd-executor.md | 14 +-- agents/gsd-phase-researcher.md | 14 +-- agents/gsd-planner.md | 11 +- agents/gsd-verifier.md | 14 +-- .../references/mandatory-initial-read.md | 2 + .../references/project-skills-discovery.md | 19 +++ tests/agent-size-budget.test.cjs | 112 ++++++++++++++++++ 9 files changed, 162 insertions(+), 45 deletions(-) create mode 100644 get-shit-done/references/mandatory-initial-read.md create mode 100644 get-shit-done/references/project-skills-discovery.md create mode 100644 tests/agent-size-budget.test.cjs diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d7d5a07..ba181300 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,17 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Added +- **Agent size-budget enforcement** — New `tests/agent-size-budget.test.cjs` enforces tiered line-count limits on every `gsd-*.md` agent (XL=1600, LARGE=1000, DEFAULT=500). 
Unbounded agent growth is paid in context on every subagent dispatch; the test prevents regressions and requires a deliberate PR rationale to raise a budget (#2361) +- **Shared `references/mandatory-initial-read.md`** — Extracts the `` enforcement block that was duplicated across 5 top agents. Agents now include it via a single `@~/.claude/get-shit-done/references/mandatory-initial-read.md` line, using Claude Code's progressive-disclosure `@file` reference mechanism (#2361) +- **Shared `references/project-skills-discovery.md`** — Extracts the 5-step project skills discovery checklist that was copy-pasted across 5 top agents with slight divergence. Single source of truth with a per-agent "Application" paragraph documenting how planners, executors, researchers, verifiers, and debuggers each apply the rules (#2361) + ### Changed - **`gsd-debugger` philosophy extracted to shared reference** — The 76-line `` block containing evergreen debugging disciplines (user-as-reporter framing, meta-debugging, foundation principles, cognitive-bias table, systematic investigation, when-to-restart protocol) is now in `get-shit-done/references/debugger-philosophy.md` and pulled into the agent via a single `@file` include. Same content, lighter per-dispatch context footprint (#2363) +- **`gsd-planner`, `gsd-executor`, `gsd-debugger`, `gsd-verifier`, `gsd-phase-researcher`** — Migrated to `@file` includes for the mandatory-initial-read and project-skills-discovery boilerplate. Reduces per-dispatch context load without changing behavior (#2361) ### Fixed +- **Broken `@planner-source-audit.md` relative references in `gsd-planner.md`** — Two locations referenced `@planner-source-audit.md` (resolves relative to working directory, almost always missing) instead of the correct absolute `@~/.claude/get-shit-done/references/planner-source-audit.md`. 
The planner's source audit discipline was silently unenforced (#2361) - **Shell hooks falsely flagged as stale on every session** — `gsd-phase-boundary.sh`, `gsd-session-state.sh`, and `gsd-validate-commit.sh` now ship with a `# gsd-hook-version: {{GSD_VERSION}}` header; the installer substitutes `{{GSD_VERSION}}` in `.sh` hooks the same way it does for `.js` hooks; and the stale-hook detector in `gsd-check-update.js` now matches bash `#` comment syntax in addition to JS `//` syntax. All three changes are required together — neither the regex fix alone nor the install fix alone is sufficient to resolve the false positive (#2136, #2206, #2209, #2210, #2212) ## [1.36.0] - 2026-04-14 diff --git a/agents/gsd-debugger.md b/agents/gsd-debugger.md index 74a356a8..185436fd 100644 --- a/agents/gsd-debugger.md +++ b/agents/gsd-debugger.md @@ -21,8 +21,7 @@ You are spawned by: Your job: Find the root cause through hypothesis testing, maintain debug file state, optionally fix and verify (depending on mode). -**CRITICAL: Mandatory Initial Read** -If the prompt contains a `` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context. +@~/.claude/get-shit-done/references/mandatory-initial-read.md **Core responsibilities:** - Investigate autonomously (user reports symptoms, you find cause) @@ -37,14 +36,9 @@ If the prompt contains a `` block, you MUST use the `Read` too @~/.claude/get-shit-done/references/common-bug-patterns.md -**Project skills:** Check `.claude/skills/` or `.agents/skills/` directory if either exists: -1. List available skills (subdirectories) -2. Read `SKILL.md` for each skill (lightweight index ~130 lines) -3. Load specific `rules/*.md` files as needed during implementation -4. Do NOT load full `AGENTS.md` files (100KB+ context cost) -5. Follow skill rules relevant to the bug being investigated and the fix being applied. 
- -This ensures project-specific patterns, conventions, and best practices are applied during execution. +**Project skills:** @~/.claude/get-shit-done/references/project-skills-discovery.md +- Load `rules/*.md` as needed during **investigation and fix**. +- Follow skill rules relevant to the bug being investigated and the fix being applied. diff --git a/agents/gsd-executor.md b/agents/gsd-executor.md index 22ecf9a0..de4a6cc8 100644 --- a/agents/gsd-executor.md +++ b/agents/gsd-executor.md @@ -18,8 +18,7 @@ Spawned by `/gsd-execute-phase` orchestrator. Your job: Execute the plan completely, commit each task, create SUMMARY.md, update STATE.md. -**CRITICAL: Mandatory Initial Read** -If the prompt contains a `` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context. +@~/.claude/get-shit-done/references/mandatory-initial-read.md @@ -54,14 +53,9 @@ Before executing, discover project context: **Project instructions:** Read `./CLAUDE.md` if it exists in the working directory. Follow all project-specific guidelines, security requirements, and coding conventions. -**Project skills:** Check `.claude/skills/` or `.agents/skills/` directory if either exists: -1. List available skills (subdirectories) -2. Read `SKILL.md` for each skill (lightweight index ~130 lines) -3. Load specific `rules/*.md` files as needed during implementation -4. Do NOT load full `AGENTS.md` files (100KB+ context cost) -5. Follow skill rules relevant to your current task - -This ensures project-specific patterns, conventions, and best practices are applied during execution. +**Project skills:** @~/.claude/get-shit-done/references/project-skills-discovery.md +- Load `rules/*.md` as needed during **implementation**. +- Follow skill rules relevant to the task you are about to commit. **CLAUDE.md enforcement:** If `./CLAUDE.md` exists, treat its directives as hard constraints during execution. 
Before committing each task, verify that code changes do not violate CLAUDE.md rules (forbidden patterns, required conventions, mandated tools). If a task action would contradict a CLAUDE.md directive, apply the CLAUDE.md rule — it takes precedence over plan instructions. Document any CLAUDE.md-driven adjustments as deviations (Rule 2: auto-add missing critical functionality). diff --git a/agents/gsd-phase-researcher.md b/agents/gsd-phase-researcher.md index 2b3b38ad..8ce1e159 100644 --- a/agents/gsd-phase-researcher.md +++ b/agents/gsd-phase-researcher.md @@ -16,8 +16,7 @@ You are a GSD phase researcher. You answer "What do I need to know to PLAN this Spawned by `/gsd-plan-phase` (integrated) or `/gsd-research-phase` (standalone). -**CRITICAL: Mandatory Initial Read** -If the prompt contains a `` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context. +@~/.claude/get-shit-done/references/mandatory-initial-read.md **Core responsibilities:** - Investigate the phase's technical domain @@ -62,14 +61,9 @@ Before researching, discover project context: **Project instructions:** Read `./CLAUDE.md` if it exists in the working directory. Follow all project-specific guidelines, security requirements, and coding conventions. -**Project skills:** Check `.claude/skills/` or `.agents/skills/` directory if either exists: -1. List available skills (subdirectories) -2. Read `SKILL.md` for each skill (lightweight index ~130 lines) -3. Load specific `rules/*.md` files as needed during research -4. Do NOT load full `AGENTS.md` files (100KB+ context cost) -5. Research should account for project skill patterns - -This ensures research aligns with project-specific conventions and libraries. +**Project skills:** @~/.claude/get-shit-done/references/project-skills-discovery.md +- Load `rules/*.md` as needed during **research**. +- Research output should account for project skill patterns and conventions. 
**CLAUDE.md enforcement:** If `./CLAUDE.md` exists, extract all actionable directives (required tools, forbidden patterns, coding conventions, testing rules, security requirements). Include a `## Project Constraints (from CLAUDE.md)` section in RESEARCH.md listing these directives so the planner can verify compliance. Treat CLAUDE.md directives with the same authority as locked decisions from CONTEXT.md — research should not recommend approaches that contradict them. diff --git a/agents/gsd-planner.md b/agents/gsd-planner.md index b211921f..aee52ab0 100644 --- a/agents/gsd-planner.md +++ b/agents/gsd-planner.md @@ -22,8 +22,7 @@ Spawned by: Your job: Produce PLAN.md files that Claude executors can implement without interpretation. Plans are prompts, not documents that become prompts. -**CRITICAL: Mandatory Initial Read** -If the prompt contains a `` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context. +@~/.claude/get-shit-done/references/mandatory-initial-read.md **Core responsibilities:** - **FIRST: Parse and honor user decisions from CONTEXT.md** (locked decisions are NON-NEGOTIABLE) @@ -44,7 +43,9 @@ Before planning, discover project context: **Project instructions:** Read `./CLAUDE.md` if it exists in the working directory. Follow all project-specific guidelines, security requirements, and coding conventions. -**Project skills:** Check `.claude/skills/` or `.agents/skills/` if either exists. Read `SKILL.md` for each skill (lightweight index), load specific `rules/*.md` as needed. Do NOT load full `AGENTS.md` files. Ensure plans reflect project skill patterns. +**Project skills:** @~/.claude/get-shit-done/references/project-skills-discovery.md +- Load `rules/*.md` as needed during **planning**. +- Ensure plans account for project skill patterns and conventions. @@ -95,7 +96,7 @@ Do NOT silently omit features. 
Instead: ## Multi-Source Coverage Audit (MANDATORY in every plan set) -@planner-source-audit.md for full format, examples, and gap-handling rules. +@~/.claude/get-shit-done/references/planner-source-audit.md for full format, examples, and gap-handling rules. Audit ALL four source types before finalizing: **GOAL** (ROADMAP phase goal), **REQ** (phase_req_ids from REQUIREMENTS.md), **RESEARCH** (RESEARCH.md features/constraints), **CONTEXT** (D-XX decisions from CONTEXT.md). @@ -107,7 +108,7 @@ Exclusions (not gaps): Deferred Ideas in CONTEXT.md, items scoped to other phase ## The Planner Does Not Decide What Is Too Hard -@planner-source-audit.md for constraint examples. +@~/.claude/get-shit-done/references/planner-source-audit.md for constraint examples. The planner has no authority to judge a feature as too difficult, omit features because they seem challenging, or use "complex/difficult/non-trivial" to justify scope reduction. diff --git a/agents/gsd-verifier.md b/agents/gsd-verifier.md index f63dd75c..439156f4 100644 --- a/agents/gsd-verifier.md +++ b/agents/gsd-verifier.md @@ -16,8 +16,7 @@ You are a GSD phase verifier. You verify that a phase achieved its GOAL, not jus Your job: Goal-backward verification. Start from what the phase SHOULD deliver, verify it actually exists and works in the codebase. -**CRITICAL: Mandatory Initial Read** -If the prompt contains a `` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context. +@~/.claude/get-shit-done/references/mandatory-initial-read.md **Critical mindset:** Do NOT trust SUMMARY.md claims. SUMMARYs document what Claude SAID it did. You verify what ACTUALLY exists in the code. These often differ. @@ -34,14 +33,9 @@ Before verifying, discover project context: **Project instructions:** Read `./CLAUDE.md` if it exists in the working directory. Follow all project-specific guidelines, security requirements, and coding conventions. 
-**Project skills:** Check `.claude/skills/` or `.agents/skills/` directory if either exists: -1. List available skills (subdirectories) -2. Read `SKILL.md` for each skill (lightweight index ~130 lines) -3. Load specific `rules/*.md` files as needed during verification -4. Do NOT load full `AGENTS.md` files (100KB+ context cost) -5. Apply skill rules when scanning for anti-patterns and verifying quality - -This ensures project-specific patterns, conventions, and best practices are applied during verification. +**Project skills:** @~/.claude/get-shit-done/references/project-skills-discovery.md +- Load `rules/*.md` as needed during **verification**. +- Apply skill rules when scanning for anti-patterns and verifying quality. diff --git a/get-shit-done/references/mandatory-initial-read.md b/get-shit-done/references/mandatory-initial-read.md new file mode 100644 index 00000000..23dece92 --- /dev/null +++ b/get-shit-done/references/mandatory-initial-read.md @@ -0,0 +1,2 @@ +**CRITICAL: Mandatory Initial Read** +If the prompt contains a `` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context. diff --git a/get-shit-done/references/project-skills-discovery.md b/get-shit-done/references/project-skills-discovery.md new file mode 100644 index 00000000..78fc16e4 --- /dev/null +++ b/get-shit-done/references/project-skills-discovery.md @@ -0,0 +1,19 @@ +# Project Skills Discovery + +Before execution, check for project-defined skills and apply their rules. + +**Discovery steps (shared across all GSD agents):** +1. Check `.claude/skills/` or `.agents/skills/` directory — if neither exists, skip. +2. List available skills (subdirectories). +3. Read `SKILL.md` for each skill (lightweight index, typically ~130 lines). +4. Load specific `rules/*.md` files only as needed during the current task. +5. Do NOT load full `AGENTS.md` files — they are large (100KB+) and cost significant context. 
+ +**Application** — how to apply the loaded rules depends on the calling agent: +- Planners account for project skill patterns and conventions in the plan. +- Executors follow skill rules relevant to the task being implemented. +- Researchers ensure research output accounts for project skill patterns. +- Verifiers apply skill rules when scanning for anti-patterns and verifying quality. +- Debuggers follow skill rules relevant to the bug being investigated and the fix being applied. + +The caller's agent file should specify which application applies. diff --git a/tests/agent-size-budget.test.cjs b/tests/agent-size-budget.test.cjs new file mode 100644 index 00000000..4feada42 --- /dev/null +++ b/tests/agent-size-budget.test.cjs @@ -0,0 +1,112 @@ +/** + * Agent size budget. + * + * Agent definitions in `agents/gsd-*.md` are loaded verbatim into Claude's + * context on every subagent dispatch. Unbounded growth is paid on every call + * across every workflow. + * + * Budgets are tiered to reflect the intent of each agent class: + * - XL : top-level orchestrators that own end-to-end rubrics + * - LARGE : multi-phase operators with branching workflows + * - DEFAULT : focused single-purpose agents + * + * Raising a budget is a deliberate choice — adjust the constant, write a + * rationale in the PR, and make sure the bloat is not duplicated content + * that belongs in `get-shit-done/references/`. 
+ *
+ * See: https://github.com/gsd-build/get-shit-done/issues/2361
+ */
+
+const { test, describe } = require('node:test');
+const assert = require('node:assert/strict');
+const fs = require('fs');
+const path = require('path');
+
+const AGENTS_DIR = path.join(__dirname, '..', 'agents');
+
+const XL_BUDGET = 1600;
+const LARGE_BUDGET = 1000;
+const DEFAULT_BUDGET = 500;
+
+const XL_AGENTS = new Set([
+  'gsd-debugger',
+  'gsd-planner',
+]);
+
+const LARGE_AGENTS = new Set([
+  'gsd-phase-researcher',
+  'gsd-verifier',
+  'gsd-doc-writer',
+  'gsd-plan-checker',
+  'gsd-executor',
+  'gsd-code-fixer',
+  'gsd-codebase-mapper',
+  'gsd-project-researcher',
+  'gsd-roadmapper',
+]);
+
+const ALL_AGENTS = fs.readdirSync(AGENTS_DIR) // agent names = file names minus the `.md` suffix
+  .filter(f => f.startsWith('gsd-') && f.endsWith('.md'))
+  .map(f => path.basename(f, '.md')); // strips only the trailing extension; replace('.md', '') removed the FIRST match
+
+function budgetFor(agent) { // returns { tier, limit } for an agent name
+  if (XL_AGENTS.has(agent)) return { tier: 'XL', limit: XL_BUDGET };
+  if (LARGE_AGENTS.has(agent)) return { tier: 'LARGE', limit: LARGE_BUDGET };
+  return { tier: 'DEFAULT', limit: DEFAULT_BUDGET }; // anything not listed in a set is DEFAULT
+}
+
+function lineCount(filePath) { // number of lines in the file; a final newline does not add a line
+  const content = fs.readFileSync(filePath, 'utf-8');
+  if (content.length === 0) return 0; // ''.split('\n') has length 1, so an empty file needs its own case
+  const trailingNewline = content.endsWith('\n') ? 1 : 0; // a final '\n' yields one extra empty split segment
+  return content.split('\n').length - trailingNewline;
+}
+
+describe('SIZE: agent line-count budget', () => {
+  for (const agent of ALL_AGENTS) { // one test per agent file so a failure names the offender
+    const { tier, limit } = budgetFor(agent);
+    test(`${agent} (${tier}) stays under ${limit} lines`, () => {
+      const filePath = path.join(AGENTS_DIR, agent + '.md');
+      const lines = lineCount(filePath);
+      assert.ok(
+        lines <= limit, // the limit itself is allowed; only exceeding it fails
+        `${agent}.md has ${lines} lines — exceeds ${tier} budget of ${limit}. ` +
+        `Extract shared boilerplate to get-shit-done/references/ or raise the budget ` +
+        `in tests/agent-size-budget.test.cjs with a rationale.`
+      );
+    });
+  }
+});
+
+describe('SIZE: every agent is classified', () => {
+  test('every agent falls in exactly one tier', () => {
+    for (const agent of ALL_AGENTS) {
+      const inXL = XL_AGENTS.has(agent);
+      const inLarge = LARGE_AGENTS.has(agent);
+      assert.ok(
+        !(inXL && inLarge), // budgetFor checks XL first, so a double listing would silently resolve to XL
+        `${agent} is in both XL_AGENTS and LARGE_AGENTS — pick one`
+      );
+    }
+  });
+
+  test('every named XL agent exists', () => {
+    for (const agent of XL_AGENTS) { // catches set entries left behind after an agent file is renamed or removed
+      const filePath = path.join(AGENTS_DIR, agent + '.md');
+      assert.ok(
+        fs.existsSync(filePath),
+        `XL_AGENTS references ${agent}.md which does not exist — clean up the set`
+      );
+    }
+  });
+
+  test('every named LARGE agent exists', () => {
+    for (const agent of LARGE_AGENTS) { // same stale-entry check for the LARGE tier
+      const filePath = path.join(AGENTS_DIR, agent + '.md');
+      assert.ok(
+        fs.existsSync(filePath),
+        `LARGE_AGENTS references ${agent}.md which does not exist — clean up the set`
+      );
+    }
+  });
+});