feat: GSD SDK — headless CLI with init + auto commands (#1407)

* test: Bootstrapped sdk/ as TypeScript ESM package with full GSD-1 PLAN.… - "sdk/package.json" - "sdk/tsconfig.json" - "sdk/vitest.config.ts" - "sdk/src/types.ts" - "sdk/src/plan-parser.ts" - "sdk/src/plan-parser.test.ts" GSD-Task: S01/T01 * test: Implemented config reader and gsd-tools bridge with 25 unit tests… - "sdk/src/config.ts" - "sdk/src/config.test.ts" - "sdk/src/gsd-tools.ts" - "sdk/src/gsd-tools.test.ts" GSD-Task: S01/T02 * test: Built prompt-builder, session-runner, and GSD class — 85 total un… - "sdk/src/prompt-builder.ts" - "sdk/src/prompt-builder.test.ts" - "sdk/src/session-runner.ts" - "sdk/src/index.ts" - "sdk/src/types.ts" GSD-Task: S01/T03 * test: Created E2E integration test with fixtures proving full SDK pipel… - "sdk/src/e2e.integration.test.ts" - "sdk/test-fixtures/sample-plan.md" - "sdk/test-fixtures/.planning/config.json" - "sdk/test-fixtures/.planning/STATE.md" - "vitest.config.ts" - "tsconfig.json" GSD-Task: S01/T04 * test: Added PhaseType/GSDEventType enums, 16-variant GSDEvent union, GS… - "sdk/src/types.ts" - "sdk/src/event-stream.ts" - "sdk/src/logger.ts" - "sdk/src/event-stream.test.ts" - "sdk/src/logger.test.ts" GSD-Task: S02/T01 * test: Built ContextEngine for phase-aware context file resolution, getT… - "sdk/src/context-engine.ts" - "sdk/src/tool-scoping.ts" - "sdk/src/phase-prompt.ts" - "sdk/src/context-engine.test.ts" - "sdk/src/tool-scoping.test.ts" - "sdk/src/phase-prompt.test.ts" GSD-Task: S02/T02 * test: Wired event stream into session runner, added onEvent()/addTransp… - "sdk/src/session-runner.ts" - "sdk/src/index.ts" - "sdk/src/e2e.integration.test.ts" GSD-Task: S02/T03 * feat: Added PhaseStepType enum, PhaseOpInfo interface, phase lifecycle… - "sdk/src/types.ts" - "sdk/src/gsd-tools.ts" - "sdk/src/session-runner.ts" - "sdk/src/index.ts" - "sdk/src/phase-runner-types.test.ts" GSD-Task: S03/T01 * test: Implemented PhaseRunner state machine with 39 unit tests covering… - "sdk/src/phase-runner.ts" - 
"sdk/src/phase-runner.test.ts" GSD-Task: S03/T02 * test: Wired PhaseRunner into GSD.runPhase() public API with full re-exp… - "sdk/src/index.ts" - "sdk/src/phase-runner.integration.test.ts" - "sdk/src/phase-runner.ts" GSD-Task: S03/T03 * test: Expanded runVerifyStep with full gap closure cycle (plan → execut… - "sdk/src/types.ts" - "sdk/src/phase-runner.ts" - "sdk/src/phase-runner.test.ts" GSD-Task: S04/T02 * fix: Added 3 integration tests proving phasePlanIndex returns correct t… - "sdk/src/phase-runner.integration.test.ts" - "sdk/src/index.ts" GSD-Task: S04/T03 * test: Add milestone-level types, typed roadmapAnalyze(), GSD.run() orch… - "sdk/src/types.ts" - "sdk/src/gsd-tools.ts" - "sdk/src/index.ts" - "sdk/src/milestone-runner.test.ts" GSD-Task: S05/T01 * test: Added CLITransport (structured stdout log lines) and WSTransport… - "sdk/src/cli-transport.ts" - "sdk/src/cli-transport.test.ts" - "sdk/src/ws-transport.ts" - "sdk/src/ws-transport.test.ts" - "sdk/src/index.ts" - "sdk/package.json" GSD-Task: S05/T02 * test: Added gsd-sdk CLI entry point with argument parsing, bin field, p… - "sdk/src/cli.ts" - "sdk/src/cli.test.ts" - "sdk/package.json" GSD-Task: S05/T03 * feat: Add InitNewProjectInfo type, initNewProject()/configSet() GSDTool… - "sdk/src/types.ts" - "sdk/src/gsd-tools.ts" - "sdk/src/cli.ts" - "sdk/src/cli.test.ts" - "sdk/src/gsd-tools.test.ts" GSD-Task: S01/T01 * chore: Created InitRunner orchestrator with setup → config → PROJECT.md… - "sdk/src/init-runner.ts" - "sdk/src/types.ts" - "sdk/src/index.ts" GSD-Task: S01/T02 * test: Wired InitRunner into CLI main() for full gsd-sdk init dispatch a… - "sdk/src/cli.ts" - "sdk/src/init-runner.test.ts" - "sdk/src/cli.test.ts" GSD-Task: S01/T03 * test: Add PlanCheck step, AI self-discuss, and retryOnce wrapper to Pha… - "sdk/src/types.ts" - "sdk/src/phase-runner.ts" - "sdk/src/session-runner.ts" - "sdk/src/phase-runner.test.ts" - "sdk/src/phase-runner-types.test.ts" GSD-Task: S02/T01 * feat: Rewrite CLITransport 
with ANSI colors, phase banners, spawn indic… - "sdk/src/cli-transport.ts" - "sdk/src/cli-transport.test.ts" GSD-Task: S02/T02 * test: Add `gsd-sdk auto` command with autoMode config override, USAGE t… - "sdk/src/cli.ts" - "sdk/src/cli.test.ts" - "sdk/src/index.ts" - "sdk/src/types.ts" GSD-Task: S02/T03 * fix: CLI shebang + gsd-tools non-JSON output handling Three bugs found during first real gsd-sdk run: 1. cli.ts shebang was commented out — shell executed JS as bash, triggering ImageMagick's import command instead of Node 2. configSet() called exec() which JSON.parse()d the output, but gsd-tools config-set returns 'key=value' text, not JSON. Added execRaw() method for commands that return plain text. 3. Same JSON parse bug affected commit() (returns git SHA), stateLoad(), verifySummary(), initExecutePhase(), stateBeginPhase(), and phaseComplete(). All switched to execRaw(). Tests updated to match real gsd-tools output format (plain text instead of mocked JSON). 376/376 tests pass.
2026-04-25 17:25:23 +02:00 · 2026-03-26 20:27:51 -06:00
parent 604a78b30b
commit 596ce2d252
45 changed files with 17604 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,28 @@ philosophy.md
 .github/skills/get-shit-done
 .github/copilot-instructions.md
 .bg-shell/
+
+# ── GSD baseline (auto-generated) ──
+.gsd
+Thumbs.db
+*.swp
+*.swo
+*~
+.idea/
+.vscode/
+*.code-workspace
+.env
+.env.*
+!.env.example
+.next/
+dist/
+build/
+__pycache__/
+*.pyc
+.venv/
+venv/
+target/
+vendor/
+*.log
+.cache/
+tmp/
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -40,7 +40,8 @@
  },
  "devDependencies": {
    "c8": "^11.0.0",
-    "esbuild": "^0.24.0"
+    "esbuild": "^0.24.0",
+    "vitest": "^4.1.2"
  },
  "scripts": {
    "build:hooks": "node scripts/build-hooks.js",
--- a/sdk/package-lock.json
+++ b/sdk/package-lock.json
--- a/sdk/package.json
+++ b/sdk/package.json
@@ -0,0 +1,37 @@
+{
+  "name": "@gsd/sdk",
+  "version": "0.1.0",
+  "description": "GSD SDK — programmatic interface for running GSD plans via the Agent SDK",
+  "type": "module",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "bin": {
+    "gsd-sdk": "./dist/cli.js"
+  },
+  "engines": {
+    "node": ">=20"
+  },
+  "scripts": {
+    "build": "tsc",
+    "prepublishOnly": "npm run build",
+    "test": "vitest run",
+    "test:unit": "vitest run --project unit",
+    "test:integration": "vitest run --project integration"
+  },
+  "dependencies": {
+    "@anthropic-ai/claude-agent-sdk": "^0.2.84",
+    "ws": "^8.20.0"
+  },
+  "devDependencies": {
+    "@types/node": "^22.0.0",
+    "@types/ws": "^8.18.1",
+    "typescript": "^5.7.0",
+    "vitest": "^3.1.1"
+  }
+}
--- a/sdk/src/cli-transport.test.ts
+++ b/sdk/src/cli-transport.test.ts
@@ -0,0 +1,388 @@
+import { describe, it, expect } from 'vitest';
+import { PassThrough } from 'node:stream';
+import { CLITransport } from './cli-transport.js';
+import { GSDEventType, type GSDEvent, type GSDEventBase } from './types.js';
+
+// ─── ANSI constants (mirror the source for readable assertions) ──────────────
+
+const BOLD = '\x1b[1m';
+const RESET = '\x1b[0m';
+const GREEN = '\x1b[32m';
+const RED = '\x1b[31m';
+const YELLOW = '\x1b[33m';
+const CYAN = '\x1b[36m';
+const DIM = '\x1b[90m';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+function makeBase(overrides: Partial<GSDEventBase> = {}): Omit<GSDEventBase, 'type'> {
+  // Fixed timestamp and session id keep the rendered "[14:30:45]" prefix deterministic;
+  // anything supplied in `overrides` wins over these defaults.
+  const timestamp = '2025-06-15T14:30:45.123Z';
+  const sessionId = 'test-session';
+  return { timestamp, sessionId, ...overrides };
+}
+
+function readOutput(stream: PassThrough): string {
+  // Drain whatever is currently buffered; read() yields null once the buffer is empty.
+  const pieces: Buffer[] = [];
+  for (let part = stream.read() as Buffer | null; part !== null; part = stream.read() as Buffer | null) {
+    pieces.push(part);
+  }
+  return Buffer.concat(pieces).toString('utf-8').trim();
+}
+
+// ─── Tests ───────────────────────────────────────────────────────────────────
+
+describe('CLITransport', () => {
+  it('formats SessionInit event correctly', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.SessionInit,
+      model: 'claude-sonnet-4-20250514',
+      tools: ['Read', 'Write', 'Bash'],
+      cwd: '/home/project',
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    expect(output).toBe(
+      '[14:30:45] [INIT] Session started — model: claude-sonnet-4-20250514, tools: 3, cwd: /home/project',
+    );
+  });
+
+  it('formats SessionComplete in green with checkmark', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.SessionComplete,
+      success: true,
+      totalCostUsd: 1.234,
+      durationMs: 45600,
+      numTurns: 12,
+      result: 'done',
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    expect(output).toBe(
+      `[14:30:45] ${GREEN}✓ Session complete — cost: $1.23, turns: 12, duration: 45.6s${RESET}`,
+    );
+  });
+
+  it('formats SessionError in red with ✗ marker', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.SessionError,
+      success: false,
+      totalCostUsd: 0.5,
+      durationMs: 3000,
+      numTurns: 2,
+      errorSubtype: 'tool_error',
+      errors: ['file not found', 'permission denied'],
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    expect(output).toBe(
+      `[14:30:45] ${RED}✗ Session failed — subtype: tool_error, errors: [file not found, permission denied]${RESET}`,
+    );
+  });
+
+  it('formats PhaseStart as bold cyan banner and PhaseComplete with running cost', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.PhaseStart,
+      phaseNumber: '01',
+      phaseName: 'Authentication',
+    } as GSDEvent);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.PhaseComplete,
+      phaseNumber: '01',
+      phaseName: 'Authentication',
+      success: true,
+      totalCostUsd: 2.50,
+      totalDurationMs: 60000,
+      stepsCompleted: 5,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    const lines = output.split('\n');
+    expect(lines[0]).toBe(`${BOLD}${CYAN}━━━ GSD ► PHASE 01: Authentication ━━━${RESET}`);
+    expect(lines[1]).toBe('[14:30:45] [PHASE] Phase 01 complete — success: true, cost: $2.50, running: $0.00');
+  });
+
+  it('formats ToolCall with truncated input', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    const longInput = { content: 'x'.repeat(200) };
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.ToolCall,
+      toolName: 'Write',
+      toolUseId: 'tool-123',
+      input: longInput,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    expect(output).toMatch(/^\[14:30:45\] \[TOOL\] Write\(.+…\)$/);
+    // The truncated input portion (inside parens) should be ≤80 chars
+    const insideParens = output.match(/Write\((.+)\)/)![1]!;
+    expect(insideParens.length).toBeLessThanOrEqual(80);
+  });
+
+  it('formats MilestoneStart as bold banner and MilestoneComplete with running cost', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.MilestoneStart,
+      phaseCount: 3,
+      prompt: 'build the app',
+    } as GSDEvent);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.MilestoneComplete,
+      success: true,
+      totalCostUsd: 8.75,
+      totalDurationMs: 300000,
+      phasesCompleted: 3,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    const lines = output.split('\n');
+    // MilestoneStart emits 3 lines (top bar, text, bottom bar)
+    expect(lines[0]).toBe(`${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}`);
+    expect(lines[1]).toBe(`${BOLD}  GSD Milestone — 3 phases${RESET}`);
+    expect(lines[2]).toBe(`${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}`);
+    expect(lines[3]).toBe(`${BOLD}━━━ Milestone complete — success: true, cost: $8.75, running: $0.00 ━━━${RESET}`);
+  });
+
+  it('close() is callable without error', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+    expect(() => transport.close()).not.toThrow();
+  });
+
+  it('onEvent does not throw on unknown event type variant', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    // Use a known event type that hits the default/fallback branch
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.ToolProgress,
+      toolName: 'Bash',
+      toolUseId: 'tool-456',
+      elapsedSeconds: 12,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    expect(output).toBe('[14:30:45] [EVENT] tool_progress');
+  });
+
+  it('formats AssistantText as dim with truncation at 200 chars', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    const longText = 'A'.repeat(300);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.AssistantText,
+      text: longText,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    expect(output).toMatch(new RegExp(`^${escRe(DIM)}\\[14:30:45\\] A+…${escRe(RESET)}$`));
+    // Strip ANSI to check text length
+    const stripped = stripAnsi(output);
+    const agentText = stripped.split('] ')[1]!;
+    expect(agentText.length).toBeLessThanOrEqual(200);
+  });
+
+  it('formats WaveStart in yellow and WaveComplete with colored counts', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.WaveStart,
+      phaseNumber: '01',
+      waveNumber: 2,
+      planCount: 4,
+      planIds: ['plan-a', 'plan-b', 'plan-c', 'plan-d'],
+    } as GSDEvent);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.WaveComplete,
+      phaseNumber: '01',
+      waveNumber: 2,
+      successCount: 3,
+      failureCount: 1,
+      durationMs: 25000,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    const lines = output.split('\n');
+    expect(lines[0]).toBe(`${YELLOW}⟫ Wave 2 (4 plans)${RESET}`);
+    expect(lines[1]).toBe(
+      `[14:30:45] [WAVE] Wave 2 complete — ${GREEN}3 success${RESET}, ${RED}1 failed${RESET}, 25000ms`,
+    );
+  });
+
+  // ─── New tests for rich formatting ─────────────────────────────────────────
+
+  it('formats PhaseStepStart in cyan with ◆ indicator', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.PhaseStepStart,
+      phaseNumber: '01',
+      step: 'research',
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    expect(output).toBe(`${CYAN}◆ research${RESET}`);
+  });
+
+  it('formats PhaseStepComplete green ✓ on success, red ✗ on failure', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.PhaseStepComplete,
+      phaseNumber: '01',
+      step: 'plan',
+      success: true,
+      durationMs: 5200,
+    } as GSDEvent);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.PhaseStepComplete,
+      phaseNumber: '01',
+      step: 'execute',
+      success: false,
+      durationMs: 12000,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    const lines = output.split('\n');
+    expect(lines[0]).toBe(`${GREEN}✓ plan${RESET} ${DIM}5200ms${RESET}`);
+    expect(lines[1]).toBe(`${RED}✗ execute${RESET} ${DIM}12000ms${RESET}`);
+  });
+
+  it('formats InitResearchSpawn in cyan with ◆ and session count', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.InitResearchSpawn,
+      sessionCount: 4,
+      researchTypes: ['stack', 'features', 'architecture', 'pitfalls'],
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    expect(output).toBe(`${CYAN}◆ Spawning 4 researchers...${RESET}`);
+  });
+
+  it('tracks running cost across CostUpdate events', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    // First cost update
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.CostUpdate,
+      sessionCostUsd: 0.50,
+      cumulativeCostUsd: 0.50,
+    } as GSDEvent);
+
+    // Second cost update
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.CostUpdate,
+      sessionCostUsd: 0.75,
+      cumulativeCostUsd: 1.25,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    const lines = output.split('\n');
+    expect(lines[0]).toBe(`${DIM}[14:30:45] Cost: session $0.50, running $0.50${RESET}`);
+    expect(lines[1]).toBe(`${DIM}[14:30:45] Cost: session $0.75, running $1.25${RESET}`);
+  });
+
+  it('shows running cost in PhaseComplete and MilestoneComplete after CostUpdates', () => {
+    const stream = new PassThrough();
+    const transport = new CLITransport(stream);
+
+    // Accumulate some cost
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.CostUpdate,
+      sessionCostUsd: 1.50,
+      cumulativeCostUsd: 1.50,
+    } as GSDEvent);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.PhaseComplete,
+      phaseNumber: '02',
+      phaseName: 'Build',
+      success: true,
+      totalCostUsd: 1.50,
+      totalDurationMs: 30000,
+      stepsCompleted: 3,
+    } as GSDEvent);
+
+    transport.onEvent({
+      ...makeBase(),
+      type: GSDEventType.MilestoneComplete,
+      success: true,
+      totalCostUsd: 1.50,
+      totalDurationMs: 30000,
+      phasesCompleted: 2,
+    } as GSDEvent);
+
+    const output = readOutput(stream);
+    const lines = output.split('\n');
+    // CostUpdate line
+    expect(lines[0]).toContain('running $1.50');
+    // PhaseComplete includes running cost
+    expect(lines[1]).toContain('running: $1.50');
+    // MilestoneComplete includes running cost
+    expect(lines[2]).toContain('running: $1.50');
+  });
+});
+
+// ─── Test utilities ──────────────────────────────────────────────────────────
+
+/** Backslash-escape every RegExp metacharacter in `s` so it matches literally inside a pattern. */
+function escRe(s: string): string {
+  return s.replace(/[.*+?^${}()|[\]\\]/g, (ch) => `\\${ch}`);
+}
+
+/** Remove ANSI SGR sequences (ESC [ digits/semicolons m) from `s`, leaving only the visible text. */
+function stripAnsi(s: string): string {
+  return s.split(/\x1b\[[0-9;]*m/g).join('');
+}
--- a/sdk/src/cli-transport.ts
+++ b/sdk/src/cli-transport.ts
@@ -0,0 +1,130 @@
+/**
+ * CLI Transport — renders GSD events as rich ANSI-colored output to a Writable stream.
+ *
+ * Implements TransportHandler with colored banners, step indicators, spawn markers,
+ * and running cost totals. No external dependencies — ANSI codes are inline constants.
+ */
+
+import type { Writable } from 'node:stream';
+import { GSDEventType, type GSDEvent, type TransportHandler } from './types.js';
+
+// ─── ANSI escape constants (no dependency per D021) ──────────────────────────
+
+const BOLD = '\x1b[1m';
+const RESET = '\x1b[0m';
+const GREEN = '\x1b[32m';
+const RED = '\x1b[31m';
+const YELLOW = '\x1b[33m';
+const CYAN = '\x1b[36m';
+const DIM = '\x1b[90m';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+/** Return the UTC HH:MM:SS portion of an ISO-8601 timestamp, or '??:??:??' when it cannot be parsed. */
+function formatTime(ts: string): string {
+  const placeholder = '??:??:??';
+  try {
+    const parsed = new Date(ts);
+    return Number.isNaN(parsed.getTime()) ? placeholder : parsed.toISOString().slice(11, 19);
+  } catch {
+    return placeholder;
+  }
+}
+
+/** Cap `s` at `max` characters; when it is longer, keep the first `max - 1` and append '…'. */
+function truncate(s: string, max: number): string {
+  const fits = s.length <= max;
+  return fits ? s : `${s.slice(0, max - 1)}…`;
+}
+
+/** Render a dollar amount with exactly two decimal places, e.g. `$1.23`. */
+function usd(n: number): string {
+  return '$' + n.toFixed(2);
+}
+
+// ─── CLITransport ────────────────────────────────────────────────────────────
+
+export class CLITransport implements TransportHandler {
+  private readonly out: Writable; // destination for rendered lines (process.stdout unless one is injected)
+  private runningCostUsd = 0; // sum of sessionCostUsd over every CostUpdate formatted so far
+
+  constructor(out?: Writable) {
+    this.out = out ?? process.stdout;
+  }
+
+  /** Render `event` via formatEvent() and write it, plus a trailing newline, to `out`. Never throws. */
+  onEvent(event: GSDEvent): void {
+    try {
+      const line = this.formatEvent(event);
+      this.out.write(line + '\n');
+    } catch {
+      // TransportHandler contract: onEvent must never throw, so a formatting or write error is dropped and the event produces no output
+    }
+  }
+
+  /** No-op — this transport never ends or destroys `out`; the stream is left as it was. */
+  close(): void {
+    // Nothing to clean up
+  }
+
+  // ─── Private formatting ────────────────────────────────────────────
+
+  private formatEvent(event: GSDEvent): string {
+    const time = formatTime(event.timestamp); // HH:MM:SS; only the "[time] …" style lines include it
+
+    switch (event.type) {
+      case GSDEventType.SessionInit:
+        return `[${time}] [INIT] Session started — model: ${event.model}, tools: ${event.tools.length}, cwd: ${event.cwd}`;
+
+      case GSDEventType.SessionComplete:
+        return `[${time}] ${GREEN}✓ Session complete — cost: ${usd(event.totalCostUsd)}, turns: ${event.numTurns}, duration: ${(event.durationMs / 1000).toFixed(1)}s${RESET}`;
+
+      case GSDEventType.SessionError:
+        return `[${time}] ${RED}✗ Session failed — subtype: ${event.errorSubtype}, errors: [${event.errors.join(', ')}]${RESET}`;
+
+      case GSDEventType.ToolCall:
+        return `[${time}] [TOOL] ${event.toolName}(${truncate(JSON.stringify(event.input), 80)})`;
+
+      case GSDEventType.PhaseStart:
+        return `${BOLD}${CYAN}━━━ GSD ► PHASE ${event.phaseNumber}: ${event.phaseName} ━━━${RESET}`;
+
+      case GSDEventType.PhaseComplete:
+        return `[${time}] [PHASE] Phase ${event.phaseNumber} complete — success: ${event.success}, cost: ${usd(event.totalCostUsd)}, running: ${usd(this.runningCostUsd)}`;
+
+      case GSDEventType.PhaseStepStart:
+        return `${CYAN}◆ ${event.step}${RESET}`;
+
+      case GSDEventType.PhaseStepComplete:
+        return event.success
+          ? `${GREEN}✓ ${event.step}${RESET} ${DIM}${event.durationMs}ms${RESET}`
+          : `${RED}✗ ${event.step}${RESET} ${DIM}${event.durationMs}ms${RESET}`;
+
+      case GSDEventType.WaveStart:
+        return `${YELLOW}⟫ Wave ${event.waveNumber} (${event.planCount} plans)${RESET}`;
+
+      case GSDEventType.WaveComplete:
+        return `[${time}] [WAVE] Wave ${event.waveNumber} complete — ${GREEN}${event.successCount} success${RESET}, ${RED}${event.failureCount} failed${RESET}, ${event.durationMs}ms`;
+
+      case GSDEventType.CostUpdate: {
+        this.runningCostUsd += event.sessionCostUsd; // side effect: formatting a CostUpdate advances the running total (event.cumulativeCostUsd is not read)
+        return `${DIM}[${time}] Cost: session ${usd(event.sessionCostUsd)}, running ${usd(this.runningCostUsd)}${RESET}`;
+      }
+
+      case GSDEventType.MilestoneStart:
+        return `${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n${BOLD}  GSD Milestone — ${event.phaseCount} phases${RESET}\n${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}`;
+
+      case GSDEventType.MilestoneComplete:
+        return `${BOLD}━━━ Milestone complete — success: ${event.success}, cost: ${usd(event.totalCostUsd)}, running: ${usd(this.runningCostUsd)} ━━━${RESET}`;
+
+      case GSDEventType.AssistantText:
+        return `${DIM}[${time}] ${truncate(event.text, 200)}${RESET}`;
+
+      case GSDEventType.InitResearchSpawn:
+        return `${CYAN}◆ Spawning ${event.sessionCount} researchers...${RESET}`;
+
+      // Generic fallback for event types without specific formatting: prints only the time and the raw event.type value
+      default:
+        return `[${time}] [EVENT] ${event.type}`;
+    }
+  }
+}
--- a/sdk/src/cli.test.ts
+++ b/sdk/src/cli.test.ts
@@ -0,0 +1,310 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { parseCliArgs, resolveInitInput, USAGE, type ParsedCliArgs } from './cli.js';
+import { mkdir, writeFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+
+describe('parseCliArgs', () => {
+  it('parses run <prompt> with defaults', () => {
+    const result = parseCliArgs(['run', 'build auth']);
+
+    expect(result.command).toBe('run');
+    expect(result.prompt).toBe('build auth');
+    expect(result.help).toBe(false);
+    expect(result.version).toBe(false);
+    expect(result.wsPort).toBeUndefined();
+    expect(result.model).toBeUndefined();
+    expect(result.maxBudget).toBeUndefined();
+  });
+
+  it('parses --help flag', () => {
+    const result = parseCliArgs(['--help']);
+
+    expect(result.help).toBe(true);
+    expect(result.command).toBeUndefined();
+  });
+
+  it('parses -h short flag', () => {
+    const result = parseCliArgs(['-h']);
+
+    expect(result.help).toBe(true);
+  });
+
+  it('parses --version flag', () => {
+    const result = parseCliArgs(['--version']);
+
+    expect(result.version).toBe(true);
+  });
+
+  it('parses -v short flag', () => {
+    const result = parseCliArgs(['-v']);
+
+    expect(result.version).toBe(true);
+  });
+
+  it('parses --ws-port as number', () => {
+    const result = parseCliArgs(['run', 'build X', '--ws-port', '8080']);
+
+    expect(result.command).toBe('run');
+    expect(result.prompt).toBe('build X');
+    expect(result.wsPort).toBe(8080);
+  });
+
+  it('parses --model option', () => {
+    const result = parseCliArgs(['run', 'build X', '--model', 'claude-sonnet-4-6']);
+
+    expect(result.model).toBe('claude-sonnet-4-6');
+  });
+
+  it('parses --max-budget option', () => {
+    const result = parseCliArgs(['run', 'build X', '--max-budget', '10']);
+
+    expect(result.maxBudget).toBe(10);
+  });
+
+  it('parses --project-dir option', () => {
+    const result = parseCliArgs(['run', 'build X', '--project-dir', '/tmp/my-project']);
+
+    expect(result.projectDir).toBe('/tmp/my-project');
+  });
+
+  it('returns undefined command and prompt for empty args', () => {
+    const result = parseCliArgs([]);
+
+    expect(result.command).toBeUndefined();
+    expect(result.prompt).toBeUndefined();
+    expect(result.help).toBe(false);
+    expect(result.version).toBe(false);
+  });
+
+  it('parses multi-word prompts from positionals', () => {
+    const result = parseCliArgs(['run', 'build', 'the', 'entire', 'app']);
+
+    expect(result.prompt).toBe('build the entire app');
+  });
+
+  it('handles all options combined', () => {
+    const result = parseCliArgs([
+      'run', 'build auth',
+      '--project-dir', '/tmp/proj',
+      '--ws-port', '9090',
+      '--model', 'claude-sonnet-4-6',
+      '--max-budget', '15',
+    ]);
+
+    expect(result.command).toBe('run');
+    expect(result.prompt).toBe('build auth');
+    expect(result.projectDir).toBe('/tmp/proj');
+    expect(result.wsPort).toBe(9090);
+    expect(result.model).toBe('claude-sonnet-4-6');
+    expect(result.maxBudget).toBe(15);
+  });
+
+  it('throws on unknown options (strict mode)', () => {
+    expect(() => parseCliArgs(['--unknown-flag'])).toThrow();
+  });
+
+  // ─── Init command parsing ──────────────────────────────────────────────
+
+  it('parses init with @file input', () => {
+    const result = parseCliArgs(['init', '@prd.md']);
+
+    expect(result.command).toBe('init');
+    expect(result.initInput).toBe('@prd.md');
+    expect(result.prompt).toBe('@prd.md');
+  });
+
+  it('parses init with raw text input', () => {
+    const result = parseCliArgs(['init', 'build a todo app']);
+
+    expect(result.command).toBe('init');
+    expect(result.initInput).toBe('build a todo app');
+  });
+
+  it('parses init with multi-word text input', () => {
+    const result = parseCliArgs(['init', 'build', 'a', 'todo', 'app']);
+
+    expect(result.command).toBe('init');
+    expect(result.initInput).toBe('build a todo app');
+  });
+
+  it('parses init with no input (stdin mode)', () => {
+    const result = parseCliArgs(['init']);
+
+    expect(result.command).toBe('init');
+    expect(result.initInput).toBeUndefined();
+    expect(result.prompt).toBeUndefined();
+  });
+
+  it('parses init with options', () => {
+    const result = parseCliArgs(['init', '@prd.md', '--project-dir', '/tmp/proj', '--model', 'claude-sonnet-4-6']);
+
+    expect(result.command).toBe('init');
+    expect(result.initInput).toBe('@prd.md');
+    expect(result.projectDir).toBe('/tmp/proj');
+    expect(result.model).toBe('claude-sonnet-4-6');
+  });
+
+  it('does not set initInput for non-init commands', () => {
+    const result = parseCliArgs(['run', 'build auth']);
+
+    expect(result.command).toBe('run');
+    expect(result.initInput).toBeUndefined();
+    expect(result.prompt).toBe('build auth');
+  });
+
+  // ─── Auto command parsing ──────────────────────────────────────────────
+
+  it('parses auto command with no prompt', () => {
+    const result = parseCliArgs(['auto']);
+
+    expect(result.command).toBe('auto');
+    expect(result.prompt).toBeUndefined();
+    expect(result.initInput).toBeUndefined();
+  });
+
+  it('parses auto with --project-dir', () => {
+    const result = parseCliArgs(['auto', '--project-dir', '/tmp/x']);
+
+    expect(result.command).toBe('auto');
+    expect(result.projectDir).toBe('/tmp/x');
+  });
+
+  it('parses auto with --ws-port', () => {
+    const result = parseCliArgs(['auto', '--ws-port', '9090']);
+
+    expect(result.command).toBe('auto');
+    expect(result.wsPort).toBe(9090);
+  });
+
+  it('parses auto with all options combined', () => {
+    const result = parseCliArgs([
+      'auto',
+      '--project-dir', '/tmp/proj',
+      '--ws-port', '8080',
+      '--model', 'claude-sonnet-4-6',
+      '--max-budget', '20',
+    ]);
+
+    expect(result.command).toBe('auto');
+    expect(result.projectDir).toBe('/tmp/proj');
+    expect(result.wsPort).toBe(8080);
+    expect(result.model).toBe('claude-sonnet-4-6');
+    expect(result.maxBudget).toBe(20);
+  });
+
+  it('auto command does not set initInput', () => {
+    const result = parseCliArgs(['auto']);
+
+    expect(result.initInput).toBeUndefined();
+  });
+});
+
+// ─── resolveInitInput tests ──────────────────────────────────────────────────
+
+describe('resolveInitInput', () => {
+  let tmpDir: string;
+
+  beforeEach(async () => {
+    tmpDir = join(tmpdir(), `cli-init-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+    await mkdir(tmpDir, { recursive: true });
+  });
+
+  afterEach(async () => {
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  function makeArgs(overrides: Partial<ParsedCliArgs>): ParsedCliArgs {
+    return {
+      command: 'init',
+      prompt: undefined,
+      initInput: undefined,
+      projectDir: tmpDir,
+      wsPort: undefined,
+      model: undefined,
+      maxBudget: undefined,
+      help: false,
+      version: false,
+      ...overrides,
+    };
+  }
+
+  it('reads file contents when input starts with @', async () => {
+    const prdPath = join(tmpDir, 'prd.md');
+    await writeFile(prdPath, '# My PRD\n\nBuild a todo app');
+
+    const result = await resolveInitInput(makeArgs({ initInput: '@prd.md' }));
+
+    expect(result).toBe('# My PRD\n\nBuild a todo app');
+  });
+
+  it('resolves @file path relative to projectDir', async () => {
+    const subDir = join(tmpDir, 'docs');
+    await mkdir(subDir, { recursive: true });
+    await writeFile(join(subDir, 'spec.md'), 'specification content');
+
+    const result = await resolveInitInput(makeArgs({ initInput: '@docs/spec.md' }));
+
+    expect(result).toBe('specification content');
+  });
+
+  it('throws descriptive error when @file does not exist', async () => {
+    await expect(
+      resolveInitInput(makeArgs({ initInput: '@nonexistent.md' }))
+    ).rejects.toThrow('file not found');
+  });
+
+  it('returns raw text as-is when input does not start with @', async () => {
+    const result = await resolveInitInput(makeArgs({ initInput: 'build a todo app' }));
+
+    expect(result).toBe('build a todo app');
+  });
+
+  it('returns explicit text input unchanged (TTY error path not covered here)', async () => {
+    // NOTE(review): this case was previously titled as a TTY-error test, but it never
+    // reaches that branch: initInput is always supplied, so the no-input handling is
+    // not exercised. TODO: add a real test that calls resolveInitInput with no
+    // initInput while process.stdin.isTTY is stubbed to true, and asserts the rejection.
+    const result = await resolveInitInput(makeArgs({ initInput: 'some text' }));
+    expect(result).toBe('some text');
+  });
+
+  it('reads @file with absolute path', async () => {
+    const absPath = join(tmpDir, 'absolute-prd.md');
+    await writeFile(absPath, 'absolute path content');
+
+    // An absolute path after '@' is expected to be read as-is rather than joined
+    // onto projectDir (presumably via path.resolve, which keeps absolute paths).
+    const result = await resolveInitInput(makeArgs({ initInput: `@${absPath}` }));
+
+    expect(result).toBe('absolute path content');
+  });
+
+  it('preserves whitespace in raw text input', async () => {
+    const input = '  build a todo app with spaces  ';
+    const result = await resolveInitInput(makeArgs({ initInput: input }));
+
+    expect(result).toBe(input);
+  });
+
+  it('reads large file content from @file', async () => {
+    const largeContent = 'x'.repeat(10000) + '\n# PRD\nDescription here';
+    await writeFile(join(tmpDir, 'large.md'), largeContent);
+
+    const result = await resolveInitInput(makeArgs({ initInput: '@large.md' }));
+
+    expect(result).toBe(largeContent);
+  });
+});
+
+// ─── USAGE text tests ────────────────────────────────────────────────────────
+
+describe('USAGE', () => {
+  it('includes auto command', () => {
+    expect(USAGE).toContain('auto');
+  });
+
+  it('describes auto as autonomous lifecycle', () => {
+    expect(USAGE).toMatch(/auto\s+.*autonomous/i);
+  });
+});
--- a/sdk/src/cli.ts
+++ b/sdk/src/cli.ts
@@ -0,0 +1,382 @@
+#!/usr/bin/env node
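+/**
+ * CLI entry point for gsd-sdk.
+ *
+ * Usage: gsd-sdk run "<prompt>" [options]
+ *        gsd-sdk auto [options]
+ *        gsd-sdk init [@file | "<text>"] [options]
+ *
+ * Options: [--project-dir <dir>] [--ws-port <port>]
+ *          [--model <model>] [--max-budget <n>]
+ */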
+
+import { parseArgs } from 'node:util';
+import { readFile } from 'node:fs/promises';
+import { resolve, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+import { GSD } from './index.js';
+import { CLITransport } from './cli-transport.js';
+import { WSTransport } from './ws-transport.js';
+import { InitRunner } from './init-runner.js';
+
+// ─── Parsed CLI args ─────────────────────────────────────────────────────────
+
+/** Structured result of parsing the command line (see parseCliArgs). */
+export interface ParsedCliArgs {
+  /** First positional argument; undefined when no positional was given. */
+  command: string | undefined;
+  /** Remaining positionals joined with single spaces; undefined when there are none. */
+  prompt: string | undefined;
+  /** For 'init' command: the raw input source (@file, text, or undefined for stdin). */
+  initInput: string | undefined;
+  /** Value of --project-dir; defaults to the current working directory. */
+  projectDir: string;
+  /** Numeric value of --ws-port; undefined when the option was not given. */
+  wsPort: number | undefined;
+  /** Value of --model; undefined when the option was not given. */
+  model: string | undefined;
+  /** Numeric value of --max-budget; undefined when the option was not given. */
+  maxBudget: number | undefined;
+  /** True when -h / --help was passed. */
+  help: boolean;
+  /** True when -v / --version was passed. */
+  version: boolean;
+}
+
+/**
+ * Parse CLI arguments into a structured object.
+ * Exported for testing — the main() function uses this internally.
+ *
+ * @param argv - Arguments after the node binary and script path.
+ * @returns The command, prompt/init input and option values.
+ * @throws If an unknown option is passed (strict parsing), or if
+ *   `--ws-port` / `--max-budget` is not a usable number.
+ */
+export function parseCliArgs(argv: string[]): ParsedCliArgs {
+  const { values, positionals } = parseArgs({
+    args: argv,
+    options: {
+      'project-dir': { type: 'string', default: process.cwd() },
+      'ws-port': { type: 'string' },
+      model: { type: 'string' },
+      'max-budget': { type: 'string' },
+      help: { type: 'boolean', short: 'h', default: false },
+      version: { type: 'boolean', short: 'v', default: false },
+    },
+    allowPositionals: true,
+    strict: true,
+  });
+
+  // An empty option value is treated as "not given"; anything else must be
+  // a finite number — Number('abc') would otherwise leak NaN to callers.
+  const toNumber = (flag: string, raw: string | undefined): number | undefined => {
+    if (!raw) return undefined;
+    const parsed = Number(raw);
+    if (!Number.isFinite(parsed)) {
+      throw new Error(`Invalid value for ${flag}: "${raw}" is not a number`);
+    }
+    return parsed;
+  };
+
+  const wsPort = toNumber('--ws-port', values['ws-port'] as string | undefined);
+  if (wsPort !== undefined && (!Number.isInteger(wsPort) || wsPort < 0 || wsPort > 65535)) {
+    throw new Error(`Invalid value for --ws-port: ${wsPort} is not a port between 0 and 65535`);
+  }
+
+  const maxBudget = toNumber('--max-budget', values['max-budget'] as string | undefined);
+  if (maxBudget !== undefined && maxBudget < 0) {
+    throw new Error(`Invalid value for --max-budget: ${maxBudget} must not be negative`);
+  }
+
+  const command = positionals[0] as string | undefined;
+  const prompt = positionals.slice(1).join(' ') || undefined;
+
+  // For 'init' command, the positional after 'init' is the input source.
+  // For 'run' command, it's the prompt. Both use positionals[1+].
+  const initInput = command === 'init' ? prompt : undefined;
+
+  return {
+    command,
+    prompt,
+    initInput,
+    projectDir: values['project-dir'] as string,
+    wsPort,
+    model: values.model as string | undefined,
+    maxBudget,
+    help: values.help as boolean,
+    version: values.version as boolean,
+  };
+}
+
+// ─── Usage ───────────────────────────────────────────────────────────────────
+
+/**
+ * Help text for the CLI. Printed by main() for --help and alongside
+ * argument/command errors. Surrounding blank lines are trimmed.
+ */
+export const USAGE = `
+Usage: gsd-sdk <command> [args] [options]
+
+Commands:
+  run <prompt>          Run a full milestone from a text prompt
+  auto                  Run the full autonomous lifecycle (discover → execute → advance)
+  init [input]          Bootstrap a new project from a PRD or description
+                        input can be:
+                          @path/to/prd.md   Read input from a file
+                          "description"     Use text directly
+                          (empty)           Read from stdin
+
+Options:
+  --project-dir <dir>   Project directory (default: cwd)
+  --ws-port <port>      Enable WebSocket transport on <port>
+  --model <model>       Override LLM model
+  --max-budget <n>      Max budget per step in USD
+  -h, --help            Show this help
+  -v, --version         Show version
+`.trim();
+
+/**
+ * Look up this package's version string.
+ *
+ * Reads the `package.json` located two path segments above this module's
+ * file path and returns its `version` field. Any failure (unreadable file,
+ * invalid JSON) or a missing field yields the literal 'unknown'.
+ */
+async function getVersion(): Promise<string> {
+  const fallback = 'unknown';
+  try {
+    const modulePath = fileURLToPath(import.meta.url);
+    const manifestPath = resolve(modulePath, '..', '..', 'package.json');
+    const manifestText = await readFile(manifestPath, 'utf-8');
+    const { version } = JSON.parse(manifestText) as { version?: string };
+    return version ?? fallback;
+  } catch {
+    return fallback;
+  }
+}
+
+// ─── Init input resolution ───────────────────────────────────────────────────
+
+/**
+ * Turn the `init` command's input argument into the text to bootstrap from.
+ *
+ * - No input (undefined or empty) → read everything from stdin
+ * - Input without a leading `@` → returned unchanged
+ * - `@path` → contents of that file, with the path resolved against projectDir
+ *
+ * Exported for testing.
+ *
+ * @throws If the `@file` cannot be read, or stdin is a TTY with no input.
+ */
+export async function resolveInitInput(args: ParsedCliArgs): Promise<string> {
+  const { initInput, projectDir } = args;
+
+  // Nothing supplied — fall back to stdin.
+  if (!initInput) {
+    return readStdin();
+  }
+
+  // Anything without the @ marker is literal text.
+  if (!initInput.startsWith('@')) {
+    return initInput;
+  }
+
+  // File path: strip the @ marker and resolve relative to projectDir.
+  const filePath = resolve(projectDir, initInput.slice(1));
+  try {
+    return await readFile(filePath, 'utf-8');
+  } catch (err) {
+    const reason =
+      (err as NodeJS.ErrnoException).code === 'ENOENT'
+        ? 'file not found'
+        : (err as Error).message;
+    throw new Error(`Cannot read input file "${filePath}": ${reason}`);
+  }
+}
+
+/**
+ * Collect everything piped into stdin and return it as a UTF-8 string.
+ * Throws a usage hint when stdin is an interactive TTY (nothing piped).
+ */
+async function readStdin(): Promise<string> {
+  const source = process.stdin;
+
+  if (source.isTTY) {
+    const usageHint =
+      'No input provided. Usage:\n' +
+      '  gsd-sdk init @path/to/prd.md\n' +
+      '  gsd-sdk init "build a todo app"\n' +
+      '  cat prd.md | gsd-sdk init';
+    throw new Error(usageHint);
+  }
+
+  const received: Buffer[] = [];
+  return new Promise<string>((fulfil, fail) => {
+    source.on('data', (piece: Buffer) => {
+      received.push(piece);
+    });
+    source.on('end', () => {
+      fulfil(Buffer.concat(received).toString('utf-8'));
+    });
+    source.on('error', fail);
+  });
+}
+
+// ─── Main ────────────────────────────────────────────────────────────────────
+
+/**
+ * CLI entry point: parses arguments, validates the command, and dispatches
+ * to the `init`, `auto` or `run` flow.
+ *
+ * Failures are reported on stderr and signalled via `process.exitCode = 1`;
+ * this function does not call `process.exit()` itself.
+ *
+ * @param argv - Arguments to parse (defaults to `process.argv.slice(2)`).
+ */
+export async function main(argv: string[] = process.argv.slice(2)): Promise<void> {
+  let args: ParsedCliArgs;
+
+  try {
+    args = parseCliArgs(argv);
+  } catch (err) {
+    failWithUsage(`Error: ${errorMessage(err)}`);
+    return;
+  }
+
+  if (args.help) {
+    console.log(USAGE);
+    return;
+  }
+
+  if (args.version) {
+    const ver = await getVersion();
+    console.log(`gsd-sdk v${ver}`);
+    return;
+  }
+
+  if (args.command !== 'run' && args.command !== 'init' && args.command !== 'auto') {
+    failWithUsage('Error: Expected "gsd-sdk run <prompt>", "gsd-sdk auto", or "gsd-sdk init [input]"');
+    return;
+  }
+
+  if (args.command === 'run' && !args.prompt) {
+    failWithUsage('Error: "gsd-sdk run" requires a prompt');
+    return;
+  }
+
+  // ─── Init command ─────────────────────────────────────────────────────────
+  if (args.command === 'init') {
+    await runInitCommand(args);
+    return;
+  }
+
+  // ─── Run / Auto commands ──────────────────────────────────────────────────
+  // Both drive gsd.run(); `auto` enables autoMode and passes an empty prompt.
+  const isAuto = args.command === 'auto';
+  const baseOptions = {
+    projectDir: args.projectDir,
+    model: args.model,
+    maxBudgetUsd: args.maxBudget,
+  };
+  const gsd = new GSD(isAuto ? { ...baseOptions, autoMode: true } : baseOptions);
+  const prompt = isAuto ? '' : (args.prompt ?? '');
+
+  await withTransports(gsd, args.wsPort, async () => {
+    const result = await gsd.run(prompt);
+
+    // Final summary
+    const status = result.success ? 'SUCCESS' : 'FAILED';
+    const phases = result.phases.length;
+    const cost = result.totalCostUsd.toFixed(2);
+    const duration = (result.totalDurationMs / 1000).toFixed(1);
+    console.log(`\n[${status}] ${phases} phase(s), $${cost}, ${duration}s`);
+
+    if (!result.success) {
+      process.exitCode = 1;
+    }
+  });
+}
+
+/** Extract a printable message from an arbitrary thrown value. */
+function errorMessage(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
+
+/** Print an error line followed by the usage text and mark the process as failed. */
+function failWithUsage(message: string): void {
+  console.error(message);
+  console.error(USAGE);
+  process.exitCode = 1;
+}
+
+/**
+ * Attach the CLI transport (always) and the WebSocket transport (when a port
+ * is given) to `gsd`, run `body`, then close whatever was opened.
+ *
+ * Any error — including a failure to start the WebSocket transport — is
+ * reported as a fatal error and sets a non-zero exit code.
+ */
+async function withTransports(
+  gsd: GSD,
+  wsPort: number | undefined,
+  body: () => Promise<void>,
+): Promise<void> {
+  const cliTransport = new CLITransport();
+  gsd.addTransport(cliTransport);
+
+  // Only set once start() has succeeded, so we never close an unstarted server.
+  let wsTransport: WSTransport | undefined;
+
+  try {
+    if (wsPort !== undefined) {
+      const ws = new WSTransport({ port: wsPort });
+      await ws.start();
+      wsTransport = ws;
+      gsd.addTransport(ws);
+      console.log(`WebSocket transport listening on port ${wsPort}`);
+    }
+
+    await body();
+  } catch (err) {
+    console.error(`Fatal error: ${errorMessage(err)}`);
+    process.exitCode = 1;
+  } finally {
+    cliTransport.close();
+    if (wsTransport) {
+      wsTransport.close();
+    }
+  }
+}
+
+/** Run the `init` flow: resolve the input, run InitRunner, print the summary. */
+async function runInitCommand(args: ParsedCliArgs): Promise<void> {
+  let resolved: string;
+  try {
+    resolved = await resolveInitInput(args);
+  } catch (err) {
+    console.error(`Error: ${errorMessage(err)}`);
+    process.exitCode = 1;
+    return;
+  }
+  const input = resolved;
+
+  console.log(`[init] Resolved input: ${input.length} chars`);
+
+  // Build GSD instance for tools and event stream
+  const gsd = new GSD({
+    projectDir: args.projectDir,
+    model: args.model,
+    maxBudgetUsd: args.maxBudget,
+  });
+
+  await withTransports(gsd, args.wsPort, async () => {
+    const tools = gsd.createTools();
+    const runner = new InitRunner({
+      projectDir: args.projectDir,
+      tools,
+      eventStream: gsd.eventStream,
+      config: {
+        maxBudgetPerSession: args.maxBudget,
+        orchestratorModel: args.model,
+      },
+    });
+
+    const result = await runner.run(input);
+
+    // Print completion summary
+    const status = result.success ? 'SUCCESS' : 'FAILED';
+    const stepCount = result.steps.length;
+    const passedSteps = result.steps.filter(s => s.success).length;
+    const cost = result.totalCostUsd.toFixed(2);
+    const duration = (result.totalDurationMs / 1000).toFixed(1);
+
+    console.log(`\n[${status}] ${passedSteps}/${stepCount} steps, $${cost}, ${duration}s`);
+    if (result.artifacts.length > 0) {
+      console.log(`Artifacts: ${result.artifacts.join(', ')}`);
+    }
+
+    if (!result.success) {
+      // Log failed steps
+      for (const step of result.steps) {
+        if (!step.success && step.error) {
+          console.error(`  ✗ ${step.step}: ${step.error}`);
+        }
+      }
+      process.exitCode = 1;
+    }
+  });
+}
+
+// ─── Auto-run ────────────────────────────────────────────────────────────────
+
+// NOTE: this runs whenever the module is loaded, including when it is imported.
+// main() is async, so surface any rejection as a clean error and a non-zero
+// exit code instead of an unhandled promise rejection.
+main().catch((err: unknown) => {
+  console.error(`Fatal error: ${err instanceof Error ? err.message : String(err)}`);
+  process.exitCode = 1;
+});
--- a/sdk/src/config.test.ts
+++ b/sdk/src/config.test.ts
@@ -0,0 +1,168 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { loadConfig, CONFIG_DEFAULTS } from './config.js';
+import { mkdir, writeFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+
+describe('loadConfig', () => {
+  let tmpDir: string;
+
+  /** Path of the config file inside the current temp project. */
+  const configFile = (): string => join(tmpDir, '.planning', 'config.json');
+
+  /** Write raw text as the config file. */
+  const writeRaw = (text: string): Promise<void> => writeFile(configFile(), text);
+
+  /** Serialize a value as JSON and write it as the config file. */
+  const writeJson = (value: unknown): Promise<void> => writeRaw(JSON.stringify(value));
+
+  beforeEach(async () => {
+    const unique = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+    tmpDir = join(tmpdir(), `gsd-config-test-${unique}`);
+    await mkdir(join(tmpDir, '.planning'), { recursive: true });
+  });
+
+  afterEach(async () => {
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  // ─── Defaults and merging ───────────────────────────────────────────────
+
+  it('returns all defaults when config file is missing', async () => {
+    // Make sure no config.json exists
+    await rm(configFile(), { force: true });
+
+    await expect(loadConfig(tmpDir)).resolves.toEqual(CONFIG_DEFAULTS);
+  });
+
+  it('returns all defaults when config file is empty', async () => {
+    await writeRaw('');
+
+    await expect(loadConfig(tmpDir)).resolves.toEqual(CONFIG_DEFAULTS);
+  });
+
+  it('loads valid config and merges with defaults', async () => {
+    await writeJson({
+      model_profile: 'fast',
+      workflow: { research: false },
+    });
+
+    const { model_profile, workflow, commit_docs, parallelization } = await loadConfig(tmpDir);
+
+    expect(model_profile).toBe('fast');
+    expect(workflow.research).toBe(false);
+    // Untouched workflow keys keep their defaults
+    expect(workflow.plan_check).toBe(true);
+    expect(workflow.verifier).toBe(true);
+    // Untouched top-level keys keep their defaults
+    expect(commit_docs).toBe(true);
+    expect(parallelization).toBe(true);
+  });
+
+  it('partial config merges correctly for nested objects', async () => {
+    await writeJson({
+      git: { branching_strategy: 'milestone' },
+      hooks: { context_warnings: false },
+    });
+
+    const { git, hooks } = await loadConfig(tmpDir);
+
+    expect(git.branching_strategy).toBe('milestone');
+    // Untouched git keys keep their defaults
+    expect(git.phase_branch_template).toBe('gsd/phase-{phase}-{slug}');
+    expect(hooks.context_warnings).toBe(false);
+  });
+
+  it('preserves unknown top-level keys', async () => {
+    await writeJson({ custom_key: 'custom_value' });
+
+    const loaded = await loadConfig(tmpDir);
+    expect(loaded.custom_key).toBe('custom_value');
+  });
+
+  it('merges agent_skills', async () => {
+    await writeJson({ agent_skills: { planner: 'custom-skill' } });
+
+    const loaded = await loadConfig(tmpDir);
+    expect(loaded.agent_skills).toEqual({ planner: 'custom-skill' });
+  });
+
+  // ─── Negative tests ─────────────────────────────────────────────────────
+
+  it('throws on malformed JSON', async () => {
+    await writeRaw('{bad json');
+
+    await expect(loadConfig(tmpDir)).rejects.toThrow(/Failed to parse config/);
+  });
+
+  it('throws when config is not an object (array)', async () => {
+    await writeRaw('[1, 2, 3]');
+
+    await expect(loadConfig(tmpDir)).rejects.toThrow(/must be a JSON object/);
+  });
+
+  it('throws when config is not an object (string)', async () => {
+    await writeRaw('"just a string"');
+
+    await expect(loadConfig(tmpDir)).rejects.toThrow(/must be a JSON object/);
+  });
+
+  it('ignores unknown keys without error', async () => {
+    await writeJson({
+      totally_unknown: true,
+      another_unknown: { nested: 'value' },
+    });
+
+    const loaded = await loadConfig(tmpDir);
+    // Loads fine; unknown keys are passed through
+    expect(loaded.model_profile).toBe('balanced');
+    expect((loaded as Record<string, unknown>).totally_unknown).toBe(true);
+  });
+
+  it('handles wrong value types gracefully (user sets string instead of bool)', async () => {
+    await writeJson({
+      commit_docs: 'yes', // should be boolean but we don't validate types
+      parallelization: 0,
+    });
+
+    const loaded = await loadConfig(tmpDir);
+    // User values are passed through as-is — runtime code handles type mismatches
+    expect(loaded.commit_docs).toBe('yes');
+    expect(loaded.parallelization).toBe(0);
+  });
+
+  it('does not mutate CONFIG_DEFAULTS between calls', async () => {
+    const snapshot = structuredClone(CONFIG_DEFAULTS);
+
+    await writeJson({ model_profile: 'fast', workflow: { research: false } });
+    await loadConfig(tmpDir);
+
+    expect(CONFIG_DEFAULTS).toEqual(snapshot);
+  });
+});
--- a/sdk/src/config.ts
+++ b/sdk/src/config.ts
@@ -0,0 +1,148 @@
+/**
+ * Config reader — loads `.planning/config.json` and merges with defaults.
+ *
+ * Mirrors the default structure from `get-shit-done/bin/lib/config.cjs`
+ * `buildNewProjectConfig()`.
+ */
+
+import { readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+/**
+ * Shape of the `git` section of the project config.
+ * Default values for every field live in CONFIG_DEFAULTS.git.
+ */
+export interface GitConfig {
+  branching_strategy: string;
+  phase_branch_template: string;
+  milestone_branch_template: string;
+  /** The only nullable template; its default is null. */
+  quick_branch_template: string | null;
+}
+
+/**
+ * Shape of the `workflow` section of the project config.
+ * Default values for every field live in CONFIG_DEFAULTS.workflow.
+ */
+export interface WorkflowConfig {
+  research: boolean;
+  plan_check: boolean;
+  verifier: boolean;
+  nyquist_validation: boolean;
+  auto_advance: boolean;
+  node_repair: boolean;
+  node_repair_budget: number;
+  ui_phase: boolean;
+  ui_safety_gate: boolean;
+  text_mode: boolean;
+  research_before_questions: boolean;
+  discuss_mode: string;
+  skip_discuss: boolean;
+}
+
+/** Shape of the `hooks` section of the project config. */
+export interface HooksConfig {
+  context_warnings: boolean;
+}
+
+/**
+ * Full project config as returned by loadConfig().
+ *
+ * Values read from the user's file are not type-validated, so at runtime a
+ * field may hold a value of a different type than declared here.
+ */
+export interface GSDConfig {
+  model_profile: string;
+  commit_docs: boolean;
+  parallelization: boolean;
+  search_gitignored: boolean;
+  brave_search: boolean;
+  firecrawl: boolean;
+  exa_search: boolean;
+  git: GitConfig;
+  workflow: WorkflowConfig;
+  hooks: HooksConfig;
+  agent_skills: Record<string, unknown>;
+  // Unknown top-level keys from the user's file are passed through untouched.
+  [key: string]: unknown;
+}
+
+// ─── Defaults ────────────────────────────────────────────────────────────────
+
+/**
+ * Baseline value for every known config key.
+ * loadConfig() returns a clone of this when there is no usable config file,
+ * and otherwise layers the user's values on top of it.
+ */
+export const CONFIG_DEFAULTS: GSDConfig = {
+  model_profile: 'balanced',
+  commit_docs: true,
+  parallelization: true,
+  search_gitignored: false,
+  brave_search: false,
+  firecrawl: false,
+  exa_search: false,
+  git: {
+    branching_strategy: 'none',
+    phase_branch_template: 'gsd/phase-{phase}-{slug}',
+    milestone_branch_template: 'gsd/{milestone}-{slug}',
+    quick_branch_template: null,
+  },
+  workflow: {
+    research: true,
+    plan_check: true,
+    verifier: true,
+    nyquist_validation: true,
+    auto_advance: false,
+    node_repair: true,
+    node_repair_budget: 2,
+    ui_phase: true,
+    ui_safety_gate: true,
+    text_mode: false,
+    research_before_questions: false,
+    discuss_mode: 'discuss',
+    skip_discuss: false,
+  },
+  hooks: {
+    context_warnings: true,
+  },
+  agent_skills: {},
+};
+
+// ─── Loader ──────────────────────────────────────────────────────────────────
+
+/**
+ * Load project config from `.planning/config.json`, merging with defaults.
+ *
+ * - Returns a fresh copy of the defaults when the file does not exist or is
+ *   empty / whitespace-only.
+ * - Top-level keys from the file override defaults; the `git`, `workflow`,
+ *   `hooks` and `agent_skills` sections are merged key-by-key with their
+ *   defaults. A section that is not a JSON object is ignored.
+ * - Values are not type-validated.
+ *
+ * @param projectDir - Project root containing the `.planning/` directory.
+ * @throws If the file exists but cannot be read, contains malformed JSON,
+ *   or its top-level value is not a JSON object.
+ */
+export async function loadConfig(projectDir: string): Promise<GSDConfig> {
+  const configPath = join(projectDir, '.planning', 'config.json');
+
+  let raw: string;
+  try {
+    raw = await readFile(configPath, 'utf-8');
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException).code;
+    if (code === 'ENOENT' || code === 'ENOTDIR') {
+      // File missing — normal for new projects
+      return structuredClone(CONFIG_DEFAULTS);
+    }
+    // Anything else (permissions, path is a directory, …) is a real problem:
+    // silently falling back to defaults would hide the user's settings.
+    const msg = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to read config at ${configPath}: ${msg}`);
+  }
+
+  const trimmed = raw.trim();
+  if (trimmed === '') {
+    return structuredClone(CONFIG_DEFAULTS);
+  }
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(trimmed);
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to parse config at ${configPath}: ${msg}`);
+  }
+
+  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+    throw new Error(`Config at ${configPath} must be a JSON object`);
+  }
+  const user = parsed as Record<string, unknown>;
+
+  // Only plain objects may be merged into a section; spreading a string or
+  // an array would inject index keys ("0", "1", …) into the config.
+  const section = <T extends object>(value: unknown): Partial<T> =>
+    typeof value === 'object' && value !== null && !Array.isArray(value)
+      ? (value as Partial<T>)
+      : {};
+
+  // Two-level merge: top-level keys, then one level inside each known section.
+  const defaults = structuredClone(CONFIG_DEFAULTS);
+  return {
+    ...defaults,
+    ...user,
+    git: {
+      ...defaults.git,
+      ...section<GitConfig>(user.git),
+    },
+    workflow: {
+      ...defaults.workflow,
+      ...section<WorkflowConfig>(user.workflow),
+    },
+    hooks: {
+      ...defaults.hooks,
+      ...section<HooksConfig>(user.hooks),
+    },
+    agent_skills: {
+      ...defaults.agent_skills,
+      ...section<Record<string, unknown>>(user.agent_skills),
+    },
+  };
+}
--- a/sdk/src/context-engine.test.ts
+++ b/sdk/src/context-engine.test.ts
@@ -0,0 +1,211 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { mkdtemp, mkdir, writeFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { ContextEngine, PHASE_FILE_MANIFEST } from './context-engine.js';
+import { PhaseType } from './types.js';
+import type { GSDLogger } from './logger.js';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+/** Create a fresh, uniquely named temp directory to act as a project root. */
+async function createTempProject(): Promise<string> {
+  const prefix = join(tmpdir(), 'gsd-ctx-');
+  return mkdtemp(prefix);
+}
+
+/**
+ * Ensure `<projectDir>/.planning` exists and write each entry of `files`
+ * (filename → content) into it as UTF-8, one after another.
+ */
+async function createPlanningDir(projectDir: string, files: Record<string, string>): Promise<void> {
+  const target = join(projectDir, '.planning');
+  await mkdir(target, { recursive: true });
+
+  const names = Object.keys(files);
+  for (let i = 0; i < names.length; i += 1) {
+    const name = names[i];
+    await writeFile(join(target, name), files[name], 'utf-8');
+  }
+}
+
+/** Build a stand-in logger whose methods are all vitest mock functions. */
+function makeMockLogger(): GSDLogger {
+  const methodNames = [
+    'debug',
+    'info',
+    'warn',
+    'error',
+    'setPhase',
+    'setPlan',
+    'setSessionId',
+  ] as const;
+
+  const mock: Record<string, unknown> = {};
+  for (const name of methodNames) {
+    mock[name] = vi.fn();
+  }
+  return mock as unknown as GSDLogger;
+}
+
+// ─── Tests ───────────────────────────────────────────────────────────────────
+
+describe('ContextEngine', () => {
+  let projectDir: string;
+
+  /**
+   * Optionally seed `.planning/` with `files`, then resolve the context files
+   * for `phase` using a new engine (with `logger` when provided).
+   */
+  async function resolveFor(
+    phase: PhaseType,
+    files?: Record<string, string>,
+    logger?: GSDLogger,
+  ) {
+    if (files !== undefined) {
+      await createPlanningDir(projectDir, files);
+    }
+    return new ContextEngine(projectDir, logger).resolveContextFiles(phase);
+  }
+
+  beforeEach(async () => {
+    projectDir = await createTempProject();
+  });
+
+  afterEach(async () => {
+    await rm(projectDir, { recursive: true, force: true });
+  });
+
+  describe('resolveContextFiles', () => {
+    it('returns all files for plan phase when all exist', async () => {
+      const resolved = await resolveFor(PhaseType.Plan, {
+        'STATE.md': '# State\nproject: test',
+        'ROADMAP.md': '# Roadmap\nphase 01',
+        'CONTEXT.md': '# Context\nstack: node',
+        'RESEARCH.md': '# Research\nfindings here',
+        'REQUIREMENTS.md': '# Requirements\nR1: auth',
+      });
+
+      expect(resolved.state).toBe('# State\nproject: test');
+      expect(resolved.roadmap).toBe('# Roadmap\nphase 01');
+      expect(resolved.context).toBe('# Context\nstack: node');
+      expect(resolved.research).toBe('# Research\nfindings here');
+      expect(resolved.requirements).toBe('# Requirements\nR1: auth');
+    });
+
+    it('returns minimal files for execute phase', async () => {
+      const resolved = await resolveFor(PhaseType.Execute, {
+        'STATE.md': '# State',
+        'config.json': '{"model":"claude"}',
+        'ROADMAP.md': '# Roadmap — should not be read',
+        'CONTEXT.md': '# Context — should not be read',
+      });
+
+      expect(resolved.state).toBe('# State');
+      expect(resolved.config).toBe('{"model":"claude"}');
+      expect(resolved.roadmap).toBeUndefined();
+      expect(resolved.context).toBeUndefined();
+    });
+
+    it('returns state + roadmap + context for research phase', async () => {
+      const resolved = await resolveFor(PhaseType.Research, {
+        'STATE.md': '# State',
+        'ROADMAP.md': '# Roadmap',
+        'CONTEXT.md': '# Context',
+      });
+
+      expect(resolved.state).toBe('# State');
+      expect(resolved.roadmap).toBe('# Roadmap');
+      expect(resolved.context).toBe('# Context');
+      expect(resolved.requirements).toBeUndefined();
+    });
+
+    it('returns state + roadmap + requirements for verify phase', async () => {
+      const resolved = await resolveFor(PhaseType.Verify, {
+        'STATE.md': '# State',
+        'ROADMAP.md': '# Roadmap',
+        'REQUIREMENTS.md': '# Requirements',
+        'PLAN.md': '# Plan',
+        'SUMMARY.md': '# Summary',
+      });
+
+      expect(resolved.state).toBe('# State');
+      expect(resolved.roadmap).toBe('# Roadmap');
+      expect(resolved.requirements).toBe('# Requirements');
+      expect(resolved.plan).toBe('# Plan');
+      expect(resolved.summary).toBe('# Summary');
+    });
+
+    it('returns state + optional files for discuss phase', async () => {
+      const resolved = await resolveFor(PhaseType.Discuss, {
+        'STATE.md': '# State',
+        'ROADMAP.md': '# Roadmap',
+      });
+
+      expect(resolved.state).toBe('# State');
+      expect(resolved.roadmap).toBe('# Roadmap');
+      expect(resolved.context).toBeUndefined();
+    });
+
+    it('returns undefined for missing optional files without warning', async () => {
+      const logger = makeMockLogger();
+      const resolved = await resolveFor(
+        PhaseType.Plan,
+        {
+          'STATE.md': '# State',
+          'ROADMAP.md': '# Roadmap',
+          'CONTEXT.md': '# Context',
+        },
+        logger,
+      );
+
+      // research and requirements are optional for plan — no warning
+      expect(resolved.research).toBeUndefined();
+      expect(resolved.requirements).toBeUndefined();
+      expect(logger.warn).not.toHaveBeenCalled();
+    });
+
+    it('warns for missing required files', async () => {
+      // Empty .planning dir — STATE.md is required for all phases
+      const logger = makeMockLogger();
+      await resolveFor(PhaseType.Execute, {}, logger);
+
+      expect(logger.warn).toHaveBeenCalledWith(
+        expect.stringContaining('STATE.md'),
+        expect.objectContaining({ phase: PhaseType.Execute }),
+      );
+    });
+
+    it('handles missing .planning directory gracefully', async () => {
+      // No .planning dir at all
+      const resolved = await resolveFor(PhaseType.Execute);
+
+      expect(resolved.state).toBeUndefined();
+      expect(resolved.config).toBeUndefined();
+    });
+
+    it('handles empty file content', async () => {
+      const resolved = await resolveFor(PhaseType.Execute, { 'STATE.md': '' });
+
+      // Empty string is still defined — the file exists
+      expect(resolved.state).toBe('');
+    });
+  });
+
+  describe('PHASE_FILE_MANIFEST', () => {
+    it('covers all phase types', () => {
+      for (const phase of Object.values(PhaseType)) {
+        const specs = PHASE_FILE_MANIFEST[phase];
+        expect(specs).toBeDefined();
+        expect(specs.length).toBeGreaterThan(0);
+      }
+    });
+
+    it('execute phase has fewest files', () => {
+      const sizeOf = (phase: PhaseType): number => PHASE_FILE_MANIFEST[phase].length;
+      expect(sizeOf(PhaseType.Execute)).toBeLessThan(sizeOf(PhaseType.Plan));
+    });
+
+    it('every spec has required key, filename, and required flag', () => {
+      const allSpecs = Object.values(PHASE_FILE_MANIFEST).flat();
+      for (const { key, filename, required } of allSpecs) {
+        expect(key).toBeDefined();
+        expect(filename).toBeDefined();
+        expect(typeof required).toBe('boolean');
+      }
+    });
+  });
+});
--- a/sdk/src/context-engine.ts
+++ b/sdk/src/context-engine.ts
@@ -0,0 +1,114 @@
+/**
+ * Context engine — resolves which .planning/ state files exist per phase type.
+ *
+ * Different phases need different subsets of context files. The execute phase
+ * only needs STATE.md + config.json (minimal). Research needs STATE.md +
+ * ROADMAP.md + CONTEXT.md. Plan needs all files. Verify needs STATE.md +
+ * ROADMAP.md + REQUIREMENTS.md + PLAN/SUMMARY files.
+ */
+
+import { readFile, access } from 'node:fs/promises';
+import { join } from 'node:path';
+import { constants } from 'node:fs';
+
+import type { ContextFiles } from './types.js';
+import { PhaseType } from './types.js';
+import type { GSDLogger } from './logger.js';
+
+// ─── File manifest per phase ─────────────────────────────────────────────────
+
+/** One entry of a phase manifest: a file to load from `.planning/`. */
+interface FileSpec {
+  /** Property of the ContextFiles result that receives the file's content. */
+  key: keyof ContextFiles;
+  /** File name, joined onto the `.planning` directory path. */
+  filename: string;
+  /** When true, a missing file triggers a logger warning; otherwise it is skipped silently. */
+  required: boolean;
+}
+
+/**
+ * Define which files each phase needs. Required files emit warnings when missing;
+ * optional files silently return undefined.
+ *
+ * STATE.md is the only file marked required in every phase. Files are read in
+ * the order listed here.
+ */
+const PHASE_FILE_MANIFEST: Record<PhaseType, FileSpec[]> = {
+  // Execute: smallest set — state plus optional config.
+  [PhaseType.Execute]: [
+    { key: 'state', filename: 'STATE.md', required: true },
+    { key: 'config', filename: 'config.json', required: false },
+  ],
+  // Research: state, roadmap and context required; requirements optional.
+  [PhaseType.Research]: [
+    { key: 'state', filename: 'STATE.md', required: true },
+    { key: 'roadmap', filename: 'ROADMAP.md', required: true },
+    { key: 'context', filename: 'CONTEXT.md', required: true },
+    { key: 'requirements', filename: 'REQUIREMENTS.md', required: false },
+  ],
+  // Plan: same required set as research, plus optional research output.
+  [PhaseType.Plan]: [
+    { key: 'state', filename: 'STATE.md', required: true },
+    { key: 'roadmap', filename: 'ROADMAP.md', required: true },
+    { key: 'context', filename: 'CONTEXT.md', required: true },
+    { key: 'research', filename: 'RESEARCH.md', required: false },
+    { key: 'requirements', filename: 'REQUIREMENTS.md', required: false },
+  ],
+  // Verify: state and roadmap required; requirements, plan and summary optional.
+  [PhaseType.Verify]: [
+    { key: 'state', filename: 'STATE.md', required: true },
+    { key: 'roadmap', filename: 'ROADMAP.md', required: true },
+    { key: 'requirements', filename: 'REQUIREMENTS.md', required: false },
+    { key: 'plan', filename: 'PLAN.md', required: false },
+    { key: 'summary', filename: 'SUMMARY.md', required: false },
+  ],
+  // Discuss: only state is required.
+  [PhaseType.Discuss]: [
+    { key: 'state', filename: 'STATE.md', required: true },
+    { key: 'roadmap', filename: 'ROADMAP.md', required: false },
+    { key: 'context', filename: 'CONTEXT.md', required: false },
+  ],
+};
+
+// ─── ContextEngine class ─────────────────────────────────────────────────────
+
+/**
+ * Loads the `.planning/` files that a given phase type needs, as listed in
+ * PHASE_FILE_MANIFEST.
+ */
+export class ContextEngine {
+  private readonly planningDir: string;
+  private readonly logger?: GSDLogger;
+
+  /**
+   * @param projectDir - Project root; files are read from `<projectDir>/.planning`.
+   * @param logger - Optional logger used to warn about missing required files.
+   */
+  constructor(projectDir: string, logger?: GSDLogger) {
+    this.logger = logger;
+    this.planningDir = join(projectDir, '.planning');
+  }
+
+  /**
+   * Read every file listed in the manifest for `phaseType`, one at a time.
+   *
+   * Files that can be read are stored under their manifest key. Files that
+   * cannot be read are left out of the result; if such a file is marked
+   * required, a warning is logged (nothing is thrown).
+   */
+  async resolveContextFiles(phaseType: PhaseType): Promise<ContextFiles> {
+    const resolved: ContextFiles = {};
+
+    for (const { key, filename, required } of PHASE_FILE_MANIFEST[phaseType]) {
+      const fullPath = join(this.planningDir, filename);
+      const text = await this.readFileIfExists(fullPath);
+
+      if (text !== undefined) {
+        resolved[key] = text;
+        continue;
+      }
+      if (!required) {
+        continue;
+      }
+
+      this.logger?.warn(`Required context file missing for ${phaseType} phase: ${filename}`, {
+        phase: phaseType,
+        file: filename,
+        path: fullPath,
+      });
+    }
+
+    return resolved;
+  }
+
+  /**
+   * Return the UTF-8 content of `filePath`, or undefined when the file is
+   * not readable or the read fails for any reason.
+   */
+  private async readFileIfExists(filePath: string): Promise<string | undefined> {
+    let content: string | undefined;
+    try {
+      await access(filePath, constants.R_OK);
+      content = await readFile(filePath, 'utf-8');
+    } catch {
+      content = undefined;
+    }
+    return content;
+  }
+}
+
+// Exposed so the manifest and its entry type can be used outside this module
+// (the manifest is imported by the unit tests).
+export { PHASE_FILE_MANIFEST };
+export type { FileSpec };
--- a/sdk/src/e2e.integration.test.ts
+++ b/sdk/src/e2e.integration.test.ts
@@ -0,0 +1,178 @@
+/**
+ * E2E integration test — proves full SDK pipeline:
+ * parse → prompt → query() → SUMMARY.md
+ *
+ * Requires Claude Code CLI (`claude`) installed and authenticated.
+ * Skips gracefully if CLI is unavailable.
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { execSync } from 'node:child_process';
+import { mkdtemp, cp, rm, readFile, readdir } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { fileURLToPath } from 'node:url';
+
+import { GSD, parsePlanFile, GSDEventType } from './index.js';
+import type { GSDEvent } from './index.js';
+
+// ─── CLI availability check ─────────────────────────────────────────────────
+
+/** Probes for the `claude` executable via `which`; any failure counts as unavailable. */
+function detectClaudeCli(): boolean {
+  try {
+    execSync('which claude', { stdio: 'ignore' });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+// Evaluated once at module load so the suites below can be skipped up front.
+const cliAvailable = detectClaudeCli();
+
+// ESM has no built-in __dirname; derive it from this module's URL.
+const __dirname = fileURLToPath(new URL('.', import.meta.url));
+// Fixtures live next to `src/`, one level above this file.
+const fixturesDir = join(__dirname, '..', 'test-fixtures');
+
+// ─── Test suite ──────────────────────────────────────────────────────────────
+
+// Runs the sample plan through the real Claude Code CLI; skipped when the CLI is absent.
+describe.skipIf(!cliAvailable)('E2E: Single plan execution', () => {
+  // Scratch project directory shared by the tests in this suite.
+  let tmpDir: string;
+
+  beforeAll(async () => {
+    tmpDir = await mkdtemp(join(tmpdir(), 'gsd-sdk-e2e-'));
+    // Copy fixture files to temp directory
+    await cp(fixturesDir, tmpDir, { recursive: true });
+  });
+
+  afterAll(async () => {
+    if (tmpDir) {
+      await rm(tmpDir, { recursive: true, force: true });
+    }
+  });
+
+  it('executes a single plan and returns a valid PlanResult', async () => {
+    const gsd = new GSD({ projectDir: tmpDir, maxBudgetUsd: 1.0, maxTurns: 20 });
+    const result = await gsd.executePlan('sample-plan.md');
+
+    expect(result.success).toBe(true);
+    expect(typeof result.sessionId).toBe('string');
+    expect(result.sessionId.length).toBeGreaterThan(0);
+    expect(result.totalCostUsd).toBeGreaterThanOrEqual(0);
+    expect(result.durationMs).toBeGreaterThan(0);
+    expect(result.numTurns).toBeGreaterThan(0);
+
+    // Verify the plan's task was executed — output.txt should exist
+    const outputPath = join(tmpDir, 'output.txt');
+    const outputContent = await readFile(outputPath, 'utf-8');
+    expect(outputContent).toContain('hello from gsd-sdk');
+  }, 120_000); // 2 minute timeout for real CLI execution
+
+  it('proves session isolation (R014) — different session IDs for sequential runs', async () => {
+    // Create a second temp dir for isolation proof
+    const tmpDir2 = await mkdtemp(join(tmpdir(), 'gsd-sdk-e2e-'));
+
+    try {
+      // Populate inside the try so a failed copy still removes tmpDir2.
+      await cp(fixturesDir, tmpDir2, { recursive: true });
+
+      const gsd1 = new GSD({ projectDir: tmpDir, maxBudgetUsd: 1.0, maxTurns: 20 });
+      const gsd2 = new GSD({ projectDir: tmpDir2, maxBudgetUsd: 1.0, maxTurns: 20 });
+
+      const result1 = await gsd1.executePlan('sample-plan.md');
+      const result2 = await gsd2.executePlan('sample-plan.md');
+
+      // Different sessions must have different session IDs
+      expect(result1.sessionId).not.toBe(result2.sessionId);
+
+      // Both should track cost independently
+      expect(result1.totalCostUsd).toBeGreaterThanOrEqual(0);
+      expect(result2.totalCostUsd).toBeGreaterThanOrEqual(0);
+    } finally {
+      await rm(tmpDir2, { recursive: true, force: true });
+    }
+  }, 240_000); // 4 minute timeout — two sequential runs
+});
+
+// Pure parsing checks against the bundled fixture; no CLI involved.
+describe('E2E: Fixture validation (no CLI required)', () => {
+  it('fixture PLAN.md is valid and parseable', async () => {
+    const planPath = join(fixturesDir, 'sample-plan.md');
+    const { frontmatter, objective, tasks } = await parsePlanFile(planPath);
+
+    // Frontmatter
+    expect(frontmatter.phase).toBe('01-test');
+    expect(frontmatter.plan).toBe('01');
+    expect(frontmatter.type).toBe('execute');
+    expect(frontmatter.wave).toBe(1);
+    expect(frontmatter.depends_on).toEqual([]);
+    expect(frontmatter.files_modified).toEqual(['output.txt']);
+    expect(frontmatter.autonomous).toBe(true);
+    expect(frontmatter.requirements).toEqual(['TEST-01']);
+    expect(frontmatter.must_haves.truths).toEqual(['output.txt exists with expected content']);
+
+    // Body: objective and the single task
+    expect(objective).toContain('simple output file');
+    expect(tasks).toHaveLength(1);
+    expect(tasks[0].name).toBe('Create output file');
+    expect(tasks[0].type).toBe('auto');
+    expect(tasks[0].verify).toBe('test -f output.txt');
+  });
+});
+
+// Verifies which events are emitted, and in what order, during a real plan run.
+describe.skipIf(!cliAvailable)('E2E: Event stream during plan execution (R007)', () => {
+  let tmpDir: string;
+
+  beforeAll(async () => {
+    tmpDir = await mkdtemp(join(tmpdir(), 'gsd-sdk-e2e-stream-'));
+    await cp(fixturesDir, tmpDir, { recursive: true });
+  });
+
+  afterAll(async () => {
+    if (!tmpDir) {
+      return;
+    }
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  it('event stream emits events during plan execution (R007)', async () => {
+    const seen: GSDEvent[] = [];
+    // Events of one type, in arrival order.
+    const ofType = (type: GSDEventType): GSDEvent[] => seen.filter((e) => e.type === type);
+    // Position of the first event of a type, or -1 when none arrived.
+    const firstIndexOf = (type: GSDEventType): number => seen.findIndex((e) => e.type === type);
+
+    const gsd = new GSD({ projectDir: tmpDir, maxBudgetUsd: 1.0, maxTurns: 20 });
+
+    // Subscribe to all events
+    gsd.onEvent((event) => {
+      seen.push(event);
+    });
+
+    const result = await gsd.executePlan('sample-plan.md');
+    expect(result.success).toBe(true);
+
+    // (a) At least one session_init event received
+    expect(ofType(GSDEventType.SessionInit).length).toBeGreaterThanOrEqual(1);
+
+    // (b) At least one tool_call event received
+    expect(ofType(GSDEventType.ToolCall).length).toBeGreaterThanOrEqual(1);
+
+    // (c) Exactly one session_complete event with cost >= 0
+    const completions = ofType(GSDEventType.SessionComplete);
+    expect(completions).toHaveLength(1);
+    const completion = completions[0]!;
+    if (completion.type === GSDEventType.SessionComplete) {
+      expect(completion.totalCostUsd).toBeGreaterThanOrEqual(0);
+    }
+
+    // (d) Events arrived in order: session_init before tool_call before session_complete
+    const initIdx = firstIndexOf(GSDEventType.SessionInit);
+    const toolCallIdx = firstIndexOf(GSDEventType.ToolCall);
+    const completeIdx = firstIndexOf(GSDEventType.SessionComplete);
+    expect(initIdx).toBeLessThan(toolCallIdx);
+    expect(toolCallIdx).toBeLessThan(completeIdx);
+
+    // Bonus: at least one cost_update event was emitted
+    expect(ofType(GSDEventType.CostUpdate).length).toBeGreaterThanOrEqual(1);
+  }, 120_000);
+});
+
+// Failure path that is exercised without gating on CLI availability.
+describe('E2E: Error handling', () => {
+  // The assertion below expects the promise to reject, so the title says so.
+  it('rejects for a nonexistent plan path', async () => {
+    // Empty scratch project: it contains no plan file at all.
+    const tmpDir = await mkdtemp(join(tmpdir(), 'gsd-sdk-e2e-err-'));
+
+    try {
+      const gsd = new GSD({ projectDir: tmpDir });
+      await expect(gsd.executePlan('nonexistent-plan.md')).rejects.toThrow();
+    } finally {
+      await rm(tmpDir, { recursive: true, force: true });
+    }
+  });
+});
--- a/sdk/src/event-stream.test.ts
+++ b/sdk/src/event-stream.test.ts
@@ -0,0 +1,661 @@
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { GSDEventStream } from './event-stream.js';
+import {
+  GSDEventType,
+  PhaseType,
+  type GSDEvent,
+  type GSDSessionInitEvent,
+  type GSDSessionCompleteEvent,
+  type GSDSessionErrorEvent,
+  type GSDAssistantTextEvent,
+  type GSDToolCallEvent,
+  type GSDToolProgressEvent,
+  type GSDToolUseSummaryEvent,
+  type GSDTaskStartedEvent,
+  type GSDTaskProgressEvent,
+  type GSDTaskNotificationEvent,
+  type GSDAPIRetryEvent,
+  type GSDRateLimitEvent,
+  type GSDStatusChangeEvent,
+  type GSDCompactBoundaryEvent,
+  type GSDStreamEvent,
+  type GSDCostUpdateEvent,
+  type TransportHandler,
+} from './types.js';
+import type {
+  SDKMessage,
+  SDKSystemMessage,
+  SDKAssistantMessage,
+  SDKResultSuccess,
+  SDKResultError,
+  SDKToolProgressMessage,
+  SDKToolUseSummaryMessage,
+  SDKTaskStartedMessage,
+  SDKTaskProgressMessage,
+  SDKTaskNotificationMessage,
+  SDKAPIRetryMessage,
+  SDKRateLimitEvent,
+  SDKStatusMessage,
+  SDKCompactBoundaryMessage,
+  SDKPartialAssistantMessage,
+} from '@anthropic-ai/claude-agent-sdk';
+import type { UUID } from 'crypto';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+// Fixed identifiers shared by the fixture messages built below.
+const TEST_UUID = '00000000-0000-0000-0000-000000000000' as UUID;
+const TEST_SESSION = 'test-session-1';
+
+/** Builds a `system` / `init` SDK message fixture for TEST_SESSION. */
+function makeSystemInit(): SDKSystemMessage {
+  const initMessage = {
+    type: 'system',
+    subtype: 'init',
+    agents: [],
+    apiKeySource: 'user',
+    betas: [],
+    claude_code_version: '1.0.0',
+    cwd: '/test',
+    tools: ['Read', 'Write', 'Bash'],
+    mcp_servers: [],
+    model: 'claude-sonnet-4-6',
+    permissionMode: 'bypassPermissions',
+    slash_commands: [],
+    output_style: 'text',
+    skills: [],
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKSystemMessage;
+
+  return initMessage;
+}
+
+/**
+ * Builds an `assistant` SDK message fixture whose inner message carries the
+ * given content blocks.
+ */
+function makeAssistantMsg(content: Array<{ type: string; [key: string]: unknown }>): SDKAssistantMessage {
+  // Inner API message; the double cast is needed because the blocks are loosely typed.
+  const innerMessage = {
+    content,
+    id: 'msg-1',
+    type: 'message',
+    role: 'assistant',
+    model: 'claude-sonnet-4-6',
+    stop_reason: 'end_turn',
+    stop_sequence: null,
+    usage: { input_tokens: 100, output_tokens: 50 },
+  } as unknown as SDKAssistantMessage['message'];
+
+  return {
+    type: 'assistant',
+    message: innerMessage,
+    parent_tool_use_id: null,
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKAssistantMessage;
+}
+
+/**
+ * Builds a successful `result` SDK message fixture.
+ *
+ * @param costUsd - Value reported as `total_cost_usd` (defaults to 0.05).
+ */
+function makeResultSuccess(costUsd = 0.05): SDKResultSuccess {
+  const successResult = {
+    type: 'result',
+    subtype: 'success',
+    duration_ms: 5000,
+    duration_api_ms: 4000,
+    is_error: false,
+    num_turns: 3,
+    result: 'Task completed successfully',
+    stop_reason: 'end_turn',
+    total_cost_usd: costUsd,
+    usage: { input_tokens: 1000, output_tokens: 500, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 },
+    modelUsage: {},
+    permission_denials: [],
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKResultSuccess;
+
+  return successResult;
+}
+
+/** Builds a failed `result` SDK message fixture (subtype `error_max_turns`, cost 2.50). */
+function makeResultError(): SDKResultError {
+  const errorResult = {
+    type: 'result',
+    subtype: 'error_max_turns',
+    duration_ms: 10000,
+    duration_api_ms: 8000,
+    is_error: true,
+    num_turns: 50,
+    stop_reason: null,
+    total_cost_usd: 2.50,
+    usage: { input_tokens: 5000, output_tokens: 2000, cache_read_input_tokens: 0, cache_creation_input_tokens: 0 },
+    modelUsage: {},
+    permission_denials: [],
+    errors: ['Max turns exceeded'],
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKResultError;
+
+  return errorResult;
+}
+
+/** Builds a `tool_progress` SDK message fixture for a Bash tool use. */
+function makeToolProgress(): SDKToolProgressMessage {
+  const progress = {
+    type: 'tool_progress',
+    tool_use_id: 'tu-1',
+    tool_name: 'Bash',
+    parent_tool_use_id: null,
+    elapsed_time_seconds: 5.2,
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKToolProgressMessage;
+
+  return progress;
+}
+
+/** Builds a `tool_use_summary` SDK message fixture covering three tool uses. */
+function makeToolUseSummary(): SDKToolUseSummaryMessage {
+  const summaryMessage = {
+    type: 'tool_use_summary',
+    summary: 'Ran 3 bash commands',
+    preceding_tool_use_ids: ['tu-1', 'tu-2', 'tu-3'],
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKToolUseSummaryMessage;
+
+  return summaryMessage;
+}
+
+/** Builds a `system` / `task_started` SDK message fixture. */
+function makeTaskStarted(): SDKTaskStartedMessage {
+  const started = {
+    type: 'system',
+    subtype: 'task_started',
+    task_id: 'task-1',
+    description: 'Running test suite',
+    task_type: 'local_workflow',
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKTaskStartedMessage;
+
+  return started;
+}
+
+/** Builds a `system` / `task_progress` SDK message fixture with usage counters. */
+function makeTaskProgress(): SDKTaskProgressMessage {
+  const progress = {
+    type: 'system',
+    subtype: 'task_progress',
+    task_id: 'task-1',
+    description: 'Running tests',
+    usage: { total_tokens: 500, tool_uses: 3, duration_ms: 2000 },
+    last_tool_name: 'Bash',
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKTaskProgressMessage;
+
+  return progress;
+}
+
+/** Builds a `system` / `task_notification` SDK message fixture with status `completed`. */
+function makeTaskNotification(): SDKTaskNotificationMessage {
+  const notification = {
+    type: 'system',
+    subtype: 'task_notification',
+    task_id: 'task-1',
+    status: 'completed',
+    output_file: '/tmp/output.txt',
+    summary: 'All tests passed',
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKTaskNotificationMessage;
+
+  return notification;
+}
+
+/** Builds a `system` / `api_retry` SDK message fixture (attempt 2 of 5, status 529). */
+function makeAPIRetry(): SDKAPIRetryMessage {
+  const retry = {
+    type: 'system',
+    subtype: 'api_retry',
+    attempt: 2,
+    max_retries: 5,
+    retry_delay_ms: 1000,
+    error_status: 529,
+    error: 'server_error',
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKAPIRetryMessage;
+
+  return retry;
+}
+
+/** Builds a `rate_limit_event` SDK message fixture that resets 60 seconds from now. */
+function makeRateLimitEvent(): SDKRateLimitEvent {
+  const resetsAt = Date.now() + 60000;
+
+  return {
+    type: 'rate_limit_event',
+    rate_limit_info: {
+      status: 'allowed_warning',
+      resetsAt,
+      utilization: 0.85,
+    },
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKRateLimitEvent;
+}
+
+/** Builds a `system` / `status` SDK message fixture reporting `compacting`. */
+function makeStatusMessage(): SDKStatusMessage {
+  const statusMessage = {
+    type: 'system',
+    subtype: 'status',
+    status: 'compacting',
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKStatusMessage;
+
+  return statusMessage;
+}
+
+/** Builds a `system` / `compact_boundary` SDK message fixture (auto trigger, 95000 pre-tokens). */
+function makeCompactBoundary(): SDKCompactBoundaryMessage {
+  const boundary = {
+    type: 'system',
+    subtype: 'compact_boundary',
+    compact_metadata: {
+      trigger: 'auto',
+      pre_tokens: 95000,
+    },
+    uuid: TEST_UUID,
+    session_id: TEST_SESSION,
+  } as SDKCompactBoundaryMessage;
+
+  return boundary;
+}
+
+// ─── SDKMessage → GSDEvent mapping tests ─────────────────────────────────────
+
+describe('GSDEventStream', () => {
+  let stream: GSDEventStream;
+
+  beforeEach(() => {
+    stream = new GSDEventStream();
+  });
+
+  describe('mapSDKMessage', () => {
+    /**
+     * Runs `msg` through `stream.mapSDKMessage`, asserts that a non-null event
+     * of type `expected` came back, and returns it as the concrete event type.
+     */
+    function mapExpecting<T extends GSDEvent>(msg: SDKMessage, expected: GSDEventType): T {
+      const mapped = stream.mapSDKMessage(msg);
+      expect(mapped).not.toBeNull();
+      expect(mapped!.type).toBe(expected);
+      return mapped as T;
+    }
+
+    it('maps SDKSystemMessage init → SessionInit', () => {
+      const init = mapExpecting<GSDSessionInitEvent>(makeSystemInit(), GSDEventType.SessionInit);
+
+      expect(init.model).toBe('claude-sonnet-4-6');
+      expect(init.tools).toEqual(['Read', 'Write', 'Bash']);
+      expect(init.cwd).toBe('/test');
+      expect(init.sessionId).toBe(TEST_SESSION);
+    });
+
+    it('maps assistant text blocks → AssistantText', () => {
+      const twoTextBlocks = makeAssistantMsg([
+        { type: 'text', text: 'Hello ' },
+        { type: 'text', text: 'world' },
+      ]);
+      const textEvent = mapExpecting<GSDAssistantTextEvent>(twoTextBlocks, GSDEventType.AssistantText);
+
+      expect(textEvent.text).toBe('Hello world');
+    });
+
+    it('maps assistant tool_use blocks → ToolCall', () => {
+      const toolUse = makeAssistantMsg([
+        { type: 'tool_use', id: 'tu-1', name: 'Read', input: { path: 'test.ts' } },
+      ]);
+      const call = mapExpecting<GSDToolCallEvent>(toolUse, GSDEventType.ToolCall);
+
+      expect(call.toolName).toBe('Read');
+      expect(call.toolUseId).toBe('tu-1');
+      expect(call.input).toEqual({ path: 'test.ts' });
+    });
+
+    it('handles multi-block assistant messages (text + tool_use)', () => {
+      const emitted: GSDEvent[] = [];
+      stream.on('event', (e: GSDEvent) => emitted.push(e));
+
+      const mixed = makeAssistantMsg([
+        { type: 'text', text: 'Let me check that.' },
+        { type: 'tool_use', id: 'tu-1', name: 'Read', input: { path: 'f.ts' } },
+      ]);
+
+      // mapAndEmit will emit the text event directly and return the tool_call
+      expect(stream.mapAndEmit(mixed)).not.toBeNull();
+
+      // Should have received 2 events total
+      expect(emitted).toHaveLength(2);
+      expect(emitted[0]!.type).toBe(GSDEventType.AssistantText);
+      expect(emitted[1]!.type).toBe(GSDEventType.ToolCall);
+    });
+
+    it('maps SDKResultSuccess → SessionComplete', () => {
+      const done = mapExpecting<GSDSessionCompleteEvent>(makeResultSuccess(), GSDEventType.SessionComplete);
+
+      expect(done.success).toBe(true);
+      expect(done.totalCostUsd).toBe(0.05);
+      expect(done.durationMs).toBe(5000);
+      expect(done.numTurns).toBe(3);
+      expect(done.result).toBe('Task completed successfully');
+    });
+
+    it('maps SDKResultError → SessionError', () => {
+      const failure = mapExpecting<GSDSessionErrorEvent>(makeResultError(), GSDEventType.SessionError);
+
+      expect(failure.success).toBe(false);
+      expect(failure.errorSubtype).toBe('error_max_turns');
+      expect(failure.errors).toContain('Max turns exceeded');
+    });
+
+    it('maps SDKToolProgressMessage → ToolProgress', () => {
+      const progress = mapExpecting<GSDToolProgressEvent>(makeToolProgress(), GSDEventType.ToolProgress);
+
+      expect(progress.toolName).toBe('Bash');
+      expect(progress.toolUseId).toBe('tu-1');
+      expect(progress.elapsedSeconds).toBe(5.2);
+    });
+
+    it('maps SDKToolUseSummaryMessage → ToolUseSummary', () => {
+      const summary = mapExpecting<GSDToolUseSummaryEvent>(makeToolUseSummary(), GSDEventType.ToolUseSummary);
+
+      expect(summary.summary).toBe('Ran 3 bash commands');
+      expect(summary.toolUseIds).toEqual(['tu-1', 'tu-2', 'tu-3']);
+    });
+
+    it('maps SDKTaskStartedMessage → TaskStarted', () => {
+      const started = mapExpecting<GSDTaskStartedEvent>(makeTaskStarted(), GSDEventType.TaskStarted);
+
+      expect(started.taskId).toBe('task-1');
+      expect(started.description).toBe('Running test suite');
+      expect(started.taskType).toBe('local_workflow');
+    });
+
+    it('maps SDKTaskProgressMessage → TaskProgress', () => {
+      const progress = mapExpecting<GSDTaskProgressEvent>(makeTaskProgress(), GSDEventType.TaskProgress);
+
+      expect(progress.taskId).toBe('task-1');
+      expect(progress.totalTokens).toBe(500);
+      expect(progress.toolUses).toBe(3);
+      expect(progress.lastToolName).toBe('Bash');
+    });
+
+    it('maps SDKTaskNotificationMessage → TaskNotification', () => {
+      const notice = mapExpecting<GSDTaskNotificationEvent>(makeTaskNotification(), GSDEventType.TaskNotification);
+
+      expect(notice.taskId).toBe('task-1');
+      expect(notice.status).toBe('completed');
+      expect(notice.summary).toBe('All tests passed');
+    });
+
+    it('maps SDKAPIRetryMessage → APIRetry', () => {
+      const retry = mapExpecting<GSDAPIRetryEvent>(makeAPIRetry(), GSDEventType.APIRetry);
+
+      expect(retry.attempt).toBe(2);
+      expect(retry.maxRetries).toBe(5);
+      expect(retry.retryDelayMs).toBe(1000);
+      expect(retry.errorStatus).toBe(529);
+    });
+
+    it('maps SDKRateLimitEvent → RateLimit', () => {
+      const limit = mapExpecting<GSDRateLimitEvent>(makeRateLimitEvent(), GSDEventType.RateLimit);
+
+      expect(limit.status).toBe('allowed_warning');
+      expect(limit.utilization).toBe(0.85);
+    });
+
+    it('maps SDKStatusMessage → StatusChange', () => {
+      const change = mapExpecting<GSDStatusChangeEvent>(makeStatusMessage(), GSDEventType.StatusChange);
+
+      expect(change.status).toBe('compacting');
+    });
+
+    it('maps SDKCompactBoundaryMessage → CompactBoundary', () => {
+      const boundary = mapExpecting<GSDCompactBoundaryEvent>(makeCompactBoundary(), GSDEventType.CompactBoundary);
+
+      expect(boundary.trigger).toBe('auto');
+      expect(boundary.preTokens).toBe(95000);
+    });
+
+    // Message types that these tests expect to map to no event at all.
+    it.each(['user', 'auth_status', 'prompt_suggestion'])('returns null for %s messages', (type) => {
+      const ignored = { type, session_id: TEST_SESSION } as SDKMessage;
+      expect(stream.mapSDKMessage(ignored)).toBeNull();
+    });
+
+    it('includes phase and planName context when provided', () => {
+      const context = { phase: PhaseType.Execute, planName: 'feature-plan' };
+      const tagged = stream.mapSDKMessage(makeSystemInit(), context);
+
+      expect(tagged!.phase).toBe(PhaseType.Execute);
+      expect(tagged!.planName).toBe('feature-plan');
+    });
+  });
+
+  // ─── Cost tracking ─────────────────────────────────────────────────────
+
+  describe('cost tracking', () => {
+    it('tracks per-session cost on session_complete', () => {
+      stream.mapSDKMessage(makeResultSuccess(0.05));
+
+      const { session, cumulative } = stream.getCost();
+      expect(session).toBe(0.05);
+      expect(cumulative).toBe(0.05);
+    });
+
+    it('accumulates cumulative cost across multiple sessions', () => {
+      // Session 1
+      const first = makeResultSuccess(0.05);
+      first.session_id = 'session-1';
+      stream.mapSDKMessage(first);
+
+      // Session 2
+      const second = makeResultSuccess(0.10);
+      second.session_id = 'session-2';
+      stream.mapSDKMessage(second);
+
+      const { session, cumulative } = stream.getCost();
+      // Current session is session-2 (last one updated)
+      expect(session).toBe(0.10);
+      expect(cumulative).toBeCloseTo(0.15, 10);
+    });
+
+    it('correctly computes delta when same session updates cost', () => {
+      // Same session id both times: an intermediate cost followed by the final cost
+      stream.mapSDKMessage(makeResultSuccess(0.03));
+      stream.mapSDKMessage(makeResultSuccess(0.05));
+
+      const { session, cumulative } = stream.getCost();
+      expect(session).toBe(0.05);
+      // Cumulative should be 0.05, not 0.08 (delta was +0.02, not +0.05)
+      expect(cumulative).toBeCloseTo(0.05, 10);
+    });
+
+    it('tracks error session costs too', () => {
+      stream.mapSDKMessage(makeResultError());
+
+      const { session, cumulative } = stream.getCost();
+      expect(session).toBe(2.50);
+      expect(cumulative).toBe(2.50);
+    });
+  });
+
+  // ─── Transport management ──────────────────────────────────────────────
+
+  // Covers addTransport / removeTransport / closeAll and delivery to transports.
+  describe('transport management', () => {
+    it('delivers events to subscribed transports', () => {
+      const received: GSDEvent[] = [];
+      const transport: TransportHandler = {
+        onEvent: (event) => received.push(event),
+        close: () => {},
+      };
+
+      stream.addTransport(transport);
+      stream.mapAndEmit(makeSystemInit());
+
+      expect(received).toHaveLength(1);
+      expect(received[0]!.type).toBe(GSDEventType.SessionInit);
+    });
+
+    it('delivers events to multiple transports', () => {
+      const received1: GSDEvent[] = [];
+      const received2: GSDEvent[] = [];
+
+      stream.addTransport({
+        onEvent: (e) => received1.push(e),
+        close: () => {},
+      });
+      stream.addTransport({
+        onEvent: (e) => received2.push(e),
+        close: () => {},
+      });
+
+      stream.mapAndEmit(makeSystemInit());
+
+      expect(received1).toHaveLength(1);
+      expect(received2).toHaveLength(1);
+    });
+
+    it('stops delivering events after transport removal', () => {
+      const received: GSDEvent[] = [];
+      const transport: TransportHandler = {
+        onEvent: (e) => received.push(e),
+        close: () => {},
+      };
+
+      stream.addTransport(transport);
+      stream.mapAndEmit(makeSystemInit());
+      expect(received).toHaveLength(1);
+
+      stream.removeTransport(transport);
+      stream.mapAndEmit(makeResultSuccess());
+      expect(received).toHaveLength(1); // No new events
+    });
+
+    it('survives transport.onEvent() throwing', () => {
+      const badTransport: TransportHandler = {
+        onEvent: () => { throw new Error('transport failed'); },
+        close: () => {},
+      };
+      const goodReceived: GSDEvent[] = [];
+      const goodTransport: TransportHandler = {
+        onEvent: (e) => goodReceived.push(e),
+        close: () => {},
+      };
+
+      // The failing transport is registered first, so it is added ahead of the good one.
+      stream.addTransport(badTransport);
+      stream.addTransport(goodTransport);
+
+      // Should not throw, and good transport still receives events
+      expect(() => stream.mapAndEmit(makeSystemInit())).not.toThrow();
+      expect(goodReceived).toHaveLength(1);
+    });
+
+    it('closeAll() calls close on all transports and clears them', () => {
+      const closeCalled: boolean[] = [];
+      // Anything a transport receives; must stay empty once the transports are cleared.
+      const deliveredToTransports: GSDEvent[] = [];
+      stream.addTransport({
+        onEvent: (e) => deliveredToTransports.push(e),
+        close: () => closeCalled.push(true),
+      });
+      stream.addTransport({
+        onEvent: (e) => deliveredToTransports.push(e),
+        close: () => closeCalled.push(true),
+      });
+
+      stream.closeAll();
+      expect(closeCalled).toHaveLength(2);
+
+      // No more deliveries after closeAll
+      const events: GSDEvent[] = [];
+      stream.on('event', (e: GSDEvent) => events.push(e));
+      stream.mapAndEmit(makeSystemInit());
+      // EventEmitter listeners still work, but transports are gone
+      expect(events).toHaveLength(1);
+      expect(deliveredToTransports).toHaveLength(0);
+    });
+  });
+
+  // ─── EventEmitter integration ──────────────────────────────────────────
+
+  describe('EventEmitter integration', () => {
+    /** Emits one init message followed by one success result on the stream under test. */
+    function emitInitThenResult(): void {
+      stream.mapAndEmit(makeSystemInit());
+      stream.mapAndEmit(makeResultSuccess());
+    }
+
+    it('emits typed events via "event" channel', () => {
+      const captured: GSDEvent[] = [];
+      stream.on('event', (e: GSDEvent) => captured.push(e));
+
+      emitInitThenResult();
+
+      expect(captured).toHaveLength(2);
+      expect(captured[0]!.type).toBe(GSDEventType.SessionInit);
+      expect(captured[1]!.type).toBe(GSDEventType.SessionComplete);
+    });
+
+    it('emits events on per-type channels', () => {
+      const onlyInit: GSDEvent[] = [];
+      stream.on(GSDEventType.SessionInit, (e: GSDEvent) => onlyInit.push(e));
+
+      emitInitThenResult();
+
+      // The listener is bound to the SessionInit channel, so the result is not seen.
+      expect(onlyInit).toHaveLength(1);
+      expect(onlyInit[0]!.type).toBe(GSDEventType.SessionInit);
+    });
+  });
+
+  // ─── Stream event mapping ──────────────────────────────────────────────
+
+  describe('stream_event mapping', () => {
+    it('maps SDKPartialAssistantMessage → StreamEvent', () => {
+      const partial = {
+        type: 'stream_event' as const,
+        event: { type: 'content_block_delta' },
+        parent_tool_use_id: null,
+        uuid: TEST_UUID,
+        session_id: TEST_SESSION,
+      } as SDKPartialAssistantMessage;
+
+      const mapped = stream.mapSDKMessage(partial);
+
+      expect(mapped).not.toBeNull();
+      expect(mapped!.type).toBe(GSDEventType.StreamEvent);
+
+      // The raw stream payload is expected on the mapped event's `event` field.
+      const streamEvent = mapped as GSDStreamEvent;
+      expect(streamEvent.event).toEqual({ type: 'content_block_delta' });
+    });
+  });
+
+  // ─── Empty / edge cases ────────────────────────────────────────────────
+
+  describe('edge cases', () => {
+    it('returns null for assistant messages with empty content', () => {
+      const noBlocks = makeAssistantMsg([]);
+      const mapped = stream.mapSDKMessage(noBlocks);
+      expect(mapped).toBeNull();
+    });
+
+    it('returns null for assistant messages with only empty text', () => {
+      const blankText = makeAssistantMsg([{ type: 'text', text: '' }]);
+      const mapped = stream.mapSDKMessage(blankText);
+      expect(mapped).toBeNull();
+    });
+
+    it('returns null for unknown system subtypes', () => {
+      // Double cast: this subtype is deliberately outside the SDK's message union.
+      const unknownSubtype = {
+        type: 'system',
+        subtype: 'unknown_future_type',
+        session_id: TEST_SESSION,
+        uuid: TEST_UUID,
+      } as unknown as SDKMessage;
+      const mapped = stream.mapSDKMessage(unknownSubtype);
+      expect(mapped).toBeNull();
+    });
+  });
+});
--- a/sdk/src/event-stream.ts
+++ b/sdk/src/event-stream.ts
@@ -0,0 +1,439 @@
+/**
+ * GSD Event Stream — maps SDKMessage variants to typed GSD events.
+ *
+ * Extends EventEmitter to provide a typed event bus. Includes:
+ * - SDKMessage → GSDEvent mapping
+ * - Transport management (subscribe/unsubscribe handlers)
+ * - Per-session cost tracking with cumulative totals
+ */
+
+import { EventEmitter } from 'node:events';
+import type {
+  SDKMessage,
+  SDKResultSuccess,
+  SDKResultError,
+  SDKAssistantMessage,
+  SDKSystemMessage,
+  SDKToolProgressMessage,
+  SDKTaskNotificationMessage,
+  SDKTaskStartedMessage,
+  SDKTaskProgressMessage,
+  SDKToolUseSummaryMessage,
+  SDKRateLimitEvent,
+  SDKAPIRetryMessage,
+  SDKStatusMessage,
+  SDKCompactBoundaryMessage,
+  SDKPartialAssistantMessage,
+} from '@anthropic-ai/claude-agent-sdk';
+import {
+  GSDEventType,
+  type GSDEvent,
+  type GSDSessionInitEvent,
+  type GSDSessionCompleteEvent,
+  type GSDSessionErrorEvent,
+  type GSDAssistantTextEvent,
+  type GSDToolCallEvent,
+  type GSDToolProgressEvent,
+  type GSDToolUseSummaryEvent,
+  type GSDTaskStartedEvent,
+  type GSDTaskProgressEvent,
+  type GSDTaskNotificationEvent,
+  type GSDCostUpdateEvent,
+  type GSDAPIRetryEvent,
+  type GSDRateLimitEvent as GSDRateLimitEventType,
+  type GSDStatusChangeEvent,
+  type GSDCompactBoundaryEvent,
+  type GSDStreamEvent,
+  type TransportHandler,
+  type CostBucket,
+  type CostTracker,
+  type PhaseType,
+} from './types.js';
+
+// ─── Mapping context ─────────────────────────────────────────────────────────
+
+export interface EventStreamContext {
+  phase?: PhaseType;
+  planName?: string;
+}
+
+// ─── GSDEventStream ──────────────────────────────────────────────────────────
+
+export class GSDEventStream extends EventEmitter {
+  private readonly transports: Set<TransportHandler> = new Set();
+  private readonly costTracker: CostTracker = {
+    sessions: new Map(),
+    cumulativeCostUsd: 0,
+  };
+
+  constructor() {
+    super();
+    this.setMaxListeners(20);
+  }
+
+  // ─── Transport management ────────────────────────────────────────────
+
+  /** Subscribe a transport handler to receive all events. */
+  addTransport(handler: TransportHandler): void {
+    this.transports.add(handler);
+  }
+
+  /** Unsubscribe a transport handler. */
+  removeTransport(handler: TransportHandler): void {
+    this.transports.delete(handler);
+  }
+
+  /** Close all transports. */
+  closeAll(): void {
+    for (const transport of this.transports) {
+      try {
+        transport.close();
+      } catch {
+        // Ignore transport close errors
+      }
+    }
+    this.transports.clear();
+  }
+
+  // ─── Event emission ──────────────────────────────────────────────────
+
+  /** Emit a typed GSD event to all listeners and transports. */
+  emitEvent(event: GSDEvent): void {
+    // Emit via EventEmitter for listener-based consumers
+    this.emit('event', event);
+    this.emit(event.type, event);
+
+    // Deliver to all transports — wrap in try/catch to prevent
+    // one bad transport from killing the stream
+    for (const transport of this.transports) {
+      try {
+        transport.onEvent(event);
+      } catch {
+        // Silently ignore transport errors
+      }
+    }
+  }
+
+  // ─── SDKMessage mapping ──────────────────────────────────────────────
+
+  /**
+   * Map an SDKMessage to a GSDEvent.
+   * Returns null for non-actionable message types (user messages, replays, etc.).
+   */
+  mapSDKMessage(msg: SDKMessage, context: EventStreamContext = {}): GSDEvent | null {
+    const base = {
+      timestamp: new Date().toISOString(),
+      sessionId: 'session_id' in msg ? (msg.session_id as string) : '',
+      phase: context.phase,
+      planName: context.planName,
+    };
+
+    switch (msg.type) {
+      case 'system':
+        return this.mapSystemMessage(msg as SDKSystemMessage | SDKAPIRetryMessage | SDKStatusMessage | SDKCompactBoundaryMessage | SDKTaskStartedMessage | SDKTaskProgressMessage | SDKTaskNotificationMessage, base);
+
+      case 'assistant':
+        return this.mapAssistantMessage(msg as SDKAssistantMessage, base);
+
+      case 'result':
+        return this.mapResultMessage(msg as SDKResultSuccess | SDKResultError, base);
+
+      case 'tool_progress':
+        return this.mapToolProgressMessage(msg as SDKToolProgressMessage, base);
+
+      case 'tool_use_summary':
+        return this.mapToolUseSummaryMessage(msg as SDKToolUseSummaryMessage, base);
+
+      case 'rate_limit_event':
+        return this.mapRateLimitMessage(msg as SDKRateLimitEvent, base);
+
+      case 'stream_event':
+        return this.mapStreamEvent(msg as SDKPartialAssistantMessage, base);
+
+      // Non-actionable message types — ignore
+      case 'user':
+      case 'auth_status':
+      case 'prompt_suggestion':
+        return null;
+
+      default:
+        return null;
+    }
+  }
+
+  /**
+   * Map an SDKMessage and emit the resulting event (if any).
+   * Convenience method combining mapSDKMessage + emitEvent.
+   */
+  mapAndEmit(msg: SDKMessage, context: EventStreamContext = {}): GSDEvent | null {
+    const event = this.mapSDKMessage(msg, context);
+    if (event) {
+      this.emitEvent(event);
+    }
+    return event;
+  }
+
+  // ─── Cost tracking ───────────────────────────────────────────────────
+
+  /** Report the active session's cost alongside the cumulative total across sessions. */
+  getCost(): { session: number; cumulative: number } {
+    const { activeSessionId, sessions, cumulativeCostUsd } = this.costTracker;
+
+    // No active session recorded yet (or its bucket is missing) → session cost is 0
+    let session = 0;
+    if (activeSessionId) {
+      session = sessions.get(activeSessionId)?.costUsd ?? 0;
+    }
+
+    return { session, cumulative: cumulativeCostUsd };
+  }
+
+  /** Record a session's latest total cost, mark it active, and fold the change into the cumulative total. */
+  private updateCost(sessionId: string, costUsd: number): void {
+    const tracker = this.costTracker;
+
+    // costUsd replaces the stored figure for this session, so only the
+    // difference from the previously stored value is added to the cumulative sum.
+    const priorCostUsd = tracker.sessions.get(sessionId)?.costUsd ?? 0;
+    tracker.cumulativeCostUsd += costUsd - priorCostUsd;
+    tracker.sessions.set(sessionId, { sessionId, costUsd });
+    tracker.activeSessionId = sessionId;
+  }
+
+  // ─── Private mappers ─────────────────────────────────────────────────
+
+  private mapSystemMessage(
+    msg: SDKSystemMessage | SDKAPIRetryMessage | SDKStatusMessage | SDKCompactBoundaryMessage | SDKTaskStartedMessage | SDKTaskProgressMessage | SDKTaskNotificationMessage,
+    base: Omit<GSDEvent, 'type'>,
+  ): GSDEvent | null {
+    // All system messages have a subtype
+    const subtype = (msg as { subtype: string }).subtype;
+
+    switch (subtype) {
+      case 'init': {
+        const initMsg = msg as SDKSystemMessage;
+        return {
+          ...base,
+          type: GSDEventType.SessionInit,
+          model: initMsg.model,
+          tools: initMsg.tools,
+          cwd: initMsg.cwd,
+        } as GSDSessionInitEvent;
+      }
+
+      case 'api_retry': {
+        const retryMsg = msg as SDKAPIRetryMessage;
+        return {
+          ...base,
+          type: GSDEventType.APIRetry,
+          attempt: retryMsg.attempt,
+          maxRetries: retryMsg.max_retries,
+          retryDelayMs: retryMsg.retry_delay_ms,
+          errorStatus: retryMsg.error_status,
+        } as GSDAPIRetryEvent;
+      }
+
+      case 'status': {
+        const statusMsg = msg as SDKStatusMessage;
+        return {
+          ...base,
+          type: GSDEventType.StatusChange,
+          status: statusMsg.status,
+        } as GSDStatusChangeEvent;
+      }
+
+      case 'compact_boundary': {
+        const compactMsg = msg as SDKCompactBoundaryMessage;
+        return {
+          ...base,
+          type: GSDEventType.CompactBoundary,
+          trigger: compactMsg.compact_metadata.trigger,
+          preTokens: compactMsg.compact_metadata.pre_tokens,
+        } as GSDCompactBoundaryEvent;
+      }
+
+      case 'task_started': {
+        const taskMsg = msg as SDKTaskStartedMessage;
+        return {
+          ...base,
+          type: GSDEventType.TaskStarted,
+          taskId: taskMsg.task_id,
+          description: taskMsg.description,
+          taskType: taskMsg.task_type,
+        } as GSDTaskStartedEvent;
+      }
+
+      case 'task_progress': {
+        const progressMsg = msg as SDKTaskProgressMessage;
+        return {
+          ...base,
+          type: GSDEventType.TaskProgress,
+          taskId: progressMsg.task_id,
+          description: progressMsg.description,
+          totalTokens: progressMsg.usage.total_tokens,
+          toolUses: progressMsg.usage.tool_uses,
+          durationMs: progressMsg.usage.duration_ms,
+          lastToolName: progressMsg.last_tool_name,
+        } as GSDTaskProgressEvent;
+      }
+
+      case 'task_notification': {
+        const notifMsg = msg as SDKTaskNotificationMessage;
+        return {
+          ...base,
+          type: GSDEventType.TaskNotification,
+          taskId: notifMsg.task_id,
+          status: notifMsg.status,
+          summary: notifMsg.summary,
+        } as GSDTaskNotificationEvent;
+      }
+
+      // Non-actionable system subtypes
+      case 'hook_started':
+      case 'hook_progress':
+      case 'hook_response':
+      case 'local_command_output':
+      case 'session_state_changed':
+      case 'files_persisted':
+      case 'elicitation_complete':
+        return null;
+
+      default:
+        return null;
+    }
+  }
+
+  private mapAssistantMessage(
+    msg: SDKAssistantMessage,
+    base: Omit<GSDEvent, 'type'>,
+  ): GSDEvent | null {
+    const events: GSDEvent[] = [];
+
+    // Content blocks are a discriminated union on 'type'; all text blocks are joined (no separator) into one AssistantText event
+    const content = msg.message.content as Array<{ type: string; [key: string]: unknown }>;
+
+    const textBlocks = content.filter(
+      (b): b is { type: 'text'; text: string } => b.type === 'text',
+    );
+    if (textBlocks.length > 0) {
+      const text = textBlocks.map(b => b.text).join('');
+      if (text.length > 0) {
+        events.push({
+          ...base,
+          type: GSDEventType.AssistantText,
+          text,
+        } as GSDAssistantTextEvent);
+      }
+    }
+
+    // Each tool_use block becomes its own ToolCall event, queued after the text event (if any)
+    const toolUseBlocks = content.filter(
+      (b): b is { type: 'tool_use'; id: string; name: string; input: Record<string, unknown> } =>
+        b.type === 'tool_use',
+    );
+    for (const block of toolUseBlocks) {
+      events.push({
+        ...base,
+        type: GSDEventType.ToolCall,
+        toolName: block.name,
+        toolUseId: block.id,
+        input: block.input as Record<string, unknown>,
+      } as GSDToolCallEvent);
+    }
+
+    // This mapper returns at most one event, so a message that produced
+    // several (text plus tool calls, or multiple tool calls) is handled
+    // below: earlier events are emitted here and only the last is returned.
+    if (events.length === 0) return null;
+    if (events.length === 1) return events[0]!;
+
+    // Side effect: emit all but the last event directly via emitEvent();
+    // the last one is returned and left for the caller to emit or inspect.
+    for (let i = 0; i < events.length - 1; i++) {
+      this.emitEvent(events[i]!);
+    }
+    return events[events.length - 1]!;
+  }
+
+  private mapResultMessage(
+    msg: SDKResultSuccess | SDKResultError,
+    base: Omit<GSDEvent, 'type'>,
+  ): GSDEvent {
+    // Update cost tracking
+    this.updateCost(msg.session_id, msg.total_cost_usd);
+
+    if (msg.subtype === 'success') {
+      const successMsg = msg as SDKResultSuccess;
+      return {
+        ...base,
+        type: GSDEventType.SessionComplete,
+        success: true,
+        totalCostUsd: successMsg.total_cost_usd,
+        durationMs: successMsg.duration_ms,
+        numTurns: successMsg.num_turns,
+        result: successMsg.result,
+      } as GSDSessionCompleteEvent;
+    }
+
+    const errorMsg = msg as SDKResultError;
+    return {
+      ...base,
+      type: GSDEventType.SessionError,
+      success: false,
+      totalCostUsd: errorMsg.total_cost_usd,
+      durationMs: errorMsg.duration_ms,
+      numTurns: errorMsg.num_turns,
+      errorSubtype: errorMsg.subtype,
+      errors: errorMsg.errors,
+    } as GSDSessionErrorEvent;
+  }
+
+  private mapToolProgressMessage(
+    msg: SDKToolProgressMessage,
+    base: Omit<GSDEvent, 'type'>,
+  ): GSDToolProgressEvent {
+    return {
+      ...base,
+      type: GSDEventType.ToolProgress,
+      toolName: msg.tool_name,
+      toolUseId: msg.tool_use_id,
+      elapsedSeconds: msg.elapsed_time_seconds,
+    } as GSDToolProgressEvent;
+  }
+
+  private mapToolUseSummaryMessage(
+    msg: SDKToolUseSummaryMessage,
+    base: Omit<GSDEvent, 'type'>,
+  ): GSDToolUseSummaryEvent {
+    return {
+      ...base,
+      type: GSDEventType.ToolUseSummary,
+      summary: msg.summary,
+      toolUseIds: msg.preceding_tool_use_ids,
+    } as GSDToolUseSummaryEvent;
+  }
+
+  private mapRateLimitMessage(
+    msg: SDKRateLimitEvent,
+    base: Omit<GSDEvent, 'type'>,
+  ): GSDRateLimitEventType {
+    return {
+      ...base,
+      type: GSDEventType.RateLimit,
+      status: msg.rate_limit_info.status,
+      resetsAt: msg.rate_limit_info.resetsAt,
+      utilization: msg.rate_limit_info.utilization,
+    } as GSDRateLimitEventType;
+  }
+
+  private mapStreamEvent(
+    msg: SDKPartialAssistantMessage,
+    base: Omit<GSDEvent, 'type'>,
+  ): GSDStreamEvent {
+    return {
+      ...base,
+      type: GSDEventType.StreamEvent,
+      event: msg.event,
+    } as GSDStreamEvent;
+  }
+}
--- a/sdk/src/gsd-tools.test.ts
+++ b/sdk/src/gsd-tools.test.ts
@@ -0,0 +1,360 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { GSDTools, GSDToolsError } from './gsd-tools.js';
+import { mkdir, writeFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+
+describe('GSDTools', () => {
+  let tmpDir: string;
+  let fixtureDir: string;
+
+  beforeEach(async () => {
+    tmpDir = join(tmpdir(), `gsd-tools-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+    fixtureDir = join(tmpDir, 'fixtures');
+    await mkdir(fixtureDir, { recursive: true });
+    await mkdir(join(tmpDir, '.planning'), { recursive: true });
+  });
+
+  afterEach(async () => {
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  // ─── Helper: create a Node script that outputs something ────────────────
+
+  async function createScript(name: string, code: string): Promise<string> {
+    const scriptPath = join(fixtureDir, name);
+    await writeFile(scriptPath, code, { mode: 0o755 });
+    return scriptPath;
+  }
+
+  // ─── exec() tests ──────────────────────────────────────────────────────
+
+  describe('exec()', () => {
+    it('parses valid JSON output', async () => {
+      // Create a script that ignores args and outputs JSON
+      const scriptPath = await createScript(
+        'echo-json.cjs',
+        `process.stdout.write(JSON.stringify({ status: "ok", count: 42 }));`,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.exec('state', ['load']);
+
+      expect(result).toEqual({ status: 'ok', count: 42 });
+    });
+
+    it('handles @file: prefix by reading referenced file', async () => {
+      // Write a large JSON result to a file
+      const resultFile = join(fixtureDir, 'big-result.json');
+      const bigData = { items: Array.from({ length: 100 }, (_, i) => ({ id: i })) };
+      await writeFile(resultFile, JSON.stringify(bigData));
+
+      // Script outputs @file: prefix
+      const scriptPath = await createScript(
+        'file-ref.cjs',
+        `process.stdout.write('@file:${resultFile.replace(/\\/g, '\\\\')}');`,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.exec('state', ['load']);
+
+      expect(result).toEqual(bigData);
+    });
+
+    it('returns null for empty stdout', async () => {
+      const scriptPath = await createScript(
+        'empty-output.cjs',
+        `// outputs nothing`,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.exec('state', ['load']);
+
+      expect(result).toBeNull();
+    });
+
+    it('throws GSDToolsError on non-zero exit code', async () => {
+      const scriptPath = await createScript(
+        'fail.cjs',
+        `process.stderr.write('something went wrong\\n'); process.exit(1);`,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+
+      try {
+        await tools.exec('state', ['load']);
+        expect.fail('Should have thrown');
+      } catch (err) {
+        expect(err).toBeInstanceOf(GSDToolsError);
+        const gsdErr = err as GSDToolsError;
+        expect(gsdErr.command).toBe('state');
+        expect(gsdErr.args).toEqual(['load']);
+        expect(gsdErr.stderr).toContain('something went wrong');
+        expect(gsdErr.exitCode).toBeGreaterThan(0);
+      }
+    });
+
+    it('throws GSDToolsError with context when gsd-tools.cjs not found', async () => {
+      const tools = new GSDTools({
+        projectDir: tmpDir,
+        gsdToolsPath: '/nonexistent/path/gsd-tools.cjs',
+      });
+
+      await expect(tools.exec('state', ['load'])).rejects.toThrow(GSDToolsError);
+    });
+
+    it('throws parse error when stdout is non-JSON', async () => {
+      const scriptPath = await createScript(
+        'bad-json.cjs',
+        `process.stdout.write('Not JSON at all');`,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+
+      try {
+        await tools.exec('state', ['load']);
+        expect.fail('Should have thrown');
+      } catch (err) {
+        expect(err).toBeInstanceOf(GSDToolsError);
+        const gsdErr = err as GSDToolsError;
+        expect(gsdErr.message).toContain('Failed to parse');
+        expect(gsdErr.message).toContain('Not JSON at all');
+      }
+    });
+
+    it('throws when @file: points to nonexistent file', async () => {
+      const scriptPath = await createScript(
+        'bad-file-ref.cjs',
+        `process.stdout.write('@file:/tmp/does-not-exist-${Date.now()}.json');`,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+
+      await expect(tools.exec('state', ['load'])).rejects.toThrow(GSDToolsError);
+    });
+
+    it('handles timeout by killing child process', async () => {
+      const scriptPath = await createScript(
+        'hang.cjs',
+        `setTimeout(() => {}, 60000); // hang for 60s`,
+      );
+
+      const tools = new GSDTools({
+        projectDir: tmpDir,
+        gsdToolsPath: scriptPath,
+        timeoutMs: 500,
+      });
+
+      try {
+        await tools.exec('state', ['load']);
+        expect.fail('Should have thrown');
+      } catch (err) {
+        expect(err).toBeInstanceOf(GSDToolsError);
+        const gsdErr = err as GSDToolsError;
+        expect(gsdErr.message).toContain('timed out');
+      }
+    }, 10_000);
+  });
+
+  // ─── Typed method tests ────────────────────────────────────────────────
+
+  describe('typed methods', () => {
+    it('stateLoad() calls exec with correct args', async () => {
+      const scriptPath = await createScript(
+        'state-load.cjs',
+        `
+        const args = process.argv.slice(2);
+        // Script receives: state load --raw
+        if (args[0] === 'state' && args[1] === 'load' && args.includes('--raw')) {
+          process.stdout.write('phase=3\\nstatus=executing');
+        } else {
+          process.stderr.write('unexpected args: ' + args.join(' '));
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.stateLoad();
+
+      expect(result).toBe('phase=3\nstatus=executing');
+    });
+
+    it('commit() passes message and optional files', async () => {
+      const scriptPath = await createScript(
+        'commit.cjs',
+        `
+        const args = process.argv.slice(2);
+        if (args.join('|') !== 'commit|test message|--files|file1.md|file2.md|--raw') process.exit(1);
+        process.stdout.write('f89ae07'); // stands in for the git SHA
+        `,
+      );
+      // The script exits non-zero unless argv is exactly: commit <msg> --files f1 f2 --raw
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.commit('test message', ['file1.md', 'file2.md']);
+
+      expect(result).toBe('f89ae07');
+    });
+
+    it('roadmapAnalyze() calls roadmap analyze', async () => {
+      const scriptPath = await createScript(
+        'roadmap.cjs',
+        `
+        const args = process.argv.slice(2);
+        if (args[0] === 'roadmap' && args[1] === 'analyze') {
+          process.stdout.write(JSON.stringify({ phases: [] }));
+        } else {
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.roadmapAnalyze();
+
+      expect(result).toEqual({ phases: [] });
+    });
+
+    it('verifySummary() passes path argument', async () => {
+      const scriptPath = await createScript(
+        'verify.cjs',
+        `
+        const args = process.argv.slice(2);
+        if (args[0] === 'verify-summary' && args[1] === '/path/to/SUMMARY.md') {
+          process.stdout.write('passed');
+        } else {
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.verifySummary('/path/to/SUMMARY.md');
+
+      expect(result).toBe('passed');
+    });
+  });
+
+  // ─── Integration-style test ────────────────────────────────────────────
+
+  describe('integration', () => {
+    it('handles large JSON output (>100KB)', async () => {
+      const largeArray = Array.from({ length: 5000 }, (_, i) => ({
+        id: i,
+        name: `item-${i}`,
+        data: 'x'.repeat(20),
+      }));
+      const largeJson = JSON.stringify(largeArray);
+
+      const scriptPath = await createScript(
+        'large-output.cjs',
+        `process.stdout.write(${JSON.stringify(largeJson)});`,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.exec('state', ['load']);
+
+      expect(Array.isArray(result)).toBe(true);
+      expect((result as unknown[]).length).toBe(5000);
+    });
+  });
+
+  // ─── initNewProject() tests ────────────────────────────────────────────
+
+  describe('initNewProject()', () => {
+    it('calls init new-project and returns typed result', async () => {
+      const mockResult = {
+        researcher_model: 'claude-sonnet-4-6',
+        synthesizer_model: 'claude-sonnet-4-6',
+        roadmapper_model: 'claude-sonnet-4-6',
+        commit_docs: true,
+        project_exists: false,
+        has_codebase_map: false,
+        planning_exists: false,
+        has_existing_code: false,
+        has_package_file: false,
+        is_brownfield: false,
+        needs_codebase_map: false,
+        has_git: true,
+        brave_search_available: false,
+        firecrawl_available: false,
+        exa_search_available: false,
+        project_path: '.planning/PROJECT.md',
+        project_root: '/tmp/test',
+      };
+
+      const scriptPath = await createScript(
+        'init-new-project.cjs',
+        `
+        const args = process.argv.slice(2);
+        if (args[0] === 'init' && args[1] === 'new-project' && args.includes('--raw')) {
+          process.stdout.write(JSON.stringify(${JSON.stringify(mockResult)}));
+        } else {
+          process.stderr.write('unexpected args: ' + args.join(' '));
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.initNewProject();
+
+      expect(result.researcher_model).toBe('claude-sonnet-4-6');
+      expect(result.project_exists).toBe(false);
+      expect(result.has_git).toBe(true);
+      expect(result.is_brownfield).toBe(false);
+      expect(result.project_path).toBe('.planning/PROJECT.md');
+    });
+
+    it('propagates errors from gsd-tools', async () => {
+      const scriptPath = await createScript(
+        'init-fail.cjs',
+        `process.stderr.write('init failed\\n'); process.exit(1);`,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+
+      await expect(tools.initNewProject()).rejects.toThrow(GSDToolsError);
+    });
+  });
+
+  // ─── configSet() tests ─────────────────────────────────────────────────
+
+  describe('configSet()', () => {
+    it('calls config-set with key and value args', async () => {
+      const scriptPath = await createScript(
+        'config-set.cjs',
+        `
+        const args = process.argv.slice(2);
+        if (args[0] === 'config-set' && args[1] === 'workflow.auto_advance' && args[2] === 'true' && args.includes('--raw')) {
+          process.stdout.write('workflow.auto_advance=true');
+        } else {
+          process.stderr.write('unexpected args: ' + args.join(' '));
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.configSet('workflow.auto_advance', 'true');
+
+      expect(result).toBe('workflow.auto_advance=true');
+    });
+
+    it('passes string values without coercion', async () => {
+      const scriptPath = await createScript(
+        'config-set-str.cjs',
+        `
+        const args = process.argv.slice(2);
+        // config-set mode yolo --raw
+        process.stdout.write(args[1] + '=' + args[2]);
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.configSet('mode', 'yolo');
+
+      expect(result).toBe('mode=yolo');
+    });
+  });
+});
--- a/sdk/src/gsd-tools.ts
+++ b/sdk/src/gsd-tools.ts
@@ -0,0 +1,284 @@
+/**
+ * GSD Tools Bridge — shells out to `gsd-tools.cjs` for state management.
+ *
+ * All `.planning/` state operations go through gsd-tools.cjs rather than
+ * reimplementing 12K+ lines of logic.
+ */
+
+import { execFile } from 'node:child_process';
+import { readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { homedir } from 'node:os';
+import type { InitNewProjectInfo, PhaseOpInfo, PhasePlanIndex, RoadmapAnalysis } from './types.js';
+
+// ─── Error type ──────────────────────────────────────────────────────────────
+
+export class GSDToolsError extends Error {
+  constructor(
+    message: string,
+    public readonly command: string,
+    public readonly args: string[],
+    public readonly exitCode: number | null,
+    public readonly stderr: string,
+  ) {
+    super(message);
+    this.name = 'GSDToolsError';
+  }
+}
+
+// ─── GSDTools class ──────────────────────────────────────────────────────────
+
+const DEFAULT_TIMEOUT_MS = 30_000;
+
+export class GSDTools {
+  private readonly projectDir: string;
+  private readonly gsdToolsPath: string;
+  private readonly timeoutMs: number;
+
+  constructor(opts: {
+    projectDir: string;
+    gsdToolsPath?: string;
+    timeoutMs?: number;
+  }) {
+    this.projectDir = opts.projectDir;
+    this.gsdToolsPath =
+      opts.gsdToolsPath ??
+      join(homedir(), '.claude', 'get-shit-done', 'bin', 'gsd-tools.cjs');
+    this.timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+  }
+
+  // ─── Core exec ───────────────────────────────────────────────────────────
+
+  /**
+   * Run `node <gsdToolsPath> <command> ...args --raw` in projectDir and resolve with the
+   * parsed JSON output (null for empty stdout; `@file:<path>` output is read from that file).
+   * Rejects with GSDToolsError on timeout (exitCode null), other exec errors, or unparseable output.
+   */
+  async exec(command: string, args: string[] = []): Promise<unknown> {
+    const fullArgs = [this.gsdToolsPath, command, ...args, '--raw'];
+
+    return new Promise<unknown>((resolve, reject) => {
+      const child = execFile(
+        'node',
+        fullArgs,
+        {
+          cwd: this.projectDir,
+          maxBuffer: 10 * 1024 * 1024, // 10MB
+          timeout: this.timeoutMs,
+          env: { ...process.env },
+        },
+        async (error, stdout, stderr) => {
+          const stderrStr = stderr?.toString() ?? '';
+
+          if (error) {
+            // Distinguish timeout from other errors
+            if (error.killed || (error as NodeJS.ErrnoException).code === 'ETIMEDOUT') {
+              reject(
+                new GSDToolsError(
+                  `gsd-tools timed out after ${this.timeoutMs}ms: ${command} ${args.join(' ')}`,
+                  command,
+                  args,
+                  null,
+                  stderrStr,
+                ),
+              );
+              return;
+            }
+
+            reject(
+              new GSDToolsError(
+                `gsd-tools exited with code ${error.code ?? 'unknown'}: ${command} ${args.join(' ')}${stderrStr ? `\n${stderrStr}` : ''}`,
+                command,
+                args,
+                typeof error.code === 'number' ? error.code : (error as { status?: number }).status ?? 1,
+                stderrStr,
+              ),
+            );
+            return;
+          }
+
+          const raw = stdout?.toString() ?? '';
+
+          try {
+            const parsed = await this.parseOutput(raw);
+            resolve(parsed);
+          } catch (parseErr) {
+            reject(
+              new GSDToolsError(
+                `Failed to parse gsd-tools output for "${command}": ${parseErr instanceof Error ? parseErr.message : String(parseErr)}\nRaw output: ${raw.slice(0, 500)}`,
+                command,
+                args,
+                0,
+                stderrStr,
+              ),
+            );
+          }
+        },
+      );
+
+      // Child 'error' event (e.g. the process could not be spawned): reject with a null exit code
+      child.on('error', (err) => {
+        reject(
+          new GSDToolsError(
+            `Failed to execute gsd-tools: ${err.message}`,
+            command,
+            args,
+            null,
+            '',
+          ),
+        );
+      });
+    });
+  }
+
+  /**
+   * Turn raw gsd-tools stdout into a value: null when blank, otherwise JSON.parse of the
+   * text itself or, for `@file:<path>` output, of the UTF-8 contents of that file.
+   */
+  private async parseOutput(raw: string): Promise<unknown> {
+    const FILE_PREFIX = '@file:';
+    const output = raw.trim();
+    if (output.length === 0) return null;
+    // Inline payload: stdout is the JSON document itself
+    if (!output.startsWith(FILE_PREFIX)) {
+      return JSON.parse(output);
+    }
+
+    // Indirect payload: the rest of stdout names the file that holds the JSON
+    const referencedPath = output.slice(FILE_PREFIX.length).trim();
+    const fileContents = await readFile(referencedPath, 'utf-8');
+    return JSON.parse(fileContents);
+  }
+
+  // ─── Raw exec (no JSON parsing) ───────────────────────────────────────
+
+  /**
+   * Execute a gsd-tools command and resolve with its trimmed stdout, without JSON parsing.
+   * Use for commands like `config-set` that return plain text, not JSON. Rejects with
+   * GSDToolsError; as in exec(), a timeout is reported as "timed out" with a null exit code.
+   */
+  async execRaw(command: string, args: string[] = []): Promise<string> {
+    const fullArgs = [this.gsdToolsPath, command, ...args, '--raw'];
+
+    return new Promise<string>((resolve, reject) => {
+      const child = execFile(
+        'node',
+        fullArgs,
+        {
+          cwd: this.projectDir,
+          maxBuffer: 10 * 1024 * 1024, // 10MB
+          timeout: this.timeoutMs,
+          env: { ...process.env },
+        },
+        (error, stdout, stderr) => {
+          const stderrStr = stderr?.toString() ?? '';
+          if (error) {
+            const invocation = `${command} ${args.join(' ')}`;
+            // A child killed after `timeout` elapsed is reported as a timeout (same
+            // test as exec()) rather than as "exited with code unknown" / exit code 1.
+            const timedOut = error.killed || (error as NodeJS.ErrnoException).code === 'ETIMEDOUT';
+            const message = timedOut
+              ? `gsd-tools timed out after ${this.timeoutMs}ms: ${invocation}`
+              : `gsd-tools exited with code ${error.code ?? 'unknown'}: ${invocation}${stderrStr ? `\n${stderrStr}` : ''}`;
+            const exitCode =
+              typeof error.code === 'number' ? error.code : (error as { status?: number }).status ?? 1;
+            reject(
+              new GSDToolsError(message, command, args, timedOut ? null : exitCode, stderrStr),
+            );
+            return;
+          }
+          resolve((stdout?.toString() ?? '').trim());
+        },
+      );
+
+      // Child 'error' event (e.g. the process could not be spawned): reject with a null exit code
+      child.on('error', (err) => {
+        reject(
+          new GSDToolsError(`Failed to execute gsd-tools: ${err.message}`, command, args, null, ''),
+        );
+      });
+    });
+  }
+
+  // ─── Typed convenience methods ─────────────────────────────────────────
+
+  async stateLoad(): Promise<string> {
+    return this.execRaw('state', ['load']);
+  }
+
+  async roadmapAnalyze(): Promise<RoadmapAnalysis> {
+    return this.exec('roadmap', ['analyze']) as Promise<RoadmapAnalysis>;
+  }
+
+  async phaseComplete(phase: string): Promise<string> {
+    return this.execRaw('phase', ['complete', phase]);
+  }
+
+  async commit(message: string, files?: string[]): Promise<string> {
+    const args = [message];
+    if (files?.length) {
+      args.push('--files', ...files);
+    }
+    return this.execRaw('commit', args);
+  }
+
+  async verifySummary(path: string): Promise<string> {
+    return this.execRaw('verify-summary', [path]);
+  }
+
+  async initExecutePhase(phase: string): Promise<string> {
+    return this.execRaw('state', ['begin-phase', '--phase', phase]);
+  }
+
+  /**
+   * Query phase state from gsd-tools.cjs `init phase-op`.
+   * Returns a typed PhaseOpInfo describing what exists on disk for this phase.
+   */
+  async initPhaseOp(phaseNumber: string): Promise<PhaseOpInfo> {
+    const result = await this.exec('init', ['phase-op', phaseNumber]);
+    return result as PhaseOpInfo;
+  }
+
+  /**
+   * Get a config value by running `config get <key>` and returning the parsed output. NOTE(review): the setter shells out to the hyphenated `config-set`; confirm `config get` (not `config-get`) is what gsd-tools.cjs expects.
+   */
+  async configGet(key: string): Promise<string | null> {
+    const result = await this.exec('config', ['get', key]);
+    return result as string | null;
+  }
+
+  /**
+   * Begin phase state tracking via `state begin-phase --phase <n>`; resolves with raw stdout. NOTE(review): initExecutePhase() issues this exact command — confirm the duplication is intended.
+   */
+  async stateBeginPhase(phaseNumber: string): Promise<string> {
+    return this.execRaw('state', ['begin-phase', '--phase', phaseNumber]);
+  }
+
+  /**
+   * Get the plan index for a phase, grouping plans into dependency waves.
+   * Returns typed PhasePlanIndex with wave assignments and completion status.
+   */
+  async phasePlanIndex(phaseNumber: string): Promise<PhasePlanIndex> {
+    const result = await this.exec('phase-plan-index', [phaseNumber]);
+    return result as PhasePlanIndex;
+  }
+
+  /**
+   * Query new-project init state from gsd-tools.cjs `init new-project`.
+   * Returns project metadata, model configs, brownfield detection, etc.
+   */
+  async initNewProject(): Promise<InitNewProjectInfo> {
+    const result = await this.exec('init', ['new-project']);
+    return result as InitNewProjectInfo;
+  }
+
+  /**
+   * Set a config value via gsd-tools.cjs `config-set`.
+   * Handles type coercion (booleans, numbers, JSON) on the gsd-tools side.
+   * Note: config-set returns `key=value` text, not JSON, so we use execRaw.
+   */
+  async configSet(key: string, value: string): Promise<string> {
+    return this.execRaw('config-set', [key, value]);
+  }
+}
--- a/sdk/src/index.ts
+++ b/sdk/src/index.ts
@@ -0,0 +1,312 @@
+/**
+ * GSD SDK — Public API for running GSD plans programmatically.
+ *
+ * The GSD class composes plan parsing, config loading, prompt building,
+ * and session running into a single `executePlan()` call. It also exposes
+ * `runPhase()` and `run()` for phase-level and milestone-level orchestration.
+ *
+ * @example
+ * ```typescript
+ * import { GSD } from '@gsd/sdk';
+ *
+ * const gsd = new GSD({ projectDir: '/path/to/project' });
+ * const result = await gsd.executePlan('.planning/phases/01-auth/01-auth-01-PLAN.md');
+ *
+ * if (result.success) {
+ *   console.log(`Plan completed in ${result.durationMs}ms, cost: $${result.totalCostUsd}`);
+ * } else {
+ *   console.error(`Plan failed: ${result.error?.messages.join(', ')}`);
+ * }
+ * ```
+ */
+
+import { readFile } from 'node:fs/promises';
+import { join, resolve } from 'node:path';
+import { homedir } from 'node:os';
+
+import type { GSDOptions, PlanResult, SessionOptions, GSDEvent, TransportHandler, PhaseRunnerOptions, PhaseRunnerResult, MilestoneRunnerOptions, MilestoneRunnerResult, RoadmapPhaseInfo } from './types.js';
+import { GSDEventType } from './types.js';
+import { parsePlan, parsePlanFile } from './plan-parser.js';
+import { loadConfig } from './config.js';
+import { GSDTools } from './gsd-tools.js';
+import { runPlanSession } from './session-runner.js';
+import { buildExecutorPrompt, parseAgentTools } from './prompt-builder.js';
+import { GSDEventStream } from './event-stream.js';
+import { PhaseRunner } from './phase-runner.js';
+import { ContextEngine } from './context-engine.js';
+import { PromptFactory } from './phase-prompt.js';
+
+// ─── GSD class ───────────────────────────────────────────────────────────────
+
+/**
+ * Entry point of the SDK: holds per-instance defaults and a shared event
+ * stream, and offers three levels of execution — a single plan
+ * (`executePlan`), a single phase (`runPhase`) and a whole milestone (`run`).
+ */
+export class GSD {
+  /** Absolute project root; relative plan paths are resolved against it. */
+  private readonly projectDir: string;
+  /** Path to gsd-tools.cjs handed to every GSDTools instance created here. */
+  private readonly gsdToolsPath: string;
+  /** Model used by executePlan() when the per-call options do not name one. */
+  private readonly defaultModel?: string;
+  /** Budget used by executePlan() when the per-call options do not set one. */
+  private readonly defaultMaxBudgetUsd: number;
+  /** Turn limit used by executePlan() when the per-call options do not set one. */
+  private readonly defaultMaxTurns: number;
+  /** When true, runPhase() forces auto_advance on and skip_discuss off. */
+  private readonly autoMode: boolean;
+  /** Event stream shared by every session and runner started from this instance. */
+  readonly eventStream: GSDEventStream;
+
+  /**
+   * @param options - Project directory plus optional overrides. Unset values fall
+   *   back to: gsd-tools at `~/.claude/get-shit-done/bin/gsd-tools.cjs`,
+   *   a budget of 5.0 USD, 50 turns, and auto mode off.
+   */
+  constructor(options: GSDOptions) {
+    this.projectDir = resolve(options.projectDir);
+    this.gsdToolsPath =
+      options.gsdToolsPath ??
+      join(homedir(), '.claude', 'get-shit-done', 'bin', 'gsd-tools.cjs');
+    this.defaultModel = options.model;
+    this.defaultMaxBudgetUsd = options.maxBudgetUsd ?? 5.0;
+    this.defaultMaxTurns = options.maxTurns ?? 50;
+    this.autoMode = options.autoMode ?? false;
+    this.eventStream = new GSDEventStream();
+  }
+
+  /**
+   * Execute a single GSD plan file.
+   *
+   * Reads the plan from disk, parses it, loads project config,
+   * optionally reads the agent definition, then runs a plan session.
+   *
+   * @param planPath - Path to the PLAN.md file (absolute or relative to projectDir)
+   * @param options - Per-execution overrides; unset fields fall back to the instance defaults
+   * @returns PlanResult as produced by `runPlanSession`
+   */
+  async executePlan(planPath: string, options?: SessionOptions): Promise<PlanResult> {
+    // An absolute planPath wins; a relative one is anchored at the project dir.
+    const absolutePlanPath = resolve(this.projectDir, planPath);
+
+    const plan = await parsePlanFile(absolutePlanPath);
+    const config = await loadConfig(this.projectDir);
+
+    // Optional: undefined when no gsd-executor.md could be read.
+    const agentDef = await this.loadAgentDefinition();
+
+    // Per-call options take precedence over the constructor defaults.
+    const sessionOptions: SessionOptions = {
+      maxTurns: options?.maxTurns ?? this.defaultMaxTurns,
+      maxBudgetUsd: options?.maxBudgetUsd ?? this.defaultMaxBudgetUsd,
+      model: options?.model ?? this.defaultModel,
+      cwd: options?.cwd ?? this.projectDir,
+      allowedTools: options?.allowedTools,
+    };
+
+    return runPlanSession(plan, config, sessionOptions, agentDef, this.eventStream, {
+      phase: undefined, // Phase context set by higher-level orchestrators
+      planName: plan.frontmatter.plan,
+    });
+  }
+
+  /**
+   * Subscribe a simple handler to receive all GSD events.
+   */
+  onEvent(handler: (event: GSDEvent) => void): void {
+    this.eventStream.on('event', handler);
+  }
+
+  /**
+   * Subscribe a transport handler to receive all GSD events.
+   * Transports provide structured onEvent/close lifecycle.
+   */
+  addTransport(handler: TransportHandler): void {
+    this.eventStream.addTransport(handler);
+  }
+
+  /**
+   * Create a new GSDTools instance bound to this project directory and
+   * gsd-tools path. Every call returns a fresh instance.
+   */
+  createTools(): GSDTools {
+    return new GSDTools({
+      projectDir: this.projectDir,
+      gsdToolsPath: this.gsdToolsPath,
+    });
+  }
+
+  /**
+   * Run a full phase lifecycle: discuss → research → plan → execute → verify → advance.
+   *
+   * Creates the necessary collaborators (GSDTools, PromptFactory, ContextEngine),
+   * loads project config, instantiates a PhaseRunner, and delegates to `runner.run()`.
+   *
+   * @param phaseNumber - The phase number to execute (e.g. "01", "02")
+   * @param options - Per-phase overrides, passed through to `PhaseRunner.run()`
+   * @returns The PhaseRunnerResult returned by the runner
+   */
+  async runPhase(phaseNumber: string, options?: PhaseRunnerOptions): Promise<PhaseRunnerResult> {
+    const tools = this.createTools();
+    const promptFactory = new PromptFactory();
+    const contextEngine = new ContextEngine(this.projectDir);
+    const config = await loadConfig(this.projectDir);
+
+    // Auto mode: force auto_advance on and skip_discuss off so self-discuss kicks in.
+    // Only the freshly loaded in-memory config is changed here.
+    if (this.autoMode) {
+      config.workflow.auto_advance = true;
+      config.workflow.skip_discuss = false;
+    }
+
+    const runner = new PhaseRunner({
+      projectDir: this.projectDir,
+      tools,
+      promptFactory,
+      contextEngine,
+      eventStream: this.eventStream,
+      config,
+    });
+
+    return runner.run(phaseNumber, options);
+  }
+
+  /**
+   * Run a full milestone: discover phases, execute each incomplete one in order,
+   * re-discover after each completion to catch dynamically inserted phases.
+   *
+   * The loop stops at the first phase that fails or throws, or when
+   * `options.onPhaseComplete` returns `'stop'`. A thrown error is turned into a
+   * failed, zero-cost PhaseRunnerResult; the error object itself is not recorded.
+   *
+   * The MilestoneStart and MilestoneComplete events share one `sessionId` so
+   * consumers can correlate them.
+   *
+   * @param prompt - The user prompt describing the milestone goal (only echoed in the start event)
+   * @param options - Passed to every `runPhase()` call; may carry an `onPhaseComplete` callback
+   * @returns MilestoneRunnerResult with per-phase results, overall success, cost, and timing
+   */
+  async run(prompt: string, options?: MilestoneRunnerOptions): Promise<MilestoneRunnerResult> {
+    const tools = this.createTools();
+    const startTime = Date.now();
+    // Computed once: both milestone events must carry the same id.
+    const sessionId = `milestone-${startTime}`;
+    const phaseResults: PhaseRunnerResult[] = [];
+    let success = true;
+
+    // Discover initial phases
+    const initialAnalysis = await tools.roadmapAnalyze();
+    const incompletePhases = this.filterAndSortPhases(initialAnalysis.phases);
+
+    this.eventStream.emitEvent({
+      type: GSDEventType.MilestoneStart,
+      timestamp: new Date().toISOString(),
+      sessionId,
+      phaseCount: incompletePhases.length,
+      prompt,
+    });
+
+    // Always run the lowest-numbered incomplete phase, then re-discover.
+    // NOTE(review): a phase that succeeds but is still reported with
+    // roadmap_complete === false by the next roadmapAnalyze() is picked again.
+    let currentPhases = incompletePhases;
+
+    while (currentPhases.length > 0) {
+      const phase = currentPhases[0];
+
+      try {
+        const result = await this.runPhase(phase.number, options);
+        phaseResults.push(result);
+
+        if (!result.success) {
+          success = false;
+          break;
+        }
+
+        // Notify callback if present; a 'stop' verdict ends the milestone
+        // without marking it as failed.
+        if (options?.onPhaseComplete) {
+          const verdict = await options.onPhaseComplete(result, phase);
+          if (verdict === 'stop') {
+            break;
+          }
+        }
+
+        // Re-discover phases to catch dynamically inserted ones
+        const updatedAnalysis = await tools.roadmapAnalyze();
+        currentPhases = this.filterAndSortPhases(updatedAnalysis.phases);
+      } catch {
+        // Anything thrown above (phase run, callback, or re-discovery) is
+        // recorded as a failed placeholder result for the current phase.
+        phaseResults.push({
+          phaseNumber: phase.number,
+          phaseName: phase.phase_name,
+          steps: [],
+          success: false,
+          totalCostUsd: 0,
+          totalDurationMs: 0,
+        });
+        success = false;
+        break;
+      }
+    }
+
+    const totalCostUsd = phaseResults.reduce((sum, r) => sum + r.totalCostUsd, 0);
+    const totalDurationMs = Date.now() - startTime;
+
+    this.eventStream.emitEvent({
+      type: GSDEventType.MilestoneComplete,
+      timestamp: new Date().toISOString(),
+      sessionId,
+      success,
+      totalCostUsd,
+      totalDurationMs,
+      phasesCompleted: phaseResults.filter(r => r.success).length,
+    });
+
+    return {
+      success,
+      phases: phaseResults,
+      totalCostUsd,
+      totalDurationMs,
+    };
+  }
+
+  /**
+   * Filter to incomplete phases and sort numerically.
+   * Uses parseFloat to handle decimal phase numbers (e.g. '5.1').
+   * The input array is not modified: `filter` produces the copy that is sorted.
+   */
+  private filterAndSortPhases(phases: RoadmapPhaseInfo[]): RoadmapPhaseInfo[] {
+    return phases
+      .filter(p => !p.roadmap_complete)
+      .sort((a, b) => parseFloat(a.number) - parseFloat(b.number));
+  }
+
+  /**
+   * Load the gsd-executor agent definition if available.
+   *
+   * Tries `~/.claude/agents/gsd-executor.md` first, then
+   * `<projectDir>/agents/gsd-executor.md`. Any read error (not only a missing
+   * file) moves on to the next candidate; returns undefined when none is readable.
+   */
+  private async loadAgentDefinition(): Promise<string | undefined> {
+    const paths = [
+      join(homedir(), '.claude', 'agents', 'gsd-executor.md'),
+      join(this.projectDir, 'agents', 'gsd-executor.md'),
+    ];
+
+    for (const p of paths) {
+      try {
+        return await readFile(p, 'utf-8');
+      } catch {
+        // Not readable at this path, try next
+      }
+    }
+
+    return undefined;
+  }
+}
+
+// ─── Re-exports for advanced usage ──────────────────────────────────────────
+
+export { parsePlan, parsePlanFile } from './plan-parser.js';
+export { loadConfig } from './config.js';
+export type { GSDConfig } from './config.js';
+export { GSDTools, GSDToolsError } from './gsd-tools.js';
+export { runPlanSession, runPhaseStepSession } from './session-runner.js';
+export { buildExecutorPrompt, parseAgentTools } from './prompt-builder.js';
+export * from './types.js';
+
+// S02: Event stream, context, prompt, and logging modules
+export { GSDEventStream } from './event-stream.js';
+export type { EventStreamContext } from './event-stream.js';
+export { ContextEngine, PHASE_FILE_MANIFEST } from './context-engine.js';
+export type { FileSpec } from './context-engine.js';
+export { getToolsForPhase, PHASE_AGENT_MAP, PHASE_DEFAULT_TOOLS } from './tool-scoping.js';
+export { PromptFactory, extractBlock, extractSteps, PHASE_WORKFLOW_MAP } from './phase-prompt.js';
+export { GSDLogger } from './logger.js';
+export type { LogLevel, LogEntry, GSDLoggerOptions } from './logger.js';
+
+// S03: Phase lifecycle state machine
+export { PhaseRunner, PhaseRunnerError } from './phase-runner.js';
+export type { PhaseRunnerDeps, VerificationOutcome } from './phase-runner.js';
+
+// S05: Transports
+export { CLITransport } from './cli-transport.js';
+export { WSTransport } from './ws-transport.js';
+export type { WSTransportOptions } from './ws-transport.js';
+
+// Init workflow
+export { InitRunner } from './init-runner.js';
+export type { InitRunnerDeps } from './init-runner.js';
+export type { InitConfig, InitResult, InitStepResult, InitStepName } from './types.js';
--- a/sdk/src/init-runner.test.ts
+++ b/sdk/src/init-runner.test.ts
@@ -0,0 +1,563 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { mkdir, writeFile, rm, readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+
+import { InitRunner } from './init-runner.js';
+import type { InitRunnerDeps } from './init-runner.js';
+import type {
+  PlanResult,
+  SessionUsage,
+  GSDEvent,
+  InitNewProjectInfo,
+  InitStepResult,
+} from './types.js';
+import { GSDEventType } from './types.js';
+
+// ─── Mock modules ────────────────────────────────────────────────────────────
+
+// Mock session-runner to avoid real SDK calls
+vi.mock('./session-runner.js', () => ({
+  runPhaseStepSession: vi.fn(),
+  runPlanSession: vi.fn(),
+}));
+
+// Mock config loader
+vi.mock('./config.js', () => ({
+  loadConfig: vi.fn().mockResolvedValue({
+    mode: 'yolo',
+    model_profile: 'balanced',
+  }),
+  CONFIG_DEFAULTS: {},
+}));
+
+// Handle to the real node:fs/promises readFile, resolved asynchronously.
+// NOTE(review): nothing in this file mocks node:fs/promises, and `originalReadFile`
+// is never referenced below — this looks like a leftover from a planned partial
+// mock of template reads; confirm whether it can be removed.
+const originalReadFile = vi.importActual('node:fs/promises').then(m => (m as typeof import('node:fs/promises')).readFile);
+
+import { runPhaseStepSession } from './session-runner.js';
+
+const mockRunSession = vi.mocked(runPhaseStepSession);
+
+// ─── Factory helpers ─────────────────────────────────────────────────────────
+
+/** Build a fresh SessionUsage fixture with fixed token counts and no cache traffic. */
+function makeUsage(): SessionUsage {
+  const usage: SessionUsage = {
+    inputTokens: 1000,
+    outputTokens: 500,
+    cacheReadInputTokens: 0,
+    cacheCreationInputTokens: 0,
+  };
+  return usage;
+}
+
+/**
+ * Build a successful PlanResult fixture.
+ * Fields given in `overrides` replace the defaults (shallow merge).
+ */
+function makeSuccessResult(overrides: Partial<PlanResult> = {}): PlanResult {
+  const defaults: PlanResult = {
+    success: true,
+    sessionId: `sess-${Date.now()}`,
+    totalCostUsd: 0.05,
+    durationMs: 2000,
+    usage: makeUsage(),
+    numTurns: 10,
+  };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Build a failed PlanResult fixture carrying an `error_during_execution` error.
+ * Fields given in `overrides` replace the defaults (shallow merge).
+ */
+function makeErrorResult(overrides: Partial<PlanResult> = {}): PlanResult {
+  const defaults: PlanResult = {
+    success: false,
+    sessionId: `sess-err-${Date.now()}`,
+    totalCostUsd: 0.01,
+    durationMs: 500,
+    usage: makeUsage(),
+    numTurns: 2,
+    error: {
+      subtype: 'error_during_execution',
+      messages: ['Session failed'],
+    },
+  };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Build an InitNewProjectInfo fixture describing an empty project that already
+ * has git and does not commit docs. Fields in `overrides` replace the defaults.
+ */
+function makeProjectInfo(overrides: Partial<InitNewProjectInfo> = {}): InitNewProjectInfo {
+  const defaults: InitNewProjectInfo = {
+    researcher_model: 'claude-sonnet-4-6',
+    synthesizer_model: 'claude-sonnet-4-6',
+    roadmapper_model: 'claude-sonnet-4-6',
+    // false for tests — no git operations
+    commit_docs: false,
+    project_exists: false,
+    has_codebase_map: false,
+    planning_exists: false,
+    has_existing_code: false,
+    has_package_file: false,
+    is_brownfield: false,
+    needs_codebase_map: false,
+    // true so the runner skips git init in tests
+    has_git: true,
+    brave_search_available: false,
+    firecrawl_available: false,
+    exa_search_available: false,
+    project_path: '.planning/PROJECT.md',
+  };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Build a GSDTools stand-in made of vitest mocks.
+ * `initNewProject` resolves to a default project info, `configSet` and `commit`
+ * resolve to undefined, and the remaining methods are bare `vi.fn()` stubs.
+ * Entries in `overrides` replace the stubs of the same name.
+ */
+function makeTools(overrides: Record<string, unknown> = {}) {
+  const stubs = {
+    initNewProject: vi.fn().mockResolvedValue(makeProjectInfo()),
+    configSet: vi.fn().mockResolvedValue(undefined),
+    commit: vi.fn().mockResolvedValue(undefined),
+    exec: vi.fn(),
+    stateLoad: vi.fn(),
+    roadmapAnalyze: vi.fn(),
+    phaseComplete: vi.fn(),
+    verifySummary: vi.fn(),
+    initExecutePhase: vi.fn(),
+    initPhaseOp: vi.fn(),
+    configGet: vi.fn(),
+    stateBeginPhase: vi.fn(),
+    phasePlanIndex: vi.fn(),
+  };
+  return { ...stubs, ...overrides } as any;
+}
+
+/**
+ * Build an event-stream stand-in whose `emitEvent` appends every event to the
+ * exposed `events` array; `on`, `emit` and `addTransport` are bare mocks.
+ */
+function makeEventStream() {
+  const captured: GSDEvent[] = [];
+  const emitEvent = vi.fn((evt: GSDEvent) => captured.push(evt));
+  const stub = {
+    emitEvent,
+    on: vi.fn(),
+    emit: vi.fn(),
+    addTransport: vi.fn(),
+    events: captured,
+  };
+  return stub as any;
+}
+
+/**
+ * Assemble InitRunnerDeps for a test, using mock tools and a mock event stream
+ * unless the caller supplies its own.
+ *
+ * `events` is the capture array of the event stream that is actually placed in
+ * the deps, so it reflects what the runner emits. A supplied stream that does
+ * not expose an `events` array yields an empty array.
+ *
+ * @param overrides - Optional `tools`, `eventStream`, `config`; `tmpDir` becomes `projectDir`
+ */
+function makeDeps(overrides: Partial<InitRunnerDeps> & { tmpDir: string }): InitRunnerDeps & { events: GSDEvent[] } {
+  const tools = overrides.tools ?? makeTools();
+  const eventStream = overrides.eventStream ?? makeEventStream();
+  const events: GSDEvent[] = (eventStream as { events?: GSDEvent[] }).events ?? [];
+  return {
+    projectDir: overrides.tmpDir,
+    tools,
+    eventStream,
+    config: overrides.config,
+    events,
+  };
+}
+
+// ─── Test suite ──────────────────────────────────────────────────────────────
+
+describe('InitRunner', () => {
+  let tmpDir: string;
+
+  beforeEach(async () => {
+    tmpDir = join(tmpdir(), `init-runner-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+    await mkdir(tmpDir, { recursive: true });
+    vi.clearAllMocks();
+
+    // Default: all sessions succeed
+    mockRunSession.mockResolvedValue(makeSuccessResult());
+  });
+
+  afterEach(async () => {
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  // ─── Helpers ─────────────────────────────────────────────────────────────
+
+  /**
+   * Create an InitRunner rooted at the current `tmpDir`, wired to fresh mock
+   * tools and a fresh mock event stream. Returns the runner together with the
+   * mocks and the array that collects emitted events.
+   */
+  function createRunner(toolsOverrides: Record<string, unknown> = {}, configOverrides?: Partial<InitRunnerDeps['config']>) {
+    const eventStream = makeEventStream();
+    const tools = makeTools(toolsOverrides);
+    const deps: InitRunnerDeps = {
+      projectDir: tmpDir,
+      tools,
+      eventStream,
+      config: configOverrides as any,
+    };
+    const events = eventStream.events as GSDEvent[];
+    return { runner: new InitRunner(deps), tools, eventStream, events };
+  }
+
+  // ─── Core workflow tests ─────────────────────────────────────────────────
+
+  it('run() calls initNewProject and validates project_exists === false', async () => {
+    const { runner, tools } = createRunner();
+
+    await runner.run('build a todo app');
+
+    expect(tools.initNewProject).toHaveBeenCalledOnce();
+  });
+
+  // An existing project must make the setup step fail with an "already exists" error.
+  it('run() returns error result when initNewProject reports project_exists', async () => {
+    const { runner } = createRunner({
+      initNewProject: vi.fn().mockResolvedValue(makeProjectInfo({ project_exists: true })),
+    });
+
+    const result = await runner.run('build a todo app');
+
+    expect(result.success).toBe(false);
+    // The setup step should have failed
+    const setupStep = result.steps.find(s => s.step === 'setup');
+    expect(setupStep).toBeDefined();
+    expect(setupStep!.success).toBe(false);
+    expect(setupStep!.error).toContain('already exists');
+  });
+
+  it('run() writes config.json with auto-mode defaults', async () => {
+    const { runner } = createRunner();
+
+    await runner.run('build a todo app');
+
+    // config.json should be written to .planning/config.json in tmpDir
+    const configPath = join(tmpDir, '.planning', 'config.json');
+    const content = await readFile(configPath, 'utf-8');
+    const parsed = JSON.parse(content);
+
+    expect(parsed.mode).toBe('yolo');
+    expect(parsed.parallelization).toBe(true);
+    expect(parsed.workflow.auto_advance).toBe(true);
+  });
+
+  it('run() calls configSet for auto_advance', async () => {
+    const { runner, tools } = createRunner();
+
+    await runner.run('build a todo app');
+
+    expect(tools.configSet).toHaveBeenCalledWith('workflow.auto_advance', 'true');
+  });
+
+  it('run() spawns PROJECT.md synthesis session', async () => {
+    const { runner } = createRunner();
+
+    await runner.run('build a todo app');
+
+    // PROJECT.md synthesis is the third workflow step but the FIRST session:
+    // setup and config run without a session, then project (1st session),
+    // 4x research, synthesis, requirements, roadmap.
+    // Total: 8 runPhaseStepSession calls
+    expect(mockRunSession).toHaveBeenCalled();
+
+    // The first recorded call is therefore the PROJECT.md session; its first
+    // argument is the prompt.
+    const firstCall = mockRunSession.mock.calls[0];
+    expect(firstCall).toBeDefined();
+    const prompt = firstCall![0] as string;
+    expect(prompt).toContain('PROJECT.md');
+  });
+
+  it('run() spawns 4 parallel research sessions via Promise.allSettled', async () => {
+    const { runner } = createRunner();
+
+    await runner.run('build a todo app');
+
+    // Count calls that contain the specific "researching the X aspect" pattern
+    // which uniquely identifies research prompts (vs synthesis/requirements that reference research files)
+    const researchCalls = mockRunSession.mock.calls.filter(call => {
+      const prompt = call[0] as string;
+      return prompt.includes('You are researching the');
+    });
+
+    // Should be exactly 4 research sessions
+    expect(researchCalls.length).toBe(4);
+  });
+
+  it('run() spawns synthesis session after research completes', async () => {
+    const { runner } = createRunner();
+
+    await runner.run('build a todo app');
+
+    // Synthesis call should contain 'Synthesize' or 'SUMMARY'
+    const synthesisCalls = mockRunSession.mock.calls.filter(call => {
+      const prompt = call[0] as string;
+      return prompt.includes('Synthesize') || prompt.includes('SUMMARY.md');
+    });
+
+    expect(synthesisCalls.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('run() spawns requirements session', async () => {
+    const { runner } = createRunner();
+
+    await runner.run('build a todo app');
+
+    const reqCalls = mockRunSession.mock.calls.filter(call => {
+      const prompt = call[0] as string;
+      return prompt.includes('REQUIREMENTS.md');
+    });
+
+    expect(reqCalls.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('run() spawns roadmapper session', async () => {
+    const { runner } = createRunner();
+
+    await runner.run('build a todo app');
+
+    const roadmapCalls = mockRunSession.mock.calls.filter(call => {
+      const prompt = call[0] as string;
+      return prompt.includes('ROADMAP.md') || prompt.includes('STATE.md');
+    });
+
+    expect(roadmapCalls.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('run() calls commit after each major step when commit_docs is true', async () => {
+    const commitFn = vi.fn().mockResolvedValue(undefined);
+    const { runner } = createRunner({
+      initNewProject: vi.fn().mockResolvedValue(makeProjectInfo({ commit_docs: true })),
+      commit: commitFn,
+    });
+
+    await runner.run('build a todo app');
+
+    // Should commit: config, PROJECT.md, research, REQUIREMENTS.md, ROADMAP+STATE
+    expect(commitFn).toHaveBeenCalled();
+    expect(commitFn.mock.calls.length).toBeGreaterThanOrEqual(4);
+  });
+
+  it('run() does not call commit when commit_docs is false', async () => {
+    const commitFn = vi.fn().mockResolvedValue(undefined);
+    const { runner } = createRunner({
+      initNewProject: vi.fn().mockResolvedValue(makeProjectInfo({ commit_docs: false })),
+      commit: commitFn,
+    });
+
+    await runner.run('build a todo app');
+
+    expect(commitFn).not.toHaveBeenCalled();
+  });
+
+  // ─── Event emission tests ────────────────────────────────────────────────
+
+  it('run() emits InitStart and InitComplete events', async () => {
+    const { runner, events } = createRunner();
+
+    await runner.run('build a todo app');
+
+    const startEvents = events.filter(e => e.type === GSDEventType.InitStart);
+    const completeEvents = events.filter(e => e.type === GSDEventType.InitComplete);
+
+    expect(startEvents.length).toBe(1);
+    expect(completeEvents.length).toBe(1);
+
+    const start = startEvents[0] as any;
+    expect(start.projectDir).toBe(tmpDir);
+    expect(start.input).toBeTruthy();
+
+    const complete = completeEvents[0] as any;
+    expect(complete.success).toBe(true);
+    expect(complete.totalCostUsd).toBeTypeOf('number');
+    expect(complete.totalDurationMs).toBeTypeOf('number');
+    expect(complete.artifactCount).toBeGreaterThan(0);
+  });
+
+  it('run() emits InitStepStart/Complete for each step', async () => {
+    const { runner, events } = createRunner();
+
+    await runner.run('build a todo app');
+
+    const stepStarts = events.filter(e => e.type === GSDEventType.InitStepStart);
+    const stepCompletes = events.filter(e => e.type === GSDEventType.InitStepComplete);
+
+    // Steps: setup, config, project, 4x research, synthesis, requirements, roadmap = 10
+    expect(stepStarts.length).toBe(10);
+    expect(stepCompletes.length).toBe(10);
+
+    // Verify each step start has a matching complete (order may vary for parallel research)
+    const startSteps = stepStarts.map(e => (e as any).step).sort();
+    const completeSteps = stepCompletes.map(e => (e as any).step).sort();
+
+    expect(startSteps).toEqual(completeSteps);
+
+    // Verify expected step names are present
+    expect(startSteps).toContain('setup');
+    expect(startSteps).toContain('config');
+    expect(startSteps).toContain('project');
+    expect(startSteps).toContain('research-stack');
+    expect(startSteps).toContain('research-features');
+    expect(startSteps).toContain('research-architecture');
+    expect(startSteps).toContain('research-pitfalls');
+    expect(startSteps).toContain('synthesis');
+    expect(startSteps).toContain('requirements');
+    expect(startSteps).toContain('roadmap');
+  });
+
+  it('run() emits InitResearchSpawn before research sessions', async () => {
+    const { runner, events } = createRunner();
+
+    await runner.run('build a todo app');
+
+    const spawnEvents = events.filter(e => e.type === GSDEventType.InitResearchSpawn);
+    expect(spawnEvents.length).toBe(1);
+
+    const spawn = spawnEvents[0] as any;
+    expect(spawn.sessionCount).toBe(4);
+    expect(spawn.researchTypes).toEqual(['STACK', 'FEATURES', 'ARCHITECTURE', 'PITFALLS']);
+  });
+
+  // ─── Error handling tests ────────────────────────────────────────────────
+
+  it('run() returns error when a session fails (partial research success)', async () => {
+    // Fail exactly one session — the 2nd call overall — and let all others succeed.
+    // NOTE(review): this assumes call 1 is PROJECT.md and call 2 is the first
+    // research session (presumably STACK); the mock does not inspect the prompt,
+    // so confirm the ordering against InitRunner if the research fan-out changes.
+    let callCount = 0;
+    mockRunSession.mockImplementation(async (prompt: string) => {
+      callCount++;
+      // First call is PROJECT.md, then 4 research calls
+      // The 2nd call overall (1st research) should fail
+      if (callCount === 2) {
+        return makeErrorResult();
+      }
+      return makeSuccessResult();
+    });
+
+    const { runner } = createRunner();
+    const result = await runner.run('build a todo app');
+
+    // The workflow is expected to keep going after a failed research session,
+    // while the overall result still reports failure.
+    expect(result.success).toBe(false);
+
+    // At least 7 steps recorded, i.e. the run did not stop at the failed session.
+    expect(result.steps.length).toBeGreaterThanOrEqual(7);
+  });
+
+  it('run() stops workflow when PROJECT.md synthesis fails', async () => {
+    // First session (PROJECT.md) fails
+    mockRunSession.mockResolvedValueOnce(makeErrorResult());
+
+    const { runner } = createRunner();
+    const result = await runner.run('build a todo app');
+
+    expect(result.success).toBe(false);
+
+    // Should have setup, config, and project steps only
+    const stepNames = result.steps.map(s => s.step);
+    expect(stepNames).toContain('setup');
+    expect(stepNames).toContain('config');
+    expect(stepNames).toContain('project');
+    // Should NOT continue to research
+    expect(stepNames).not.toContain('research-stack');
+  });
+
+  it('run() stops workflow when requirements session fails', async () => {
+    // Let PROJECT.md and research succeed, but make requirements fail
+    let sessionCallIndex = 0;
+    mockRunSession.mockImplementation(async () => {
+      sessionCallIndex++;
+      // Calls: 1=PROJECT.md, 2-5=research, 6=synthesis, 7=requirements
+      if (sessionCallIndex === 7) {
+        return makeErrorResult();
+      }
+      return makeSuccessResult();
+    });
+
+    const { runner } = createRunner();
+    const result = await runner.run('build a todo app');
+
+    expect(result.success).toBe(false);
+
+    const stepNames = result.steps.map(s => s.step);
+    expect(stepNames).toContain('requirements');
+    // Should NOT continue to roadmap
+    expect(stepNames).not.toContain('roadmap');
+  });
+
+  // ─── Cost aggregation tests ──────────────────────────────────────────────
+
+  it('run() aggregates costs from all sessions', async () => {
+    // Every mocked session reports the same cost.
+    const costPerSession = 0.05;
+    mockRunSession.mockResolvedValue(makeSuccessResult({ totalCostUsd: costPerSession }));
+
+    const { runner } = createRunner();
+    const result = await runner.run('build a todo app');
+
+    // 8 total sessions: PROJECT.md + 4 research + synthesis + requirements + roadmap
+    // Cost from sessions extracted via extractCost, non-session steps (setup/config) are 0
+    // NOTE(review): the assertions below only check that the totals are positive;
+    // they do not pin the sum to 8 * costPerSession.
+    expect(result.totalCostUsd).toBeGreaterThan(0);
+    expect(result.totalDurationMs).toBeGreaterThan(0);
+  });
+
+  // ─── Artifact tracking tests ─────────────────────────────────────────────
+
+  it('run() returns all expected artifacts on success', async () => {
+    const { runner } = createRunner();
+    const result = await runner.run('build a todo app');
+
+    expect(result.success).toBe(true);
+    expect(result.artifacts).toContain('.planning/config.json');
+    expect(result.artifacts).toContain('.planning/PROJECT.md');
+    expect(result.artifacts).toContain('.planning/research/SUMMARY.md');
+    expect(result.artifacts).toContain('.planning/REQUIREMENTS.md');
+    expect(result.artifacts).toContain('.planning/ROADMAP.md');
+    expect(result.artifacts).toContain('.planning/STATE.md');
+  });
+
+  it('run() includes research artifact paths on success', async () => {
+    const { runner } = createRunner();
+    const result = await runner.run('build a todo app');
+
+    expect(result.artifacts).toContain('.planning/research/STACK.md');
+    expect(result.artifacts).toContain('.planning/research/FEATURES.md');
+    expect(result.artifacts).toContain('.planning/research/ARCHITECTURE.md');
+    expect(result.artifacts).toContain('.planning/research/PITFALLS.md');
+  });
+
+  // ─── Git init test ─────────────────────────────────────────────────────
+
+  it('run() initializes git when has_git is false', async () => {
+    // We can't easily test git init without mocking execFile deeply,
+    // but we can verify the tools.initNewProject is called with the result
+    // and that the workflow continues. Since has_git=true by default in our
+    // mock, flip it to false and verify the config step still passes.
+    const { runner } = createRunner({
+      initNewProject: vi.fn().mockResolvedValue(makeProjectInfo({ has_git: false })),
+    });
+
+    // This will attempt to run `git init` which may or may not exist in test env.
+    // Since we're in a tmpDir, git init is safe. The test verifies the workflow proceeds.
+    const result = await runner.run('build a todo app');
+
+    // The config step should succeed (git init in tmpDir should work)
+    const configStep = result.steps.find(s => s.step === 'config');
+    expect(configStep).toBeDefined();
+    // Note: if git is not available in CI, this may fail — that's expected
+  });
+
+  // ─── Config passthrough test ─────────────────────────────────────────────
+
+  it('constructor accepts config overrides', async () => {
+    // Set projectInfo model fields to undefined so orchestratorModel is used as fallback
+    const { runner } = createRunner({
+      initNewProject: vi.fn().mockResolvedValue(makeProjectInfo({
+        researcher_model: undefined as any,
+        synthesizer_model: undefined as any,
+        roadmapper_model: undefined as any,
+      })),
+    }, {
+      maxBudgetPerSession: 10.0,
+      maxTurnsPerSession: 50,
+      orchestratorModel: 'claude-opus-4-6',
+    });
+
+    await runner.run('build a todo app');
+
+    // Verify the session runner was called with overridden model
+    const calls = mockRunSession.mock.calls;
+    expect(calls.length).toBeGreaterThan(0);
+
+    // Check model in options (4th argument, index 3)
+    const modelsUsed = calls.map(c => {
+      const options = c[3] as any;
+      return options?.model;
+    });
+    // When projectInfo model is undefined, ?? falls through to orchestratorModel
+    expect(modelsUsed.some(m => m === 'claude-opus-4-6')).toBe(true);
+  });
+
+  // ─── Session count validation ────────────────────────────────────────────
+
+  it('run() calls runPhaseStepSession exactly 8 times on full success', async () => {
+    const { runner } = createRunner();
+
+    await runner.run('build a todo app');
+
+    // 1 PROJECT.md + 4 research + 1 synthesis + 1 requirements + 1 roadmap = 8
+    expect(mockRunSession).toHaveBeenCalledTimes(8);
+  });
+});
--- a/sdk/src/init-runner.ts
+++ b/sdk/src/init-runner.ts
@@ -0,0 +1,703 @@
+/**
+ * InitRunner — orchestrates the GSD new-project init workflow.
+ *
+ * Workflow: setup → config → PROJECT.md → parallel research (4 sessions)
+ *         → synthesis → requirements → roadmap
+ *
+ * Each step calls Agent SDK `query()` via `runPhaseStepSession()` with
+ * prompts derived from GSD-1 workflow/agent/template files on disk.
+ */
+
+import { readFile, writeFile, mkdir } from 'node:fs/promises';
+import { join } from 'node:path';
+import { homedir } from 'node:os';
+import { execFile } from 'node:child_process';
+
+import type {
+  InitConfig,
+  InitResult,
+  InitStepResult,
+  InitStepName,
+  InitNewProjectInfo,
+  GSDInitStartEvent,
+  GSDInitStepStartEvent,
+  GSDInitStepCompleteEvent,
+  GSDInitCompleteEvent,
+  GSDInitResearchSpawnEvent,
+  PlanResult,
+} from './types.js';
+import { GSDEventType, PhaseStepType } from './types.js';
+import type { GSDTools } from './gsd-tools.js';
+import type { GSDEventStream } from './event-stream.js';
+import { loadConfig } from './config.js';
+import { runPhaseStepSession } from './session-runner.js';
+
+// ─── Constants ───────────────────────────────────────────────────────────────
+
+const GSD_TEMPLATES_DIR = join(homedir(), '.claude', 'get-shit-done', 'templates'); // readGSDFile() resolves paths against this directory's parent
+const GSD_AGENTS_DIR = join(homedir(), '.claude', 'agents'); // readAgentFile() looks up agent definitions here
+// Research dimensions; runParallelResearch() starts one session per entry.
+const RESEARCH_TYPES = ['STACK', 'FEATURES', 'ARCHITECTURE', 'PITFALLS'] as const;
+type ResearchType = (typeof RESEARCH_TYPES)[number];
+// Maps each research dimension to the init step name used in step results and events.
+const RESEARCH_STEP_MAP: Record<ResearchType, InitStepName> = {
+  STACK: 'research-stack',
+  FEATURES: 'research-features',
+  ARCHITECTURE: 'research-architecture',
+  PITFALLS: 'research-pitfalls',
+};
+
+/** Default config.json content that the init `config` step writes to .planning/config.json for auto-mode projects. */
+const AUTO_MODE_CONFIG = {
+  mode: 'yolo',
+  parallelization: true,
+  depth: 'quick',
+  workflow: {
+    research: true,
+    plan_checker: true,
+    verifier: true,
+    auto_advance: true,
+    skip_discuss: false,
+  },
+};
+
+// ─── InitRunner ──────────────────────────────────────────────────────────────
+/** Dependencies handed to the InitRunner constructor. */
+export interface InitRunnerDeps {
+  projectDir: string; // project root; `.planning/` is created beneath it
+  tools: GSDTools; // gsd-tools bridge (initNewProject, configSet, commit are used by the runner)
+  eventStream: GSDEventStream; // receives the init lifecycle events
+  config?: Partial<InitConfig>; // optional overrides; budget/turn limits default to 3.0 / 30 when unset
+}
+
+export class InitRunner {
+  private readonly projectDir: string;
+  private readonly tools: GSDTools;
+  private readonly eventStream: GSDEventStream;
+  private readonly config: InitConfig;
+  private readonly sessionId: string;
+  /** Stores the injected dependencies, applies default session limits, and mints an `init-<epoch ms>` session id. */
+  constructor(deps: InitRunnerDeps) {
+    this.sessionId = `init-${Date.now()}`;
+    this.config = {
+      orchestratorModel: deps.config?.orchestratorModel,
+      researchModel: deps.config?.researchModel,
+      maxTurnsPerSession: deps.config?.maxTurnsPerSession ?? 30,
+      maxBudgetPerSession: deps.config?.maxBudgetPerSession ?? 3.0,
+    };
+    this.eventStream = deps.eventStream;
+    this.tools = deps.tools;
+    this.projectDir = deps.projectDir;
+  }
+
+  /**
+   * Run the full init workflow: setup → config → PROJECT.md → parallel research → synthesis → requirements → roadmap.
+   * A failed setup, config, PROJECT.md, synthesis, requirements, or roadmap step stops the run; failed research sessions do not, but they make the overall result unsuccessful.
+   * @param input - User input: PRD content, project description, etc.
+   * @returns InitResult with per-step results, artifacts, and totals. Step errors are captured in the result.
+   */
+  async run(input: string): Promise<InitResult> {
+    const startTime = Date.now();
+    const steps: InitStepResult[] = [];
+    const artifacts: string[] = [];
+
+    this.emitEvent<GSDInitStartEvent>({
+      type: GSDEventType.InitStart,
+      input: input.slice(0, 200), // the event carries only the first 200 characters
+      projectDir: this.projectDir,
+    });
+
+    try {
+      // ── Step 1: Setup — get project metadata ──────────────────────────
+      const setupResult = await this.runStep('setup', async () => {
+        const info = await this.tools.initNewProject();
+        if (info.project_exists) {
+          throw new Error('Project already exists (.planning/PROJECT.md found). Use a fresh directory or delete .planning/ first.');
+        }
+        return info;
+      });
+      steps.push(setupResult.stepResult);
+      if (!setupResult.stepResult.success) {
+        return this.buildResult(false, steps, artifacts, startTime);
+      }
+      const projectInfo = setupResult.value as InitNewProjectInfo;
+
+      // ── Step 2: Config — write config.json and init git ───────────────
+      const configResult = await this.runStep('config', async () => {
+        // Ensure git is initialized
+        if (!projectInfo.has_git) {
+          await this.execGit(['init']);
+        }
+
+        // Ensure .planning/ directory exists
+        const planningDir = join(this.projectDir, '.planning');
+        await mkdir(planningDir, { recursive: true });
+
+        // Write config.json
+        const configPath = join(planningDir, 'config.json');
+        await writeFile(configPath, JSON.stringify(AUTO_MODE_CONFIG, null, 2) + '\n', 'utf-8');
+        artifacts.push('.planning/config.json');
+
+        // Persist auto_advance via gsd-tools (validates & updates state)
+        await this.tools.configSet('workflow.auto_advance', 'true');
+
+        // Commit config
+        if (projectInfo.commit_docs) {
+          await this.tools.commit('chore: add project config', ['.planning/config.json']);
+        }
+      });
+      steps.push(configResult.stepResult);
+      if (!configResult.stepResult.success) {
+        return this.buildResult(false, steps, artifacts, startTime);
+      }
+
+      // ── Step 3: PROJECT.md — synthesize from input ────────────────────
+      const projectResult = await this.runStep('project', async () => {
+        const prompt = await this.buildProjectPrompt(input);
+        const result = await this.runSession(prompt, projectInfo.researcher_model);
+        if (!result.success) {
+          throw new Error(`PROJECT.md synthesis failed: ${result.error?.messages.join(', ') ?? 'unknown error'}`);
+        }
+        artifacts.push('.planning/PROJECT.md');
+        if (projectInfo.commit_docs) {
+          await this.tools.commit('docs: add PROJECT.md', ['.planning/PROJECT.md']);
+        }
+        return result;
+      });
+      steps.push(projectResult.stepResult);
+      if (!projectResult.stepResult.success) {
+        return this.buildResult(false, steps, artifacts, startTime);
+      }
+
+      // ── Step 4: Parallel research (4 sessions) ───────────────────────
+      const researchSteps = await this.runParallelResearch(input, projectInfo);
+      steps.push(...researchSteps);
+      const researchFailed = researchSteps.some(s => !s.success);
+
+      // Add artifacts for successful research files
+      for (const rs of researchSteps) {
+        if (rs.success && rs.artifacts) {
+          artifacts.push(...rs.artifacts);
+        }
+      }
+
+      if (researchFailed) {
+        // Intentionally empty: the run continues with partial research (synthesis uses whatever exists);
+        // `researchFailed` is applied to the final `success` flag after the roadmap step.
+      }
+
+      // ── Step 5: Synthesis — combine research into SUMMARY.md ──────────
+      const synthResult = await this.runStep('synthesis', async () => {
+        const prompt = await this.buildSynthesisPrompt();
+        const result = await this.runSession(prompt, projectInfo.synthesizer_model);
+        if (!result.success) {
+          throw new Error(`Research synthesis failed: ${result.error?.messages.join(', ') ?? 'unknown error'}`);
+        }
+        artifacts.push('.planning/research/SUMMARY.md');
+        if (projectInfo.commit_docs) {
+          await this.tools.commit('docs: add research files', ['.planning/research/']);
+        }
+        return result;
+      });
+      steps.push(synthResult.stepResult);
+      if (!synthResult.stepResult.success) {
+        return this.buildResult(false, steps, artifacts, startTime);
+      }
+
+      // ── Step 6: Requirements — derive from PROJECT + research ─────────
+      const reqResult = await this.runStep('requirements', async () => {
+        const prompt = await this.buildRequirementsPrompt();
+        const result = await this.runSession(prompt, projectInfo.synthesizer_model);
+        if (!result.success) {
+          throw new Error(`Requirements generation failed: ${result.error?.messages.join(', ') ?? 'unknown error'}`);
+        }
+        artifacts.push('.planning/REQUIREMENTS.md');
+        if (projectInfo.commit_docs) {
+          await this.tools.commit('docs: add REQUIREMENTS.md', ['.planning/REQUIREMENTS.md']);
+        }
+        return result;
+      });
+      steps.push(reqResult.stepResult);
+      if (!reqResult.stepResult.success) {
+        return this.buildResult(false, steps, artifacts, startTime);
+      }
+
+      // ── Step 7: Roadmap — create phases + STATE.md ────────────────────
+      const roadmapResult = await this.runStep('roadmap', async () => {
+        const prompt = await this.buildRoadmapPrompt();
+        const result = await this.runSession(prompt, projectInfo.roadmapper_model);
+        if (!result.success) {
+          throw new Error(`Roadmap generation failed: ${result.error?.messages.join(', ') ?? 'unknown error'}`);
+        }
+        artifacts.push('.planning/ROADMAP.md', '.planning/STATE.md');
+        if (projectInfo.commit_docs) {
+          await this.tools.commit('docs: add ROADMAP.md and STATE.md', [
+            '.planning/ROADMAP.md',
+            '.planning/STATE.md',
+          ]);
+        }
+        return result;
+      });
+      steps.push(roadmapResult.stepResult);
+      if (!roadmapResult.stepResult.success) {
+        return this.buildResult(false, steps, artifacts, startTime);
+      }
+
+      const success = !researchFailed;
+      return this.buildResult(success, steps, artifacts, startTime);
+    } catch (err) {
+      // Unexpected top-level error. NOTE(review): it is recorded under the 'setup' step name regardless of which stage threw.
+      steps.push({
+        step: 'setup',
+        success: false,
+        durationMs: 0,
+        costUsd: 0,
+        error: err instanceof Error ? err.message : String(err),
+      });
+      return this.buildResult(false, steps, artifacts, startTime);
+    }
+  }
+
+  // ─── Step execution wrapper ────────────────────────────────────────────────
+
+  private async runStep<T>(
+    step: InitStepName,
+    fn: () => Promise<T>,
+  ): Promise<{ stepResult: InitStepResult; value?: T }> {
+    // Runs one workflow step between a start and a complete event, timing it
+    // along the way. An error thrown by `fn` is not rethrown: it is reported
+    // as a failed InitStepResult (cost 0) and no `value` is returned.
+    const startedAt = Date.now();
+    const elapsedMs = (): number => Date.now() - startedAt;
+
+    this.emitEvent<GSDInitStepStartEvent>({ type: GSDEventType.InitStepStart, step });
+
+    try {
+      const value = await fn();
+
+      // extractCost() yields 0 unless the value carries a totalCostUsd field.
+      const stepResult: InitStepResult = {
+        step,
+        success: true,
+        durationMs: elapsedMs(),
+        costUsd: this.extractCost(value),
+      };
+
+      this.emitEvent<GSDInitStepCompleteEvent>({
+        type: GSDEventType.InitStepComplete,
+        step,
+        success: true,
+        durationMs: stepResult.durationMs,
+        costUsd: stepResult.costUsd,
+      });
+
+      return { stepResult, value };
+    } catch (failure) {
+      const durationMs = elapsedMs();
+      const message = failure instanceof Error ? failure.message : String(failure);
+
+      const stepResult: InitStepResult = {
+        step,
+        success: false,
+        durationMs,
+        costUsd: 0,
+        error: message,
+      };
+
+      this.emitEvent<GSDInitStepCompleteEvent>({
+        type: GSDEventType.InitStepComplete,
+        step,
+        success: false,
+        durationMs,
+        costUsd: 0,
+        error: message,
+      });
+
+      return { stepResult };
+    }
+  }
+
+  // ─── Parallel research ─────────────────────────────────────────────────────
+
+  private async runParallelResearch(
+    input: string,
+    projectInfo: InitNewProjectInfo,
+  ): Promise<InitStepResult[]> {
+    // Starts one research session per RESEARCH_TYPES entry, waits for all of
+    // them, and returns their step results in RESEARCH_TYPES order.
+    this.emitEvent<GSDInitResearchSpawnEvent>({
+      type: GSDEventType.InitResearchSpawn,
+      sessionCount: RESEARCH_TYPES.length,
+      researchTypes: [...RESEARCH_TYPES],
+    });
+
+    const researchOne = async (topic: ResearchType): Promise<InitStepResult> => {
+      const { stepResult } = await this.runStep(RESEARCH_STEP_MAP[topic], async () => {
+        const prompt = await this.buildResearchPrompt(topic, input);
+        const session = await this.runSession(prompt, projectInfo.researcher_model);
+        if (session.success) {
+          return session;
+        }
+        const detail = session.error?.messages.join(', ') ?? 'unknown error';
+        throw new Error(`Research (${topic}) failed: ${detail}`);
+      });
+      // On success, record the path the session was asked to write.
+      if (stepResult.success) {
+        stepResult.artifacts = [`.planning/research/${topic}.md`];
+      }
+      return stepResult;
+    };
+
+    const settled = await Promise.allSettled(RESEARCH_TYPES.map(topic => researchOne(topic)));
+
+    return settled.map((outcome, index) => {
+      if (outcome.status === 'fulfilled') {
+        return outcome.value;
+      }
+      // runStep() already catches errors, so a rejection is not expected here;
+      // translate one into a failed step anyway.
+      return {
+        step: RESEARCH_STEP_MAP[RESEARCH_TYPES[index]!]!,
+        success: false,
+        durationMs: 0,
+        costUsd: 0,
+        error: outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason),
+      } satisfies InitStepResult;
+    });
+  }
+
+  // ─── Prompt builders ───────────────────────────────────────────────────────
+
+  /**
+   * Compose the prompt that asks the agent to write .planning/PROJECT.md.
+   * Embeds the project template (templates/project.md) and the raw user input.
+   */
+  private async buildProjectPrompt(input: string): Promise<string> {
+    const projectTemplate = await this.readGSDFile('templates/project.md');
+
+    const intro = [
+      'You are creating the PROJECT.md for a new software project.',
+      'Write .planning/PROJECT.md based on the template structure below and the user\'s project description.',
+    ];
+    const templateSection = ['<project_template>', projectTemplate, '</project_template>'];
+    const inputSection = ['<user_input>', input, '</user_input>'];
+    const closing = [
+      'Write the file to .planning/PROJECT.md. Follow the template structure but fill in with real content derived from the user input.',
+      'Be specific and opinionated — make decisions, don\'t list options.',
+    ];
+
+    // Lines within a section are newline-separated; sections are separated
+    // by one blank line.
+    const sections = [intro, templateSection, inputSection, closing];
+    return sections.map(lines => lines.join('\n')).join('\n\n');
+  }
+
+  /**
+   * Compose the prompt for one research session.
+   * Embeds the researcher agent definition, the project context, and the
+   * research template for `researchType`.
+   *
+   * @param researchType - Research dimension; also names the output file.
+   * @param input - Raw user input, used as context if PROJECT.md is unreadable.
+   * @returns The assembled prompt text.
+   */
+  private async buildResearchPrompt(
+    researchType: ResearchType,
+    input: string,
+  ): Promise<string> {
+    const outputPath = `.planning/research/${researchType}.md`;
+    const agentDef = await this.readAgentFile('gsd-project-researcher.md');
+    const template = await this.readGSDFile(`templates/research-project/${researchType}.md`);
+
+    // PROJECT.md should exist by now; if it cannot be read, the raw user input
+    // stands in as project context.
+    let projectContent: string;
+    try {
+      const projectPath = join(this.projectDir, '.planning', 'PROJECT.md');
+      projectContent = await readFile(projectPath, 'utf-8');
+    } catch {
+      projectContent = input;
+    }
+
+    // Prompt layout: agent definition, task, files, context, template, closing.
+    const sections: string[][] = [
+      ['<agent_definition>', agentDef, '</agent_definition>'],
+      [
+        `You are researching the ${researchType} aspect of this project.`,
+        `Write your findings to ${outputPath}`,
+      ],
+      ['<files_to_read>', '.planning/PROJECT.md', '</files_to_read>'],
+      ['<project_context>', projectContent, '</project_context>'],
+      ['<research_template>', template, '</research_template>'],
+      [
+        `Write ${outputPath} following the template structure.`,
+        'Be comprehensive but opinionated. "Use X because Y" not "Options are X, Y, Z."',
+      ],
+    ];
+
+    // Lines within a section are newline-separated; sections are separated
+    // by one blank line.
+    return sections.map(lines => lines.join('\n')).join('\n\n');
+  }
+
+  /**
+   * Build the synthesis prompt from the synthesizer agent definition, every
+   * readable research output, and the SUMMARY.md template.
+   * It does not ask the agent to commit: run() commits .planning/research/
+   * itself, and only when commit_docs is enabled.
+   */
+  private async buildSynthesisPrompt(): Promise<string> {
+    const agentDef = await this.readAgentFile('gsd-research-synthesizer.md');
+    const summaryTemplate = await this.readGSDFile('templates/research-project/SUMMARY.md');
+    const researchDir = join(this.projectDir, '.planning', 'research');
+
+    // Read whatever research files exist; unreadable ones get a placeholder.
+    const researchContent: string[] = [];
+    for (const rt of RESEARCH_TYPES) {
+      const tag = `research_${rt.toLowerCase()}`;
+      let content = '(Not available)';
+      try {
+        content = await readFile(join(researchDir, `${rt}.md`), 'utf-8');
+      } catch {
+        // Keep the placeholder: this research session may have failed.
+      }
+      researchContent.push(`<${tag}>\n${content}\n</${tag}>`);
+    }
+
+    return [
+      '<agent_definition>',
+      agentDef,
+      '</agent_definition>',
+      '',
+      '<files_to_read>',
+      ...RESEARCH_TYPES.map(rt => `.planning/research/${rt}.md`),
+      '</files_to_read>',
+      '',
+      'Synthesize the research files below into .planning/research/SUMMARY.md',
+      '',
+      ...researchContent,
+      '',
+      '<summary_template>',
+      summaryTemplate,
+      '</summary_template>',
+      '',
+      'Write .planning/research/SUMMARY.md synthesizing all research findings.',
+    ].join('\n');
+  }
+
+  /**
+   * Build the requirements prompt.
+   * Embeds PROJECT.md, the FEATURES research output, and the requirements
+   * template. A file that is missing or empty is replaced by "(Not available)",
+   * the same placeholder the synthesis and roadmap prompts use.
+   *
+   * @returns The assembled prompt text.
+   */
+  private async buildRequirementsPrompt(): Promise<string> {
+    const reqTemplate = await this.readGSDFile('templates/requirements.md');
+    const planningDir = join(this.projectDir, '.planning');
+
+    // Best-effort read: research may have partially failed, and an unreadable
+    // PROJECT.md should be visible to the agent rather than an empty section.
+    const readOrPlaceholder = async (...segments: string[]): Promise<string> => {
+      try {
+        const content = await readFile(join(planningDir, ...segments), 'utf-8');
+        return content || '(Not available)';
+      } catch {
+        return '(Not available)';
+      }
+    };
+
+    const projectContent = await readOrPlaceholder('PROJECT.md');
+    const featuresContent = await readOrPlaceholder('research', 'FEATURES.md');
+
+    return [
+      'You are generating REQUIREMENTS.md for this project.',
+      'Derive requirements from the PROJECT.md and research outputs.',
+      'Auto-include all table-stakes requirements (auth, error handling, logging, etc.).',
+      '',
+      '<project_context>',
+      projectContent,
+      '</project_context>',
+      '',
+      '<features_research>',
+      featuresContent,
+      '</features_research>',
+      '',
+      '<requirements_template>',
+      reqTemplate,
+      '</requirements_template>',
+      '',
+      'Write .planning/REQUIREMENTS.md following the template structure.',
+      'Every requirement must be testable and specific. No vague aspirations.',
+    ].join('\n');
+  }
+
+  /**
+   * Build the roadmap prompt.
+   * Embeds the roadmapper agent definition, the planning files produced so far
+   * (PROJECT.md, REQUIREMENTS.md, research/SUMMARY.md, config.json), and the
+   * ROADMAP/STATE templates.
+   */
+  private async buildRoadmapPrompt(): Promise<string> {
+    const agentDef = await this.readAgentFile('gsd-roadmapper.md');
+    const roadmapTemplate = await this.readGSDFile('templates/roadmap.md');
+    const stateTemplate = await this.readGSDFile('templates/state.md');
+
+    const filesToRead = [
+      '.planning/PROJECT.md',
+      '.planning/REQUIREMENTS.md',
+      '.planning/research/SUMMARY.md',
+      '.planning/config.json',
+    ];
+
+    // Inline each listed file, one after another; a file that cannot be read
+    // is embedded as "(Not available)".
+    const inlineFile = async (relPath: string): Promise<string> => {
+      let body: string;
+      try {
+        body = await readFile(join(this.projectDir, relPath), 'utf-8');
+      } catch {
+        body = '(Not available)';
+      }
+      return `<file path="${relPath}">\n${body}\n</file>`;
+    };
+    const fileContents: string[] = [];
+    for (const relPath of filesToRead) {
+      fileContents.push(await inlineFile(relPath));
+    }
+
+    // Lines within a section are newline-separated; sections by one blank line.
+    const sections: string[][] = [
+      ['<agent_definition>', agentDef, '</agent_definition>'],
+      ['<files_to_read>', ...filesToRead, '</files_to_read>'],
+      fileContents,
+      ['<roadmap_template>', roadmapTemplate, '</roadmap_template>'],
+      ['<state_template>', stateTemplate, '</state_template>'],
+      [
+        'Create .planning/ROADMAP.md and .planning/STATE.md.',
+        'ROADMAP.md: Transform requirements into phases. Every v1 requirement maps to exactly one phase.',
+        'STATE.md: Initialize project state tracking.',
+      ],
+    ];
+
+    return sections.map(lines => lines.join('\n')).join('\n\n');
+  }
+
+  // ─── Session execution ─────────────────────────────────────────────────────
+
+  /**
+   * Run one Agent SDK session for `prompt` via runPhaseStepSession.
+   * `modelOverride` wins over the configured orchestratorModel when provided.
+   */
+  private async runSession(prompt: string, modelOverride?: string): Promise<PlanResult> {
+    // Project config is loaded again for every session.
+    const projectConfig = await loadConfig(this.projectDir);
+    const { maxTurnsPerSession, maxBudgetPerSession, orchestratorModel } = this.config;
+    const sessionOptions = {
+      maxTurns: maxTurnsPerSession,
+      maxBudgetUsd: maxBudgetPerSession,
+      model: modelOverride ?? orchestratorModel,
+      cwd: this.projectDir,
+    };
+    const streamContext = { phase: undefined, planName: undefined };
+
+    // Every init session uses the Research step type (chosen for its broad tool access).
+    const stepType = PhaseStepType.Research;
+    return runPhaseStepSession(prompt, stepType, projectConfig, sessionOptions, this.eventStream, streamContext);
+  }
+
+  // ─── File reading helpers ──────────────────────────────────────────────────
+
+  /**
+   * Read a file relative to the parent of GSD_TEMPLATES_DIR (~/.claude/get-shit-done/); an unreadable file yields a placeholder string.
+   */
+  private async readGSDFile(relativePath: string): Promise<string> {
+    const installRoot = join(GSD_TEMPLATES_DIR, '..');
+    const target = join(installRoot, relativePath);
+    try {
+      return await readFile(target, 'utf-8');
+    } catch {
+      return `(Template not found: ${relativePath})`;
+    }
+  }
+
+  /** Read an agent definition from GSD_AGENTS_DIR (~/.claude/agents/); an unreadable file yields a placeholder string. */
+  private async readAgentFile(filename: string): Promise<string> {
+    const agentPath = join(GSD_AGENTS_DIR, filename);
+    let definition: string;
+    try {
+      definition = await readFile(agentPath, 'utf-8');
+    } catch {
+      definition = `(Agent definition not found: ${filename})`;
+    }
+    return definition;
+  }
+
+  // ─── Git helper ────────────────────────────────────────────────────────────
+
+  /**
+   * Run `git <args>` with the project directory as cwd.
+   * Resolves with stdout; rejects with an Error carrying stderr (or the exec error message).
+   */
+  private execGit(args: string[]): Promise<string> {
+    const describeFailure = (detail: string): Error =>
+      new Error(`git ${args.join(' ')} failed: ${detail}`);
+
+    return new Promise<string>((resolve, reject) => {
+      execFile('git', args, { cwd: this.projectDir }, (error, stdout, stderr) =>
+        error ? reject(describeFailure(stderr || error.message)) : resolve(stdout.toString()),
+      );
+    });
+  }
+
+  // ─── Event helpers ─────────────────────────────────────────────────────────
+
+  private emitEvent<T extends { type: GSDEventType }>(
+    partial: Omit<T, 'timestamp' | 'sessionId'> & { type: GSDEventType },
+  ): void {
+    // Stamp the event with the emission time and this runner's session id,
+    // then forward it to the event stream.
+    const stamp = { timestamp: new Date().toISOString(), sessionId: this.sessionId };
+    const event = { ...stamp, ...partial } as unknown as import('./types.js').GSDEvent;
+    this.eventStream.emitEvent(event);
+  }
+
+  // ─── Result helpers ────────────────────────────────────────────────────────
+
+  private buildResult(
+    success: boolean,
+    steps: InitStepResult[],
+    artifacts: string[],
+    startTime: number,
+  ): InitResult {
+    // Sums the per-step costs, measures elapsed time since `startTime`, emits
+    // the InitComplete event, and returns the final InitResult.
+    let totalCostUsd = 0;
+    for (const { costUsd } of steps) {
+      totalCostUsd += costUsd;
+    }
+    const totalDurationMs = Date.now() - startTime;
+    const result: InitResult = { success, steps, totalCostUsd, totalDurationMs, artifacts };
+
+    this.emitEvent<GSDInitCompleteEvent>({
+      type: GSDEventType.InitComplete,
+      success,
+      totalCostUsd,
+      totalDurationMs,
+      artifactCount: artifacts.length,
+    });
+
+    return result;
+  }
+
+  /**
+   * Extract cost from a step return value if it looks like a PlanResult.
+   * A missing, non-numeric, or non-finite totalCostUsd counts as 0 so summed totals never become NaN.
+   */
+  private extractCost(value: unknown): number {
+    if (typeof value !== 'object' || value === null) return 0;
+    const cost = (value as { totalCostUsd?: unknown }).totalCostUsd;
+    return typeof cost === 'number' && Number.isFinite(cost) ? cost : 0;
+  }
+}
--- a/sdk/src/logger.test.ts
+++ b/sdk/src/logger.test.ts
@@ -0,0 +1,149 @@
+import { describe, it, expect, beforeEach } from 'vitest';
+import { Writable } from 'node:stream';
+import { GSDLogger } from './logger.js';
+import type { LogEntry } from './logger.js';
+import { PhaseType } from './types.js';
+
+// ─── Test output capture ─────────────────────────────────────────────────────
+
+class BufferStream extends Writable {
+  lines: string[] = []; // every non-empty line written so far
+  _write(chunk: Buffer, _encoding: string, callback: () => void): void {
+    const nonEmpty = chunk.toString().split('\n').filter(text => text !== '');
+    for (const text of nonEmpty) this.lines.push(text);
+    callback();
+  }
+}
+/** Parse one captured JSON log line; the result is cast to LogEntry without validation. */
+function parseLogEntry(line: string): LogEntry {
+  return JSON.parse(line) as LogEntry;
+}
+
+// ─── Tests ───────────────────────────────────────────────────────────────────
+// Exercises GSDLogger against an in-memory stream: JSON shape, level filtering, context fields, setters.
+describe('GSDLogger', () => {
+  let output: BufferStream; // capture stream, recreated before every test
+
+  beforeEach(() => {
+    output = new BufferStream();
+  });
+
+  it('outputs valid JSON on each log call', () => {
+    const logger = new GSDLogger({ output, level: 'debug' });
+    logger.info('test message');
+
+    expect(output.lines).toHaveLength(1);
+    expect(() => JSON.parse(output.lines[0]!)).not.toThrow();
+  });
+
+  it('includes required fields: timestamp, level, message', () => {
+    const logger = new GSDLogger({ output, level: 'debug' });
+    logger.info('hello world');
+
+    const entry = parseLogEntry(output.lines[0]!);
+    expect(entry.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
+    expect(entry.level).toBe('info');
+    expect(entry.message).toBe('hello world');
+  });
+
+  it('filters messages below minimum log level', () => {
+    const logger = new GSDLogger({ output, level: 'warn' }); // debug and info rank below warn
+
+    logger.debug('should be dropped');
+    logger.info('should be dropped');
+    logger.warn('should appear');
+    logger.error('should appear');
+
+    expect(output.lines).toHaveLength(2);
+    expect(parseLogEntry(output.lines[0]!).level).toBe('warn');
+    expect(parseLogEntry(output.lines[1]!).level).toBe('error');
+  });
+
+  it('defaults to info level filtering', () => {
+    const logger = new GSDLogger({ output }); // no level option given
+
+    logger.debug('dropped');
+    logger.info('kept');
+
+    expect(output.lines).toHaveLength(1);
+    expect(parseLogEntry(output.lines[0]!).level).toBe('info');
+  });
+
+  it('writes to custom output stream', () => {
+    const customOutput = new BufferStream();
+    const logger = new GSDLogger({ output: customOutput, level: 'debug' });
+    logger.info('custom');
+
+    expect(customOutput.lines).toHaveLength(1);
+    expect(output.lines).toHaveLength(0); // the shared stream stays untouched
+  });
+
+  it('includes phase, plan, and sessionId context when set', () => {
+    const logger = new GSDLogger({
+      output,
+      level: 'debug',
+      phase: PhaseType.Execute,
+      plan: 'test-plan',
+      sessionId: 'sess-123',
+    });
+
+    logger.info('context test');
+
+    const entry = parseLogEntry(output.lines[0]!);
+    expect(entry.phase).toBe('execute'); // serialized value expected for PhaseType.Execute
+    expect(entry.plan).toBe('test-plan');
+    expect(entry.sessionId).toBe('sess-123');
+  });
+
+  it('includes extra data when provided', () => {
+    const logger = new GSDLogger({ output, level: 'debug' });
+    logger.info('with data', { count: 42, tool: 'Bash' });
+
+    const entry = parseLogEntry(output.lines[0]!);
+    expect(entry.data).toEqual({ count: 42, tool: 'Bash' });
+  });
+
+  it('omits optional fields when not set', () => {
+    const logger = new GSDLogger({ output, level: 'debug' });
+    logger.info('minimal');
+
+    const entry = parseLogEntry(output.lines[0]!);
+    expect(entry.phase).toBeUndefined();
+    expect(entry.plan).toBeUndefined();
+    expect(entry.sessionId).toBeUndefined();
+    expect(entry.data).toBeUndefined();
+  });
+
+  it('supports runtime context updates via setters', () => {
+    const logger = new GSDLogger({ output, level: 'debug' });
+
+    logger.info('before');
+    logger.setPhase(PhaseType.Research);
+    logger.setPlan('my-plan');
+    logger.setSessionId('sess-456');
+    logger.info('after');
+
+    const before = parseLogEntry(output.lines[0]!);
+    const after = parseLogEntry(output.lines[1]!);
+
+    expect(before.phase).toBeUndefined();
+    expect(after.phase).toBe('research');
+    expect(after.plan).toBe('my-plan');
+    expect(after.sessionId).toBe('sess-456');
+  });
+
+  it('emits all four log levels correctly', () => {
+    const logger = new GSDLogger({ output, level: 'debug' });
+
+    logger.debug('d');
+    logger.info('i');
+    logger.warn('w');
+    logger.error('e');
+
+    expect(output.lines).toHaveLength(4);
+    expect(parseLogEntry(output.lines[0]!).level).toBe('debug');
+    expect(parseLogEntry(output.lines[1]!).level).toBe('info');
+    expect(parseLogEntry(output.lines[2]!).level).toBe('warn');
+    expect(parseLogEntry(output.lines[3]!).level).toBe('error');
+  });
+});
--- a/sdk/src/logger.ts
+++ b/sdk/src/logger.ts
@@ -0,0 +1,113 @@
+/**
+ * Structured JSON logger for GSD debugging.
+ *
+ * Writes structured log entries to stderr (or configurable writable stream).
+ * This is a debugging facility (R019), separate from the event stream.
+ */
+
+import type { Writable } from 'node:stream';
+import type { PhaseType } from './types.js';
+
+// ─── Log levels ──────────────────────────────────────────────────────────────
+
+export type LogLevel = 'debug' | 'info' | 'warn' | 'error'; // listed from lowest to highest priority
+// Numeric rank per level; log() drops entries ranked below the logger's minimum level.
+const LOG_LEVEL_PRIORITY: Record<LogLevel, number> = {
+  debug: 0,
+  info: 1,
+  warn: 2,
+  error: 3,
+};
+
+// ─── Log entry ───────────────────────────────────────────────────────────────
+/** One structured log record; GSDLogger serializes each entry as a single JSON line. */
+export interface LogEntry {
+  timestamp: string; // produced by Date.prototype.toISOString()
+  level: LogLevel;
+  phase?: PhaseType; // written only when a phase context is set
+  plan?: string; // written only when a plan context is set
+  sessionId?: string; // written only when a session id is set
+  message: string;
+  data?: Record<string, unknown>; // caller-supplied extra fields, written only when provided
+}
+
+// ─── Logger options ──────────────────────────────────────────────────────────
+/** Constructor options for GSDLogger; every field is optional. */
+export interface GSDLoggerOptions {
+  /** Minimum log level to output. Default: 'info'. */
+  level?: LogLevel;
+  /** Output stream. Default: process.stderr. */
+  output?: Writable;
+  /** Phase context for all log entries. */
+  phase?: PhaseType;
+  /** Plan name context for all log entries. */
+  plan?: string;
+  /** Session ID context for all log entries. */
+  sessionId?: string;
+}
+
+// ─── Logger class ────────────────────────────────────────────────────────────
+/** Leveled JSON-lines logger; phase, plan, and sessionId context is attached to each entry when set. */
+export class GSDLogger {
+  private readonly minLevel: number;
+  private readonly output: Writable;
+  private phase?: PhaseType;
+  private plan?: string;
+  private sessionId?: string;
+
+  constructor(options: GSDLoggerOptions = {}) {
+    this.minLevel = LOG_LEVEL_PRIORITY[options.level ?? 'info'];
+    this.output = options.output ?? process.stderr;
+    this.phase = options.phase;
+    this.plan = options.plan;
+    this.sessionId = options.sessionId;
+  }
+
+  /** Set phase context for subsequent log entries. */
+  setPhase(phase: PhaseType | undefined): void {
+    this.phase = phase;
+  }
+
+  /** Set plan context for subsequent log entries. */
+  setPlan(plan: string | undefined): void {
+    this.plan = plan;
+  }
+
+  /** Set session ID context for subsequent log entries. */
+  setSessionId(sessionId: string | undefined): void {
+    this.sessionId = sessionId;
+  }
+
+  debug(message: string, data?: Record<string, unknown>): void {
+    this.log('debug', message, data);
+  }
+  info(message: string, data?: Record<string, unknown>): void {
+    this.log('info', message, data);
+  }
+  warn(message: string, data?: Record<string, unknown>): void {
+    this.log('warn', message, data);
+  }
+  error(message: string, data?: Record<string, unknown>): void {
+    this.log('error', message, data);
+  }
+
+  /** Write one JSON line when `level` meets the minimum; unserializable `data` is replaced by an error marker instead of throwing. */
+  private log(level: LogLevel, message: string, data?: Record<string, unknown>): void {
+    if (LOG_LEVEL_PRIORITY[level] < this.minLevel) return;
+    const entry: LogEntry = { timestamp: new Date().toISOString(), level, message };
+    if (this.phase !== undefined) entry.phase = this.phase;
+    if (this.plan !== undefined) entry.plan = this.plan;
+    if (this.sessionId !== undefined) entry.sessionId = this.sessionId;
+    if (data !== undefined) entry.data = data;
+
+    let line: string;
+    try {
+      line = JSON.stringify(entry);
+    } catch (err) {
+      // JSON.stringify throws on circular references and BigInt values; a debug logger must not crash its caller.
+      const reason = err instanceof Error ? err.message : String(err);
+      line = JSON.stringify({ ...entry, data: { serializationError: reason } });
+    }
+    this.output.write(line + '\n');
+  }
+}
--- a/sdk/src/milestone-runner.test.ts
+++ b/sdk/src/milestone-runner.test.ts
@@ -0,0 +1,415 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import type {
+  PhaseRunnerResult,
+  RoadmapPhaseInfo,
+  RoadmapAnalysis,
+  GSDEvent,
+  MilestoneRunnerOptions,
+} from './types.js';
+import { GSDEventType } from './types.js';
+
+// ─── Mock modules ────────────────────────────────────────────────────────────
+
+// Replace every module that the GSD constructor + runPhase pull in with vi.fn() stubs
+vi.mock('./plan-parser.js', () => ({
+  parsePlan: vi.fn(),
+  parsePlanFile: vi.fn(),
+}));
+
+vi.mock('./config.js', () => ({
+  loadConfig: vi.fn().mockResolvedValue({ // resolves with a minimal config object
+    model_profile: 'test-model',
+    tools: [],
+    phases: {},
+  }),
+}));
+
+vi.mock('./session-runner.js', () => ({
+  runPlanSession: vi.fn(),
+  runPhaseStepSession: vi.fn(),
+}));
+
+vi.mock('./prompt-builder.js', () => ({
+  buildExecutorPrompt: vi.fn(),
+  parseAgentTools: vi.fn().mockReturnValue([]),
+}));
+
+vi.mock('./event-stream.js', () => {
+  return {
+    GSDEventStream: vi.fn().mockImplementation(() => ({ // constructor stub returning a bare object of mocks
+      emitEvent: vi.fn(), // given a capturing implementation in the suite's beforeEach
+      on: vi.fn(),
+      emit: vi.fn(),
+      addTransport: vi.fn(),
+    })),
+  };
+});
+
+vi.mock('./phase-runner.js', () => ({
+  PhaseRunner: vi.fn(),
+  PhaseRunnerError: class extends Error { // anonymous Error subclass; only `name` is set
+    name = 'PhaseRunnerError';
+  },
+}));
+
+vi.mock('./context-engine.js', () => ({
+  ContextEngine: vi.fn(),
+  PHASE_FILE_MANIFEST: [],
+}));
+
+vi.mock('./phase-prompt.js', () => ({
+  PromptFactory: vi.fn(),
+  extractBlock: vi.fn(),
+  extractSteps: vi.fn(),
+  PHASE_WORKFLOW_MAP: {},
+}));
+
+vi.mock('./gsd-tools.js', () => ({
+  GSDTools: vi.fn().mockImplementation(() => ({ // default stub; beforeEach swaps in a shared roadmapAnalyze mock
+    roadmapAnalyze: vi.fn(),
+  })),
+  GSDToolsError: class extends Error { // anonymous Error subclass; only `name` is set
+    name = 'GSDToolsError';
+  },
+}));
+
+import { GSD } from './index.js';
+import { GSDTools } from './gsd-tools.js';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+// RoadmapPhaseInfo fixture: phase '1' ('Auth'), not started and not complete.
+// Any field present in `overrides` replaces the default.
+function makePhaseInfo(overrides: Partial<RoadmapPhaseInfo> = {}): RoadmapPhaseInfo {
+  const defaults: RoadmapPhaseInfo = {
+    number: '1', disk_status: 'not_started',
+    roadmap_complete: false, phase_name: 'Auth',
+  };
+  return { ...defaults, ...overrides };
+}
+
+// PhaseRunnerResult fixture: a successful, step-less run of phase '1' ('Auth')
+// costing 0.50 USD over 5000 ms. Fields in `overrides` replace these defaults.
+function makePhaseResult(overrides: Partial<PhaseRunnerResult> = {}): PhaseRunnerResult {
+  const defaults: PhaseRunnerResult = {
+    phaseNumber: '1', phaseName: 'Auth',
+    steps: [],
+    success: true,
+    totalCostUsd: 0.50, totalDurationMs: 5000,
+  };
+  return { ...defaults, ...overrides };
+}
+
+function makeAnalysis(phases: RoadmapPhaseInfo[]): RoadmapAnalysis {
+  return { phases }; // the value the tests make the roadmapAnalyze mock resolve with
+}
+
+// ─── Tests ───────────────────────────────────────────────────────────────────
+
+// GSD.run() milestone loop, driven through a mocked GSDTools.roadmapAnalyze and a
+// spied gsd.runPhase: phase discovery, ordering, cost roll-up, events, early stops.
+describe('GSD.run()', () => {
+  let gsd: GSD;
+  let mockRoadmapAnalyze: ReturnType<typeof vi.fn>;
+  let events: GSDEvent[];
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+
+    gsd = new GSD({ projectDir: '/tmp/test-project' });
+    events = [];
+
+    // Capture emitted events
+    (gsd.eventStream.emitEvent as ReturnType<typeof vi.fn>).mockImplementation(
+      (event: GSDEvent) => events.push(event),
+    );
+
+    // Every GSDTools constructed from here on exposes the shared roadmapAnalyze mock
+    mockRoadmapAnalyze = vi.fn();
+    vi.mocked(GSDTools).mockImplementation(
+      () => ({ roadmapAnalyze: mockRoadmapAnalyze }) as any, // partial stub, hence the cast
+    );
+  });
+
+  it('discovers phases and calls runPhase for each incomplete one', async () => {
+    const phases = [
+      makePhaseInfo({ number: '1', phase_name: 'Auth', roadmap_complete: false }),
+      makePhaseInfo({ number: '2', phase_name: 'Dashboard', roadmap_complete: false }),
+    ];
+
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis(phases)) // initial discovery
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ])) // after phase 1
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: true }),
+      ])); // after phase 2
+
+    const runPhaseSpy = vi.spyOn(gsd, 'runPhase')
+      .mockResolvedValueOnce(makePhaseResult({ phaseNumber: '1' }))
+      .mockResolvedValueOnce(makePhaseResult({ phaseNumber: '2' }));
+
+    const result = await gsd.run('build the app');
+
+    expect(result.success).toBe(true);
+    expect(result.phases).toHaveLength(2);
+    expect(runPhaseSpy).toHaveBeenCalledTimes(2);
+    expect(runPhaseSpy).toHaveBeenCalledWith('1', undefined);
+    expect(runPhaseSpy).toHaveBeenCalledWith('2', undefined);
+  });
+
+  it('skips phases where roadmap_complete === true', async () => {
+    const phases = [
+      makePhaseInfo({ number: '1', roadmap_complete: true }),
+      makePhaseInfo({ number: '2', roadmap_complete: false }),
+      makePhaseInfo({ number: '3', roadmap_complete: true }),
+    ];
+
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis(phases))
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: true }),
+        makePhaseInfo({ number: '3', roadmap_complete: true }),
+      ]));
+
+    const runPhaseSpy = vi.spyOn(gsd, 'runPhase')
+      .mockResolvedValueOnce(makePhaseResult({ phaseNumber: '2' }));
+
+    const result = await gsd.run('build it');
+
+    expect(result.success).toBe(true);
+    expect(result.phases).toHaveLength(1);
+    expect(runPhaseSpy).toHaveBeenCalledTimes(1);
+    expect(runPhaseSpy).toHaveBeenCalledWith('2', undefined);
+  });
+
+  it('re-discovers phases after each completion to catch dynamically inserted phases', async () => {
+    // Initially phase 1 and 2 are incomplete
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: false }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ]))
+      // After phase 1, a new phase 1.5 was inserted
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+        makePhaseInfo({ number: '1.5', phase_name: 'Hotfix', roadmap_complete: false }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ]))
+      // After phase 1.5 completes
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+        makePhaseInfo({ number: '1.5', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ]))
+      // After phase 2 completes
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+        makePhaseInfo({ number: '1.5', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: true }),
+      ]));
+
+    const runPhaseSpy = vi.spyOn(gsd, 'runPhase')
+      .mockResolvedValueOnce(makePhaseResult({ phaseNumber: '1' }))
+      .mockResolvedValueOnce(makePhaseResult({ phaseNumber: '1.5', phaseName: 'Hotfix' }))
+      .mockResolvedValueOnce(makePhaseResult({ phaseNumber: '2' }));
+
+    const result = await gsd.run('build it');
+
+    expect(result.success).toBe(true);
+    expect(result.phases).toHaveLength(3);
+    expect(runPhaseSpy).toHaveBeenCalledTimes(3);
+    // The dynamically inserted phase 1.5 was executed
+    expect(runPhaseSpy).toHaveBeenNthCalledWith(2, '1.5', undefined);
+  });
+
+  it('aggregates costs from all phases', async () => {
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: false }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ]))
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ]))
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: true }),
+      ]));
+
+    vi.spyOn(gsd, 'runPhase')
+      .mockResolvedValueOnce(makePhaseResult({ totalCostUsd: 1.25 }))
+      .mockResolvedValueOnce(makePhaseResult({ totalCostUsd: 0.75 }));
+
+    const result = await gsd.run('build it');
+
+    expect(result.totalCostUsd).toBeCloseTo(2.0, 2);
+  });
+
+  it('emits MilestoneStart and MilestoneComplete events', async () => {
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: false }),
+      ]))
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+      ]));
+
+    vi.spyOn(gsd, 'runPhase')
+      .mockResolvedValueOnce(makePhaseResult({ totalCostUsd: 0.50 }));
+
+    await gsd.run('build it');
+
+    const startEvents = events.filter(e => e.type === GSDEventType.MilestoneStart);
+    const completeEvents = events.filter(e => e.type === GSDEventType.MilestoneComplete);
+
+    expect(startEvents).toHaveLength(1);
+    expect(completeEvents).toHaveLength(1);
+
+    const start = startEvents[0] as any;
+    expect(start.phaseCount).toBe(1);
+    expect(start.prompt).toBe('build it');
+
+    const complete = completeEvents[0] as any;
+    expect(complete.success).toBe(true);
+    expect(complete.phasesCompleted).toBe(1);
+    expect(complete.totalCostUsd).toBeCloseTo(0.50, 2);
+  });
+
+  it('stops on phase failure', async () => {
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: false }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ]));
+
+    const runPhaseSpy = vi.spyOn(gsd, 'runPhase')
+      .mockResolvedValueOnce(makePhaseResult({ phaseNumber: '1', success: false }));
+
+    const result = await gsd.run('build it');
+
+    expect(result.success).toBe(false);
+    expect(result.phases).toHaveLength(1);
+    expect(runPhaseSpy).toHaveBeenCalledTimes(1); // phase 2 was never started
+  });
+
+  it('handles empty phase list', async () => {
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis([]));
+
+    const runPhaseSpy = vi.spyOn(gsd, 'runPhase');
+
+    const result = await gsd.run('build it');
+
+    expect(result.success).toBe(true);
+    expect(result.phases).toHaveLength(0);
+    expect(runPhaseSpy).not.toHaveBeenCalled();
+    expect(result.totalCostUsd).toBe(0);
+  });
+
+  it('sorts phases numerically, not lexicographically', async () => {
+    const phases = [
+      makePhaseInfo({ number: '10', phase_name: 'Ten', roadmap_complete: false }),
+      makePhaseInfo({ number: '2', phase_name: 'Two', roadmap_complete: false }),
+      makePhaseInfo({ number: '1.5', phase_name: 'OnePointFive', roadmap_complete: false }),
+    ];
+
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis(phases))
+      // After phase 1.5
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1.5', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+        makePhaseInfo({ number: '10', roadmap_complete: false }),
+      ]))
+      // After phase 2
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1.5', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: true }),
+        makePhaseInfo({ number: '10', roadmap_complete: false }),
+      ]))
+      // After phase 10
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1.5', roadmap_complete: true }),
+        makePhaseInfo({ number: '2', roadmap_complete: true }),
+        makePhaseInfo({ number: '10', roadmap_complete: true }),
+      ]));
+
+    const executionOrder: string[] = [];
+    vi.spyOn(gsd, 'runPhase').mockImplementation(async (phaseNumber: string) => {
+      executionOrder.push(phaseNumber);
+      return makePhaseResult({ phaseNumber });
+    });
+
+    await gsd.run('build it');
+
+    // Numeric order: 1.5 → 2 → 10 (not lexicographic: "10" < "2")
+    expect(executionOrder).toEqual(['1.5', '2', '10']);
+  });
+
+  it('handles phase throwing an unexpected error', async () => {
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', phase_name: 'Broken', roadmap_complete: false }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ]));
+
+    const runPhaseSpy = vi.spyOn(gsd, 'runPhase')
+      .mockRejectedValueOnce(new Error('Unexpected explosion'));
+
+    const result = await gsd.run('build it');
+
+    expect(result.success).toBe(false);
+    expect(result.phases).toHaveLength(1);
+    expect(result.phases[0].success).toBe(false);
+    expect(result.phases[0].phaseNumber).toBe('1');
+    expect(runPhaseSpy).toHaveBeenCalledTimes(1); // phase 2 was never started
+  });
+
+  it('passes MilestoneRunnerOptions through to runPhase', async () => {
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: false }),
+      ]))
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: true }),
+      ]));
+
+    const runPhaseSpy = vi.spyOn(gsd, 'runPhase')
+      .mockResolvedValueOnce(makePhaseResult());
+
+    const opts: MilestoneRunnerOptions = {
+      model: 'claude-sonnet-4-6',
+      maxBudgetPerStep: 2.0,
+      onPhaseComplete: vi.fn(),
+    };
+
+    await gsd.run('build it', opts);
+
+    expect(runPhaseSpy).toHaveBeenCalledWith('1', opts);
+  });
+
+  it('respects onPhaseComplete returning stop', async () => {
+    mockRoadmapAnalyze
+      .mockResolvedValueOnce(makeAnalysis([
+        makePhaseInfo({ number: '1', roadmap_complete: false }),
+        makePhaseInfo({ number: '2', roadmap_complete: false }),
+      ]));
+
+    const runPhaseSpy = vi.spyOn(gsd, 'runPhase')
+      .mockResolvedValueOnce(makePhaseResult({ phaseNumber: '1' }));
+
+    const result = await gsd.run('build it', {
+      onPhaseComplete: async () => 'stop',
+    });
+
+    expect(runPhaseSpy).toHaveBeenCalledTimes(1); // callback said stop, so phase 2 never ran
+    expect(result.phases).toHaveLength(1);
+    expect(result.success).toBe(true);
+  });
+});
--- a/sdk/src/phase-prompt.test.ts
+++ b/sdk/src/phase-prompt.test.ts
@@ -0,0 +1,403 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { mkdtemp, mkdir, writeFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { PromptFactory, extractBlock, extractSteps, PHASE_WORKFLOW_MAP } from './phase-prompt.js';
+import { PhaseType } from './types.js';
+import type { ContextFiles, ParsedPlan, PlanFrontmatter } from './types.js';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+async function createTempDir(): Promise<string> {
+  return mkdtemp(join(tmpdir(), 'gsd-prompt-')); // new directory under the OS temp dir, name prefixed 'gsd-prompt-'
+}
+
+/** Render a workflow fixture: a <purpose> block followed by a <process> of numbered <step> blocks. */
+function makeWorkflowContent(purpose: string, steps: string[]): string {
+  const renderStep = (body: string, index: number): string =>
+    `<step name="step_${index + 1}">\n${body}\n</step>`;
+  return `<purpose>\n${purpose}\n</purpose>\n\n<process>\n${steps.map(renderStep).join('\n\n')}\n</process>`;
+}
+
+function makeAgentDef(name: string, tools: string, role: string): string {
+  return `---\nname: ${name}\ntools: ${tools}\n---\n\n<role>\n${role}\n</role>`; // '---' header with name/tools, then a <role> block
+}
+
+/** Build a minimal ParsedPlan fixture; fields present in `overrides` win over the defaults. */
+function makeParsedPlan(overrides?: Partial<ParsedPlan>): ParsedPlan {
+  const frontmatter = {
+    phase: 'execute',
+    plan: 'test-plan',
+    type: 'feature',
+    wave: 1,
+    depends_on: [],
+    files_modified: [],
+    autonomous: true,
+    requirements: [],
+    must_haves: { truths: [], artifacts: [], key_links: [] },
+  } as PlanFrontmatter;
+  const defaults: ParsedPlan = {
+    frontmatter,
+    objective: 'Test objective',
+    execution_context: [], context_refs: [], tasks: [],
+    raw: '',
+  };
+  return { ...defaults, ...overrides };
+}
+
+// ─── extractBlock tests ──────────────────────────────────────────────────────
+
+describe('extractBlock', () => { // trimmed inner text of the first matching block, '' when absent
+  it('extracts content from a simple block', () => {
+    const inner = extractBlock('<purpose>\nDo the thing.\n</purpose>', 'purpose');
+    expect(inner).toBe('Do the thing.');
+  });
+
+  it('extracts content from block with attributes', () => {
+    const inner = extractBlock('<step name="init" priority="first">\nLoad context.\n</step>', 'step');
+    expect(inner).toBe('Load context.');
+  });
+
+  it('returns empty string for missing block', () => {
+    const inner = extractBlock('<purpose>Something</purpose>', 'role');
+    expect(inner).toBe('');
+  });
+
+  it('extracts multiline content', () => {
+    const inner = extractBlock('<role>\nLine 1\nLine 2\nLine 3\n</role>', 'role');
+    expect(inner).toBe('Line 1\nLine 2\nLine 3');
+  });
+});
+
+describe('extractSteps', () => { // each <step name="..."> becomes a { name, content } pair
+  it('extracts multiple steps from process content', () => {
+    const processXml = `
+<step name="init">Initialize</step>
+<step name="execute">Run tasks</step>
+<step name="verify">Check results</step>`;
+
+    expect(extractSteps(processXml)).toEqual([
+      { name: 'init', content: 'Initialize' },
+      { name: 'execute', content: 'Run tasks' },
+      { name: 'verify', content: 'Check results' },
+    ]);
+  });
+
+  it('returns empty array for no steps', () => {
+    expect(extractSteps('no steps here')).toEqual([]);
+  });
+
+  it('handles steps with priority attributes', () => {
+    const parsed = extractSteps('<step name="init" priority="first">\nDo first.\n</step>');
+    expect(parsed).toHaveLength(1);
+    const [only] = parsed;
+    expect(only.name).toBe('init');
+    expect(only.content).toBe('Do first.');
+  });
+});
+
+// ─── PromptFactory tests ─────────────────────────────────────────────────────
+
+describe('PromptFactory', () => { // each test runs against a fresh temp dir holding workflows/ and agents/
+  let tempDir: string;
+  let workflowsDir: string;
+  let agentsDir: string;
+
+  beforeEach(async () => {
+    tempDir = await createTempDir();
+    workflowsDir = join(tempDir, 'workflows');
+    agentsDir = join(tempDir, 'agents');
+    await mkdir(workflowsDir, { recursive: true });
+    await mkdir(agentsDir, { recursive: true });
+  });
+
+  afterEach(async () => { // remove the temp tree created in beforeEach
+    await rm(tempDir, { recursive: true, force: true });
+  });
+
+  function makeFactory(): PromptFactory { // factory rooted at the temp dir; no projectAgentsDir is passed
+    return new PromptFactory({
+      gsdInstallDir: tempDir,
+      agentsDir,
+    });
+  }
+
+  describe('buildPrompt', () => {
+    it('assembles research prompt with role + purpose + process + context', async () => {
+      await writeFile(
+        join(workflowsDir, 'research-phase.md'),
+        makeWorkflowContent('Research the phase.', ['Gather info', 'Analyze findings']),
+      );
+      await writeFile(
+        join(agentsDir, 'gsd-phase-researcher.md'),
+        makeAgentDef('gsd-phase-researcher', 'Read, Grep, Bash', 'You are a researcher.'),
+      );
+
+      const factory = makeFactory();
+      const contextFiles: ContextFiles = {
+        state: '# State\nproject: test',
+        roadmap: '# Roadmap\nphases listed',
+      };
+
+      const prompt = await factory.buildPrompt(PhaseType.Research, null, contextFiles);
+
+      expect(prompt).toContain('## Role');
+      expect(prompt).toContain('You are a researcher.');
+      expect(prompt).toContain('## Purpose');
+      expect(prompt).toContain('Research the phase.');
+      expect(prompt).toContain('## Process');
+      expect(prompt).toContain('Gather info');
+      expect(prompt).toContain('## Context');
+      expect(prompt).toContain('# State');
+      expect(prompt).toContain('# Roadmap');
+      expect(prompt).toContain('## Phase Instructions');
+    });
+
+    it('assembles plan prompt with all context files', async () => {
+      await writeFile(
+        join(workflowsDir, 'plan-phase.md'),
+        makeWorkflowContent('Plan the implementation.', ['Break down tasks']),
+      );
+      await writeFile(
+        join(agentsDir, 'gsd-planner.md'),
+        makeAgentDef('gsd-planner', 'Read, Write, Bash', 'You are a planner.'),
+      );
+
+      const factory = makeFactory();
+      const contextFiles: ContextFiles = {
+        state: '# State',
+        roadmap: '# Roadmap',
+        context: '# Context',
+        research: '# Research',
+        requirements: '# Requirements',
+      };
+
+      const prompt = await factory.buildPrompt(PhaseType.Plan, null, contextFiles);
+
+      expect(prompt).toContain('You are a planner.');
+      expect(prompt).toContain('Plan the implementation.');
+      expect(prompt).toContain('# State');
+      expect(prompt).toContain('# Research');
+      expect(prompt).toContain('# Requirements');
+      expect(prompt).toContain('executable plans');
+    });
+
+    it('delegates execute phase with plan to buildExecutorPrompt', async () => {
+      await writeFile(
+        join(agentsDir, 'gsd-executor.md'),
+        makeAgentDef('gsd-executor', 'Read, Write, Edit, Bash', 'You are an executor.'),
+      );
+
+      const factory = makeFactory();
+      const plan = makeParsedPlan({ objective: 'Build the auth system' });
+      const contextFiles: ContextFiles = { state: '# State' };
+
+      const prompt = await factory.buildPrompt(PhaseType.Execute, plan, contextFiles);
+
+      // buildExecutorPrompt produces structured output with ## Objective
+      expect(prompt).toContain('## Objective');
+      expect(prompt).toContain('Build the auth system');
+      expect(prompt).toContain('## Role');
+      expect(prompt).toContain('You are an executor.');
+    });
+
+    it('handles execute phase without plan (non-delegation path)', async () => {
+      await writeFile(
+        join(workflowsDir, 'execute-plan.md'),
+        makeWorkflowContent('Execute the plan.', ['Run tasks']),
+      );
+      await writeFile(
+        join(agentsDir, 'gsd-executor.md'),
+        makeAgentDef('gsd-executor', 'Read, Write, Edit, Bash', 'You are an executor.'),
+      );
+
+      const factory = makeFactory();
+      const contextFiles: ContextFiles = { state: '# State' };
+
+      const prompt = await factory.buildPrompt(PhaseType.Execute, null, contextFiles);
+
+      // With a null plan the prompt is assembled from the workflow + agent files instead
+      expect(prompt).toContain('## Role');
+      expect(prompt).toContain('You are an executor.');
+      expect(prompt).toContain('## Purpose');
+      expect(prompt).toContain('Execute the plan.');
+    });
+
+    it('assembles verify prompt with phase instructions', async () => {
+      await writeFile(
+        join(workflowsDir, 'verify-phase.md'),
+        makeWorkflowContent('Verify phase goals.', ['Check artifacts', 'Run tests']),
+      );
+      await writeFile(
+        join(agentsDir, 'gsd-verifier.md'),
+        makeAgentDef('gsd-verifier', 'Read, Bash, Grep', 'You are a verifier.'),
+      );
+
+      const factory = makeFactory();
+      const contextFiles: ContextFiles = {
+        state: '# State',
+        roadmap: '# Roadmap',
+        requirements: '# Requirements',
+      };
+
+      const prompt = await factory.buildPrompt(PhaseType.Verify, null, contextFiles);
+
+      expect(prompt).toContain('You are a verifier.');
+      expect(prompt).toContain('Verify phase goals.');
+      expect(prompt).toContain('goal achievement');
+    });
+
+    it('assembles discuss prompt without agent role (no dedicated agent)', async () => {
+      await writeFile(
+        join(workflowsDir, 'discuss-phase.md'),
+        makeWorkflowContent('Discuss implementation decisions.', ['Identify areas']),
+      );
+
+      const factory = makeFactory();
+      const contextFiles: ContextFiles = { state: '# State' };
+
+      const prompt = await factory.buildPrompt(PhaseType.Discuss, null, contextFiles);
+
+      // Discuss has no agent, so no Role section
+      expect(prompt).not.toContain('## Role');
+      expect(prompt).toContain('## Purpose');
+      expect(prompt).toContain('Discuss implementation decisions.');
+      expect(prompt).toContain('## Phase Instructions');
+      expect(prompt).toContain('Extract implementation decisions');
+    });
+
+    it('handles missing workflow file gracefully', async () => {
+      // No workflow file is written for this test; only the agent definition exists
+      await writeFile(
+        join(agentsDir, 'gsd-phase-researcher.md'),
+        makeAgentDef('gsd-phase-researcher', 'Read, Bash', 'You are a researcher.'),
+      );
+
+      const factory = makeFactory();
+      const contextFiles: ContextFiles = { state: '# State' };
+
+      const prompt = await factory.buildPrompt(PhaseType.Research, null, contextFiles);
+
+      // Should still produce a prompt with role and context, but no Purpose section
+      expect(prompt).toContain('## Role');
+      expect(prompt).toContain('## Context');
+      expect(prompt).not.toContain('## Purpose');
+    });
+
+    it('handles missing agent def gracefully', async () => {
+      await writeFile(
+        join(workflowsDir, 'research-phase.md'),
+        makeWorkflowContent('Research the phase.', ['Gather info']),
+      );
+      // No agent file is written for this test
+
+      const factory = makeFactory();
+      const contextFiles: ContextFiles = { state: '# State' };
+
+      const prompt = await factory.buildPrompt(PhaseType.Research, null, contextFiles);
+
+      expect(prompt).not.toContain('## Role');
+      expect(prompt).toContain('## Purpose');
+      expect(prompt).toContain('Research the phase.');
+    });
+
+    it('omits empty context section when no files provided', async () => {
+      await writeFile(
+        join(workflowsDir, 'discuss-phase.md'),
+        makeWorkflowContent('Discuss things.', ['Talk']),
+      );
+
+      const factory = makeFactory();
+      const contextFiles: ContextFiles = {};
+
+      const prompt = await factory.buildPrompt(PhaseType.Discuss, null, contextFiles);
+
+      expect(prompt).not.toContain('## Context');
+    });
+  });
+
+  describe('loadWorkflowFile', () => {
+    it('loads existing workflow file', async () => {
+      await writeFile(
+        join(workflowsDir, 'research-phase.md'),
+        'workflow content',
+      );
+
+      const factory = makeFactory();
+      const content = await factory.loadWorkflowFile(PhaseType.Research);
+      expect(content).toBe('workflow content');
+    });
+
+    it('returns undefined for missing workflow file', async () => {
+      const factory = makeFactory();
+      const content = await factory.loadWorkflowFile(PhaseType.Research);
+      expect(content).toBeUndefined();
+    });
+  });
+
+  describe('loadAgentDef', () => {
+    it('loads agent def from agents dir', async () => {
+      await writeFile(
+        join(agentsDir, 'gsd-executor.md'),
+        'agent content',
+      );
+
+      const factory = makeFactory();
+      const content = await factory.loadAgentDef(PhaseType.Execute);
+      expect(content).toBe('agent content');
+    });
+
+    it('returns undefined for phases with no agent (discuss)', async () => {
+      const factory = makeFactory();
+      const content = await factory.loadAgentDef(PhaseType.Discuss);
+      expect(content).toBeUndefined();
+    });
+
+    it('falls back to project agents dir', async () => {
+      const projectAgentsDir = join(tempDir, 'project-agents');
+      await mkdir(projectAgentsDir, { recursive: true });
+      await writeFile( // written only to the project dir; agentsDir stays empty
+        join(projectAgentsDir, 'gsd-executor.md'),
+        'project agent content',
+      );
+
+      const factory = new PromptFactory({
+        gsdInstallDir: tempDir,
+        agentsDir,
+        projectAgentsDir,
+      });
+
+      const content = await factory.loadAgentDef(PhaseType.Execute);
+      expect(content).toBe('project agent content');
+    });
+
+    it('prefers user agents dir over project agents dir', async () => {
+      const projectAgentsDir = join(tempDir, 'project-agents');
+      await mkdir(projectAgentsDir, { recursive: true });
+      await writeFile(join(agentsDir, 'gsd-executor.md'), 'user agent'); // same filename in both dirs
+      await writeFile(join(projectAgentsDir, 'gsd-executor.md'), 'project agent');
+
+      const factory = new PromptFactory({
+        gsdInstallDir: tempDir,
+        agentsDir,
+        projectAgentsDir,
+      });
+
+      const content = await factory.loadAgentDef(PhaseType.Execute);
+      expect(content).toBe('user agent');
+    });
+  });
+});
+
+describe('PHASE_WORKFLOW_MAP', () => { // every PhaseType must resolve to a markdown workflow file
+  it('maps all phase types to workflow filenames', () => {
+    Object.values(PhaseType).forEach((phaseType) => {
+      expect(PHASE_WORKFLOW_MAP[phaseType]).toBeDefined();
+      expect(PHASE_WORKFLOW_MAP[phaseType]).toMatch(/\.md$/);
+    });
+  });
+
+  it('execute phase maps to execute-plan.md (not execute-phase.md)', () => {
+    expect(PHASE_WORKFLOW_MAP[PhaseType.Execute]).toBe('execute-plan.md');
+  });
+});
--- a/sdk/src/phase-prompt.ts
+++ b/sdk/src/phase-prompt.ts
@@ -0,0 +1,233 @@
+/**
+ * Phase-aware prompt factory — assembles complete prompts for each phase type.
+ *
+ * Reads workflow .md + agent .md files from disk (D006), extracts structured
+ * blocks (<role>, <purpose>, <process>), and composes system prompts with
+ * injected context files per phase type.
+ */
+
+import { readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { homedir } from 'node:os';
+
+import type { ContextFiles, ParsedPlan } from './types.js';
+import { PhaseType } from './types.js';
+import { buildExecutorPrompt, parseAgentRole } from './prompt-builder.js';
+import { PHASE_AGENT_MAP } from './tool-scoping.js';
+
+// ─── Workflow file mapping ───────────────────────────────────────────────────
+
+/**
+ * Workflow file name for each phase type; loadWorkflowFile() resolves it under `<gsdInstallDir>/workflows`.
+ */
+const PHASE_WORKFLOW_MAP: Record<PhaseType, string> = {
+  [PhaseType.Execute]: 'execute-plan.md',
+  [PhaseType.Research]: 'research-phase.md',
+  [PhaseType.Plan]: 'plan-phase.md',
+  [PhaseType.Verify]: 'verify-phase.md',
+  [PhaseType.Discuss]: 'discuss-phase.md',
+};
+
+// ─── XML block extraction ────────────────────────────────────────────────────
+
+/**
+ * Extract the trimmed inner content of the first `<tagName ...>...</tagName>` block (case-insensitive).
+ * The tag name is regex-escaped and matched whole (`process` never opens on `<processor>`); '' if absent.
+ */
+export function extractBlock(content: string, tagName: string): string {
+  const tag = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const match = new RegExp(`<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${tag}>`, 'i').exec(content);
+  return match?.[1]?.trim() ?? '';
+}
+
+/**
+ * Collect every named <step> element found in a workflow's <process> body.
+ * Each result pairs the step's name attribute with its trimmed inner text.
+ */
+export function extractSteps(processContent: string): Array<{ name: string; content: string }> {
+  const stepPattern = /<step\s+name="([^"]*)"[^>]*>([\s\S]*?)<\/step>/gi;
+  const found: Array<{ name: string; content: string }> = [];
+
+  // matchAll walks the non-overlapping matches in document order.
+  for (const [, name, body] of processContent.matchAll(stepPattern)) {
+    found.push({
+      name,
+      content: body.trim(),
+    });
+  }
+
+  return found;
+}
+
+// ─── PromptFactory class ─────────────────────────────────────────────────────
+
+export class PromptFactory {
+  private readonly workflowsDir: string;
+  private readonly agentsDir: string;
+  private readonly projectAgentsDir?: string;
+
+  constructor(options?: {
+    gsdInstallDir?: string;
+    agentsDir?: string;
+    projectAgentsDir?: string;
+  }) {
+    const { gsdInstallDir, agentsDir, projectAgentsDir } = options ?? {};
+    this.workflowsDir = join(gsdInstallDir ?? join(homedir(), '.claude', 'get-shit-done'), 'workflows');
+    this.agentsDir = agentsDir ?? join(homedir(), '.claude', 'agents');
+    this.projectAgentsDir = projectAgentsDir;
+  }
+
+  /**
+   * Assemble the full prompt text for one phase.
+   *
+   * Execute + plan: returns buildExecutorPrompt(plan, agentDef) as-is.
+   * Otherwise: Role, Purpose, Process, Context and Phase Instructions sections, each only when non-empty.
+   */
+  async buildPrompt(
+    phaseType: PhaseType,
+    plan: ParsedPlan | null,
+    contextFiles: ContextFiles,
+  ): Promise<string> {
+    // Both paths need the agent definition, so load it once up front.
+    const agentDef = await this.loadAgentDef(phaseType);
+
+    // Execute phase with a plan: hand off to buildExecutorPrompt unchanged.
+    if (phaseType === PhaseType.Execute && plan) {
+      return buildExecutorPrompt(plan, agentDef);
+    }
+
+    const parts: string[] = [];
+
+    // ── Agent role ──
+    // Skipped when no agent file was loaded or parseAgentRole yields nothing.
+    const role = agentDef ? parseAgentRole(agentDef) : undefined;
+    if (role) {
+      parts.push(`## Role\n\n${role}`);
+    }
+
+    // ── Workflow purpose + process ──
+    const workflow = await this.loadWorkflowFile(phaseType);
+    if (workflow) {
+      const purpose = extractBlock(workflow, 'purpose');
+      if (purpose) {
+        parts.push(`## Purpose\n\n${purpose}`);
+      }
+
+      // The Process section is emitted only when at least one step was extracted.
+      const processBlock = extractBlock(workflow, 'process');
+      const steps = processBlock ? extractSteps(processBlock) : [];
+      if (steps.length > 0) {
+        const rendered = steps.map(({ name, content }) => `### ${name}\n\n${content}`);
+        parts.push(`## Process\n\n${rendered.join('\n\n')}`);
+      }
+    }
+
+    // ── Context files ──
+    const contextSection = this.formatContextFiles(contextFiles);
+    if (contextSection) {
+      parts.push(contextSection);
+    }
+
+    // ── Phase-specific instructions ──
+    const instructions = this.getPhaseInstructions(phaseType);
+    if (instructions) {
+      parts.push(`## Phase Instructions\n\n${instructions}`);
+    }
+
+    // Sections are separated by a single blank line.
+    return parts.join('\n\n');
+  }
+
+  /**
+   * Read the workflow file mapped to a phase type from the workflows dir.
+   * Resolves to the raw UTF-8 text, or undefined when the read fails.
+   */
+  async loadWorkflowFile(phaseType: PhaseType): Promise<string | undefined> {
+    const workflowPath = join(this.workflowsDir, PHASE_WORKFLOW_MAP[phaseType]);
+
+    try {
+      const text = await readFile(workflowPath, 'utf-8');
+      return text;
+    } catch {
+      // Any read failure (a missing file included) means "no workflow".
+      return undefined;
+    }
+  }
+
+  /**
+   * Read the agent definition mapped to a phase type.
+   * The user-level agents dir wins; the project-level dir is the fallback.
+   * Resolves to undefined when no agent is mapped or no candidate is readable.
+   */
+  async loadAgentDef(phaseType: PhaseType): Promise<string | undefined> {
+    const agentFile = PHASE_AGENT_MAP[phaseType];
+    // A falsy map entry means this phase has no agent to load.
+    if (!agentFile) return undefined;
+
+    // Search order: user-level dir, then the project-level dir when configured.
+    const searchDirs = this.projectAgentsDir
+      ? [this.agentsDir, this.projectAgentsDir]
+      : [this.agentsDir];
+    const candidates = searchDirs.map((dir) => join(dir, agentFile));
+
+    for (const candidate of candidates) {
+      try {
+        const text = await readFile(candidate, 'utf-8');
+        return text;
+      } catch {
+        // Unreadable at this location — fall through to the next one.
+      }
+    }
+
+    return undefined;
+  }
+
+  /**
+   * Render the non-empty context files as a "## Context" section, or null when there are none.
+   */
+  private formatContextFiles(contextFiles: ContextFiles): string | null {
+    const labelByKey: Record<keyof ContextFiles, string> = {
+      state: 'Project State (STATE.md)',
+      roadmap: 'Roadmap (ROADMAP.md)',
+      context: 'Context (CONTEXT.md)',
+      research: 'Research (RESEARCH.md)',
+      requirements: 'Requirements (REQUIREMENTS.md)',
+      config: 'Config (config.json)',
+      plan: 'Plan (PLAN.md)',
+      summary: 'Summary (SUMMARY.md)',
+    };
+
+    // Labels are walked in declaration order; falsy content is left out.
+    const rendered: string[] = [];
+    for (const key of Object.keys(labelByKey) as Array<keyof ContextFiles>) {
+      const body = contextFiles[key];
+      if (body) rendered.push(`### ${labelByKey[key]}\n\n${body}`);
+    }
+
+    if (rendered.length > 0) {
+      return `## Context\n\n${rendered.join('\n\n')}`;
+    }
+    return null;
+  }
+
+  /**
+   * Fixed extra guidance appended for a phase type, or null when the phase has none.
+   */
+  private getPhaseInstructions(phaseType: PhaseType): string | null {
+    if (phaseType === PhaseType.Research) {
+      return 'Focus on technical investigation. Do not modify source files. Produce RESEARCH.md with findings organized by topic, confidence levels (HIGH/MEDIUM/LOW), and specific recommendations.';
+    }
+    if (phaseType === PhaseType.Plan) {
+      return 'Create executable plans with task breakdown, dependency analysis, and verification criteria. Each task must have clear acceptance criteria and a done condition.';
+    }
+    if (phaseType === PhaseType.Verify) {
+      return 'Verify goal achievement, not just task completion. Start from what the phase SHOULD deliver, then verify it actually exists and works. Produce VERIFICATION.md with pass/fail for each criterion.';
+    }
+    if (phaseType === PhaseType.Discuss) {
+      return 'Extract implementation decisions that downstream agents need. Identify gray areas, capture decisions that guide research and planning.';
+    }
+    // Execute (and any unrecognised value) carries no extra instructions.
+    return null;
+  }
+}
+
+export { PHASE_WORKFLOW_MAP };
--- a/sdk/src/phase-runner-types.test.ts
+++ b/sdk/src/phase-runner-types.test.ts
@@ -0,0 +1,420 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { GSDTools, GSDToolsError } from './gsd-tools.js';
+import {
+  PhaseStepType,
+  GSDEventType,
+  PhaseType,
+  type PhaseOpInfo,
+  type PhaseStepResult,
+  type PhaseRunnerResult,
+  type HumanGateCallbacks,
+  type PhaseRunnerOptions,
+  type GSDPhaseStartEvent,
+  type GSDPhaseStepStartEvent,
+  type GSDPhaseStepCompleteEvent,
+  type GSDPhaseCompleteEvent,
+} from './types.js';
+import { mkdir, writeFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+
+describe('Phase lifecycle types', () => {
+  // ─── PhaseStepType enum — NOTE(review): 6 values pinned below but 7 members asserted; pin the 7th ───
+
+  describe('PhaseStepType', () => {
+    it('has all expected step values', () => {
+      expect(PhaseStepType.Discuss).toBe('discuss');
+      expect(PhaseStepType.Research).toBe('research');
+      expect(PhaseStepType.Plan).toBe('plan');
+      expect(PhaseStepType.Execute).toBe('execute');
+      expect(PhaseStepType.Verify).toBe('verify');
+      expect(PhaseStepType.Advance).toBe('advance');
+    });
+
+    it('has exactly 7 members', () => {
+      const values = Object.values(PhaseStepType);
+      expect(values).toHaveLength(7);
+    });
+  });
+
+  // ─── GSDEventType phase lifecycle values ───────────────────────────────
+
+  describe('GSDEventType phase lifecycle events', () => {
+    it('includes PhaseStart', () => {
+      expect(GSDEventType.PhaseStart).toBe('phase_start');
+    });
+
+    it('includes PhaseStepStart', () => {
+      expect(GSDEventType.PhaseStepStart).toBe('phase_step_start');
+    });
+
+    it('includes PhaseStepComplete', () => {
+      expect(GSDEventType.PhaseStepComplete).toBe('phase_step_complete');
+    });
+
+    it('includes PhaseComplete', () => {
+      expect(GSDEventType.PhaseComplete).toBe('phase_complete');
+    });
+  });
+
+  // ─── PhaseOpInfo shape validation (the annotated literals must type-check) ───
+
+  describe('PhaseOpInfo interface', () => {
+    it('accepts a valid phase-op output object', () => {
+      const info: PhaseOpInfo = {
+        phase_found: true,
+        phase_dir: '.planning/phases/05-Skill-Scaffolding',
+        phase_number: '5',
+        phase_name: 'Skill Scaffolding',
+        phase_slug: 'skill-scaffolding',
+        padded_phase: '05',
+        has_research: false,
+        has_context: false,
+        has_plans: false,
+        has_verification: false,
+        plan_count: 0,
+        roadmap_exists: true,
+        planning_exists: true,
+        commit_docs: true,
+        context_path: '.planning/phases/05-Skill-Scaffolding/CONTEXT.md',
+        research_path: '.planning/phases/05-Skill-Scaffolding/RESEARCH.md',
+      };
+
+      expect(info.phase_found).toBe(true);
+      expect(info.phase_number).toBe('5');
+      expect(info.plan_count).toBe(0);
+      expect(info.has_context).toBe(false);
+    });
+
+    it('matches the documented init phase-op JSON shape', () => {
+      // Round-trip through JSON to mimic the untyped payload parsed from gsd-tools.cjs output
+      const raw = JSON.parse(JSON.stringify({
+        phase_found: true,
+        phase_dir: '.planning/phases/03-Auth',
+        phase_number: '3',
+        phase_name: 'Auth',
+        phase_slug: 'auth',
+        padded_phase: '03',
+        has_research: true,
+        has_context: true,
+        has_plans: true,
+        has_verification: false,
+        plan_count: 2,
+        roadmap_exists: true,
+        planning_exists: true,
+        commit_docs: true,
+        context_path: '.planning/phases/03-Auth/CONTEXT.md',
+        research_path: '.planning/phases/03-Auth/RESEARCH.md',
+      }));
+
+      const info = raw as PhaseOpInfo;
+      expect(info.phase_found).toBe(true);
+      expect(info.has_plans).toBe(true);
+      expect(info.plan_count).toBe(2);
+      expect(typeof info.phase_dir).toBe('string');
+      expect(typeof info.padded_phase).toBe('string');
+    });
+  });
+
+  // ─── Phase result types ────────────────────────────────────────────────
+
+  describe('PhaseStepResult', () => {
+    it('can represent a successful step', () => {
+      const result: PhaseStepResult = {
+        step: PhaseStepType.Research,
+        success: true,
+        durationMs: 5000,
+      };
+      expect(result.success).toBe(true);
+      expect(result.error).toBeUndefined();
+    });
+
+    it('can represent a failed step with error', () => {
+      const result: PhaseStepResult = {
+        step: PhaseStepType.Execute,
+        success: false,
+        durationMs: 12000,
+        error: 'Session timed out',
+        planResults: [],
+      };
+      expect(result.success).toBe(false);
+      expect(result.error).toBe('Session timed out');
+    });
+  });
+
+  describe('PhaseRunnerResult', () => {
+    it('can represent a complete phase run', () => {
+      const result: PhaseRunnerResult = {
+        phaseNumber: '3',
+        phaseName: 'Auth',
+        steps: [
+          { step: PhaseStepType.Research, success: true, durationMs: 5000 },
+          { step: PhaseStepType.Plan, success: true, durationMs: 3000 },
+          { step: PhaseStepType.Execute, success: true, durationMs: 60000 },
+        ],
+        success: true,
+        totalCostUsd: 1.5,
+        totalDurationMs: 68000,
+      };
+      expect(result.steps).toHaveLength(3);
+      expect(result.success).toBe(true);
+    });
+  });
+
+  describe('HumanGateCallbacks', () => {
+    it('accepts an object with all optional callbacks', () => {
+      const callbacks: HumanGateCallbacks = {
+        onDiscussApproval: async () => 'approve',
+        onVerificationReview: async () => 'accept',
+        onBlockerDecision: async () => 'retry',
+      };
+      expect(callbacks.onDiscussApproval).toBeDefined();
+    });
+
+    it('accepts an empty object (all callbacks optional)', () => {
+      const callbacks: HumanGateCallbacks = {};
+      expect(callbacks.onDiscussApproval).toBeUndefined();
+    });
+  });
+
+  describe('PhaseRunnerOptions', () => {
+    it('accepts full options', () => {
+      const options: PhaseRunnerOptions = {
+        callbacks: {},
+        maxBudgetPerStep: 3.0,
+        maxTurnsPerStep: 30,
+        model: 'claude-sonnet-4-6',
+      };
+      expect(options.maxBudgetPerStep).toBe(3.0);
+    });
+
+    it('accepts empty options (all fields optional)', () => {
+      const options: PhaseRunnerOptions = {};
+      expect(options.callbacks).toBeUndefined();
+    });
+  });
+
+  // ─── Phase lifecycle event interfaces ──────────────────────────────────
+
+  describe('Phase lifecycle event interfaces', () => {
+    it('GSDPhaseStartEvent has correct shape', () => {
+      const event: GSDPhaseStartEvent = {
+        type: GSDEventType.PhaseStart,
+        timestamp: new Date().toISOString(),
+        sessionId: 'test-session',
+        phaseNumber: '3',
+        phaseName: 'Auth',
+      };
+      expect(event.type).toBe('phase_start');
+      expect(event.phaseNumber).toBe('3');
+    });
+
+    it('GSDPhaseStepStartEvent has correct shape', () => {
+      const event: GSDPhaseStepStartEvent = {
+        type: GSDEventType.PhaseStepStart,
+        timestamp: new Date().toISOString(),
+        sessionId: 'test-session',
+        phaseNumber: '3',
+        step: PhaseStepType.Research,
+      };
+      expect(event.type).toBe('phase_step_start');
+      expect(event.step).toBe('research');
+    });
+
+    it('GSDPhaseStepCompleteEvent has correct shape', () => {
+      const event: GSDPhaseStepCompleteEvent = {
+        type: GSDEventType.PhaseStepComplete,
+        timestamp: new Date().toISOString(),
+        sessionId: 'test-session',
+        phaseNumber: '3',
+        step: PhaseStepType.Execute,
+        success: true,
+        durationMs: 45000,
+      };
+      expect(event.type).toBe('phase_step_complete');
+      expect(event.success).toBe(true);
+    });
+
+    it('GSDPhaseStepCompleteEvent can include error', () => {
+      const event: GSDPhaseStepCompleteEvent = {
+        type: GSDEventType.PhaseStepComplete,
+        timestamp: new Date().toISOString(),
+        sessionId: 'test-session',
+        phaseNumber: '3',
+        step: PhaseStepType.Verify,
+        success: false,
+        durationMs: 2000,
+        error: 'Verification failed',
+      };
+      expect(event.error).toBe('Verification failed');
+    });
+
+    it('GSDPhaseCompleteEvent has correct shape', () => {
+      const event: GSDPhaseCompleteEvent = {
+        type: GSDEventType.PhaseComplete,
+        timestamp: new Date().toISOString(),
+        sessionId: 'test-session',
+        phaseNumber: '3',
+        phaseName: 'Auth',
+        success: true,
+        totalCostUsd: 2.5,
+        totalDurationMs: 120000,
+        stepsCompleted: 5,
+      };
+      expect(event.type).toBe('phase_complete');
+      expect(event.stepsCompleted).toBe(5);
+    });
+  });
+});
+
+// ─── GSDTools typed methods — each test points gsdToolsPath at a throwaway .cjs stub that inspects argv ───
+
+describe('GSDTools typed methods', () => {
+  let tmpDir: string;
+  let fixtureDir: string;
+
+  beforeEach(async () => {
+    tmpDir = join(tmpdir(), `gsd-tools-phase-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+    fixtureDir = join(tmpDir, 'fixtures');
+    await mkdir(fixtureDir, { recursive: true });
+    await mkdir(join(tmpDir, '.planning'), { recursive: true });
+  });
+
+  afterEach(async () => {
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  async function createScript(name: string, code: string): Promise<string> {
+    const scriptPath = join(fixtureDir, name);
+    await writeFile(scriptPath, code, { mode: 0o755 });
+    return scriptPath;
+  }
+
+  describe('initPhaseOp()', () => {
+    it('returns typed PhaseOpInfo from gsd-tools output', async () => {
+      const mockOutput: PhaseOpInfo = {
+        phase_found: true,
+        phase_dir: '.planning/phases/05-Skill-Scaffolding',
+        phase_number: '5',
+        phase_name: 'Skill Scaffolding',
+        phase_slug: 'skill-scaffolding',
+        padded_phase: '05',
+        has_research: false,
+        has_context: true,
+        has_plans: true,
+        has_verification: false,
+        plan_count: 3,
+        roadmap_exists: true,
+        planning_exists: true,
+        commit_docs: true,
+        context_path: '.planning/phases/05-Skill-Scaffolding/CONTEXT.md',
+        research_path: '.planning/phases/05-Skill-Scaffolding/RESEARCH.md',
+      };
+
+      const scriptPath = await createScript(
+        'init-phase-op.cjs',
+        `
+        const args = process.argv.slice(2);
+        // Script receives: init phase-op 5 --raw
+        if (args[0] === 'init' && args[1] === 'phase-op' && args[2] === '5') {
+          process.stdout.write(JSON.stringify(${JSON.stringify(mockOutput)}));
+        } else {
+          process.stderr.write('unexpected args: ' + args.join(' '));
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.initPhaseOp('5');
+
+      expect(result.phase_found).toBe(true);
+      expect(result.phase_number).toBe('5');
+      expect(result.phase_name).toBe('Skill Scaffolding');
+      expect(result.plan_count).toBe(3);
+      expect(result.has_context).toBe(true);
+      expect(result.has_plans).toBe(true);
+      expect(result.context_path).toContain('CONTEXT.md');
+    });
+
+    it('calls exec with correct args (init phase-op <N>)', async () => {
+      const scriptPath = await createScript(
+        'init-phase-op-args.cjs',
+        `
+        const args = process.argv.slice(2);
+        process.stdout.write(JSON.stringify({ received_args: args }));
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.initPhaseOp('7') as { received_args: string[] };
+
+      expect(result.received_args).toContain('init');
+      expect(result.received_args).toContain('phase-op');
+      expect(result.received_args).toContain('7');
+      expect(result.received_args).toContain('--raw');
+    });
+  });
+
+  describe('configGet()', () => {
+    it('returns string value from gsd-tools config', async () => {
+      const scriptPath = await createScript(
+        'config-get.cjs',
+        `
+        const args = process.argv.slice(2);
+        if (args[0] === 'config' && args[1] === 'get' && args[2] === 'model_profile') {
+          process.stdout.write(JSON.stringify('balanced'));
+        } else {
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.configGet('model_profile');
+
+      expect(result).toBe('balanced');
+    });
+
+    it('returns null when key not found', async () => {
+      const scriptPath = await createScript(
+        'config-get-null.cjs',
+        `
+        const args = process.argv.slice(2);
+        if (args[0] === 'config' && args[1] === 'get') {
+          process.stdout.write('null');
+        } else {
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.configGet('nonexistent_key');
+
+      expect(result).toBeNull();
+    });
+  });
+
+  describe('stateBeginPhase()', () => {
+    it('calls state begin-phase with correct args', async () => {
+      const scriptPath = await createScript(
+        'state-begin-phase.cjs',
+        `
+        const args = process.argv.slice(2);
+        if (args[0] === 'state' && args[1] === 'begin-phase' && args[2] === '--phase' && args[3] === '3') {
+          process.stdout.write('ok');
+        } else {
+          process.stderr.write('unexpected args: ' + args.join(' '));
+          process.exit(1);
+        }
+        `,
+      );
+
+      const tools = new GSDTools({ projectDir: tmpDir, gsdToolsPath: scriptPath });
+      const result = await tools.stateBeginPhase('3');
+
+      expect(result).toBe('ok');
+    });
+  });
+});
--- a/sdk/src/phase-runner.integration.test.ts
+++ b/sdk/src/phase-runner.integration.test.ts
@@ -0,0 +1,376 @@
+/**
+ * Integration test — proves PhaseRunner state machine works against real gsd-tools.cjs.
+ *
+ * Creates a temp `.planning/` directory structure, instantiates real GSDTools,
+ * and exercises the state machine. Sessions will fail (no Claude CLI in CI) but
+ * the state machine's control flow, event emission, and error capture are proven.
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { mkdtemp, mkdir, writeFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { homedir } from 'node:os';
+
+import { GSDTools } from './gsd-tools.js';
+import { PhaseRunner } from './phase-runner.js';
+import type { PhaseRunnerDeps } from './phase-runner.js';
+import { ContextEngine } from './context-engine.js';
+import { PromptFactory } from './phase-prompt.js';
+import { GSDEventStream } from './event-stream.js';
+import { loadConfig } from './config.js';
+import type { GSDEvent } from './types.js';
+import { GSDEventType, PhaseStepType } from './types.js';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+const GSD_TOOLS_PATH = join(homedir(), '.claude', 'get-shit-done', 'bin', 'gsd-tools.cjs');
+
+async function createTempPlanningDir(): Promise<string> {
+  // Fresh throwaway project root under the OS temp dir.
+  const projectRoot = await mkdtemp(join(tmpdir(), 'gsd-sdk-phase-int-'));
+  const planningDir = join(projectRoot, '.planning');
+  const phaseDir = join(planningDir, 'phases', '01-integration-test');
+
+  // A recursive mkdir of the phase dir also creates .planning/ and phases/.
+  await mkdir(phaseDir, { recursive: true });
+
+  // Project config written to .planning/config.json.
+  const projectConfig = {
+    model_profile: 'balanced',
+    commit_docs: false,
+    workflow: {
+      research: true,
+      verifier: true,
+      auto_advance: true,
+      skip_discuss: false,
+    },
+  };
+  await writeFile(
+    join(planningDir, 'config.json'),
+    JSON.stringify(projectConfig),
+  );
+
+  // ROADMAP.md — required for roadmap_exists.
+  const roadmapText = '# Roadmap\n\n## Phase 01: Integration Test\n';
+  await writeFile(join(planningDir, 'ROADMAP.md'), roadmapText);
+
+  // Pre-existing CONTEXT.md → has_context=true, so the discuss step is skipped.
+  const contextText =
+    '# Context\n\nThis is an integration test phase with pre-existing context.\n';
+  await writeFile(join(phaseDir, 'CONTEXT.md'), contextText);
+
+  return projectRoot;
+}
+
+// ─── Test suite ──────────────────────────────────────────────────────────────
+
+describe('Integration: PhaseRunner against real gsd-tools.cjs', () => {
+  let tmpDir: string;
+  let tools: GSDTools;
+
+  beforeAll(async () => {
+    tmpDir = await createTempPlanningDir();
+    tools = new GSDTools({
+      projectDir: tmpDir,
+      gsdToolsPath: GSD_TOOLS_PATH,
+      timeoutMs: 10_000,
+    });
+  });
+
+  afterAll(async () => {
+    if (tmpDir) {
+      await rm(tmpDir, { recursive: true, force: true });
+    }
+  });
+
+  // ── Test 1: initPhaseOp returns valid PhaseOpInfo ──
+
+  it('initPhaseOp returns valid PhaseOpInfo for temp phase', async () => {
+    const info = await tools.initPhaseOp('01');
+
+    expect(info.phase_found).toBe(true);
+    expect(info.phase_number).toBe('01');
+    expect(info.phase_name).toBe('integration-test');
+    expect(info.phase_dir).toBe('.planning/phases/01-integration-test');
+    expect(info.has_context).toBe(true);
+    expect(info.has_plans).toBe(false);
+    expect(info.plan_count).toBe(0);
+    expect(info.roadmap_exists).toBe(true);
+    expect(info.planning_exists).toBe(true);
+  });
+
+  it('initPhaseOp returns phase_found=false for nonexistent phase', async () => {
+    const info = await tools.initPhaseOp('99');
+
+    expect(info.phase_found).toBe(false);
+    expect(info.has_context).toBe(false);
+    expect(info.plan_count).toBe(0);
+  });
+
+  // ── Test 2: PhaseRunner state machine control flow ──
+
+  it('PhaseRunner emits lifecycle events and captures session errors gracefully', { timeout: 300_000 }, async () => {
+    const eventStream = new GSDEventStream();
+    const config = await loadConfig(tmpDir);
+    const contextEngine = new ContextEngine(tmpDir);
+    const promptFactory = new PromptFactory();
+
+    const events: GSDEvent[] = [];
+    eventStream.on('event', (e: GSDEvent) => events.push(e));
+
+    const deps: PhaseRunnerDeps = {
+      projectDir: tmpDir,
+      tools,
+      promptFactory,
+      contextEngine,
+      eventStream,
+      config,
+    };
+
+    const runner = new PhaseRunner(deps);
+    // Tight budget/turns so each session finishes fast
+    const result = await runner.run('01', {
+      maxTurnsPerStep: 2,
+      maxBudgetPerStep: 0.10,
+    });
+
+    // ── (a) Phase start event emitted ──
+    const phaseStartEvents = events.filter(e => e.type === GSDEventType.PhaseStart);
+    expect(phaseStartEvents).toHaveLength(1);
+    const phaseStart = phaseStartEvents[0]!;
+    if (phaseStart.type === GSDEventType.PhaseStart) {
+      expect(phaseStart.phaseNumber).toBe('01');
+      expect(phaseStart.phaseName).toBe('integration-test');
+    }
+
+    // ── (b) Discuss should be skipped (has_context=true) ──
+    // createTempPlanningDir pre-seeds CONTEXT.md, so no discuss step should be recorded
+    const discussSteps = result.steps.filter(s => s.step === PhaseStepType.Discuss);
+    expect(discussSteps).toHaveLength(0);
+
+    // ── (c) Step start events emitted for attempted steps ──
+    const stepStartEvents = events.filter(e => e.type === GSDEventType.PhaseStepStart);
+    expect(stepStartEvents.length).toBeGreaterThanOrEqual(1);
+
+    // ── (d) Step results are properly structured ──
+    // With CLI available, sessions may succeed or fail depending on budget/turns.
+    // Either way, each step result must have correct structure.
+    expect(result.steps.length).toBeGreaterThanOrEqual(1);
+    for (const step of result.steps) {
+      expect(Object.values(PhaseStepType)).toContain(step.step);
+      expect(typeof step.success).toBe('boolean');
+      expect(typeof step.durationMs).toBe('number');
+      // Failed steps may or may not have an error message
+      // (e.g. advance step can fail without explicit error string)
+    }
+
+    // ── (e) Phase complete event emitted ──
+    const phaseCompleteEvents = events.filter(e => e.type === GSDEventType.PhaseComplete);
+    expect(phaseCompleteEvents).toHaveLength(1);
+
+    // ── (f) Result structure is valid ──
+    expect(result.phaseNumber).toBe('01');
+    expect(result.phaseName).toBe('integration-test');
+    expect(typeof result.totalCostUsd).toBe('number');
+    expect(typeof result.totalDurationMs).toBe('number');
+    expect(result.totalDurationMs).toBeGreaterThan(0);
+  });
+
+  // ── Test 3: PhaseRunner with nonexistent phase throws ──
+
+  it('PhaseRunner throws PhaseRunnerError for nonexistent phase', async () => {
+    const eventStream = new GSDEventStream();
+    const config = await loadConfig(tmpDir);
+    const contextEngine = new ContextEngine(tmpDir);
+    const promptFactory = new PromptFactory();
+
+    const deps: PhaseRunnerDeps = {
+      projectDir: tmpDir,
+      tools,
+      promptFactory,
+      contextEngine,
+      eventStream,
+      config,
+    };
+
+    const runner = new PhaseRunner(deps);
+    await expect(runner.run('99')).rejects.toThrow('Phase 99 not found on disk');
+  });
+
+  // ── Test 4: GSD.runPhase() public API delegates correctly ──
+
+  it('GSD.runPhase() creates collaborators and delegates to PhaseRunner', { timeout: 300_000 }, async () => {
+    // Dynamic import: index.js is loaded only inside this test of the public API wiring
+    const { GSD } = await import('./index.js');
+
+    const gsd = new GSD({ projectDir: tmpDir });
+    const events: GSDEvent[] = [];
+    gsd.onEvent((e) => events.push(e));
+
+    const result = await gsd.runPhase('01', {
+      maxTurnsPerStep: 2,
+      maxBudgetPerStep: 0.10,
+    });
+
+    // Proves the full wiring works: GSD → PhaseRunner → GSDTools → gsd-tools.cjs
+    expect(result.phaseNumber).toBe('01');
+    expect(result.phaseName).toBe('integration-test');
+    expect(result.steps.length).toBeGreaterThanOrEqual(1);
+    expect(events.some(e => e.type === GSDEventType.PhaseStart)).toBe(true);
+    expect(events.some(e => e.type === GSDEventType.PhaseComplete)).toBe(true);
+  });
+});
+
+// ─── Wave / phasePlanIndex Integration Tests ─────────────────────────────────
+
+/**
+ * Builds a temp project whose `.planning/phases/01-wave-test/` holds three PLAN.md files; returns the temp root.
+ * - Plans 01 and 02 are wave 1 with empty depends_on (config.json sets parallelization: true)
+ * - Plan 03 is wave 2 and declares depends_on: ["01-wave-test-01"]
+ * - Plan 01 also gets a SUMMARY.md (result: pass), which marks it as completed
+ */
+async function createMultiWavePlanningDir(): Promise<string> {
+  const tmpDir = await mkdtemp(join(tmpdir(), 'gsd-sdk-wave-int-'));
+
+  const planningDir = join(tmpDir, '.planning');
+  const phaseDir = join(planningDir, 'phases', '01-wave-test');
+  await mkdir(phaseDir, { recursive: true });
+
+  // config.json — with parallelization enabled
+  await writeFile(
+    join(planningDir, 'config.json'),
+    JSON.stringify({
+      model_profile: 'balanced',
+      commit_docs: false,
+      parallelization: true,
+      workflow: {
+        research: true,
+        verifier: true,
+        auto_advance: true,
+        skip_discuss: false,
+      },
+    }),
+  );
+
+  // ROADMAP.md
+  await writeFile(join(planningDir, 'ROADMAP.md'), '# Roadmap\n\n## Phase 01: Wave Test\n');
+
+  const planTemplate = (id: string, wave: number, dependsOn: string[] = []) => `---
+phase: "01"
+plan: "${id}"
+type: "feature"
+wave: ${wave}
+depends_on: [${dependsOn.map(d => `"${d}"`).join(', ')}]
+files_modified: ["src/${id}.ts"]
+autonomous: true
+requirements: []
+must_haves:
+  truths: ["${id} exists"]
+  artifacts: []
+  key_links: []
+---
+
+# Plan: ${id}
+
+<task type="code" name="Create ${id}" files="src/${id}.ts">
+  <read_first>none</read_first>
+  <action>Create ${id}</action>
+  <verify>File exists</verify>
+  <acceptance_criteria>
+    - File exists
+  </acceptance_criteria>
+  <done>Done</done>
+</task>
+`;
+
+  // Wave 1 plans (parallel)
+  await writeFile(join(phaseDir, '01-wave-test-01-PLAN.md'), planTemplate('01-wave-test-01', 1));
+  await writeFile(join(phaseDir, '01-wave-test-02-PLAN.md'), planTemplate('01-wave-test-02', 1));
+
+  // Wave 2 plan (depends_on names plan 01 only)
+  await writeFile(
+    join(phaseDir, '01-wave-test-03-PLAN.md'),
+    planTemplate('01-wave-test-03', 2, ['01-wave-test-01']),
+  );
+
+  // Summary for plan 01 — marks it as completed
+  await writeFile(
+    join(phaseDir, '01-wave-test-01-SUMMARY.md'),
+    `---\nresult: pass\nplan: "01-wave-test-01"\ncost_usd: 0.01\nduration_ms: 1000\n---\n\n# Summary\n\nAll tasks completed.\n`,
+  );
+
+  return tmpDir;
+}
+
+describe('Integration: phasePlanIndex and wave execution', () => {
+  let tmpDir: string;
+  let tools: GSDTools;
+
+  beforeAll(async () => {
+    tmpDir = await createMultiWavePlanningDir();
+    tools = new GSDTools({
+      projectDir: tmpDir,
+      gsdToolsPath: GSD_TOOLS_PATH,
+      timeoutMs: 10_000,
+    });
+  });
+
+  afterAll(async () => {
+    if (tmpDir) {
+      await rm(tmpDir, { recursive: true, force: true });
+    }
+  });
+
+  it('phasePlanIndex returns typed PhasePlanIndex with correct wave grouping', async () => {
+    const index = await tools.phasePlanIndex('01');
+
+    // 3 plans total
+    expect(index.plans).toHaveLength(3);
+
+    // Wave grouping: waves is keyed by the wave number as a string; wave 1 has 2 plans, wave 2 has 1
+    expect(index.waves['1']).toHaveLength(2);
+    expect(index.waves['1']).toContain('01-wave-test-01');
+    expect(index.waves['1']).toContain('01-wave-test-02');
+    expect(index.waves['2']).toHaveLength(1);
+    expect(index.waves['2']).toContain('01-wave-test-03');
+
+    // Incomplete: plan 01 has summary so only 02 and 03 are incomplete
+    expect(index.incomplete).toHaveLength(2);
+    expect(index.incomplete).toContain('01-wave-test-02');
+    expect(index.incomplete).toContain('01-wave-test-03');
+
+    // Every fixture plan sets autonomous: true → no checkpoints expected
+    expect(index.has_checkpoints).toBe(false);
+
+    // Phase ID correct
+    expect(index.phase).toBe('01');
+  });
+
+  it('phasePlanIndex marks has_summary correctly per plan', async () => {
+    const index = await tools.phasePlanIndex('01');
+
+    // Plan 01 has a SUMMARY.md on disk
+    const plan01 = index.plans.find(p => p.id === '01-wave-test-01');
+    expect(plan01).toBeDefined();
+    expect(plan01!.has_summary).toBe(true);
+
+    // Plans 02 and 03 have no summary
+    const plan02 = index.plans.find(p => p.id === '01-wave-test-02');
+    expect(plan02).toBeDefined();
+    expect(plan02!.has_summary).toBe(false);
+
+    const plan03 = index.plans.find(p => p.id === '01-wave-test-03');
+    expect(plan03).toBeDefined();
+    expect(plan03!.has_summary).toBe(false);
+  });
+
+  it('phasePlanIndex for nonexistent phase returns empty plans', async () => {
+    const index = await tools.phasePlanIndex('99');
+
+    expect(index.plans).toHaveLength(0);
+    expect(Object.keys(index.waves)).toHaveLength(0);
+    expect(index.incomplete).toHaveLength(0);
+    expect(index.has_checkpoints).toBe(false);
+  });
+});
--- a/sdk/src/phase-runner.test.ts
+++ b/sdk/src/phase-runner.test.ts
--- a/sdk/src/phase-runner.ts
+++ b/sdk/src/phase-runner.ts
--- a/sdk/src/plan-parser.test.ts
+++ b/sdk/src/plan-parser.test.ts
@@ -0,0 +1,528 @@
+import { describe, it, expect } from 'vitest';
+import { parsePlan, parseTasks, extractFrontmatter } from './plan-parser.js';
+
+// ─── Fixtures ────────────────────────────────────────────────────────────────
+
+const FULL_PLAN = `---
+phase: 03-features
+plan: 01
+type: execute
+wave: 2
+depends_on: [01-01, 01-02]
+files_modified: [src/models/user.ts, src/api/users.ts, src/components/UserList.tsx]
+autonomous: true
+requirements: [R001, R003]
+must_haves:
+  truths:
+    - "User can see existing messages"
+    - "User can send a message"
+  artifacts:
+    - path: src/components/Chat.tsx
+      provides: Message list rendering
+      min_lines: 30
+    - path: src/app/api/chat/route.ts
+      provides: Message CRUD operations
+  key_links:
+    - from: src/components/Chat.tsx
+      to: /api/chat
+      via: fetch in useEffect
+      pattern: "fetch.*api/chat"
+---
+
+<objective>
+Implement complete User feature as vertical slice.
+
+Purpose: Self-contained user management that can run parallel to other features.
+Output: User model, API endpoints, and UI components.
+</objective>
+
+<execution_context>
+@~/.claude/get-shit-done/workflows/execute-plan.md
+@~/.claude/get-shit-done/templates/summary.md
+</execution_context>
+
+<context>
+@.planning/PROJECT.md
+@.planning/ROADMAP.md
+@.planning/STATE.md
+
+# Only include SUMMARY refs if genuinely needed
+@src/relevant/source.ts
+</context>
+
+<tasks>
+
+<task type="auto">
+  <name>Task 1: Create User model</name>
+  <files>src/models/user.ts</files>
+  <read_first>src/existing/types.ts, src/config/db.ts</read_first>
+  <action>Define User type with id, email, name, createdAt. Export TypeScript interface.</action>
+  <verify>tsc --noEmit passes</verify>
+  <acceptance_criteria>
+    - User type is exported from src/models/user.ts
+    - Type includes id, email, name, createdAt fields
+  </acceptance_criteria>
+  <done>User type exported and usable</done>
+</task>
+
+<task type="auto">
+  <name>Task 2: Create User API endpoints</name>
+  <files>src/api/users.ts, src/api/middleware.ts</files>
+  <action>GET /users (list), GET /users/:id (single), POST /users (create). Use User type from model.</action>
+  <verify>fetch tests pass for all endpoints</verify>
+  <done>All CRUD operations work</done>
+</task>
+
+<task type="checkpoint:human-verify" gate="blocking">
+  <name>Verify UI visually</name>
+  <files>src/components/UserList.tsx</files>
+  <action>Start dev server and present for review.</action>
+  <verify>User confirms layout is correct</verify>
+  <done>Visual verification passed</done>
+</task>
+
+</tasks>
+
+<verification>
+- [ ] npm run build succeeds
+- [ ] API endpoints respond correctly
+</verification>
+
+<success_criteria>
+- All tasks completed
+- User feature works end-to-end
+</success_criteria>
+`;
+
+const MINIMAL_PLAN = `---
+phase: 01-test
+plan: 01
+type: execute
+wave: 1
+depends_on: []
+files_modified: []
+autonomous: true
+requirements: []
+must_haves:
+  truths: []
+  artifacts: []
+  key_links: []
+---
+
+<objective>
+Minimal test plan.
+</objective>
+
+<tasks>
+<task type="auto">
+  <name>Single task</name>
+  <files>output.txt</files>
+  <action>Create output.txt</action>
+  <verify>test -f output.txt</verify>
+  <done>File exists</done>
+</task>
+</tasks>
+`;
+
+const MULTILINE_ACTION_PLAN = `---
+phase: 02-impl
+plan: 01
+type: execute
+wave: 1
+depends_on: []
+files_modified: [src/server.ts]
+autonomous: true
+requirements: [R005]
+must_haves:
+  truths: []
+  artifacts: []
+  key_links: []
+---
+
+<tasks>
+<task type="auto">
+  <name>Build server with config</name>
+  <files>src/server.ts</files>
+  <action>
+Create the Express server with the following setup:
+
+1. Import express and configure middleware
+2. Add routes for health check and API
+3. Configure error handling with proper types:
+   - ValidationError => 400
+   - NotFoundError => 404
+   - Default => 500
+
+Example code structure:
+\`\`\`typescript
+const app = express();
+app.get('/health', (req, res) => {
+  res.json({ status: 'ok' });
+});
+\`\`\`
+
+Make sure to handle the edge case where \`req.body\` contains
+angle brackets like <script> or XML-like content.
+  </action>
+  <verify>npm run build && curl localhost:3000/health</verify>
+  <done>Server starts and health endpoint returns 200</done>
+</task>
+</tasks>
+`;
+
+// ─── Tests ───────────────────────────────────────────────────────────────────
+
+describe('extractFrontmatter', () => {
+  it('extracts basic key-value pairs', () => {
+    const result = extractFrontmatter(FULL_PLAN);
+    expect(result.phase).toBe('03-features');
+    expect(result.plan).toBe('01');
+    expect(result.type).toBe('execute');
+  });
+
+  it('coerces numeric values', () => {
+    const result = extractFrontmatter(FULL_PLAN);
+    expect(result.wave).toBe(2);
+  });
+
+  it('coerces boolean values', () => {
+    const result = extractFrontmatter(FULL_PLAN);
+    expect(result.autonomous).toBe(true);
+  });
+
+  it('parses inline arrays', () => {
+    const result = extractFrontmatter(FULL_PLAN);
+    expect(result.depends_on).toEqual(['01-01', '01-02']);
+    expect(result.files_modified).toEqual([
+      'src/models/user.ts',
+      'src/api/users.ts',
+      'src/components/UserList.tsx',
+    ]);
+    expect(result.requirements).toEqual(['R001', 'R003']);
+  });
+
+  it('parses empty inline arrays', () => {
+    const result = extractFrontmatter(MINIMAL_PLAN);
+    expect(result.depends_on).toEqual([]);
+    expect(result.files_modified).toEqual([]);
+    expect(result.requirements).toEqual([]);
+  });
+
+  it('returns empty object for content without frontmatter', () => {
+    const result = extractFrontmatter('# Just a heading\nSome content');
+    expect(result).toEqual({});
+  });
+
+  it('returns empty object for empty string', () => {
+    const result = extractFrontmatter('');
+    expect(result).toEqual({});
+  });
+});
+
+describe('parsePlan — frontmatter', () => {
+  it('parses all typed frontmatter fields', () => {
+    const result = parsePlan(FULL_PLAN);
+    const fm = result.frontmatter;
+
+    expect(fm.phase).toBe('03-features');
+    expect(fm.plan).toBe('01');
+    expect(fm.type).toBe('execute');
+    expect(fm.wave).toBe(2);
+    expect(fm.depends_on).toEqual(['01-01', '01-02']);
+    expect(fm.files_modified).toEqual([
+      'src/models/user.ts',
+      'src/api/users.ts',
+      'src/components/UserList.tsx',
+    ]);
+    expect(fm.autonomous).toBe(true);
+    expect(fm.requirements).toEqual(['R001', 'R003']);
+  });
+
+  it('parses must_haves.truths', () => {
+    const result = parsePlan(FULL_PLAN);
+    expect(result.frontmatter.must_haves.truths).toEqual([
+      'User can see existing messages',
+      'User can send a message',
+    ]);
+  });
+
+  it('parses must_haves.artifacts', () => {
+    const result = parsePlan(FULL_PLAN);
+    const artifacts = result.frontmatter.must_haves.artifacts;
+    expect(artifacts).toHaveLength(2);
+    expect(artifacts[0]).toMatchObject({
+      path: 'src/components/Chat.tsx',
+      provides: 'Message list rendering',
+      min_lines: 30,
+    });
+    expect(artifacts[1]).toMatchObject({
+      path: 'src/app/api/chat/route.ts',
+      provides: 'Message CRUD operations',
+    });
+  });
+
+  it('parses must_haves.key_links', () => {
+    const result = parsePlan(FULL_PLAN);
+    const links = result.frontmatter.must_haves.key_links;
+    expect(links).toHaveLength(1);
+    expect(links[0]).toMatchObject({
+      from: 'src/components/Chat.tsx',
+      to: '/api/chat',
+      via: 'fetch in useEffect',
+      pattern: 'fetch.*api/chat',
+    });
+  });
+
+  it('parses empty must_haves', () => {
+    const result = parsePlan(MINIMAL_PLAN);
+    expect(result.frontmatter.must_haves).toEqual({
+      truths: [],
+      artifacts: [],
+      key_links: [],
+    });
+  });
+
+  it('provides defaults for missing frontmatter', () => {
+    const result = parsePlan('<tasks></tasks>');
+    expect(result.frontmatter.phase).toBe('');
+    expect(result.frontmatter.wave).toBe(1);
+    expect(result.frontmatter.depends_on).toEqual([]);
+    expect(result.frontmatter.autonomous).toBe(true);
+    expect(result.frontmatter.must_haves).toEqual({
+      truths: [],
+      artifacts: [],
+      key_links: [],
+    });
+  });
+});
+
+describe('parsePlan — XML tasks', () => {
+  it('parses auto tasks', () => {
+    const result = parsePlan(FULL_PLAN);
+    expect(result.tasks).toHaveLength(3);
+
+    const task1 = result.tasks[0];
+    expect(task1.type).toBe('auto');
+    expect(task1.name).toBe('Task 1: Create User model');
+    expect(task1.files).toEqual(['src/models/user.ts']);
+    expect(task1.read_first).toEqual(['src/existing/types.ts', 'src/config/db.ts']);
+    expect(task1.action).toBe(
+      'Define User type with id, email, name, createdAt. Export TypeScript interface.',
+    );
+    expect(task1.verify).toBe('tsc --noEmit passes');
+    expect(task1.done).toBe('User type exported and usable');
+  });
+
+  it('parses checkpoint tasks', () => {
+    const result = parsePlan(FULL_PLAN);
+    const checkpoint = result.tasks[2];
+    expect(checkpoint.type).toBe('checkpoint:human-verify');
+    expect(checkpoint.name).toBe('Verify UI visually');
+  });
+
+  it('parses acceptance_criteria list', () => {
+    const result = parsePlan(FULL_PLAN);
+    expect(result.tasks[0].acceptance_criteria).toEqual([
+      'User type is exported from src/models/user.ts',
+      'Type includes id, email, name, createdAt fields',
+    ]);
+  });
+
+  it('parses multiple files from comma-separated list', () => {
+    const result = parsePlan(FULL_PLAN);
+    const task2 = result.tasks[1];
+    expect(task2.files).toEqual(['src/api/users.ts', 'src/api/middleware.ts']);
+  });
+
+  it('handles missing optional elements', () => {
+    const result = parsePlan(FULL_PLAN);
+    const task2 = result.tasks[1];
+    // Task 2 has no read_first or acceptance_criteria
+    expect(task2.read_first).toEqual([]);
+    expect(task2.acceptance_criteria).toEqual([]);
+  });
+
+  it('handles multiline action blocks', () => {
+    const result = parsePlan(MULTILINE_ACTION_PLAN);
+    expect(result.tasks).toHaveLength(1);
+
+    const task = result.tasks[0];
+    expect(task.action).toContain('Create the Express server');
+    expect(task.action).toContain('ValidationError => 400');
+    expect(task.action).toContain('app.get');
+    // The angle brackets inside action should be preserved
+    expect(task.action).toContain('angle brackets like <script>');
+  });
+
+  it('returns empty array for no tasks', () => {
+    const result = parsePlan('---\nphase: test\n---\n\nNo tasks here.');
+    expect(result.tasks).toEqual([]);
+  });
+});
+
+describe('parsePlan — sections', () => {
+  it('extracts objective', () => {
+    const result = parsePlan(FULL_PLAN);
+    expect(result.objective).toContain('Implement complete User feature');
+    expect(result.objective).toContain('Self-contained user management');
+  });
+
+  it('extracts execution_context references', () => {
+    const result = parsePlan(FULL_PLAN);
+    expect(result.execution_context).toEqual([
+      '~/.claude/get-shit-done/workflows/execute-plan.md',
+      '~/.claude/get-shit-done/templates/summary.md',
+    ]);
+  });
+
+  it('extracts context references (skipping comments)', () => {
+    const result = parsePlan(FULL_PLAN);
+    expect(result.context_refs).toEqual([
+      '.planning/PROJECT.md',
+      '.planning/ROADMAP.md',
+      '.planning/STATE.md',
+      'src/relevant/source.ts',
+    ]);
+  });
+
+  it('returns empty sections for missing blocks', () => {
+    const result = parsePlan(MINIMAL_PLAN);
+    expect(result.execution_context).toEqual([]);
+    // context_refs should be empty when no <context> block
+    expect(result.context_refs).toEqual([]);
+  });
+});
+
+describe('parsePlan — edge cases', () => {
+  it('handles empty string input', () => {
+    const result = parsePlan('');
+    expect(result.frontmatter.phase).toBe('');
+    expect(result.tasks).toEqual([]);
+    expect(result.raw).toBe('');
+  });
+
+  it('handles null-ish input without crashing', () => {
+    // @ts-expect-error — testing runtime guard
+    const result = parsePlan(null);
+    expect(result.tasks).toEqual([]);
+    expect(result.raw).toBe('');
+  });
+
+  it('handles undefined input without crashing', () => {
+    // @ts-expect-error — testing runtime guard
+    const result = parsePlan(undefined);
+    expect(result.tasks).toEqual([]);
+    expect(result.raw).toBe('');
+  });
+
+  it('preserves raw content', () => {
+    const result = parsePlan(MINIMAL_PLAN);
+    expect(result.raw).toBe(MINIMAL_PLAN);
+  });
+
+  it('handles malformed XML gracefully (unclosed tags)', () => {
+    const content = `---
+phase: test
+plan: 01
+type: execute
+wave: 1
+depends_on: []
+files_modified: []
+autonomous: true
+requirements: []
+must_haves:
+  truths: []
+  artifacts: []
+  key_links: []
+---
+
+<tasks>
+<task type="auto">
+  <name>Broken task</name>
+  <action>This action is never closed
+</tasks>
+`;
+    // Should not throw — just parse what it can
+    const result = parsePlan(content);
+    expect(result.tasks).toEqual([]); // Can't match <task>...</task> if malformed
+    expect(result.frontmatter.phase).toBe('test');
+  });
+
+  it('handles content with only frontmatter', () => {
+    const content = `---
+phase: 01-solo
+plan: 01
+type: execute
+wave: 1
+depends_on: []
+files_modified: []
+autonomous: true
+requirements: [R001]
+must_haves:
+  truths: []
+  artifacts: []
+  key_links: []
+---
+`;
+    const result = parsePlan(content);
+    expect(result.frontmatter.phase).toBe('01-solo');
+    expect(result.frontmatter.requirements).toEqual(['R001']);
+    expect(result.tasks).toEqual([]);
+    expect(result.objective).toBe('');
+  });
+
+  it('handles code snippets with angle brackets inside action', () => {
+    const result = parsePlan(MULTILINE_ACTION_PLAN);
+    const action = result.tasks[0].action;
+    // The <script> inside the action text should be preserved (it's between <action>...</action>)
+    expect(action).toContain('<script>');
+    // Contents of the fenced TypeScript snippet should be preserved verbatim
+    expect(action).toContain("res.json({ status: 'ok' })");
+  });
+
+  it('handles plan with boolean autonomous=false', () => {
+    const content = `---
+phase: test
+plan: 01
+type: execute
+wave: 1
+depends_on: []
+files_modified: []
+autonomous: false
+requirements: []
+must_haves:
+  truths: []
+  artifacts: []
+  key_links: []
+---
+`;
+    const result = parsePlan(content);
+    expect(result.frontmatter.autonomous).toBe(false);
+  });
+});
+
+describe('parseTasks — standalone', () => {
+  it('extracts tasks from raw task XML', () => {
+    const xml = `
+<tasks>
+<task type="auto">
+  <name>Do something</name>
+  <files>a.ts</files>
+  <action>Build the thing</action>
+  <verify>npm test</verify>
+  <done>It works</done>
+</task>
+</tasks>
+`;
+    const tasks = parseTasks(xml);
+    expect(tasks).toHaveLength(1);
+    expect(tasks[0].name).toBe('Do something');
+    expect(tasks[0].type).toBe('auto');
+  });
+
+  it('defaults task type to auto when attribute missing', () => {
+    const xml = `<tasks><task><name>No type</name><action>Do it</action></task></tasks>`;
+    const tasks = parseTasks(xml);
+    expect(tasks[0].type).toBe('auto');
+  });
+});
--- a/sdk/src/plan-parser.ts
+++ b/sdk/src/plan-parser.ts
@@ -0,0 +1,427 @@
+/**
+ * plan-parser.ts — Parse GSD-1 PLAN.md files into structured data.
+ *
+ * Extracts YAML frontmatter, XML task bodies, and markdown sections
+ * (<objective>, <execution_context>, <context>) from plan files.
+ *
+ * Ported from get-shit-done/bin/lib/frontmatter.cjs with TypeScript types.
+ */
+
+import { readFile } from 'node:fs/promises';
+import type {
+  PlanFrontmatter,
+  PlanTask,
+  ParsedPlan,
+  MustHaves,
+  MustHaveArtifact,
+  MustHaveKeyLink,
+} from './types.js';
+
+// ─── YAML frontmatter extraction ─────────────────────────────────────────────
+
+/**
+ * Parse the `---`-delimited frontmatter of a PLAN.md string into a plain object.
+ *
+ * Indentation-driven stack parser (ported from the CJS reference): handles
+ * nested mappings, inline `[a, b]` arrays, `- item` lists (scalars or mappings)
+ * and scalar coercion via coerceValue(). Returns {} when no block is found.
+ */
+export function extractFrontmatter(content: string): Record<string, unknown> {
+  const frontmatter: Record<string, unknown> = {};
+  // Collect every ---…--- block and keep the LAST one (guards against duplicated frontmatter).
+  // NOTE(review): not anchored to the start of the file, so `---` rules in the body can match too.
+  const allBlocks = [...content.matchAll(/(?:^|\n)\s*---\r?\n([\s\S]+?)\r?\n---/g)];
+  const match = allBlocks.length > 0 ? allBlocks[allBlocks.length - 1] : null;
+  if (!match) return frontmatter;
+
+  const yaml = match[1];
+  const lines = yaml.split(/\r?\n/);
+
+  // Context stack: obj = container being filled, indent = indent of the line that opened it (key is always null)
+  const stack: Array<{ obj: Record<string, unknown> | unknown[]; key: string | null; indent: number }> = [
+    { obj: frontmatter, key: null, indent: -1 },
+  ];
+
+  for (const line of lines) {
+    if (line.trim() === '') continue;
+
+    const indentMatch = line.match(/^(\s*)/);
+    const indent = indentMatch ? indentMatch[1].length : 0;
+
+    // Close every context opened at this indentation or deeper; the line belongs to an ancestor
+    while (stack.length > 1 && indent <= stack[stack.length - 1].indent) {
+      stack.pop();
+    }
+
+    const current = stack[stack.length - 1];
+    const currentObj = current.obj as Record<string, unknown>;
+
+    // Mapping entry "key: value" (keys limited to letters, digits, _ and -)
+    const keyMatch = line.match(/^(\s*)([a-zA-Z0-9_-]+):\s*(.*)/);
+    if (keyMatch) {
+      const key = keyMatch[2];
+      const value = keyMatch[3].trim();
+
+      if (value === '' || value === '[') {
+        // Bare "key:" opens a nested container ({} may later become a list); "key: [" opens an array
+        currentObj[key] = value === '[' ? [] : {};
+        current.key = null;
+        stack.push({ obj: currentObj[key] as Record<string, unknown>, key: null, indent });
+      } else if (value.startsWith('[') && value.endsWith(']')) {
+        // Inline array "key: [a, b]": comma-split, quotes stripped, empties dropped, items stay strings
+        currentObj[key] = value
+          .slice(1, -1)
+          .split(',')
+          .map((s) => s.trim().replace(/^["']|["']$/g, ''))
+          .filter(Boolean);
+        current.key = null;
+      } else {
+        // Scalar "key: value": strip one surrounding quote per side, then coerce booleans/numbers
+        const cleanValue = value.replace(/^["']|["']$/g, '');
+        currentObj[key] = coerceValue(cleanValue);
+        current.key = null;
+      }
+    } else if (line.trim().startsWith('- ')) {
+      // List item: either a scalar ("- text") or the first pair of a mapping ("- key: value")
+      const afterDash = line.trim().slice(2);
+      const dashKvMatch = afterDash.match(/^([a-zA-Z0-9_-]+):\s*(.*)/);
+
+      // Build the element this line contributes to the enclosing list
+      let itemToPush: unknown;
+      if (dashKvMatch) {
+        // Mapping element: start an object seeded with this first key/value pair
+        const obj: Record<string, unknown> = {};
+        const val = dashKvMatch[2].trim().replace(/^["']|["']$/g, '');
+        obj[dashKvMatch[1]] = coerceValue(val);
+        itemToPush = obj;
+      } else {
+        const itemValue = afterDash.replace(/^["']|["']$/g, '');
+        itemToPush = coerceValue(itemValue);
+      }
+
+      // First "- " item under a bare "key:" swaps its {} placeholder for an array (found by identity in the parent)
+      if (
+        typeof current.obj === 'object' &&
+        !Array.isArray(current.obj) &&
+        Object.keys(current.obj).length === 0
+      ) {
+        const parent = stack.length > 1 ? stack[stack.length - 2] : null;
+        if (parent && typeof parent.obj === 'object' && !Array.isArray(parent.obj)) {
+          const parentObj = parent.obj as Record<string, unknown>;
+          for (const k of Object.keys(parentObj)) {
+            if (parentObj[k] === current.obj) {
+              parentObj[k] = [itemToPush];
+              current.obj = parentObj[k] as unknown[];
+              break;
+            }
+          }
+        }
+      } else if (Array.isArray(current.obj)) {
+        current.obj.push(itemToPush);
+      } // otherwise (non-empty mapping context) the item is not stored
+
+      // Mapping elements become the active context so the more-indented
+      // "key: value" lines that follow are written into the same object
+      if (dashKvMatch && typeof itemToPush === 'object') {
+        stack.push({
+          obj: itemToPush as Record<string, unknown>,
+          key: null,
+          indent, // use dash indent so sub-keys (more indented) populate this object
+        });
+      }
+    }
+  }
+
+  return frontmatter;
+}
+
+/**
+ * Coerce a frontmatter scalar to boolean/number when it is unambiguous.
+ * Zero-padded values (e.g. "01", "007", "01.5") are returned unchanged as strings.
+ */
+function coerceValue(value: string): unknown {
+  if (value === 'true') return true;
+  if (value === 'false') return false;
+  // Integer part must be "0" or start with 1-9, so "0.5" → 0.5 but "01" stays text
+  const numeric = /^(?:0|[1-9]\d*)(\.\d+)?$/.exec(value);
+  if (numeric) return numeric[1] ? parseFloat(value) : parseInt(value, 10);
+  return value;
+}
+
+// ─── must_haves block parsing ────────────────────────────────────────────────
+
+/**
+ * Normalize the raw `must_haves` frontmatter value into a MustHaves record.
+ *
+ * Anything that is not a non-null object yields three empty lists. Otherwise
+ * each sub-key is normalized on its own: `truths` to string[], `artifacts`
+ * and `key_links` to typed object arrays (non-array values become []).
+ *
+ * @param raw - Value stored under `must_haves` by extractFrontmatter.
+ * @returns A MustHaves object with all three lists always present.
+ */
+function parseMustHaves(raw: unknown): MustHaves {
+  if (typeof raw !== 'object' || !raw) {
+    return { truths: [], artifacts: [], key_links: [] };
+  }
+  const { truths, artifacts, key_links } = raw as Record<string, unknown>;
+  const normalizedTruths = normalizeStringArray(truths);
+  const normalizedArtifacts = normalizeArtifacts(artifacts);
+  const normalizedLinks = normalizeKeyLinks(key_links);
+  return { truths: normalizedTruths, artifacts: normalizedArtifacts, key_links: normalizedLinks };
+}
+
+/** Stringify every element of an array; any non-array input yields []. */
+function normalizeStringArray(val: unknown): string[] {
+  return Array.isArray(val) ? val.map(String) : [];
+}
+
+/**
+ * Convert the raw `artifacts` list into MustHaveArtifact objects; non-arrays yield [].
+ * Non-object entries are skipped; optional fields are copied only when defined.
+ */
+function normalizeArtifacts(val: unknown): MustHaveArtifact[] {
+  if (!Array.isArray(val)) return [];
+  const records = val.filter((entry) => typeof entry === 'object' && entry !== null) as Array<Record<string, unknown>>;
+  return records.map((src) => ({
+    path: String(src.path ?? ''),
+    provides: String(src.provides ?? ''),
+    ...(src.min_lines === undefined ? {} : { min_lines: Number(src.min_lines) }),
+    ...(src.exports === undefined ? {} : { exports: normalizeStringArray(src.exports) }),
+    ...(src.contains === undefined ? {} : { contains: String(src.contains) }),
+  }));
+}
+
+/**
+ * Convert the raw `key_links` list into MustHaveKeyLink objects; non-arrays yield [].
+ * Non-object entries are skipped; `pattern` is copied only when defined.
+ */
+function normalizeKeyLinks(val: unknown): MustHaveKeyLink[] {
+  if (!Array.isArray(val)) return [];
+  const records = val.filter((entry) => typeof entry === 'object' && entry !== null) as Array<Record<string, unknown>>;
+  return records.map((src) => ({
+    from: String(src.from ?? ''),
+    to: String(src.to ?? ''),
+    via: String(src.via ?? ''),
+    ...(src.pattern === undefined ? {} : { pattern: String(src.pattern) }),
+  }));
+}
+
+// ─── XML task extraction ─────────────────────────────────────────────────────
+
+/**
+ * Return the trimmed text between the first `<tagName>` and the nearest following
+ * `</tagName>` (case-insensitive, may span lines), or '' when the pair is absent.
+ */
+function extractElement(taskBody: string, tagName: string): string {
+  const pattern = new RegExp(`<${tagName}>([\\s\\S]*?)</${tagName}>`, 'i');
+  const inner = pattern.exec(taskBody)?.[1];
+  return inner === undefined ? '' : inner.trim();
+}
+
+/**
+ * Read the `type` attribute from a <task> tag's attribute text ('auto' if absent).
+ * `type` must start the text or follow whitespace, so e.g. `subtype="x"` is ignored.
+ */
+function extractTaskType(taskTag: string): string {
+  return /(?:^|\s)type\s*=\s*["']([^"']+)["']/.exec(taskTag)?.[1] ?? 'auto';
+}
+
+/**
+ * Parse the <task> blocks of a plan into PlanTask objects.
+ *
+ * Only the first <tasks>...</tasks> section is scanned; when the input has no
+ * such section the whole string is scanned instead. Each <task ...>...</task>
+ * pair is matched lazily: a task body ends at the first </task> that follows
+ * its opening tag, and a <task> with no </task> after it produces no entry.
+ *
+ * Fields of each returned task:
+ * - `type`: the opening tag's `type` attribute (see extractTaskType)
+ * - `name`, `action`, `verify`, `done`: trimmed element text, '' when missing
+ * - `files`, `read_first`: comma-separated element text split into trimmed,
+ *   non-empty entries, [] when missing
+ * - `acceptance_criteria`: the lines of the element that start with "- ",
+ *   without the bullet, [] when missing
+ *
+ * Elements are located with regular expressions rather than an XML parser, so
+ * other angle brackets inside an element (e.g. `<script>` in an <action>) are
+ * returned as plain text.
+ *
+ * @param content - Full plan text, or just its task XML.
+ * @returns The parsed tasks in document order.
+ */
+export function parseTasks(content: string): PlanTask[] {
+  // Turn comma-separated element text into trimmed, non-empty entries
+  const splitList = (text: string): string[] => {
+    if (text === '') return [];
+    return text
+      .split(',')
+      .map((entry) => entry.trim())
+      .filter(Boolean);
+  };
+
+  // Narrow the scan to the <tasks> section when the input has one
+  const section = /<tasks>([\s\S]*?)<\/tasks>/i.exec(content);
+  const scope = section === null ? content : section[1];
+
+  const parsed: PlanTask[] = [];
+  const taskPattern = /<task\b([^>]*)>([\s\S]*?)<\/task>/gi;
+
+  for (const [, attrs, body] of scope.matchAll(taskPattern)) {
+    // Scalar elements: trimmed text, '' when the element is absent
+    const name = extractElement(body, 'name');
+    const action = extractElement(body, 'action');
+    const verify = extractElement(body, 'verify');
+    const done = extractElement(body, 'done');
+
+    // List elements: comma-separated paths
+    const files = splitList(extractElement(body, 'files'));
+    const read_first = splitList(extractElement(body, 'read_first'));
+
+    // Bullet list inside <acceptance_criteria>; non-bullet lines are ignored
+    const acceptance_criteria: string[] = [];
+    for (const rawLine of extractElement(body, 'acceptance_criteria').split('\n')) {
+      const line = rawLine.trim();
+      if (line.startsWith('- ')) {
+        acceptance_criteria.push(line.slice(2).trim());
+      }
+    }
+
+    parsed.push({
+      type: extractTaskType(attrs),
+      name,
+      files,
+      read_first,
+      action,
+      verify,
+      acceptance_criteria,
+      done,
+    });
+  }
+
+  return parsed;
+}
+
+// ─── Section extraction ──────────────────────────────────────────────────────
+
+/**
+ * Return the trimmed body of the first `<sectionName>...</sectionName>` pair in
+ * `content` (case-insensitive), or '' when no such pair exists.
+ */
+function extractSection(content: string, sectionName: string): string {
+  const found = new RegExp(`<${sectionName}>([\\s\\S]*?)</${sectionName}>`, 'i').exec(content);
+  return found === null ? '' : found[1].trim();
+}
+
+/**
+ * List the `@`-prefixed references of the <context> block.
+ * Lines are trimmed first; lines not starting with `@` (e.g. comments) are skipped.
+ */
+function extractContextRefs(content: string): string[] {
+  const refs: string[] = [];
+  for (const rawLine of extractSection(content, 'context').split('\n')) {
+    const line = rawLine.trim();
+    if (line.startsWith('@')) {
+      refs.push(line.slice(1).trim());
+    }
+  }
+  return refs;
+}
+
+/**
+ * List the `@`-prefixed references of the <execution_context> block.
+ * Each line is trimmed; the leading `@` is removed and the rest trimmed again.
+ */
+function extractExecutionContext(content: string): string[] {
+  const refs: string[] = [];
+  for (const rawLine of extractSection(content, 'execution_context').split('\n')) {
+    const line = rawLine.trim();
+    if (line.startsWith('@')) {
+      refs.push(line.slice(1).trim());
+    }
+  }
+  return refs;
+}
+
+// ─── Public API ──────────────────────────────────────────────────────────────
+
+/**
+ * Parse a GSD-1 PLAN.md content string into a structured ParsedPlan.
+ *
+ * Extracts the typed frontmatter (defaults for missing keys, unknown keys copied
+ * through), <objective>, the `@` references of both context blocks, and tasks.
+ *
+ * Edge cases:
+ * - Empty or non-string input → default frontmatter, no tasks, `raw` is ''
+ * - Missing frontmatter → defaults only
+ * - `wave` that does not convert to a finite number → 1
+ * - Malformed XML → partial extraction, no crash
+ * @param content - Full text of a PLAN.md file.
+ * @returns The parsed plan; `raw` holds the input text unchanged.
+ */
+export function parsePlan(content: string): ParsedPlan {
+  if (!content || typeof content !== 'string') {
+    return {
+      frontmatter: createDefaultFrontmatter(),
+      objective: '',
+      execution_context: [],
+      context_refs: [],
+      tasks: [],
+      raw: '', // only '' or a non-string reaches this branch; never echo a non-string
+    };
+  }
+
+  const rawFrontmatter = extractFrontmatter(content);
+  // Number() yields NaN for values such as "two" or a nested mapping — fall back to wave 1
+  const wave = Number(rawFrontmatter.wave ?? 1);
+  const frontmatter: PlanFrontmatter = {
+    phase: String(rawFrontmatter.phase ?? ''),
+    plan: String(rawFrontmatter.plan ?? ''),
+    type: String(rawFrontmatter.type ?? 'execute'),
+    wave: Number.isFinite(wave) ? wave : 1,
+    depends_on: normalizeStringArray(rawFrontmatter.depends_on),
+    files_modified: normalizeStringArray(rawFrontmatter.files_modified),
+    autonomous: rawFrontmatter.autonomous !== false, // true unless exactly `false`
+    requirements: normalizeStringArray(rawFrontmatter.requirements),
+    must_haves: parseMustHaves(rawFrontmatter.must_haves),
+  };
+
+  // Copy through extra keys (own-property check: "constructor" etc. are not fields)
+  for (const [key, value] of Object.entries(rawFrontmatter)) {
+    if (!Object.prototype.hasOwnProperty.call(frontmatter, key)) {
+      frontmatter[key] = value;
+    }
+  }
+
+  return {
+    frontmatter,
+    objective: extractSection(content, 'objective'),
+    execution_context: extractExecutionContext(content),
+    context_refs: extractContextRefs(content),
+    tasks: parseTasks(content),
+    raw: content,
+  };
+}
+
+/** Frontmatter defaults, returned as a fresh object on every call. */
+function createDefaultFrontmatter(): PlanFrontmatter {
+  return {
+    phase: '',
+    plan: '',
+    type: 'execute',
+    wave: 1,
+    depends_on: [],
+    files_modified: [],
+    autonomous: true,
+    requirements: [],
+    must_haves: { truths: [], artifacts: [], key_links: [] },
+  };
+}
+
+/**
+ * Read the file at `filePath` as UTF-8 text and parse it with parsePlan.
+ * Rejects with readFile's error when the file cannot be read.
+ */
+export async function parsePlanFile(filePath: string): Promise<ParsedPlan> {
+  return parsePlan(await readFile(filePath, 'utf-8'));
+}
--- a/sdk/src/prompt-builder.test.ts
+++ b/sdk/src/prompt-builder.test.ts
@@ -0,0 +1,306 @@
+/**
+ * Unit tests for prompt-builder.ts
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  buildExecutorPrompt,
+  parseAgentTools,
+  parseAgentRole,
+  DEFAULT_ALLOWED_TOOLS,
+} from './prompt-builder.js';
+import type { ParsedPlan, PlanFrontmatter, MustHaves } from './types.js';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+/**
+ * Builds a ParsedPlan fixture (two auth tasks). Each override replaces the
+ * matching top-level field; `frontmatter` overrides are merged over defaults.
+ */
+function makePlan(overrides: Partial<ParsedPlan> = {}): ParsedPlan {
+  const defaultFrontmatter: PlanFrontmatter = {
+    phase: '01-auth', plan: '01',
+    type: 'execute', wave: 1,
+    depends_on: [], files_modified: [],
+    autonomous: true, requirements: ['AUTH-01'],
+    must_haves: { truths: [], artifacts: [], key_links: [] },
+  };
+
+  return {
+    frontmatter: { ...defaultFrontmatter, ...overrides.frontmatter },
+    objective: overrides.objective ?? 'Implement JWT authentication with refresh tokens',
+    execution_context: overrides.execution_context ?? [],
+    context_refs: overrides.context_refs ?? [],
+    tasks: overrides.tasks ?? [
+      {
+        type: 'auto',
+        name: 'Create auth module',
+        files: ['src/auth.ts'],
+        read_first: ['src/types.ts'],
+        action: 'Create the auth module with login and refresh endpoints',
+        verify: 'npm test -- --filter auth',
+        acceptance_criteria: ['JWT tokens issued on login', 'Refresh tokens rotate correctly'],
+        done: 'Auth module created and tests pass',
+      },
+      {
+        type: 'auto',
+        name: 'Add middleware',
+        files: ['src/middleware.ts'],
+        read_first: [],
+        action: 'Create auth middleware for protected routes',
+        verify: 'npm test -- --filter middleware',
+        acceptance_criteria: [],
+        done: 'Middleware validates JWT on protected routes',
+      },
+    ],
+    raw: overrides.raw ?? '', // was hard-coded to '', silently dropping the override
+  };
+}
+
+const SAMPLE_AGENT_DEF = `---
+name: gsd-executor
+description: Executes GSD plans
+tools: Read, Write, Edit, Bash, Grep, Glob
+permissionMode: acceptEdits
+---
+
+<role>
+You are a GSD plan executor. You execute PLAN.md files atomically.
+</role>
+
+<execution_flow>
+Some flow content
+</execution_flow>`; // fixture: frontmatter with a tools: list, a <role> block, and one unrelated tag block
+
+// ─── parseAgentTools ─────────────────────────────────────────────────────────
+
+describe('parseAgentTools', () => { // tools-list parsing plus each fallback to DEFAULT_ALLOWED_TOOLS
+  it('extracts tools from agent definition frontmatter', () => {
+    const tools = parseAgentTools(SAMPLE_AGENT_DEF);
+    expect(tools).toEqual(['Read', 'Write', 'Edit', 'Bash', 'Grep', 'Glob']);
+  });
+
+  it('returns defaults when no frontmatter found', () => {
+    const tools = parseAgentTools('Just some text without frontmatter');
+    expect(tools).toEqual(DEFAULT_ALLOWED_TOOLS);
+  });
+
+  it('returns defaults when frontmatter has no tools key', () => {
+    const def = `---\nname: test\n---\nContent`;
+    const tools = parseAgentTools(def);
+    expect(tools).toEqual(DEFAULT_ALLOWED_TOOLS);
+  });
+
+  it('handles empty tools value', () => {
+    const def = `---\ntools: \n---`; // "tools:" followed only by a space
+    const tools = parseAgentTools(def);
+    expect(tools).toEqual(DEFAULT_ALLOWED_TOOLS);
+  });
+});
+
+// ─── parseAgentRole ──────────────────────────────────────────────────────────
+
+describe('parseAgentRole', () => { // <role> block extraction and the empty-string fallback
+  it('extracts role content from agent definition', () => {
+    const role = parseAgentRole(SAMPLE_AGENT_DEF);
+    expect(role).toContain('GSD plan executor');
+    expect(role).toContain('PLAN.md files atomically');
+  });
+
+  it('returns empty string when no role block', () => {
+    expect(parseAgentRole('No role block here')).toBe('');
+  });
+});
+
+// ─── buildExecutorPrompt ─────────────────────────────────────────────────────
+
+describe('buildExecutorPrompt', () => { // substring checks on the assembled prompt, one section per test
+  it('includes the objective text', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('Implement JWT authentication with refresh tokens');
+  });
+
+  it('includes all task names', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('Create auth module');
+    expect(prompt).toContain('Add middleware');
+  });
+
+  it('includes task actions', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('Create the auth module with login and refresh endpoints');
+    expect(prompt).toContain('Create auth middleware for protected routes');
+  });
+
+  it('includes task verification commands', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('npm test -- --filter auth');
+    expect(prompt).toContain('npm test -- --filter middleware');
+  });
+
+  it('includes task file references', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('src/auth.ts');
+    expect(prompt).toContain('src/types.ts');
+  });
+
+  it('includes acceptance criteria', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('JWT tokens issued on login');
+    expect(prompt).toContain('Refresh tokens rotate correctly');
+  });
+
+  it('includes SUMMARY.md creation instruction', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('SUMMARY.md');
+    expect(prompt).toContain('Create a SUMMARY.md file');
+  });
+
+  it('includes sequential execution instruction', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('Execute these tasks sequentially');
+  });
+
+  it('handles plan with no tasks gracefully', () => {
+    const plan = makePlan({ tasks: [] });
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('No tasks defined');
+    expect(prompt).toContain('SUMMARY.md');
+    // Reaching this line already shows the call above did not throw
+    expect(prompt.length).toBeGreaterThan(0);
+  });
+
+  it('includes context references when present', () => {
+    const plan = makePlan({
+      context_refs: ['src/config.ts', 'docs/architecture.md'],
+    });
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('@src/config.ts');
+    expect(prompt).toContain('@docs/architecture.md');
+    expect(prompt).toContain('Read these files for context');
+  });
+
+  it('omits context section when no refs', () => {
+    const plan = makePlan({ context_refs: [] });
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).not.toContain('Context Files');
+  });
+
+  it('includes plan metadata', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('Phase: 01-auth');
+    expect(prompt).toContain('Plan: 01');
+  });
+
+  it('includes must-have truths when present', () => {
+    const plan = makePlan({
+      frontmatter: {
+        phase: '01',
+        plan: '01',
+        type: 'execute',
+        wave: 1,
+        depends_on: [],
+        files_modified: [],
+        autonomous: true,
+        requirements: [],
+        must_haves: {
+          truths: ['All endpoints require JWT auth', 'Tokens expire after 15 minutes'],
+          artifacts: [],
+          key_links: [],
+        },
+      },
+    });
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('All endpoints require JWT auth');
+    expect(prompt).toContain('Tokens expire after 15 minutes');
+  });
+
+  it('includes must-have artifacts', () => {
+    const plan = makePlan({
+      frontmatter: {
+        phase: '01',
+        plan: '01',
+        type: 'execute',
+        wave: 1,
+        depends_on: [],
+        files_modified: [],
+        autonomous: true,
+        requirements: [],
+        must_haves: {
+          truths: [],
+          artifacts: [{ path: 'src/auth.ts', provides: 'JWT auth module' }],
+          key_links: [],
+        },
+      },
+    });
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('`src/auth.ts`');
+    expect(prompt).toContain('JWT auth module');
+  });
+
+  it('includes must-have key_links', () => {
+    const plan = makePlan({
+      frontmatter: {
+        phase: '01',
+        plan: '01',
+        type: 'execute',
+        wave: 1,
+        depends_on: [],
+        files_modified: [],
+        autonomous: true,
+        requirements: [],
+        must_haves: {
+          truths: [],
+          artifacts: [],
+          key_links: [{ from: 'auth.ts', to: 'middleware.ts', via: 'import' }],
+        },
+      },
+    });
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('auth.ts → middleware.ts via import');
+  });
+
+  it('includes role from agent definition when provided', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan, SAMPLE_AGENT_DEF);
+    expect(prompt).toContain('## Role');
+    expect(prompt).toContain('GSD plan executor');
+  });
+
+  it('works without agent definition', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    // Objective and Tasks headings must be present; the Role heading must not be
+    expect(prompt).toContain('## Objective');
+    expect(prompt).toContain('## Tasks');
+    expect(prompt).not.toContain('## Role');
+  });
+
+  it('provides fallback objective when plan has empty objective', () => {
+    const plan = makePlan({ objective: '' });
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('Execute plan: 01');
+  });
+
+  it('includes done criteria for tasks', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('Auth module created and tests pass');
+    expect(prompt).toContain('Middleware validates JWT on protected routes');
+  });
+
+  it('includes commit instruction in completion section', () => {
+    const plan = makePlan();
+    const prompt = buildExecutorPrompt(plan);
+    expect(prompt).toContain('Commit the SUMMARY.md');
+  });
+});
--- a/sdk/src/prompt-builder.ts
+++ b/sdk/src/prompt-builder.ts
@@ -0,0 +1,193 @@
+/**
+ * Prompt builder — assembles executor prompts from parsed plans.
+ *
+ * Converts a ParsedPlan into a structured prompt that tells the
+ * executor agent exactly what to do: follow the tasks sequentially,
+ * verify each one, and produce a SUMMARY.md at the end.
+ */
+
+import type { ParsedPlan, PlanTask } from './types.js';
+
+// ─── Constants ───────────────────────────────────────────────────────────────
+
+const DEFAULT_ALLOWED_TOOLS = ['Read', 'Write', 'Edit', 'Bash', 'Grep', 'Glob'];
+
+// ─── Agent definition parsing ────────────────────────────────────────────────
+
+/**
+ * Extract the comma-separated `tools:` list from an agent definition's YAML
+ * frontmatter. Returns a fresh copy of DEFAULT_ALLOWED_TOOLS when there is no
+ * frontmatter, no `tools:` line, or the value on that line is empty.
+ */
+export function parseAgentTools(agentDef: string): string[] {
+  const frontmatterMatch = agentDef.match(/^---\s*\n([\s\S]*?)\n---/);
+  // Only spaces/tabs may follow "tools:" — `\s*` would skip the newline and
+  // capture the NEXT frontmatter line as the tool list when the value is empty.
+  const toolsMatch = frontmatterMatch?.[1].match(/^tools:[ \t]*(.+)$/m);
+
+  const tools = (toolsMatch?.[1] ?? '')
+    .split(',')
+    .map((t) => t.trim())
+    .filter(Boolean);
+
+  // Copy the defaults so callers that mutate the result cannot corrupt them.
+  return tools.length > 0 ? tools : [...DEFAULT_ALLOWED_TOOLS];
+}
+
+/**
+ * Pull the text between the first <role> and </role> tags (tag match is
+ * case-insensitive) out of an agent definition, trimmed; '' if no such block.
+ */
+export function parseAgentRole(agentDef: string): string {
+  const roleBlock = /<role>([\s\S]*?)<\/role>/i.exec(agentDef);
+  return roleBlock === null ? '' : roleBlock[1].trim();
+}
+
+// ─── Prompt assembly ─────────────────────────────────────────────────────────
+
+/**
+ * Render one plan task as a markdown block for the executor prompt.
+ *
+ * The heading is numbered `index + 1`. The Files / Read first lines and the
+ * Verify, Done when and Acceptance criteria groups appear only when the task
+ * has a value for them; the Action group is always written.
+ */
+function formatTask(task: PlanTask, index: number): string {
+  const out: string[] = [`### Task ${index + 1}: ${task.name}`];
+
+  // Appends a blank separator line, the bold label, then the group's lines.
+  const addGroup = (label: string, body: string[]): void => {
+    out.push('', label, ...body);
+  };
+
+  const { files, read_first: readFirst, acceptance_criteria: criteria } = task;
+
+  if (files.length > 0) {
+    out.push(`**Files:** ${files.join(', ')}`);
+  }
+  if (readFirst.length > 0) {
+    out.push(`**Read first:** ${readFirst.join(', ')}`);
+  }
+
+  addGroup('**Action:**', [task.action]);
+
+  if (task.verify) {
+    addGroup('**Verify:**', [task.verify]);
+  }
+  if (task.done) {
+    addGroup('**Done when:**', [task.done]);
+  }
+  if (criteria.length > 0) {
+    addGroup(
+      '**Acceptance criteria:**',
+      criteria.map((criterion) => `- ${criterion}`),
+    );
+  }
+
+  return out.join('\n');
+}
+
+/**
+ * Assemble the executor prompt for a parsed plan.
+ *
+ * Sections are emitted in a fixed order and joined by blank lines: Role (only
+ * when `agentDef` holds a non-empty <role> block), Objective, Plan Info,
+ * Context Files, Tasks, Must-Haves and Completion. Plan Info, Context Files
+ * and Must-Haves are left out when they would be empty; Objective and Tasks
+ * use placeholder text instead.
+ *
+ * @param plan - Parsed plan structure from plan-parser
+ * @param agentDef - Raw content of gsd-executor.md agent definition (optional)
+ * @returns Assembled prompt string
+ */
+export function buildExecutorPrompt(plan: ParsedPlan, agentDef?: string): string {
+  const { frontmatter, objective, context_refs: contextRefs, tasks } = plan;
+  const sections: string[] = [];
+
+  // ── Role instructions from agent definition ──
+  const role = agentDef ? parseAgentRole(agentDef) : '';
+  if (role) sections.push(`## Role\n\n${role}`);
+
+  // ── Objective ──
+  const objectiveText = objective
+    ? objective
+    : `Execute plan: ${frontmatter.plan || frontmatter.phase || 'unnamed'}`;
+  sections.push(`## Objective\n\n${objectiveText}`);
+
+  // ── Plan metadata ──
+  const meta = [
+    frontmatter.phase ? `Phase: ${frontmatter.phase}` : '',
+    frontmatter.plan ? `Plan: ${frontmatter.plan}` : '',
+    frontmatter.type ? `Type: ${frontmatter.type}` : '',
+  ].filter((line) => line !== '');
+  if (meta.length > 0) sections.push(`## Plan Info\n\n${meta.join('\n')}`);
+
+  // ── Context references ──
+  if (contextRefs.length > 0) {
+    const refLines: string[] = [];
+    for (const ref of contextRefs) refLines.push(`- @${ref}`);
+    sections.push(
+      `## Context Files\n\nRead these files for context before starting:\n${refLines.join('\n')}`,
+    );
+  }
+
+  // ── Tasks ──
+  if (tasks.length === 0) {
+    sections.push(`## Tasks\n\nNo tasks defined in this plan. Review the objective and determine if any actions are needed.`);
+  } else {
+    const taskBlocks = tasks.map((task, position) => formatTask(task, position));
+    sections.push(`## Tasks\n\nExecute these tasks sequentially. For each task: read any referenced files, execute the action, run verification, confirm done criteria, then commit.\n\n${taskBlocks.join('\n\n---\n\n')}`);
+  }
+
+  // ── Must-haves ──
+  const mustHaves = frontmatter.must_haves;
+  if (mustHaves) {
+    const parts: string[] = [];
+
+    if (mustHaves.truths.length > 0) {
+      parts.push(
+        '**Truths (invariants):**',
+        ...mustHaves.truths.map((truth) => `- ${truth}`),
+      );
+    }
+
+    if (mustHaves.artifacts.length > 0) {
+      parts.push(
+        '**Required artifacts:**',
+        ...mustHaves.artifacts.map((artifact) => `- \`${artifact.path}\`: ${artifact.provides}`),
+      );
+    }
+
+    if (mustHaves.key_links.length > 0) {
+      parts.push(
+        '**Key links:**',
+        ...mustHaves.key_links.map((link) => `- ${link.from} → ${link.to} via ${link.via}`),
+      );
+    }
+
+    if (parts.length > 0) {
+      sections.push(`## Must-Haves\n\n${parts.join('\n')}`);
+    }
+  }
+
+  // ── Completion instructions ──
+  const completion = [
+    '## Completion',
+    '',
+    'After all tasks are complete:',
+    '1. Run any overall verification or success criteria checks',
+    '2. Create a SUMMARY.md file documenting:',
+    '   - One-line summary of what was accomplished',
+    '   - Tasks completed with commit hashes',
+    '   - Any deviations from the plan',
+    '   - Files created or modified',
+    '   - Known issues (if any)',
+    '3. Commit the SUMMARY.md',
+    '4. Report completion',
+  ];
+  sections.push(completion.join('\n'));
+
+  return sections.join('\n\n');
+}
+
+export { DEFAULT_ALLOWED_TOOLS };
--- a/sdk/src/session-runner.ts
+++ b/sdk/src/session-runner.ts
@@ -0,0 +1,299 @@
+/**
+ * Session runner — orchestrates Agent SDK query() calls for plan execution.
+ *
+ * Takes a parsed plan, builds the executor prompt, configures query() options,
+ * processes the message stream, and extracts results into a typed PlanResult.
+ */
+
+import { query } from '@anthropic-ai/claude-agent-sdk';
+import type { SDKMessage, SDKResultMessage, SDKResultSuccess, SDKResultError } from '@anthropic-ai/claude-agent-sdk';
+import type { ParsedPlan, PlanResult, SessionOptions, SessionUsage, GSDCostUpdateEvent, PhaseStepType } from './types.js';
+import { GSDEventType, PhaseType } from './types.js';
+import type { GSDConfig } from './config.js';
+import { buildExecutorPrompt, parseAgentTools, DEFAULT_ALLOWED_TOOLS } from './prompt-builder.js';
+import type { GSDEventStream, EventStreamContext } from './event-stream.js';
+import { getToolsForPhase } from './tool-scoping.js';
+
+// ─── Model resolution ────────────────────────────────────────────────────────
+
+/**
+ * Pick the model id for a session: an explicit `options.model` wins, then the
+ * config's `model_profile` (a known profile name maps to a model id, any
+ * other value is passed through as-is). `undefined` defers to the SDK default.
+ */
+function resolveModel(options?: SessionOptions, config?: GSDConfig): string | undefined {
+  const explicitModel = options?.model;
+  if (explicitModel) return explicitModel;
+
+  const profile = config?.model_profile;
+  if (!profile) return undefined; // Let SDK use its default
+
+  // NOTE(review): confirm these ids against the current Anthropic model list.
+  const modelIdByProfile: Record<string, string> = {
+    balanced: 'claude-sonnet-4-6',
+    quality: 'claude-opus-4-6',
+    speed: 'claude-haiku-3-5',
+  };
+  return modelIdByProfile[profile] ?? profile;
+}
+
+// ─── Session runner ──────────────────────────────────────────────────────────
+
+/**
+ * Execute a parsed plan in a single Agent SDK query() session.
+ *
+ * The executor prompt built from the plan is appended to the `claude_code`
+ * system prompt preset, while the user prompt carries only the objective.
+ * Allowed tools come from `options.allowedTools`, else the agent definition's
+ * `tools:` frontmatter, else DEFAULT_ALLOWED_TOOLS.
+ * The session runs with permission prompts bypassed. Unless overridden in
+ * `options`, it is limited to 50 turns and a 5.0 USD budget and runs in
+ * process.cwd().
+ *
+ * @param plan - Parsed plan structure
+ * @param config - GSD project configuration
+ * @param options - Session overrides (maxTurns, budget, model, etc.)
+ * @param agentDef - Raw agent definition content (optional, for tool/role extraction)
+ * @param eventStream - Optional event stream; receives every SDK message
+ * @param streamContext - Optional context attached to emitted events
+ * @returns Typed PlanResult with cost, duration, success/error status
+ */
+export async function runPlanSession(
+  plan: ParsedPlan,
+  config: GSDConfig,
+  options?: SessionOptions,
+  agentDef?: string,
+  eventStream?: GSDEventStream,
+  streamContext?: EventStreamContext,
+): Promise<PlanResult> {
+  // Tool list priority: explicit option, then the agent definition, then defaults
+  const allowedTools =
+    options?.allowedTools ?? (agentDef ? parseAgentTools(agentDef) : DEFAULT_ALLOWED_TOOLS);
+  const model = resolveModel(options, config);
+  const objective = plan.objective || 'Execute the plan tasks below.';
+
+  // The objective is the user turn; the full plan rides in the system prompt
+  const messages = query({
+    prompt: `Execute this plan:\n\n${objective}`,
+    options: {
+      systemPrompt: {
+        type: 'preset',
+        preset: 'claude_code',
+        append: buildExecutorPrompt(plan, agentDef),
+      },
+      settingSources: ['project'],
+      allowedTools,
+      // No interactive approval: every permission check is skipped
+      permissionMode: 'bypassPermissions',
+      allowDangerouslySkipPermissions: true,
+      maxTurns: options?.maxTurns ?? 50,
+      maxBudgetUsd: options?.maxBudgetUsd ?? 5.0,
+      cwd: options?.cwd ?? process.cwd(),
+      // Leave `model` out entirely when unresolved so the SDK picks its default
+      ...(model ? { model } : {}),
+    },
+  });
+
+  return processQueryStream(messages, eventStream, streamContext);
+}
+
+// ─── Result extraction ───────────────────────────────────────────────────────
+
+/** Type guard: true for the SDK message whose `type` is 'result'. */
+const isResultMessage = (msg: SDKMessage): msg is SDKResultMessage =>
+  msg.type === 'result';
+
+/** Type guard: true when the result message's `subtype` is 'success'. */
+const isSuccessResult = (msg: SDKResultMessage): msg is SDKResultSuccess =>
+  msg.subtype === 'success';
+
+/** Type guard: true for any result message whose `subtype` is not 'success'. */
+const isErrorResult = (msg: SDKResultMessage): msg is SDKResultError =>
+  msg.subtype !== 'success';
+
+/** Returns a new SessionUsage with all four token counters set to 0. */
+function emptyUsage(): SessionUsage {
+  const zeroed: SessionUsage = {
+    inputTokens: 0, outputTokens: 0,
+    cacheReadInputTokens: 0, cacheCreationInputTokens: 0,
+  };
+  return zeroed;
+}
+
+/** Copies the SDK's snake_case token counters into a SessionUsage; null/undefined ones become 0. */
+function extractUsage(msg: SDKResultMessage): SessionUsage {
+  const { input_tokens, output_tokens, cache_read_input_tokens, cache_creation_input_tokens } = msg.usage;
+  return {
+    inputTokens: input_tokens ?? 0, outputTokens: output_tokens ?? 0,
+    cacheReadInputTokens: cache_read_input_tokens ?? 0,
+    cacheCreationInputTokens: cache_creation_input_tokens ?? 0,
+  };
+}
+
+/**
+ * Convert the SDK's final result message into a PlanResult.
+ * Session id, cost, duration, token usage and turn count are copied for every
+ * result; a non-success subtype additionally yields an `error` holding that
+ * subtype and the SDK's `errors` list (an empty array when it is absent).
+ */
+function extractResult(msg: SDKResultMessage): PlanResult {
+  const common = {
+    sessionId: msg.session_id,
+    totalCostUsd: msg.total_cost_usd,
+    durationMs: msg.duration_ms,
+    usage: extractUsage(msg),
+    numTurns: msg.num_turns,
+  };
+  if (isSuccessResult(msg)) return { ...common, success: true };
+
+  // Any other subtype is reported as a failure
+  const failure = msg as SDKResultError;
+  return {
+    ...common,
+    success: false,
+    error: {
+      subtype: failure.subtype,
+      messages: failure.errors ?? [],
+    },
+  };
+}
+
+// ─── Shared stream processing ────────────────────────────────────────────────
+
+/**
+ * Drain a query() message stream, forwarding every message to the optional
+ * event stream, and turn the final `result` message into a PlanResult.
+ *
+ * Failures while streaming do not reject: an error thrown inside the loop
+ * (by the stream or by the event mapping), or a stream that ends without a
+ * result message, resolves to a failed PlanResult with subtype
+ * 'error_during_execution' and zeroed cost/usage. After a real result, one
+ * CostUpdate event is emitted when an event stream is attached.
+ * Shared by runPlanSession and runPhaseStepSession.
+ */
+async function processQueryStream(
+  queryStream: AsyncIterable<SDKMessage>,
+  eventStream?: GSDEventStream,
+  streamContext?: EventStreamContext,
+): Promise<PlanResult> {
+  // Failed result for sessions that never produced an SDK result message.
+  const failedWithoutResult = (message: string): PlanResult => ({
+    success: false,
+    sessionId: '',
+    totalCostUsd: 0,
+    durationMs: 0,
+    usage: emptyUsage(),
+    numTurns: 0,
+    error: {
+      subtype: 'error_during_execution',
+      messages: [message],
+    },
+  });
+
+  let finalMessage: SDKResultMessage | undefined;
+
+  try {
+    for await (const message of queryStream) {
+      eventStream?.mapAndEmit(message, streamContext ?? {});
+      // Keep the most recent result message seen on the stream.
+      if (isResultMessage(message)) {
+        finalMessage = message;
+      }
+    }
+  } catch (err) {
+    return failedWithoutResult(err instanceof Error ? err.message : String(err));
+  }
+
+  if (finalMessage === undefined) {
+    return failedWithoutResult('No result message received from query stream');
+  }
+
+  const result = extractResult(finalMessage);
+
+  // Report this session's cost together with the stream's cumulative total.
+  if (eventStream) {
+    const { cumulative } = eventStream.getCost();
+    const costEvent = {
+      type: GSDEventType.CostUpdate,
+      timestamp: new Date().toISOString(),
+      sessionId: finalMessage.session_id,
+      phase: streamContext?.phase,
+      planName: streamContext?.planName,
+      sessionCostUsd: result.totalCostUsd,
+      cumulativeCostUsd: cumulative,
+    } as GSDCostUpdateEvent;
+    eventStream.emitEvent(costEvent);
+  }
+
+  return result;
+}
+
+// ─── Phase step session runner ───────────────────────────────────────────────
+
+/**
+ * Translate a phase step into the PhaseType used for tool scoping: `plan_check`
+ * shares Verify, and a step absent from the table (e.g. 'advance') becomes Execute.
+ */
+function stepTypeToPhaseType(step: PhaseStepType): PhaseType {
+  const { Discuss, Research, Plan, Verify, Execute } = PhaseType;
+  const byStep: Record<string, PhaseType> = {
+    discuss: Discuss, research: Research,
+    plan: Plan,
+    plan_check: Verify,
+    execute: Execute,
+    verify: Verify,
+  };
+  return byStep[step] ?? Execute;
+}
+
+/**
+ * Run one phase step in a single Agent SDK query() session.
+ *
+ * Unlike runPlanSession, no plan is parsed: the caller supplies the finished
+ * prompt text, which is sent both as the user prompt and as the append to the
+ * `claude_code` system prompt preset. Tools default to the set for the step's
+ * phase type; turn, budget and cwd defaults match runPlanSession (50, 5.0 USD,
+ * process.cwd()).
+ *
+ * @param prompt - Raw prompt string to append to the system prompt
+ * @param phaseStep - Phase step type (determines tool scoping)
+ * @param config - GSD project configuration
+ * @param options - Session overrides (maxTurns, budget, model, etc.)
+ * @param eventStream - Optional event stream for observability
+ * @param streamContext - Optional context for event tagging
+ * @returns Typed PlanResult with cost, duration, success/error status
+ */
+export async function runPhaseStepSession(
+  prompt: string,
+  phaseStep: PhaseStepType,
+  config: GSDConfig,
+  options?: SessionOptions,
+  eventStream?: GSDEventStream,
+  streamContext?: EventStreamContext,
+): Promise<PlanResult> {
+  // Explicit allowedTools win; otherwise tools are scoped by the step's phase type
+  const allowedTools =
+    options?.allowedTools ?? getToolsForPhase(stepTypeToPhaseType(phaseStep));
+  const model = resolveModel(options, config);
+
+  const messages = query({
+    prompt,
+    options: {
+      systemPrompt: {
+        type: 'preset',
+        preset: 'claude_code',
+        append: prompt,
+      },
+      settingSources: ['project'],
+      allowedTools,
+      permissionMode: 'bypassPermissions',
+      allowDangerouslySkipPermissions: true,
+      maxTurns: options?.maxTurns ?? 50,
+      maxBudgetUsd: options?.maxBudgetUsd ?? 5.0,
+      cwd: options?.cwd ?? process.cwd(),
+      ...(model ? { model } : {}),
+    },
+  });
+
+  return processQueryStream(messages, eventStream, streamContext);
+}
--- a/sdk/src/tool-scoping.test.ts
+++ b/sdk/src/tool-scoping.test.ts
@@ -0,0 +1,160 @@
+import { describe, it, expect } from 'vitest';
+import { getToolsForPhase, PHASE_AGENT_MAP, PHASE_DEFAULT_TOOLS } from './tool-scoping.js';
+import { PhaseType } from './types.js';
+
+// ─── Tests ───────────────────────────────────────────────────────────────────
+
+describe('getToolsForPhase', () => { // phase defaults, copy semantics, agent-definition override, R015 checks
+  describe('default tools per phase', () => {
+    it('research phase: read-only + web search, no Write/Edit', () => {
+      const tools = getToolsForPhase(PhaseType.Research);
+      expect(tools).toContain('Read');
+      expect(tools).toContain('Grep');
+      expect(tools).toContain('Glob');
+      expect(tools).toContain('Bash');
+      expect(tools).toContain('WebSearch');
+      expect(tools).not.toContain('Write');
+      expect(tools).not.toContain('Edit');
+    });
+
+    it('execute phase: full read/write', () => {
+      const tools = getToolsForPhase(PhaseType.Execute);
+      expect(tools).toContain('Read');
+      expect(tools).toContain('Write');
+      expect(tools).toContain('Edit');
+      expect(tools).toContain('Bash');
+      expect(tools).toContain('Grep');
+      expect(tools).toContain('Glob');
+    });
+
+    it('verify phase: read-only, no Write/Edit', () => {
+      const tools = getToolsForPhase(PhaseType.Verify);
+      expect(tools).toContain('Read');
+      expect(tools).toContain('Bash');
+      expect(tools).toContain('Grep');
+      expect(tools).toContain('Glob');
+      expect(tools).not.toContain('Write');
+      expect(tools).not.toContain('Edit');
+    });
+
+    it('discuss phase: read-only, no Write/Edit', () => {
+      const tools = getToolsForPhase(PhaseType.Discuss);
+      expect(tools).toContain('Read');
+      expect(tools).toContain('Bash');
+      expect(tools).toContain('Grep');
+      expect(tools).toContain('Glob');
+      expect(tools).not.toContain('Write');
+      expect(tools).not.toContain('Edit');
+    });
+
+    it('plan phase: read/write + web, has Write but no Edit', () => {
+      const tools = getToolsForPhase(PhaseType.Plan);
+      expect(tools).toContain('Read');
+      expect(tools).toContain('Write');
+      expect(tools).toContain('Bash');
+      expect(tools).toContain('Glob');
+      expect(tools).toContain('Grep');
+      expect(tools).toContain('WebFetch');
+      expect(tools).not.toContain('Edit'); // the title promises "no Edit"; assert it
+    });
+  });
+
+  describe('returns copies, not references', () => {
+    it('mutating returned array does not affect future calls', () => {
+      const tools1 = getToolsForPhase(PhaseType.Execute);
+      tools1.push('CustomTool');
+      const tools2 = getToolsForPhase(PhaseType.Execute);
+      expect(tools2).not.toContain('CustomTool');
+    });
+  });
+
+  describe('agent definition override', () => {
+    it('parses tools from agent def frontmatter when provided', () => {
+      const agentDef = `---
+name: test-agent
+tools: Bash, Grep, CustomTool
+---
+
+<role>Test agent</role>`;
+
+      const tools = getToolsForPhase(PhaseType.Execute, agentDef);
+      expect(tools).toEqual(['Bash', 'Grep', 'CustomTool']);
+    });
+
+    it('falls back to defaults when agent def has no tools line', () => {
+      const agentDef = `---
+name: test-agent
+---
+
+<role>Test agent</role>`;
+
+      const tools = getToolsForPhase(PhaseType.Execute, agentDef);
+      expect(tools).toEqual(['Read', 'Write', 'Edit', 'Bash', 'Grep', 'Glob']); // no tools: line → DEFAULT_ALLOWED_TOOLS
+    });
+
+    it('falls back to defaults when agent def has no frontmatter', () => {
+      const agentDef = '<role>Test agent with no frontmatter</role>';
+
+      const tools = getToolsForPhase(PhaseType.Research, agentDef);
+      // No frontmatter → DEFAULT_ALLOWED_TOOLS. NOTE(review): this gives Research Write/Edit — confirm vs R015
+      expect(tools).toEqual(['Read', 'Write', 'Edit', 'Bash', 'Grep', 'Glob']);
+    });
+  });
+
+  describe('R015 compliance', () => {
+    it('research has no Write or Edit on source', () => {
+      const tools = getToolsForPhase(PhaseType.Research);
+      expect(tools).not.toContain('Write');
+      expect(tools).not.toContain('Edit');
+    });
+
+    it('execute has Write and Edit for source modification', () => {
+      const tools = getToolsForPhase(PhaseType.Execute);
+      expect(tools).toContain('Write');
+      expect(tools).toContain('Edit');
+    });
+
+    it('verify has no Write or Edit (read-only verification)', () => {
+      const tools = getToolsForPhase(PhaseType.Verify);
+      expect(tools).not.toContain('Write');
+      expect(tools).not.toContain('Edit');
+    });
+  });
+});
+
+describe('PHASE_AGENT_MAP', () => { // every phase has an entry; each entry names the expected file (or null)
+  it('maps all phase types', () => {
+    for (const phase of Object.values(PhaseType)) {
+      expect(phase in PHASE_AGENT_MAP).toBe(true); // key presence only, so a null value still passes
+    }
+  });
+
+  it('execute maps to gsd-executor.md', () => {
+    expect(PHASE_AGENT_MAP[PhaseType.Execute]).toBe('gsd-executor.md');
+  });
+
+  it('research maps to gsd-phase-researcher.md', () => {
+    expect(PHASE_AGENT_MAP[PhaseType.Research]).toBe('gsd-phase-researcher.md');
+  });
+
+  it('plan maps to gsd-planner.md', () => {
+    expect(PHASE_AGENT_MAP[PhaseType.Plan]).toBe('gsd-planner.md');
+  });
+
+  it('verify maps to gsd-verifier.md', () => {
+    expect(PHASE_AGENT_MAP[PhaseType.Verify]).toBe('gsd-verifier.md');
+  });
+
+  it('discuss maps to null (no dedicated agent)', () => {
+    expect(PHASE_AGENT_MAP[PhaseType.Discuss]).toBeNull();
+  });
+});
+
+describe('PHASE_DEFAULT_TOOLS', () => { // guards against a phase being added without a tool list
+  it('covers all phase types', () => {
+    for (const phase of Object.values(PhaseType)) {
+      expect(PHASE_DEFAULT_TOOLS[phase]).toBeDefined();
+      expect(PHASE_DEFAULT_TOOLS[phase].length).toBeGreaterThan(0); // an empty list also fails
+    }
+  });
+});
--- a/sdk/src/tool-scoping.ts
+++ b/sdk/src/tool-scoping.ts
@@ -0,0 +1,59 @@
+/**
+ * Tool scoping — maps phase types to allowed tool sets.
+ *
+ * Per R015, different phases get different tool access:
+ * - Research: read-only + web search (WebSearch; no Write/Edit on source)
+ * - Execute: full read/write (Write and Edit)
+ * - Verify: read-only (no Write/Edit; Bash stays available)
+ * - Discuss: read-only (no Write/Edit; Bash stays available)
+ * - Plan: Write without Edit, plus WebFetch (for creating plan files)
+ */
+
+import { PhaseType } from './types.js';
+import { parseAgentTools } from './prompt-builder.js';
+
+// ─── Phase default tool sets ─────────────────────────────────────────────────
+
+const PHASE_DEFAULT_TOOLS: Record<PhaseType, string[]> = { // fallback used by getToolsForPhase when no agentDef is given
+  [PhaseType.Research]: ['Read', 'Grep', 'Glob', 'Bash', 'WebSearch'], // no Write/Edit; only phase with WebSearch
+  [PhaseType.Execute]: ['Read', 'Write', 'Edit', 'Bash', 'Grep', 'Glob'], // only phase with Edit
+  [PhaseType.Verify]: ['Read', 'Bash', 'Grep', 'Glob'], // no Write/Edit
+  [PhaseType.Discuss]: ['Read', 'Bash', 'Grep', 'Glob'], // same set as Verify
+  [PhaseType.Plan]: ['Read', 'Write', 'Bash', 'Glob', 'Grep', 'WebFetch'], // Write but no Edit; only phase with WebFetch
+};
+
+// ─── Phase → agent definition filename ──────────────────────────────────────
+
+/**
+ * Agent definition filename for each phase type; `null` marks a phase with none.
+ * Discuss has no dedicated agent — it runs in the main conversation.
+ */
+export const PHASE_AGENT_MAP: Record<PhaseType, string | null> = {
+  [PhaseType.Execute]: 'gsd-executor.md',
+  [PhaseType.Research]: 'gsd-phase-researcher.md',
+  [PhaseType.Plan]: 'gsd-planner.md',
+  [PhaseType.Verify]: 'gsd-verifier.md',
+  [PhaseType.Discuss]: null,
+};
+
+// ─── Public API ──────────────────────────────────────────────────────────────
+
+/**
+ * Resolve the tool names a phase session is allowed to use.
+ *
+ * A non-empty agent definition wins: its tools come from parseAgentTools.
+ * Otherwise a copy of the phase's PHASE_DEFAULT_TOOLS entry is returned.
+ *
+ * @param phaseType - Phase whose defaults apply when no definition is given
+ * @param agentDef - Optional raw agent .md file content to parse tools from
+ * @returns Array of allowed tool names
+ */
+export function getToolsForPhase(phaseType: PhaseType, agentDef?: string): string[] {
+  if (!agentDef) {
+    // Copy so callers cannot mutate the shared defaults table.
+    return PHASE_DEFAULT_TOOLS[phaseType].slice();
+  }
+  return parseAgentTools(agentDef);
+}
+
+export { PHASE_DEFAULT_TOOLS };
--- a/sdk/src/types.ts
+++ b/sdk/src/types.ts
@@ -0,0 +1,849 @@
+/**
+ * Core type definitions for the GSD SDK.
+ *
+ * Models GSD-1 PLAN.md structures (YAML frontmatter + XML task bodies) and the
+ * SDK's session, event-stream, phase, milestone, and init workflow types.
+ */
+
+// ─── Frontmatter types ───────────────────────────────────────────────────────
+
+export interface MustHaveArtifact {
+  path: string;
+  provides: string;
+  min_lines?: number;
+  exports?: string[];
+  contains?: string;
+}
+
+export interface MustHaveKeyLink {
+  from: string;
+  to: string;
+  via: string;
+  pattern?: string;
+}
+
+export interface MustHaves {
+  truths: string[];
+  artifacts: MustHaveArtifact[];
+  key_links: MustHaveKeyLink[];
+}
+
+export interface UserSetupEnvVar {
+  name: string;
+  source: string;
+}
+
+export interface UserSetupDashboardConfig {
+  task: string;
+  location: string;
+  details: string;
+}
+
+export interface UserSetupItem {
+  service: string;
+  why: string;
+  env_vars?: UserSetupEnvVar[];
+  dashboard_config?: UserSetupDashboardConfig[];
+  local_dev?: string[];
+}
+
+export interface PlanFrontmatter { // YAML frontmatter block of a PLAN.md file
+  phase: string;
+  plan: string;
+  type: string;
+  wave: number; // presumably the dependency wave the plan runs in (cf. PhasePlanIndex.waves) — TODO confirm
+  depends_on: string[];
+  files_modified: string[];
+  autonomous: boolean;
+  requirements: string[];
+  user_setup?: UserSetupItem[]; // optional; the only non-required declared field
+  must_haves: MustHaves;
+  [key: string]: unknown; // Allow additional fields
+}
+
+// ─── Task types ──────────────────────────────────────────────────────────────
+
+export interface PlanTask {
+  type: string;
+  name: string;
+  files: string[];
+  read_first: string[];
+  action: string;
+  verify: string;
+  acceptance_criteria: string[];
+  done: string;
+}
+
+// ─── Parsed plan ─────────────────────────────────────────────────────────────
+
+export interface ParsedPlan {
+  frontmatter: PlanFrontmatter;
+  objective: string;
+  execution_context: string[];
+  context_refs: string[];
+  tasks: PlanTask[];
+  raw: string;
+}
+
+// ─── Init command types ──────────────────────────────────────────────────────
+
+/**
+ * JSON output from `gsd-tools.cjs init new-project`.
+ * Describes project state and model configuration for the init workflow.
+ */
+export interface InitNewProjectInfo {
+  /** Model resolved for the gsd-project-researcher agent. */
+  researcher_model: string;
+  /** Model resolved for the gsd-research-synthesizer agent. */
+  synthesizer_model: string;
+  /** Model resolved for the gsd-roadmapper agent. */
+  roadmapper_model: string;
+
+  /** Whether docs should be committed after generation. */
+  commit_docs: boolean;
+
+  /** Whether .planning/PROJECT.md already exists. */
+  project_exists: boolean;
+  /** Whether a .planning/codebase directory exists. */
+  has_codebase_map: boolean;
+  /** Whether .planning/ directory exists at all. */
+  planning_exists: boolean;
+
+  /** Whether source code files were detected in the project. */
+  has_existing_code: boolean;
+  /** Whether a package manifest (package.json, Cargo.toml, etc.) was found. */
+  has_package_file: boolean;
+  /** True when existing code or a package manifest is present. */
+  is_brownfield: boolean;
+  /** True when brownfield but no codebase map exists yet. */
+  needs_codebase_map: boolean;
+
+  /** Whether a .git directory exists. */
+  has_git: boolean;
+
+  /** Whether Brave Search API key is available. */
+  brave_search_available: boolean;
+  /** Whether Firecrawl API key is available. */
+  firecrawl_available: boolean;
+  /** Whether Exa Search API key is available. */
+  exa_search_available: boolean;
+
+  /** Relative path to PROJECT.md (always '.planning/PROJECT.md'). */
+  project_path: string;
+
+  /** Absolute project root path (injected by withProjectRoot). */
+  project_root?: string;
+
+  /** Allow additional fields from gsd-tools evolution. */
+  [key: string]: unknown;
+}
+
+// ─── Session & execution types ───────────────────────────────────────────────
+
+/**
+ * Options for configuring a single plan execution session.
+ */
+export interface SessionOptions {
+  /** Maximum agentic turns before stopping. Default: 50. */
+  maxTurns?: number;
+  /** Maximum budget in USD. Default: 5.0. */
+  maxBudgetUsd?: number;
+  /** Model ID to use (e.g., 'claude-sonnet-4-6'). Falls back to config model_profile. */
+  model?: string;
+  /** Working directory for the session. */
+  cwd?: string;
+  /** Allowed tool names. Default: ['Read','Write','Edit','Bash','Grep','Glob']. */
+  allowedTools?: string[];
+}
+
+/**
+ * Usage statistics from a completed session.
+ */
+export interface SessionUsage {
+  inputTokens: number;
+  outputTokens: number;
+  cacheReadInputTokens: number;
+  cacheCreationInputTokens: number;
+}
+
+/**
+ * Result of a plan execution session.
+ */
+export interface PlanResult {
+  /** Whether the plan completed successfully. */
+  success: boolean;
+  /** Session UUID for audit trail. */
+  sessionId: string;
+  /** Total cost in USD. */
+  totalCostUsd: number;
+  /** Total wall-clock duration in milliseconds. */
+  durationMs: number;
+  /** Token usage breakdown. */
+  usage: SessionUsage;
+  /** Number of agentic turns used. */
+  numTurns: number;
+  /** Error details when success is false. */
+  error?: {
+    /** Error subtype from SDK result (e.g., 'error_max_turns', 'error_during_execution'). */
+    subtype: string;
+    /** Error messages. */
+    messages: string[];
+  };
+}
+
+/**
+ * Options for creating a GSD instance.
+ */
+export interface GSDOptions {
+  /** Root directory of the project. */
+  projectDir: string;
+  /** Path to gsd-tools.cjs. Falls back to ~/.claude/get-shit-done/bin/gsd-tools.cjs. */
+  gsdToolsPath?: string;
+  /** Model to use for execution sessions. */
+  model?: string;
+  /** Maximum budget per plan execution in USD. Default: 5.0. */
+  maxBudgetUsd?: number;
+  /** Maximum turns per plan execution. Default: 50. */
+  maxTurns?: number;
+  /** Enable auto mode: sets auto_advance=true, skip_discuss=false in workflow config. */
+  autoMode?: boolean;
+}
+
+// ─── S02: Event stream types ─────────────────────────────────────────────────
+
+/**
+ * Phase types for GSD execution workflow.
+ */
+export enum PhaseType {
+  Discuss = 'discuss',
+  Research = 'research',
+  Plan = 'plan',
+  Execute = 'execute',
+  Verify = 'verify',
+}
+
+/**
+ * Event types emitted by the GSD event stream.
+ * Maps from SDKMessage variants to domain-meaningful events.
+ */
+export enum GSDEventType {
+  SessionInit = 'session_init',
+  SessionComplete = 'session_complete',
+  SessionError = 'session_error',
+  AssistantText = 'assistant_text',
+  ToolCall = 'tool_call',
+  ToolProgress = 'tool_progress',
+  ToolUseSummary = 'tool_use_summary',
+  TaskStarted = 'task_started',
+  TaskProgress = 'task_progress',
+  TaskNotification = 'task_notification',
+  CostUpdate = 'cost_update',
+  APIRetry = 'api_retry',
+  RateLimit = 'rate_limit',
+  StatusChange = 'status_change',
+  CompactBoundary = 'compact_boundary',
+  StreamEvent = 'stream_event',
+  PhaseStart = 'phase_start',
+  PhaseStepStart = 'phase_step_start',
+  PhaseStepComplete = 'phase_step_complete',
+  PhaseComplete = 'phase_complete',
+  WaveStart = 'wave_start',
+  WaveComplete = 'wave_complete',
+  MilestoneStart = 'milestone_start',
+  MilestoneComplete = 'milestone_complete',
+  InitStart = 'init_start',
+  InitStepStart = 'init_step_start',
+  InitStepComplete = 'init_step_complete',
+  InitComplete = 'init_complete',
+  InitResearchSpawn = 'init_research_spawn',
+}
+
+/**
+ * Base fields present on every GSD event.
+ */
+export interface GSDEventBase {
+  type: GSDEventType; // narrowed to a single enum member by each concrete event interface
+  timestamp: string; // tests use ISO-8601 (e.g. '2025-06-15T14:30:45.123Z') — TODO confirm producers do too
+  sessionId: string;
+  phase?: PhaseType;
+  planName?: string;
+}
+
+/**
+ * Session initialized — emitted on SDKSystemMessage subtype 'init'.
+ */
+export interface GSDSessionInitEvent extends GSDEventBase {
+  type: GSDEventType.SessionInit;
+  model: string;
+  tools: string[];
+  cwd: string;
+}
+
+/**
+ * Session completed successfully — emitted on SDKResultSuccess.
+ */
+export interface GSDSessionCompleteEvent extends GSDEventBase {
+  type: GSDEventType.SessionComplete;
+  success: true;
+  totalCostUsd: number;
+  durationMs: number;
+  numTurns: number;
+  result?: string;
+}
+
+/**
+ * Session ended with an error — emitted on SDKResultError.
+ */
+export interface GSDSessionErrorEvent extends GSDEventBase {
+  type: GSDEventType.SessionError;
+  success: false;
+  totalCostUsd: number;
+  durationMs: number;
+  numTurns: number;
+  errorSubtype: string;
+  errors: string[];
+}
+
+/**
+ * Assistant produced text output.
+ */
+export interface GSDAssistantTextEvent extends GSDEventBase {
+  type: GSDEventType.AssistantText;
+  text: string;
+}
+
+/**
+ * Tool invocation detected in assistant response.
+ */
+export interface GSDToolCallEvent extends GSDEventBase {
+  type: GSDEventType.ToolCall;
+  toolName: string;
+  toolUseId: string;
+  input: Record<string, unknown>;
+}
+
+/**
+ * Tool execution progress update.
+ */
+export interface GSDToolProgressEvent extends GSDEventBase {
+  type: GSDEventType.ToolProgress;
+  toolName: string;
+  toolUseId: string;
+  elapsedSeconds: number;
+}
+
+/**
+ * Tool use summary after completion.
+ */
+export interface GSDToolUseSummaryEvent extends GSDEventBase {
+  type: GSDEventType.ToolUseSummary;
+  summary: string;
+  toolUseIds: string[];
+}
+
+/**
+ * Subagent task started.
+ */
+export interface GSDTaskStartedEvent extends GSDEventBase {
+  type: GSDEventType.TaskStarted;
+  taskId: string;
+  description: string;
+  taskType?: string;
+}
+
+/**
+ * Subagent task progress.
+ */
+export interface GSDTaskProgressEvent extends GSDEventBase {
+  type: GSDEventType.TaskProgress;
+  taskId: string;
+  description: string;
+  totalTokens: number;
+  toolUses: number;
+  durationMs: number;
+  lastToolName?: string;
+}
+
+/**
+ * Subagent task completed/failed/stopped.
+ */
+export interface GSDTaskNotificationEvent extends GSDEventBase {
+  type: GSDEventType.TaskNotification;
+  taskId: string;
+  status: 'completed' | 'failed' | 'stopped';
+  summary: string;
+}
+
+/**
+ * Cost updated (emitted on session_complete and periodically).
+ */
+export interface GSDCostUpdateEvent extends GSDEventBase {
+  type: GSDEventType.CostUpdate;
+  sessionCostUsd: number;
+  cumulativeCostUsd: number;
+}
+
+/**
+ * API retry in progress.
+ */
+export interface GSDAPIRetryEvent extends GSDEventBase {
+  type: GSDEventType.APIRetry;
+  attempt: number;
+  maxRetries: number;
+  retryDelayMs: number;
+  errorStatus: number | null;
+}
+
+/**
+ * Rate limit information updated.
+ */
+export interface GSDRateLimitEvent extends GSDEventBase {
+  type: GSDEventType.RateLimit;
+  status: string;
+  resetsAt?: number;
+  utilization?: number;
+}
+
+/**
+ * System status change (e.g., compacting).
+ */
+export interface GSDStatusChangeEvent extends GSDEventBase {
+  type: GSDEventType.StatusChange;
+  status: string | null;
+}
+
+/**
+ * Compact boundary — context window was compacted.
+ */
+export interface GSDCompactBoundaryEvent extends GSDEventBase {
+  type: GSDEventType.CompactBoundary;
+  trigger: 'manual' | 'auto';
+  preTokens: number;
+}
+
+/**
+ * Raw stream event from SDK (partial assistant messages).
+ */
+export interface GSDStreamEvent extends GSDEventBase {
+  type: GSDEventType.StreamEvent;
+  event: unknown;
+}
+
+/**
+ * Phase execution started.
+ */
+export interface GSDPhaseStartEvent extends GSDEventBase {
+  type: GSDEventType.PhaseStart;
+  phaseNumber: string;
+  phaseName: string;
+}
+
+/**
+ * A single phase step (discuss, research, etc.) started.
+ */
+export interface GSDPhaseStepStartEvent extends GSDEventBase {
+  type: GSDEventType.PhaseStepStart;
+  phaseNumber: string;
+  step: PhaseStepType;
+}
+
+/**
+ * A single phase step completed.
+ */
+export interface GSDPhaseStepCompleteEvent extends GSDEventBase {
+  type: GSDEventType.PhaseStepComplete;
+  phaseNumber: string;
+  step: PhaseStepType;
+  success: boolean;
+  durationMs: number;
+  error?: string;
+}
+
+/**
+ * Full phase execution completed.
+ */
+export interface GSDPhaseCompleteEvent extends GSDEventBase {
+  type: GSDEventType.PhaseComplete;
+  phaseNumber: string;
+  phaseName: string;
+  success: boolean;
+  totalCostUsd: number;
+  totalDurationMs: number;
+  stepsCompleted: number;
+}
+
+// ─── S04: Plan index & wave event types ─────────────────────────────────────
+
+/**
+ * Info about a single plan within a phase, as returned by phase-plan-index.
+ */
+export interface PlanInfo {
+  id: string;
+  wave: number;
+  autonomous: boolean;
+  objective: string | null;
+  files_modified: string[];
+  task_count: number;
+  has_summary: boolean;
+}
+
+/**
+ * Structured plan index for a phase, grouping plans into dependency waves.
+ */
+export interface PhasePlanIndex {
+  phase: string;
+  plans: PlanInfo[];
+  waves: Record<string, string[]>;
+  incomplete: string[];
+  has_checkpoints: boolean;
+}
+
+/**
+ * Wave execution started — emitted before concurrent plans launch.
+ */
+export interface GSDWaveStartEvent extends GSDEventBase {
+  type: GSDEventType.WaveStart;
+  phaseNumber: string;
+  waveNumber: number;
+  planCount: number;
+  planIds: string[];
+}
+
+/**
+ * Wave execution completed — emitted after all plans in a wave settle.
+ */
+export interface GSDWaveCompleteEvent extends GSDEventBase {
+  type: GSDEventType.WaveComplete;
+  phaseNumber: string;
+  waveNumber: number;
+  successCount: number;
+  failureCount: number;
+  durationMs: number;
+}
+
+// ─── S05: Milestone-level types ──────────────────────────────────────────────
+
+/**
+ * Single phase entry from `gsd-tools.cjs roadmap analyze`.
+ */
+export interface RoadmapPhaseInfo {
+  number: string;
+  disk_status: string;
+  roadmap_complete: boolean;
+  phase_name: string;
+}
+
+/**
+ * Structured output from `gsd-tools.cjs roadmap analyze`.
+ */
+export interface RoadmapAnalysis {
+  phases: RoadmapPhaseInfo[];
+  [key: string]: unknown;
+}
+
+/**
+ * Options for configuring a milestone-level run (multi-phase orchestration).
+ * Superset of PhaseRunnerOptions so phase-level callbacks pass through.
+ */
+export interface MilestoneRunnerOptions extends PhaseRunnerOptions {
+  /** Called after each phase completes. Return 'stop' to halt milestone execution. */
+  onPhaseComplete?: (result: PhaseRunnerResult, phaseInfo: RoadmapPhaseInfo) => Promise<void | 'stop'>;
+}
+
+/**
+ * Result of a full milestone run (all phases).
+ */
+export interface MilestoneRunnerResult {
+  success: boolean;
+  phases: PhaseRunnerResult[];
+  totalCostUsd: number;
+  totalDurationMs: number;
+}
+
+/**
+ * Milestone execution started.
+ */
+export interface GSDMilestoneStartEvent extends GSDEventBase {
+  type: GSDEventType.MilestoneStart;
+  phaseCount: number;
+  prompt: string;
+}
+
+/**
+ * Milestone execution completed.
+ */
+export interface GSDMilestoneCompleteEvent extends GSDEventBase {
+  type: GSDEventType.MilestoneComplete;
+  success: boolean;
+  totalCostUsd: number;
+  totalDurationMs: number;
+  phasesCompleted: number;
+}
+
+// ─── Init workflow types ─────────────────────────────────────────────────────
+
+/**
+ * Named steps in the init workflow.
+ */
+export type InitStepName =
+  | 'setup'
+  | 'config'
+  | 'project'
+  | 'research-stack'
+  | 'research-features'
+  | 'research-architecture'
+  | 'research-pitfalls'
+  | 'synthesis'
+  | 'requirements'
+  | 'roadmap';
+
+/**
+ * Configuration overrides for InitRunner.
+ */
+export interface InitConfig {
+  /** Model for research sessions (overrides gsd-tools detected model). */
+  researchModel?: string;
+  /** Model for synthesis/roadmap sessions. */
+  orchestratorModel?: string;
+  /** Max budget per individual session in USD. Default: 3.0. */
+  maxBudgetPerSession?: number;
+  /** Max turns per session. Default: 30. */
+  maxTurnsPerSession?: number;
+}
+
+/**
+ * Result of a single init workflow step.
+ */
+export interface InitStepResult {
+  step: InitStepName;
+  success: boolean;
+  durationMs: number;
+  costUsd: number;
+  error?: string;
+  artifacts?: string[];
+}
+
+/**
+ * Result of the full init workflow run.
+ */
+export interface InitResult {
+  success: boolean;
+  steps: InitStepResult[];
+  totalCostUsd: number;
+  totalDurationMs: number;
+  artifacts: string[];
+}
+
+/**
+ * Init workflow started.
+ */
+export interface GSDInitStartEvent extends GSDEventBase {
+  type: GSDEventType.InitStart;
+  input: string;
+  projectDir: string;
+}
+
+/**
+ * Init workflow step started.
+ */
+export interface GSDInitStepStartEvent extends GSDEventBase {
+  type: GSDEventType.InitStepStart;
+  step: InitStepName;
+}
+
+/**
+ * Init workflow step completed.
+ */
+export interface GSDInitStepCompleteEvent extends GSDEventBase {
+  type: GSDEventType.InitStepComplete;
+  step: InitStepName;
+  success: boolean;
+  durationMs: number;
+  costUsd: number;
+  error?: string;
+}
+
+/**
+ * Init workflow completed.
+ */
+export interface GSDInitCompleteEvent extends GSDEventBase {
+  type: GSDEventType.InitComplete;
+  success: boolean;
+  totalCostUsd: number;
+  totalDurationMs: number;
+  artifactCount: number;
+}
+
+/**
+ * Research sessions spawned in parallel during init.
+ */
+export interface GSDInitResearchSpawnEvent extends GSDEventBase {
+  type: GSDEventType.InitResearchSpawn;
+  sessionCount: number;
+  researchTypes: string[];
+}
+
+/**
+ * Discriminated union of all GSD events, keyed on `type` (one member interface per GSDEventType value).
+ */
+export type GSDEvent =
+  | GSDSessionInitEvent
+  | GSDSessionCompleteEvent
+  | GSDSessionErrorEvent
+  | GSDAssistantTextEvent
+  | GSDToolCallEvent
+  | GSDToolProgressEvent
+  | GSDToolUseSummaryEvent
+  | GSDTaskStartedEvent
+  | GSDTaskProgressEvent
+  | GSDTaskNotificationEvent
+  | GSDCostUpdateEvent
+  | GSDAPIRetryEvent
+  | GSDRateLimitEvent
+  | GSDStatusChangeEvent
+  | GSDCompactBoundaryEvent
+  | GSDStreamEvent
+  | GSDPhaseStartEvent
+  | GSDPhaseStepStartEvent
+  | GSDPhaseStepCompleteEvent
+  | GSDPhaseCompleteEvent
+  | GSDWaveStartEvent
+  | GSDWaveCompleteEvent
+  | GSDMilestoneStartEvent
+  | GSDMilestoneCompleteEvent
+  | GSDInitStartEvent
+  | GSDInitStepStartEvent
+  | GSDInitStepCompleteEvent
+  | GSDInitCompleteEvent
+  | GSDInitResearchSpawnEvent;
+
+/**
+ * Transport handler interface for consuming GSD events.
+ * Transports receive all events and can write to files, WebSockets, etc.
+ */
+export interface TransportHandler {
+  /** Called for each event. Must not throw. */
+  onEvent(event: GSDEvent): void;
+  /** Called when the stream is closing. Clean up resources. */
+  close(): void;
+}
+
+/**
+ * Context files resolved for a phase execution.
+ */
+export interface ContextFiles {
+  state?: string;
+  roadmap?: string;
+  context?: string;
+  research?: string;
+  requirements?: string;
+  config?: string;
+  plan?: string;
+  summary?: string;
+}
+
+/**
+ * Per-session cost bucket for tracking execution costs.
+ */
+export interface CostBucket {
+  sessionId: string;
+  costUsd: number;
+}
+
+/**
+ * Cost tracker interface for per-session and cumulative cost tracking.
+ * Uses per-session buckets keyed by session_id for thread-safety in parallel execution.
+ */
+export interface CostTracker {
+  /** Per-session cost buckets. */
+  sessions: Map<string, CostBucket>;
+  /** Total cumulative cost across all sessions. */
+  cumulativeCostUsd: number;
+  /** Current active session ID. */
+  activeSessionId?: string;
+}
+
+// ─── S03: Phase lifecycle types ──────────────────────────────────────────────
+
+/**
+ * Steps in the phase lifecycle state machine.
+ * Reuses every PhaseType value (which covers session types) and adds the
+ * 'plan_check' and 'advance' steps that complete the lifecycle.
+ */
+export enum PhaseStepType {
+  Discuss = 'discuss',
+  Research = 'research',
+  Plan = 'plan',
+  PlanCheck = 'plan_check',
+  Execute = 'execute',
+  Verify = 'verify',
+  Advance = 'advance',
+}
+
+/**
+ * Structured output from `gsd-tools.cjs init phase-op <N>`.
+ * Describes the current state of a phase on disk.
+ */
+export interface PhaseOpInfo {
+  phase_found: boolean;
+  phase_dir: string;
+  phase_number: string;
+  phase_name: string;
+  phase_slug: string;
+  padded_phase: string;
+  has_research: boolean;
+  has_context: boolean;
+  has_plans: boolean;
+  has_verification: boolean;
+  plan_count: number;
+  roadmap_exists: boolean;
+  planning_exists: boolean;
+  commit_docs: boolean;
+  context_path: string;
+  research_path: string;
+}
+
+/**
+ * Result of a single phase step execution.
+ */
+export interface PhaseStepResult {
+  step: PhaseStepType;
+  success: boolean;
+  durationMs: number;
+  error?: string;
+  planResults?: PlanResult[];
+}
+
+/**
+ * Result of a full phase lifecycle run.
+ */
+export interface PhaseRunnerResult {
+  phaseNumber: string;
+  phaseName: string;
+  steps: PhaseStepResult[];
+  success: boolean;
+  totalCostUsd: number;
+  totalDurationMs: number;
+}
+
+/**
+ * Callback hooks for human gates in the phase lifecycle.
+ * When not provided, the runner auto-approves at each gate.
+ */
+export interface HumanGateCallbacks {
+  onDiscussApproval?: (context: { phaseNumber: string; phaseName: string }) => Promise<'approve' | 'reject' | 'modify'>;
+  onVerificationReview?: (result: { phaseNumber: string; stepResult: PhaseStepResult }) => Promise<'accept' | 'reject' | 'retry'>;
+  onBlockerDecision?: (blocker: { phaseNumber: string; step: PhaseStepType; error?: string }) => Promise<'retry' | 'skip' | 'stop'>;
+}
+
+/**
+ * Options for configuring a PhaseRunner execution.
+ */
+export interface PhaseRunnerOptions {
+  callbacks?: HumanGateCallbacks; // human gate hooks; per HumanGateCallbacks, gates auto-approve when absent
+  maxBudgetPerStep?: number; // presumably USD, like GSDOptions.maxBudgetUsd — TODO confirm
+  maxTurnsPerStep?: number; // presumably agentic turns per step session — TODO confirm
+  model?: string; // presumably a model ID as in SessionOptions.model — TODO confirm
+  /** Maximum gap closure retries when verification finds gaps. Default: 1. */
+  maxGapRetries?: number;
+}
--- a/sdk/src/ws-transport.test.ts
+++ b/sdk/src/ws-transport.test.ts
@@ -0,0 +1,161 @@
+import { describe, it, expect, afterEach } from 'vitest';
+import { WebSocket } from 'ws';
+import { WSTransport } from './ws-transport.js';
+import { GSDEventType, type GSDEvent, type GSDEventBase } from './types.js';
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+/** Build the non-`type` base fields shared by every test event. */
+function makeBase(overrides: Partial<GSDEventBase> = {}): Omit<GSDEventBase, 'type'> {
+  // Fixed defaults; any field supplied in `overrides` takes precedence.
+  const defaults = { timestamp: '2025-06-15T14:30:45.123Z', sessionId: 'test-session' };
+  const merged = { ...defaults, ...overrides };
+  return merged;
+}
+
+/** Connect a WS client to 127.0.0.1:port; resolves with the socket once open. */
+function connectClient(port: number): Promise<WebSocket> {
+  return new Promise<WebSocket>((onOpen, onFail) => {
+    const socket = new WebSocket(`ws://127.0.0.1:${port}`);
+    socket.on('error', onFail);
+    socket.on('open', () => onOpen(socket));
+  });
+}
+
+/** Resolve with the next message as text; reject if none arrives within 5 s. */
+function waitForMessage(ws: WebSocket): Promise<string> {
+  return new Promise<string>((deliver, giveUp) => {
+    const timer = setTimeout(() => giveUp(new Error('message timeout')), 5000);
+    ws.once('message', (payload) => {
+      clearTimeout(timer);
+      deliver(payload.toString());
+    });
+  });
+}
+
+// Transports started by tests; each is closed after every test.
+const activeTransports: WSTransport[] = [];
+
+afterEach(() => {
+  // splice(0) empties the shared array and hands back what it held.
+  activeTransports.splice(0).forEach((transport) => {
+    try { transport.close(); } catch { /* ignore */ }
+  });
+});
+
+// ─── Tests ───────────────────────────────────────────────────────────────────
+
+describe('WSTransport', () => {
+  it('start() creates a server on the specified port', async () => {
+    const transport = new WSTransport({ port: 0 }); // dynamic port
+    activeTransports.push(transport);
+
+    await transport.start();
+
+    // Server is listening — it reports a bound address with a concrete port
+    const address = (transport as any).server?.address(); // reads the private `server` field to learn the port
+    expect(address).toBeTruthy();
+    expect(typeof address.port).toBe('number');
+    expect(address.port).toBeGreaterThan(0);
+  });
+
+  it('onEvent broadcasts JSON to connected client', async () => {
+    const transport = new WSTransport({ port: 0 });
+    activeTransports.push(transport);
+    await transport.start();
+
+    const address = (transport as any).server?.address();
+    const client = await connectClient(address.port);
+
+    const event: GSDEvent = {
+      ...makeBase(),
+      type: GSDEventType.SessionInit,
+      model: 'claude-sonnet-4-20250514',
+      tools: ['Read', 'Write'],
+      cwd: '/tmp/test',
+    } as GSDEvent;
+
+    const msgPromise = waitForMessage(client); // subscribe before emitting so the message cannot be missed
+    transport.onEvent(event);
+
+    const received = await msgPromise;
+    const parsed = JSON.parse(received);
+
+    expect(parsed.type).toBe('session_init');
+    expect(parsed.model).toBe('claude-sonnet-4-20250514');
+    expect(parsed.tools).toEqual(['Read', 'Write']);
+
+    client.close();
+  });
+
+  it('onEvent handles no connected clients without error', async () => {
+    const transport = new WSTransport({ port: 0 });
+    activeTransports.push(transport);
+    await transport.start();
+
+    // No clients connected — should not throw
+    expect(() => {
+      transport.onEvent({
+        ...makeBase(),
+        type: GSDEventType.MilestoneStart,
+        phaseCount: 2,
+        prompt: 'test',
+      } as GSDEvent);
+    }).not.toThrow();
+  });
+
+  it('close() shuts down the server', async () => {
+    const transport = new WSTransport({ port: 0 });
+    // Don't push to activeTransports — we close manually
+
+    await transport.start();
+    const address = (transport as any).server?.address(); // captured before close() because the field is nulled afterwards
+    expect(address).toBeTruthy();
+
+    transport.close();
+
+    // Server should be null after close
+    expect((transport as any).server).toBeNull();
+
+    // Connecting should fail
+    await expect(connectClient(address.port)).rejects.toThrow();
+  });
+
+  it('close() before start() does not throw', () => {
+    const transport = new WSTransport({ port: 0 });
+    expect(() => transport.close()).not.toThrow();
+  });
+
+  it('broadcasts to multiple connected clients', async () => {
+    const transport = new WSTransport({ port: 0 });
+    activeTransports.push(transport);
+    await transport.start();
+
+    const address = (transport as any).server?.address();
+    const client1 = await connectClient(address.port);
+    const client2 = await connectClient(address.port);
+
+    const event: GSDEvent = {
+      ...makeBase(),
+      type: GSDEventType.MilestoneComplete,
+      success: true,
+      totalCostUsd: 5.0,
+      totalDurationMs: 120000,
+      phasesCompleted: 3,
+    } as GSDEvent;
+
+    const msg1Promise = waitForMessage(client1); // both listeners are registered before the broadcast
+    const msg2Promise = waitForMessage(client2);
+
+    transport.onEvent(event);
+
+    const [msg1, msg2] = await Promise.all([msg1Promise, msg2Promise]);
+
+    expect(JSON.parse(msg1).type).toBe('milestone_complete');
+    expect(JSON.parse(msg2).type).toBe('milestone_complete');
+    expect(JSON.parse(msg1).success).toBe(true);
+
+    client1.close();
+    client2.close();
+  });
+});
--- a/sdk/src/ws-transport.ts
+++ b/sdk/src/ws-transport.ts
@@ -0,0 +1,93 @@
+/**
+ * WebSocket Transport — broadcasts GSD events as JSON over WebSocket.
+ *
+ * Implements TransportHandler. Starts a WebSocketServer on a given port
+ * and JSON-serializes each event to all connected clients.
+ */
+
+import { WebSocketServer, WebSocket } from 'ws';
+import type { GSDEvent, TransportHandler } from './types.js';
+
+export interface WSTransportOptions {
+  port: number; // passed as-is to WebSocketServer({ port }); the tests pass 0 for a dynamic port
+}
+
+export class WSTransport implements TransportHandler {
+  private readonly port: number;
+  private server: WebSocketServer | null = null;
+  private closing = false;
+
+  constructor(options: WSTransportOptions) {
+    this.port = options.port;
+  }
+
+  /**
+   * Start the WebSocket server on the configured port.
+   * Resolves once listening; a no-op if a server already exists or close() was called.
+   * Rejects if the server errors before listening and forgets it, so start() can be retried.
+   */
+  async start(): Promise<void> {
+    if (this.closing || this.server) return;
+
+    const server = new WebSocketServer({ port: this.port });
+    this.server = server;
+    // An 'error' emitted on a client socket with no listener would throw.
+    server.on('connection', (client) => client.on('error', () => {}));
+
+    return new Promise<void>((resolve, reject) => {
+      let listening = false;
+      server.once('listening', () => { listening = true; resolve(); });
+      // Stays attached after startup so later server errors are never unhandled.
+      server.on('error', (err) => {
+        if (!listening && this.server === server) this.server = null;
+        reject(err);
+      });
+    });
+  }
+
+  /**
+   * Broadcast a GSD event as JSON to all connected clients.
+   * Never throws — serialization and per-client send failures are swallowed.
+   */
+  onEvent(event: GSDEvent): void {
+    if (!this.server) return;
+    try {
+      const payload = JSON.stringify(event);
+      for (const client of this.server.clients) {
+        if (client.readyState !== WebSocket.OPEN) continue;
+        try {
+          client.send(payload);
+        } catch {
+          // Ignore individual client send errors
+        }
+      }
+    } catch {
+      // TransportHandler contract: onEvent must never throw
+    }
+  }
+
+  /**
+   * Close all client connections and shut down the server.
+   * Safe to call before start() — sets a closing flag.
+   */
+  close(): void {
+    this.closing = true;
+
+    const server = this.server;
+    if (!server) return;
+    this.server = null;
+
+    for (const client of server.clients) {
+      try {
+        client.terminate();
+      } catch {
+        // Ignore client close errors
+      }
+    }
+    try {
+      server.close();
+    } catch {
+      // Ignore server close errors
+    }
+  }
+}
--- a/sdk/test-fixtures/sample-plan.md
+++ b/sdk/test-fixtures/sample-plan.md
@@ -0,0 +1,32 @@
+---
+phase: '01-test'
+plan: '01'
+type: execute
+wave: 1
+depends_on: []
+files_modified:
+  - output.txt
+autonomous: true
+requirements:
+  - TEST-01
+must_haves:
+  truths:
+    - output.txt exists with expected content
+  artifacts:
+    - output.txt
+  key_links: []
+---
+
+<objective>
+Create a simple output file to prove the SDK can execute a plan end-to-end.
+</objective>
+
+<tasks>
+<task type="auto">
+<name>Create output file</name>
+<files>output.txt</files>
+<action>Create output.txt with content 'hello from gsd-sdk'</action>
+<verify>test -f output.txt</verify>
+<done>output.txt exists with expected content</done>
+</task>
+</tasks>
--- a/sdk/tsconfig.json
+++ b/sdk/tsconfig.json
@@ -0,0 +1,20 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "strict": true,
+    "outDir": "dist",
+    "rootDir": "src",
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "isolatedModules": true
+  },
+  "include": ["src/**/*.ts"],
+  "exclude": ["src/**/*.test.ts", "src/**/*.integration.test.ts", "dist", "node_modules"]
+}
--- a/sdk/vitest.config.ts
+++ b/sdk/vitest.config.ts
@@ -0,0 +1,22 @@
+import { defineConfig } from 'vitest/config';
+
+export default defineConfig({ // two projects: 'unit' and 'integration'
+  test: {
+    projects: [
+      {
+        test: {
+          name: 'unit',
+          include: ['src/**/*.test.ts'],
+          exclude: ['src/**/*.integration.test.ts'], // these files also match the include glob above
+        },
+      },
+      {
+        test: {
+          name: 'integration',
+          include: ['src/**/*.integration.test.ts'],
+          testTimeout: 120_000, // milliseconds (2 minutes)
+        },
+      },
+    ],
+  },
+});
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -0,0 +1,6 @@
+{
+  "files": [],
+  "references": [
+    { "path": "sdk" }
+  ]
+}
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -0,0 +1,24 @@
+import { defineConfig } from 'vitest/config';
+
+export default defineConfig({ // two projects: 'unit' and 'integration', both rooted at ./sdk
+  test: {
+    projects: [
+      {
+        test: {
+          name: 'unit',
+          root: './sdk',
+          include: ['src/**/*.test.ts'],
+          exclude: ['src/**/*.integration.test.ts'], // these files also match the include glob above
+        },
+      },
+      {
+        test: {
+          name: 'integration',
+          root: './sdk',
+          include: ['src/**/*.integration.test.ts'],
+          testTimeout: 120_000, // milliseconds (2 minutes)
+        },
+      },
+    ],
+  },
+});