Compare commits


262 Commits

Author SHA1 Message Date
Tom Boucher
bd99d3dcb8 Merge pull request #2934 from gsd-build/add-release-sdk-to-dev
ci(release-sdk): propagate release-sdk.yml to dev branch
2026-04-30 22:00:24 -04:00
Tom Boucher
4d534cf03f ci(release-sdk): make release-sdk.yml dispatchable from the dev branch
The workflow lives on main only, so the GitHub Actions "Use workflow
from" dropdown doesn't list dev — meaning dev → @dev publishes can't be
triggered from the dev branch directly. Add the file to dev so an
operator can dispatch it with branch=dev and tag=dev.

Per project release-stream policy: dev branch publishes canary (@dev).
This is the stream that needs the file most, since main never publishes
@dev itself (main does @next / @latest).

File is byte-identical to main's release-sdk.yml — straight propagation,
no behavioral change. Tracking issues #2925, #2929.
2026-04-30 21:59:29 -04:00
Tom Boucher
b956a596ee fix(ci/canary): publish gate checks dev branch, not main
Four publish-step `if:` conditions in .github/workflows/canary.yml were
checking `github.ref == 'refs/heads/main'`. Those steps (Tag and push,
Publish to npm, Publish SDK to npm, Verify publish) therefore always
skipped on every workflow_dispatch invocation since canary runs from dev,
never main.

The workflow's own header comment is unambiguous: `dev → @canary`. The
gate was a copy-paste from release.yml (which correctly targets main for
the @next/@latest streams) that was never corrected for the canary stream.

This is why the 1.50.0-canary.1 publish hadn't materialized despite three
green workflow runs. With the gate corrected, the next dispatch will
actually publish.
2026-04-30 18:01:36 -04:00
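The stream-to-branch mismatch this commit fixes can be sketched as a tiny gate function. This is an illustration only, not the workflow's actual YAML; the stream names and the dev → @canary / main → @next/@latest mapping come from the commit message, while `publishGate` and `STREAM_BRANCH` are hypothetical names:

```javascript
// Per-stream publish gate, per the release-stream policy above.
const STREAM_BRANCH = {
  canary: 'refs/heads/dev',   // dev publishes @canary
  next: 'refs/heads/main',    // main publishes @next
  latest: 'refs/heads/main',  // main publishes @latest
};

function publishGate(stream, ref) {
  // The bug: canary.yml compared ref against main (copy-pasted from
  // release.yml), so this check never passed on dev-based canary runs.
  return ref === STREAM_BRANCH[stream];
}
```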
Tom Boucher
4d4f02bd60 docs: add CANARY stream README + v1.50.0-canary.1 release notes
- docs/CANARY.md — explains the dev→@canary stream policy, install/rollback
  paths, and when (not) to install canary builds
- docs/RELEASE-v1.50.0-canary.1.md — release notes for the first 1.50.0
  canary cut: vertical MVP/TDD/UAT slice (#2867 + #2874 + #2878 + #2880 +
  #2883), opening the 1.50.0 train under PRD #2826
- docs/README.md — index entry + quick link for the canary stream
2026-04-30 17:54:43 -04:00
Tom Boucher
6eae18415d chore(release): bump dev to 1.50.0-canary.0 for first 1.50.0 canary
Sets the base version that .github/workflows/canary.yml derives the canary
tag from (strips suffix → base 1.50.0 → next available v1.50.0-canary.N).

This kicks off the 1.50.0 release train, opened by the MVP/TDD/UAT vertical
slice landed across PRs #2867, #2874, #2878, #2880, #2883.
2026-04-30 17:52:16 -04:00
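The canary-tag derivation described above (strip any pre-release suffix, then pick the next unused canary number from existing git tags) can be sketched as follows; the function name is hypothetical and the real logic lives in canary.yml:

```javascript
// Derive the next canary tag from package.json version + existing tags.
function nextCanaryTag(pkgVersion, existingTags) {
  const base = pkgVersion.split('-')[0]; // '1.50.0-canary.0' -> '1.50.0'
  let n = 1;
  // Skip numbers already claimed by tags so reruns never collide.
  while (existingTags.includes(`v${base}-canary.${n}`)) n++;
  return `v${base}-canary.${n}`;
}
```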
Tom Boucher
ca72c08eb5 Merge pull request #2883 from gsd-build/claude/mvp-phase-4
feat(mvp-discovery): vertical MVP discovery + progress + graphify — PRD Phase 4 (#2882)
2026-04-30 17:48:04 -04:00
Tom Boucher
9548a96a9b Merge pull request #2880 from gsd-build/claude/mvp-phase-3b
feat(verify-work): MVP-mode UAT framing — PRD Phase 3b (#2879)
2026-04-30 17:47:42 -04:00
Tom Boucher
a8007e857f Merge pull request #2878 from gsd-build/claude/mvp-phase-3a
feat(execute-phase): MVP+TDD runtime gate — PRD Phase 3a (#2877)
2026-04-30 17:47:20 -04:00
Tom Boucher
76b587cba2 Merge pull request #2874 from gsd-build/claude/mvp-phase-2
feat(mvp-phase): /gsd mvp-phase command — PRD Phase 2 (#2826)
2026-04-30 17:46:58 -04:00
Tom Boucher
5d4d928250 Merge pull request #2867 from gsd-build/claude/stoic-jones-5f489e
feat(plan-phase): --mvp vertical-slice planning — PRD Phase 1 (#2826)
2026-04-30 17:46:05 -04:00
Tom Boucher
5e808b233a docs(changelog): announce Phase 4 discovery & progress (#2826) 2026-04-30 00:49:21 -04:00
Tom Boucher
38dcf1ec47 feat(graphify): add MVP-mode visual differentiation
MVP-mode phases render with #22c55e fill color AND ' (MVP)' label
suffix — two-channel signaling for color-blind and grayscale renders.
Standard phases unchanged.

Per PRD vertical-mvp-slice Phase 4 (PRD Q5: distinct visual treatment).
2026-04-30 00:49:21 -04:00
Tom Boucher
f2c7f730ce feat(stats): add MVP phase count summary
Reads roadmap.analyze (which surfaces mode per phase from Phase 1) and
emits 'Phases: N total | M MVP | K standard' summary line. Suppressed
when MVP_COUNT == 0 to avoid clutter on non-MVP projects.

Per PRD vertical-mvp-slice Phase 4.
2026-04-30 00:49:21 -04:00
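A minimal sketch of the summary line above, assuming roadmap.analyze yields phases with a per-phase `mode` field (the function name is illustrative):

```javascript
// Emit 'Phases: N total | M MVP | K standard', suppressed when no MVP phases.
function phaseSummary(phases) {
  const mvp = phases.filter(p => p.mode === 'mvp').length;
  if (mvp === 0) return null; // avoid clutter on non-MVP projects
  const total = phases.length;
  return `Phases: ${total} total | ${mvp} MVP | ${total - mvp} standard`;
}
```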
Tom Boucher
37d3c9337a feat(progress): add MVP-mode user-flow display
When phase has **Mode:** mvp, progress renders user-flow status from
PLAN.md task names alongside standard task progress. Tasks that aren't
user-flow-shaped (technical-sounding) are filtered out of the user-flow
sub-block. Falls back to standard display when mode is null/absent.

Per PRD vertical-mvp-slice Phase 4 (decision Phase-4-Progress).
2026-04-30 00:49:21 -04:00
Tom Boucher
6a7ecb5344 feat(new-project): add Vertical MVP vs Horizontal Layers mode prompt
Asks user at project init how to structure the project. Vertical MVP
emits **Mode:** mvp on every initial roadmap phase (per-phase mode
preserved per PRD Q1). Horizontal Layers falls back to standard
template — no behavioral change for existing flows.

Per PRD vertical-mvp-slice Phase 4 (decision Phase-4-Persistence).
2026-04-30 00:49:21 -04:00
Tom Boucher
8be1af4f98 docs(changelog): announce MVP-mode UAT framing in verify-work (#2826) 2026-04-30 00:47:23 -04:00
Tom Boucher
87951950fd feat(gsd-verifier): add MVP Mode Verification section
Narrows goal-backward verification to the user-story [outcome] clause
when phase mode is mvp. References verify-mvp-mode.md. Preserves
existing goal-backward methodology for non-MVP phases. User-story-format
guard refuses to verify a mode:mvp phase with a non-user-story goal.
2026-04-30 00:46:51 -04:00
Tom Boucher
f9d892946a feat(verify-work): MVP-mode UAT framing — user flow first
Resolves MVP_MODE from phase mode field. Under MVP mode, generates UAT
in three ordered sections: user-flow walk-through (derived from user
story), technical checks (deferred), coverage check (goal-backward).
Falls back to standard UAT generation when mode is null/absent.
User-story-format guard refuses to verify a mode:mvp phase with a
non-user-story goal.

Also updates docs/INVENTORY.md (56 references) and
docs/INVENTORY-MANIFEST.json to register verify-mvp-mode.md added
in Task 1.

Per PRD vertical-mvp-slice Phase 3b (decisions Phase-3-B,
Phase-3-Verify-Structure).
2026-04-30 00:46:51 -04:00
Tom Boucher
3591c09fa9 docs(verifier): add verify-mvp-mode reference
Defines UAT framing under MVP mode: user-flow walk-through first,
technical checks deferred, coverage check as goal-backward narrowing
to the user story's outcome clause. Loaded conditionally by
verify-work workflow and gsd-verifier agent.
2026-04-30 00:46:15 -04:00
Tom Boucher
c1fcb6075d docs(changelog): announce MVP+TDD runtime gate in execute-phase (#2826) 2026-04-29 21:08:23 -04:00
Tom Boucher
40ecf263b1 chore(inventory): register execute-mvp-tdd reference
Bumps References count 55 -> 56. Registers execute-mvp-tdd.md.
Adds "init" to PROSE_ALLOWLIST in registry integration test so
bare `gsd-sdk query init` prose examples in plan docs don't
trigger the unregistered-handler guard (real commands are all
init.<subcommand>).
2026-04-29 21:08:18 -04:00
Tom Boucher
a370f299f3 test(execute-phase): add MVP+TDD resolution-chain integration cases
Validates roadmap.get-phase --pick mode and confirms workflow.mvp_mode
default is unset in fresh projects. Mirrors the Phase 1 plan-phase
resolution-chain integration test.
2026-04-29 21:02:29 -04:00
Tom Boucher
37d207312f feat(gsd-executor): add MVP+TDD Gate section
Mirrors the planner's MVP Mode Detection pattern from Phase 1.
Instructs halt-and-report when the runtime gate trips, references
execute-mvp-tdd.md for full semantics. No agent changes outside the
new section.
2026-04-29 21:01:12 -04:00
Tom Boucher
278d440ca8 feat(execute-phase): MVP+TDD runtime gate + blocking review
Resolves MVP_MODE in Step 1 (CLI flag -> roadmap mode -> config -> false).
Adds per-task gate that halts before behavior-adding tasks run if no
failing-test commit exists for the plan. Escalates end-of-phase TDD
review from advisory to blocking when both MVP_MODE and TDD_MODE active.

Also updates INVENTORY-MANIFEST.json to register execute-mvp-tdd.md
(added by Task 1) so manifest-sync tests pass.

Per PRD vertical-mvp-slice Phase 3a (decisions Phase-3-A, Phase-3-Split).
2026-04-29 20:56:42 -04:00
Tom Boucher
b5385cffb3 docs(executor): add MVP+TDD gate reference
Defines the runtime gate semantics for execute-phase when both
MVP_MODE and TDD_MODE are true: pre-task verification of failing-test
commit, end-of-phase review escalation from advisory to blocking,
behavior-adding task definition. Loaded conditionally by
execute-phase workflow and gsd-executor agent.
2026-04-29 20:44:13 -04:00
Tom Boucher
20e5cbda75 fix(mvp-phase): add TEXT_MODE plain-text fallback for non-Claude runtimes (#2012) 2026-04-29 17:54:56 -04:00
Tom Boucher
0d07c350df docs(changelog): announce /gsd mvp-phase command (#2826) 2026-04-29 17:50:08 -04:00
Tom Boucher
43fdb0334b chore(inventory): register mvp-phase command + 2 new references
Adds /gsd mvp-phase to commands list, mvp-phase workflow to workflows list,
and user-story-template.md + spidr-splitting.md to references. References
count: 53 -> 55.
2026-04-29 17:50:04 -04:00
Tom Boucher
78c60a9cc0 test(mvp-phase): integration smoke test for ROADMAP mutation
Validates roadmap.get-phase output after a workflow-spec'd ROADMAP write:
mode=mvp and goal=full user story. Catches schema drift between workflow
emit and parser expectation. Includes a long-story case (>120 chars) to
confirm SPIDR-rejected stories still parse correctly.
2026-04-29 17:47:58 -04:00
Tom Boucher
a871db4222 feat(gsd-planner): emit user-story header in PLAN.md under MVP mode
Extends the MVP Mode Detection section (added in Phase 1) so the planner
sources the user story from ROADMAP **Goal:** and emits the bolded
**As a** / **I want to** / **so that** form as the first content under
the phase header in PLAN.md. References user-story-template.md.
2026-04-29 17:46:28 -04:00
Tom Boucher
c26d4dc863 feat(mvp-phase): add mvp-phase workflow
Standalone workflow: phase validation -> user story prompts (As a / I want to /
So that) -> SPIDR splitting check -> ROADMAP write (Mode + Goal) -> delegation
to plan-phase. Per PRD Phase 2 (Q3 full SPIDR; Phase-2-A/B/C/D decisions).

Plan-phase auto-detects MVP via Phase 1's resolution chain, so no flags
are needed when delegating.
2026-04-29 17:44:38 -04:00
Tom Boucher
7a812ab812 fix(mvp-phase): trim description to fit 100-char budget 2026-04-29 17:41:23 -04:00
Tom Boucher
8fa4692d46 docs(planner): add SPIDR splitting reference
Defines size signals, the five SPIDR axes (Spike/Paths/Interfaces/Data/Rules),
the interactive workflow, and anti-patterns. Per PRD Q3 decision: full
interactive flow, not lightweight check. Used by mvp-phase workflow.
2026-04-29 17:23:52 -04:00
Tom Boucher
f73945aa73 docs(planner): add user-story-template reference
Defines the canonical 'As a / I want to / So that' format and the
ROADMAP.md / PLAN.md emit rules. Used by mvp-phase workflow and
gsd-planner agent under MVP_MODE.
2026-04-29 17:23:32 -04:00
Tom Boucher
1cc688367a feat(mvp-phase): add /gsd mvp-phase slash command
Standalone command for vertical MVP planning. Frontmatter only;
heavyweight workflow at get-shit-done/workflows/mvp-phase.md follows
in next commit. Mirrors discuss-phase/edit-phase command shape.
2026-04-29 17:21:44 -04:00
Tom Boucher
7db6786ec8 docs(changelog): announce --mvp vertical-slice planning (#2826) 2026-04-29 17:00:12 -04:00
Tom Boucher
ad6e2e81ca test(plan-phase): add --mvp resolution-chain integration cases
Validates roadmap.get-phase --pick mode and confirms workflow.mvp_mode
default is unset in fresh projects.
2026-04-29 17:00:12 -04:00
Tom Boucher
bf401f1a4c feat(gsd-planner): add MVP Mode Detection section
Mode-switched branch in the existing planner agent (per Q4: single agent).
Vertical-slice decomposition rules, Walking Skeleton handling, and
TDD-mode compatibility. Heavy guidance lives in references/planner-mvp-mode.md.
2026-04-29 17:00:12 -04:00
Tom Boucher
d732c4fdb7 chore(inventory): register new planner references
Added planner-mvp-mode.md and skeleton-template.md to INVENTORY.md and
INVENTORY-MANIFEST.json. References now: 53.
2026-04-29 17:00:12 -04:00
Tom Boucher
c055e23e55 docs(planner): add SKELETON.md template
Template emitted by gsd-planner under WALKING_SKELETON=true. Captures
architectural decisions and out-of-scope list for new-project Phase 1.
2026-04-29 17:00:12 -04:00
Tom Boucher
3c4b701afe docs(planner): add vertical-slice planning reference
New reference loaded by gsd-planner when MVP_MODE=true. Defines slice
ordering, Walking Skeleton rules, and anti-patterns. Referenced from
plan-phase workflow MVP_MODE wiring.
2026-04-29 17:00:12 -04:00
Tom Boucher
b1d221a42b feat(plan-phase): parse --mvp flag and resolve MVP_MODE
Resolution order: CLI flag → ROADMAP **Mode:** field → workflow.mvp_mode
config → false. Walking Skeleton gate fires for new-project Phase 1.
Wires MVP_MODE + WALKING_SKELETON into gsd-planner subagent prompt.

Per PRD vertical-mvp-slice Phase 1 (Q1, Q2, Q4).
2026-04-29 17:00:12 -04:00
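The four-step resolution order above (CLI flag → ROADMAP **Mode:** field → workflow.mvp_mode config → false) can be sketched as one function. Argument names and shape are assumptions; only the precedence order comes from the commit:

```javascript
// Resolve MVP_MODE: first truthy source in the chain wins, default false.
function resolveMvpMode(cliFlag, roadmapMode, config) {
  if (cliFlag === true) return true;                   // 1. --mvp CLI flag
  if (roadmapMode === 'mvp') return true;              // 2. ROADMAP **Mode:** field
  const cfg = config && config.workflow;
  if (cfg && typeof cfg.mvp_mode === 'boolean') {      // 3. workflow.mvp_mode config
    return cfg.mvp_mode;
  }
  return false;                                        // 4. default
}
```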
Tom Boucher
834e57dfe1 feat(roadmap): parse **Mode:** field on phase sections
Adds a 'mode' field to roadmap.get-phase and roadmap.analyze outputs.
Recognizes '**Mode:** mvp' lines in phase sections; lowercased + trimmed.
Forward-compat: unrecognized values preserved verbatim, no enum check.

Foundation for --mvp flag in plan-phase (PRD: vertical-mvp-slice).
2026-04-29 17:00:12 -04:00
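The **Mode:** parsing described above can be sketched as a one-line-at-a-time matcher; this is illustrative, not the parser's actual code. Note the forward-compat property: the value is lowercased and trimmed but never checked against an enum, so unrecognized values pass through verbatim:

```javascript
// Parse a '**Mode:** <value>' line from a phase section; null if absent.
function parseModeLine(line) {
  const m = /^\*\*Mode:\*\*\s*(.+)$/.exec(line.trim());
  return m ? m[1].trim().toLowerCase() : null;
}
```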
Tom Boucher
107a83ebf7 docs(#2859): add release notes for 1.39.0-rc.7 (#2860)
rc.7 will be the first RC in the 1.39.0 train that actually rolls in
the post-rc.5 fixes from main (rc.6 was content-identical to rc.5 — see
#2856). Notes enumerate each fix with PR/issue link, recap rc.6 / rc.5 /
rc.4, and follow the established docs/RELEASE-v1.39.0-rc.X.md format.

No SDK-version pinning advice (consistent with the rc.6 doc cleanup).
Markdownlint-clean fenced code blocks.

Closes #2859
2026-04-29 08:58:16 -04:00
Tom Boucher
43a13217b7 docs(#2856): add docs/RELEASE-v1.39.0-rc.6.md (#2857)
* docs(#2856): add release notes for 1.39.0-rc.6

Documents what's actually in rc.6 (= rc.5 content + version-bump only —
release/1.39.0 was not synced with main before the bump) plus the known
SDK publish failure (@gsd-build/sdk@1.39.0-rc.6 is missing from npm with
404 PUT error). Format mirrors RELEASE-v1.39.0-rc.5.md.

Closes #2856

* docs(#2856): drop SDK refs from rc.6 notes; tag git log fence

Per maintainer + CodeRabbit review:
- Strip the 'Known issue: split publish' section, the SDK pin Note, and
  the @gsd-build/sdk follow-up bullet. SDK publish failure is a known
  separate issue and shouldn't block the rc.6 docs.
- Add bash language tag to the git log fence (markdownlint MD040).
2026-04-29 08:43:39 -04:00
Tom Boucher
2498f5649d docs(release): backfill CHANGELOG with 17 RC-train entries before v1.39.0 final cut (#2854)
Adds [Unreleased] entries for PRs that landed between v1.39.0-rc.4 and
v1.39.0-rc.6 but were missing from CHANGELOG.md. One bullet per PR,
grouped Added (#2828) and Fixed (16 entries: #2788, #2791, #2794, #2796,
#2798, #2801, #2803, #2805, #2808, #2829, #2831, #2832, #2835, #2836,
#2838, #2839).

Closes #2853
2026-04-29 08:29:47 -04:00
Tom Boucher
e81592878e feat(#2789): trim skill description anti-patterns; enforce 100-char budget (#2823)
* feat(#2789): trim skill description anti-patterns; enforce 100-char budget

- Trim descriptions in all commands/gsd/*.md files over 100 chars
- Remove flag documentation from descriptions (belongs in argument-hint)
- Remove Triggers: keyword stuffing
- Add scripts/lint-descriptions.cjs — fails on descriptions > 100 chars
- Add npm script: lint:descriptions
- Add tests/enh-2789-description-budget.test.cjs

Closes #2789

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* docs(#2789): add CHANGELOG entry for description budget lint

* docs(#2789): update COMMANDS.md descriptions; add skill description standards note

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-29 08:14:11 -04:00
Tom Boucher
4815b3c972 fix(#2838): SUMMARY rescue handles gitignored .planning (#2850)
* fix(#2838): SUMMARY rescue handles gitignored .planning explicitly

The pre-fix rescue used `git ls-files --modified --others --exclude-standard`
to detect uncommitted SUMMARY.md before worktree removal. When projects
gitignore .planning/, --exclude-standard filters out the very files the
rescue is meant to save, the rescue branch is skipped, and `git worktree
remove --force` permanently deletes the SUMMARY.

Replace both rescue blocks (quick.md, execute-phase.md) with a
filesystem-level find + cp rescue that bypasses gitignore entirely and
avoids the worktree↔main commit/merge cascade. `cmp -s` makes it idempotent.

Adds tests/bug-2838-summary-rescue-gitignored-planning.test.cjs which
extracts each rescue block, runs it against a real temp repo with a
gitignored .planning/, and asserts the SUMMARY survives worktree removal.

* test(#2838): assert rescue block exits 0 in idempotency test

CodeRabbit (Minor): the idempotency test pre-creates the destination
SUMMARY.md, so even a syntax/runtime error in the rescue block would
silently false-pass. Add an explicit r.status === 0 assertion.
2026-04-29 08:07:12 -04:00
Tom Boucher
f9ed47ac8b fix(#2832): gsd-sdk auto detects Codex runtime correctly (#2844)
* fix(#2832): gsd-sdk auto detects Codex runtime correctly

Two-part fix for #2832 (gsd-sdk auto silently routing non-Claude runtime
projects through the Claude Agent SDK):

1. Runtime gate at the `auto` entry point. New `runtime-gate.ts` exports
   `assertRuntimeSupportsAutoMode(config)` which throws an actionable error
   when `GSD_RUNTIME` / `config.runtime` resolves to a non-Claude runtime
   (codex, gemini, opencode, etc.). The autonomous orchestrator only knows
   how to drive `@anthropic-ai/claude-agent-sdk` today; failing fast with a
   clear pointer at the in-session slash commands beats the previous instant
   `[FAILED] $0.00 0.1s` flake. Wired into `cli.ts` before the GSD/InitRunner
   construction.

2. Runtime-aware `resolveModel()` in `session-runner.ts`. The profile -> id
   map (`balanced -> claude-sonnet-4-6`, etc.) was applied unconditionally,
   so even with `runtime: codex` and `resolve_model_ids: omit` the SDK
   forced a Claude id into `query()`. Now the profile map only fires when
   the runtime is Claude and the explicit `resolve_model_ids: "omit"` knob
   short-circuits to undefined, mirroring `query/config-query.ts`.

Tests (vitest, sdk/src):
- runtime-gate.test.ts (8 cases): claude / unset / unknown pass; codex,
  gemini, opencode throw; GSD_RUNTIME wins over config.runtime; error
  message references #2832 and the slash-command workaround.
- session-runner.test.ts (4 new cases under "resolveModel runtime
  awareness (#2832)"): codex runtime + balanced profile -> no model
  injected; resolve_model_ids: omit -> no model; claude runtime still
  resolves to claude-sonnet-4-6 (no regression); explicit options.model
  wins on any runtime.

* fix(#2832): address CR — env-precedence in resolveModel + accurate source attribution

Two CodeRabbit findings on PR #2844:

1. session-runner.ts:resolveModel() (Major) — read runtime via detectRuntime()
   so GSD_RUNTIME env precedence is honored. Without this, a Codex run with
   a Claude-shaped config still fell into the Claude-only profile-id branch.

2. runtime-gate.ts:assertRuntimeSupportsAutoMode() (Minor) — when GSD_RUNTIME
   holds an unsupported value, detectRuntime() falls through to config but
   the source label still reported the discarded env value. Fix: validate
   env against SUPPORTED_RUNTIMES before attributing the source.

Tests added for both: env-precedence in session-runner, source attribution
in runtime-gate. 17/17 pass.
2026-04-29 08:03:32 -04:00
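The two behaviors above can be condensed into a sketch. The names `assertRuntimeSupportsAutoMode` and `resolveModel` come from the commit message; the bodies here are simplified illustrations of the described precedence (GSD_RUNTIME env over config, `resolve_model_ids: omit` short-circuiting, profile map firing only on Claude), not the SDK's actual code:

```javascript
const SUPPORTED_RUNTIMES = ['claude'];

// Env wins over config, per the CodeRabbit follow-up fix.
function detectRuntime(env, config) {
  return (env.GSD_RUNTIME || config.runtime || 'claude').toLowerCase();
}

// Fail fast at the `auto` entry point on non-Claude runtimes.
function assertRuntimeSupportsAutoMode(env, config) {
  const runtime = detectRuntime(env, config);
  if (!SUPPORTED_RUNTIMES.includes(runtime)) {
    throw new Error(`auto mode only drives the Claude Agent SDK; got '${runtime}' (#2832)`);
  }
}

// Profile -> model-id map fires only when the runtime is Claude.
function resolveModel(profile, env, config) {
  if (config.resolve_model_ids === 'omit') return undefined;
  if (detectRuntime(env, config) !== 'claude') return undefined;
  const map = { balanced: 'claude-sonnet-4-6' };
  return map[profile];
}
```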
Tom Boucher
91194cdbff chore(#2828): add canary release workflow (#2830)
* chore(#2828): add canary release workflow (dev builds on push to main)

Publishes get-shit-done-cc@canary and @gsd-build/sdk@canary on every
push to main. Version format: {base}-canary.{N} where base strips any
pre-release suffix from package.json (1.39.0-rc.4 → 1.39.0-canary.1).

Sequential canary number is auto-detected from existing git tags so
reruns never collide. Concurrency group cancels stale in-flight canary
runs when commits land quickly.

Mirrors the structure and steps of release.yml: same checkout pins,
Node 24, npm-publish environment, build:sdk, tarball verification,
dry-run publish gate, and publish verification with sleep 10.

Closes #2828

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2828): address CodeRabbit review findings on canary.yml

- cancel-in-progress: false — was true, allowing a newer push to cancel a
  run mid-publish (after tag push but before SDK publish), leaving a partial
  release state that's unrecoverable since npm versions are immutable

- Guard tag/publish/verify steps with github.ref == 'refs/heads/main' so
  a manual workflow_dispatch from a feature branch (dry_run defaults false)
  cannot accidentally publish unmerged code under the shared canary dist-tag

- Replace fixed sleep 10 with exponential backoff retry loop (delays: 5 10
  20 30 45s); fixed sleep is flaky against normal npm CDN replication lag
  and a false failure forces a new canary number since the tag already exists

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(plan-phase): expose --mvp flag in command frontmatter

Adds --mvp to argument-hint and Flags doc. Workflow handler in next commit.

* chore(#2828): remove push:main trigger from canary workflow

Submission rate to main is too high to auto-publish a canary on every
merge. Restrict the workflow to manual workflow_dispatch only.

Closes #2828

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-29 08:02:59 -04:00
Tom Boucher
74b81379cf fix(#2836): audit-open quick SUMMARY filename + UAT terminal-status drift (#2847)
* fix(#2836): audit-open quick SUMMARY filename + UAT terminal-status drift

Fixes two convention drifts in bin/lib/audit.cjs that produced false-positive
"open" items at every milestone close:

1. scanQuickTasks: looked for bare `SUMMARY.md`, but workflows/quick.md
   mandates `${quick_id}-SUMMARY.md`. Now matches either filename so quick
   tasks created via the documented workflow are recognized.

2. scanUatGaps: only treated `status: complete` as terminal, but
   workflows/execute-phase.md uses `status: resolved` post-gap-closure.
   Now treats both `complete` and `resolved` as terminal, with `result:
   all_pass` as a fallback when status is absent.

Also reconciles workflows/help.md one-liner that referenced bare
`SUMMARY.md` so docs match the authoritative quick.md workflow.

Adds tests/bug-2836-audit-open-summary-uat-drift.test.cjs with 6
structural regression tests covering both fixes plus no-regression cases.

* refactor(#2836): hoist TERMINAL_UAT_STATUSES outside scanUatGaps loop

Address CodeRabbit nitpick: the Set was being recreated on each UAT file
iteration. Hoist to module scope so it is constructed once.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 08:00:17 -04:00
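The two corrected checks above can be sketched as predicates; the accepted filenames, statuses, and `all_pass` fallback mirror the commit message, while the function shapes are hypothetical:

```javascript
// Terminal statuses hoisted to module scope (per the CodeRabbit nitpick).
const TERMINAL_UAT_STATUSES = new Set(['complete', 'resolved']);

// Accept both the bare and the documented ${quick_id}-prefixed filename.
function isQuickSummary(filename, quickId) {
  return filename === 'SUMMARY.md' || filename === `${quickId}-SUMMARY.md`;
}

// A UAT item is closed if its status is terminal, or (when status is
// absent) its result is all_pass.
function isTerminalUat(uat) {
  if (uat.status) return TERMINAL_UAT_STATUSES.has(uat.status);
  return uat.result === 'all_pass';
}
```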
Tom Boucher
12b6ba4e34 fix(#2829): gsd-sdk resolvable in local-mode installs (#2848)
* fix(#2829): gsd-sdk resolvable in local-mode installs

Local-mode installs previously short-circuited installSdkIfNeeded() the
moment opts.isLocal was true, leaving every `gsd-sdk query …` call site
unable to resolve the binary on PATH. The published tarball ships
sdk/dist/cli.js and bin/gsd-sdk.js regardless of mode, and the shim
resolves the CLI relative to its own __dirname — so the same self-link
strategy that powers npx-cache global installs (#2775) also works for
local installs. We now run the shared self-link path whenever the dist
is present, and only fall back to a non-fatal warning + early return
when the dist is genuinely missing (preserving the #2678 contract).

* test(#2829): correct precondition comment about ~/.local/bin

Address CodeRabbit feedback — the test does not create ~/.local/bin,
so reword the inline precondition to "any HOME bin candidate remains
off-PATH" to match what the test actually sets up.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 07:59:30 -04:00
Tom Boucher
f4412349f0 fix(#2835): align CR-INTEGRATION tests with hyphen-form skill names (#2843)
* fix(#2835): align CR-INTEGRATION tests with hyphen namespace

PR #2819 changed autonomous.md skill invocations from `gsd:code-review`
(colon) to `gsd-code-review` (hyphen). Tests still asserted the legacy
colon form against the user-installed plugin dir (which lags the repo).

Switch tests to:
- Read autonomous.md from the canonical repo WORKFLOWS_DIR (not the
  plugin install location, which can be stale)
- Parse `Skill(skill="...")` invocations structurally instead of
  substring matching, and assert the canonical hyphen form is present
  while explicitly rejecting the legacy colon form.

Closes #2835

* test(#2835): parse Skill() invocations structurally in CR-INTEGRATION tests

Replace raw-text regex/.includes() assertions with a proper parser that
walks autonomous.md, skips escaped string contexts, and yields
[{ skill, args }] objects. The three CR-INTEGRATION tests now assert
against parsed fields and tokenized args (not substring matches),
addressing CodeRabbit feedback on PR #2843.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 07:57:30 -04:00
Tom Boucher
a7f83ee663 fix(#2831): expand HOME in OpenCode @file references on all platforms (#2842)
* fix(#2831): expand HOME in OpenCode skill/template paths

OpenCode does not shell-expand $HOME in @file references on any platform —
the literal `@$HOME/...` path is resolved relative to the config command/
dir, producing `command/$HOME/...` (file not found). The previous fix for
#2376 only guarded Windows; extend to all platforms.

Closes #2831

* test(#2831): assert behavior via exported computePathPrefix, not source grep

Addresses CodeRabbit review on PR #2842:
- Extracts pathPrefix logic into a named, test-exported computePathPrefix
  helper in bin/install.js (no behavior change at the call site).
- Rewrites bug-2376 and bug-2831 regression tests to call the exported
  function directly instead of regex-matching install.js source text,
  per the repo's no-source-grep testing standard.
- Wraps temp-dir test setup in try/finally so cleanup runs on assertion
  failures (no leaked tmp dirs).
2026-04-29 07:56:51 -04:00
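The shape of the fix above is unconditional expansion: since OpenCode never shell-expands `$HOME` in @file references, the installer must emit the already-expanded absolute path on every platform, not just Windows. The helper below is a hypothetical stand-in (the real exported helper is `computePathPrefix` in bin/install.js, whose exact signature the commit does not show):

```javascript
// Expand any literal $HOME in a reference path before writing config.
function expandHome(refPath, homeDir) {
  return refPath.replace(/\$HOME/g, homeDir);
}
```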
Tom Boucher
7fae804296 fix(#2839): transactional cleanup tail for /gsd-code-review-fix (#2846)
* fix(#2839): make /gsd-code-review-fix cleanup transactional

Cleanup tail in agents/gsd-code-fixer.md previously did 'git worktree
remove' without any recovery marker. If the process was killed between
fix commits and worktree removal, the orphan worktree + branch survived
with no resume path — the next run had no way to discover or finish
the cleanup.

Introduce a recovery sentinel at ${phase_dir}/.review-fix-recovery-pending.json
with strict ordering:
- Sentinel written AFTER 'git worktree add' succeeds (never points at a
  worktree that does not exist).
- Sentinel removed ONLY AFTER 'git worktree remove' returns successfully
  (interruption between commits and removal leaves a sentinel behind).
- New runs detect a pre-existing sentinel, force-remove the recorded
  orphan worktree, then drop the stale sentinel before continuing —
  making the agent self-healing after a crash.

Closes #2839

* fix(#2839): harden sentinel JSON parse and scope ordering assertion

Address CodeRabbit review feedback on PR #2846:

- agents/gsd-code-fixer.md: Guard the recovery-sentinel JSON parse with
  try/catch so a corrupted/truncated sentinel (a realistic crash artifact)
  emits a warning and yields an empty prior_wt instead of aborting setup.
  This preserves the self-healing recovery path even when the sentinel
  itself is the casualty of the original crash.

- tests/bug-2839-review-fix-transactional-cleanup.test.cjs: Scope the
  cleanup-ordering assertion to the cleanup-tail section of the
  setup_worktree step rather than first global occurrences. Previously
  the assertion could pass on pre-recovery references even if cleanup-tail
  ordering regressed. The regex also now accepts the shell-variable form
  (`rm -f "$sentinel"`) used in the cleanup tail.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 07:56:32 -04:00
Tom Boucher
c3a42d66f9 Revert "feat(install): add Hermes Agent runtime support" (#2849) 2026-04-29 07:44:49 -04:00
Jeremy McSpadden
0acf1de88c Merge pull request #2845 from teknium1/feat/hermes-runtime
feat(install): add Hermes Agent runtime support
2026-04-29 06:38:13 -05:00
teknium1
5a636bc90a feat(install): add Hermes Agent runtime support (#2841)
Adds Hermes Agent as a supported installation target. Users can run
`npx get-shit-done-cc --hermes` to install all 86 GSD commands as
skills under `~/.hermes/skills/gsd-*/SKILL.md`, following the same
open skill standard as Claude Code 2.1.88+, Qwen Code, Antigravity,
Trae, Augment, and Codebuddy.

Hermes Agent is an open-source AI agent framework by Nous Research
(NousResearch/hermes-agent, MIT). Its skill loader accepts the Claude
skill format as-is: frontmatter parsed with PyYAML SafeLoader (unknown
keys like `allowed-tools` / `argument-hint` ignored), body XML tags
(`<objective>`, `<execution_context>`, `<process>`) passed directly
to the model. Compatibility proven end-to-end with all 86 GSD skills
loading cleanly, `skill_view()` returning full bodies, and
`build_skills_system_prompt()` emitting them into the agent system
prompt — zero Hermes code changes required.

Changes:
- `bin/install.js`: --hermes flag, getDirName/getGlobalDir/getConfigDirFromHome
  support, HERMES_HOME env var (native to Hermes — used for profile
  mode / Docker deploys), install/uninstall pipelines, interactive
  picker option 10 (alphabetical: between Gemini and Kilo), .hermes
  path replacements in copyCommandsAsClaudeSkills and
  copyWithPathReplacement, legacy commands/gsd cleanup, CLAUDE.md ->
  HERMES.md and "Claude Code" -> "Hermes Agent" content rewrites in
  skills/agents/hooks, runtime-appropriate finish message.
- `get-shit-done/bin/lib/core.cjs`: add hermes to KNOWN_RUNTIMES;
  add RUNTIME_PROFILE_MAP.hermes with OpenRouter-slug defaults
  (Hermes is provider-agnostic; these defaults resolve across
  OpenRouter, native Anthropic, and Copilot via Hermes' aggregator-aware
  resolver, and are overridable per-tier via
  model_profile_overrides.hermes.{opus,sonnet,haiku}).
- `README.md`: Hermes Agent in tagline, runtime list, verification
  command, install/uninstall examples, `--hermes` flag reference.
- `tests/hermes-install.test.cjs`: new, 14 tests covering directory
  mapping, HERMES_HOME env var precedence, install/uninstall
  lifecycle, user-skill preservation, engine cleanup.
- `tests/hermes-skills-migration.test.cjs`: new, 11 tests covering
  frontmatter conversion, path replacement (~/.claude/ ->
  $HERMES_HOME/skills/), CLAUDE.md -> HERMES.md, "Claude Code" ->
  "Hermes Agent", stale skill cleanup, SKILL.md format validation.
- `tests/multi-runtime-select.test.cjs`: updated for new option
  numbering (hermes=10, kilo=11, opencode=12, qwen=13, trae=14,
  windsurf=15, all=16).
- `tests/kilo-install.test.cjs`: updated assertions for Kilo having
  moved from option 10 to option 11.

Closes #2841

Implementation notes:
- Zero custom code paths: Hermes reuses copyCommandsAsClaudeSkills()
  identical to Qwen Code / Antigravity pattern.
- Path replacement: ~/.claude/, $HOME/.claude/, ./.claude/ ->
  .hermes equivalents in skill/agent/hook content.
- Config precedence: --config-dir > HERMES_HOME > ~/.hermes (matches
  how Hermes itself resolves its home directory).
- Legacy cleanup: removes commands/gsd/ if present from a prior
  install, preserving dev-preferences.md (same as Qwen).
- No external dependencies added.

Testing: 5841 / 5841 tests pass (0 failures, 0 regressions)
- 14 new tests in hermes-install.test.cjs
- 11 new tests in hermes-skills-migration.test.cjs
- multi-runtime-select.test.cjs renumbered + 1 new test (single choice for hermes)
2026-04-29 04:27:46 -07:00
Tom Boucher
eeaf9c556f fix(#2787): track fenced code blocks in extractCurrentMilestone (#2812)
* fix(#2787): track fenced code blocks in extractCurrentMilestone

The milestone-end search used a multiline regex against the raw
restContent string. Lines inside fenced code blocks (``` or ~~~)
that matched the milestone-heading pattern (e.g. `# note v1.0`)
prematurely set sectionEnd, hiding all phases after the block from
roadmap analyze, roadmap get-phase, and every downstream command.

Replace the regex match with a line-by-line scan that tracks fence
state. Lines inside an open fence are skipped regardless of content.
Adds three regression tests covering backtick fences, tilde fences,
and the roadmap get-phase code path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2787): track fence delimiter instead of toggling bare boolean

Replace the inFence boolean with fenceChar/fenceLen tracking so that
indented fences (up to 3 leading spaces) and mixed-delimiter content
(~~~ inside a backtick fence) are parsed correctly. A closing fence
is only recognised when it uses the same character as the opening
delimiter and has at least the same run length, matching the CommonMark
spec.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2787): require fence-only closing line — reject info-string lines as closers

A closing fence delimiter must contain only optional trailing whitespace.
A line like \`\`\`js inside an open fence has an info string and must not
close it. The previous regex /^\s{0,3}([`~]{3,})/ matched the opening of any
such line, so the closing check could toggle fenceChar off on an info-string
line and expose subsequent heading-like content to the milestone-end detector.

Fix: capture the trailing portion of every fence-candidate line and only clear
fenceChar when trailing matches /^\s*$/ (per CommonMark §4.5).

Adds a regression test covering the ```text / ```js nesting scenario.
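The fence-state scan described across these three fixes can be sketched like this (a simplified illustration, not the actual extractCurrentMilestone code):

```javascript
// Returns indices of lines that sit outside any fenced code block.
// Opening fences allow up to 3 leading spaces and an info string; a
// closer must use the same delimiter character, be at least as long,
// and carry no info string (per CommonMark section 4.5).
function linesOutsideFences(text) {
  let fenceChar = null; // '`' or '~' while a fence is open
  let fenceLen = 0;     // run length of the opening delimiter
  const outside = [];
  text.split('\n').forEach((line, i) => {
    const m = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
    if (m) {
      const ch = m[1][0];
      const len = m[1].length;
      const trailing = m[2];
      if (fenceChar === null) {
        fenceChar = ch; // opening fence; info string is allowed here
        fenceLen = len;
        return;
      }
      if (ch === fenceChar && len >= fenceLen && /^\s*$/.test(trailing)) {
        fenceChar = null; // valid closer: same char, long enough, fence-only
        fenceLen = 0;
        return;
      }
      // Mismatched char, too short, or info-string line: stays inside.
    }
    if (fenceChar === null) outside.push(i);
  });
  return outside;
}
```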

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 20:37:47 -04:00
Tom Boucher
9e58c45ea1 fix(#2791): GSD_WORKSTREAM env var respected by gsd-sdk query + gsd-tools bin alias (#2821)
* fix(#2791): GSD_WORKSTREAM env var respected by gsd-sdk query + gsd-tools bin alias

Two fixes for gsd-sdk binary issues:

**Issue 1 — Binary name collision:**
Both `get-shit-done-cc` and `@gsd-build/sdk` declare `bin: { "gsd-sdk": ... }`.
Added `"gsd-tools": "bin/gsd-sdk.js"` to `package.json` bin so users with the
collision can invoke `gsd-tools query <cmd>` as a conflict-free alternative.

**Issue 2 — Query registry not workstream-aware:**
`gsd-sdk query` commands ignored the `GSD_WORKSTREAM` env var, always reading from
the root `.planning/` even when a workstream was active. `gsd-tools.cjs` reads
`GSD_WORKSTREAM` via `planningDir()`, but the SDK did not, so all ~35 `gsd-sdk query`
call sites in workflow files were broken in workstream-scoped projects.

Fix: added env var fallback in `sdk/src/cli.ts` — when `--ws` is not provided,
`GSD_WORKSTREAM` is used (with name validation; invalid values are silently
ignored, matching CJS behaviour).
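The fallback can be sketched as follows (the validation pattern and names are assumptions for illustration, not the actual sdk/src/cli.ts code):

```javascript
// Assumed workstream-name validation; invalid env values are silently
// ignored, matching the CJS behaviour described above.
const VALID_WS = /^[A-Za-z0-9_-]+$/;

function resolveWorkstream(wsFlag, env = process.env) {
  if (wsFlag) return wsFlag;             // explicit --ws always wins
  const fromEnv = env.GSD_WORKSTREAM;
  if (fromEnv && VALID_WS.test(fromEnv)) return fromEnv;
  return null;                           // fall back to root .planning/
}
```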

Regression test: `tests/bug-2791-sdk-workstream-env.test.cjs`

Closes #2791

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2791): address CodeRabbit — precedence test, invalid env fallback assertion, bash fence

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 20:23:32 -04:00
Tom Boucher
897cff6051 fix(#2805): find-phase returns null phase_dir for archived phases (not archive path) (#2818)
* fix(#2805): add regression test — archived phase fallback already fixed in source

getPhaseInfoWithFallback already discards archived disk matches when the
current ROADMAP lists the phase (line 133: phaseInfo?.archived &&
roadmapPhase?.found). The regression test confirms this behavior and
prevents the bug from being reintroduced by future refactors.

Regression test: tests/bug-2805-archived-phase-fallback.test.cjs
(3 tests: phase_dir null, phase_found true, phase_name from ROADMAP)

* fix(#2805): address CodeRabbit — exact phase_name assertion, bash fence
2026-04-28 20:23:29 -04:00
Tom Boucher
a4e15d5616 fix(#2788): audit-uat reads human_verification items from frontmatter (#2814)
* fix(#2788): audit-uat reads frontmatter human_verification array

parseVerificationItems only searched the body for a '## Human Verification'
section. gsd-verifier writes items to the frontmatter human_verification:
YAML array, so audit-uat returned total_items: 0 for all such files.

Two fixes:
1. Read frontmatter human_verification: array first (via extractFrontmatter);
   return those items if present (primary path for gsd-verifier output).
2. Relax the body-section heading regex to accept underscore separators and
   parenthetical suffixes (e.g. '## human_verification (action required)').
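The two-step lookup can be sketched like this (deliberately naive YAML handling for illustration; the real code goes through extractFrontmatter):

```javascript
// Relaxed body-heading regex: accepts 'Human Verification',
// 'human_verification', 'human-verification', and a parenthetical
// suffix such as '(action required)'.
const HEADING_RE = /^##\s+human[ _-]verification(?:\s*\(.*\))?\s*$/im;

// Frontmatter-first lookup: returns the human_verification items if the
// YAML array is present, else null so callers fall back to the body.
function readFrontmatterItems(fileText) {
  const fm = fileText.match(/^---\n([\s\S]*?)\n---/);
  if (!fm) return null;
  const list = fm[1].match(/^human_verification:\n((?:[ \t]+- .*(?:\n|$))+)/m);
  if (!list) return null;
  return list[1]
    .split('\n')
    .map((l) => l.replace(/^[ \t]*- /, '').trim())
    .filter(Boolean);
}
```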

Regression test: tests/bug-2788-audit-uat-frontmatter.test.cjs

* fix(#2788): address CodeRabbit — trim whitespace entries, support hyphenated headings, bash fence
2026-04-28 20:22:59 -04:00
Tom Boucher
eddb2a205b fix(#2801): add ingest-docs handler to gsd-tools init dispatch (#2820)
* fix(#2801): add ingest-docs handler to gsd-tools init dispatch

The `/gsd-ingest-docs` workflow was broken because `workflows/ingest-docs.md`
called `gsd-sdk query init.ingest-docs` but the installed binary is `gsd-tools`,
and `gsd-tools init` had no `ingest-docs` case in its dispatch switch.

- Added `cmdInitIngestDocs` function to `init.cjs` and exported it; returns
  `project_exists`, `planning_exists`, `has_git`, `project_path`, `commit_docs`
- Added `case 'ingest-docs'` to the `init` switch in `gsd-tools.cjs`
- Updated `workflows/ingest-docs.md` to call `gsd-tools init ingest-docs`
  (line 55) and `gsd-tools commit` (line 292) instead of `gsd-sdk query ...`
- Regression test: `tests/bug-2801-ingest-docs-handler.test.cjs`

Closes #2801

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2801): address CodeRabbit — commit_docs assertion, broader gsd-sdk detection, bash fence

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 20:22:40 -04:00
Tom Boucher
5fe1f00a0d fix(#2808): SKILL.md files use hyphen name form (gsd-cmd not gsd:cmd) (#2819)
* fix(#2808): SKILL.md name uses hyphen form for Claude Code autocomplete

skillFrontmatterName() was converting gsd-<cmd> to gsd:<cmd> (colon) so
installed SKILL.md files had name: gsd:add-phase etc. Claude Code surfaces
this name in autocomplete, showing the deprecated colon form to users even
though the hyphen form is canonical everywhere else.

Root cause: the colon form was needed because workflows called
Skill(skill="gsd:<cmd>"). All 4 remaining colon-form Skill() calls in
autonomous.md and execute-phase.md are updated to hyphen form.

skillFrontmatterName() now returns the hyphen dir name unchanged.
Updated 4 existing tests that asserted colon form.

Regression test: tests/bug-2808-skill-hyphen-name.test.cjs

* fix(#2808): address CodeRabbit — bash/text fences, structured test assertions, fail-loud on errors
2026-04-28 20:22:37 -04:00
Tom Boucher
fa78692167 fix(#2796): roadmap-update-plan-progress accepts --phase flag form (#2815)
* fix(#2796): roadmap update-plan-progress accepts --phase flag form

roadmap-update-plan-progress used positional-only arg parsing: args[0].
When execute-phase.md:228 calls it with --phase <N>, args[0] was the
literal string "--phase", which findPhase received as the phase number.
findPhase returned found:false, causing updated:false with no write.
ROADMAP.md plan checkboxes silently never advanced.

Fix: check for --phase <value> first; fall back to the first non-flag
positional argument for backward-compatible direct calls.

Regression test: tests/bug-2796-arg-parsing-regression.test.cjs

* fix(#2796): address CodeRabbit — guard --phase against flag-like values, bash fence
2026-04-28 20:22:30 -04:00
Tom Boucher
b959b1844f fix(#2803): config-get supports --default <value> fallback for missing keys (#2817)
* fix(#2803): honor --default flag in SDK config-get handler

The gsd-sdk query config-get handler ignored the --default <value> flag.
Missing keys always threw 'Key not found' (exit 1), making 8 workflow
sites that rely on config-get --default fall through to error paths.

The CJS path (gsd-tools.cjs) honored --default since #1893; this ports
that behavior to the SDK configGet handler.

Regression test: tests/bug-2803-config-get-default-flag.test.cjs

* fix(#2803): address CodeRabbit — require --default value, keep missing config.json as error, bash fence
2026-04-28 20:21:48 -04:00
Tom Boucher
7616309a32 fix(#2798): add context_window to VALID_CONFIG_KEYS allowlist (#2816)
* fix(#2798): add regression test — context_window key already in VALID_CONFIG_KEYS

context_window was already added to both VALID_CONFIG_KEYS allowlists
(CJS and SDK) in a prior fix. The regression test confirms it stays there
and that config-set context_window succeeds end-to-end.

Regression test: tests/bug-2798-context-window-config-key.test.cjs

* fix(#2798): address CodeRabbit — add bash language to release notes fence
2026-04-28 20:21:44 -04:00
Tom Boucher
d46efb4790 fix(#2784): clear shared ~/.cache/gsd/ update-check cache in update workflow (#2813)
* fix(#2784): clear shared ~/.cache/gsd/ cache in update workflow

The SessionStart hook (hooks/gsd-check-update.js) writes update-check
results to $HOME/.cache/gsd/gsd-update-check.json (shared, tool-agnostic).
The update.md run_update step only cleared per-runtime paths like
~/.claude/cache/gsd-update-check.json, so the statusline kept showing the
stale upgrade indicator after a successful update.

Fix: add rm -f "$HOME/.cache/gsd/gsd-update-check.json" to the
cache-clear block in the run_update step.

Regression test: tests/bug-2784-update-cache-clear-path.test.cjs

* fix(#2784): address CodeRabbit review — four edge-cases count, bash fence, structured test assertions
2026-04-28 20:21:41 -04:00
Tom Boucher
055b43054f fix(#2794): embed model_profile_overrides.opencode.<tier> into generated OpenCode agents (#2822)
* docs: add CHANGELOG entry and rc.5 release notes for #2809 Codex hooks migrator fixes

Covers the five correctness findings addressed in the round-5 CR of PR #2809:
parseHooksBody key parser (hyphenated/quoted keys), buildNestedBlock empty-handler
guard, legacyMapSections segment-count filter, quoted-dot regression test, and
strengthened command path assertion.

Closes #2810

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2794): embed model_profile_overrides.opencode.<tier> into generated OpenCode agents

OpenCode agent files were missing `model:` frontmatter when the user configured
tier-based model resolution via `model_profile_overrides.opencode.*`. Only
explicit `model_overrides[agent]` was consulted; the runtime profile resolver
(used by the Codex path since #2517) was never called for OpenCode agents.

Added a tier-resolver fallback in the OpenCode agent conversion block in
`bin/install.js`. Precedence (matching Codex behavior):
  model_overrides[agent] > model_profile_overrides.opencode.<tier> > omit

Regression test: `tests/bug-2794-opencode-model-profile-overrides.test.cjs`

Closes #2794

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 20:16:27 -04:00
Tom Boucher
06de427b09 docs: add CHANGELOG entry and rc.5 release notes for #2809 Codex hooks migrator fixes (#2811)
Covers the five correctness findings addressed in the round-5 CR of PR #2809:
parseHooksBody key parser (hyphenated/quoted keys), buildNestedBlock empty-handler
guard, legacyMapSections segment-count filter, quoted-dot regression test, and
strengthened command path assertion.

Closes #2810

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 18:35:59 -04:00
Tom Boucher
3c03a153a5 fix(#2773): emit correct Codex 0.124.0+ two-level nested hooks schema (#2809)
* fix(#2773): emit correct Codex 0.124.0+ two-level nested hooks schema

Codex 0.124.0's stable spec requires:

  [[hooks.SessionStart]]          ← event entry (optional matcher)

  [[hooks.SessionStart.hooks]]    ← handler sub-table
  type = "command"
  command = "node ..."

Previous GSD versions wrote the flat [[hooks]] + event = "SessionStart"
form (#2637) or a single-block [[hooks.SessionStart]] without the nested
.hooks sub-table (#2760). Both are rejected by Codex 0.124.0+ at launch.

Changes:

bin/install.js
  - Hook block emission now always writes the two-level nested AoT form.
  - migrateCodexHooksMapFormat extended to also migrate flat [[hooks]]
    array-of-tables entries (event = "..." key → [[hooks.<EVENT>]] form).
    Flat [[hooks]] and [[hooks.<EVENT>]] are mutually exclusive TOML types;
    any pre-existing flat entries must be promoted before GSD appends its
    own namespaced hooks.
  - Migrated flat AoT blocks are inserted BEFORE the GSD marker so they
    stay in the "user" portion of the file and survive stripGsdFromCodexConfig.
  - stripCodexGsd* regexes cover all four historical block shapes.
  - validateCodexConfigSchema no longer rejects flat [[hooks]] at the root
    level (removing the false-positive that blocked install when users had
    their own AfterCommand hooks). The validator still enforces the nested
    [[hooks.<EVENT>.hooks]] shape for entries that have a .hooks sub-table.

tests/
  - bug-2760-codex-install-defensive.test.cjs: 29/29 passing.
    Added 5 new regression cases for fresh install, upgrade from each
    legacy shape, idempotent reinstall, and user hook preservation.
  - codex-config.test.cjs: 106/106 passing.
    All migration tests updated to assert [[hooks.<TYPE>.hooks]] sub-table
    (command now in handler level, not event-entry level).
    New tests: flat [[hooks]] migration (SessionStart, AfterCommand),
    install+uninstall preserves non-GSD AfterCommand hook.

Closes #2773

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: address CodeRabbit review + CI regression in bug-2698-crlf-install

CI regression (#2698 tests):
  Strip GSD-managed hook blocks BEFORE running migrateCodexHooksMapFormat.
  The previous order let migration convert the stale [[hooks]] + event =
  "SessionStart" + gsd-update-check.js block to [[hooks.SessionStart]] form
  before Shape 1 strip regex could match it; Shape 1 only matches the flat
  [[hooks]] form, so the stale block survived reinstall. Swapping to
  strip-then-migrate ensures only user-authored hooks reach the migration step.
  Shape 3/4 regexes also extended to match both gsd-check-update.js and the
  legacy gsd-update-check.js filename so no variant slips through.

CodeRabbit actionable (major):
  migrateCodexHooksMapFormat now accepts single-quoted TOML event values
  (event = 'SessionStart') in the flat [[hooks]] filter and event-name
  extractor. TOML spec allows single-quoted literal strings; double-quote-only
  regexes silently skipped them, leaving the block unmigrated and triggering
  the hard-fail validator.

CodeRabbit nitpicks:
  tests/codex-config.test.cjs: replace indexOf('[[hooks.AfterCommand]]')
  ordering check with parseTomlToObject structural assertions (no-source-grep
  rule).
  tests/bug-2760-codex-install-defensive.test.cjs: replace three
  content.match(/…/g).length raw-text counts with parseTomlToObject structural
  assertions for single-handler and single-event-entry invariants.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: address CodeRabbit review #2 — extractFlatHookEventName helper + type assertions

- bin/install.js: consolidate TOML_QUOTED_STRING + TOML_EVENT_CAPTURE into a
  single extractFlatHookEventName() helper that rejects empty-string event values
  (event = "" or event = ''); previously two independent regexes had to be kept
  in sync and neither guarded against a blank event name producing a [[hooks.]]
  header

- tests/bug-2760-codex-install-defensive.test.cjs: add comments explaining why
  the e.command fallback is retained in both allSessionStartCommands and
  afterToolCommands collectors — migration only upgrades [hooks.TYPE] map-format
  sections, not existing [[hooks.TYPE]] namespaced AoT entries authored with
  command at event-entry level; removing the fallback causes false failures for
  preserved user entries

- tests/codex-config.test.cjs: add type = "command" assertions to all migration
  tests that verify .command but were missing .type checks; buildNestedBlock
  injects type = "command" when the source body has no explicit type key, so
  every migrated handler must carry it per the Codex 0.124.0+ schema

138 tests pass, 0 fail.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: CR round 3 + proactive audit — TOML quoting, stale AoT migration, strict validator

Three real issues from CodeRabbit round 3, plus the collateral improvements they
enable:

bin/install.js — tomlBareKey() helper (#2773 CR6a)
  buildNestedBlock interpolated the raw event name into [[hooks.${type}]] and
  [[hooks.${type}.hooks]] headers without TOML escaping. An event name containing
  spaces or punctuation (e.g. "Before Tool") would produce invalid TOML that
  parseTomlToObject would subsequently reject. Added tomlBareKey() — wraps the
  key in double-quoted TOML strings when it contains non-bare-key characters
  ([A-Za-z0-9_-]).
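The tomlBareKey() behaviour described above can be sketched as (illustrative, not the actual bin/install.js helper):

```javascript
// Bare keys ([A-Za-z0-9_-]+) pass through unchanged; anything else is
// emitted as a double-quoted TOML string with backslash and quote escaped.
function tomlBareKey(key) {
  if (/^[A-Za-z0-9_-]+$/.test(key)) return key;
  return `"${key.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
}
```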

bin/install.js — staleNamespacedAotSections migration path (#2773 CR6b)
  migrateCodexHooksMapFormat handled [hooks.TYPE] (map-format) and flat [[hooks]]
  with event = "..." but ignored [[hooks.TYPE]] AoT entries that carried handler
  fields (command, type, timeout, statusMessage) at event-entry level without a
  nested [[hooks.TYPE.hooks]] sub-table. This is the pre-#2773 single-block shape
  that Codex 0.124.0+ rejects. Added staleNamespacedAotSections as the third
  migration category: detected by STALE_HANDLER_FIELD_PATTERN + absence of a
  [[hooks.TYPE.hooks]] sub-table in the same file; promoted to the two-level
  nested form by buildNestedBlock. Matcher-only entries (no handler fields) are
  intentionally skipped.

bin/install.js — validator now rejects event-level handler fields (#2773 CR6c)
  With migration covering the stale AoT shape, validateCodexConfigSchema can be
  strict: entries that have handler fields at event-entry level but no .hooks
  sub-array return ok: false instead of silently passing. Matcher-only entries
  (no handler fields and no .hooks) remain valid as event filters.

tests/codex-config.test.cjs — four new migration tests + missing type assertion
  Four tests cover the new stale AoT migration path: single-entry promotion,
  already-nested entry is left untouched (no double-wrap), multiple event types,
  and matcher-only entry is skipped. Added the missing type = "command" assertion
  to the CRLF migration test (the one miss from CR round 2).

tests/bug-2760-codex-install-defensive.test.cjs — strict .hooks-only collectors
  With stale AoT entries now migrated, the entry.command fallbacks in
  allSessionStartCommands and afterToolCommands are dead code. Replaced with
  strict entry.hooks-only collection guarded by an every(Array.isArray(e.hooks))
  pre-assertion, so any future regression that leaves handler fields at event
  level produces an explicit test failure rather than silently collecting them.

142 tests pass, 0 fail.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: CR round 4 — segment-safe quoted-key detection + structural test assertions

bin/install.js — getTomlTableSections now exposes segments (#2773 CR7a)
  The staleNamespacedAotSections filter used section.path.split('.').length > 2
  to skip [[hooks.TYPE.hooks]] sub-table entries. That check misclassifies quoted
  event names containing dots: [[hooks."before.tool"]] has path hooks.before.tool
  (3 dot-parts) but only 2 true parsed segments, so it was incorrectly excluded
  from migration. Fixed by adding segments to the getTomlTableSections return
  shape (already available on record.tableHeader.segments) and replacing the
  split-based check with section.segments.length !== 2, which uses the true
  parsed key count regardless of dots inside quoted names.

tests/codex-config.test.cjs — replace raw-equality assertions (#2773 CR7b)
  The two new no-op migration tests (already-nested and matcher-only) used
  assert.strictEqual(result, content) — raw string equality that conflicts with
  the repo no-source-grep testing standard. Replaced with structural assertions
  using parseTomlToObject: the already-nested test verifies the handler stays
  under .hooks[0] and no double-wrap occurs; the matcher-only test verifies the
  matcher key is preserved and no .hooks sub-array is added.

142 tests pass, 0 fail.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: CR round 5 — parseHooksBody key parser, empty-handler guard, segment-safe legacyMap filter, stronger test assertions

- parseHooksBody: replace /^([\w.]+)\s*=/ regex with parseTomlKey() so
  hyphenated keys (status-message) and quoted keys are not silently dropped
- buildNestedBlock: guard against handlerEntries.length === 0 — do not
  synthesise [[hooks.TYPE.hooks]] with type="command" but no command for
  matcher-only or otherwise handler-empty stale sections
- legacyMapSections filter: use section.segments.length === 2 (same fix
  applied to staleNamespacedAotSections in round 4) to prevent [hooks.X.Y]
  3-segment tables from being misclassified as event entries
- tests: add regression test for [[hooks."before.tool"]] quoted-dot event
  names; strengthen command path assertion to exact absolute path comparison

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 18:28:53 -04:00
Tom Boucher
c0730fffde docs(changelog): expand [Unreleased] with all 1.39.0-rc.4 changes since v1.38.5 (#2799)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-27 22:56:03 -04:00
Tom Boucher
f983c95ffc Release/1.39.0-rc.4 (#2797)
* chore: bump version to 1.39.0 for release

* chore: bump to 1.39.0-rc.1

* chore: bump to 1.39.0-rc.2

* chore: bump to 1.39.0-rc.3

* chore: bump to 1.39.0-rc.4

* docs: add v1.39.0-rc.4 release notes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-27 22:32:14 -04:00
Tom Boucher
b44482cf03 fix(#2760): defensive Codex install — strip legacy agents blocks, default hooks to AoT, validate post-write schema (#2785)
* fix(#2760): defensive Codex install — strip legacy agents blocks, default hooks to AoT, validate post-write schema

Three defects, three defensive fixes shipped together. The issue reporter
never returned with the requested diagnostic backup, but four additional
users have since confirmed the same Codex breakage, and ZakAnun confirmed
manual cleanup is the only working workaround. The defensive triple therefore
ships without the original backup grep, justified by the corroborating reports.

Fix 1 (defect 3 — confirmed real). The Codex hooks emit path always
appended a top-level `[[hooks]]` AoT block, which collides with users
who already use the namespaced AoT form `[[hooks.SessionStart]]`. New
helper `hasUserNamespacedAotHooks()` detects the user's preferred shape
on parse and the install emits the GSD-managed hook in that same shape
when present. Default for fresh configs stays at top-level `[[hooks]]`
so status-quo behavior is preserved.

Fix 2 (defects 1+2 — defensive). `stripLeakedGsdCodexSections()` (the
install-time stripper) now always purges bare `[agents]` single-bracket
tables and `[[agents]]` sequence tables regardless of GSD marker
presence — both forms are invalid in current Codex schema and produce
"invalid type: ..., expected struct AgentsToml". Previously gated on
GSD-name lookup which missed marker-stripped configs and third-party
authored entries. The uninstall-time stripper (`stripCodexGsdAgentSections`)
keeps its old conservative behavior so user-authored entries survive
uninstall.

Fix 3 (defensive). Post-write schema validation parses the bytes about
to be committed and asserts no bare `[agents]`, no `[[agents]]`, and no
bare `[hooks.<Event>]` tables remain. On failure the install restores
the pre-install backup of config.toml and aborts loudly so the user is
never left with a Codex CLI that refuses to load. Pre-install snapshot
is captured before installCodexConfig runs (not after) so restore
returns the file to its true pre-GSD state.

Tests added (10 new, 1 updated):
  - bug-2760-codex-install-defensive.test.cjs (10 new tests across 4
    describes: hooks AoT preservation, strip robustness for both
    [agents] and [[agents]] without marker, schema validator behavior,
    abort+restore via test seam)
  - codex-config.test.cjs "case 2 ..." updated to reflect new defensive
    bare-[agents] purge

Full suite: 5747 pass / 0 fail.

Closes #2760

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2760): normalize Codex hooks emit field name across migration and managed paths

The migrateCodexHooksMapFormat path emitted `type = "<TYPE>"` for legacy
[hooks.TYPE] sections, while the GSD-managed Codex install emitted
`event = "SessionStart"` — same target [[hooks]] schema, two different
field names. Codex currently tolerates both via permissive parsing, but
the moment one path tightens this becomes a silent #2760-class regression.

Normalize both call sites on `event` (the existing GSD-managed
convention). Update migration emit, docstring, and existing migration
assertions to match. Add a parity regression test that drives both code
paths and asserts the [[hooks]] field key is identical.

* test(#2153): fix test isolation by building hooks/dist on demand

The "Codex install copies hook file (#2153)" regression depends on
hooks/dist/ being populated, but that directory is gitignored and only
built by `npm run build:hooks`. The npm pretest chain runs `build:sdk`
but not `build:hooks`, so when this file is run in isolation
(`node --test tests/codex-config.test.cjs`) the hook copy step skips
silently and the regression test fails on a stale-environment artifact
rather than a real bug.

Add a top-level before() hook that runs scripts/build-hooks.js when
hooks/dist/ is missing or empty. Matches the pattern already used by
bug-1834-sh-hooks-installed and other install integration tests, so the
suite passes regardless of runner ordering or which tests are targeted.

* fix(#2760): structural TOML validation, atomic writes, and behavioral test rewrites

Addresses CodeRabbit review on PR #2785 plus source-grep violations the
maintainer flagged in the regression test.

Fix 1 (CR 3149606220) — validateCodexConfigSchema now parses the TOML into
a structured object first via the new parseTomlToObject helper, then
runs schema-shape checks against both the parsed structure and the table
section headers. Malformed TOML with valid-looking headers no longer
slips past validation.

Fix 2 (CR 3149606224) — Replaced the four source-grep assertions in
tests/bug-2760-codex-install-defensive.test.cjs (lines 109, 125, 169,
201) with structural assertions against the parsed TOML object via the
exported parseTomlToObject helper. Tests now verify behavior (the file
parses and contains the expected structure) instead of literal byte
patterns. Robust to formatting changes — exactly what the regex-loosening
suggestion was reaching for, done correctly. Confirmed clean by
`npm run lint:tests` (0 violations).

Fix 3 (CR 3149606234) — The describe block that mutates
installModule.__codexSchemaValidator now runs with concurrency: false
so the test seam mutation cannot leak into sibling suites that also
call runCodexInstall.

Fix 4 (CR outside-diff) — Approach (b): atomic temp-file + renameSync.
Added atomicWriteFileSync helper used by mergeCodexConfig and the final
hooks-write. A mid-write failure leaves the .tmp-<pid>-<n> sibling
behind (cleaned up immediately) and never truncates the original
config.toml. Paired with try/catch wrapping around the entire
post-snapshot mutation sequence so any unexpected throw also triggers
restoreCodexSnapshot. Two layers of defense: atomic write prevents the
corruption window, snapshot restore handles non-atomic write paths.

Added behavioral test for fix 4: stubs fs.renameSync to throw on the
configPath rename, asserts the on-disk bytes match the pre-install
snapshot byte-for-byte, asserts the parsed structure is still the
user's [model] section (no half-written GSD agents block), and asserts
no stray .tmp-* files remain. Marked concurrency: false because it
monkey-patches a global.

Test results: 5749/5749 pass, 0 fail. lint:tests clean.

* test(#2760): TOML-parse based assertions for bare-agents purge and hook-field parity (CodeRabbit follow-up)

* fix(#2760): treat write failures as fatal, strip legacy hooks before guard, tighten TOML parser (CR4)

CR4 finding 1 (MAJOR) — Write failures silently succeeded. The inner catch
around atomicWriteFileSync restored the snapshot then re-threw, but the outer
catch only matched 'post-write Codex schema validation failed' and downgraded
everything else to a warn-and-continue. Install finished with "Done!" while
Codex had no GSD agents configured. Fix: wrap writeErr with a `post-write
Codex install failed:` prefix and broaden the outer guard to `.startsWith(
'post-write')` so both schema-validation and write failures abort install.

CR4 finding 2 (MAJOR) — Legacy flat [[hooks]] block prevented namespaced AoT
upgrade. The `!configContent.includes('gsd-check-update')` guard short-
circuited the new namespaced emit when an existing install had the legacy
flat [[hooks]] block, leaving users stuck in the mixed layout this fix is
designed to eliminate. Fix: strip ALL existing managed gsd-check-update
hook blocks (top-level [[hooks]] AND namespaced [[hooks.SessionStart]])
BEFORE evaluating the includes guard, so every install converges on the
right shape regardless of prior state.

CR4 finding 3 (MAJOR) — Homegrown TOML parser silently accepted malformed
input. parseTomlValue happily consumed the `0` prefix of `timeout = 0.5`
and parseTomlToObject did not verify the full RHS was consumed, so
`key = "x" junk` and date/time literals slipped through. Per CONTRIBUTING
("No external dependencies in core"), option (b) was chosen over adding
@iarna/toml: (a) parseTomlValue rejects any integer immediately followed
by `.`, `e`, `E`, `:`, `-`, `T`, or `Z` (floats / dates / times); (b)
parseTomlToObject scans from parsed.end to the next newline and throws
`trailing bytes after value` if anything other than whitespace + optional
`# comment` is present.
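Both checks can be sketched as follows (stand-ins for the parseTomlValue / parseTomlToObject internals, not the actual code):

```javascript
// (a) Integer parsing rejects '.', 'e', 'E', ':', '-', 'T', or 'Z'
// immediately after the digits, so floats, dates, and times fail loudly
// instead of having their digit prefix silently consumed.
function parseTomlInteger(raw) {
  const m = raw.match(/^(\d+)([\s\S]?)/);
  if (!m || /[.eE:\-TZ]/.test(m[2])) {
    throw new Error(`not a plain integer: ${raw}`);
  }
  return parseInt(m[1], 10);
}

// (b) After a value parses, only whitespace plus an optional '# comment'
// may remain before the end of the line.
function checkTrailing(line, valueEnd) {
  const rest = line.slice(valueEnd);
  if (!/^\s*(#.*)?$/.test(rest)) {
    throw new Error(`trailing bytes after value: ${rest.trim()}`);
  }
}
```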

* test(#2760): add CR4 regression tests + scope GSD_TEST_MODE + rename rename-fault test

CR4 finding 5 (NIT) — GSD_TEST_MODE leak. Saved previous value, set '1' for
the require, then restored (delete if undefined). No more test-only env var
leaking to siblings in the same node process.

CR4 finding 4 (NIT) — Renamed the existing fix-4 test from 'fs.writeFileSync'
to 'fs.renameSync' (the only call actually faulted) and added a sibling test
that stubs fs.writeFileSync to throw on the .tmp- target — exercising the
pre-rename branch of atomicWriteFileSync that was previously untested. Both
serialize via concurrency: false on the existing describe block.

CR4 finding 1 (MAJOR test) — New behavioral test asserts install throws with
a `post-write Codex install failed` message AND never prints "Done!" when
the hook-block atomic rename fails. Captures stdout via console.log stub,
asserts byte equality of restored snapshot. Faults only the rename whose
temp source contains gsd-check-update so earlier mergeCodexConfig writes
are not collateral damage.

CR4 finding 2 (MAJOR test) — New TOML-parsed behavioral test for the
legacy-hook upgrade path: pre-install has [[hooks.SessionStart]] (user) +
legacy flat [[hooks]] managed gsd-check-update entry; post-install must
have hooks.SessionStart as Array-of-tables with both user hook and GSD
entry, and no top-level [[hooks]] AoT remaining. Also asserts exactly one
gsd-check-update entry (no duplicates).

CR4 finding 3 (MAJOR test) — parseTomlToObject regression suite: rejects
floats (timeout = 0.5), dates (created = 1979-05-27), trailing garbage
(key = "x" junk), and accepts trailing whitespace + # comment.

* fix(#2760): CR5 — pre-write fatal, TOML duplicate-key/header rejection, namespaced AoT migration

Address all five CodeRabbit round-5 findings on PR #2785:

Finding 1 (MAJOR) — Pre-write failures in the Codex hook configuration
catch (around bin/install.js:7002) used to fall through to console.warn
even though restoreCodexSnapshot() had already run. This produced "Done!"
output with no Codex hooks configured. Now wraps the original error with
a "(pre-write)" prefix and rethrows so install aborts loudly. Same defect
class as CR4 finding 1, different layer.

Finding 2 (MAJOR) — parseTomlToObject silently reused existing tables and
overwrote duplicate keys. Real TOML 1.0 rejects:
  - duplicate scalar key in same table ([a]\nx=1\nx=2)
  - re-declared [a] header (two [a] sections)
  - [[arr]] then [arr] for same path (shape mismatch)
Tracks pathShape, declaredHeaders, and per-table-instance key sets;
throws "duplicate or shape-mismatched table header at <path>" or
"duplicate key <name> in <path>".
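
The tracking can be sketched as follows; this is a hypothetical reduction,
and the real parseTomlToObject also distinguishes [[arr]] vs [a] shape
mismatches and per-table-instance key sets:

```javascript
// Illustrative duplicate-header / duplicate-key tracking per the CR5
// description; not the actual parseTomlToObject implementation.
function checkTomlTables(lines) {
  const declaredHeaders = new Set();
  const keysPerTable = new Map(); // table path -> Set of scalar keys
  let table = ''; // '' = root table
  for (const raw of lines) {
    const line = raw.trim();
    if (!line || line.startsWith('#')) continue;
    const header = /^\[([^\[\]]+)\]$/.exec(line);
    if (header) {
      table = header[1];
      if (declaredHeaders.has(table)) {
        throw new Error(`duplicate or shape-mismatched table header at ${table}`);
      }
      declaredHeaders.add(table);
      continue;
    }
    const key = line.split('=')[0].trim();
    if (!keysPerTable.has(table)) keysPerTable.set(table, new Set());
    if (keysPerTable.get(table).has(key)) {
      throw new Error(`duplicate key ${key} in ${table}`);
    }
    keysPerTable.get(table).add(key);
  }
}
```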

Finding 3 (MAJOR) — migrateCodexHooksMapFormat used to emit flat
[[hooks]]\nevent="<TYPE>", which produced mixed flat+namespaced layouts
when the user already had [[hooks.<OTHER>]] entries. Now emits
[[hooks.<TYPE>]] directly (the namespace IS the event); managed-emit
detector hasUserNamespacedAotHooks fires correctly so the install
converges on a single namespaced layout regardless of pre-existing state.

Finding 4 (NIT) — tests/bug-2760-codex-install-defensive.test.cjs
rename-failure test tightened from "throw OR warn acceptable" to
assert.equal(threw, true), locking the contract Finding 1 establishes.

Finding 5 (NIT) — bug-2760 test suite snapshots and restores fs.renameSync
defensively in beforeEach/afterEach (symmetric with fs.writeFileSync),
removing the fragile per-test try/finally. Second test in the same
suite cleaned up to drop its try/finally.

Updates tests/codex-config.test.cjs to assert the new namespaced AoT
migration shape via parseTomlToObject (no source-grep). The existing
field-parity test is reframed as shape-parity since both paths now emit
the namespaced layout.

Tests: 5764 pass (+8 new). lint:tests: 0 violations.

* docs(#2760): add CHANGELOG entry for Codex install defensive triple

Adds the [Unreleased] Fixed entry for the Codex install fix landed in this
PR — defensive strip of legacy [agents]/[[agents]] blocks, namespaced AoT
hook detection across all events, atomic write + rollback, strict TOML
validation rejecting duplicate keys/repeated headers/trailing bytes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 17:11:59 -04:00
Tom Boucher
936cf26706 fix(#2769): tolerate Requirements header with colon inside bold delimiters (#2782)
* fix(#2769): tolerate Requirements header with colon inside bold delimiters

extractReqIds in sdk/src/query/init.ts and the legacy init.cjs port only
matched `**Requirements**:` (colon outside bold), so phases declared with
the equally-valid markdown form `**Requirements:**` (colon inside bold,
which is what the project's own templates emit) returned phase_req_ids:
null for both `init plan-phase` and `init execute-phase`.

The mirror-image bug in `phase complete`'s REQUIREMENTS.md traceability
sweep at get-shit-done/bin/lib/phase.cjs:871 only matched the inside-bold
form, silently skipping the REQ-ID checkbox flips for any roadmap that
used the outside-bold form. Both parsers now share the same canonical
regex that accepts all three rendered-identical variants:

  **Requirements:**     (colon inside bold)
  **Requirements**:     (colon outside bold)
  **Requirements** :    (space before outside colon)
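
A regex accepting all three variants could look like this (one possible
shape; the project's actual canonical regex may differ):

```javascript
// One possible shape for the shared canonical header regex; the actual
// constant in the codebase may differ.
const REQUIREMENTS_HEADER_RE = /\*\*Requirements(?::\*\*|\*\*\s*:)/;

const variants = [
  '**Requirements:**',  // colon inside bold
  '**Requirements**:',  // colon outside bold
  '**Requirements** :', // space before outside colon
];
const allMatch = variants.every((v) => REQUIREMENTS_HEADER_RE.test(v)); // true
```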

Tests:
- tests/init.test.cjs — parameterized over the three header variants for
  both init plan-phase and init execute-phase (6 new behavioral cases).
- sdk/src/query/init.test.ts — describe.each over the same variants
  exercising initPlanPhase through the SDK.
- tests/bug-2769-requirements-header-variants.test.cjs — phase complete
  flips REQ-001 in REQUIREMENTS.md across all three header variants.

Closes #2769

* refactor(#2769): centralize REQUIREMENTS_HEADER_RE constant per CodeRabbit
2026-04-27 12:31:49 -04:00
Tom Boucher
54e6da3126 fix(#2767): pass paths via --files to gsd-sdk query commit + lint guard (#2781)
* fix(#2767): pass paths via --files to gsd-sdk query commit + lint guard

Workflows, agents, commands, and references passed file paths positionally
to `gsd-sdk query commit`, which silently appended them to the commit
subject and triggered the `.planning/` wholesale-stage fallback in
sdk/src/query/commit.ts:136. Regression of #733/#798.

Inserted `--files` before the path list at every site (81 invocations
across 50 files). Added tests/bug-2767-gsd-sdk-commit-files-flag.test.cjs
as a permanent lint that scans every shipped .md file and asserts each
`gsd-sdk query commit[-to-subrepo]` invocation either uses `--files` or
carries no path arguments.

Closes #2767

* test(#2767): replace source-grep with behavioral SDK test

The original test walked every shipped .md file and regex-tokenized
`gsd-sdk query commit` invocations to assert `--files` was present.
CONTRIBUTING.md prohibits this source-grep pattern.

Rewrite as behavioral SDK tests against `sdk/dist/cli.js` over a real
tmp git project (createTempGitProject helper). Cover both the
well-formed (`--files <paths>`) form — clean subject, exactly-staged
files, .planning/ left untouched — and the buggy positional form,
asserting the documented misbehavior (paths leak into subject + the
`.planning/` wholesale-stage fallback at commit.ts:136). Also asserts
`commit-to-subrepo` rejects when `--files` is omitted (commit.ts:258).

The doc-lint is retained as a supplementary defense-in-depth guard
since agent-prompt markdown invocations cannot be exercised end-to-end
— but it is no longer the primary contract.

* docs(#2767): correct contradictory --files guidance in zh-CN/en docs + fix test docstring
2026-04-27 12:31:43 -04:00
Tom Boucher
3ac3a2ae70 fix(#2770): coerce non-string depends_on YAML values to preserve dependencies (#2780)
* fix(#2770): coerce non-string truths to preserve cross-cutting constraints

`cmdRoadmapAnnotateDependencies` skipped non-string truth entries via
`if (typeof t !== 'string') continue`. That avoided the TypeError reported
in #2770 but silently dropped legitimate constraints — numeric YAML scalars
(`- 3`) and kv-shaped truths from parseMustHavesBlock's continuation-kv
path (#2757) — from the cross-cutting analysis, leaving ROADMAP.md
under-annotated.

Replace the skip-guard with a `coerceTruthToString` helper that:
  * passes strings through
  * `String()`-coerces numbers, booleans, bigints
  * extracts a string field (title, text, name, rule, path, provides) from
    object-shaped items

Composes cleanly with #2757 (objects from kv continuation lines now
contribute their title rather than being dropped) and the existing
`splitInlineArray` quote-aware parser.
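
A minimal sketch of the helper, assuming the field order listed above and a
null return for uncoercible items (the real roadmap.cjs code may differ):

```javascript
// Illustrative coerceTruthToString per the description; the null fallback
// for uncoercible items is an assumption, not confirmed behavior.
function coerceTruthToString(t) {
  if (typeof t === 'string') return t;
  if (['number', 'boolean', 'bigint'].includes(typeof t)) return String(t);
  if (t && typeof t === 'object') {
    for (const field of ['title', 'text', 'name', 'rule', 'path', 'provides']) {
      if (typeof t[field] === 'string') return t[field];
    }
  }
  return null; // caller would skip truths that cannot be coerced
}
```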

Tests: tests/bug-2770-annotate-deps-int-coerce.test.cjs
  - numeric scalar truth shared across plans surfaces as constraint
  - kv-shaped truth surfaces via title field
  - bare-int depends_on regression guards on extractFrontmatter

Full suite: 5678 pass, 0 fail.

Closes #2770

* test(#2770): use array join() for multi-line fixtures per CONTRIBUTING

* refactor(#2770): cache trim() and avoid no-op truthCounts.set in aggregation
2026-04-27 12:31:38 -04:00
Tom Boucher
8b6c44433f fix(#2772): only disable worktree isolation when planned paths touch submodules (#2779)
* fix(#2772): only disable worktree isolation when planned paths touch submodules

The previous guard in execute-phase.md and quick.md unconditionally set
USE_WORKTREES=false whenever .gitmodules existed, penalising every plan in
a submodule project even when no plan touched a submodule path.

Replace with submodule-path parsing + per-plan path intersection:

- Parse SUBMODULE_PATHS once from .gitmodules via
  `git config --file .gitmodules --get-regexp '^submodule\..*\.path$'`.
- In execute-phase.md, intersect SUBMODULE_PATHS with each plan's
  files_modified frontmatter; disable worktree isolation only for plans
  with non-empty intersection. Fall back to safe-disable for that plan
  when files_modified is missing/unparseable, with a log line explaining
  why.
- In quick.md (no pre-declared paths), keep submodule-path parsing and
  document a fail-loud commit-time guard so the executor aborts only when
  it actually stages a submodule path.

Add tests/bug-2772-gitmodules-path-intersection.test.cjs covering both
files: no unconditional disable, submodule paths are parsed, intersection
logic exists in execute-phase, fallback path is documented.

Full suite: 5680 / 5680 pass.

Closes #2772

* test(#2772): replace source-grep with behavioral test of submodule path intersection

* fix(#2772): wire USE_WORKTREES_FOR_PLAN into dispatch + fix glob matcher + add quick.md commit guard

Address CodeRabbit review on PR #2779 — the original fix computed
USE_WORKTREES_FOR_PLAN but never read it, so the per-plan submodule
intersection was dead code. Dispatch sites still branched on the
project-level USE_WORKTREES.

Changes:

1. execute-phase.md (CRITICAL — dispatch wiring): Move per-plan
   computation into execute_waves as sub-step 2.5, run it for each plan
   before its dispatch, and gate all four dispatch sites on
   USE_WORKTREES_FOR_PLAN: worktree-mode header, sequential-mode header,
   "worktrees disabled" sequential rule, and post-wave cleanup. Document
   PLAN_FILES extraction via jq from the phase-plan-index JSON. Track
   WAVE_WORKTREE_PLANS so post-wave cleanup only runs when at least one
   plan in the wave actually used worktrees.

2. Per-plan gate matcher (MAJOR — glob safety): Strip leading "./" and
   trailing "/" from both submodule and planned paths. Match
   bidirectionally (pf inside sm AND sm inside pf). Handle globby
   planned paths like "vendor/**/*.c" by extracting the literal prefix
   before the first glob metachar and re-checking. Wrap the iteration
   in set -f / set +f so glob expansion does not corrupt patterns.
   Extracted the gate (~92 lines) into
   workflows/execute-phase/steps/per-plan-worktree-gate.md to keep
   execute-phase.md under the 1700-line XL budget.

3. quick.md (CRITICAL — fail-loud guard): Inject SUBMODULE_PATHS into
   the executor Task prompt and add a <submodule_commit_guard> bash
   block the executor must run before every git commit. The guard
   inspects staged paths via `git diff --cached --name-only`, normalizes
   paths, and aborts with a clear ABORT message + recovery instruction
   ("re-run with workflow.use_worktrees=false") when any staged path
   falls inside a submodule.

4. tests/bug-2772-gitmodules-path-intersection.test.cjs: 25 tests total.
   Updated GATE_SNIPPET to match the new bash matcher. Added
   normalization tests (./ prefix, trailing /, glob "vendor/**/*.c",
   parent directory, ./ in .gitmodules). Added workflow-markdown
   wiring assertions for all 4 dispatch sites + per-plan gate file
   extraction. Added quick.md guard tests: prompt injection assertion +
   behavioral fixture-repo tests that stage a submodule path and assert
   the guard exits non-zero with the ABORT message.
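
The matcher logic in change 2 can be transliterated to JavaScript for
illustration (the shipped gate is bash, and these names are hypothetical):

```javascript
// JS transliteration of the per-plan gate's path handling; illustrative
// only, the real matcher lives in per-plan-worktree-gate.md as bash.
function normalize(p) {
  return p.replace(/^\.\//, '').replace(/\/+$/, '');
}
function literalPrefix(p) {
  const i = p.search(/[*?\[]/);                 // first glob metacharacter
  if (i === -1) return p;
  return p.slice(0, i).replace(/\/[^/]*$/, ''); // drop the partial segment
}
function touchesSubmodule(plannedPath, submodulePath) {
  const pf = literalPrefix(normalize(plannedPath));
  const sm = normalize(submodulePath);
  // containment in either direction counts as an intersection
  return pf === sm || pf.startsWith(sm + '/') || sm.startsWith(pf + '/');
}
```

So a globby plan path like vendor/**/*.c reduces to the literal prefix
vendor and intersects a submodule at vendor/libfoo.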

Test count: 5701 pass / 0 fail (was 5698/1 before).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 12:31:32 -04:00
Tom Boucher
77d929429f fix(#2774): inclusion-based worktree cleanup to protect workspace .git (#2778)
* fix(#2774): inclusion-based worktree cleanup to protect workspace .git

The cleanup blocks in execute-phase.md and quick.md used an exclusion
filter (`grep -v "$(pwd)$"`) to skip the current worktree before calling
`git worktree remove --force` on everything else. The exclusion fails
whenever the current workspace is itself a worktree of an upstream repo:

- multi-workspace setups where `git worktree list` reports the registry
  path as a different absolute path than `$(pwd)`
- the cross-drive Windows case where the registry reports `E:/...` while
  `$(pwd)` resolves to `C:/...` — the equality test never holds, every
  other worktree (including the workspace itself) is removed, and the
  workspace's `.git` pointer file is destroyed.

Switches both cleanup blocks to an inclusion-based filter that targets
only agent-spawned worktrees under `.claude/worktrees/agent-`, the
namespace Claude Code's `isolation="worktree"` always uses for executor
worktrees. The workspace path can never collide with that prefix.

Adds tests/bug-2774-worktree-cleanup-workspace-safety.test.cjs covering:
- both workflow files use the inclusion filter
- neither falls back to the broken `grep -v "$(pwd)$"` guard
- end-to-end simulation of porcelain output with workspace + agent
  worktrees yields only the agent worktree

Closes #2774

* test(#2774): replace source-grep with behavioral test of cleanup pipeline

* fix(#2774): whitespace-safe worktree iteration with while/read

CodeRabbit review on PR #2778 flagged that `for WT in $WORKTREES` splits
on whitespace. Any agent worktree path containing a space (e.g. a workspace
under '/Users/dev/My Workspace/') would be torn into broken half-paths,
`git -C` would fail on each fragment, and the executor branch would never
be deleted.

Switch both cleanup blocks (quick.md and execute-phase.md) to:

  while IFS= read -r WT; do
    [ -z "$WT" ] && continue
    ...
  done < <(git worktree list --porcelain | grep ... | sed ...)

Process substitution feeds the pipeline output line-by-line — IFS= and -r
preserve every byte of the path including embedded spaces.

Also rename the misleading `makeBareTempGitRepo` helper to
`makeTempUpstreamRepo` (it does not pass --bare; it inits a normal repo
with an initial commit so worktree-add works).

Add two new behavioral tests:
- discovery pipeline yields whitespace paths intact on a single line
- the actual while/read loop iterates each whitespace-bearing path
  exactly once (would fail with the previous `for WT in` form)

Tests: 5681 pass, 0 fail.
2026-04-27 12:31:26 -04:00
Tom Boucher
6a293cfc2a fix(#2775): verify gsd-sdk on PATH before reporting SDK ready (#2777)
* fix(#2775): verify gsd-sdk on PATH before reporting SDK ready

`npx get-shit-done-cc@latest` printed `✓ GSD SDK ready` even though
`gsd-sdk` was not callable. Root cause: npx only links the package's
primary bin (`get-shit-done-cc`); secondary bins like `gsd-sdk` are not
materialized into a PATH directory. The installer asserted the weaker
invariant "sdk/dist/cli.js exists on disk" and treated it as proof of
the stronger invariant "command -v gsd-sdk resolves" — they aren't the
same.

Fix tightens the gate in installSdkIfNeeded:

  1. After confirming the dist is present, walk PATH for an executable
     `gsd-sdk` shim (isGsdSdkOnPath, no spawn).
  2. If absent, attempt to materialize the shim via symlink at
     `~/.local/bin/gsd-sdk` (or the first HOME-rooted PATH dir we can
     write to), falling back to a copy on filesystems that reject
     symlinks (trySelfLinkGsdSdk).
  3. Re-probe PATH after linking. Only print `✓ GSD SDK ready` when the
     probe succeeds; otherwise emit a clear ⚠ + remediation.

Also strips the misleading "or `npx get-shit-done-cc`" clause from the
shim header (it never linked the secondary bin).

Closes #2775

* test(#2775): use centralized helpers from helpers.cjs per CONTRIBUTING

* fix(#2775): wrapper script in symlink fallback to preserve __dirname resolution

CodeRabbit follow-up on PR #2777. The previous symlink-fallback in
trySelfLinkGsdSdk used fs.copyFileSync(shimSrc, target), but
bin/gsd-sdk.js resolves the CLI via path.resolve(__dirname, '..',
'sdk', 'dist', 'cli.js'). After a copy, __dirname becomes the link
directory (e.g. ~/.local/bin), so the resolved CLI path was broken
(~/.local/sdk/dist/cli.js) — and isGsdSdkOnPath() only checked file
existence + execute bit, so the success line still printed over a
broken install.

Replace the copy with a tiny wrapper script that require()s the real
shim by absolute path. This preserves __dirname inside bin/gsd-sdk.js
because the require runs against shimSrc's own location.

Also fixes the PATH restoration nit in the regression test (was
coercing undefined to the string "undefined" if PATH was unset).

Adds a behavioral fallback test that mocks fs.symlinkSync to throw,
exercises the fallback path, and asserts the resulting target is a
require()-wrapper (not a verbatim copy) and is executable.

* fix(#2775): PATH-backed dir ordering + tighten captureConsole + drop tautological assertion (CodeRabbit follow-up)
2026-04-27 12:31:21 -04:00
Tom Boucher
290c8b2909 fix(#2771): unify user-owned-artifacts list to suppress false patches warning (#2776)
* fix(#2771): unify user-owned-artifacts list to suppress false patches warning

USER-PROFILE.md was both preserved across reinstalls (correctly) AND tracked in
gsd-file-manifest.json (incorrectly). On the next install, saveLocalPatches()
hashed the on-disk file, found it differed from the stale manifest hash (because
/gsd-profile-user --refresh regenerated it), and reported it as a "locally
modified GSD file" — a spurious warning every time the profile refreshed.

A file is either distribution (manifest-tracked, diff'd against manifest) or
user artifact (preserved across installs, never diff'd). Never both. This
extracts USER_OWNED_ARTIFACTS as a single source of truth, referenced by both
the preserveUserArtifacts call site and writeManifest, so the invariant cannot
drift again.

Adds a regression test that exercises the full reproduction path: install,
create USER-PROFILE.md, reinstall, refresh USER-PROFILE.md, reinstall, assert
no patch backup and no warning text.

Closes #2771

* test(#2771): use centralized helpers from helpers.cjs per CONTRIBUTING

* fix(#2771): normalize legacy USER_OWNED_ARTIFACTS entries from manifest + tighten test
2026-04-27 12:31:15 -04:00
Tom Boucher
dc9b712967 refactor(state): drop unused args + lift currentPhase in cmdStateCompletePhase (#2761)
* refactor(state): drop unused args param and lift currentPhase in cmdStateCompletePhase

Two cleanup items surfaced by CodeRabbit review of PR #2759:

1. cmdStateCompletePhase(cwd, args, raw) — args is never read inside the
   function. All sibling state subcommands use the leaner (cwd, raw) shape.
   Remove the unused parameter and update the dispatch call in gsd-tools.cjs.

2. output() at line 1754 called fs.readFileSync(statePath) after
   readModifyWriteStateMd had already released the lock, re-extracting
   Current Phase via an extra fs read. The closure already computed
   currentPhase at line 1704; lifting resolvedPhase into outer scope and
   capturing it in the callback eliminates the post-lock read and closes the
   small race window.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* test(#2761): apply CodeRabbit nitpicks with regression tests

Two CodeRabbit nitpicks from PR #2761 review, each landed with a
regression test so a future refactor can't unwind them.

1. tests/dispatcher.test.cjs — pin the enumerated subcommand list:
   the 'state unknown subcommand errors' test now also asserts that
   the dispatcher's error string includes 'complete-phase'. Without
   this, a future reformat of the available-subcommands enumeration
   could silently drop entries and the existing
   'Unknown state subcommand' substring check would still pass.

2. get-shit-done/bin/lib/state.cjs — tighten the Phase fallback in
   cmdStateCompletePhase: when STATE.md is missing the canonical
   '**Current Phase:**' field and the only phase signal is the
   decorated body line under '## Current Position' (e.g.
   'Phase: 01 (Foo) — EXECUTING'), the previous fallback returned
   the entire decorated string, producing messy downstream output:
     Status: Phase 01 (Foo) — EXECUTING complete
     Phase: 01 (Foo) — EXECUTING — COMPLETE
   The fallback now strips everything past the leading
   numeric/decimal token via /^\s*([\w.-]+)/ so degraded inputs
   produce clean output identical to the canonical path.

3. tests/state.test.cjs — two new tests in a dedicated describe block:
   - decorated Phase line writes clean Phase identifier
   - canonical Current Phase wins over Current Position decoration
   Both run real `gsd state complete-phase` against synthetic
   STATE.md fixtures and assert on the rendered Status field.

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-27 09:03:36 -04:00
Tom Boucher
9472f343db feat(#2762): --minimal install profile (≥94% cold-start token reduction) (#2764)
* feat(#2762): add --minimal install profile to cut cold-start token cost

Eager system-prompt load from 86 gsd-* skill descriptions plus 33
subagent descriptions costs ~12k tokens per turn even in directories
with no .planning/. Frontier models (Sonnet 4.6 / Opus 4.7) with 200K-1M
context don't feel it; local LLMs with 32K-128K do.

--minimal (alias --core-only) installs only the main GSD loop:
new-project, discuss-phase, plan-phase, execute-phase, plus help/update.
Zero gsd-* subagents are written. Re-running gsd update without
--minimal expands to the full surface. Default install behavior is
unchanged.

DRY: a single stageSkillsForMode() helper filters the source dir; all
13 runtime-specific copy fns are unchanged because they recurse the
staged dir. Allowlist + helpers live in get-shit-done/bin/lib/install-
profiles.cjs as the single source of truth.

Manifest now records mode: 'minimal' | 'full' so future commands can
detect install profile.

Tested end-to-end: --minimal yields 6 skill folders + 0 agents; default
yields 86 + 33 (unchanged).

* docs(#2762): document --minimal install in README

Adds a collapsible 'Minimal Install' section under Getting Started
covering: who it's for (local LLMs, token-billed APIs), what you get
(6 skills, 0 subagents, ~700 token floor vs ~12k), and the critical
caveat that re-installing without --minimal restores the full surface
and erases the savings. Includes a comparison table, the manifest
inspection one-liner, and the use-case decision matrix.

* fix(#2762): address CodeRabbit review + CI failures

CodeRabbit findings:
1. Temp dir leak (Minor): stageSkillsForMode created tmp dirs that were
   never cleaned up. Added a module-level Set tracking every staged dir
   plus a process.on('exit') handler that rm -rf's them. Also wrap the
   copy loop in try/catch to remove a partially-populated tmp dir on
   mid-flight failure. Verified end-to-end: 0 leaked dirs in /tmp after
   a real install.

2. Codex full -> minimal stale state (Major): a previous full Codex
   install left agents/gsd-*.toml files plus [agents.gsd-*] sections in
   config.toml. The original cleanup only removed .md files, so a switch
   to --minimal would leave Codex still advertising the full agent
   surface. Cleanup now also handles .toml under isCodex, and minimal
   mode strips GSD sections from config.toml via the existing
   stripGsdFromCodexConfig helper (same path used by --uninstall).

3. Nitpick — Codex downgrade regression test: added a spawnSync-based
   end-to-end test that fakes a previous full install (stale gsd-*.md +
   gsd-*.toml + GSD-marked config.toml + a user-owned agent/setting),
   runs install.js --codex --minimal, and asserts stale GSD files +
   sections are gone while user content is preserved.

CI failures (inventory parity):
- docs/INVENTORY.md CLI Modules table now lists install-profiles.cjs
  with the correct headline count (30 -> 31).
- docs/INVENTORY-MANIFEST.json regenerated via gen-inventory-manifest.cjs.

Test count: 149 pass (was 116 in last commit; +14 new install-minimal +
all previously-failing inventory tests now green).

* test(#2762): expand install-minimal test coverage for future-proofing

Each new test pins a specific guarantee that closes off a future
regression class — turning every CodeRabbit finding (including the
nitpicky one) into a permanent guard.

cleanupStagedSkills suite (+3 tests):
- 'full mode does not register a staged dir' — catches a future
  regression where someone forgets the early-return in stageSkillsForMode
  and starts polluting STAGED_DIRS in default installs.
- 'exit handler registers exactly once across many calls' — catches
  removal of the exitHandlerRegistered guard. install.js has 13
  dispatch sites, so a missing guard would attach 13 listeners.
- 'mid-copy failure removes partial staged dir and re-throws' —
  intercepts fs.copyFileSync to throw mid-loop and asserts the staged
  dir count in /tmp is unchanged after the throw. Pins the exact
  CodeRabbit-flagged leak.

Claude full -> minimal downgrade (+1 test):
- Mirrors the Codex downgrade test for the .md-only path that the
  other 12 runtimes share. Asserts user-owned agents are preserved.

Manifest mode round-trip (+3 tests):
- Default install -> mode: 'full' with >6 skills and >0 agents
- --minimal -> mode: 'minimal' with exactly 6 skills and 0 agents
- --core-only alias produces identical manifest to --minimal

Allowlist scope guards (+3 tests):
- Every main-loop command IS in allowlist (positive)
- Off-loop commands (autonomous, ship, do, progress, next, fast,
  quick, debug, code-review, verify-work) are NOT (guards against
  silent scope creep — future contributor adds 'autonomous' to core
  and the floor erodes)
- Unknown mode strings fall through to full behavior — pre-emptive
  guard for future 'compact'/'tier2' modes that might forget to
  update the predicate.

Total: 25 tests in this file (was 15), 159/159 passing across the
install + inventory suites.

* fix(#2762): clean up staged tmp dirs on SIGINT/SIGTERM/SIGHUP

CodeRabbit follow-up review on c727bf5f flagged that process.on('exit')
does not fire on signal-driven termination. An installer is exactly
the kind of process users abort mid-run with Ctrl+C, so without
explicit signal handlers the staged tmp dirs in STAGED_DIRS would be
left behind until the OS reaps tmpdir.

Fix: ensureExitCleanup now also registers process.once handlers for
SIGINT, SIGTERM, SIGHUP. Each handler runs cleanupStagedSkills then
re-raises the same signal via process.kill(pid, sig) so the OS-default
handler takes over and the parent shell sees the correct exit code
(130 for SIGINT, etc.) — CI scripts and interactive users see the
abort the way they expect.
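
The handler shape described above can be sketched as follows (names are
assumptions, not the actual install.js code):

```javascript
// Illustrative cleanup registration: run cleanup on natural exit AND on
// signal-driven termination, then re-raise the signal.
function ensureExitCleanup(cleanup) {
  process.on('exit', cleanup); // natural exit
  for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP']) {
    process.once(sig, () => {
      cleanup();
      // Re-raise so the OS-default handler runs and the parent shell sees
      // the conventional signal exit status (e.g. 130 for SIGINT).
      process.kill(process.pid, sig);
    });
  }
}
```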

Test: spawns a child that stages a tmp dir then blocks; parent
captures the staged path from stdout, sends SIGINT, asserts (a) the
staged dir is gone after child exit, (b) child exits via the signal
not via code 0. Skipped on Windows (signal semantics differ; the
natural-exit cleanup test covers the Windows CI matrix).

Total: 26 tests in install-minimal.test.cjs (was 25).
2026-04-27 00:13:20 -04:00
Tom Boucher
ab5ad6c8bc fix(#2757): unquoted truths with colons crash annotate-dependencies (#2759)
parseMustHavesBlock dispatched on `includes(':')` to detect key-value pairs,
but unquoted YAML strings like `GET /foo/:id resolves...` and
`Class::Method is idempotent` also contain colons. When the KV regex failed
to match, `current` was left as `{}` (the empty object initialized before
the branch), which then caused `t.trim()` in roadmap.cjs to throw
`TypeError: t.trim is not a function`.

Two fixes:
- frontmatter.cjs: tighten the KV regex to require at least one space after
  the colon (`\s+` instead of `\s*`), matching YAML convention. When the
  regex still fails to match, fall back to treating the item as a plain
  string instead of leaving `current` as `{}`.
- roadmap.cjs: add `typeof t !== 'string'` guard before `.trim()` as a
  cheap safety net against any future parser anomaly.

Closes #2757
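
An illustrative KV detector with the tightened whitespace requirement (the
actual regex in frontmatter.cjs may differ):

```javascript
// Hypothetical KV dispatch: a key-value item must have at least one space
// after the colon, per YAML convention.
const KV_RE = /^(\S+):\s+(.+)$/;

function classify(item) {
  const m = KV_RE.exec(item);
  return m ? { key: m[1], value: m[2] } : item; // fall back to plain string
}
```

Unquoted strings whose colons lack a trailing space, such as
`GET /foo/:id resolves...` or `Class::Method is idempotent`, now fall
through to the plain-string branch instead of leaving `current` as `{}`.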

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 21:40:38 -04:00
Tom Boucher
1a230e69aa perf: convert discuss-phase SKILL.md @file imports to lazy per-branch reads (#2752)
* perf: convert discuss-phase @file imports to lazy per-branch reads

Replace eager @file directives in <execution_context> with on-demand
Read calls gated behind mode routing. discuss-phase-assumptions.md is
now only read when DISCUSS_MODE=assumptions; discuss-phase.md is only
read for the default discuss mode; discuss-phase-power.md and
templates/context.md are removed from the entry point entirely
(power mode is handled inside discuss-phase.md's lazy mode dispatch;
context.md is loaded at the write_context step).

Reduces tokens loaded at skill entry from ~13k to near zero.

Closes #2606
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(discuss-phase): use contiguous 'Read and execute' phrase in process block

The test at tests/discuss-mode.test.cjs:45 asserts that the <process>
block contains 'Read and execute' as a literal substring. The prior
wording split the instruction across two lines (Read(...) / Then execute),
so the substring match failed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(discuss-phase): restore discuss-phase-power reference in process block

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:37:45 -04:00
Tom Boucher
1405728292 fix: parseMustHavesBlock quoted strings + gsd state complete-phase (#2744)
* fix: parseMustHavesBlock quoted strings + gsd state complete-phase

Bug #2734: parseMustHavesBlock dropped quoted truths containing ':' because
fully-quoted strings like `"App-side UUIDv4: generated locally"` fell into
the kv-parse branch, the regex failed (value starts with '"'), and current
stayed as empty {}. Fix: detect fully-quoted strings before the ':' check
and extract them directly. Two regression tests added to frontmatter.test.cjs.
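
The quoted-string fast path can be illustrated as follows (the actual
parseMustHavesBlock code may differ):

```javascript
// Illustrative: detect a fully-quoted item BEFORE any ':' dispatch, so
// quoted truths containing colons are extracted directly.
function extractIfFullyQuoted(item) {
  const m = /^"(.*)"$/.exec(item.trim());
  return m ? m[1] : null;
}
```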

Bug #2735: `gsd state complete-phase` subcommand was missing — unknown
subcommands fell through to cmdStateLoad. Added cmdStateCompletePhase to
state.cjs (updates Status, Last Activity, and Current Position to COMPLETE),
exported it, and wired it into the case 'state': dispatch in gsd-tools.cjs.

Closes #2734
Closes #2735

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(state): unknown subcommand returns explicit error instead of silent fallthrough

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:37:43 -04:00
Tom Boucher
3246810876 feat: extend RUNTIME_PROFILE_MAP to gemini/qwen/opencode/copilot + settings-advanced UI (#2754)
Closes #2612

- Add gemini, qwen, opencode, and copilot entries to RUNTIME_PROFILE_MAP in core.cjs
- Group B runtimes (kilo, cline, cursor, windsurf, augment, trae, codebuddy, antigravity) intentionally have no built-in map and fall through to the existing unknown-runtime fallback
- Add 40 new tests to tests/issue-2517-runtime-aware-profiles.test.cjs covering each new runtime's three tiers, Group B fall-through, and partial override merge semantics
- Add Section 7 "Runtime Model Tiers" to settings-advanced.md with interactive UI to view and override built-in tier defaults per runtime
- Update docs/CONFIGURATION.md built-in tier table to include all four new runtimes

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:54 -04:00
Tom Boucher
e0b4561fa9 feat: add /gsd-edit-phase command to modify roadmap phases in place (#2753)
Adds a new slash command that lets developers modify any field of an
existing phase in ROADMAP.md without affecting phase number or position.

- commands/gsd/edit-phase.md: command file with --force flag support
- get-shit-done/workflows/edit-phase.md: full workflow with status guard,
  depends_on validation, diff+confirmation, and STATE.md update
- tests/edit-phase.test.cjs: 32 tests covering all acceptance criteria
- docs/INVENTORY.md, INVENTORY-MANIFEST.json, COMMANDS.md: registered

Closes #2617

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:52 -04:00
Tom Boucher
8788ab2381 feat: post-merge build & test gate — Build step, iOS/Xcode, serial mode (#2751)
* feat: post-merge build & test gate — Build step, iOS/Xcode, serial mode

Step 5.6 of execute-phase is extended per #2720:
- Renamed from "Post-merge test gate (parallel mode only)" to "Post-merge build & test gate"
- Gate now runs in both parallel mode (after worktree merge) and serial mode (after last plan)
- Added Step A: Build gate resolving BUILD_CMD from the workflow.build_command
  config key, then auto-detecting via priority: config override → Xcode
  (.xcodeproj) → Makefile build: → Justfile → Cargo/Go/Python/npm. Xcode uses
  xcodebuild -list -json to get the first scheme, then xcodebuild build
  -scheme ... -destination 'platform=iOS Simulator,name=iPhone 16'. Build
  failure increments WAVE_FAILURE_COUNT.
- Added Xcode/iOS detection to Step B (Test gate): when *.xcodeproj is present
  and no workflow.test_command is configured, uses xcodebuild test instead of
  the previous "no test runner detected" skip. Scheme is reused from Step A
  when available.
- Documented workflow.build_command and workflow.test_command in
  docs/CONFIGURATION.md (table + JSON schema)

Closes #2720

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* refactor(execute-phase): extract Step 5.6 body to post-merge-gate.md sub-file

Moves the build-detection logic and xcodebuild commands from the inline
Step 5.6 body into execute-phase/steps/post-merge-gate.md, replacing it
with a single Read() reference. Reduces execute-phase.md from 1755 to
1647 lines, satisfying the ≤1700 XL-tier budget enforced by
tests/workflow-size-budget.test.cjs.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:50 -04:00
Tom Boucher
71a3f86fbe docs: add end-to-end workflow walkthrough to USER-GUIDE.md (#2749)
Adds a concrete single-phase walkthrough (webhook validator project)
showing ROADMAP.md, CONTEXT.md, PLAN.md, SUMMARY.md, and STATE.md
excerpts and how each command consumes the previous step's output.
Also adds links to the walkthrough from README.md's nav bar and
How It Works section.

Closes #2359

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:47 -04:00
Tom Boucher
bf73cbe1a4 test(#2688): add review.models.claude tests for per-runtime review model config (#2748)
Adds two tests to review-model-config.test.cjs:
- isValidConfigKey accepts review.models.claude (schema validation)
- round-trip: config-set then config-get for review.models.claude

The dynamic key pattern (^review\.models\.[a-zA-Z0-9_-]+$), the workflow
model-read logic in review.md, and the CONFIGURATION.md docs were already
in place. Only the claude-specific test coverage was missing.

Closes #2688

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:45 -04:00
Tom Boucher
d5cd64dde5 fix(#2637): migrate legacy Codex [hooks] map format to [[hooks]] array on install (#2747)
Codex 0.124.0 changed the required config.toml hooks format from the old
map-style ([hooks.shell]) to array-of-tables ([[hooks]]). Old GSD installs
that wrote the legacy format now cause a startup parse error on upgrade.

Add migrateCodexHooksMapFormat() which detects non-array [hooks] and
[hooks.TYPE] sections and rewrites them to [[hooks]] entries with an
injected type = "TYPE" key. The migration runs at the start of every Codex
install so affected configs self-heal on the next `gsd install --codex`.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:43 -04:00
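The map-to-array rewrite described in the commit above might look like this — a minimal sketch; the real `migrateCodexHooksMapFormat()` in the installer handles more edge cases:

```javascript
// Illustrative migration: rewrite each legacy [hooks.TYPE] header to an
// array-of-tables entry with the type injected as a key,
// e.g. [hooks.shell] → [[hooks]] + type = "shell".
function migrateHooksMapToArray(toml) {
  return toml.replace(
    /^\[hooks\.([A-Za-z0-9_-]+)\]$/gm,
    (_, type) => `[[hooks]]\ntype = "${type}"`,
  );
}
```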
Tom Boucher
8f2ec0e8f7 fix: add explicit wait-for-subagent instructions in orchestration workflows (#2755)
Adds ORCHESTRATOR RULE blockquotes immediately after every Task() spawn
in 26 GSD workflow files, instructing the parent orchestrator to stop
working on the task while the subagent is active. This prevents the
parallel-work anti-pattern on Codex runtime where the parent continues
reading files and producing duplicate/conflicting output after spawning.

Rules are placed inline at each spawn point (not as generic headers)
so they are adjacent to and unambiguously associated with each Task()
call. Background Task() spawns get a variant noting not to return to
the spawning context until the subagent reports back.

Closes #2729

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:35 -04:00
Tom Boucher
7255539ff9 fix: validate LM Studio model identity in review workflow (#2746)
* fix: validate LM Studio model identity in review workflow

Capture the full API response before extracting content, then compare
the top-level `.model` field against the configured LM_STUDIO_MODEL.
Emits a warning to stderr if LM Studio served a different model than
requested, while still proceeding with the review response.

Closes #2721

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(review): skip LM Studio review file when content is empty instead of writing error text

Also applies the same fix to llama.cpp which had the identical pattern of writing
a literal error string into the review temp file when content was empty/null.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:28 -04:00
Tom Boucher
b8bbc74192 fix(sdk): preserve nested keys from globalDefaults in configNewProject (#2745)
When building nested config sections (workflow, git, hooks, agent_skills,
features), the deep merge was missing globalDefaults for those sections,
causing user values from ~/.gsd/defaults.json to be silently dropped.

Added globalDefaults spread at the correct precedence level (hardcoded <
globalDefaults < userChoices) for all five nested keys, and added three
test cases verifying the merge works end-to-end via HOME env var override.

Closes #2673

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 13:33:26 -04:00
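The three-level precedence named in the commit above (hardcoded < globalDefaults < userChoices) reduces to spread ordering — key names here are illustrative, not the configNewProject source:

```javascript
// Later spreads win, so userChoices overrides globalDefaults, which
// overrides the hardcoded defaults for each nested section.
function mergeSection(hardcoded, globalDefaults = {}, userChoices = {}) {
  return { ...hardcoded, ...globalDefaults, ...userChoices };
}
```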
Tom Boucher
2b95ccbddd Merge pull request #2743 from gsd-build/fix/2714-workstream-config-inheritance
feat(config): workstream config.json inherits from root .planning/config.json
2026-04-26 11:49:30 -04:00
Tom Boucher
4a05283bc8 Merge pull request #2742 from gsd-build/fix/2731-workstream-query-handler-threadthrough
fix(query): thread workstream through all query handlers
2026-04-26 11:49:25 -04:00
Tom Boucher
80f4c9063f Merge pull request #2741 from gsd-build/fix/2727-codex-agents-toml-format
fix(installer): revert Codex agents section to [agents.<name>] struct format
2026-04-26 11:49:19 -04:00
Tom Boucher
41787e361f Merge pull request #2740 from gsd-build/fix/2641-details-summary-milestone-detection
fix(phase-lifecycle): skip <details>-wrapped sections in milestone detection
2026-04-26 11:49:14 -04:00
Tom Boucher
0eef943f0a Merge pull request #2739 from gsd-build/fix/2732-graphify-cli-invocation
fix(graphify): update CLI invocation from legacy flag form to subcommand
2026-04-26 11:49:09 -04:00
Tom Boucher
bbf33b608e Merge pull request #2738 from gsd-build/fix/2644-milestone-complete-version-arg
test(query): Vitest regression guard for milestone.complete version arg (fix #2644)
2026-04-26 11:49:03 -04:00
Tom Boucher
9e63d14709 Merge pull request #2737 from gsd-build/fix/2726-phase-add-bullet-bold-roadmap
fix(phase-lifecycle): detect phases in bullet/bold ROADMAP formats for phase.add
2026-04-26 11:48:58 -04:00
Tom Boucher
5b4a239ead Merge pull request #2736 from gsd-build/fix/2728-phase-complete-roadmap-corruption
fix(phase-lifecycle): prevent plan-line corruption when **Plans:** has no inline value
2026-04-26 11:48:52 -04:00
Tom Boucher
cb149383c1 fix(config): bump MODEL_ALIAS_MAP to claude-opus-4-7 (#2733)
* fix(config): bump MODEL_ALIAS_MAP and RUNTIME_PROFILE_MAP to claude-opus-4-7

Opus 4.7 shipped Q1 2026 but MODEL_ALIAS_MAP and RUNTIME_PROFILE_MAP.claude.opus
were still pinned to claude-opus-4-6. Users with resolve_model_ids: true received
stale model IDs in logs and agent-tool calls.

Also adds a resolve_model_ids: true test suite — this path had zero coverage,
which is why the stale ID survived undetected.

Closes #2712

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* refactor(config): derive RUNTIME_PROFILE_MAP.claude from MODEL_ALIAS_MAP (coderabbit)

RUNTIME_PROFILE_MAP.claude was duplicating model IDs that MODEL_ALIAS_MAP
already owns. Future model bumps now only require updating MODEL_ALIAS_MAP.
Also fixes stale test assertion (claude-opus-4-6 → claude-opus-4-7).

* fix(tests): update stale claude-opus-4-6 refs to claude-opus-4-7; DRY: derive RUNTIME_PROFILE_MAP.claude from MODEL_ALIAS_MAP

- Update 3 hardcoded `claude-opus-4-6` assertions in tests/issue-2517-runtime-aware-profiles.test.cjs to `claude-opus-4-7`
- Update comment on line 128 that referenced the old model ID
- Replace manual per-tier expansion of RUNTIME_PROFILE_MAP.claude with Object.fromEntries so future alias bumps only require updating MODEL_ALIAS_MAP

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 11:48:47 -04:00
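The derivation described in the refactor commit above can be sketched with `Object.fromEntries` — map contents here are abbreviated and the sonnet ID is an assumption; only claude-opus-4-7 appears in the commit text:

```javascript
// Hypothetical shape: RUNTIME_PROFILE_MAP.claude resolves each tier through
// MODEL_ALIAS_MAP, so a future model bump touches only the alias map.
const MODEL_ALIAS_MAP = {
  opus: 'claude-opus-4-7',
  sonnet: 'claude-sonnet-4-6', // assumed ID, for illustration only
};

const RUNTIME_PROFILE_MAP = {
  claude: Object.fromEntries(
    ['opus', 'sonnet'].map((tier) => [tier, MODEL_ALIAS_MAP[tier]]),
  ),
};
```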
Tom Boucher
274fc524cd fix(tests): derive STATE.md phase number from phaseDir in setupProject fixture
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 11:46:57 -04:00
Tom Boucher
b7b6f89776 fix(phase-lifecycle): fast-path replaceInCurrentMilestone only when pattern matches after
The previous guard `if (after.trim().length > 0)` incorrectly triggered
when `after` contained only footer text (e.g. `---\n*Last updated*`).
In that case `after.replace(pattern, replacement)` is a no-op and the
function returned unchanged content instead of falling through to the
slow path that searches inside the last `<details>` block.

Fix: capture the replaced string first, then only take the fast path
when the replacement actually changed `after`.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 11:46:40 -04:00
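The corrected guard described above — capture the replaced string first, take the fast path only when it actually changed — can be sketched like this; the function shape is hypothetical, not the real `replaceInCurrentMilestone`:

```javascript
// A footer-only tail (e.g. "---\n*Last updated*") leaves the replace a
// no-op; returning null signals the caller to fall through to the slow
// path that searches inside the last <details> block.
function fastPathReplace(after, pattern, replacement) {
  const replaced = after.replace(pattern, replacement);
  if (replaced !== after) return replaced;
  return null;
}
```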
Tom Boucher
76f1d20d80 test(phase-lifecycle): add regression for active <details> with trailing footer
Adds a case where the active milestone is the last <details> block but
footer text (--- / *Last updated*) follows </details>, triggering the
fast-path to replace in the footer instead of inside the block.

Closes #2743
2026-04-26 11:36:46 -04:00
Tom Boucher
022b577922 test(graphify): tighten version-check assertions per CodeRabbit nitpick
- Success path: add explicit python3Calls.length === 0 assertion so "no
  fallback" is stated directly rather than implied by calls.length === 1
- Fallback path: add explicit calls[0].cmd === 'graphify' assertion so
  "graphify precedes python3" is verified by name, not just argument
2026-04-26 11:36:14 -04:00
Tom Boucher
493e251bab refactor(tests): extract assertMilestoneSuccess helper in decomposed-handlers.test.ts
Centralizes repeated result.data cast + common success-field checks
per coderabbit review suggestion on PR #2738.
2026-04-26 11:24:31 -04:00
Tom Boucher
cfc79d211f fix(config-query): thread workstream through resolveModel handler
resolveModel ignored _workstream, unlike configGet/configPath which both
forward it to planningPaths/loadConfig. Different workstreams may have
different model_profile settings.

Addresses coderabbit finding on PR #2742.
2026-04-26 11:23:51 -04:00
Tom Boucher
7c08a155ea test(graphify): tighten call-sequence assertions per coderabbit review
Adds explicit call-count and ordering assertions to version-check tests:
- Success path: exactly 1 spawnSync (graphify --version only, no python fallback)
- Failure path: graphify --version attempted first, python3 fallback second

Addresses coderabbit nitpick on PR #2739.
2026-04-26 11:21:54 -04:00
Tom Boucher
8270f17773 fix(phase-lifecycle): handle project-code-prefixed dirs in phaseAdd fallback scan
Filesystem fallback regex /^(\d+)-/ missed directories like CK-45-foundation
when project_code is configured. Updated to /^(?:[A-Z][A-Z0-9]*-)?(\d+)-/i.

Addresses coderabbit finding on PR #2737.
2026-04-26 11:21:33 -04:00
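The broadened fallback regex from the commit above, shown against both plain and project-code-prefixed phase directory names (the helper wrapper is illustrative):

```javascript
// Optional [A-Z][A-Z0-9]*- prefix handles configured project codes,
// so CK-45-foundation and 45-foundation both yield phase 45.
const phaseDirPattern = /^(?:[A-Z][A-Z0-9]*-)?(\d+)-/i;

function phaseNumber(dirName) {
  const m = dirName.match(phaseDirPattern);
  return m ? Number(m[1]) : null;
}
```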
Tom Boucher
c85e65ec03 fix(roadmap-update-plan-progress): propagate **Plans:** regex hardening from phase-lifecycle
Applies the same [ \t]* + section-boundary lookahead fix that was applied
to planCountPattern in phase-lifecycle.ts. roadmap.update-plan-progress
shared the same corruption vector via \s* crossing newlines.

Addresses coderabbit finding on PR #2736.
2026-04-26 11:19:34 -04:00
Tom Boucher
1cb4bebcf5 feat(config): workstream config.json inherits from root .planning/config.json
- Add _deepMergeConfig() with correct null-override semantics
- loadConfig() reads root config.json when GSD_WORKSTREAM is set, then
  deep-merges with workstream config (workstream wins on conflict)
- Workstream without config.json falls back to root config entirely
- Migrations and disk writes operate on fileData (on-disk content) only,
  never on the merged result, to prevent workstream pollution
- Fixes null-override bug from PR #2717: explicit null in workstream now
  correctly overrides root value instead of falling back to root
- Tests: inherit root model_overrides, workstream override, nested
  workflow.* deep merge, explicit null override, missing workstream config

Closes #2714
2026-04-26 11:15:16 -04:00
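The null-override semantics described above — an explicit null in the workstream config overrides the root value rather than falling back to it — can be sketched as a recursive merge; this is an illustration, not the actual `_deepMergeConfig()`:

```javascript
// Plain objects merge recursively (workstream wins on conflict);
// everything else, null included, replaces the root value outright.
function deepMergeConfig(root, overlay) {
  const out = { ...root };
  for (const [key, value] of Object.entries(overlay)) {
    const bothObjects =
      value !== null && typeof value === 'object' && !Array.isArray(value) &&
      out[key] !== null && typeof out[key] === 'object' && !Array.isArray(out[key]);
    out[key] = bothObjects ? deepMergeConfig(out[key], value) : value;
  }
  return out;
}
```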
Tom Boucher
3a623b1117 fix(query): thread workstream through all query handlers
All 18+ query handlers accepted _workstream but never forwarded it to
planningPaths/loadConfig/getMilestoneInfo. Remove _ prefix and pass
workstream to all internal helper calls so --ws flag actually scopes
path resolution.

Affected handlers: initNewProject, initProgress, initManager, configGet,
configPath, configSet, configSetModelProfile, configNewProject,
configEnsureSection, validateHealth, commit, checkCommit, commitToSubrepo.

Also fixes validate.ts to use paths.* fields from planningPaths instead
of hardcoded join(projectDir, '.planning') paths.

Closes #2731
2026-04-26 10:53:17 -04:00
Tom Boucher
f6cddc5b2f test(query): add failing tests for workstream path threading in init-complex
Demonstrates that initProgress and initManager ignore the workstream
parameter, reading from root .planning/ instead of the workstream
subdirectory.

Closes #2731
2026-04-26 10:38:10 -04:00
Tom Boucher
7924abec0c fix(installer): revert Codex agents section to [agents.<name>] struct format
[[agents]] sequence format (introduced in #2645) is rejected by
codex-cli 0.124.0 with "invalid type: sequence, expected struct AgentsToml".
Revert to [agents.<name>] struct format which is correct for 0.120.0+.

stripCodexGsdAgentSections already handles both formats for self-healing
configs written by previous GSD versions using [[agents]].

Closes #2727
2026-04-26 10:35:30 -04:00
Tom Boucher
8f7f43abaa test(installer): add failing test for [agents.<name>] struct format (Codex 0.124.0+)
Adds test asserting generateCodexConfigBlock emits [agents.<name>] struct
format, not [[agents]] sequence format. Confirms RED phase for #2727 fix.
2026-04-26 10:35:23 -04:00
Tom Boucher
0f17cfc71d fix(phase-lifecycle): skip <details>-wrapped sections in milestone detection
replaceInCurrentMilestone's lastIndexOf('</details>') heuristic fails
when the active milestone itself is wrapped in a <details> block — the
after-slice is empty so the replacement is silently dropped.

Fix detects this case (after.trim().length === 0) and falls back to
locating the last complete <details>…</details> span and applying the
replacement only inside it, leaving all earlier archived-milestone
blocks untouched.

Closes #2641
2026-04-26 10:28:48 -04:00
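The fallback described above — locate the last complete `<details>…</details>` span and apply the replacement only inside it — can be sketched like so; this helper is hypothetical, not the phase-lifecycle source:

```javascript
// Earlier archived-milestone blocks stay untouched because only the
// slice between the last opening and closing tags is rewritten.
function replaceInLastDetails(content, pattern, replacement) {
  const close = content.lastIndexOf('</details>');
  if (close === -1) return content;
  const open = content.lastIndexOf('<details>', close);
  if (open === -1) return content;
  const span = content.slice(open, close);
  return content.slice(0, open) + span.replace(pattern, replacement) + content.slice(close);
}
```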
Tom Boucher
a7d3bb948b test(phase-lifecycle): add failing test for active milestone in <details> block
Closes #2641
2026-04-26 10:23:41 -04:00
Tom Boucher
f89a56eb55 fix(graphify): update CLI invocation from legacy flag form to subcommand
graphify . --update was removed in favor of graphify update . in v0.4.x.
Also improves version detection to try `graphify --version` before
falling back to python3 importlib query.

Closes #2732
2026-04-26 10:20:41 -04:00
Tom Boucher
f8a0e6f145 test(query): add Vitest regression tests for milestone.complete version arg (fix #2644)
milestoneComplete was imported in decomposed-handlers.test.ts but had zero
test coverage. The original defect (6f79b1d) called phasesArchive([], ...)
instead of forwarding the positional version arg; the wrapping try/catch
swallowed the GSDError into { completed: false, reason: String(err) },
masking a programming error as a legitimate negative answer.

Add five Vitest tests that lock in the correct contract:
- positional version arg is extracted from args[0] and echoed in response
- missing version throws GSDError (not masked as completed: false)
- --archive-phases flag is processed
- --name flag sets milestone name
- response shape has version/date/phases/milestones_updated fields

Closes #2644

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 10:18:08 -04:00
Tom Boucher
54cbc2ad96 fix(phase-lifecycle): detect phases in bullet/bold ROADMAP formats
phaseAdd's phase-number regex only matched heading format (## Phase N:),
missing bullet checklist (- [x] Phase N:) and bold (**Phase N:**) entries.
When zero regex matches, newPhaseId defaulted to 1.

Fix: broaden regex to match all three formats, and add filesystem fallback
scanning .planning/phases/ when ROADMAP scan finds nothing.

Closes #2726
2026-04-26 10:00:39 -04:00
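A regex covering the three ROADMAP phase formats named above — heading (`## Phase N:`), bullet checklist (`- [x] Phase N:`), and bold (`**Phase N:**`) — might look like this; the exact pattern in phase-lifecycle.ts may differ:

```javascript
// One alternation per supported format, anchored per line via the m flag.
const phaseEntryPattern = /^(?:#{2,4}\s+|-\s+\[[ xX]\]\s+|\*\*)Phase\s+(\d+):/gm;

function phaseNumbers(roadmap) {
  return [...roadmap.matchAll(phaseEntryPattern)].map((m) => Number(m[1]));
}
```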
Tom Boucher
a9f49f8f9d fix(phase-lifecycle): prevent plan-line overwrite when **Plans:** is bare
\s* after **Plans:** matches newlines, causing [^\n]+ to consume the first
plan checkbox when the **Plans:** field has no value on the same line.
Additionally, the lazy [\s\S]*? could cross section boundaries when the
current section had no **Plans:** value, corrupting a later section.

Fix 1: replace \s* with [ \t]* to restrict post-colon match to horizontal
whitespace only.
Fix 2: replace [\s\S]*? with (?:(?!\n#{2,4})[\s\S])*? to prevent the
pattern from crossing into a new section heading.

Closes #2728
2026-04-26 09:46:57 -04:00
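The bare-`**Plans:**` hazard in Fix 1 above can be demonstrated directly: with `\s*` the post-colon match crosses the newline and consumes the first checkbox, while `[ \t]*` keeps it on the same line. Both patterns here are simplified illustrations of planCountPattern:

```javascript
const greedyPattern = /\*\*Plans:\*\*\s*[^\n]*/;    // old: \s* can eat the next line
const safePattern   = /\*\*Plans:\*\*[ \t]*[^\n]*/; // fixed: horizontal whitespace only

// **Plans:** on its own line, first plan checkbox on the next line.
const section = '**Plans:**\n- [ ] 01-01-PLAN.md';
```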
Tom Boucher
394403ae06 test(phase-lifecycle): add failing regression test for #2728
When **Plans:** appears on its own line with no inline value, the
planCountPattern regex crosses the newline and destroys the first
plan checkbox line by replacing it with the literal "N/N plans complete"
string.

This test documents the expected correct behavior and will fail until
the planCountPattern regex is fixed.
2026-04-26 09:46:51 -04:00
Lex Christopherson
f3685d9173 1.38.5 2026-04-25 17:56:06 -06:00
Lex Christopherson
22b73f548d docs: update changelog for v1.38.5 2026-04-25 17:56:06 -06:00
Lex Christopherson
2fafbd2753 fix(sdk): pass phaseDir to executor prompt so SUMMARY.md lands in .planning/
The SDK's buildExecutorPrompt told executors to "Create a SUMMARY.md file"
with no directory path, causing them to write it in cwd (project root)
instead of .planning/phases/{phase}/. Thread phaseDir from PhaseRunner
through PromptFactory and into the completion instructions so the executor
gets an explicit path like `.planning/phases/01-auth/01-01-SUMMARY.md`.

Backward compatible — buildExecutorPrompt still accepts a plain string
(agentDef) for existing callers.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 17:56:06 -06:00
Tom Boucher
470c1a0bff fix(#2722): forensics gh commands pin --repo gsd-build/get-shit-done (#2723)
* fix(#2722): forensics gh commands pin --repo gsd-build/get-shit-done

gh issue create and gh label list both defaulted to the repo inferred
from $PWD, causing issues to be submitted to the user's current project
instead of this repo. Added --repo gsd-build/get-shit-done to both
commands. Added two regression tests covering both gh calls.

Closes #2722

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2723): scope forensics tests to specific gh commands, not whole file

CodeRabbit found that the gh issue create test searched the whole
workflow file, so it would pass even if gh issue create lacked --repo
(because gh label list already contains the repo string elsewhere).
Also replaced the brittle 200-char slice in the label-list test with
a regex. Both tests now use assert.match() with command-scoped regexes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 16:47:55 -04:00
Tom Boucher
caf6974bbf fix(ci): remove stale SDK-variant tests for files deleted in 377a6d2 (#2725)
377a6d2 deleted sdk/prompts/agents/ and sdk/prompts/workflows/ (13 files)
but did not update 3 test files that reference them, causing ENOENT
failures on every CI run (main and all PRs) since that commit.

Removed:
- sdk/prompts/agents variants describe block (enh-2427-sycophancy-hardening)
- PLAN_PHASE_SDK_PATH constant and headless plan-phase test (post-planning-gaps-2493)
- sdk/prompts/workflows/verify-phase.md describe block (verifier-deferred-items)

The underlying behaviour is covered by the existing main agent/workflow
tests; the SDK variant tests are moot now that the SDK loads installed
files instead of bundled stripped-down copies.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 16:26:15 -04:00
Lex Christopherson
a5a2d44121 1.38.4 2026-04-25 14:13:49 -06:00
Lex Christopherson
73abae60f0 1.38.3 2026-04-25 14:11:46 -06:00
Lex Christopherson
efab0545c7 docs: update changelog for v1.38.3 2026-04-25 14:11:33 -06:00
Lex Christopherson
f0953dec0c fix(sdk): prevent interactive tool calls in headless self-discuss mode
The discuss step loaded the full interactive workflow prompt which
instructs the agent to use AskUserQuestion, Skill(), and area selection
UIs. In headless auto mode, the agent followed these instructions and
tried to interact with a non-existent user.

Fix: prepend a mandatory headless override BEFORE the workflow prompt
that explicitly forbids interactive tools and instructs the agent to
make all decisions autonomously. Prepending (not appending) ensures
the override takes priority over conflicting instructions later in
the prompt.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 14:08:05 -06:00
Lex Christopherson
25d9763878 fix(sdk): fix executor plan loading, plan ID derivation, and verification outcome parsing
Bug 1: phasePlanIndex derived empty planId for bare PLAN.md files.
Fixed to use 'PLAN' as the ID, with matching SUMMARY.md detection.

Bug 2: executeSinglePlan passed null to buildPrompt instead of the
actual parsed plan. The executor needs the plan content (tasks,
objectives) to know what to build. Now loads and parses the plan
file before building the prompt.

Bug 3: parseVerificationOutcome checked session exit code, not what
the verifier wrote. A session that runs without errors but writes
status: gaps_found to VERIFICATION.md was treated as 'passed'. Now
queries check.verification-status to read the actual VERIFICATION.md
frontmatter status field.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 14:08:05 -06:00
Lex Christopherson
377a6d2c6e fix(sdk): use installed agent/workflow prompts instead of stripped-down bundled copies
The SDK bundled its own agents and workflows at ~17% the size of the real
ones, missing critical instructions like file naming conventions, scope
reduction rules, discovery protocols, and TDD integration. This caused
the planner to create a single PLAN.md instead of properly named
per-plan files (01-01-PLAN.md, 01-02-PLAN.md), breaking wave-based
parallel execution.

- Invert load priority: installed GSD agents/workflows first, SDK
  bundled as last-resort fallback
- Replace @-reference stripping with resolution (read + inline content)
- Use full agent definitions instead of extracting only the <role> block
- Delete sdk/prompts/agents/ and sdk/prompts/workflows/ (13 files)
- Delete headless-prompts.test.ts (validated deleted files)
- Thread projectDir through sanitizePrompt for @-reference resolution

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 14:08:05 -06:00
Tom Boucher
1068223439 feat(#2500): enrich gsd-codebase-mapper arch-focus ARCHITECTURE.md with ASCII diagrams, data flow traces, and constraints (#2715)
* feat(#2500): enrich gsd-codebase-mapper arch-focus ARCHITECTURE.md template

The codebase mapper's arch-focus template was a sparse structural inventory.
After major refactors, the research/ARCHITECTURE.md (created at /gsd-new-project
and never refreshable) went stale while the refreshable codebase version lacked
the visual richness that makes architecture docs useful for planning.

Add to the ARCHITECTURE.md template:
- <!-- refreshed: {date} --> marker at the top (maintainer request)
- ASCII system overview diagram with component boxes and flow arrows
- Component responsibility table (Component / Responsibility / File)
- Primary request path traces with numbered steps and code references
- Architectural constraints section (threading, global state, circular imports)
- Anti-patterns section with codebase-specific patterns and correct alternatives

All existing sections (Pattern Overview, Layers, Key Abstractions, Entry Points,
Error Handling, Cross-Cutting Concerns) are preserved.

7 new tests in tests/enh-2500-codebase-mapper-arch-rich-format.test.cjs verify
each required section is present in the deployed template.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2500): resolve CodeRabbit review findings

- Add 'text' language tag to bare ASCII diagram fenced block (markdownlint MD040)
- Tighten data flow test: require '### Primary Request Path' heading, 3+
  numbered steps, and file:line reference pattern — prevents loose-match
  false positives
- Tighten constraints test: require '## Architectural Constraints' heading
  AND Threading / Global state / Circular imports tokens — prevents broad
  keyword matches masking regressions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 14:23:40 -04:00
Tom Boucher
b40110111d feat(#2306): plan-review-convergence v2 — CYCLE_SUMMARY contract, config gate, local model reviewers (#2718)
* feat(#2306): plan-review-convergence v2 — CYCLE_SUMMARY contract, config gate, local model reviewers

Fixes the false-stall detection bug in the plan→review→replan convergence
loop. REVIEWS.md accumulates history across cycles so raw grep inflated
HIGH counts; HIGH count now comes from a per-cycle CYCLE_SUMMARY contract
emitted in the review agent's return message.

Key changes:
- workflow.plan_review_convergence config gate (disabled by default, same
  pattern as workflow.code_review / workflow.nyquist_validation)
- Review agent prompt defines CYCLE_SUMMARY: current_high=<N> contract with
  PARTIALLY RESOLVED / FULLY RESOLVED counting rules
- Orchestrator aborts on absent/malformed CYCLE_SUMMARY (distinguishes both)
- Warns when HIGH_COUNT > 0 but ## Current HIGH Concerns section is missing
- Stall detection and --ws forwarding preserved and tested
- Local model reviewers: --ollama, --lm-studio, --llama-cpp flags added to
  convergence workflow and review workflow; all three use OpenAI-compatible
  /v1/chat/completions endpoint with jq --rawfile for safe JSON encoding
- review.ollama_host / review.lm_studio_host / review.llama_cpp_host config
  keys registered and documented (default to localhost:11434/1234/8080)
- review.models.ollama / .lm_studio / .llama_cpp model-name config support
- 58 tests (up from 29 in PR #2339), all passing

Closes #2306
Closes #2339
Co-authored-by: Tom Boucher <trekkie@nomorestars.com>

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(ci): sync sdk/src/query/config-schema.ts with CJS schema (#2306)

Add workflow.plan_review_convergence, review.ollama_host,
review.lm_studio_host, and review.llama_cpp_host to the SDK-side
TypeScript mirror — required by the CJS↔SDK parity test (#2653).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2306): resolve CodeRabbit review findings

- Anchor HIGH_COUNT extraction with head -1 to prevent multi-match when
  agent return message contains multiple CYCLE_SUMMARY lines (e.g. quoted
  back from prompt context)
- Replace hardcoded reviewers list in REVIEWS.md frontmatter template with
  runtime-derived placeholder — the static list did not reflect which
  reviewers were actually invoked
- Broaden workflow.plan_review_convergence docs to include local reviewers
  (Ollama, LM Studio, llama.cpp) alongside cloud reviewers

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(ci): restore reviewers frontmatter list with runtime note

The cursor-reviewer.test.cjs (and equivalent per-reviewer tests) assert
that each supported reviewer appears on the reviewers: line — these are
wiring tests that catch when a new reviewer is added to invocation but
not to the REVIEWS.md template. Replacing the list with a placeholder
broke those tests.

Restore the full static list and add an inline comment clarifying that
the actual committed frontmatter should be filtered to only the reviewers
invoked that run — satisfying both the per-reviewer tests and the
CodeRabbit correctness note.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 14:18:34 -04:00
Tom Boucher
3da9420a38 fix(#2698,#2678): CRLF agent-block strip regex + local install skips SDK check (#2710)
Fixes #2698 — The two separate LF/CRLF .replace() calls in the Codex hooks
migration could not handle mixed line endings (e.g. header in LF, body in
CRLF), leaving stale gsd-update-check blocks after reinstall. Consolidated to
a single \r?\n-aware regex with gm flags that handles LF, CRLF, and mixed
content in one pass.

Fixes #2678 — installSdkIfNeeded() called process.exit(1) unconditionally when
sdk/dist/cli.js was missing, even during --local installs where users cannot
write to global node_modules. Added isLocal option: when true, prints a warning
and returns instead of exiting.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 12:16:04 -04:00
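The single `\r?\n`-aware strip described in the #2698 fix above can be sketched as one tempered regex pass — the marker text here is illustrative, not the literal block markers in config.toml:

```javascript
// One gm-flagged pattern handles LF, CRLF, and mixed line endings:
// every hard newline in the block is matched as \r?\n, and the tempered
// [\s\S] scan stops at the end marker rather than running past it.
const gsdBlockPattern =
  /^# gsd-update-check start\r?\n(?:(?!# gsd-update-check end)[\s\S])*# gsd-update-check end\r?\n?/gm;

function stripGsdBlocks(config) {
  return config.replace(gsdBlockPattern, '');
}
```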
Tom Boucher
3fe5759d7c fix(#2686): review-fix agent uses isolated git worktree, prevents main-tree race (#2705)
* fix(#2686): review-fix agent now uses git worktree for isolation

The gsd-code-fixer agent operated directly against the main working tree,
racing any concurrent foreground session for HEAD, the index, and on-disk
files. Added a setup_worktree step (git worktree add /tmp/sv-N-reviewfix
HEAD) as the first action before any file operations, with unconditional
git worktree remove cleanup on exit. Mirrors the pattern used by all other
GSD per-issue agents.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2686): address CodeRabbit review — mktemp unique path, branch-aware worktree, tighten test assertions

- Use mktemp -d for unique worktree path (prevents concurrent-run collision)
- Resolve branch via git branch --show-current before worktree add (prevents detached HEAD)
- Error-and-exit on worktree add failure instead of force-removing shared path
- Test: use .exec().index for checkout position (not indexOf on match string)
- Test: match gsd-sdk query commit as well as git commit for ordering assertion
- Test: tighten /tmp path assertion to require actual /tmp/sv- assignment

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 12:15:39 -04:00
Tom Boucher
8e21c9b1b7 fix(#2684,#2676): milestone.complete arg forwarding + parallel milestone phase routing (#2708)
* test(#2692): add behavioral --wave N test, annotate source-text assertions

Adds two behavioral tests for wave filtering via phase-plan-index:
- Verifies plans with wave frontmatter are correctly grouped by wave number
- Verifies plans with no wave field default to wave 1

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2684,#2676): milestone.complete version validation + parallel milestone phase routing

#2684: Confirms milestone.complete correctly validates and uses its version
argument end-to-end. The inline archive path in milestoneComplete already
forwarded version correctly; regression tests lock in that contract.

#2676: phase.complete applied getMilestonePhaseFilter unconditionally, using
STATE.md's primary milestone to scope the candidate set. When the completed
phase belongs to a parallel (secondary) milestone, the filter excluded all
phases from that milestone, leaving an empty candidate set and incorrectly
returning is_last_phase: true / next_phase: null.

Fix: before applying the milestone filter in Step E, check whether the
completed phase itself appears in the filtered set. If not, skip the filter
for both the directory scan and the ROADMAP.md fallback so phases from the
secondary milestone remain visible for next-phase detection.
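The Step E guard described above can be sketched as follows (function and variable names are illustrative, not the actual gsd-tools identifiers):

```javascript
// If the milestone filter would hide the phase that was just completed,
// skip the filter so next-phase detection still sees its siblings.
function candidatePhases(allPhases, milestoneFilter, completedPhase) {
  const filtered = allPhases.filter(milestoneFilter);
  // Completed phase absent from the filtered set means it belongs to a
  // parallel (secondary) milestone — fall back to the unfiltered set.
  const visible = filtered.includes(completedPhase) ? filtered : allPhases;
  const idx = visible.indexOf(completedPhase);
  const next = idx >= 0 && idx < visible.length - 1 ? visible[idx + 1] : null;
  return { next_phase: next, is_last_phase: next === null };
}
```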

Closes #2684
Closes #2676

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 12:10:01 -04:00
Tom Boucher
8393f4b355 fix(#2601): config-set-model-profile now accepts 'inherit' value (#2707)
VALID_PROFILES was derived solely from Object.keys(MODEL_PROFILES['gsd-planner']),
which only contained the named tiers (quality/balanced/budget/adaptive). The
cmdConfigSetModelProfile validator rejected 'inherit' even though the runtime has
supported it since #1829. Fix: append 'inherit' to VALID_PROFILES and handle it
in getAgentToModelMapForProfile so the agent→model table shows 'inherit' instead
of undefined.
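A minimal sketch of the change, using a simplified stand-in for MODEL_PROFILES (the real table maps many agents, not one):

```javascript
// Simplified stand-in for the real MODEL_PROFILES table.
const MODEL_PROFILES = {
  'gsd-planner': { quality: 'opus', balanced: 'sonnet', budget: 'haiku', adaptive: 'sonnet' },
};

// Named tiers come from the profile table; 'inherit' is appended because
// it is a runtime-supported sentinel, not a tier with its own model map.
const VALID_PROFILES = [...Object.keys(MODEL_PROFILES['gsd-planner']), 'inherit'];

function getAgentToModelMapForProfile(profile) {
  if (profile === 'inherit') {
    // Show 'inherit' per agent instead of an undefined tier lookup.
    return Object.fromEntries(
      Object.keys(MODEL_PROFILES).map((agent) => [agent, 'inherit'])
    );
  }
  return Object.fromEntries(
    Object.entries(MODEL_PROFILES).map(([agent, tiers]) => [agent, tiers[profile]])
  );
}
```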

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 12:09:27 -04:00
Tom Boucher
b7ff14fe51 fix(#2687): derive KNOWN_TOP_LEVEL from DYNAMIC_KEY_PATTERNS to eliminate read-side drift (#2706)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 12:08:59 -04:00
Tom Boucher
94f8e895c0 test(#2692): add behavioral --wave N test, annotate source-text assertions (#2704)
Adds two behavioral tests for wave filtering via phase-plan-index:
- Verifies plans with wave frontmatter are correctly grouped by wave number
- Verifies plans with no wave field default to wave 1

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 12:04:16 -04:00
Tom Boucher
70f01e0c57 test(#2695): replace CR-CONFIG source-grep + config bypass tests with behavioral config-set assertions (#2702)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 12:03:39 -04:00
Tom Boucher
56ae7a73f5 test(#2694): eliminate shared mutable content state — move readFileSync to describe scope (#2703)
Fixes #2694

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 12:02:58 -04:00
Tom Boucher
aeef87de7f docs(test-standards): enforce no-source-grep rule with CI linter + CONTRIBUTING.md (#2700)
* docs(test-standards): enforce no-source-grep rule with CI linter + update CONTRIBUTING.md

Adds scripts/lint-no-source-grep.cjs — a static linter that detects readFileSync
on .cjs source files in tests without an allow-test-rule annotation. Wires it
into CI as a new lint-tests job in test.yml and as npm run lint:tests.

Resolves all 9 existing violations across the test suite:
- Rewrites workspace routing tests (3) as behavioral runGsdTools calls that
  verify each command is router-recognized (exit != "Unknown init workflow")
- Adds allow-test-rule annotations with explanatory comments to 7 legitimate
  structural tests: architectural invariants (locking, orphan-worktree),
  structural regression guards (milestone-regex-global), docs-parity
  (config-field-docs), integration-test-input (copilot-install), and
  structural-implementation-guards (bug-1891, discuss-mode)

Updates CONTRIBUTING.md Testing Standards section with:
- "Prohibited: Source-Grep Tests" section with the before/after pattern,
  root cause analysis of why it breaks (commit 990c3e64), and CI reference
- allow-test-rule exemption table (6 recognized categories with when-to-use)
- "CI Test Quality Checks" table showing lint-tests job and local run command

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: resolve CodeRabbit findings on PR #2700

- CONTRIBUTING.md: "four recognized categories" → "six" (table has 6 rows)
- workspace.test.cjs: use positional args in routing tests (no --name flag)
- lint-no-source-grep.cjs: add source-dir guard to READ_WITH_INLINE_CJS_RE
  (mirrors CJS_PATH_CONST_RE's protection against false positives on temp files)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(lint): tighten allow-test-rule and add recursive test discovery

- ALLOW_ANNOTATION now requires at least one non-whitespace char after the
  colon so bare '// allow-test-rule:' cannot bypass the lint gate
- findTestFiles() recurses into subdirectories so nested *.test.cjs files
  are covered if the tests/ tree ever grows subdirs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 11:34:55 -04:00
Tom Boucher
b1a670e662 fix(#2697): replace retired /gsd: prefix with /gsd- in all user-facing text (#2699)
All workflow, command, reference, template, and tool-output files that
surfaced /gsd:<cmd> as a user-typed slash command have been updated to
use /gsd-<cmd>, matching the Claude Code skill directory name.

Closes #2697

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 10:59:33 -04:00
Tom Boucher
7c6f8005f3 test: destroy 9 config-schema.cjs/core.cjs source-grep tests, replace with behavioral config-set (#2696)
* test: destroy 9 config-schema.cjs/core.cjs source-grep tests, add behavioral config-set tests (#2691, #2693)

Replace source-grep theater with config-set behavioral tests:
- execute-phase-wave: config-set workflow.use_worktrees replaces VALID_CONFIG_KEYS grep
- inline-plan-threshold: delete redundant source-grep (behavioral test at L36 already covered it)
- plan-bounce: config-set for plan_bounce / plan_bounce_script / plan_bounce_passes replaces 3 key-presence greps
- code-review: config-set for code_review / code_review_depth replaces 2 greps; removes CONFIG_PATH constant
- thinking-partner: config-set features.thinking_partner replaces two greps (config-schema.cjs AND core.cjs)

Behavioral tests survive refactors (no path constants, no file reads). The config-schema.cjs →
core.cjs migration commit 990c3e64 happened because these tests grepped source paths.

Add allow-test-rule: source-text-is-the-product annotations to legitimate product-content tests:
autonomous-allowed-tools, agent-frontmatter, agent-skills-awareness, bug-2334, bug-2346,
execute-phase-wave (MD reads), plan-bounce (workflow reads). Annotations explain WHY text
inspection is the right level of testing for AI instruction files.

Closes #2691
Closes #2693

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* test: address CodeRabbit findings on #2696

- agent-frontmatter.test.cjs: move allow-test-rule annotation from block comment
  to standalone // line comment so rule scanners can detect it
- thinking-partner.test.cjs: strengthen config-set test with config-get read-back
  assertion to verify the value was persisted, not just accepted (exit 0)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* test: tighten thinking_partner config assertion per CodeRabbit (#2696)

Replace config-get output substring check (includes('true') false-positive
risk) with a direct JSON read of .planning/config.json, asserting the
exact persisted value via strictEqual. This also validates the config file
was created, catching silent key-acceptance without persistence.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 10:50:54 -04:00
Tom Boucher
cd05725576 fix(#2661): unconditional plan-checkbox sync in execute-plan (#2682)
* fix(#2661): unconditional plan-checkbox sync in execute-plan

Checkpoint A in execute-plan.md was wrapped in a "Skip in parallel mode"
guard that also short-circuited the parallelization-without-worktrees
case. With `parallelization: true, use_worktrees: false`, only
Checkpoint C (phase.complete) then remained, and any interruption
between the final SUMMARY write and phase complete left ROADMAP.md
plan checkboxes stale.

Remove the guard: `roadmap update-plan-progress` is idempotent and
atomically serialized via readModifyWriteRoadmapMd's lockfile, so
concurrent invocations from parallel plans converge safely.

Checkpoint B (worktree-merge post-step) and Checkpoint C
(phase.complete) become redundant after A is unconditional; their
removal is deferred to a follow-up per the RCA.

Closes #2661

* fix(#2661): gate ROADMAP sync on use_worktrees=false to preserve single-writer contract

Adversarial review of PR #2682 found that unconditionally removing the
IS_WORKTREE guard violates the single-writer contract for shared
ROADMAP.md established by commit dcb50396 (PR #1486). The lockfile only
serializes within a single working tree; separate worktrees have
separate ROADMAP.md files that diverge.

Restore the worktree guard but document its intent explicitly: the
in-handler sync runs only when use_worktrees=false (the actual #2661
reproducer). Worktree mode relies on the orchestrator's post-merge
update at execute-phase.md lines 815-834, which is the documented
single-writer for shared tracking files.

Update tests to assert both branches of the gate:
- use_worktrees: false mode runs the sync (the #2661 case)
- use_worktrees: true mode does NOT run the in-handler sync
- handler-level idempotence and lockfile contention tests retained,
  scope clarified to within-tree concurrency only
2026-04-24 20:27:59 -04:00
Tom Boucher
c811792967 fix(#2660): capture prose after labeled bold in extractOneLinerFromBody (#2679)
* fix(#2660): capture prose after label in extractOneLinerFromBody

The regex `\*\*([^*]+)\*\*` matched the first bold span, so for the new
SUMMARY template `**One-liner:** Real prose here.` it captured the label
`One-liner:` instead of the prose. MILESTONES.md then wrote bullets like
`- One-liner:` with no content.

Handle both template forms:
- Labeled:  `**One-liner:** prose`  → prose
- Bare:     `**prose**`             → prose (legacy)

Empty prose after a label returns null so no bogus bullets are emitted.

Note: existing MILESTONES.md entries generated under the bug are not
regenerated here — that is a follow-up.

Closes #2660

* fix(#2660): normalize CRLF before one-liner extraction

Windows-authored SUMMARY files use CRLF line endings; the LF-only regex
in extractOneLinerFromBody would fail to match. Normalize \r\n and \r
to \n before stripping frontmatter and matching the one-liner pattern.

Adds test case (h) covering CRLF input.
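Both extraction forms, plus the CRLF normalization from the second commit, can be sketched as follows (the function name is illustrative):

```javascript
// Extract the one-liner prose from a SUMMARY body, handling both the
// labeled template form and the legacy bare-bold form.
function extractOneLiner(body) {
  // Normalize CRLF/CR so Windows-authored SUMMARY files match LF patterns.
  const text = body.replace(/\r\n?/g, '\n');
  // Labeled form: **One-liner:** prose → capture the prose, not the label.
  const labeled = text.match(/\*\*One-liner:\*\*[ \t]*(.*)/);
  if (labeled) {
    const prose = labeled[1].trim();
    // Empty prose after the label returns null: no bogus bullets emitted.
    return prose.length > 0 ? prose : null;
  }
  // Legacy bare form: **prose** → capture inside the first bold span.
  const bare = text.match(/\*\*([^*]+)\*\*/);
  return bare ? bare[1].trim() : null;
}
```

Checking the labeled form first is what prevents the generic first-bold-span match from capturing the `One-liner:` label.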
2026-04-24 20:22:29 -04:00
Tom Boucher
34b39f0a37 test(#2659): regression guard against bare output() in audit-open handler (#2680)
* fix(#2659): qualify bare output() calls in audit-open handler

The audit-open dispatch case in bin/gsd-tools.cjs previously called bare
output() on both --json and text branches, which crashed with
ReferenceError: output is not defined. The core module is imported as
`const core`, so every other case uses core.output(). HEAD already
qualifies the calls correctly; this commit adds a regression test that
invokes `audit-open` and `audit-open --json` through runGsdTools and
asserts a clean exit plus non-empty stdout (and an explicit check that
the failure mode is not ReferenceError). The test fails on any revision
where either call reverts to bare output().

Closes #2659

* test(#2659): assert valid JSON output in --json mode

CodeRabbit nit: tighten --json regression coverage by parsing stdout
and asserting the result is a JSON object/array, not just non-empty.
2026-04-24 20:22:17 -04:00
Tom Boucher
b1278f6fc3 fix(#2674): align initProgress with initManager ROADMAP [x] precedence (#2681)
initProgress computed phase status purely from disk (PLAN/SUMMARY counts),
consulting the ROADMAP `- [x] Phase N` checkbox only for phases with no
directory. initManager, by contrast, applied an explicit override: a
ROADMAP `[x]` forces status to `complete` regardless of disk state.

Result: a phase with a stub directory (no SUMMARY.md) and a ticked
ROADMAP checkbox reported `complete` from /gsd-manager and `pending`
from /gsd-progress — same data, different answer.

Apply ROADMAP-[x]-wins as the unified policy inside initProgress, mirroring
initManager's override. A user who typed `- [x] Phase 3` has made an
explicit assertion; a leftover stub dir is the weaker signal.

Adds sdk/src/query/init-progress-precedence.test.ts covering six cases
(stub dir + [x], full dir + [x], full dir + [ ], stub dir + [ ],
ROADMAP-only + [x], and completed_count parity). Pre-fix: cases 1 and 6
failed. Post-fix: all six pass. No existing tests were modified.

Closes #2674
2026-04-24 20:20:11 -04:00
Tom Boucher
303fd26b45 fix(#2662): add state.add-roadmap-evolution SDK handler; insert-phase uses it (#2683)
/gsd-insert-phase step 4 instructed the agent to directly Edit/Write
.planning/STATE.md to append a Roadmap Evolution entry. Projects that
ship a protect-files.sh PreToolUse hook (a recommended hardening
pattern) blocked the raw write, silently leaving STATE.md out of sync
with ROADMAP.md.

Adds a dedicated SDK handler state.add-roadmap-evolution (plus space
alias) that:

  - Reads STATE.md through the shared readModifyWriteStateMd lockfile
    path (matches sibling mutation handlers — atomic against
    concurrent writers).
  - Locates ### Roadmap Evolution under ## Accumulated Context, or
    creates both sections as needed.
  - Dedupes on exact-line match so idempotent retries are no-ops
    ({ added: false, reason: "duplicate" }).
  - Validates --phase / --action presence and action membership,
    throwing GSDError(Validation) for bad input (no silent
    { ok: false } swallow).

Workflow change (insert-phase.md step 4):

  - Replaces the raw Edit/Write instructions for STATE.md with
    gsd-sdk query state.patch (for the next-phase pointer) and
    gsd-sdk query state.add-roadmap-evolution (for the evolution
    log).
  - Updates success criteria to check handler responses.
  - Drops "Write" from commands/gsd/insert-phase.md allowed-tools
    (no step in the workflow needs it any more).

Tests (vitest, sdk/src/query/state-mutation.test.ts): subsection
creation when missing; append-preserving-order when present;
duplicate -> reason=duplicate; idempotence over two calls; three
validation cases covering missing --phase, missing --action, and
invalid action.

This is the first SDK handler dedicated to STATE.md Roadmap
Evolution mutations. Other workflows with similar raw STATE.md
edits (/gsd-pause-work, /gsd-resume-work, /gsd-new-project,
/gsd-complete-milestone, /gsd-add-phase) remain on raw Edit/Write
and will need follow-up issues to migrate — out of scope for this
fix.

Closes #2662
2026-04-24 20:20:02 -04:00
Tom Boucher
7b470f2625 fix(#2633): ROADMAP.md is the authority for current-milestone phase counts (#2665)
* fix(#2633): use ROADMAP.md as authority for current-milestone phase counts

initMilestoneOp (SDK + CJS) derives phase_count and completed_phases from
the current milestone section of ROADMAP.md instead of counting on-disk
`.planning/phases/` directories. After `phases clear` at the start of a new
milestone the on-disk set is a subset of the roadmap, causing premature
`all_phases_complete: true`.

validateHealth W002 now unions ROADMAP.md phase declarations (all milestones
— current, shipped, backlog) with on-disk dirs when checking STATE.md phase
refs. Eliminates false positives for future-phase refs in the current
milestone and history-phase refs from shipped milestones.

Falls back to legacy on-disk counting when ROADMAP.md is missing or
unparseable so no-roadmap fixtures still work.

Adds vitest regressions for both handlers; all 66 SDK + 118 CJS tests pass.

* fix(#2633): preserve full phase tokens in W002 + completion lookup

CodeRabbit flagged that the parseInt-based normalization collapses distinct
phase IDs (3, 3A, 3.1) into the same integer bucket, masking real
STATE/ROADMAP mismatches and miscounting completions in milestones with
inserted/sub-phases.

Index disk dirs and validate STATE.md refs by canonical full phase token —
strip leading zeros from the integer head only, preserve [A-Z] suffix and
dotted segments, and accept just the leading-zero variant of the integer
prefix as a tolerated alias. 3A and 3 never share a bucket.

Also widens the disk and STATE.md regexes to accept [A-Z]? suffix tokens.
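The canonical-token rule can be sketched as follows (the helper name is illustrative):

```javascript
// Canonicalize a phase token: strip leading zeros from the integer head
// only, preserving any [A-Z] suffix and dotted segments, so '03A' and
// '3A' alias but '3A' and '3' never share a bucket.
function canonicalPhaseToken(raw) {
  const m = raw.match(/^0*(\d+)([A-Z]?(?:\.\d+)*)$/);
  if (!m) return raw; // unrecognized shapes pass through untouched
  return m[1] + m[2];
}
```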
2026-04-24 18:11:12 -04:00
Tom Boucher
c8ae6b3b4f fix(#2636): surface gsd-sdk query failures and add workflow↔handler parity check (#2656)
* fix(#2636): surface gsd-sdk query failures and add workflow↔handler parity check

Root cause: workflows invoked `gsd-sdk query agent-skills <slug>` with a
trailing `2>/dev/null`, swallowing stderr and exit code. When the installed
`@gsd-build/sdk` npm was stale (pre-query), the call resolved to an empty
string and `agent_skills.<slug>` config was never injected into spawn
prompts — silently. The handler exists on main (sdk/src/query/skills.ts),
so this is a publish-drift + silent-fallback bug, not a missing handler.

Fix:
- Remove bare `2>/dev/null` from every `gsd-sdk query agent-skills …`
  invocation in workflows so SDK failures surface to stderr.
- Apply the same rule to other no-fallback calls (audit-open, write-profile,
  generate-* profile handlers, frontmatter.get in commands). Best-effort
  cleanup calls (config-set workflow._auto_chain_active false) keep
  exit-code forgiveness via `|| true` but no longer suppress stderr.

Parity tests:
- New: tests/bug-2636-gsd-sdk-query-silent-swallow.test.cjs — fails if any
  `gsd-sdk query agent-skills … 2>/dev/null` is reintroduced.
- Existing: tests/gsd-sdk-query-registry-integration.test.cjs already
  asserts every workflow noun resolves to a registered handler; confirmed
  passing post-change.

Note: npm republish of @gsd-build/sdk is a separate release concern and is
not included in this PR.

* fix(#2636): address review — restore broken markdown fences and shell syntax

The previous commit's mass removal of '2>/dev/null' suffixes also
collapsed adjacent closing code fences and 'fi' tokens onto the
command line, producing malformed markdown blocks and 'truefi' /
'true   fi' shell syntax errors in the workflows.

Repaired sites:
- commands/gsd/quick.md, thread.md (frontmatter.get fences)
- workflows/complete-milestone.md (audit-open fence)
- workflows/profile-user.md (write-profile + generate-* fences)
- workflows/verify-work.md (audit-open --json fence)
- workflows/execute-phase.md (truefi -> true / fi)
- workflows/plan-phase.md, discuss-phase-assumptions.md,
  discuss-phase/modes/chain.md (true   fi -> true / fi)

All 5450 tests pass.
2026-04-24 18:10:45 -04:00
Tom Boucher
7ed05c8811 fix(#2645): emit [[agents]] array-of-tables in Codex config.toml (#2664)
* fix(#2645): emit [[agents]] array-of-tables in Codex config.toml

Codex ≥0.116 rejects `[agents.<name>]` map tables with `invalid type:
map, expected a sequence`. Switch generateCodexConfigBlock to emit
`[[agents]]` array-of-tables with an explicit `name` field per entry.

Strip + merge paths now self-heal on reinstall — both the legacy
`[agents.gsd-*]` map shape (pre-#2645 configs) and the new
`[[agents]]` with `name = "gsd-*"` shape are recognized and replaced,
while user-authored `[[agents]]` entries are preserved.

Fixes #2645
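The two TOML shapes, for reference (the `model` field is an illustrative placeholder, not necessarily a field the installer emits):

```toml
# Legacy map shape — rejected by Codex ≥0.116
# ("invalid type: map, expected a sequence"):
#
# [agents.gsd-executor]
# model = "gpt-5"

# Array-of-tables shape emitted after the fix, with an explicit name field:
[[agents]]
name = "gsd-executor"
model = "gpt-5"
```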

* fix(#2645): use TOML-aware parser to strip managed [[agents]] sections

CodeRabbit flagged that the prior regex-based stripper for [[agents]]
array-of-tables only matched headers at column 0 and stopped at any line
beginning with `[`. An indented [[agents]] header would not terminate the
preceding match, so a managed `gsd-*` block could absorb a following
user-authored agent and silently delete it.

Replace the ad-hoc regex with the existing TOML-aware section parser
(getTomlTableSections + removeContentRanges) so section boundaries are
authoritative regardless of indentation. Same logic applies to legacy
[agents.gsd-*] map sections.

Add a comprehensive mixed-shape test covering multiple GSD entries (both
legacy map and new array-of-tables, double- and single-quoted names)
interleaved with multiple user-authored agents in both shapes — verifies
all GSD entries are stripped and every user entry is preserved.
2026-04-24 18:09:01 -04:00
Tom Boucher
0f8f7537da fix(#2652): layer ~/.gsd/defaults.json over built-ins in SDK loadConfig (#2663)
* fix(#2652): layer ~/.gsd/defaults.json over built-ins in SDK loadConfig

SDK loadConfig only merged built-in CONFIG_DEFAULTS, so pre-project init
queries (e.g. resolveModel in Codex installs) ignored user-level knobs like
resolve_model_ids: "omit" and emitted Claude model aliases from MODEL_PROFILES.

Port the user-defaults layer from get-shit-done/bin/lib/config.cjs:65 to the
TS loader. CJS parity: user defaults only apply when no .planning/config.json
exists (buildNewProjectConfig already bakes them in at /gsd:new-project time).

Fixes #2652

* fix(#2652): isolate GSD_HOME in test, refresh loadConfig JSDoc (CodeRabbit)
2026-04-24 18:08:07 -04:00
Tom Boucher
709f0382bf fix(#2639): route Codex TOML emit through full Claude→Codex neutralization pipeline (#2657)
installCodexConfig() applied a narrow path-only regex pass before
generateCodexAgentToml(), skipping the convertClaudeToCodexMarkdown() +
neutralizeAgentReferences(..., 'AGENTS.md') pipeline used on the .md emit
path. Result: emitted Codex agent TOMLs carried stale Claude-specific
references (CLAUDE.md, .claude/skills/, .claude/commands/, .claude/agents/,
.claudeignore, bare "Claude" agent-name mentions).

Route the TOML path through convertClaudeToCodexMarkdown and extend that
pipeline to cover bare .claude/<subdir>/ references and .claudeignore
(both previously unhandled on the .md path too). The $HOME/.claude/
get-shit-done prefix substitution still runs first so the absolute Codex
install path is preserved before the generic .claude → .codex rewrite.

Regression test: tests/issue-2639-codex-toml-neutralization.test.cjs —
drives installCodexConfig against a fixture containing every flagged
marker and asserts the emitted TOML contains zero CLAUDE.md / .claude/
/ .claudeignore occurrences and that Claude Code / Claude Opus product
names survive.

Fixes #2639
2026-04-24 18:06:13 -04:00
Tom Boucher
a6e692f789 fix(#2646): honor ROADMAP [x] checkboxes when no phases/ directory exists (#2669)
initProgress (and its CJS twin) hardcoded `not_started` for ROADMAP-only
phases, so `completed_count` stayed at 0 even when the ROADMAP showed
`- [x] Phase N`. Extract ROADMAP checkbox states into a shared helper
and use `- [x]` as the completion signal when no phase directory is
present. Disk status continues to win when both exist.

Adds a regression test that reproduces the bug with no phases/ dir and
one `[x]` / one `[ ]` phase, asserting completed_count===1.

Fixes #2646
2026-04-24 18:05:41 -04:00
Tom Boucher
b67ab38098 fix(#2643): align skill frontmatter name with workflow gsd: emission (#2672)
Flat-skills installs write SKILL.md files under gsd-<cmd>/ dirs, but
Claude Code resolves skills by their frontmatter `name:`, not directory
name. PR #2595 normalized every `/gsd-<cmd>` to `/gsd:<cmd>` across
workflows — including inside `Skill(skill="...")` args — but the
installer still emitted `name: gsd-<cmd>`, so every Skill() call on a
flat-skills install resolved to nothing.

Fix: emit `name: gsd:<cmd>` (colon form) in
`convertClaudeCommandToClaudeSkill`. Keep the hyphen-form directory
name for Windows path safety.

Codex stays on hyphen form: its adapter invokes skills as `$gsd-<cmd>`
(shell-var syntax) and a colon would terminate the variable name.
`convertClaudeCommandToCodexSkill` uses `yamlQuote(skillName)` directly
and is untouched.

- Extract `skillFrontmatterName(dirName)` helper (exported for tests).
- Update claude-skills-migration and qwen-skills-migration assertions
  that encoded the old hyphen emission.
- Add `tests/bug-2643-skill-frontmatter-name.test.cjs` asserting every
  `Skill(skill="gsd:<cmd>")` reference in workflows resolves to an
  emitted frontmatter name.

Full suite: 5452/5452 passing.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 18:05:40 -04:00
Tom Boucher
06463860e4 fix(#2638): write sub_repos to canonical planning.sub_repos (#2668)
loadConfig's multiRepo migration and filesystem-sync writers targeted the
top-level parsed.sub_repos, but KNOWN_TOP_LEVEL (the unknown-key validator's
allowlist) only recognizes planning.sub_repos (canonical per #2561). Each
migration/sync therefore persisted a key the next loadConfig call warned was
unknown.

Redirect both writers to parsed.planning.sub_repos, ensuring parsed.planning
is initialized first. Also self-heal legacy/buggy installs by stripping any
stale top-level sub_repos on load, preserving its value as the
planning.sub_repos seed if that slot is empty.
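The self-heal relocation can be sketched as follows (the parsed-config shape is simplified):

```javascript
// Relocate a stale top-level sub_repos key to canonical planning.sub_repos,
// preserving the legacy value as the seed only when the canonical slot is
// empty, then stripping the top-level residue.
function relocateSubRepos(parsed) {
  if ('sub_repos' in parsed) {
    parsed.planning = parsed.planning || {};
    if (parsed.planning.sub_repos === undefined) {
      parsed.planning.sub_repos = parsed.sub_repos;
    }
    delete parsed.sub_repos; // the unknown-key warning never fires again
  }
  return parsed;
}
```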

Tests cover: (a) canonical planning.sub_repos emits no warning, (b) multiRepo
migration writes to planning.sub_repos with no top-level residue,
(c) filesystem sync relocates to planning.sub_repos, (d) stale top-level
sub_repos from older buggy installs is stripped on load.

Closes #2638
2026-04-24 18:05:33 -04:00
Tom Boucher
259c1d07d3 fix(#2647): guard tarball ships sdk/dist so gsd-sdk query works (#2671)
v1.38.3 shipped without sdk/dist/ because the outer `files` whitelist
and `prepublishOnly` chain had drifted. The `gsd-sdk` bin shim then
fell through to a stale @gsd-build/sdk@0.1.0 (pre-`query`), breaking
every workflow that called `gsd-sdk query <noun>` on fresh installs.

Current package.json already restores `sdk/dist` + `build:sdk`
prepublish; this PR locks the fix in with:

- tests/bug-2647-outer-tarball-sdk-dist.test.cjs — asserts `files`
  includes `sdk/dist`, `prepublishOnly` invokes `build:sdk`, the
  shim resolves sdk/dist/cli.js, `npm pack --dry-run` lists
  sdk/dist/cli.js, and the built CLI exposes a `query` subcommand.
- scripts/verify-tarball-sdk-dist.sh — packs, extracts, installs
  prod deps, and runs `node sdk/dist/cli.js query --help` against
  the real tarball output.
- .github/workflows/release.yml — runs the verify script in both
  next and stable release jobs before `npm publish`.

Partial fix for #2649 (same root cause on the sibling sdk package).

Fixes #2647
2026-04-24 18:05:18 -04:00
Tom Boucher
387c8a1f9c fix(#2653): eliminate SDK↔CJS config-schema drift (#2670)
The SDK's config-set kept its own hand-maintained allowlist (28-key
drift vs. get-shit-done/bin/lib/config-schema.cjs), so documented
keys accepted by the CJS config-set — planning.sub_repos,
workflow.code_review_command, workflow.security_*, review.models.*,
model_profile_overrides.*, etc. — were rejected with
"Unknown config key" when routed through the SDK.

Changes:
- New sdk/src/query/config-schema.ts mirrors the CJS schema exactly
  (exact-match keys + dynamic regex sources).
- config-mutation.ts imports VALID_CONFIG_KEYS / DYNAMIC_KEY_PATTERNS
  from the shared module instead of rolling its own set and regex
  branches.
- Drop hand-coded agent_skills.* / features.* regex branches —
  now schema-driven so claude_md_assembly.blocks.*, review.models.*,
  and model_profile_overrides.<runtime>.<tier> are also accepted.
- Add tests/config-schema-sdk-parity.test.cjs (node:test) as the
  CI drift guard: asserts CJS VALID_CONFIG_KEYS set-equals the
  literal set parsed from config-schema.ts, and that every CJS
  dynamic pattern source has an identical counterpart in the SDK.
  Parallel to the CJS↔docs parity added in #2479.
- Vitest #2653 specs iterate every CJS key through the SDK
  validator, spot-check each dynamic pattern, and lock in
  planning.sub_repos.
- While here: add workflow.context_coverage_gate to the CJS schema
  (already in docs and SDK; CJS previously rejected it) and sync
  the missing curated typo-suggestions (review.model, sub_repos,
  plan_checker, workflow.review_command) into the SDK.

Fixes #2653.
2026-04-24 18:05:16 -04:00
Tom Boucher
e973ff4cb6 fix(#2630): reset STATE.md frontmatter atomically on milestone switch (#2666)
The /gsd:new-milestone workflow Step 5 rewrote STATE.md's Current Position
body but never touched the YAML frontmatter, so every downstream reader
(state.json, getMilestoneInfo, progress bars) kept reporting the stale
milestone until the first phase advance forced a resync. Asymmetric with
milestone.complete, which uses readModifyWriteStateMdFull.

Add a new `state milestone-switch` handler (both SDK and CJS) that atomically:
- Stomps frontmatter milestone/milestone_name with caller-supplied values
- Resets status to 'planning' and progress counters to zero
- Rewrites the ## Current Position section to the new-milestone template
- Preserves Accumulated Context (decisions, blockers, todos)

Wire the workflow Step 5 to invoke `state.milestone-switch` instead of the
manual body rewrite. Note the flag is `--milestone` not `--version`:
gsd-tools reserves `--version` as a globally-invalid help flag.

Red vitest in sdk/src/query/state-mutation.test.ts asserts the frontmatter
reset. Regression guard via node:test in tests/bug-2630-*.test.cjs runs
through gsd-tools end-to-end.

Fixes #2630

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 18:05:10 -04:00
Tom Boucher
8caa7d4c3a fix(#2649): installer fail-fast when sdk/dist missing in npx cache (#2667)
Root cause shared with #2647: a broken 1.38.3 tarball shipped without
sdk/dist/. The pre-#2441-decouple installer reacted by running
spawnSync('npm.cmd', ['install'], { cwd: sdkDir }) inside the npx cache
on Windows, where the cache is read-only, producing the misleading
"Failed to npm install in sdk/" error.

Defensive changes here (user-facing behavior only; packaging fix lives
in the sibling PR for #2647):

- Classify the install context (classifySdkInstall): detect npx cache
  paths, node_modules-based installs, and dev clones via path heuristics
  plus a side-effect-free write probe. Exported for test.
- Rewrite the dist-missing error to branch on context:
    tarball + npxCache -> "don't touch npx cache; npm i -g ...@latest"
    tarball (other)    -> upgrade path + clone-build escape hatch
    dev-clone          -> keep existing cd sdk && npm install && npm run build
- Preserve the invariant that the installer never shells out to
  npm install itself — users always drive that.
- Add tests/bug-2649-sdk-fail-fast.test.cjs covering the classifier and
  both failure messages, with spawnSync/execSync interceptors that
  assert no nested npm install is attempted.

Cross-ref: #2647 (packaging).

Fixes #2649

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 18:05:04 -04:00
forfrossen
a72bebb379 fix(workflows): agent-skills query keys must match subagent_type (follow-up to #2555) (#2616)
* fix(workflows): agent-skills query keys must match subagent_type

Eight workflow files called `gsd-sdk query agent-skills <KEY>` with
a key that did not match any `subagent_type` Task() spawns in the
same workflow (or any existing `agents/<KEY>.md`):

- research-phase.md:45 — gsd-researcher    → gsd-phase-researcher
- plan-phase.md:36     — gsd-researcher    → gsd-phase-researcher
- plan-phase.md:38     — gsd-checker       → gsd-plan-checker
- quick.md:145         — gsd-checker       → gsd-plan-checker
- verify-work.md:36    — gsd-checker       → gsd-plan-checker
- new-milestone.md:207 — gsd-synthesizer   → gsd-research-synthesizer
- new-project.md:63    — gsd-synthesizer   → gsd-research-synthesizer
- ui-review.md:21      — gsd-ui-reviewer   → gsd-ui-auditor
- discuss-phase.md:114 — gsd-advisor       → gsd-advisor-researcher

Effect before this fix: users configuring `agent_skills.<correct-type>`
in .planning/config.json got no injection on these paths because the
workflow asked the SDK for a different (non-existent) key. The SDK
correctly returned "" for the unknown key, which then interpolated as
an empty string into the Task() prompt. Silent no-op.

The discuss-phase advisor case is a subtle variant — the spawn site
uses `subagent_type="general-purpose"` and loads the agent role via
`Read(~/.claude/agents/gsd-advisor-researcher.md)`. The injection key
must follow the agent identity (gsd-advisor-researcher), not the
technical spawn type.

This is a follow-up to #2555 — the SDK-side fix in that PR (#2587)
only becomes fully effective once the call sites use the right keys.

Adds `sdk/src/workflow-agent-skills-consistency.test.ts` as a
contract test: every `agent-skills <slug>` invocation in
`get-shit-done/workflows/**/*.md` must reference an existing
`agents/<slug>.md`. Fails loudly on future key typos.

Closes #2615

* test: harden workflow agent-skills regex per review feedback

Review (#2616): CodeRabbit flagged the `agent-skills <slug>` pattern
as too permissive (can match prose mentions of the string) and the
per-line scan as brittle (misses commands wrapped across lines).

- Require full `gsd-sdk query agent-skills` prefix before capture
  + `\b` around the pattern so prose references no longer match.
- Scan each file's full content (not line-by-line) so `\s+` can span
  newlines; resolve 1-based line number from match index.
- Add JSDoc on helpers and on QUERY_KEY_PATTERN.

Verified: RED against base (`f30da83`) produces the same 9 violations
as before; GREEN on fixed tree.
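
The hardened scan can be sketched as follows (the pattern and helper name are assumptions; the real contract test lives in sdk/src/workflow-agent-skills-consistency.test.ts):

```javascript
// Assumed shape of QUERY_KEY_PATTERN: full `gsd-sdk query agent-skills`
// prefix before the capture, \b-anchored so prose mentions don't match.
const QUERY_KEY_PATTERN = /\bgsd-sdk\s+query\s+agent-skills\s+([a-z0-9-]+)\b/g;

// Scan each file's full content so \s+ can span wrapped lines, then
// recover a 1-based line number from the match index.
function findAgentSkillsKeys(content) {
  const hits = [];
  for (const m of content.matchAll(QUERY_KEY_PATTERN)) {
    hits.push({ slug: m[1], line: content.slice(0, m.index).split('\n').length });
  }
  return hits;
}
```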

---------

Co-authored-by: forfrossen <forfrossensvart@gmail.com>
2026-04-23 12:40:56 -04:00
Tom Boucher
31569c8cc8 ci: explicit rebase check + fail-fast SDK typecheck in install-smoke (#2631)
* ci: explicit rebase check + fail-fast SDK typecheck in install-smoke

Stale-base regression guard. Root cause: GitHub's `refs/pull/N/merge`
is cached against the PR's recorded merge-base, not current main. When
main advances after a PR is opened, the cache stays stale and CI runs
against the pre-advance tree. PRs hit this whenever a type error lands
on main and gets patched shortly after (e.g. #2611 + #2622) — stale
branches replay the broken intermediate state and report confusing
downstream failures for hours.

Observed failure mode: install-smoke's "Assert gsd-sdk resolves on PATH"
step fires with "installSdkIfNeeded() regression" even when the real
cause is `npm run build` failing in sdk/ due to a TypeScript cast
mismatch already fixed on main.

Fix:
- Explicit `git merge origin/main` step in both `install-smoke.yml` and
  `test.yml`. If the merge conflicts, emit a clear "rebase onto main"
  diagnostic and fail early, rather than let conflicts produce unrelated
  downstream errors.
- Dedicated `npm run build:sdk` typecheck step in install-smoke with a
  remediation hint ("rebase onto main — the error may already be fixed
  on trunk"). Fails fast with the actual tsc output instead of masking
  it behind a PATH assertion.
- Drop the `|| true` on `get-shit-done-cc --claude --local` so installer
  failures surface at the install step with install.js's own error
  message, not at the downstream PATH assertion where the message
  misleadingly blames "shim regression".
- `fetch-depth: 0` on checkout so the merge-base check has history.

* ci: address CodeRabbit — add rebase check to smoke-unpacked, fix fetch flag

Two findings from CodeRabbit's review on #2631:

1. `smoke-unpacked` job was missing the same rebase check applied to the
   `smoke` job. It ran on the cached `refs/pull/N/merge` and could hit
   the same stale-base failure mode the PR was designed to prevent. Added
   the identical rebase-check step.

2. `git fetch origin main --depth=0` is an invalid flag — git rejects it
   with "depth 0 is not a positive number". The intent was "fetch with
   full depth", but the right way is just `git fetch origin main` (no
   --depth). Removed the invalid flag and the `||` fallback that was
   papering over the error.
2026-04-23 12:40:16 -04:00
Tom Boucher
eba0c99698 fix(#2623): resolve parent .planning root for sub_repos workspaces in SDK query dispatch (#2629)
* fix(#2623): resolve parent .planning root for sub_repos workspaces in SDK query dispatch

When `gsd-sdk query` is invoked from inside a `sub_repos`-listed child repo,
`projectDir` defaulted to `process.cwd()` which pointed at the child repo,
not the parent workspace that owns `.planning/`. Handlers then directly
checked `${projectDir}/.planning` and reported `project_exists: false`.

The legacy `gsd-tools.cjs` CLI does not have this gap — it calls
`findProjectRoot(cwd)` from `bin/lib/core.cjs`, which walks up from the
starting directory checking each ancestor's `.planning/config.json` for a
`sub_repos` entry that lists the starting directory's top-level segment.

This change ports that walk-up as a new `findProjectRoot` helper in
`sdk/src/query/helpers.ts` and applies it once in `cli.ts:main()` before
dispatching `query`, `run`, `init`, or `auto`. Resolution is idempotent:
if `projectDir` already owns `.planning/` (including an explicit
`--project-dir` pointing at the workspace root), the helper returns it
unchanged. The walk is capped at 10 parent levels and never crosses
`$HOME`. All filesystem errors are swallowed.

Regression coverage:
- `helpers.test.ts` — 8 unit tests covering own-`.planning` guard (#1362),
  sub_repos match, nested-path match, `planning.sub_repos` shape,
  heuristic fallback, unparseable config, legacy `multiRepo: true`.
- `sub-repos-root.integration.test.ts` — end-to-end baseline (reproduces
  the bug without the walk-up) and fixed behavior (walk-up + dispatch of
  `init.new-milestone` reports `project_exists: true` with the parent
  workspace as `project_root`).

sdk vitest: 1511 pass / 24 fail (all 24 failures pre-existing on main,
baseline is 26 failing — `comm -23` against baseline produces zero new
failures). CJS: 5410 pass / 0 fail.

Closes #2623

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2623): remove stray .planing typo from integration test setup

Address CodeRabbit nitpick: the mkdir('.planing') call on line 23 was
dead code from a typo, with errors silently swallowed via .catch(() => {}).
The test already creates '.planning' correctly on the next line.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 11:58:23 -04:00
Tom Boucher
5a8a6fb511 fix(#2256): pass per-agent model overrides through Codex/OpenCode transport (#2628)
The Codex and OpenCode install paths read `model_overrides` only from
`~/.gsd/defaults.json` (global). A per-project override set in
`.planning/config.json` — the reporter's exact setup for
`gsd-codebase-mapper` — was silently dropped, so the child agent inherited
the runtime's default model regardless of `model_overrides`.

Neither runtime has an inline `model` parameter on its spawn API
(Codex `spawn_agent(agent_type, message)`, OpenCode `task(description,
prompt, subagent_type, task_id, command)`), so the per-agent model must
reach the child via the static config GSD writes at install time. That
config was being populated from the wrong source.

Fix: add `readGsdEffectiveModelOverrides(targetDir)` which merges
`~/.gsd/defaults.json` with per-project `.planning/config.json`, with
per-project keys winning on conflict. Both install sites now call it and
walk up from the install root to locate `.planning/` — matching the
precedence `readGsdRuntimeProfileResolver` already uses for #2517.
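
The precedence rule itself reduces to an object spread; the config shapes below are assumptions, and the real `readGsdEffectiveModelOverrides` additionally reads both files from disk and walks up to locate `.planning/`:

```javascript
// Merge core only: per-project keys win on conflict with
// ~/.gsd/defaults.json globals.
function mergeModelOverrides(globalCfg, projectCfg) {
  return {
    ...((globalCfg && globalCfg.model_overrides) || {}),
    ...((projectCfg && projectCfg.model_overrides) || {}),
  };
}
```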

Also update the Codex Task()->spawn_agent mapping block so it no longer
says "omit" without context: it now documents that per-agent overrides
are embedded in the agent TOML and notes the restriction that Codex
only permits `spawn_agent` when the user explicitly requested sub-agents
(do the work inline otherwise).

Regression tests (`tests/bug-2256-model-overrides-transport.test.cjs`)
cover: global-only, project-only, project-wins-on-conflict, walking up
from a nested `targetDir`, Codex TOML `model =` emission, and OpenCode
frontmatter `model:` emission.

Closes #2256

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 11:58:06 -04:00
Tom Boucher
bdba40cc3d fix(#2618): thread --ws through query dispatch and sync root STATE.md on workstream.set (#2627)
* fix(#2618): thread --ws through query dispatch for state and init handlers

Gap 1 of #2618: the query dispatcher already accepts a workstream via
registry.dispatch(cmd, args, projectDir, ws), but several handlers drop it
before reaching planningPaths() / getMilestoneInfo() / findPhase() — so
stateJson and the init.* handlers return root-scoped results even when --ws
is provided.

Changes:

- sdk/src/query/state.ts: forward workstream into getMilestoneInfo() and
  extractCurrentMilestone() so buildStateFrontmatter resolves milestone data
  from the workstream ROADMAP/STATE instead of the root mirror.
- sdk/src/query/init.ts: thread workstream through initExecutePhase,
  initPlanPhase, initPhaseOp, and getPhaseInfoWithFallback (which fans out
  to findPhase() and roadmapGetPhase()). Also switch hardcoded
  join(projectDir, '.planning') to relPlanningPath(workstream) so returned
  state_path/roadmap_path/config_path reflect the workstream layout.

Regression test: stateJson with --ws workstream reads STATE.md from
.planning/workstreams/<name>/ when workstream is provided.

Closes #2618 (gap 1)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2618): sync root .planning/STATE.md mirror on workstream.set

Gap 2 of #2618: setActiveWorkstream only flips the active-workstream
pointer file; the root .planning/STATE.md mirror stays stale. Downstream
consumers (statusline, gsd-sdk query progress, any tool that reads the
root STATE.md) continue to see the previous workstream's state.

After setActiveWorkstream(), copy .planning/workstreams/<name>/STATE.md
verbatim to .planning/STATE.md via writeFileSync. The workstream STATE.md
is authoritative; the root file is a pass-through mirror. Missing source
STATE.md is a no-op rather than an error — a freshly created workstream
with no STATE.md yet should still activate cleanly.

The response now includes `mirror_synced: boolean` so callers can
observe whether the root mirror was updated.

Regression test: workstreamSet root STATE.md mirror sync — switches
from a stale root mirror to a workstream STATE.md with different
frontmatter and asserts the root file now matches.

Closes #2618 (gap 2)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 11:54:34 -04:00
Tom Boucher
df0ab0c0c9 fix(#2410): emit wave + plan checkpoint heartbeats to prevent stream idle timeout (#2626)
/gsd:manager's background execute-phase Task fails with
"Stream idle timeout - partial response received" on multi-plan phases
(Claude Code + Opus 4.7 at ~200K+ cache_read) because the long subagent
never emits tokens fast enough between large tool_results — the SSE layer
times out mid-assistant-turn and the harness retries hit the same TTFT
wall after prompt cache TTL expires.

Root cause: no orchestrator-level activity at wave/plan boundaries.

Fix (maintainer-approved A+B):
- A (wave boundary): execute-phase.md now emits a `[checkpoint]`
  heartbeat before each wave spawns and after each wave completes.
- B (plan boundary): also emit `[checkpoint]` before each Task()
  dispatch and after each executor returns (complete/failed/checkpoint).
  Heartbeats are literal assistant-text lines (no tool call) with a
  monotonic `{P}/{Q} plans done` counter so partial-transcript recovery
  tools can grep progress even when a run dies mid-phase.
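
A heartbeat line and the recovery-side grep might look like this (the exact marker text beyond the `[checkpoint]` prefix and the `{P}/{Q} plans done` counter is an assumption):

```javascript
// Emit a literal assistant-text heartbeat line (no tool call).
const heartbeat = (p, q, note) => `[checkpoint] ${note}: ${p}/${q} plans done`;

// Partial-transcript recovery: find the last counter that made it out
// before a run died mid-phase.
function lastProgress(transcript) {
  const hits = [...transcript.matchAll(/\[checkpoint\].*?(\d+)\/(\d+) plans done/g)];
  if (!hits.length) return null;
  const last = hits[hits.length - 1];
  return { done: Number(last[1]), total: Number(last[2]) };
}
```

Because the counter is monotonic, the last match is always the furthest progress reached.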

Docs: COMMANDS.md /gsd-manager section documents the marker format.
Tests: tests/bug-2410-stream-checkpoint-heartbeats.test.cjs (12 cases)
asserts the heartbeats exist at every boundary and in the right workflow
step. Full suite: 5422 node:test cases pass. Pre-existing vitest
failures on main are unrelated to this change.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 11:54:11 -04:00
Tom Boucher
807db75d55 fix(#2620): detect HOME-relative PATH entries before suggesting absolute export (#2625)
* fix(#2620): detect HOME-relative PATH entries before suggesting absolute export

When the installer reported `gsd-sdk` not on PATH and suggested
appending an absolute `export PATH="/home/user/.npm-global/bin:$PATH"`
line to the user's rc file, a user who had the equivalent
`export PATH="$HOME/.npm-global/bin:$PATH"` already in their shell
profile would get a duplicate entry — the installer only compared the
absolute form.

Add `homePathCoveredByRc(globalBin, homeDir, rcFileNames?)` to
`bin/install.js` and export it for test-mode callers. The helper scans
`~/.zshrc`, `~/.bashrc`, `~/.bash_profile`, `~/.profile`, grepping each
file for `export PATH=` / bare `PATH=` lines and substituting the
common HOME forms ($HOME, ${HOME}, leading ~/) with the real home
directory before comparing each resolved PATH segment against
globalBin. Trailing slashes are normalised so `.npm-global/bin/`
matches `.npm-global/bin`. Missing / unreadable / malformed rc files
are swallowed — the caller falls back to the existing absolute
suggestion.

Tests cover $HOME, ${HOME}, and ~/ forms, absolute match,
trailing-slash match, commented-out lines, missing rc files, and
unreadable rc files (directory where a file is expected).

Closes #2620

* fix(#2620): skip relative PATH segments in homePathCoveredByRc

CodeRabbit flagged that the helper unconditionally resolved every
non-$-containing segment against homeAbs via path.resolve(homeAbs, …),
which silently turns a bare relative segment like `bin` or
`node_modules/.bin` into `$HOME/bin` / `$HOME/node_modules/.bin`. That
is wrong: bare PATH segments depend on the shell's cwd at lookup time,
not on $HOME — so the helper was returning true for rc files that do
not actually cover globalBin.

Guard the compare with path.isAbsolute(expanded) after HOME expansion.
Only segments that are absolute on their own (or that became absolute
via $HOME / ${HOME} / ~ substitution) are compared against targetAbs.
Relative segments are skipped.

Add two regression tests covering a bare `bin` segment and a nested
`node_modules/.bin` segment; both previously returned true when home
happened to contain a matching subdirectory and now correctly return
false.

Closes #2620 (CodeRabbit follow-up)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2620): wire homePathCoveredByRc into installer suggestion path

CodeRabbit flagged that homePathCoveredByRc was added in the previous
commit but never called from the installer, so the user-facing PATH
warning stayed unchanged — users with `export PATH="$HOME/.npm-global/bin:$PATH"`
in their rc would still get a duplicate absolute-path suggestion.

Add `maybeSuggestPathExport(globalBin, homeDir)` that:
- skips silently when globalBin is already on process.env.PATH;
- prints a "try reopening your shell" diagnostic when homePathCoveredByRc
  returns true (the directory IS on PATH via an rc entry — just not in
  the current shell);
- otherwise falls through to the absolute-path
  `echo 'export PATH="…:$PATH"' >> ~/.zshrc` suggestion.

Call it from installSdkIfNeeded after the sdk/dist check succeeds,
resolving globalBin via `npm prefix -g` (plus `/bin` on POSIX). Swallow
any exec failure so the installer keeps working when npm is weird.

Export maybeSuggestPathExport for tests. Add three new regression tests
(installer-flow coverage per CodeRabbit nitpick):
- rc covers globalBin via $HOME form → no absolute suggestion emitted
- rc covers only an unrelated directory → absolute suggestion emitted
- globalBin already on process.env.PATH → no output at all

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 11:53:51 -04:00
Tom Boucher
74da61fb4a fix(#2619): prevent extractCurrentMilestone from truncating on phase-vX.Y headings (#2624)
* fix(#2619): prevent extractCurrentMilestone from truncating on phase-vX.Y headings

extractCurrentMilestone sliced ROADMAP.md to the current milestone by
looking for the next milestone heading with a greedy regex:

    ^#{1,N}\s+(?:.*v\d+\.\d+||📋|🚧)

Any heading that mentioned a version literal matched — including phase
headings like "### Phase 12: v1.0 Tech-Debt Closure". When the current
milestone was at the same heading level as the phases (### 🚧 v1.1 …),
the slice terminated at the first such phase, hiding every phase that
followed from phase.insert, validate.health W007, and other SDK commands.

Fix: add a `(?!Phase\s+\S)` negative lookahead so phase headings can
never be treated as milestone boundaries. Phase headings always start
with the literal `Phase `, so this is a clean exclusion.
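
The effect of the lookahead can be illustrated with a simplified pattern (the real boundary regex in sdk/src/query/roadmap.ts has more alternatives and moving parts):

```javascript
// Simplified milestone-boundary regex: the (?!Phase\s+\S) lookahead
// stops phase headings that mention a version literal from being
// treated as milestone boundaries; gmi makes it case-insensitive.
const boundary = /^#{1,4}\s+(?!Phase\s+\S).*v\d+\.\d+/gmi;

const roadmap = [
  '### 🚧 v1.1 Current Milestone',
  '### Phase 12: v1.0 Tech-Debt Closure', // excluded by the lookahead
  '### ✅ v1.0 Shipped',
].join('\n');
```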

Applied to:
- get-shit-done/bin/lib/core.cjs (extractCurrentMilestone)
- sdk/src/query/roadmap.ts (extractCurrentMilestone + extractNextMilestoneSection)

Regression tests:
- tests/roadmap-phase-fallback.test.cjs: extractCurrentMilestone does not
  truncate on phase heading containing vX.Y (#2619)
- sdk/src/query/roadmap.test.ts: extractCurrentMilestone bug-2619: does
  not truncate at a phase heading containing vX.Y

Closes #2619

* fix(#2619): make milestone-boundary Phase lookahead case-insensitive

CodeRabbit follow-up on #2619: the negative lookahead `(?!Phase\s+\S)`
in the SDK milestone-boundary regex was case-sensitive, so headings like
`### PHASE 12: v1.0 Tech-Debt` or `### phase 12: …` still truncated the
milestone slice. Add the `i` flag (now `gmi`).

The sibling CJS regex in get-shit-done/bin/lib/core.cjs already uses the
`mi` flag, so it is already case-insensitive; added a regression test to
lock that in.

- sdk/src/query/roadmap.ts: change flags from `gm` → `gmi`
- sdk/src/query/roadmap.test.ts: add PHASE/phase regression test
- tests/roadmap-phase-fallback.test.cjs: add PHASE/phase regression test

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 11:53:20 -04:00
Jeremy McSpadden
0a049149e1 fix(sdk): decouple from build-from-source install, close #2441 #2453 (#2457)
* fix(sdk): decouple SDK from build-from-source install path, close #2441 and #2453

Ship sdk/dist prebuilt in the tarball and replace the npm-install-g
sub-install with a parent-package bin shim (bin/gsd-sdk.js). npm chmods
bin entries from a packed tarball correctly, eliminating the mode-644
failure (#2453) and the full class of NPM_CONFIG_PREFIX/ignore-scripts/
corepack/air-gapped failure modes that caused #2439 and #2441.

Changes:
- sdk/package.json: prepublishOnly runs `rm -rf dist && tsc && chmod +x
  dist/cli.js` (stale-build guard + execute-bit fix at publish time)
- package.json: add "gsd-sdk": "bin/gsd-sdk.js" bin entry; add sdk/dist
  to files so the prebuilt CLI ships in the tarball
- bin/gsd-sdk.js: new back-compat shim — resolves sdk/dist/cli.js relative
  to the package root and delegates via `node`, so all existing PATH call
  sites (slash commands, agents, hooks) continue to work unchanged (S1 shim)
- bin/install.js: replace installSdkIfNeeded() build-from-source + global-
  install dance with a dist-verify + chmod-in-place guard; delete
  resolveGsdSdk(), detectShellRc(), emitSdkFatal() helpers now unused
- .github/workflows/install-smoke.yml: add smoke-unpacked job that strips
  execute bit from sdk/dist/cli.js before install to reproduce the exact
  #2453 failure mode
- tests/bug-2441-sdk-decouple.test.cjs: new regression tests asserting all
  invariants (no npm install -g from sdk/, shim exists, sdk/dist in files,
  prepublishOnly has rm -rf + chmod)
- tests/bugs-1656-1657.test.cjs: update stale assertions that required
  build-from-source behavior (now asserts new prebuilt-dist invariants)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* chore(release): bump to 1.38.2, wire release.yml to build SDK dist

- Bump version 1.38.1 -> 1.38.2 for the #2441/#2453 fix shipped in 0f6903d.
- Add `build:sdk` script (`cd sdk && npm ci && npm run build`).
- `prepublishOnly` now runs hooks + SDK builds as a safety net.
- release.yml (rc + finalize): build SDK dist before `npm publish` so the
  published tarball always ships fresh `sdk/dist/` (kept gitignored).
- CHANGELOG: document 1.38.2 entry and `--sdk` flag semantics change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* ci: build SDK dist before tests and smoke jobs

sdk/dist/ is gitignored (built fresh at publish time via release.yml),
but both the test suite and install-smoke jobs run `bin/install.js`
or `npm pack` against the checked-out tree where dist doesn't exist yet.

- test.yml: `npm run build:sdk` before `npm run test:coverage`, so tests
  that spawn `bin/install.js` don't hit `installSdkIfNeeded()`'s fatal
  missing-dist check.
- install-smoke.yml (both smoke and smoke-unpacked): build SDK before
  pack/chmod so the published tarball contains dist and the unpacked
  install has a file to strip exec-bit from.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(sdk): lift SDK runtime deps to parent so tarball install can resolve them

The SDK's runtime deps (ws, @anthropic-ai/claude-agent-sdk) live in
sdk/package.json, but sdk/node_modules is NOT shipped in the parent
tarball — only sdk/dist, sdk/src, sdk/prompts, and sdk/package.json are.
When a user runs `npm install -g get-shit-done-cc`, npm installs the
parent's node_modules but never runs `npm install` inside the nested
sdk/ directory.

Result: `node sdk/dist/cli.js` fails with ERR_MODULE_NOT_FOUND for 'ws'.
The smoke tarball job caught this; the unpacked variant masked it
because `npm install -g <dir>` copies the entire workspace including
sdk/node_modules (left over from `npm run build:sdk`).

Fix: declare the same deps in the parent package.json so they land in
<pkg>/node_modules, which Node's resolution walks up to from
<pkg>/sdk/dist/cli.js. Keep them declared in sdk/package.json too so
the SDK remains a self-contained package for standalone dev.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(lockfile): regenerate package-lock.json cleanly

The previous `npm install` run left the lockfile internally inconsistent
(resolved esbuild@0.27.7 referenced but not fully written), causing
`npm ci` to fail in CI with "Missing from lock file" errors.

Clean regen via rm + npm install fixes all three failed jobs
(test, smoke, smoke-unpacked), which were all hitting the same
`npm ci` sync check.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(deps): remove unused esbuild + vitest from root devDependencies

Both were declared but never imported anywhere in the root package
(confirmed via grep of bin/, scripts/, tests/). They lived in sdk/
already, which is the only place they're actually used.

The transitive tree they pulled in (vitest → vite → esbuild 0.28 →
@esbuild/openharmony-arm64) was the root of the CI npm ci failures:
the openharmony platform package's `optional: true` flag was not being
applied correctly by npm 10 on Linux runners, causing EBADPLATFORM.

After removal: 800+ transitive packages → 155. Lockfile regenerated
cleanly. All 4170 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(sdk): pretest:coverage builds sdk; tighten shim test assertions

Add "pretest:coverage": "npm run build:sdk" so npm run test:coverage
works in clean checkouts where sdk/dist/ hasn't been built yet.

Tighten the two loose shim assertions in bug-2441-sdk-decouple.test.cjs:
- forwards-to test now asserts path.resolve() is called with the
  'sdk','dist','cli.js' path segments, not just substring presence
- node-invocation test now asserts spawnSync(process.execPath, [...])
  pattern, ruling out matches in comments or the shebang line

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: address PR review — pretest:coverage + tighten shim tests

Review feedback from trek-e on PR 2457:

1. pretest:coverage + pretest hooks now run `npm run build:sdk` so
   `npm run test[:coverage]` in a clean checkout produces the required
   sdk/dist/ artifacts before running the installer-dependent tests.
   CI already does this explicitly; local contributors benefit.

2. Shim tests in bug-2441-sdk-decouple.test.cjs tightened from loose
   substring matches (which would pass on comments/shebangs alone) to
   regex assertions on the actual path.resolve call, spawnSync with
   process.execPath, process.argv.slice(2), and process.exit pattern.
   These now provide real regression protection for #2453-class bugs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix: correct CHANGELOG entry and add [1.38.2] reference link

Two issues in the 1.38.2 CHANGELOG entry:
- installSdkIfNeeded() was described as deleted but it still exists in
  bin/install.js (repurposed to verify sdk/dist/cli.js and fix execute bit).
  Corrected the description to say 'repurposes' rather than 'deletes'.
- The reference-link block at the bottom of the file was missing a [1.38.2]
  compare URL and [Unreleased] still pointed to v1.37.1...HEAD. Added the
  [1.38.2] link and updated [Unreleased] to compare/v1.38.2...HEAD.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(sdk): double-cast WorkflowConfig to Record for strict tsc build

TypeScript error on main (introduced in #2611) blocks `npm run build`
in sdk/, which now runs as part of this PR's tarball build path. Apply
the double-cast via `unknown` as the compiler suggests.

Same fix as #2622; can be dropped if that lands first.

* test: remove bug-2598 test obsoleted by SDK decoupling

The bug-2598 test guards the Windows CVE-2024-27980 fix in the old
build-from-source path (npm spawnSync with shell:true + formatSpawnFailure
diagnostics). This PR removes that entire code path — installSdkIfNeeded
no longer spawns npm, it just verifies the prebuilt sdk/dist/cli.js
shipped in the tarball.

The test asserts `installSdkIfNeeded.toString()` contains a
formatSpawnFailure helper. After decoupling, no such helper exists
(nothing to format — there's no spawn). Keeping the test would assert
invariants of the rejected architecture.

The original #2598 defect (silent failure of npm spawn on Windows) is
structurally impossible in the shim path: bin/gsd-sdk.js invokes
`node sdk/dist/cli.js` directly via child_process.spawn with an
explicit argv array. No .cmd wrapper, no shell delegation.

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Tom Boucher <trekkie@nomorestars.com>
2026-04-23 08:36:03 -04:00
Tom Boucher
a56707a07b fix(#2613): preserve STATE.md frontmatter on write path (option 2) (#2622)
* fix(#2613): preserve STATE.md frontmatter on write path (option 2)

`readModifyWriteStateMd` strips frontmatter before invoking the modifier,
so `syncStateFrontmatter` received body-only content and `existingFm`
was always `{}`. The preservation branch never fired, and every mutation
re-derived `status` (to `'unknown'` when body had no `Status:` line) and
`progress.*` (to 0/0 when the shipped milestone's phase directories were
archived), silently overwriting authoritative frontmatter values.

Option 2 — write-side analogue of #2495 READ fix: `buildStateFrontmatter`
reads the current STATE.md frontmatter from disk as a preservation
backstop. Status preserved when derived is `'unknown'` and existing is
non-unknown. Progress preserved when disk scan returns all zeros AND
existing has non-zero counts. Legitimate body-driven status changes and
non-zero disk counts still win.
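
The preservation rules distil to something like the following (a pure-function sketch: the real `buildStateFrontmatter` also reads the current STATE.md from disk, and the `progress` field names here are assumptions):

```javascript
// Preservation backstop: derived values win unless they are the
// known-degenerate cases described above.
function preserveFrontmatter(derived, existing) {
  const out = { ...derived };
  // status: a derived 'unknown' never clobbers a known existing status
  if (derived.status === 'unknown' && existing.status && existing.status !== 'unknown') {
    out.status = existing.status;
  }
  // progress: an all-zero disk scan never clobbers non-zero existing counts
  const allZero = derived.progress && derived.progress.done === 0 && derived.progress.total === 0;
  const existingNonZero = existing.progress && (existing.progress.done > 0 || existing.progress.total > 0);
  if (allZero && existingNonZero) out.progress = existing.progress;
  return out;
}
```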

Milestone/milestone_name already preserved via `getMilestoneInfo`'s
#2495 fix — regression test added to lock that in.

Adds 5 regression tests covering status preservation, progress
preservation, milestone preservation, legitimate status updates, and
disk-scan-wins-when-non-zero.

Closes #2613

* fix(sdk): double-cast WorkflowConfig to Record in loadGateConfig

TypeScript error on main (introduced in #2611) blocks the install-smoke
CI job: `WorkflowConfig` has no string index signature, so the direct
cast to `Record<string, unknown>` fails type-check. The SDK build fails,
`installSdkIfNeeded()` cannot install `gsd-sdk` from source, and the
smoke job reports a false-positive installer regression.

  src/query/check-decision-coverage.ts(236,16): error TS2352:
  Conversion of type 'WorkflowConfig' to type 'Record<string, unknown>'
  may be a mistake because neither type sufficiently overlaps with the
  other.

Apply the double-cast via `unknown` as the compiler suggests. Behavior
is unchanged — this was already a cast.
2026-04-23 08:22:42 -04:00
Tom Boucher
f30da8326a feat: add gates ensuring discuss-phase decisions are translated to plans and verified (closes #2492) (#2611)
* feat(#2492): add gates ensuring discuss-phase decisions are translated and verified

Two gates close the loop between CONTEXT.md `<decisions>` and downstream
work, fixing #2492:

- Plan-phase **translation gate** (BLOCKING). After requirements
  coverage, refuses to mark a phase planned when a trackable decision
  is not cited (by id `D-NN` or by 6+-word phrase) in any plan's
  `must_haves`, `truths`, or body. Failure message names each missed
  decision with id, category, text, and remediation paths.

- Verify-phase **validation gate** (NON-BLOCKING). Searches plans,
  SUMMARY.md, files modified, and recent commit subjects for each
  trackable decision. Misses are written to VERIFICATION.md as a
  warning section but do not change verification status. Asymmetry is
  deliberate — fuzzy-match miss should not fail an otherwise green
  phase.

Shared helper `parseDecisions()` lives in `sdk/src/query/decisions.ts`
so #2493 can consume the same parser.

Decisions opt out of both gates via `### Claude's Discretion` heading
or `[informational]` / `[folded]` / `[deferred]` tags.

Both gates skip silently when `workflow.context_coverage_gate=false`
(default `true`).

Closes #2492

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2492): make plan-phase decision gate actually block (review F1, F8, F9, F10, F15)

- F1: replace `${context_path}` with `${CONTEXT_PATH}` in the plan-phase
  gate snippet so the BLOCKING gate receives a non-empty path. The
  variable was defined in Step 4 (`CONTEXT_PATH=$(_gsd_field "$INIT" ...)`)
  and the gate snippet referenced the lowercase form, leaving the gate to
  run with an empty path argument and silently skip.
- F15: wrap the SDK call with `jq -e '.data.passed == true' || exit 1` so
  failure halts the workflow instead of being printed and ignored. The
  verify-phase counterpart deliberately omits the exit-1 guard (non-blocking
  by design) and now carries an inline note documenting the asymmetry.
- F10: tag the JSON example fence as `json` and the options-list fence as
  `text` (MD040).
- F8/F9: anchor the heading-presence test regexes to `^## 13[a-z]?\\.` so
  prose substrings like "Requirements Coverage Gate" mentioned in body
  text cannot satisfy the assertion. Added two new regression tests
  (variable-name match, exit-1 guard) so a future revert is caught.
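
The anchored form behaves as sketched below (illustrative strings; the regex shape `^## 13[a-z]?\.` is from the fix itself):

```javascript
// Anchored heading test: prose that merely mentions the gate no longer
// satisfies the assertion; only a real "## 13." / "## 13e." heading does.
const headingRe = /^## 13[a-z]?\./m;

const prose = 'The Requirements Coverage Gate (section 13) blocks re-planning.';
const heading = '## 13e. Post-Planning Gap Check';

headingRe.test(prose);   // substring in body text does not match
headingRe.test(heading); // real heading does
```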

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2492): tighten decision-coverage gates against false positives and config drift (review F3,F4,F5,F6,F7,F16,F18,F19)

- F3: forward `workstream` arg through both gate handlers so workstream-scoped
  `workflow.context_coverage_gate=false` actually skips. Added negative test
  that creates a workstream config disabling the gate while the root config
  has it enabled and asserts the workstream call is skipped.
- F4: restrict the plan-phase haystack to designated sections — front-matter
  `must_haves` / `truths` / `objective` plus body sections under headings
  matching `must_haves|truths|tasks|objective`. HTML comments and fenced
  code blocks are stripped before extraction so a commented-out citation or
  a literal example never counts as coverage. Verify-phase keeps the broader
  artifact-wide haystack by design (non-blocking).
- F5: reject decisions with fewer than 6 normalized words from soft-matching
  (previously only rejected when the resulting phrase was under 12 chars
  AFTER slicing — too lenient). Short decisions now require an explicit
  `D-NN` citation, with regression tests for the boundary.
- F6: walk every `*-SUMMARY.md` independently and use `matchAll` with the
  `/g` flag so multiple `files_modified:` blocks across multiple summaries
  are all aggregated. Previously only the first block in the concatenated
  string was parsed, silently dropping later plans' files.
- F7: validate every `files_modified` path stays inside `projectDir` after
  resolution (rejects absolute paths, `../` traversal). Cap each file read
  at 256 KB. Skipped paths emit a stderr warning naming the entry.
- F16: validate `workflow.context_coverage_gate` is boolean in
  `loadGateConfig`; warn loudly on numeric or other-shaped values and
  default to ON. Mirrors the schema-vs-loadConfig validation gap from
  #2609.
- F18: bump verify-phase `git log -n` cap from 50 to 200 so longer-running
  phases are not undercounted. Documented as a precision-vs-recall tradeoff
  appropriate for a non-blocking gate.
- F19: tighten `QueryResult` / `QueryHandler` to be parameterized
  (`<T = unknown>`). Drops the `as unknown as Record<string, unknown>`
  casts in the gate handlers and surfaces shape mismatches at compile time
  for callers that pass a typed `data` value.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2492): harden decisions parser and verify-phase glob (review F11,F12,F13,F14,F17,F20)

- F11: strip fenced code blocks from CONTEXT.md before searching for
  `<decisions>` so an example block inside a ``` fence is not mis-parsed.
- F12: accept tab-indented continuation lines (previously required a leading
  space) so decisions split with `\t` continue cleanly.
- F13: parse EVERY `<decisions>` block in the file via `matchAll`, not just
  the first. CONTEXT.md may legitimately carry more than one block.
- F14: `decisions.parse` handler now resolves a relative path against
  `projectDir` — symmetric with the gate handlers — and still accepts
  absolute paths.
- F17: replace `ls "${PHASE_DIR}"/*-CONTEXT.md | head -1` in verify-phase.md
  with a glob loop (ShellCheck SC2012 fix). Also avoids spawning an extra
  subprocess and survives filenames with whitespace.
- F20: extend the unicode quote-stripping in the discretion-heading match
  to cover U+2018/2019/201A/201B and the U+201C-201F double-quote variants
  plus backtick, so any rendering of "Claude's Discretion" collapses to
  the same key.

Each fix has a regression test in `decisions.test.ts`.
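
The F11 + F13 combination can be sketched as (hypothetical helper name; the real parser lives in `sdk/src/query/decisions.ts`):

```javascript
// Strip fenced code first (F11), then collect EVERY <decisions> block via
// matchAll (F13) — not just the first.
function parseDecisionBlocks(md) {
  const noFences = md.replace(/```[\s\S]*?```/g, '');
  return [...noFences.matchAll(/<decisions>([\s\S]*?)<\/decisions>/g)]
    .map((m) => m[1].trim());
}
```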

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 00:26:53 -04:00
Tom Boucher
1a3d953767 feat: add unified post-planning gap checker (closes #2493) (#2610)
* feat: add unified post-planning gap checker (closes #2493)

Adds a unified post-planning gap checker as Step 13e of plan-phase.md.
After all plans are generated and committed, scans REQUIREMENTS.md and
CONTEXT.md <decisions> against every PLAN.md in the phase directory and
emits a single Source | Item | Status table.

Why
- The existing Requirements Coverage Gate (§13) blocks/re-plans on REQ
  gaps but emits two separate per-source signals. Issue #2493 asks for
  one unified report after planning so that requirements AND
  discuss-phase decisions slipping through are surfaced in one place
  before execution starts.

What
- New workflow.post_planning_gaps boolean config key, default true,
  added to VALID_CONFIG_KEYS, CONFIG_DEFAULTS, hardcoded.workflow, and
  cmdConfigSet (boolean validation).
- New get-shit-done/bin/lib/decisions.cjs — shared parser for
  CONTEXT.md <decisions> blocks (D-NN entries). Designed for reuse by
  the related #2492 plan/verify decision gates.
- New get-shit-done/bin/lib/gap-checker.cjs — parses REQUIREMENTS.md
  (checkbox + traceability table forms), reads CONTEXT.md decisions,
  walks PHASE_DIR/*-PLAN.md, runs word-boundary coverage detection
  (REQ-1 must not match REQ-10), formats a sorted report.
- New gsd-tools gap-analysis CLI command wired through gsd-tools.cjs.
- workflows/plan-phase.md gains §13e between §13d (commit plans) and
  §14 (Present Final Status). Existing §13 gate preserved — §13e is
  additive and non-blocking.
- sdk/prompts/workflows/plan-phase.md gets an equivalent
  post_planning_gaps step for headless mode.
- Docs: CONFIGURATION.md, references/planning-config.md, INVENTORY.md,
  INVENTORY-MANIFEST.json all updated.

Tests
- tests/post-planning-gaps-2493.test.cjs: 30 test cases covering step
  insertion position, decisions parser, gap detector behavior
  (covered/not-covered, false-positive guard, missing-file
  resilience, malformed-input resilience, gate on/off, deterministic
  natural sort), and full config integration.
- Full suite: 5234 / 5234 pass.

Design decisions
- Numbered §13e (sub-step), not §14 — §14 already exists (Present
  Final Status); inserting before it preserves downstream auto-advance
  step numbers.
- Existing §13 gate kept, not replaced — §13 blocks/re-plans on
  REQ gaps; §13e is the unified post-hoc report. Per spec: "default
  behavior MUST be backward compatible."
- Word-boundary ID matching avoids REQ-1 matching REQ-10 and avoids
  brittle semantic/substring matching.
- Shared decisions.cjs parser so #2492 can reuse the same regex.
- Natural-sort keys (REQ-02 before REQ-10) for deterministic output.
- Boolean validation in cmdConfigSet rejects non-boolean values,
  matching the precedent set by drift_threshold/drift_action.
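
The word-boundary ID check can be sketched as (illustrative shape — the shipped detector is in `get-shit-done/bin/lib/gap-checker.cjs`):

```javascript
// REQ-1 must not be "covered" by a plan that only mentions REQ-10: require a
// non-word, non-dash boundary (or string edge) on each side of the id.
function idCovered(id, planText) {
  const escaped = id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`(^|[^\\w-])${escaped}($|[^\\w-])`).test(planText);
}
```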

Closes #2493

* fix(#2493): expose post_planning_gaps in loadConfig() + sync schema example

Address CodeRabbit review on PR #2610:

- core.cjs loadConfig(): return post_planning_gaps from both the
  config.json branch and the global ~/.gsd/defaults.json fallback so
  callers can rely on config.post_planning_gaps regardless of whether
  the key is present (comment 3127977404, Major).
- docs/CONFIGURATION.md: add workflow.post_planning_gaps to the Full
  Schema JSON example so copy/paste users see the new toggle alongside
  security_block_on (comment 3127977392, Minor).
- tests/post-planning-gaps-2493.test.cjs: regression coverage for
  loadConfig() — default true when key absent, honors explicit
  true/false from workflow.post_planning_gaps.
2026-04-22 23:03:59 -04:00
Tom Boucher
cc17886c51 feat: make model profiles runtime-aware for Codex/non-Claude runtimes (closes #2517) (#2609)
* feat: make model profiles runtime-aware for Codex/non-Claude runtimes (closes #2517)

Adds an optional top-level `runtime` config key plus a
`model_profile_overrides[runtime][tier]` map. When `runtime` is set,
profile tiers (opus/sonnet/haiku) resolve to runtime-native model IDs
(and reasoning_effort where supported) instead of bare Claude aliases.

Codex defaults from the spec:
  opus   -> gpt-5.4        reasoning_effort: xhigh
  sonnet -> gpt-5.3-codex  reasoning_effort: medium
  haiku  -> gpt-5.4-mini   reasoning_effort: medium

Claude defaults mirror MODEL_ALIAS_MAP. Unknown runtimes fall back to
the Claude-alias safe default rather than emit IDs the runtime cannot
accept. reasoning_effort is only emitted into Codex install paths;
never returned from resolveModelInternal and never written to Claude
agent frontmatter.

Backwards compatible: any user without `runtime` set sees identical
behavior — the new branch is gated on `config.runtime != null`.

Precedence (highest to lowest):
  1. per-agent model_overrides
  2. runtime-aware tier resolution (when `runtime` is set)
  3. resolve_model_ids: "omit"
  4. Claude-native default
  5. inherit (literal passthrough)
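
The resolution order can be sketched as (hypothetical function and config shapes — the shipped logic is `resolveTierEntry` in core.cjs; the Codex tier IDs are the spec defaults quoted above):

```javascript
const CODEX_TIERS = {
  opus:   { model: 'gpt-5.4',       reasoning_effort: 'xhigh' },
  sonnet: { model: 'gpt-5.3-codex', reasoning_effort: 'medium' },
  haiku:  { model: 'gpt-5.4-mini',  reasoning_effort: 'medium' },
};

function resolveTier(config, agent, tier) {
  if (config.model_overrides && config.model_overrides[agent]) {
    return { model: config.model_overrides[agent] };         // 1. per-agent override
  }
  if (config.runtime === 'codex' && CODEX_TIERS[tier]) {
    return CODEX_TIERS[tier];                                // 2. runtime-aware tier
  }
  if (config.resolve_model_ids === 'omit') return { model: '' }; // 3. omit
  return { model: tier };                                    // 4. Claude-native default
}
```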

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2517): address adversarial review of #2609 (findings 1-16)

Addresses all 16 findings from the adversarial review of PR #2609.
Each finding is enumerated below with its resolution.

CRITICAL
- F1: readGsdRuntimeProfileResolver(targetDir) now probes per-project
  .planning/config.json AND ~/.gsd/defaults.json with per-project winning,
  so the PR's headline claim ("set runtime in project config and Codex
  TOML emit picks it up") actually holds end-to-end.
- F2: resolveTierEntry field-merges user overrides with built-in defaults.
  The CONFIGURATION.md string-shorthand example
    `{ codex: { opus: "gpt-5-pro" } }`
  now keeps reasoning_effort from the built-in entry. Partial-object
  overrides like `{ opus: { reasoning_effort: 'low' } }` keep the
  built-in model. Both paths regression-tested.

MAJOR
- F3: resolveReasoningEffortInternal gates strictly on the
  RUNTIMES_WITH_REASONING_EFFORT allowlist regardless of override
  presence. Override + unknown-runtime no longer leaks reasoning_effort.
- F4: runtime:"claude" is now a no-op for resolution (it is the implicit
  default). It no longer hijacks resolve_model_ids:"omit". Existing
  tests for `runtime:"claude"` returning Claude IDs were rewritten to
  reflect the no-op semantics; a new test asserts the omit case returns "".
- F5: _readGsdConfigFile in install.js writes a stderr warning on JSON
  parse failure instead of silently returning null. Read failure and
  parse failure are warned separately. Library require is hoisted to top
  of install.js so it is not co-mingled with config-read failure modes.
- F6: install.js requires for core.cjs / model-profiles.cjs are hoisted
  to the top of the file with __dirname-based absolute paths so global
  npm install works regardless of cwd. Test asserts both lib paths exist
  relative to install.js __dirname.
- F7: docs/CONFIGURATION.md `runtime` row no longer lists `opencode` as
  a valid runtime — install-path emission for non-Codex runtimes is
  explicitly out of scope per #2517 / #2612, and the doc now points at
  #2612 for the follow-on work. resolveModelInternal still accepts any
  runtime string (back-compat) and falls back safely for unknown values.
- F8: Tests now isolate HOME (and GSD_HOME) to a per-test tmpdir so the
  developer's real ~/.gsd/defaults.json cannot bleed into assertions.
  Same pattern CodeRabbit caught on PRs #2603 / #2604.
- F9: `runtime` and `model_profile_overrides` documented as flat-only
  in core.cjs comments — not routed through `get()` because they are
  top-level keys per docs/CONFIGURATION.md and introducing nested
  resolution for two new keys was not worth the edge-case surface.
- F10/F13: loadConfig now invokes _warnUnknownProfileOverrides on the
  raw parsed config so direct .planning/config.json edits surface
  unknown runtime values (e.g. typo `runtime: "codx"`) and unknown
  tier values (e.g. `model_profile_overrides.codex.banana`) at read
  time. Warnings only — preserves back-compat for runtimes added
  later. Per-process warning cache prevents log spam across repeated
  loadConfig calls.

MINOR / NIT
- F11: Removed dead `tier || 'sonnet'` defensive shortcut. The local
  is now `const alias = tier;` with a comment explaining why `tier`
  is guaranteed truthy at that point (every MODEL_PROFILES entry
  defines `balanced`, the fallback profile).
- F12: Extracted resolveTierEntry() in core.cjs as the single source
  of truth for runtime-aware tier resolution. core.cjs and bin/install.js
  both consume it — no duplicated lookup logic between the two files.
- F14: Added regression tests for findings #1, #2, #3, #4, #6, #10, #13
  in tests/issue-2517-runtime-aware-profiles.test.cjs. Each must-fix
  path has a corresponding test that fails against the pre-fix code
  and passes against the post-fix code.
- F15: docs/CONFIGURATION.md `model_profile` row cross-references
  #1713 / #1806 next to the `adaptive` enum value.
- F16: RUNTIME_PROFILE_MAP remains in core.cjs as the single source of
  truth; install.js imports it through the exported resolveTierEntry
  helper rather than carrying its own copy. Doc files (CONFIGURATION.md,
  USER-GUIDE.md, settings.md) intentionally still embed the IDs as text
  — code comment in core.cjs flags that those doc files must be updated
  whenever the constant changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 23:00:37 -04:00
Tom Boucher
41dc475c46 refactor(workflows): extract discuss-phase modes/templates/advisor for progressive disclosure (closes #2551) (#2607)
* refactor(workflows): extract discuss-phase modes/templates/advisor for progressive disclosure (closes #2551)

Splits 1,347-line workflows/discuss-phase.md into a 495-line dispatcher plus
per-mode files in workflows/discuss-phase/modes/ and templates in
workflows/discuss-phase/templates/. Mirrors the progressive-disclosure
pattern that #2361 enforced for agents.

- Per-mode files: power, all, auto, chain, text, batch, analyze, default, advisor
- Templates lazy-loaded at the step that produces the artifact (CONTEXT.md
  template at write_context, DISCUSSION-LOG.md template at git_commit,
  checkpoint.json schema when checkpointing)
- Advisor mode gated behind `[ -f $HOME/.claude/get-shit-done/USER-PROFILE.md ]`
  — inverse of #2174's --advisor flag (don't pay the cost when unused)
- scout_codebase phase-type→map selection table extracted to
  references/scout-codebase.md
- New tests/workflow-size-budget.test.cjs enforces tiered budgets across
  all workflows/*.md (XL=1700 / LARGE=1500 / DEFAULT=1000) plus the
  explicit <500 ceiling for discuss-phase.md per #2551
- Existing tests updated to read from the new file locations after the
  split (functional equivalence preserved — content moved, not removed)
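
The tiered budget check can be sketched as (budgets are the ones named above; the file-to-tier mapping here is hypothetical except for execute-phase.md's XL tier and discuss-phase.md's explicit <500 ceiling):

```javascript
// Per-file line budgets: XL=1700, LARGE=1500, everything else 1000, with a
// hard <500 ceiling for the discuss-phase dispatcher.
const BUDGETS = { 'execute-phase.md': 1700, 'plan-phase.md': 1500 };

function overBudget(file, lineCount) {
  if (file === 'discuss-phase.md') return lineCount >= 500;
  return lineCount > (BUDGETS[file] || 1000);
}
```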

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(#2607): align modes/auto.md check_existing with parent (Update it, not Skip)

CodeRabbit flagged drift between the parent step (which auto-selects "Update
it") and modes/auto.md (which documented "Skip"). The pre-refactor file had
both — line 182 said "Skip" in the overview, line 250 said "Update it" in the
actual step. The step is authoritative. Fix the new mode file to match.

Refs: PR #2607 review comment 3127783430

* test(#2607): harden discuss-phase regression tests after #2551 split

CodeRabbit identified four test smells where the split weakened coverage:

- workflow-size-budget: assertion was unreachable (entered if-block on match,
  then asserted occurrences === 0 — always failed). Now unconditional.
- bug-2549-2550-2552: bounded-read assertion checked concatenated source, so
  src.includes('3') was satisfied by unrelated content in scout-codebase.md
  (e.g., "3-5 most relevant files"). Now reads parent only with a stricter
  regex. Also asserts SCOUT_REF exists.
- chain-flag-plan-phase: filter(existsSync) silently skipped a missing
  modes/chain.md. Now fails loudly via explicit asserts.
- discuss-checkpoint: same silent-filter pattern across three sources. Now
  asserts each required path before reading.

Refs: PR #2607 review comments 3127783457, 3127783452, plus nitpicks for
chain-flag-plan-phase.test.cjs:21-24 and discuss-checkpoint.test.cjs:22-27

* docs(#2607): fix INVENTORY count, context.md placeholders, scout grep portability

- INVENTORY.md: subdirectory note said "50 top-level references" but the
  section header now says 51. Updated to 51.
- templates/context.md: footer hardcoded XX-name instead of declared
  placeholders [X]/[Name], which would leak sample text into generated
  CONTEXT.md files. Now uses the declared placeholders.
- references/scout-codebase.md: no-maps fallback used grep -rl with
  "\\|" alternation (GNU grep only — silent on BSD/macOS grep). Switched
  to grep -rlE with extended regex for portability.

Refs: PR #2607 review comments 3127783404, 3127783448, plus nitpick for
scout-codebase.md:32-40

* docs(#2607): label fenced examples + clarify overlay/advisor precedence

- analyze.md / text.md / default.md: add language tags (markdown/text) to
  fenced example blocks to silence markdownlint MD040 warnings flagged by
  CodeRabbit (one fence in analyze.md, two in text.md, five in default.md).
- discuss-phase.md: document overlay stacking rules in discuss_areas — fixed
  outer→inner order --analyze → --batch → --text, with a pointer to each
  overlay file for mode-specific precedence.
- advisor.md: add tie-breaker rules for NON_TECHNICAL_OWNER signals — explicit
  technical_background overrides inferred signals; otherwise OR-aggregate;
  contradictory explanation_depth values resolve by most-recent-wins.

Refs: PR #2607 review comments 3127783415, 3127783437, plus nitpicks for
default.md:24, discuss-phase.md:345-365, and advisor.md:51-56

* fix(#2607): extract codebase_drift_gate body to keep execute-phase under XL budget

PR #2605 added 80 lines to execute-phase.md (1622 -> 1702), pushing it over
the XL_BUDGET=1700 line cap enforced by tests/workflow-size-budget.test.cjs
(introduced by this PR). Per the test's own remediation hint and #2551's
progressive-disclosure pattern, extract the codebase_drift_gate step body to
get-shit-done/workflows/execute-phase/steps/codebase-drift-gate.md and leave
a brief pointer in the workflow. execute-phase.md is now 1633 lines.

Budget is NOT relaxed; the offending workflow is tightened.

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 21:57:24 -04:00
Tom Boucher
220da8e487 feat: /gsd-settings-integrations — configure third-party search and review integrations (closes #2529) (#2604)
* feat(#2529): /gsd-settings-integrations — third-party integrations command

Adds /gsd-settings-integrations for configuring API keys, code-review CLI
routing, and agent-skill injection. Distinct from /gsd-settings (workflow
toggles) because these are connectivity, not pipeline shape.

Three sections:
- Search Integrations: brave_search / firecrawl / exa_search API keys,
  plus search_gitignored toggle.
- Code Review CLI Routing: review.models.{claude,codex,gemini,opencode}
  shell-command strings.
- Agent Skills Injection: agent_skills.<agent-type> free-text input,
  validated against [a-zA-Z0-9_-]+.

Security:
- New secrets.cjs module with ****<last-4> masking convention.
- cmdConfigSet now masks value/previousValue in CLI output for secret keys.
- Plaintext is written only to .planning/config.json; never echoed to
  stdout/stderr, never written to audit/log files by this flow.
- Slug validators reject path separators, whitespace, shell metacharacters.
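
The masking convention can be sketched as (illustrative implementation — the shipped helper lives in secrets.cjs; edge-case behavior for very short values is an assumption here):

```javascript
// ****<last-4> convention: everything but the last four characters collapses.
function maskSecret(value) {
  if (!value) return '';
  return '****' + String(value).slice(-4);
}
```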

Tests (tests/settings-integrations.test.cjs — 25 cases):
- Artifact presence / frontmatter.
- Field round-trips via gsd-tools config-set for all four search keys,
  review.models.<cli>, agent_skills.<agent-type>.
- Config-merge safety: unrelated keys preserved across writes.
- Masking: config-set output never contains plaintext sentinel.
- Logging containment: plaintext secret sentinel appears only in
  config.json under .planning/, nowhere else on disk.
- Negative: path-traversal, shell-metachar, and empty-slug rejected.
- /gsd:settings workflow mentions /gsd:settings-integrations.

Docs:
- docs/COMMANDS.md: new command entry with security note.
- docs/CONFIGURATION.md: integration settings section (keys, routing,
  skills injection) with masking documentation.
- docs/CLI-TOOLS.md: reviewer CLI routing and secret-handling sections.
- docs/INVENTORY.md + INVENTORY-MANIFEST.json regenerated.

Closes #2529

* fix(#2529): mask secrets in config-get; address CodeRabbit review

cmdConfigGet was emitting plaintext for brave_search/firecrawl/exa_search.
Apply the same isSecretKey/maskSecret treatment used by config-set so the
CLI surface never echoes raw API keys; plaintext still lives only in
config.json on disk.

Also addresses CodeRabbit review items in the same PR area:
- #3127146188: config-get plaintext leak (root fix above)
- #3127146211: rename test sentinels to concat-built markers so secret
  scanners stop flagging the test file. Behavior preserved.
- #3127146207: add explicit 'text' language to fenced code blocks (MD040).
- nitpick: unify masked-value wording in read_current legend
  ('****<last-4>' instead of '**** already set').
- nitpick: extend round-trip test to cover search_gitignored toggle.

New regression test 'config-get masks secrets and never echoes plaintext'
verifies the fix for all three secret keys.

* docs(#2529): bump INVENTORY counts post-rebase (commands 84→85, workflows 82→83)

* fix(test): bump CLI Modules count 27→28 after rebase onto main (CI #24811455435)

PR #2604 was rebased onto main before #2605 (drift.cjs) merged. The
pull_request CI runs against the merge ref (refs/pull/2604/merge),
which now contains 28 .cjs files in get-shit-done/bin/lib/, but
docs/INVENTORY.md headline still said "(27 shipped)".

inventory-counts.test.cjs failed with:
  AssertionError: docs/INVENTORY.md "CLI Modules (27 shipped)" disagrees
  with get-shit-done/bin/lib/ file count (28)

Rebased branch onto current origin/main (picks up drift.cjs row, which
was already added by #2605) and bumped the headline to 28.

Full suite: 5200/5200 pass.
2026-04-22 21:41:00 -04:00
Tom Boucher
c90081176d fix(#2598): pass shell: true to npm spawnSync on Windows (#2600)
* fix(#2598): pass shell: true to npm spawnSync on Windows

Since Node's CVE-2024-27980 fix (>= 18.20.2 / >= 20.12.2 / >= 21.7.3),
spawnSync refuses to launch .cmd/.bat files on Windows without
`shell: true`. installSdkIfNeeded picks npmCmd='npm.cmd' on win32 and
then calls spawnSync five times — every one returns
{ status: null, error: EINVAL } before npm ever runs. The installer
checks `status !== 0`, trips the failure path, and emits a bare
"Failed to `npm install` in sdk/." with zero diagnostic output because
`stdio: 'inherit'` never had a child to stream.

Every fresh install on Windows has failed at the SDK build step on any
supported Node version for the life of the post-CVE bin/install.js.

Introduce a local `spawnNpm(args, opts)` helper inside
installSdkIfNeeded that injects `shell: process.platform === 'win32'`
when the caller doesn't override it. Route all five npm invocations
through it: `npm install`, `npm run build`, `npm install -g .`, and
both `npm config get prefix` calls.

Adds a static regression test that parses installSdkIfNeeded and
asserts no bare `spawnSync(npmCmd, ...)` remains, a shell-aware
wrapper exists, and at least five invocations go through it.

Closes #2598

* fix(#2598): surface spawnSync diagnostics in SDK install fatal paths

Thread result.error / result.signal / result.status into emitSdkFatal for
the three npm failure branches (install, run build, install -g .) via a
formatSpawnFailure helper. The root cause of #2598 went silent precisely
because `{ status: null, error: EINVAL }` was reduced to a generic
"Failed to `npm install` in sdk/." with no diagnostic — stdio: 'inherit'
had no child process to stream and result.error was swallowed. Any future
regression in the same area (EINVAL, ENOENT, signal termination) now
prints its real cause in the red fatal banner.

Also strengthen the regression test so it cannot pass with only four
real npm call sites: the previous `spawnSync(npmCmd, ..., shell)` regex
double-counted the spawnNpm helper's own body when a helper existed.
Separate arrow-form vs function-form helper detection and exclude the
wrapper body from explicitShellNpm so the `>= 5` assertion reflects real
invocations only. Add a new test that asserts all three fatal branches
now reference formatSpawnFailure / result.error / signal / status.

Addresses CodeRabbit review comments on PR #2600:
- r3126987409 (bin/install.js): surface underlying spawnSync failure
- r3126987419 (test): explicitShellNpm overcounts by one via helper def
2026-04-22 21:23:44 -04:00
Tom Boucher
1a694fcac3 feat: auto-remap codebase after significant phase execution (closes #2003) (#2605)
* feat: auto-remap codebase after significant phase execution (#2003)

Adds a post-phase structural drift detector that compares the committed tree
against `.planning/codebase/STRUCTURE.md` and either warns or auto-remaps
the affected subtrees when drift exceeds a configurable threshold.

## Summary
- New `bin/lib/drift.cjs` — pure detector covering four drift categories:
  new directories outside mapped paths, new barrel exports at
  `(packages|apps)/*/src/index.*`, new migration files, and new route
  modules. Prioritizes the most-specific category per file.
- New `verify codebase-drift` CLI subcommand + SDK handler, registered as
  `gsd-sdk query verify.codebase-drift`.
- New `codebase_drift_gate` step in `execute-phase` between
  `schema_drift_gate` and `verify_phase_goal`. Non-blocking by contract —
  any error logs and the phase continues.
- Two new config keys: `workflow.drift_threshold` (int, default 3) and
  `workflow.drift_action` (`warn` | `auto-remap`, default `warn`), with
  enum/integer validation in `config-set`.
- `gsd-codebase-mapper` learns an optional `--paths <p1,p2,...>` scope hint
  for incremental remapping; agent/workflow docs updated.
- `last_mapped_commit` lives in YAML frontmatter on each
  `.planning/codebase/*.md` file; `readMappedCommit`/`writeMappedCommit`
  round-trip helpers ship in `drift.cjs`.
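
  The round-trip can be sketched as (hypothetical implementation — the shipped helpers are `readMappedCommit`/`writeMappedCommit` in drift.cjs):

```javascript
// last_mapped_commit rides in YAML frontmatter on each .planning/codebase/*.md
// doc, so the baseline survives git moves and per-doc regeneration.
function readMappedCommitFrom(content) {
  const m = content.match(/^---\n[\s\S]*?last_mapped_commit:\s*(\S+)[\s\S]*?\n---/);
  return m ? m[1] : null;
}

function writeMappedCommitTo(content, sha) {
  if (readMappedCommitFrom(content) !== null) {
    return content.replace(/last_mapped_commit:\s*\S+/, `last_mapped_commit: ${sha}`);
  }
  return `---\nlast_mapped_commit: ${sha}\n---\n` + content;
}
```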

## Tests
- 55 new tests in `tests/drift-detection.test.cjs` covering:
  classification, threshold gating at 2/3/4 elements, warn vs. auto-remap
  routing, affected-path scoping, `--paths` sanitization (traversal,
  absolute, shell metacharacter rejection), frontmatter round-trip,
  defensive paths (missing STRUCTURE.md, malformed input, non-git repos),
  CLI JSON output, and documentation parity.
- Full suite: 5044 pass / 0 fail.

## Documentation
- `docs/CONFIGURATION.md` — rows for both new keys.
- `docs/ARCHITECTURE.md` — section on the post-execute drift gate.
- `docs/AGENTS.md` — `--paths` flag on `gsd-codebase-mapper`.
- `docs/USER-GUIDE.md` — user-facing behavior note + toggle commands.
- `docs/FEATURES.md` — new 27a section with REQ-DRIFT-01..06.
- `docs/INVENTORY.md` + `docs/INVENTORY-MANIFEST.json` — drift.cjs listed.
- `get-shit-done/workflows/execute-phase.md` — `codebase_drift_gate` step.
- `get-shit-done/workflows/map-codebase.md` — `parse_paths_flag` step.
- `agents/gsd-codebase-mapper.md` — `--paths` directive under parse_focus.

## Design decisions
- **Frontmatter over sidecar JSON** for `last_mapped_commit`: keeps the
  baseline attached to the file, survives git moves, survives per-doc
  regeneration, no extra file lifecycle.
- **Substring match against STRUCTURE.md** for `isPathMapped`: the map is
  free-form markdown, not a structured manifest; any mention of a path
  prefix counts as "mapped territory". Cheap, no parser, zero false
  negatives on reasonable maps.
- **Category priority: migration > route > barrel > new_dir** so a file
  matching multiple rules counts exactly once at the most specific level.
- **Empty-tree SHA fallback** (`4b825dc6…`) when `last_mapped_commit` is
  absent — semantically correct (no baseline means everything is drift)
  and deterministic across repos.
- **Four layers of non-blocking** — detector try/catch, CLI try/catch, SDK
  handler try/catch, and workflow `|| echo` shell fallback. Any single
  layer failing still returns a valid skipped result.
- **SDK handler delegates to `gsd-tools.cjs`** rather than re-porting the
  detector to TypeScript, keeping drift logic in one canonical place.

Closes #2003

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* docs(mapper): tag --paths fenced block as text (CodeRabbit MD040)

Comment 3127255172.

* docs(config): use /gsd- dash command syntax in drift_action row (CodeRabbit)

Comment 3127255180. Matches the convention used by every other command
reference in docs/CONFIGURATION.md.

* fix(execute-phase): initialize AGENT_SKILLS_MAPPER + tag fenced blocks

Two CodeRabbit findings on the auto-remap branch of the drift gate:

- 3127255186 (must-fix): the mapper Task prompt referenced
  ${AGENT_SKILLS_MAPPER} but only AGENT_SKILLS (for gsd-executor) is
  loaded at init_context (line 72). Without this fix the literal
  placeholder string would leak into the spawned mapper's prompt.
  Add an explicit gsd-sdk query agent-skills gsd-codebase-mapper step
  right before the Task spawn.
- 3127255183: tag the warn-message and Task() fenced code blocks as
  text to satisfy markdownlint MD040.

* docs(map-codebase): wire PATH_SCOPE_HINT through every mapper prompt

CodeRabbit (review id 4158286952, comment 3127255190) flagged that the
parse_paths_flag step defined incremental-remap semantics but did not
inject a normalized variable into the spawn_agents and sequential_mapping
mapper prompts, so incremental remap could silently regress to a
whole-repo scan.

- Define SCOPED_PATHS / PATH_SCOPE_HINT in parse_paths_flag.
- Inject ${PATH_SCOPE_HINT} into all four spawn_agents Task prompts.
- Document the same scope contract for sequential_mapping mode.

* fix(drift): writeMappedCommit tolerates missing target file

CodeRabbit (review id 4158286952, drift.cjs:349-355 nitpick) noted that
readMappedCommit returns null on ENOENT but writeMappedCommit threw — an
asymmetry that breaks first-time stamping of a freshly produced doc that
the caller has not yet written.

- Catch ENOENT on the read; treat absent file as empty content.
- Add a regression test that calls writeMappedCommit on a non-existent
  path and asserts the file is created with correct frontmatter.
  Test was authored to fail before the fix (ENOENT) and passes after.

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 21:21:44 -04:00
Tom Boucher
9c0a153a5f feat: /gsd-settings-advanced — power-user config tuning command (closes #2528) (#2603)
* feat: /gsd-settings-advanced — power-user config tuning command (closes #2528)

Adds a second-tier interactive configuration command covering the power-user
knobs that don't belong in the common-case /gsd-settings prompt. Six sectioned
AskUserQuestion batches cover planning, execution, discussion, cross-AI, git,
and runtime settings (19 config keys total). Current values are pre-selected;
numeric fields reject non-numeric input; writes route through
gsd-sdk query config-set so unrelated keys are preserved.

- commands/gsd/settings-advanced.md — command entry
- get-shit-done/workflows/settings-advanced.md — six-section workflow
- get-shit-done/workflows/settings.md — advertise advanced command
- get-shit-done/bin/lib/config-schema.cjs — add context_window to VALID_CONFIG_KEYS
- docs/COMMANDS.md, docs/CONFIGURATION.md, docs/INVENTORY.md — docs + inventory
- tests/gsd-settings-advanced.test.cjs — 81 tests (files, frontmatter,
  field coverage, pre-selection, merge-preserves-siblings, VALID_CONFIG_KEYS
  membership, confirmation table, /gsd-settings cross-link, negative scenarios)

All 5073 tests pass; coverage 88.66% (>= 70% threshold).

* docs(settings-advanced): clarify per-field numeric bounds and label fenced blocks

Addresses CodeRabbit review on PR #2603:
- Numeric-input rule now states min is field-specific: plan_bounce_passes
  and max_discuss_passes require >= 1; other numeric fields accept >= 0.
  Resolves the inconsistency between the global rule and the field-level
  prompts (CodeRabbit comment 3127136557).
- Adds 'text' fence language to seven previously unlabeled code blocks in
  the workflow (six AskUserQuestion sections plus the confirmation banner)
  to satisfy markdownlint MD040 (CodeRabbit comment 3127136561).

* test(settings-advanced): tighten section assertion, fix misleading test name, add executable numeric-input coverage

Addresses CodeRabbit review on PR #2603:
- Required section list now asserts the full 'Runtime / Output' heading
  rather than the looser 'Runtime' substring (comment 3127136564).
- Renames the subagent_timeout coercion test to match the actual key
  under test (was titled 'context_window' but exercised
  workflow.subagent_timeout — comment 3127136573).
- Adds two executable behavioral tests at the config-set boundary
  (comment 3127136579):
  * Non-numeric input on a numeric key currently lands as a string —
    locks in that the workflow's AskUserQuestion re-prompt loop is the
    layer responsible for type rejection. If a future change adds CLI-side
    numeric validation, the assertion flips and the test surfaces it.
  * Numeric string on workflow.max_discuss_passes is coerced to Number —
    locks in the parser invariant for a second numeric key.
2026-04-22 20:50:15 -04:00
Tom Boucher
86c5863afb feat: add settings layers to /gsd-settings (Group A toggles) (closes #2527) (#2602)
* feat(#2527): add settings layers to /gsd:settings (Group A toggles)

Expand /gsd:settings from 14 to 22 settings, grouped into six visual
sections: Planning, Execution, Docs & Output, Features, Model & Pipeline,
Misc. Adds 8 new toggles:

  workflow.pattern_mapper, workflow.tdd_mode, workflow.code_review,
  workflow.code_review_depth (conditional on code_review=on),
  workflow.ui_review, commit_docs, intel.enabled, graphify.enabled

All 8 keys already existed in VALID_CONFIG_KEYS and docs/CONFIGURATION.md;
this wires them into the interactive flow, update_config write step,
~/.gsd/defaults.json persistence, and confirmation table.

Closes #2527

* test(#2527): tighten leaf-collision and rename mismatched negative test

Addresses CodeRabbit findings on PR #2602:

- comment 3127100796: leaf-only matching collapsed `intel.enabled` and
  `graphify.enabled` to a single `enabled` token, so one occurrence
  could satisfy both assertions. Replace with hasPathLike(), which
  requires each dotted segment to appear in order within a bounded
  window. Applied to both update_config and save_as_defaults blocks.

- comment 3127100798: the negative-test description claimed to verify
  invalid `code_review_depth` value rejection but actually exercised an
  unknown key path. Split into two suites with accurate names: one
  asserts settings.md constrains the depth options, the other asserts
  config-set rejects an unknown key path.

* docs(#2527): clarify resolved config path for /gsd-settings

Addresses CodeRabbit comment 3127100790 on PR #2602: the original line
implied a single `.planning/config.json` target, but settings updates
route to `.planning/workstreams/<active>/config.json` when a workstream
is active. Document both resolved paths so the merge target is
unambiguous.
2026-04-22 20:49:52 -04:00
Tom Boucher
1f2850c1a8 fix(#2597): expand dotted query tokens with trailing args (#2599)
resolveQueryArgv only expanded `init.execute-phase` → `init execute-phase`
when the tokens array had length 1. Argv like `init.execute-phase 1` has
length 2, skipped the expansion, and resolved to no registered handler.

All 50+ workflow files use the dotted form with arguments, so this broke
every non-argless query route (`init.execute-phase`, `state.update`,
`phase.add`, `milestone.complete`, etc.) at runtime.

Rename `expandSingleDottedToken` → `expandFirstDottedToken`: split only
the first token on its dots (guarding against `--` flags) and preserve
the tail as positional args. Identity comparison at the call site still
detects "no expansion" since we return the input array unchanged.

Adds regression tests for the three failure patterns reported:
`init.execute-phase 1`, `state.update status X`, `phase.add desc`.

Closes #2597
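The renamed expansion can be sketched like this (illustrative only; the real resolveQueryArgv logic may differ in detail):

```javascript
// Split only the FIRST token on its dots, preserving the tail as
// positional args; flags and dotless tokens pass through unchanged.
function expandFirstDottedToken(tokens) {
  if (tokens.length === 0) return tokens;
  const [head, ...rest] = tokens;
  if (head.startsWith('--') || !head.includes('.')) {
    return tokens; // returning the same array signals "no expansion"
  }
  return [...head.split('.'), ...rest];
}
```

Returning the input array unchanged is what lets the call site's identity comparison still detect the "no expansion" case.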
2026-04-22 17:30:08 -04:00
Tom Boucher
b35fdd51f3 Revert "feat(#2473): ship refuses to open PR when HANDOFF.json declares in-pr…" (#2596)
This reverts commit 7212cfd4de.
2026-04-22 12:57:12 -04:00
Fernando Castillo
7212cfd4de feat(#2473): ship refuses to open PR when HANDOFF.json declares in-progress work (#2553)
* feat(#2473): ship refuses to open PR when HANDOFF.json declares in-progress work

Add a preflight step to /gsd-ship that parses .planning/HANDOFF.json and
refuses to run git push + gh pr create when any remaining_tasks[].status
is not in the terminal set {done, cancelled, deferred_to_backend, wont_fix}.

Refusal names each blocking task and lists four resolutions (finish, mark
terminal, delete stale file, --force). Missing HANDOFF.json is a no-op so
projects that do not use /gsd-pause-work see no behavior change.

Also documents the terminal-statuses contract in references/artifact-types.md
and adds tests/ship-handoff-preflight.test.cjs to lock in the contract.

Closes #2473

* fix(#2473): capture node exit from $() so malformed HANDOFF.json hard-stops

Command substitution BLOCKING=$(node -e "...") discards the inner process
exit code, so a corrupted HANDOFF.json that fails JSON.parse would yield
empty BLOCKING and fall through silently to push_branch — the opposite of
what preflight is supposed to do.

Capture node's exit into HANDOFF_EXIT via $? immediately after the
assignment and branch on it. A non-zero exit is now a hard refusal with
the parser error printed on the preceding stderr line. --force does not
bypass this branch: if the file exists and can't be parsed, something is
wrong and the user should fix it (option 3 in the refusal message —
"Delete HANDOFF.json if it's stale" — still applies).

Verified with a tmp-dir simulation: captured exit 2, hard-stop fires
correctly on malformed JSON. Added a test case asserting the capture
($?) + branch (-ne 0) + parser exit (process.exit(2)) are all present,
so a future refactor can't silently reintroduce the bug.

Reported by @coderabbitai on PR #2553.
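A minimal sketch of the capture pattern, using a throwaway temp file to simulate the corrupted HANDOFF.json (the inline node one-liner is illustrative, not the exact workflow script):

```shell
# Simulate a corrupted HANDOFF.json in a temp file.
HANDOFF=$(mktemp)
echo '{ not json' > "$HANDOFF"

# Command substitution swallows the inner exit code...
BLOCKING=$(node -e "JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8'))" "$HANDOFF" 2>/dev/null)
# ...so capture it into a variable IMMEDIATELY after the assignment.
HANDOFF_EXIT=$?

if [ "$HANDOFF_EXIT" -ne 0 ]; then
  echo "hard-stop: HANDOFF.json unparseable (exit $HANDOFF_EXIT)" >&2
fi
rm -f "$HANDOFF"
```

Any command placed between the `$( )` assignment and the `$?` read would clobber the captured exit code, which is why the capture must come first.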
2026-04-22 12:11:31 -04:00
Tom Boucher
2b5c35cdb1 test(#2519): add regression test for sdk tarball dist inclusion (#2586)
* test(#2519): add regression test verifying sdk/package.json has files + prepublishOnly

Guards the sdk/package.json fix for #2519 (tarball shipped without dist/)
so future edits can't silently drop either the `files` whitelist or the
`prepublishOnly` build hook. Asserts:

- `files` is a non-empty array
- `files` includes "dist" (so compiled CLI ships in tarball)
- `scripts.prepublishOnly` runs a build (npm run build / tsc)
- `bin` target lives under dist/ (sanity tie-in)

Closes #2519

* test(#2519): accept valid npm glob variants for dist in files matcher

Addresses CodeRabbit nitpick: the previous equality check on 'dist' / 'dist/' /
'dist/**' would false-fail on other valid npm packaging forms like './dist',
'dist/**/*', or backslash-separated paths. Normalize each entry and use a
regex that accepts all common dist path variants.
2026-04-22 12:09:12 -04:00
Tom Boucher
73c1af5168 fix(#2543): replace legacy /gsd-<cmd> syntax with /gsd:<cmd> across all source files (#2595)
Commands are now installed as commands/gsd/<name>.md and invoked as
/gsd:<name> in Claude Code. The old hyphen form /gsd-<name> was still
hardcoded in hundreds of places across workflows, references, templates,
lib modules, and command files — causing "Unknown command" errors
whenever GSD suggested a command to the user.

Replace all /gsd-<cmd> occurrences where <cmd> is a known command name
(derived at runtime from commands/gsd/*.md) using a targeted Node.js
script. Agent names, tool names (gsd-sdk, gsd-tools), directory names,
and path fragments are not touched.

Adds regression test tests/bug-2543-gsd-slash-namespace.test.cjs that
enforces zero legacy occurrences going forward. Removes inverted
tests/stale-colon-refs.test.cjs (bug #1748) which enforced the now-obsolete
hyphen form; the new bug-2543 test supersedes it. Updates 5 assertion
tests that hardcoded the old hyphen form to accept the new colon form.

Closes #2543

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:04:25 -04:00
Tom Boucher
533973700c feat(#2538): add last: /cmd suffix to statusline (opt-in) (#2594)
Adds a `statusline.show_last_command` config toggle (default: false) that
appends ` │ last: /<cmd>` to the statusline, showing the most recently
invoked slash command in the current session.

The suffix is derived by tailing the active Claude Code transcript
(provided as transcript_path in the hook input) and extracting the last
<command-name> tag. Reads only the final 256 KiB to stay cheap per render.
Graceful degradation: missing transcript, no recorded command, unreadable
config, or parse errors all silently omit the suffix without breaking the
statusline.

Closes #2538
2026-04-22 12:04:21 -04:00
Tom Boucher
349daf7e6a fix(#2545): use word boundary in path replacement to catch ~/.claude without trailing slash (#2592)
The Copilot content converter only replaced `~/.claude/` and
`$HOME/.claude/` when followed by a literal `/`. Bare references
(e.g. `configDir = ~/.claude` at end of line) slipped through and
triggered the post-install "Found N unreplaced .claude path reference(s)"
warning, since the leak scanner uses `(?:~|$HOME)/\.claude\b`.

Switched both replacements to a `(\/|\b)` capture group so trailing-slash
and bare forms are handled in a single pass — matching the pattern
already used by Antigravity, OpenCode, Kilo, and Codex converters.

Closes #2545
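The `(\/|\b)` capture-group fix can be sketched as below; the `{{CONFIG_DIR}}` placeholder is illustrative (the real converter substitutes the Copilot-equivalent path):

```javascript
// (\/|\b) matches either a trailing slash or a bare word boundary, so
// both "~/.claude/settings.json" and end-of-line "~/.claude" are
// replaced in a single pass; $1 re-emits the slash when present.
function replaceClaudePaths(text) {
  return text
    .replace(/~\/\.claude(\/|\b)/g, '{{CONFIG_DIR}}$1')
    .replace(/\$HOME\/\.claude(\/|\b)/g, '{{CONFIG_DIR}}$1');
}
```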
2026-04-22 12:04:17 -04:00
Tom Boucher
6b7b5c15a5 fix(#2559): remove stale year injection from research agent web search instructions (#2591)
The gsd-phase-researcher and gsd-project-researcher agents instructed
WebSearch queries to always include 'current year' (e.g., 2024). As
time passes, a hardcoded year biases search results toward stale
dated content — users saw 2024-tagged queries producing stale blog
references in 2026.

Remove the year-injection guidance. Instead, rely on checking
publication dates on the returned sources. Query templates and
success criteria updated accordingly.

Closes #2559
2026-04-22 12:04:13 -04:00
Tom Boucher
67a9550720 fix(#2549,#2550,#2552): bound discuss-phase context reads, add phase-type map selection, prohibit split reads (#2590)
#2549: load_prior_context was reading every prior *-CONTEXT.md file,
growing linearly with project phase count. Cap to the 3 most recent
phases. If .planning/DECISIONS-INDEX.md exists, read that instead.

#2550: scout_codebase claimed to select maps "based on phase type" but
had no classifier — agents read all 7 maps. Replace with an explicit
phase-type-to-maps table (2–3 maps per phase type) with a Mixed fallback.

#2552: Add explicit instruction not to split-read the same file at two
different offsets. Split reads break prompt cache reuse and cost more
than a single full read.

Closes #2549
Closes #2550
Closes #2552

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 12:04:10 -04:00
Tom Boucher
fba040c72c fix(#2557): Gemini/Antigravity local hook commands use relative paths, not \$CLAUDE_PROJECT_DIR (#2589)
\$CLAUDE_PROJECT_DIR is Claude Code-specific. Gemini CLI doesn't set it, and on
Windows its path-join logic doubled the value producing unresolvable paths like
D:\Projects\GSD\'D:\Projects\GSD'. Gemini runs project hooks with project root
as cwd, so bare relative paths (e.g. node .gemini/hooks/gsd-check-update.js)
are cross-platform and correct. Claude Code and others still use the env var.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 12:04:06 -04:00
Tom Boucher
7032f44633 fix(#2544): exit 1 on missing key in config-get (#2588)
The configGet query handler previously threw GSDError with
ErrorClassification.Validation, which maps to exit code 10. Callers
using `if ! gsd-sdk query config-get key; then fallback; fi` could
not distinguish a missing key from a usage or schema error through
the exit code alone: exit 10 is still a non-zero failure, but the
documented UNIX convention (cf. `git config --get`) is exit 1 for
an absent key.

Change the classification for the two 'Key not found' throw sites to
ErrorClassification.Execution so the CLI exits 1 on missing key.
Usage/schema errors (no key argument, malformed JSON, missing
config.json) remain Validation.

Closes #2544
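The resulting classification-to-exit-code mapping, sketched with hypothetical names mirroring the commit message (the SDK's real GSDError/ErrorClassification types may differ):

```javascript
const ErrorClassification = { Execution: 'execution', Validation: 'validation' };

// Sketch: absent key -> exit 1 (git-config convention);
// usage/schema errors -> exit 10, unchanged.
function exitCodeFor(classification) {
  switch (classification) {
    case ErrorClassification.Execution:  return 1;
    case ErrorClassification.Validation: return 10;
    default:                             return 1;
  }
}
```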
2026-04-22 12:04:03 -04:00
Tom Boucher
2404b40a15 fix(#2555): SDK agent-skills reads config.agent_skills and returns <agent_skills> block (#2587)
The SDK query handler `agent-skills` previously scanned every skill
directory on the filesystem and returned a flat JSON list, ignoring
`config.agent_skills[agentType]` entirely. Workflows that interpolate
$(gsd-sdk query agent-skills <type>) into Task() prompts got a JSON
dump of all skills instead of the documented <agent_skills> block.

Port `buildAgentSkillsBlock` semantics from
get-shit-done/bin/lib/init.cjs into the SDK handler:

- Read config.agent_skills[agentType] via loadConfig()
- Support single-string and array forms
- Validate each project-relative path stays inside the project root
  (symlink-aware, mirrors security.cjs#validatePath)
- Support `global:<name>` prefix for ~/.claude/skills/<name>/
- Skip entries whose SKILL.md is missing, with a stderr warning
- Return the exact string block workflows embed:
  <agent_skills>\nRead these user-configured skills:\n- @.../SKILL.md\n</agent_skills>
- Empty string when no agent type, no config, or nothing valid — matches
  gsd-tools.cjs cmdAgentSkills output.
2026-04-22 12:03:59 -04:00
Tom Boucher
0d6349a6c1 fix(#2554): preserve leading zero in getMilestonePhaseFilter (#2585)
The normalization `replace(/^0+/, '')` over-stripped decimal phase IDs:
`"00.1"` collapsed to `".1"`, while the disk-side extractor yielded
`"0.1"` from `"00.1-<slug>"`. Set membership failed and inserted decimal
phases were silently excluded from every disk scan inside
`buildStateFrontmatter`, causing `state update` to rewind progress
counters.

Strip leading zeros only when followed by a digit
(`replace(/^0+(?=\d)/, '')`), preserving the zero before the decimal
point while keeping existing behavior for zero-padded integer IDs.

Closes #2554
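The two normalizations side by side, as a quick sketch:

```javascript
// Old: strips ALL leading zeros, so "00.1" collapses to ".1".
const overStrip = (id) => id.replace(/^0+/, '');

// New: the (?=\d) lookahead only strips a zero when another digit
// follows, preserving the zero before a decimal point.
const fixedStrip = (id) => id.replace(/^0+(?=\d)/, '');
```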
2026-04-22 12:03:56 -04:00
Tom Boucher
c47a6a2164 fix: correct VALID_CONFIG_KEYS — remove internal state key, add missing public keys, migration hints (#2561)
* fix(#2530-2535): correct VALID_CONFIG_KEYS set — remove internal state key, add missing public keys, add migration hints

- Remove workflow._auto_chain_active from VALID_CONFIG_KEYS (internal runtime state, not user-settable) (#2530)
- Add hooks.workflow_guard to VALID_CONFIG_KEYS (read by gsd-workflow-guard.js hook, already documented) (#2531)
- Add workflow.ui_review to VALID_CONFIG_KEYS (read in autonomous.md via config-get) (#2532)
- Add workflow.max_discuss_passes to VALID_CONFIG_KEYS (read in discuss-phase.md via config-get) (#2533)
- Add CONFIG_KEY_SUGGESTIONS entries for sub_repos → planning.sub_repos and plan_checker → workflow.plan_check (#2535)
- Document workflow.ui_review and workflow.max_discuss_passes in docs/CONFIGURATION.md
- Clear INTERNAL_KEYS exemption in parity test (workflow._auto_chain_active removed from schema entirely)
- Add regression test file tests/bug-2530-valid-config-keys.test.cjs covering all 6 bugs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: align SDK VALID_CONFIG_KEYS with CJS — remove internal key, add missing public keys

- Remove workflow._auto_chain_active from SDK (internal runtime state, not user-settable)
- Add workflow.ui_review, workflow.max_discuss_passes, hooks.workflow_guard to SDK
- Add ui_review and max_discuss_passes to Full Schema example in CONFIGURATION.md

Resolves CodeRabbit review on #2561.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 11:28:25 -04:00
forfrossen
af2dba2328 fix(hooks): detect Claude Code via stdin session_id (closes #2520) (#2521)
* fix(hooks): detect Claude Code via stdin session_id, not filtered env (#2520)

The #2344 fix assumed `CLAUDECODE` would propagate to hook subprocesses.
On Claude Code v2.1.116 it doesn't — Claude Code applies a separate env
filter to PreToolUse hook commands that drops bare CLAUDECODE and
CLAUDE_SESSION_ID, keeping only CLAUDE_CODE_*-prefixed vars plus
CLAUDE_PROJECT_DIR. As a result every Edit/Write on an existing file
produced a redundant READ-BEFORE-EDIT advisory inside Claude Code.

Use `data.session_id` from the hook's stdin JSON as the primary Claude
Code signal (it's part of Claude Code's documented PreToolUse hook-input
schema). Keep CLAUDE_CODE_ENTRYPOINT / CLAUDE_CODE_SSE_PORT env checks
as propagation-verified fallbacks, and keep the legacy
CLAUDE_SESSION_ID / CLAUDECODE checks for back-compat and
future-proofing.

Add tests/bug-2520-read-guard-hook-subprocess-env.test.cjs, which spawns
the hook with an env mirroring the actual Claude Code hook-subprocess
filter. Extend the legacy test harnesses to also strip the
propagation-verified CLAUDE_CODE_* vars so positive-path tests keep
passing when the suite itself runs inside a Claude Code session (same
class of leak as #2370 / PR #2375, now covering the new detection
signals).

Non-Claude-host behavior (OpenCode / MiniMax) is unchanged: with no
`session_id` on stdin and no CLAUDE_CODE_* env var, the advisory still
fires.

Closes #2520
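The detection order described above can be sketched as follows (hook structure is illustrative; the real hook's code may differ):

```javascript
// Primary signal: session_id from the hook's stdin JSON (part of
// Claude Code's documented PreToolUse hook-input schema). Env vars
// are fallbacks only, since the hook-subprocess env filter drops
// bare CLAUDECODE / CLAUDE_SESSION_ID.
function isClaudeCode(stdinData, env) {
  if (stdinData && typeof stdinData.session_id === 'string') return true;
  if (env.CLAUDE_CODE_ENTRYPOINT || env.CLAUDE_CODE_SSE_PORT) return true; // propagation-verified
  if (env.CLAUDE_SESSION_ID || env.CLAUDECODE) return true;               // legacy back-compat
  return false;
}
```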

* test(2520): isolate session_id signal from env fallbacks in regression test

Per reviewer feedback (Copilot + CodeRabbit on #2521): the session_id
isolation test used the helper's default CLAUDE_CODE_ENTRYPOINT /
CLAUDE_CODE_SSE_PORT values, so the env fallback would rescue the skip
even if the primary `data.session_id` check regressed. Pass an explicit
env override that clears those fallbacks, so only the stdin `session_id`
signal can trigger the skip.

Other cases (env-only fallback, negative / non-Claude host) already
override env appropriately.

---------

Co-authored-by: forfrossen <forfrossensvart@gmail.com>
2026-04-22 10:41:58 -04:00
elfstrob
9b5397a30f feat(sdk): add queued_phases to init.manager (closes #2497) (#2514)
* feat(sdk): add queued_phases to init.manager (closes #2497)

Surfaces the milestone immediately AFTER the active one so the
/gsd-manager dashboard can preview upcoming phases without mixing
them into the active phases grid.

Changes:
- roadmap.ts: exports two new helpers
  - extractPhasesFromSection(section): parses phase number / name /
    goal / depends_on using the same pattern initManager uses for
    the active milestone, so queued phases have identical shape.
  - extractNextMilestoneSection(content, projectDir): resolves the
    current milestone via the STATE-first path (matching upstream
    PR #2508) then scans for the next ## milestone heading. Shipped
    milestones are stripped first so they can't shadow the real
    next. Returns null when the active milestone is the last one.
- init-complex.ts: initManager now exposes
  - queued_phases: Array<{ number, name, display_name, goal,
    depends_on, dep_phases, deps_display }>
  - queued_milestone_version: string | null
  - queued_milestone_name: string | null
  Existing phases array is unchanged — callers that only care about
  the active milestone see no behavior difference.

Scope note: PR #2508 (merged upstream 2026-04-21) superseded the
#2495 + #2496 portions of this branch's original submission. This
commit is the rebased remainder contributing only #2497 on top of
upstream's new helpers.

Test coverage (7 new tests, all passing):
- roadmap.test.ts: +5 tests
  - extractPhasesFromSection parses multiple phases with goal + deps
  - extractPhasesFromSection returns [] when no phase headings
  - extractNextMilestoneSection returns the milestone after the
    STATE-resolved active one
  - extractNextMilestoneSection returns null when active is last
  - extractNextMilestoneSection returns null when no version found
- init-complex.test.ts: +4 tests under `queued_phases (#2497)`
  - surfaces next milestone with version + name metadata
  - queued entries carry name / deps_display / display_name
  - queued phases are NOT mixed into active phases list
  - returns [] + nulls when active is the last milestone

All 51 tests in roadmap.test.ts + init-complex.test.ts pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* feat(workflows): render queued_phases section in /gsd-manager dashboard

Surfaces the new `queued_phases` / `queued_milestone_version` /
`queued_milestone_name` fields from init.manager (SDK #2497) in a
compact preview section directly below the main active-milestone
table.

Changes to workflows/manager.md:
- Initialize step: parse the optional trio
  (queued_milestone_version, queued_milestone_name, queued_phases)
  alongside the existing init.manager fields. Treat missing as
  empty for backward compatibility with older SDK versions.
- Dashboard step: new "Queued section (next milestone preview)"
  rendered between the main active-milestone grid and the
  Recommendations section. Renders only when queued_phases is
  non-empty; skipped entirely when absent or empty (e.g. active
  milestone is the last one).
- Queued rows render without D/P/E columns since the phases haven't
  been discussed yet — just number, display_name, deps_display,
  and a fixed "· Queued" status.
- Success criterion added: queued section renders when non-empty
  and is skipped when absent.

Queued phases are deliberately NOT eligible for the Continue action
menu; they live in a future milestone. The preview exists for
situational awareness only.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 10:41:37 -04:00
Tom Boucher
7397f580a5 fix(#2516): resolve executor_model inherit literal passthrough; add regression test (#2537)
When model_profile is "inherit", execute-phase was passing the literal string
"inherit" to Task(model=), causing fallback to the default model. The workflow
now documents that executor_model=="inherit" requires omitting the model= parameter
entirely so Claude Code inherits the orchestrator model automatically.

Closes #2516
2026-04-21 21:35:22 -04:00
Tom Boucher
9a67e350b3 fix(#2504): auto-pass UAT for infrastructure/foundation phases with no user-facing elements (#2541)
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 21:20:27 -04:00
Tom Boucher
98d92d7570 fix(#2526): warn about REQ-IDs in body missing from Traceability table (#2539)
Scan REQUIREMENTS.md body for all **REQ-ID** patterns during phase
complete and emit a warning for any IDs absent from the Traceability
table, regardless of whether the roadmap has a Requirements: line.

Closes #2526
2026-04-21 21:18:58 -04:00
Tom Boucher
8eeaa20791 fix(install): chmod dist/cli.js 0o755 after npm install -g; add regression test (closes #2525) (#2536)
Use process.platform !== 'win32' guard in catch instead of a comment, and add
regression test for bug #2525 (gsd-sdk bin symlink points at non-executable file).

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 21:18:34 -04:00
Tom Boucher
f32ffc9fb8 fix(quick): include deferred-items.md in final commit file list (closes #2523) (#2542)
Step 8 file list omitted deferred-items.md, leaving executor out-of-scope
findings untracked after final commit even with commit_docs: true.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 20:33:43 -04:00
Tom Boucher
5676e2e4ef fix(sdk): forward --ws workstream flag through query dispatch (#2546)
* fix(sdk): forward --ws workstream flag through query dispatch (closes #2524)

- cli.ts: pass args.ws as workstream to registry.dispatch()
- registry.ts: add workstream? param to dispatch(), thread to handler
- utils.ts: add optional workstream? to QueryHandler type signature
- helpers.ts: planningPaths() accepts workstream? and uses relPlanningPath()
- All ~26 query handlers updated to receive and pass workstream to planningPaths()
- Config/commit/intel handlers use _workstream (project-global, not scoped)
- Add failing-then-passing test: tests/bug-2524-sdk-query-ws-flag.test.cjs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(sdk): forward workstream to all downstream query helpers

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(test): rewrite #2524 test as static source assertions — no sdk/dist build in CI

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 20:33:24 -04:00
Lex Christopherson
7bb6b6452a fix: spike workflow defaults to interactive UI demos, not stdout
Flips the bias in step 8b: build a simple HTML page/web UI by default,
fall back to stdout only for pure fact-checking (binary yes/no, benchmarks).
Mirrors upstream spike-idea skill constraint #3 update.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 09:19:04 -06:00
Lex Christopherson
43ea92578b Merge remote-tracking branch 'origin/main' into hotfix/1.38.2
# Conflicts:
#	CHANGELOG.md
#	bin/install.js
#	sdk/src/query/init.ts
2026-04-21 09:16:24 -06:00
Lex Christopherson
a42d5db742 1.38.2 2026-04-21 09:14:52 -06:00
Lex Christopherson
c86ca1b3eb fix: sync spike/sketch workflows with upstream skill v2 improvements
Spike workflow:
- Add frontier mode (no-arg or "frontier" proposes integration + frontier spikes)
- Add depth-over-speed principle — follow surprising findings, test edge cases,
  document investigation trail not just verdict
- Add CONVENTIONS.md awareness — follow established patterns, update after session
- Add Requirements section in MANIFEST — track design decisions as they emerge
- Add re-ground step before each spike to prevent drift in long sessions
- Add Investigation Trail section to README template
- Restructured prior context loading with priority ordering
- Research step now runs per-spike with briefing and approach comparison table

Sketch workflow:
- Add frontier mode (no-arg or "frontier" proposes consistency + frontier sketches)
- Add spike context loading — ground mockups in real data shapes, requirements,
  and conventions from spike findings

Spike wrap-up workflow:
- Add CONVENTIONS.md generation step (recurring stack/structure/pattern choices)
- Reference files now use implementation blueprint format (Requirements, How to
  Build It, What to Avoid, Constraints)
- SKILL.md now includes requirements section from MANIFEST
- Next-steps route to /gsd-spike frontier mode instead of inline analysis

Sketch wrap-up workflow:
- Next-steps route to /gsd-sketch frontier mode

Commands updated with frontier mode in descriptions and argument hints.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 09:14:32 -06:00
github-actions[bot]
337e052aa9 chore: bump version to 1.38.2 for hotfix 2026-04-21 15:13:56 +00:00
Lex Christopherson
969ee38ee5 fix: sync spike/sketch workflows with upstream skill v2 improvements
Spike workflow:
- Add frontier mode (no-arg or "frontier" proposes integration + frontier spikes)
- Add depth-over-speed principle — follow surprising findings, test edge cases,
  document investigation trail not just verdict
- Add CONVENTIONS.md awareness — follow established patterns, update after session
- Add Requirements section in MANIFEST — track design decisions as they emerge
- Add re-ground step before each spike to prevent drift in long sessions
- Add Investigation Trail section to README template
- Restructured prior context loading with priority ordering
- Research step now runs per-spike with briefing and approach comparison table

Sketch workflow:
- Add frontier mode (no-arg or "frontier" proposes consistency + frontier sketches)
- Add spike context loading — ground mockups in real data shapes, requirements,
  and conventions from spike findings

Spike wrap-up workflow:
- Add CONVENTIONS.md generation step (recurring stack/structure/pattern choices)
- Reference files now use implementation blueprint format (Requirements, How to
  Build It, What to Avoid, Constraints)
- SKILL.md now includes requirements section from MANIFEST
- Next-steps route to /gsd-spike frontier mode instead of inline analysis

Sketch wrap-up workflow:
- Next-steps route to /gsd-sketch frontier mode

Commands updated with frontier mode in descriptions and argument hints.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 09:05:47 -06:00
Tom Boucher
2980f0ec48 fix(sdk): stripShippedMilestones handles inline SHIPPED headings; getMilestoneInfo prefers STATE.md (#2508)
* fix(sdk): stripShippedMilestones handles inline SHIPPED headings; getMilestoneInfo prefers STATE.md

Fixes two compounding bugs:

- #2496: stripShippedMilestones only stripped <details> blocks, ignoring
  '## Heading —  SHIPPED ...' inline markers. Shipped milestone sections
  were leaking into downstream parsers.

- #2495: getMilestoneInfo checked STATE.md frontmatter only as a last-resort
  fallback, so it returned the first heading match (often a leaked shipped
  milestone) rather than the current milestone. Moved STATE.md check to
  priority 1, consistent with extractCurrentMilestone.

Closes #2495
Closes #2496

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(roadmap): handle ### SHIPPED headings and STATE.md version-only case

Two follow-up fixes from CodeRabbit review of #2508:

1. stripShippedMilestones only split on ## boundaries; ### headings marked
   SHIPPED were not stripped, leaking into fallback parsers. Expanded the
   split/filter regex to #{2,3} to align with extractCurrentMilestone.

2. getMilestoneInfo's early-return on parseMilestoneFromState discarded the
   real milestone name from ROADMAP.md when STATE.md had only `milestone:`
   (no `milestone_name:`), returning the placeholder name 'milestone'.
   Now only short-circuits when STATE.md provides a real name; otherwise
   falls through to ROADMAP for the name while using stateVersion to
   override the version in every ROADMAP-derived return path.

Tests: +2 new cases (### SHIPPED heading, version-only STATE.md).
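The #{2,3} split/filter can be sketched as below; the SHIPPED marker format is simplified from the commit message and the real implementation may differ:

```javascript
// Split the roadmap into sections at ## or ### headings, then drop
// any section whose heading carries a SHIPPED marker.
function stripShippedMilestones(content) {
  return content
    .split(/^(?=#{2,3} )/m)
    .filter((section) => !/^#{2,3} .*SHIPPED/.test(section))
    .join('');
}
```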

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 10:41:35 -04:00
Tom Boucher
8789211038 fix(insert-phase): update STATE.md next-phase recommendation after phase insertion (#2509)
* fix(insert-phase): update STATE.md next-phase recommendation after inserting a phase

Closes #2502

* fix(insert-phase): update all STATE.md pointers; tighten test scope

Two follow-up fixes from CodeRabbit review of #2509:

1. The update_project_state instruction only said to find "the line" for
   the next-phase recommendation. STATE.md can have multiple pointers
   (structured current_phase: field AND prose recommendation text).
   Updated wording to explicitly require updating all of them in the same
   edit.

2. The regression test for the next-phase pointer update scanned the
   entire file, so a match anywhere would pass even if update_project_state
   itself was missing the instruction. Scoped the assertion to only the
   content inside <step name="update_project_state"> to prevent false
   positives.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 10:10:45 -04:00
Tom Boucher
57bbfe652b fix: exclude non-wiped dirs from custom-file scan; warn on non-Claude model profiles (#2511)
* fix(detect-custom-files): exclude skills and command dirs not wiped by installer (closes #2505)

GSD_MANAGED_DIRS included 'skills' and 'command' directories, but the
installer never wipes those paths. Users with third-party skills installed
(40+ files, none in GSD's manifest) had every skill flagged as a "custom
file" requiring backup, producing noisy false-positive reports on every
/gsd-update run.

Removes 'skills' and 'command' from both gsd-tools.cjs and the SDK's
detect-custom-files.ts. Adds two regression tests confirming neither
directory is scanned.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(settings): warn that model profiles are no-ops on non-Claude runtimes (closes #2506)

settings.md presented Quality/Balanced/Budget model profiles without any
indication that these tiers map to Claude models (Opus/Sonnet/Haiku) and
have no effect on non-Claude runtimes (Codex, Gemini CLI, OpenRouter).
Users on Codex saw the profile chooser as if it would meaningfully select
models, but all agents silently used the runtime default regardless.

Adds a non-Claude runtime note before the profile question (shown in
TEXT_MODE, the path all non-Claude runtimes take) explaining the profiles
are no-ops and directing users to either choose Inherit or configure
model_overrides manually. Also updates the Inherit option description to
explicitly name the runtimes where it is the correct choice.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 10:10:10 -04:00
Tom Boucher
a4764c5611 fix(execute-phase): resurrection-detection must check git history before deleting new .planning/ files (#2510)
The guard at the worktree-merge resurrection block was inverting the
intended logic: it deleted any .planning/ file absent from PRE_MERGE_FILES,
which includes brand-new files (e.g. SUMMARY.md just created by the
executor). A genuine resurrection is a file that was previously tracked on
main, deliberately removed, and then re-introduced by the merge. Detecting
that requires a git history check — not just tree membership.

Fix: replace the PRE_MERGE_FILES grep guard with a `git log --follow
--diff-filter=D` check that only removes the file if it has a deletion
event in main's ancestry.

Closes #2501
2026-04-21 09:46:01 -04:00
Tom Boucher
b2534e8a05 feat(plan-phase): chunked mode + filesystem fallback for Windows stdio hang (#2499)
* feat(plan-phase): chunked mode + filesystem fallback for Windows stdio hang (#2310)

Addresses the 2026-04-16 Windows incident where gsd-planner wrote all 5
PLAN.md files to disk but Task() never returned, hanging the orchestrator
for 30+ minutes. Two mitigations:

1. Filesystem fallback (steps 9a, 11a): when Task() returns with an
   empty/truncated response but PLAN.md files exist on disk, surface a
   recoverable prompt (Accept plans / Retry planner / Stop) instead of
   silently failing. Directly addresses the post-restart recovery path.

2. Chunked mode (--chunked flag / workflow.plan_chunked config): splits the
   single long-lived planner Task into a short outline Task (~2 min) followed
   by N short per-plan Tasks (~3-5 min each). Each plan is committed
   individually for crash resilience. A hang loses one plan, not all of them.
   Resume detection skips plans already on disk on re-run.

RCA confirmed: task state mtime 14:29 vs PLAN.md writes 14:32-14:52 shows the
subagent completed normally; the IPC return was dropped by the Windows stdio deadlock.
Neither mitigation fixes the root cause (requires upstream Task() timeout
support); both bound damage and enable recovery.

New reference file planner-chunked.md keeps OUTLINE COMPLETE / PLAN COMPLETE
return formats out of gsd-planner.md (which sits at 46K near its size limit).

Closes #2310

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(plan-phase): address CodeRabbit review comments on #2499

- docs/CONFIGURATION.md: add workflow.plan_chunked to full JSON schema example
- plan-phase.md step 8.5.1: validate PLAN-OUTLINE.md with grep for OUTLINE
  COMPLETE marker before reusing (not just file existence)
- plan-phase.md step 8.5.2: validate per-plan PLAN.md has YAML frontmatter
  (head -1 grep for ---) before skipping in resume path
- plan-phase.md: add language tags (text/javascript/bash) to bare fenced
  code blocks in steps 8.5, 9a, 11a (markdownlint MD040)
- Rejected: commit_docs gate on per-plan commits (gsd-sdk query commit
  already respects commit_docs internally — comment was a false positive)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(plan-phase): route Accept-plans through step 9 PLANNING COMPLETE handling

Honors --skip-verify / plan_checker_enabled=false in 9a fallback path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 08:40:39 -04:00
Tom Boucher
d1b56febcb fix(execute-phase): post-merge deletion audit for bulk file deletions (closes #2384) (#2483)
* fix(execute-phase): post-merge deletion audit for bulk file deletions (closes #2384)

Two data-loss incidents were caused by worktree merges bringing in bulk
file deletions silently. The pre-merge check (HEAD...WT_BRANCH) catches
deletions on the worktree branch, but files deleted during the merge
itself (e.g., from merge conflict resolution or stale branch state) were
not audited post-merge.

Adds a post-merge audit immediately after git merge --no-ff succeeds:
- Counts files deleted outside .planning/ in the merge commit
- If count > 5 and ALLOW_BULK_DELETE!=1: reverts the merge with
  git reset --hard HEAD~1 and continues to the next worktree
- Logs the full file list and an escape-hatch instruction

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(test): tighten post-merge deletion audit assertions (CodeRabbit #2483)

Replace loose substring checks with exact regex assertions:
- assert.match against 'git diff --diff-filter=D --name-only HEAD~1 HEAD'
- assert.match against threshold gate + ALLOW_BULK_DELETE override condition
- assert.match against git reset --hard HEAD~1 revert
- assert.match against MERGE_DEL_COUNT grep -vc for non-.planning count

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(inventory): update workflow count to 81 (graduation.md added in #2490)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 18:37:42 -04:00
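The audit's decision logic can be expressed as a pure function. The threshold (5), the `.planning/` exclusion, and the ALLOW_BULK_DELETE override come from the commit; the function name and return shape are illustrative:

```javascript
// Post-merge deletion audit: count files deleted outside .planning/
// (mirroring `git diff --diff-filter=D --name-only HEAD~1 HEAD` piped
// through a grep -v count) and decide whether to revert the merge.
function auditMergeDeletions(deletedFiles, env = process.env) {
  const outside = deletedFiles.filter((f) => !f.startsWith('.planning/'));
  if (outside.length > 5 && env.ALLOW_BULK_DELETE !== '1') {
    return {
      action: 'revert', // i.e. git reset --hard HEAD~1, then next worktree
      reason: `${outside.length} files deleted outside .planning/`,
      files: outside,   // logged in full, with the escape-hatch instruction
    };
  }
  return { action: 'keep' };
}
```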
Tom Boucher
1657321eb0 fix(install): remove bare ~/.claude reference in update.md (closes #2470) (#2482)
* fix(install): remove bare ~/.claude reference in update.md (closes #2470)

The installer's copyWithPathReplacement() replaces ~/\.claude\/ (with
trailing slash) but not ~/\.claude (bare, no trailing slash). A comment
on line 398 of update.md used the bare form, which scanForLeakedPaths()
correctly flagged for every non-Claude runtime install.

Replaced the example in the comment with a non-Claude runtime path so
the file passes the scanner for all runtimes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(test): align regex with installer's word-boundary semantics (CodeRabbit #2482)

Replace negative lookahead (?!\/) with \b word boundary to match the
installer's scanForLeakedPaths() pattern. The lookahead would incorrectly
flag ~/.claude_suffix whereas \b correctly excludes it.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(test): revert \b regex — (?!\/) was intentionally scoped to bare refs

The installer's scanForLeakedPaths uses \b but the test is specifically
checking for bare ~/.claude without trailing slash that the replacer misses.
~/.claude/ (with slash) at line 359 of update.md is expected and handled.
\b would flag it as a false positive.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(inventory): update workflow count to 81 (graduation.md added in #2490)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 18:37:32 -04:00
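The two regex semantics debated across these commits differ on exactly the inputs the messages name; a quick demonstration (patterns written out here for illustration):

```javascript
// (?!\/) — negative lookahead: matches bare ~/.claude, but also flags
// ~/.claude_suffix, because "_" is simply "not a slash".
const bareLookahead = /~\/\.claude(?!\/)/;

// \b — word boundary: excludes ~/.claude_suffix ("_" is a word char,
// so there is no boundary after "claude"), but flags ~/.claude/ since
// "/" does create a boundary — a false positive for a test that only
// wants bare references the path replacer misses.
const wordBoundary = /~\/\.claude\b/;

console.log(bareLookahead.test('~/.claude'));        // true  (both agree)
console.log(bareLookahead.test('~/.claude_suffix')); // true  (over-flags)
console.log(wordBoundary.test('~/.claude_suffix'));  // false
console.log(bareLookahead.test('~/.claude/agents')); // false
console.log(wordBoundary.test('~/.claude/agents'));  // true  (over-flags here)
```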
Tom Boucher
2b494407e5 feat(assembly): add link mode for CLAUDE.md @-reference sections (#2484)
* feat(assembly): add link mode for CLAUDE.md @-reference sections (#2415)

Adds `claude_md_assembly.mode: "link"` config option that writes
`@.planning/<source>` instead of inlining content between GSD markers,
reducing typical CLAUDE.md size by ~65%. Per-block overrides available
via `claude_md_assembly.blocks.<section>`. Falls back to embed for
sections without a real source file (workflow, fallbacks).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(test): add positive assertion for embedded workflow content (CodeRabbit #2484)

The negative assertion only confirmed @GSD defaults wasn't written.
Add assert.ok(content.includes('GSD Workflow Enforcement')) to verify
the workflow section is actually embedded inline when link mode falls back.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 18:27:55 -04:00
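The shape of the new option, per the commit, might look like this in config; only `mode` and the `blocks.<section>` override mechanism are named by the commit — the `"state"` section name here is an assumed illustration:

```json
{
  "claude_md_assembly": {
    "mode": "link",
    "blocks": {
      "state": "embed"
    }
  }
}
```

With `mode: "link"`, sections between the GSD markers are written as `@.planning/<source>` references instead of inlined content; sections without a real source file (workflow, fallbacks) fall back to embed regardless.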
Tom Boucher
d0f4340807 feat(workflows): link pending todos to roadmap phases in new-milestone (#2433) (#2485)
Adds step 10.5 to gsd-new-milestone that scans pending todos against the
approved roadmap and tags matches with `resolves_phase: N` in their YAML
frontmatter. Adds a `close_phase_todos` step to execute-phase that moves
tagged todos to `completed/` when the phase completes — closing the loop
automatically with no manual cleanup.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 18:25:24 -04:00
Tom Boucher
280eed93bc feat(cli): add /gsd-sync-skills for cross-runtime managed skill sync (#2491)
* fix(tests): update 5 source-text tests to read config-schema.cjs

VALID_CONFIG_KEYS moved from config.cjs to config-schema.cjs in the
drift-prevention companion PR. Tests that read config.cjs source text
and checked for key literal includes() now point to the correct file.

Closes #2480

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(cli): add /gsd-sync-skills for cross-runtime managed skill sync (#2380)

Adds /gsd-sync-skills command so multi-runtime users can keep gsd-* skill
directories aligned across runtime roots after updating one runtime with gsd-update.

Changes:
- bin/install.js: add --skills-root <runtime> flag that prints the skills root
  path for any supported runtime, reusing the existing getGlobalDir() table.
  Banner is suppressed when --skills-root is used (machine-readable output).
- commands/gsd/sync-skills.md: slash command definition
- get-shit-done/workflows/sync-skills.md: full workflow spec covering argument
  parsing, path resolution via --skills-root, diff computation (CREATE/UPDATE/
  REMOVE/SKIP), dry-run report (default), apply execution, idempotency guarantee,
  and safety rules (only gsd-* touched, dry-run performs no writes).

Safety rules: only gsd-* directories are ever created/updated/removed; non-GSD
skills in destination roots are never touched; --dry-run is the default.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 18:21:43 -04:00
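The diff computation's CREATE/UPDATE/REMOVE/SKIP classification, together with the only-gsd-* safety rule, can be sketched as a per-skill decision. The outcome names come from the workflow spec; the function shape and the `IGNORE` outcome for non-managed directories are assumptions:

```javascript
// Classify one skill directory when syncing from a source runtime root
// to a destination root. Non gsd-* directories are never touched.
function classifySkill(name, inSource, inDest, contentsEqual) {
  if (!name.startsWith('gsd-')) return 'IGNORE'; // safety rule
  if (inSource && !inDest) return 'CREATE';
  if (inSource && inDest) return contentsEqual ? 'SKIP' : 'UPDATE';
  if (!inSource && inDest) return 'REMOVE';
  return 'IGNORE';
}
```

Running the classification twice over an already-synced tree yields SKIP for every managed skill, which is the idempotency guarantee the spec describes; in dry-run mode (the default) the report is printed and no writes occur.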
Tom Boucher
b432d4a726 feat(workflows): close LEARNINGS.md consumption-and-graduation loop (#2490)
* fix(tests): update 5 source-text tests to read config-schema.cjs

VALID_CONFIG_KEYS moved from config.cjs to config-schema.cjs in the
drift-prevention companion PR. Tests that read config.cjs source text
and checked for key literal includes() now point to the correct file.

Closes #2480

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(workflows): close LEARNINGS.md consumption-and-graduation loop (#2430)

Part A — Consumption: extend plan-phase.md cross-phase context load to include
LEARNINGS.md files from the 3 most recent prior phases (same recency gate as
CONTEXT.md + SUMMARY.md: CONTEXT_WINDOW >= 500000 only). Also loads LEARNINGS.md
from any phases in the Depends-on chain. Silent skip if absent; 15% context
budget cap with oldest-first truncation; [from Phase N LEARNINGS] attribution.

Part B — Graduation: add graduation_scan step to transition.md (after
evolve_project) that delegates to new graduation.md helper workflow. The helper
clusters recurring items across the last N phases (default window=5, threshold=3)
using Jaccard lexical similarity, surfaces HITL Promote/Defer/Dismiss prompts,
routes promotions to PROJECT.md or PATTERNS.md by category, annotates graduated
items with `graduated:` field, and persists dismissed/deferred clusters in
STATE.md graduation_backlog. Always non-blocking; silently no-ops on first phase
or when data is insufficient.

Also: adds optional `graduated:` annotation docs to extract_learnings.md schema.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(graduation): address CodeRabbit review findings on PR #2490

- graduation.md: unify insufficient-data guard to silent-skip (remove
  contradictory [no-op] print path)
- graduation.md: add TEXT_MODE fallback for HITL cluster prompts
- graduation.md: add A (defer-all) to accepted actions [P/D/X/A]
- graduation.md: tag untyped code fences with text language (MD040)
- transition.md: tag untyped graduation.md fence with text language

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(graduation): rephrase TEXT_MODE line to avoid prompt-injection scanner false positive

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 18:21:35 -04:00
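The Jaccard lexical similarity and the recurrence gate the graduation helper uses can be sketched as below. The window=5 and threshold=3 defaults come from the commit; the tokenization and function shapes are assumptions:

```javascript
// Jaccard similarity over lowercase word tokens — the lexical measure
// used to decide two LEARNINGS entries describe the same recurring item.
function jaccard(a, b) {
  const tok = (s) => new Set(s.toLowerCase().match(/[a-z0-9]+/g) || []);
  const A = tok(a), B = tok(b);
  const inter = [...A].filter((t) => B.has(t)).length;
  const union = new Set([...A, ...B]).size;
  return union === 0 ? 0 : inter / union;
}

// A cluster surfaces a HITL Promote/Defer/Dismiss prompt only when it
// recurs in at least `threshold` of the last `window` phases.
function shouldSurface(occurrences, window = 5, threshold = 3) {
  return occurrences.slice(-window).filter(Boolean).length >= threshold;
}

console.log(jaccard(
  'worktree merge deleted planning files',
  'planning files deleted worktree merge'
)); // 1 — same token set in a different order
```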
Tom Boucher
cfe4dc76fd feat(health): canonical artifact registry and W019 unrecognized-file lint (#2448) (#2488)
Adds artifacts.cjs with canonical .planning/ root file names, W019 warning
in gsd-health that flags unrecognized .md files at the .planning/ root, and
templates/README.md as the authoritative artifact index for agents and humans.

Closes #2448
2026-04-20 18:21:23 -04:00
Tom Boucher
f19d0327b2 feat(agents): sycophancy hardening for 9 audit-class agents (#2489)
* fix(tests): update 5 source-text tests to read config-schema.cjs

VALID_CONFIG_KEYS moved from config.cjs to config-schema.cjs in the
drift-prevention companion PR. Tests that read config.cjs source text
and checked for key literal includes() now point to the correct file.

Closes #2480

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* feat(agents): sycophancy hardening for 9 audit-class agents (#2427)

Add adversarial reviewer posture to gsd-plan-checker, gsd-code-reviewer,
gsd-security-auditor, gsd-verifier, gsd-eval-auditor, gsd-nyquist-auditor,
gsd-ui-auditor, gsd-integration-checker, and gsd-doc-verifier.

Four changes per agent:
- Third-person framing: <role> opens with submission framing, not "You are a GSD X"
- FORCE stance: explicit starting hypothesis that the submission is flawed
- Failure modes: agent-specific list of how each reviewer type goes soft
- BLOCKER/WARNING classification: every finding must carry an explicit severity

Also applies to sdk/prompts/agents variants of gsd-plan-checker and gsd-verifier.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 18:20:08 -04:00
Tom Boucher
bd27d4fabe feat(roadmap): surface wave dependencies and cross-cutting constraints (#2487)
* feat(roadmap): surface wave dependencies and cross-cutting constraints (#2447)

Adds roadmap.annotate-dependencies command that post-processes a phase's
ROADMAP plan list to insert wave dependency notes and surface must_haves.truths
entries shared across 2+ plans as cross-cutting constraints. Operation is
idempotent and purely derived from existing PLAN frontmatter.

Closes #2447

* fix(roadmap): address CodeRabbit review findings on PR #2487

- roadmap.cjs: expand idempotency guard to also check for existing
  cross-cutting constraints header, preventing duplicate injection on
  re-runs; add content equality check before writing to preserve
  true idempotency for single-wave phases
- plan-phase.md: move ROADMAP annotation (13d) before docs commit (13c)
  so annotated ROADMAP.md is included in the commit rather than left dirty;
  include .planning/ROADMAP.md in committed files list
- sdk/src/query/index.ts: add annotate-dependencies aliases to
  QUERY_MUTATION_COMMANDS so the mutation is properly event-wired
- sdk/src/query/roadmap.ts: add timeout (15s) and maxBuffer to spawnSync;
  check result.error before result.status to handle spawn/timeout failures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 18:19:21 -04:00
Tom Boucher
e8ec42082d feat(health): detect MILESTONES.md drift from archived snapshots (#2446) (#2486)
Adds W018 warning when .planning/milestones/vX.Y-ROADMAP.md snapshots
exist without a corresponding entry in MILESTONES.md. Introduces
--backfill flag to synthesize missing entries from snapshot titles.

Closes #2446
2026-04-20 18:19:14 -04:00
Rezolv
86fb9c85c3 docs(sdk): registry docs and gsd-sdk query call sites (#2302 Track B) (#2340)
* feat(sdk): golden parity harness and query handler CJS alignment (#2302 Track A)

Golden/read-only parity tests and registry alignment, query handler fixes
(check-completion, state-mutation, commit, validate, summary, etc.), and
WAITING.json dual-write for .gsd/.planning readers.

Refs gsd-build/get-shit-done#2341

* fix(sdk): getMilestoneInfo matches GSD ROADMAP (🟡, last bold, STATE fallback)

- Recognize in-flight 🟡 milestone bullets like 🚧.
- Derive from last **vX.Y Title** before ## Phases when emoji absent.
- Fall back to STATE.md milestone when ROADMAP is missing; use last bare vX.Y
  in cleaned text instead of first (avoids v1.0 from shipped list).
- Fixes init.execute-phase milestone_version and buildStateFrontmatter after
  state.begin-phase (syncStateFrontmatter).

* feat(sdk): phase list, plan task structure, requirements extract handlers

- Register phase.list-plans, phase.list-artifacts, plan.task-structure,
  requirements.extract-from-plans (SDK-only; golden-policy exceptions).
- Add unit tests; document in QUERY-HANDLERS.md.
- writeProfile: honor --output, render dimensions, return profile_path and dimensions_scored.

* feat(sdk): centralize getGsdAgentsDir in query helpers

Extract agent directory resolution to helpers (GSD_AGENTS_DIR, primary
~/.claude/agents, legacy path). Use from init and docs-init init bundles.

docs(15): add 15-CONTEXT for autonomous phase-15 run.

* feat(sdk): query CLI CJS fallback and session correlation

- createRegistry(eventStream, sessionId) threads correlation into mutation events
- gsd-sdk query falls back to gsd-tools.cjs when no native handler matches
  (disable with GSD_QUERY_FALLBACK=off); stderr bridge warnings
- Export createRegistry from @gsd-build/sdk; add sdk/README.md
- Update QUERY-HANDLERS.md and registry module docs for fallback + sessionId
- Agents: prefer node dist/cli.js query over cat/grep for STATE and plans

* fix(sdk): init phase_found parity, docs-init agents path, state field extract

- Normalize findPhase not-found to null before roadmap fallback (matches findPhaseInternal)

- docs-init: use detectRuntime + resolveAgentsDir for checkAgentsInstalled

- state.cjs stateExtractField: horizontal whitespace only after colon (YAML progress guard)

- Tests: commit_docs default true; config-get golden uses temp config; golden integration green

Refs: #2302

* refactor(sdk): share SessionJsonlRecord in profile-extract-messages

CodeRabbit nit: dedupe JSONL record shape for isGenuineUserMessage and streamExtractMessages.

* fix(sdk): address CodeRabbit major threads (paths, gates, audit, verify)

- Resolve @file: and CLI JSON indirection relative to projectDir; guard empty normalized query command

- plan.task-structure + intel extract/patch-meta: resolvePathUnderProject containment

- check.config-gates: safe string booleans; plan_checker alias precedence over plan_check default

- state.validate/sync: phaseTokenMatches + comparePhaseNum ordering

- verify.schema-drift: token match phase dirs; files_modified from parsed frontmatter

- audit-open: has_scan_errors, unreadable rows, human report when scans fail

- requirements PLANNED key PLAN for root PLAN.md; gsd-tools timeout note

- ingest-docs: repo-root path containment; classifier output slug-hash

Golden parity test strips has_scan_errors until CJS adds field.

* fix: Resolve CodeRabbit security and quality findings
- Secure intel.ts and cli.ts against path traversal
- Catch and validate git add status in commit.ts
- Expand roadmap milestone marker extraction
- Fix parsing array-of-objects in frontmatter YAML
- Fix unhandled config evaluations
- Improve coverage test parity mapping

* docs(sdk): registry docs and gsd-sdk query call sites (#2302 Track B)

Update CHANGELOG, architecture and user guides, workflow call sites, and read-guard tests for gsd-sdk query; sync ARCHITECTURE.md command/workflow counts and directory-tree totals with the repo (80 commands, 77 workflows).

Address CodeRabbit: fix markdown tables and emphasis; align CLI-TOOLS GSDTools and state.read docs with implementation; correct roadmap handler name in universal-anti-patterns; resolve settings workflow config path without relying on config_path from state.load.

Refs gsd-build/get-shit-done#2340

* test: raise planner character extraction limit to 48K

* fix(sdk): resolve build TS error and doc conflict markers
2026-04-20 18:09:21 -04:00
Rezolv
c5b1445529 feat(sdk): golden parity harness and query handler CJS alignment (#2302 Track A) (#2341)
* feat(sdk): golden parity harness and query handler CJS alignment (#2302 Track A)

Golden/read-only parity tests and registry alignment, query handler fixes
(check-completion, state-mutation, commit, validate, summary, etc.), and
WAITING.json dual-write for .gsd/.planning readers.

Refs gsd-build/get-shit-done#2341

* fix(sdk): getMilestoneInfo matches GSD ROADMAP (🟡, last bold, STATE fallback)

- Recognize in-flight 🟡 milestone bullets like 🚧.
- Derive from last **vX.Y Title** before ## Phases when emoji absent.
- Fall back to STATE.md milestone when ROADMAP is missing; use last bare vX.Y
  in cleaned text instead of first (avoids v1.0 from shipped list).
- Fixes init.execute-phase milestone_version and buildStateFrontmatter after
  state.begin-phase (syncStateFrontmatter).

* feat(sdk): phase list, plan task structure, requirements extract handlers

- Register phase.list-plans, phase.list-artifacts, plan.task-structure,
  requirements.extract-from-plans (SDK-only; golden-policy exceptions).
- Add unit tests; document in QUERY-HANDLERS.md.
- writeProfile: honor --output, render dimensions, return profile_path and dimensions_scored.

* feat(sdk): centralize getGsdAgentsDir in query helpers

Extract agent directory resolution to helpers (GSD_AGENTS_DIR, primary
~/.claude/agents, legacy path). Use from init and docs-init init bundles.

docs(15): add 15-CONTEXT for autonomous phase-15 run.

* feat(sdk): query CLI CJS fallback and session correlation

- createRegistry(eventStream, sessionId) threads correlation into mutation events
- gsd-sdk query falls back to gsd-tools.cjs when no native handler matches
  (disable with GSD_QUERY_FALLBACK=off); stderr bridge warnings
- Export createRegistry from @gsd-build/sdk; add sdk/README.md
- Update QUERY-HANDLERS.md and registry module docs for fallback + sessionId
- Agents: prefer node dist/cli.js query over cat/grep for STATE and plans

* fix(sdk): init phase_found parity, docs-init agents path, state field extract

- Normalize findPhase not-found to null before roadmap fallback (matches findPhaseInternal)

- docs-init: use detectRuntime + resolveAgentsDir for checkAgentsInstalled

- state.cjs stateExtractField: horizontal whitespace only after colon (YAML progress guard)

- Tests: commit_docs default true; config-get golden uses temp config; golden integration green

Refs: #2302

* refactor(sdk): share SessionJsonlRecord in profile-extract-messages

CodeRabbit nit: dedupe JSONL record shape for isGenuineUserMessage and streamExtractMessages.

* fix(sdk): address CodeRabbit major threads (paths, gates, audit, verify)

- Resolve @file: and CLI JSON indirection relative to projectDir; guard empty normalized query command

- plan.task-structure + intel extract/patch-meta: resolvePathUnderProject containment

- check.config-gates: safe string booleans; plan_checker alias precedence over plan_check default

- state.validate/sync: phaseTokenMatches + comparePhaseNum ordering

- verify.schema-drift: token match phase dirs; files_modified from parsed frontmatter

- audit-open: has_scan_errors, unreadable rows, human report when scans fail

- requirements PLANNED key PLAN for root PLAN.md; gsd-tools timeout note

- ingest-docs: repo-root path containment; classifier output slug-hash

Golden parity test strips has_scan_errors until CJS adds field.

* fix: Resolve CodeRabbit security and quality findings
- Secure intel.ts and cli.ts against path traversal
- Catch and validate git add status in commit.ts
- Expand roadmap milestone marker extraction
- Fix parsing array-of-objects in frontmatter YAML
- Fix unhandled config evaluations
- Improve coverage test parity mapping

* test: raise planner character extraction limit to 48K

* fix(sdk): resolve TS build error in docs-init passing config
2026-04-20 18:09:02 -04:00
TÂCHES
c8807e38d7 Merge pull request #2481 from gsd-build/hotfix/1.37.1
chore: merge hotfix v1.37.1 back to main
2026-04-20 14:23:58 -06:00
Lex Christopherson
2b4446e2f9 chore: resolve merge conflict — take main's INVENTORY.md references
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 14:23:49 -06:00
Lex Christopherson
ef4ce7d6f9 1.37.1 2026-04-20 14:16:09 -06:00
Tom Boucher
12d38b2da0 fix(ci): update ARCHITECTURE.md counts and add TEXT_MODE fallback to sketch workflow (#2377)
* fix(tests): clear CLAUDECODE env var in read-guard test runner

The hook skips its advisory on two env vars: CLAUDE_SESSION_ID and
CLAUDECODE. runHook() cleared CLAUDE_SESSION_ID but inherited CLAUDECODE
from process.env, so tests run inside a Claude Code session silently
no-oped and produced no stdout, causing JSON.parse to throw.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(ci): update ARCHITECTURE.md counts and add TEXT_MODE fallback to sketch workflow

Four new spike/sketch files were added in 1.37.0 but two housekeeping
items were missed: ARCHITECTURE.md component counts (75→79 commands,
72→76 workflows) and the required TEXT_MODE fallback in sketch.md for
non-Claude runtimes (#2012).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(ci): update directory-tree slash command count in ARCHITECTURE.md

Missed the second count in the directory tree (# 75 slash commands → 79).
The prose "Total commands" was updated but the tree annotation was not,
causing command-count-sync.test.cjs to fail.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 14:12:21 -06:00
Lex Christopherson
e7a6d9ef2e fix: sync spike/sketch workflows with upstream skill improvements
Spike workflow:
- Add prior spike check — skips already-validated questions
- Add comparison spikes (NNN-a/NNN-b) for head-to-head evaluation
- Add research-before-building step (context7 + web search)
- Add forensic logging/observability for runtime-interactive spikes
- Add Type column to MANIFEST, type/Research/Observability to README

Sketch workflow:
- Add research-the-target-stack step — check component availability,
  framework constraints, and idiomatic patterns before building

Spike wrap-up workflow:
- Replace per-spike curation with auto-include-all (every spike carries
  signal: VALIDATED=patterns, PARTIAL=constraints, INVALIDATED=landmines)
- Add Step 10 intelligent routing — integration spike candidates,
  frontier spike candidates, and standard next-step options

Commands updated with context7/WebSearch tools and --text flag.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 14:05:29 -06:00
github-actions[bot]
beb3ac247b chore: bump version to 1.37.1 for hotfix 2026-04-20 20:05:07 +00:00
Lex Christopherson
a95cabaedb fix: sync spike/sketch workflows with upstream skill improvements
Spike workflow:
- Add prior spike check — skips already-validated questions
- Add comparison spikes (NNN-a/NNN-b) for head-to-head evaluation
- Add research-before-building step (context7 + web search)
- Add forensic logging/observability for runtime-interactive spikes
- Add Type column to MANIFEST, type/Research/Observability to README

Sketch workflow:
- Add research-the-target-stack step — check component availability,
  framework constraints, and idiomatic patterns before building

Spike wrap-up workflow:
- Replace per-spike curation with auto-include-all (every spike carries
  signal: VALIDATED=patterns, PARTIAL=constraints, INVALIDATED=landmines)
- Add Step 10 intelligent routing — integration spike candidates,
  frontier spike candidates, and standard next-step options

Commands updated with context7/WebSearch tools and --text flag.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 14:04:31 -06:00
Tom Boucher
9d55d531a4 fix(#2432,#2424): pre-dispatch PLAN.md commit + reapply-patches baseline detection; docs(#2397): config schema drift (#2469)
- quick.md Step 5.6: commit PLAN.md to base branch before worktree executor
  spawn when USE_WORKTREES is active, preventing CC #36182 path-resolution
  drift that caused silent writes to main repo instead of worktree
- reapply-patches.md Option A: replace first-add commit heuristic with
  pristine_hashes SHA-256 matching from backup-meta.json so baseline detection
  works correctly on multi-cycle repos; first-add fallback kept for older
  installers without pristine_hashes
- CONFIGURATION.md: move security_enforcement/security_asvs_level/security_block_on
  to workflow.* (matches templates/config.json and workflow readers); rename
  context_profile → context (matches VALID_CONFIG_KEYS in config.cjs); add
  planning.sub_repos to schema example
- universal-anti-patterns.md + context-budget.md: fix context_window_tokens →
  context_window (the actual key name in config.cjs)

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:11:00 -04:00
Tom Boucher
5f419c0238 fix(bugs): resolve issues #2388, #2431, #2396, #2376 (#2467)
#2388 (plan-phase silently renames feature branch): add explicit Git
Branch Invariant section to plan-phase.md prohibiting branch
creation/rename/switch during planning; phase slug changes are
plan-level only and must not affect the git branch.

#2431 (worktree teardown silently swallows errors): replace
`git worktree remove --force 2>/dev/null || true` with a lock-aware
block in quick.md and execute-phase.md that detects locked worktrees,
attempts unlock+retry, and surfaces a user-visible recovery message
when removal still fails.

#2396 (hardcoded test commands bypass Makefile): add a three-tier
test command resolver (project config → Makefile/Justfile → language
sniff) in execute-phase.md, verify-phase.md, and audit-fix.md.
Makefile with a `test:` target now takes priority over npm/cargo/go.

#2376 (OpenCode @$HOME not mapped on Windows): add platform guard in
bin/install.js so OpenCode on win32 uses the absolute path instead of
`$HOME/...`, which OpenCode does not expand in @file references on
Windows.

Tests: 29 new assertions across 4 regression test files (all passing).

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:10:16 -04:00
Tom Boucher
dfa1ecce99 fix(#2418,#2399,#2419,#2421): four workflow and installer bug fixes (#2462)
- #2418: convertClaudeToAntigravityContent now replaces bare ~/.claude and
  $HOME/.claude (no trailing slash) for both global and local installs,
  eliminating the "unreplaced .claude path reference" warnings in
  gsd-debugger.md and update.md during Antigravity installs.

- #2399: plan-phase workflow gains step 13c that commits PLAN.md files
  and STATE.md via gsd-sdk query commit when commit_docs is true.
  Previously commit_docs:true was read but never acted on in plan-phase.

- #2419: new-project.md and new-milestone.md now parse agents_installed
  and missing_agents from the init JSON and warn users clearly when GSD
  agents are not installed, rather than silently failing with "agent type
  not found" when trying to spawn gsd-project-researcher subagents.

- #2421: gsd-planner.md gains a "Grep gate hygiene" rule immediately after
  the Nyquist Rule explaining the self-invalidating grep gate anti-pattern
  and providing comment-stripping alternatives (grep -v, ast-grep).

Tests: 4 new test files (30 tests) all passing.

Closes #2418
Closes #2399
Closes #2419
Closes #2421

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:09:33 -04:00
Tom Boucher
4cd890b252 fix(phase): guard backlog dirs and YYYY-MM dates in integer phase removal (#2466)
* fix(phase): guard backlog dirs and YYYY-MM dates in integer phase removal

Closes #2435
Closes #2434

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(phase): extend date-collision guard to hyphen-adjacent context

The lookbehind `(?<!\d)` in renameIntegerPhases only excluded
digit-prefixed matches; a YYYY-MM-DD date like 2026-05-14 has a hyphen
before the month digits, which passed the original guard and caused
date corruption when renumbering a phase whose zero-padded number
matched the month. Replace with `(?<![0-9-])` lookbehind and
`(?![0-9-])` lookahead to exclude both digit- and hyphen-adjacent
contexts. Adds a regression test for the hyphen-adjacent case.
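The difference between the two guards can be seen directly (a sketch using the month "05" as the phase number, per the scenario above):

```javascript
// Original guard: only excludes digit-adjacent matches.
const loose = /(?<!\d)05(?!\d)/;
// Fixed guard: also excludes hyphen-adjacent matches, so YYYY-MM-DD dates survive.
const strict = /(?<![0-9-])05(?![0-9-])/;

const date = 'milestone shipped 2026-05-14';
console.log(loose.test(date));  // true  — the "05" month slips past the digit-only guard
console.log(strict.test(date)); // false — the hyphen before "05" now blocks the match
```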

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:08:52 -04:00
Tom Boucher
d117c1045a test: add --no-sdk to copilot-install E2E runners + static guard (#2461) (#2463)
Four execFileSync installer calls in copilot-install.test.cjs deleted
GSD_TEST_MODE but omitted --no-sdk, triggering the fatal installSdkIfNeeded()
path in test.yml CI where npm global bin is not on PATH.

Partial fix in e213ce0 patched three hook-deployment tests but missed
runCopilotInstall, runCopilotUninstall, runClaudeInstall, runClaudeUninstall.

Also adds tests/sdk-no-sdk-guard.test.cjs: a static analysis guard that
scans test files for subprocess installer calls missing --no-sdk, so this
class of regression is caught automatically in future.

Closes #2461

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:08:49 -04:00
Tom Boucher
0ea443cbcf fix(install): chmod sdk dist/cli.js executable; fix context monitor over-reporting (#2460)
Bug #2453: After tsc builds sdk/dist/cli.js, npm install -g from a local
directory does not chmod the bin-script target (unlike tarball extraction).
The file lands at mode 644, the gsd-sdk symlink points at a non-executable
file, and command -v gsd-sdk fails on every first install. Fix: explicitly
chmodSync(cliPath, 0o755) immediately after npm install -g completes,
mirroring the pattern used for hook files throughout the installer.

Bug #2451: gsd-context-monitor warning messages over-reported usage by ~13
percentage points vs CC native /context. Root cause: gsd-statusline.js
wrote a buffer-normalized used_pct (accounting for the 16.5% autocompact
reserve) to the bridge file, inflating values. The bridge used_pct is now
raw (Math.round(100 - remaining_percentage)), consistent with what CC's
native /context command reports. The statusline progress bar continues to
display the normalized value; only the bridge value changes. Updated the
existing #2219 tests to check the normalized display via hook stdout rather
than bridge.used_pct, and added a new assertion that bridge.used_pct is raw.
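The raw-vs-normalized difference can be sketched as below. The 16.5% reserve figure is from the fix above; the exact normalization formula is an assumption for illustration:

```javascript
const AUTOCOMPACT_RESERVE = 0.165;

// What the bridge file now stores: raw percentage, matching native /context.
function rawUsedPct(remainingPct) {
  return Math.round(100 - remainingPct);
}

// What the statusline progress bar displays: usage against the usable
// (1 - reserve) window, which reads higher than the raw value.
function normalizedUsedPct(remainingPct) {
  const used = (100 - remainingPct) / (100 * (1 - AUTOCOMPACT_RESERVE)) * 100;
  return Math.round(Math.min(used, 100));
}

console.log(rawUsedPct(40));        // 60
console.log(normalizedUsedPct(40)); // 72 — the inflation the bridge no longer leaks
```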

Closes #2453
Closes #2451

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:08:46 -04:00
Tom Boucher
53b9fba324 fix: stale phase dirs corrupt phase counts; stopped_at overwritten by historical prose (#2459)
* fix(sdk): extractCurrentMilestone Backlog leak + state.begin-phase flag parsing

Closes #2422
Closes #2420

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(#2444,#2445): scope stopped_at extraction to Session section; filter stale phase dirs

- buildStateFrontmatter now extracts stopped_at only from the ## Session
  section when one exists, preventing historical prose elsewhere in the
  body (e.g. "Stopped at: Phase 5 complete" in old notes) from overwriting
  the current value in frontmatter (bug #2444)
- buildStateFrontmatter de-duplicates phase dirs by normalized phase number
  before computing plan/phase counts, so stale phase dirs from a prior
  milestone with the same phase numbers as the new milestone don't inflate
  totals (bug #2445)
- cmdInitNewMilestone now filters phase dirs through getMilestonePhaseFilter
  so phase_dir_count excludes stale prior-milestone dirs (bug #2445)
- Tests: 4 tests in state.test.cjs covering both bugs
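The Session-scoped extraction can be sketched as follows (function name and section-slicing approach are illustrative, not the real buildStateFrontmatter source):

```javascript
// Extract stopped_at only from the ## Session section when one exists,
// so historical prose elsewhere in the body cannot win.
function extractStoppedAt(body) {
  const session = body.match(/## Session\n([\s\S]*?)(?=\n## |\s*$)/);
  const scope = session ? session[1] : body; // no Session section → whole body
  const m = scope.match(/^Stopped at:\s*(.+)$/m);
  return m ? m[1].trim() : null;
}

const state = '# STATE\nStopped at: Phase 5 complete (historical note)\n\n## Session\nStopped at: Phase 7\n\n## Notes\nmisc';
console.log(extractStoppedAt(state)); // "Phase 7" — the historical note no longer wins
```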

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:08:43 -04:00
Tom Boucher
5afcd5577e fix: zero-padded phase numbers bypass archived-phase guard; stale current_milestone (#2458)
* fix(sdk): extractCurrentMilestone Backlog leak + state.begin-phase flag parsing

Closes #2422
Closes #2420

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(sdk): skip stateVersion early-return for shipped milestones

When STATE.md has a stale `milestone: v1.0` entry but v1.0 is already
shipped (its heading in ROADMAP.md carries the shipped marker), the
stateVersion early-return path in getMilestoneInfo was returning v1.0
instead of detecting the new active milestone.

Two-part fix:
1. In the stateVersion block: skip the early-return when the matched
   heading line includes the shipped marker, and fall through to normal
   detection instead.
2. In the heading-format fallback regex: add a negative lookahead on the
   shipped marker so the regex never matches a shipped heading, regardless
   of whether stateVersion was present. This handles the no-STATE.md case
   and ensures the fallthrough from part 1 actually finds the next milestone.

Adds two regression tests covering both marker-suffix and marker-prefix
heading formats.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(core): allow padded-and-unpadded phase headings in getRoadmapPhaseInternal

The zero-strip normalization (01→1) fixed the archived-phase guard but
broke lookup against ROADMAP headings that still use zero-padded numbers
like "Phase 01:". Change the regex to use 0*<normalized> so both formats
match, making the fix robust regardless of ROADMAP heading style.
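The `0*`-tolerant heading match can be sketched as (regex shape assumed, not the real getRoadmapPhaseInternal source):

```javascript
const normalized = '1'; // zero-strip normalization: "01" → "1"

// 0* before the normalized number matches both padded and unpadded headings,
// while the trailing ":" still rejects longer numbers like "10".
const headingRe = new RegExp(`^## Phase 0*${normalized}:`, 'm');

console.log(headingRe.test('## Phase 01: Setup')); // true
console.log(headingRe.test('## Phase 1: Setup'));  // true
console.log(headingRe.test('## Phase 10: Other')); // false
```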

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:08:40 -04:00
Tom Boucher
9f79cdc40a fix(security): neutralize spaced+closing injection markers; fix audit-uat resolved status (#2456)
* fix(security): neutralize spaced+closing injection markers; fix audit-uat resolved status

Broadens the set of injection markers scanForInjection recognizes: adds
<user> tags, whitespace-padded tags (e.g. <user >), closing
[/SYSTEM]/[/INST] markers, and closing <</SYS>> markers. Five new
regression tests confirm each gap is closed.

audit-uat now treats an item as resolved when its result column reads
PASS or resolved, so items that were already confirmed do not appear as
outstanding in audit-uat --raw. Two new regression tests cover item-level
PASS and file-level status: passed.
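The widened marker set can be sketched as a pattern list (a sketch only — these regexes approximate the classes of markers named above, not the real scanner source):

```javascript
// Injection markers: user tags (optionally spaced/closing), [SYSTEM]/[INST]
// markers (opening and closing), and <<SYS>> markers (opening and closing).
const markers = [
  /<\/?\s*user\s*>/i,
  /\[\/?(?:SYSTEM|INST)\]/i,
  /<<\/?SYS>>/i,
];
const hit = (s) => markers.some((re) => re.test(s));

console.log(hit('ignore prior text </user >')); // true — spaced closing tag
console.log(hit('[/INST] now do X'));           // true — closing marker
console.log(hit('plain prose'));                // false
```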

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* test: add closing-tag assertion for spaced <user > sanitization

The test for 'neutralizes spaced tags like <user >' only asserted that the
opening token '<user' was removed. A spaced closing tag '</user >' could
survive sanitization undetected. Added assert.ok(!result.includes('</user'))
to the same test block so both sides of the tag are verified.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:08:18 -04:00
Tom Boucher
59cfbbba6a fix(sdk): extractCurrentMilestone Backlog leak + state.begin-phase flag parsing (#2455)
* fix(sdk): extractCurrentMilestone Backlog leak + state.begin-phase flag parsing

Closes #2422
Closes #2420

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: patch-version semver in milestone boundary regex + flag-parser validation

Two follow-on correctness issues identified in code review:

1. roadmap.ts: currentVersionMatch and nextMilestoneRegex only captured
   major.minor (v(\d+\.\d+)), collapsing v2.0.1 to "2.0". A sub-heading
   "## v2.0.2 Phase Details" would match the same prefix and be incorrectly
   skipped. Both patterns updated to v(\d+(?:\.\d+)+) to capture full semver.

2. state-mutation.ts: pair-wise flag parsing loop advanced i by 2 unconditionally,
   so a missing flag value caused the next flag token to be assigned as the value
   (e.g. flags['phase'] = '--name'). Fix: iterate with i++ and validate that the
   candidate value exists and does not start with '--' before assigning; throw
   GSDError('missing value for --<key>') on invalid input. Added regression test.
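Both fixes can be sketched in a few lines (function and variable names hypothetical; the error is a plain Error here rather than GSDError):

```javascript
// 1. Full-semver capture: v(\d+\.\d+) collapses v2.0.1 to "2.0".
const majorMinor = /v(\d+\.\d+)/;
const fullSemver = /v(\d+(?:\.\d+)+)/;
console.log('## v2.0.1 Current'.match(majorMinor)[1]); // "2.0"
console.log('## v2.0.1 Current'.match(fullSemver)[1]); // "2.0.1"

// 2. Flag parsing that validates the candidate value instead of
//    unconditionally advancing by 2.
function parseFlags(tokens) {
  const flags = {};
  for (let i = 0; i < tokens.length; i++) {
    if (!tokens[i].startsWith('--')) continue;
    const key = tokens[i].slice(2);
    const value = tokens[i + 1];
    if (value === undefined || value.startsWith('--')) {
      throw new Error(`missing value for --${key}`);
    }
    flags[key] = value;
    i++; // consume the value token
  }
  return flags;
}
```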

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 10:08:14 -04:00
Tom Boucher
990c3e648d fix(tests): update 5 source-text tests to read config-schema.cjs (#2480)
VALID_CONFIG_KEYS moved from config.cjs to config-schema.cjs in the
drift-prevention companion PR. Tests that read config.cjs source text
and checked for key literal includes() now point to the correct file.

Closes #2480

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 09:54:35 -04:00
Jeremy McSpadden
30433368a0 fix(install): template bare .claude hook paths for non-Claude runtimes 2026-04-19 18:42:30 -05:00
Jeremy McSpadden
04fab926b5 test: add --no-sdk to hook-deployment installer tests
Tests #1834, #1924, #2136 exercise hook/artifact deployment and don't
care about SDK install. Now that installSdkIfNeeded() failures are
fatal, these tests fail on any CI runner without gsd-sdk pre-built
because the sdk/ tsc build path runs and can fail in CI env.

Pass --no-sdk so each test focuses on its actual subject. SDK install
path has dedicated end-to-end coverage in install-smoke.yml.
2026-04-19 18:39:32 -05:00
Jeremy McSpadden
f98ef1e460 fix(install): fatal SDK install failures + CI smoke gate (#2439)
## Why
#2386 added `installSdkIfNeeded()` to build @gsd-build/sdk from bundled
source and `npm install -g .`, because the npm-published @gsd-build/sdk
is intentionally frozen and version-mismatched with get-shit-done-cc.

But every failure path in that function was warning-only — including
the final `which gsd-sdk` verification. When npm's global bin is off a
user's PATH (common on macOS), the installer printed a yellow warning
then exited 0. Users saw "install complete" and then every `/gsd-*`
command crashed with `command not found: gsd-sdk` (the #2439 symptom).

No CI job executed the install path, so this class of regression could
ship undetected — existing "install" tests only read bin/install.js as
a string.

## What changed

**bin/install.js — installSdkIfNeeded() is now transactional**
- All build/install failures exit non-zero (not just warn).
- Post-install `which gsd-sdk` check is fatal: if the binary landed
  globally but is off PATH, we exit 1 with a red banner showing the
  resolved npm bin dir, the user's shell, the target rc file, and the
  exact `export PATH=…` line to add.
- Escape hatch: `GSD_ALLOW_OFF_PATH=1` downgrades off-PATH to exit 2
  for users with intentionally restricted PATH who will wire up the
  binary manually.
- Resolver uses POSIX `command -v` via `sh -c` (replaces `which`) so
  behavior is consistent across sh/bash/zsh/fish.
- Factored `resolveGsdSdk()`, `detectShellRc()`, `emitSdkFatal()`.

**.github/workflows/install-smoke.yml (new)**
- Executes the real install path: `npm pack` → `npm install -g <tgz>`
  → run installer non-interactively → `command -v gsd-sdk` → run
  `gsd-sdk --version`.
- PRs: path-filtered to installer-adjacent files, ubuntu + Node 22 only.
- main/release branches: full matrix (ubuntu+macos × Node 22+24).
- Reusable via workflow_call with `ref` input for release gating.

**.github/workflows/release.yml — pre-publish gate**
- New `install-smoke-rc` and `install-smoke-finalize` jobs invoke the
  reusable workflow against the release branch. `rc` and `finalize`
  now `needs: [validate-version, install-smoke-*]`, so a broken SDK
  install blocks `npm publish`.

## Test plan
- Local full suite: 4154/4154 pass
- install-smoke.yml will self-validate on this PR (ubuntu+Node22 only)

Addresses root cause of #2439 (the per-command pre-flight in #2440 is
the complementary defensive layer).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 18:39:32 -05:00
Jeremy McSpadden
d0565e95c1 fix(set-profile): use hyphenated /gsd-set-profile in pre-flight message
Project convention (#1748) requires /gsd-<cmd> hyphen form everywhere
except designated test inputs. Fix the colon references in the
pre-flight error and its regression test to satisfy stale-colon-refs.
2026-04-19 18:39:32 -05:00
Jeremy McSpadden
4ef6275e86 fix(set-profile): guard gsd-sdk invocation with command -v pre-flight (#2439)
/gsd:set-profile crashed with `command not found: gsd-sdk` when gsd-sdk
was not on PATH. The command invoked `gsd-sdk query` directly in a `!`
backtick with no guard, so a missing binary produced an opaque shell
error with exit 127.

Add a `command -v gsd-sdk` pre-flight that prints the install/update
hint and exits 1 when absent, mirroring the #2334 fix on /gsd-quick.
The auto-install in #2386 still runs at install time; this guard is the
defensive layer for users whose npm global bin is off-PATH (install.js
warns but does not fail in that case).

Closes #2439
2026-04-19 18:39:32 -05:00
Jeremy McSpadden
6c50490766 fix(sdk): register init.ingest-docs handler and add registry drift guard (#2442)
The ingest-docs workflow called `gsd-sdk query init.ingest-docs` with a
fallback to `init.default` — neither was registered in createRegistry(),
so the workflow proceeded with `{}` and tried to parse project_exists,
planning_exists, has_git, and project_path from empty.

- Add initIngestDocs handler; register dotted + space aliases
- Simplify workflow call; drop broken fallback
- Repo-wide drift guard scans commands/, agents/, get-shit-done/,
  hooks/, bin/, scripts/, docs/ for `gsd-sdk query <cmd>` and fails
  on any reference with no registered handler (file:line citations)
- Unit tests for the new handler

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 18:39:20 -05:00
Jeremy McSpadden
4cbebfe78c docs(readme): add /gsd-ingest-docs to Brownfield commands
Surfaces the new ingest-docs command from the Unreleased changelog in
the README Commands section so users discover it without digging.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 18:39:20 -05:00
Jeremy McSpadden
9e87d43831 fix(build): include gsd-read-injection-scanner in hooks/dist (#2406)
The scanner was added in #2201 but never added to the HOOKS_TO_COPY
allowlist in scripts/build-hooks.js, so it never landed in hooks/dist/.
install.js reads from hooks/dist/, so every install on 1.37.0/1.37.1
emitted "Skipped read injection scanner hook — not found at target"
and the read-time prompt-injection scanner was silently disabled.

- Add gsd-read-injection-scanner.js to HOOKS_TO_COPY
- Add it to EXPECTED_ALL_HOOKS regression test in install-hooks-copy

Fixes #2406

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 18:39:20 -05:00
github-actions[bot]
29ea90bc83 chore: bump version to 1.38.1 for hotfix 2026-04-19 23:37:15 +00:00
github-actions[bot]
0c6172bfad chore: finalize v1.38.0 2026-04-18 03:45:59 +00:00
Jeremy McSpadden
e3bd06c9fd fix(release): make merge-back PR step non-fatal
Repos that disable "Allow GitHub Actions to create and approve pull
requests" (org-level policy or repo-level setting) cause the "Create PR
to merge release back to main" step to fail with a GraphQL 403. That
failure cascades: Tag and push, npm publish, GitHub Release creation
are all skipped, and the entire release aborts.

The merge-back PR is a convenience — it's re-openable manually after
the release. Making it non-fatal with continue-on-error lets the rest
of the release complete. The step now emits ::warning:: annotations
pointing at the manual-recovery command when it fails.

Shell pipelines also fall through with `|| echo "::warning::..."` so
transient gh CLI failures don't mask the underlying policy issue.

Covers the failure mode seen on run 24596079637 where dry-run publish
validation passed but the release halted at the PR-creation step.
2026-04-17 22:45:22 -05:00
github-actions[bot]
c69ecd975a chore: bump to 1.38.0-rc.1 2026-04-18 03:05:35 +00:00
Jeremy McSpadden
06c4ded4ec docs(changelog): promote Unreleased to [1.38.0] + add ultraplan entry 2026-04-17 22:03:26 -05:00
github-actions[bot]
341bb941c6 chore: bump version to 1.38.0 for release 2026-04-18 03:02:41 +00:00
520 changed files with 55012 additions and 9136 deletions

.github/workflows/canary.yml (new file, 144 lines)

@@ -0,0 +1,144 @@
name: Canary
on:
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Dry run (skip npm publish, tagging, and push)'
        required: false
        type: boolean
        default: false
concurrency:
  group: canary
  cancel-in-progress: false
env:
  NODE_VERSION: 24
jobs:
  canary:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    permissions:
      contents: write
      id-token: write
    environment: npm-publish
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0
      - uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
        with:
          node-version: ${{ env.NODE_VERSION }}
          registry-url: 'https://registry.npmjs.org'
          cache: 'npm'
      - name: Determine canary version
        id: canary
        run: |
          # Strip any pre-release suffix from package.json version to get base (e.g. 1.39.0-rc.4 → 1.39.0)
          RAW=$(node -p "require('./package.json').version")
          BASE=$(echo "$RAW" | sed 's/-.*//')
          # Find next sequential canary number from existing tags
          N=1
          while git tag -l "v${BASE}-canary.${N}" | grep -q .; do
            N=$((N + 1))
          done
          CANARY_VERSION="${BASE}-canary.${N}"
          echo "canary_version=$CANARY_VERSION" >> "$GITHUB_OUTPUT"
      - name: Configure git identity
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
      - name: Bump to canary version
        env:
          CANARY_VERSION: ${{ steps.canary.outputs.canary_version }}
        run: |
          npm version "$CANARY_VERSION" --no-git-tag-version
          cd sdk && npm version "$CANARY_VERSION" --no-git-tag-version && cd ..
      - name: Install and test
        run: |
          npm ci
          npm test
      - name: Build SDK dist for tarball
        run: npm run build:sdk
      - name: Verify tarball ships sdk/dist/cli.js (bug #2647)
        run: bash scripts/verify-tarball-sdk-dist.sh
      - name: Dry-run publish validation
        run: |
          npm publish --dry-run --tag canary
          cd sdk && npm publish --dry-run --tag canary
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
      - name: Tag and push
        if: ${{ github.ref == 'refs/heads/dev' && !inputs.dry_run }}
        env:
          CANARY_VERSION: ${{ steps.canary.outputs.canary_version }}
        run: |
          git tag "v${CANARY_VERSION}"
          git push origin "v${CANARY_VERSION}"
      - name: Publish to npm (canary)
        if: ${{ github.ref == 'refs/heads/dev' && !inputs.dry_run }}
        run: npm publish --provenance --access public --tag canary
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
      - name: Publish SDK to npm (canary)
        if: ${{ github.ref == 'refs/heads/dev' && !inputs.dry_run }}
        run: cd sdk && npm publish --provenance --access public --tag canary
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
      - name: Verify publish
        if: ${{ github.ref == 'refs/heads/dev' && !inputs.dry_run }}
        env:
          CANARY_VERSION: ${{ steps.canary.outputs.canary_version }}
        run: |
          PUBLISHED="NOT_FOUND"
          SDK_PUBLISHED="NOT_FOUND"
          for delay in 5 10 20 30 45; do
            PUBLISHED=$(npm view get-shit-done-cc@"$CANARY_VERSION" version 2>/dev/null || echo "NOT_FOUND")
            SDK_PUBLISHED=$(npm view @gsd-build/sdk@"$CANARY_VERSION" version 2>/dev/null || echo "NOT_FOUND")
            if [ "$PUBLISHED" = "$CANARY_VERSION" ] && [ "$SDK_PUBLISHED" = "$CANARY_VERSION" ]; then
              break
            fi
            echo "Not yet live (sleeping ${delay}s)..."
            sleep "$delay"
          done
          if [ "$PUBLISHED" != "$CANARY_VERSION" ]; then
            echo "::error::Published version verification failed. Expected $CANARY_VERSION, got $PUBLISHED"
            exit 1
          fi
          echo "Verified: get-shit-done-cc@$CANARY_VERSION is live on npm"
          if [ "$SDK_PUBLISHED" != "$CANARY_VERSION" ]; then
            echo "::error::SDK version verification failed. Expected $CANARY_VERSION, got $SDK_PUBLISHED"
            exit 1
          fi
          echo "Verified: @gsd-build/sdk@$CANARY_VERSION is live on npm"
          CANARY_TAG=$(npm dist-tag ls get-shit-done-cc 2>/dev/null | grep "canary:" | awk '{print $2}')
          echo "canary dist-tag points to: $CANARY_TAG"
      - name: Summary
        env:
          CANARY_VERSION: ${{ steps.canary.outputs.canary_version }}
          DRY_RUN: ${{ inputs.dry_run }}
        run: |
          echo "## Canary v${CANARY_VERSION}" >> "$GITHUB_STEP_SUMMARY"
          if [ "$DRY_RUN" = "true" ]; then
            echo "**DRY RUN** — npm publish, tagging, and push skipped" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "- Published to npm as \`canary\`" >> "$GITHUB_STEP_SUMMARY"
            echo "- SDK also published: \`@gsd-build/sdk@${CANARY_VERSION}\` on \`canary\`" >> "$GITHUB_STEP_SUMMARY"
            echo "- Tagged \`v${CANARY_VERSION}\`" >> "$GITHUB_STEP_SUMMARY"
            echo "- Install: \`npx get-shit-done-cc@canary\`" >> "$GITHUB_STEP_SUMMARY"
          fi
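The "Determine canary version" step above derives the next free canary number from existing tags; the same logic in JS (a sketch — the workflow does this in shell against `git tag -l`):

```javascript
// Given the package.json version and the repo's existing tags, compute the
// next sequential canary version: strip any pre-release suffix, then find
// the first unused v<base>-canary.<n> tag.
function nextCanaryVersion(pkgVersion, existingTags) {
  const base = pkgVersion.replace(/-.*$/, ''); // 1.39.0-rc.4 → 1.39.0
  let n = 1;
  while (existingTags.includes(`v${base}-canary.${n}`)) n++;
  return `${base}-canary.${n}`;
}

console.log(nextCanaryVersion('1.50.0-rc.1', ['v1.50.0-canary.1'])); // "1.50.0-canary.2"
```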

.github/workflows/install-smoke.yml (modified)

@@ -1,10 +1,13 @@
name: Install Smoke
# Exercises the real install path: `npm pack` → `npm install -g <tarball>`
# → run `bin/install.js` → assert `gsd-sdk` is on PATH.
# Exercises the real install paths:
# tarball: `npm pack` → `npm install -g <tarball>` → assert gsd-sdk on PATH
# unpacked: `npm install -g <dir>` (no pack) → assert gsd-sdk on PATH + executable
#
# Closes the CI gap that let #2439 ship: the rest of the suite only reads
# `bin/install.js` as a string and never executes it.
# The tarball path is the canonical ship path. The unpacked path reproduces the
# mode-644 failure class (issue #2453): npm does NOT chmod bin targets when
# installing from an unpacked local directory, so any stale tsc output lacking
# execute bits will be caught by the unpacked job before release.
#
# - PRs: path-filtered, minimal runner (ubuntu + Node LTS) for fast signal.
# - Push to release branches / main: full matrix.
@@ -16,6 +19,7 @@ on:
- main
paths:
- 'bin/install.js'
- 'bin/gsd-sdk.js'
- 'sdk/**'
- 'package.json'
- 'package-lock.json'
@@ -40,6 +44,9 @@ concurrency:
cancel-in-progress: true
jobs:
# ---------------------------------------------------------------------------
# Job 1: tarball install (existing canonical path)
# ---------------------------------------------------------------------------
smoke:
runs-on: ${{ matrix.os }}
timeout-minutes: 12
@@ -78,6 +85,31 @@ jobs:
if: steps.skip.outputs.skip != 'true'
with:
ref: ${{ inputs.ref || github.ref }}
# Need enough history to merge origin/main for stale-base detection.
fetch-depth: 0
# The default `refs/pull/N/merge` ref GitHub produces for PRs is cached
# against the recorded merge-base, not current main. When main advances
# after the PR was opened, the merge ref stays stale and CI can fail on
# issues that were already fixed upstream. Explicitly merge current
# origin/main into the PR head so smoke always tests the PR against the
# latest trunk. If the merge conflicts, emit a clear "rebase onto main"
# diagnostic instead of a downstream build error that looks unrelated.
- name: Rebase check — merge origin/main into PR head
if: steps.skip.outputs.skip != 'true' && github.event_name == 'pull_request'
shell: bash
run: |
set -euo pipefail
git config user.email "ci@gsd-build"
git config user.name "CI Rebase Check"
git fetch origin main
if ! git merge --no-edit --no-ff origin/main; then
echo "::error::This PR cannot cleanly merge origin/main. Rebase your branch onto current main and push again."
echo "::error::Conflicting files:"
git diff --name-only --diff-filter=U
git merge --abort
exit 1
fi
- name: Set up Node.js ${{ matrix.node-version }}
if: steps.skip.outputs.skip != 'true'
@@ -90,6 +122,23 @@ jobs:
if: steps.skip.outputs.skip != 'true'
run: npm ci
# Isolated SDK typecheck — if the build fails, emit a clear "stale base
# or real type error" diagnostic instead of letting the failure cascade
# into the tarball install step, where the downstream PATH assertion
# misreports it as "gsd-sdk not on PATH — installSdkIfNeeded regression".
- name: SDK typecheck (fails fast on type regressions)
if: steps.skip.outputs.skip != 'true'
shell: bash
run: |
set -euo pipefail
if ! npm run build:sdk; then
echo "::error::SDK build (npm run build:sdk) failed."
echo "::error::Common cause: your PR base is behind main and picks up intermediate type errors that are already fixed on trunk."
echo "::error::Fix: git fetch origin main && git rebase origin/main && git push --force-with-lease"
echo "::error::If the error persists on a fresh rebase, the type error is real — fix it in sdk/src/ and push."
exit 1
fi
- name: Pack root tarball
if: steps.skip.outputs.skip != 'true'
id: pack
@@ -109,7 +158,7 @@ jobs:
echo "$NPM_BIN" >> "$GITHUB_PATH"
echo "npm global bin: $NPM_BIN"
- name: Install tarball globally (runs bin/install.js → installSdkIfNeeded)
- name: Install tarball globally
if: steps.skip.outputs.skip != 'true'
shell: bash
env:
@@ -121,13 +170,14 @@ jobs:
cd "$TMPDIR_ROOT"
npm install -g "$WORKSPACE/$TARBALL"
command -v get-shit-done-cc
# `--claude --local` is the non-interactive code path (see
# install.js main block: when both a runtime and location are set,
# installAllRuntimes runs with isInteractive=false, no prompts).
# We tolerate non-zero here because the authoritative assertion is
# the next step: gsd-sdk must land on PATH. Some runtime targets
# may exit before the SDK step for unrelated reasons on CI.
get-shit-done-cc --claude --local || true
# `--claude --local` is the non-interactive code path. Don't swallow
# non-zero exit — if the installer fails, that IS the CI failure, and
# its own error message is more useful than the downstream "shim
# regression" assertion masking the real cause.
if ! get-shit-done-cc --claude --local; then
echo "::error::get-shit-done-cc --claude --local failed. See the install.js output above for the real error (SDK build, PATH resolution, chmod, etc.)."
exit 1
fi
- name: Assert gsd-sdk resolves on PATH
if: steps.skip.outputs.skip != 'true'
@@ -135,7 +185,7 @@ jobs:
run: |
set -euo pipefail
if ! command -v gsd-sdk >/dev/null 2>&1; then
echo "::error::gsd-sdk is not on PATH after install — installSdkIfNeeded() regression"
echo "::error::gsd-sdk is not on PATH after tarball install — shim regression"
NPM_BIN="$(npm config get prefix)/bin"
echo "npm global bin: $NPM_BIN"
ls -la "$NPM_BIN" | grep -i gsd || true
@@ -150,3 +200,99 @@ jobs:
set -euo pipefail
gsd-sdk --version || gsd-sdk --help
echo "✓ gsd-sdk is executable"
# ---------------------------------------------------------------------------
# Job 2: unpacked-dir install — reproduces the mode-644 failure class (#2453)
#
# `npm install -g <directory>` does NOT chmod bin targets when the source
# file was produced by a build script (tsc emits 0o644). This job catches
# regressions where sdk/dist/cli.js loses its execute bit before publish.
# ---------------------------------------------------------------------------
smoke-unpacked:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ inputs.ref || github.ref }}
fetch-depth: 0
# See the `smoke` job above for rationale — refs/pull/N/merge is cached
# against the recorded merge-base, not current main. Explicitly merge
# origin/main so smoke-unpacked also runs against the latest trunk.
- name: Rebase check — merge origin/main into PR head
if: github.event_name == 'pull_request'
shell: bash
run: |
set -euo pipefail
git config user.email "ci@gsd-build"
git config user.name "CI Rebase Check"
git fetch origin main
if ! git merge --no-edit --no-ff origin/main; then
echo "::error::This PR cannot cleanly merge origin/main. Rebase your branch onto current main and push again."
echo "::error::Conflicting files:"
git diff --name-only --diff-filter=U
git merge --abort
exit 1
fi
- name: Set up Node.js 22
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version: 22
cache: 'npm'
- name: Install root deps
run: npm ci
- name: Build SDK dist (sdk/dist is gitignored — must build for unpacked install)
run: npm run build:sdk
- name: Ensure npm global bin is on PATH
shell: bash
run: |
NPM_BIN="$(npm config get prefix)/bin"
echo "$NPM_BIN" >> "$GITHUB_PATH"
echo "npm global bin: $NPM_BIN"
- name: Strip execute bit from sdk/dist/cli.js to simulate tsc-fresh output
shell: bash
run: |
set -euo pipefail
# Simulate the exact state tsc produces: cli.js at mode 644.
chmod 644 sdk/dist/cli.js
echo "Stripped execute bit: $(stat -c '%a' sdk/dist/cli.js 2>/dev/null || stat -f '%p' sdk/dist/cli.js)"
- name: Install from unpacked directory (no npm pack)
shell: bash
run: |
set -euo pipefail
TMPDIR_ROOT=$(mktemp -d)
cd "$TMPDIR_ROOT"
npm install -g "$GITHUB_WORKSPACE"
command -v get-shit-done-cc
get-shit-done-cc --claude --local || true
- name: Assert gsd-sdk resolves on PATH after unpacked install
shell: bash
run: |
set -euo pipefail
if ! command -v gsd-sdk >/dev/null 2>&1; then
echo "::error::gsd-sdk is not on PATH after unpacked install — #2453 regression"
NPM_BIN="$(npm config get prefix)/bin"
ls -la "$NPM_BIN" | grep -i gsd || true
exit 1
fi
echo "✓ gsd-sdk resolves at: $(command -v gsd-sdk)"
- name: Assert gsd-sdk is executable after unpacked install (#2453)
shell: bash
run: |
set -euo pipefail
# This is the exact check that would have caught #2453 before release.
# The shim (bin/gsd-sdk.js) invokes sdk/dist/cli.js via `node`, so
# the execute bit on cli.js is not needed for the shim path. However
# installSdkIfNeeded() also chmods cli.js in-place as a safety net.
gsd-sdk --version || gsd-sdk --help
echo "✓ gsd-sdk is executable after unpacked install"

.github/workflows/release-sdk.yml (new file, 344 lines)

@@ -0,0 +1,344 @@
# Release SDK Bundle
#
# Stopgap workflow_dispatch publish path: builds get-shit-done-cc with the
# compiled SDK and the SDK .tgz bundled inside the CC tarball, then
# publishes the CC package to ONE chosen dist-tag (dev | next | latest)
# per run.
#
# Why this exists: @gsd-build/sdk publishes from canary.yml and release.yml
# fail because the @gsd-build npm token is currently unavailable. CC users
# do not consume @gsd-build/sdk directly — bin/gsd-sdk.js resolves
# sdk/dist/cli.js from inside the installed CC package, so the bundled
# copy is sufficient for full functionality. This workflow ships CC alone
# (no separate @gsd-build/sdk publish attempt) and additionally bakes a
# bundled gsd-sdk-<version>.tgz at sdk-bundle/gsd-sdk.tgz inside the CC
# tarball as a recoverable npm-installable artifact.
#
# Existing canary.yml and release.yml are intentionally untouched. They
# remain the canonical two-package publish path; restore them to primary
# use once @gsd-build/sdk ownership is recovered.
#
# Tracking issues: #2925 (initial workflow), #2929 (CI-gate parity with release.yml)
name: Release SDK Bundle
on:
workflow_dispatch:
inputs:
tag:
description: 'npm dist-tag to publish under'
required: true
type: choice
options:
- dev
- next
- latest
version:
description: 'Explicit version (e.g. 1.50.0-dev.3, 1.50.0-rc.2, 1.50.0). Empty = derive from package.json base + tag-appropriate suffix.'
required: false
type: string
ref:
description: 'Branch or ref to build from (default: the workflow-dispatch ref, typically dev)'
required: false
type: string
dry_run:
description: 'Dry run (skip npm publish, git tag, and push)'
required: false
type: boolean
default: false
# Per dist-tag, no concurrent publishes for the same stream. Different streams
# can publish in parallel because they target different dist-tags.
concurrency:
group: release-sdk-${{ inputs.tag }}
cancel-in-progress: false
env:
NODE_VERSION: 24
jobs:
# Cross-platform install validation gate (parity with release.yml).
# Publish job depends on this — won't proceed if the package fails to
# install cleanly across the supported matrix.
install-smoke:
permissions:
contents: read
uses: ./.github/workflows/install-smoke.yml
with:
ref: ${{ inputs.ref }}
release:
needs: install-smoke
runs-on: ubuntu-latest
timeout-minutes: 15
permissions:
contents: write # tag + push + GitHub Release
id-token: write # provenance
environment: npm-publish
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
ref: ${{ inputs.ref }}
- uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version: ${{ env.NODE_VERSION }}
registry-url: 'https://registry.npmjs.org'
cache: 'npm'
- name: Determine version
id: ver
env:
INPUT_TAG: ${{ inputs.tag }}
INPUT_OVERRIDE: ${{ inputs.version }}
run: |
set -e
RAW=$(node -p "require('./package.json').version")
BASE=$(echo "$RAW" | sed 's/-.*//')
if [ -n "$INPUT_OVERRIDE" ]; then
VERSION="$INPUT_OVERRIDE"
else
case "$INPUT_TAG" in
dev)
N=1
while git tag -l "v${BASE}-dev.${N}" | grep -q .; do
N=$((N + 1))
done
VERSION="${BASE}-dev.${N}"
;;
next)
N=1
while git tag -l "v${BASE}-rc.${N}" | grep -q .; do
N=$((N + 1))
done
VERSION="${BASE}-rc.${N}"
;;
latest)
VERSION="$BASE"
;;
*)
echo "::error::Unknown tag '$INPUT_TAG' (expected dev|next|latest)"
exit 1
;;
esac
fi
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
echo "tag=$INPUT_TAG" >> "$GITHUB_OUTPUT"
echo "→ Will publish v${VERSION} to dist-tag '${INPUT_TAG}'"
- name: Refuse if version already exists on npm
env:
VERSION: ${{ steps.ver.outputs.version }}
run: |
EXISTING=$(npm view get-shit-done-cc@"$VERSION" version 2>/dev/null || true)
if [ -n "$EXISTING" ]; then
echo "::error::get-shit-done-cc@${VERSION} is already published. Bump version or pass an explicit override input."
exit 1
fi
# Tolerant tag-existence check (matches release.yml pattern). An
# operator re-running after a mid-flight publish-step failure should
# not be blocked just because the tag step succeeded last time. Only
# error if the existing tag points at a different commit than HEAD.
- name: Check git tag (skip if matches HEAD, error if mismatched)
env:
VERSION: ${{ steps.ver.outputs.version }}
run: |
if git rev-parse -q --verify "refs/tags/v${VERSION}" >/dev/null; then
EXISTING_SHA=$(git rev-parse "refs/tags/v${VERSION}")
HEAD_SHA=$(git rev-parse HEAD)
if [ "$EXISTING_SHA" != "$HEAD_SHA" ]; then
echo "::error::git tag v${VERSION} already exists pointing at ${EXISTING_SHA}, but HEAD is ${HEAD_SHA}"
exit 1
fi
echo "::notice::tag v${VERSION} already exists at HEAD; tag step will skip"
fi
- name: Configure git identity
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
- name: Bump in-tree version (not committed)
env:
VERSION: ${{ steps.ver.outputs.version }}
run: |
npm version "$VERSION" --no-git-tag-version
cd sdk && npm version "$VERSION" --no-git-tag-version
- name: Install dependencies
run: npm ci
- name: Run full test suite with coverage (parity with release.yml)
run: npm run test:coverage
- name: Build SDK dist for tarball
run: npm run build:sdk
- name: Verify CC tarball ships sdk/dist/cli.js (bug #2647 guard)
run: bash scripts/verify-tarball-sdk-dist.sh
- name: Pack SDK as tarball and bundle into CC source tree
env:
VERSION: ${{ steps.ver.outputs.version }}
run: |
set -e
cd sdk
npm pack
# npm pack emits gsd-build-sdk-<version>.tgz in the cwd
TARBALL="gsd-build-sdk-${VERSION}.tgz"
if [ ! -f "$TARBALL" ]; then
echo "::error::Expected $TARBALL but npm pack did not produce it. Listing sdk/:"
ls -la
exit 1
fi
mkdir -p ../sdk-bundle
mv "$TARBALL" ../sdk-bundle/gsd-sdk.tgz
cd ..
ls -la sdk-bundle/
- name: Add sdk-bundle to CC files whitelist (in-tree, not committed)
run: |
node <<'NODE'
const fs = require('fs');
const pkg = JSON.parse(fs.readFileSync('package.json', 'utf8'));
if (!Array.isArray(pkg.files)) {
console.error('::error::package.json files is not an array');
process.exit(1);
}
if (!pkg.files.includes('sdk-bundle')) {
pkg.files.push('sdk-bundle');
fs.writeFileSync('package.json', JSON.stringify(pkg, null, 2) + '\n');
console.log('Added sdk-bundle/ to package.json files whitelist');
} else {
console.log('sdk-bundle/ already in files whitelist');
}
NODE
- name: Verify CC tarball will contain sdk-bundle/gsd-sdk.tgz
run: |
set -e
TARBALL=$(npm pack --ignore-scripts 2>/dev/null | tail -1)
if [ -z "$TARBALL" ] || [ ! -f "$TARBALL" ]; then
echo "::error::npm pack produced no tarball"
exit 1
fi
echo "Inspecting $TARBALL for sdk-bundle/gsd-sdk.tgz:"
if ! tar -tzf "$TARBALL" | grep -q "package/sdk-bundle/gsd-sdk.tgz"; then
echo "::error::CC tarball is missing package/sdk-bundle/gsd-sdk.tgz"
tar -tzf "$TARBALL" | grep -E "sdk-bundle|sdk/dist" | head -20
exit 1
fi
echo "✅ CC tarball contains sdk-bundle/gsd-sdk.tgz"
rm -f "$TARBALL"
- name: Dry-run publish validation
env:
TAG: ${{ steps.ver.outputs.tag }}
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: npm publish --dry-run --tag "$TAG"
- name: Tag and push
if: ${{ !inputs.dry_run }}
env:
VERSION: ${{ steps.ver.outputs.version }}
run: |
if git rev-parse -q --verify "refs/tags/v${VERSION}" >/dev/null; then
echo "Tag v${VERSION} already exists at HEAD (per pre-flight check); skipping git tag step"
else
git tag "v${VERSION}"
fi
git push origin "v${VERSION}"
- name: Publish to npm (CC bundle, SDK included as both loose tree and .tgz)
if: ${{ !inputs.dry_run }}
env:
TAG: ${{ steps.ver.outputs.tag }}
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: npm publish --provenance --access public --tag "$TAG"
# Keep `next` from going stale relative to `latest`. When publishing a
# stable release, also point `next` at it so users on `@next` don't
# get stuck on an older pre-release than what's now stable. Parity
# with release.yml#finalize "Clean up next dist-tag" step.
- name: Re-point next dist-tag at the new latest (only when tag=latest)
if: ${{ !inputs.dry_run && steps.ver.outputs.tag == 'latest' }}
env:
VERSION: ${{ steps.ver.outputs.version }}
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
npm dist-tag add "get-shit-done-cc@${VERSION}" next
echo "✅ next dist-tag re-pointed to v${VERSION} (matches latest)"
- name: Create GitHub Release
if: ${{ !inputs.dry_run }}
env:
GH_TOKEN: ${{ github.token }}
VERSION: ${{ steps.ver.outputs.version }}
TAG: ${{ steps.ver.outputs.tag }}
run: |
# Per-tag release flags:
# dev, next → --prerelease (won't be highlighted as the latest release on the repo page)
# latest → --latest (becomes the highlighted release)
if [ "$TAG" = "latest" ]; then
gh release create "v${VERSION}" \
--title "v${VERSION}" \
--generate-notes \
--latest
else
gh release create "v${VERSION}" \
--title "v${VERSION}" \
--generate-notes \
--prerelease
fi
echo "✅ GitHub Release v${VERSION} created"
- name: Verify publish landed on registry
if: ${{ !inputs.dry_run }}
env:
VERSION: ${{ steps.ver.outputs.version }}
TAG: ${{ steps.ver.outputs.tag }}
run: |
PUBLISHED="NOT_FOUND"
for delay in 5 10 20 30 45; do
PUBLISHED=$(npm view get-shit-done-cc@"$VERSION" version 2>/dev/null || echo "NOT_FOUND")
if [ "$PUBLISHED" = "$VERSION" ]; then
break
fi
echo "Waiting ${delay}s for registry to catch up (saw: $PUBLISHED)..."
sleep "$delay"
done
if [ "$PUBLISHED" != "$VERSION" ]; then
echo "::error::Version $VERSION did not appear on the registry within timeout"
exit 1
fi
TAG_VERSION=$(npm view get-shit-done-cc dist-tags."$TAG" 2>/dev/null || echo "NOT_FOUND")
if [ "$TAG_VERSION" != "$VERSION" ]; then
echo "::error::dist-tag '$TAG' resolves to '$TAG_VERSION', expected '$VERSION'"
exit 1
fi
echo "✅ get-shit-done-cc@${VERSION} live on dist-tag '${TAG}'"
- name: Summary
env:
VERSION: ${{ steps.ver.outputs.version }}
TAG: ${{ steps.ver.outputs.tag }}
DRY_RUN: ${{ inputs.dry_run }}
run: |
echo "## Release SDK Bundle: v${VERSION} → @${TAG}" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
if [ "$DRY_RUN" = "true" ]; then
echo "**DRY RUN** — npm publish, git tag, push, and GitHub Release were skipped." >> "$GITHUB_STEP_SUMMARY"
else
echo "- Published \`get-shit-done-cc@${VERSION}\` to dist-tag \`${TAG}\`" >> "$GITHUB_STEP_SUMMARY"
echo "- SDK bundled inside the CC tarball at:" >> "$GITHUB_STEP_SUMMARY"
echo " - \`sdk/dist/cli.js\` (loose tree, consumed by \`bin/gsd-sdk.js\` shim)" >> "$GITHUB_STEP_SUMMARY"
echo " - \`sdk-bundle/gsd-sdk.tgz\` (npm-installable artifact)" >> "$GITHUB_STEP_SUMMARY"
echo "- Git tag \`v${VERSION}\` pushed" >> "$GITHUB_STEP_SUMMARY"
echo "- GitHub Release \`v${VERSION}\` created" >> "$GITHUB_STEP_SUMMARY"
if [ "$TAG" = "latest" ]; then
echo "- \`next\` dist-tag re-pointed at \`v${VERSION}\` (kept current with \`latest\`)" >> "$GITHUB_STEP_SUMMARY"
fi
echo "- Install: \`npm install -g get-shit-done-cc@${TAG}\`" >> "$GITHUB_STEP_SUMMARY"
fi


@@ -189,8 +189,11 @@ jobs:
git add package.json package-lock.json sdk/package.json
git commit -m "chore: bump to ${PRE_VERSION}"
- name: Build SDK
run: cd sdk && npm ci && npm run build
- name: Build SDK dist for tarball
run: npm run build:sdk
- name: Verify tarball ships sdk/dist/cli.js (bug #2647)
run: bash scripts/verify-tarball-sdk-dist.sh
- name: Dry-run publish validation
run: |
@@ -330,8 +333,11 @@ jobs:
npm ci
npm run test:coverage
- name: Build SDK
run: cd sdk && npm ci && npm run build
- name: Build SDK dist for tarball
run: npm run build:sdk
- name: Verify tarball ships sdk/dist/cli.js (bug #2647)
run: bash scripts/verify-tarball-sdk-dist.sh
- name: Dry-run publish validation
run: |
@@ -342,23 +348,32 @@ jobs:
- name: Create PR to merge release back to main
if: ${{ !inputs.dry_run }}
continue-on-error: true
env:
GH_TOKEN: ${{ github.token }}
BRANCH: ${{ needs.validate-version.outputs.branch }}
VERSION: ${{ inputs.version }}
run: |
EXISTING_PR=$(gh pr list --base main --head "$BRANCH" --state open --json number --jq '.[0].number')
# Non-fatal: repos that disable "Allow GitHub Actions to create and
# approve pull requests" cause this step to fail with GraphQL 403.
# The release itself (tag + npm publish + GitHub Release) must still
# proceed. Open the merge-back PR manually afterwards with:
# gh pr create --base main --head release/${VERSION} \
# --title "chore: merge release v${VERSION} to main"
EXISTING_PR=$(gh pr list --base main --head "$BRANCH" --state open --json number --jq '.[0].number' 2>/dev/null || echo "")
if [ -n "$EXISTING_PR" ]; then
echo "PR #$EXISTING_PR already exists; updating"
gh pr edit "$EXISTING_PR" \
--title "chore: merge release v${VERSION} to main" \
--body "Merge release branch back to main after v${VERSION} stable release."
--body "Merge release branch back to main after v${VERSION} stable release." \
|| echo "::warning::Could not update merge-back PR (likely PR-creation policy disabled). Open it manually after release."
else
gh pr create \
--base main \
--head "$BRANCH" \
--title "chore: merge release v${VERSION} to main" \
--body "Merge release branch back to main after v${VERSION} stable release."
--body "Merge release branch back to main after v${VERSION} stable release." \
|| echo "::warning::Could not create merge-back PR (likely PR-creation policy disabled). Open it manually after release."
fi
- name: Tag and push


@@ -16,6 +16,21 @@ concurrency:
cancel-in-progress: true
jobs:
# Static lint: no source-grep tests in the test suite.
# Runs once (not per matrix node version) since it is a file-content check.
lint-tests:
runs-on: ubuntu-latest
timeout-minutes: 2
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Node.js
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
with:
node-version: 24
- name: Lint — no source-grep tests
shell: bash
run: node scripts/lint-no-source-grep.cjs
test:
runs-on: ${{ matrix.os }}
timeout-minutes: 10
@@ -35,6 +50,31 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Fetch full history so we can merge origin/main for stale-base detection.
fetch-depth: 0
# GitHub's `refs/pull/N/merge` is cached against the recorded merge-base.
# When main advances after a PR is opened, the cache stays stale and CI
# runs against the pre-advance state — hiding bugs that are already fixed
# on trunk and surfacing type errors that were introduced and then patched
# on main in between. Explicitly merge current origin/main here so tests
# always run against the latest trunk.
- name: Rebase check — merge origin/main into PR head
if: github.event_name == 'pull_request'
shell: bash
run: |
set -euo pipefail
git config user.email "ci@gsd-build"
git config user.name "CI Rebase Check"
git fetch origin main
if ! git merge --no-edit --no-ff origin/main; then
echo "::error::This PR cannot cleanly merge origin/main. Rebase your branch onto current main and push again."
echo "::error::Conflicting files:"
git diff --name-only --diff-filter=U
git merge --abort
exit 1
fi
- name: Set up Node.js ${{ matrix.node-version }}
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
@@ -45,6 +85,9 @@ jobs:
- name: Install dependencies
run: npm ci
- name: Build SDK dist (required by installer)
run: npm run build:sdk
- name: Run tests with coverage
shell: bash
run: npm run test:coverage

(File diff suppressed because it is too large.)


@@ -229,6 +229,73 @@ const content = `
`;
```
### Prohibited: Source-Grep Tests
**Never read source-code `.cjs` files with `readFileSync` to assert that strings exist within them.** This is source-grep theater: it proves a literal is present in a file, not that the feature works at runtime.
```javascript
// BAD — source-grep theater
const configSrc = fs.readFileSync(
path.join(GSD_ROOT, 'bin', 'lib', 'config-schema.cjs'), 'utf-8'
);
assert.ok(
configSrc.includes("'workflow.plan_bounce'"),
'VALID_CONFIG_KEYS should contain workflow.plan_bounce'
);
```
This test passes even if `workflow.plan_bounce` is present but misspelled in the schema, removed from the validation path, or moved to a different file under a different name. It survives every behavioral regression and fails only on trivial renames.
The correct pattern for config key tests — use the CLI:
```javascript
// GOOD — behavioral test via the CLI
test('config-set accepts workflow.plan_bounce', (t) => {
const tmpDir = createTempProject();
t.after(() => cleanup(tmpDir));
const result = runGsdTools('config-set workflow.plan_bounce true', tmpDir);
assert.ok(result.success, `config-set should accept workflow.plan_bounce: ${result.error}`);
const configPath = path.join(tmpDir, '.planning', 'config.json');
const config = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
assert.strictEqual(config.workflow?.plan_bounce, true, 'value must be persisted');
});
```
This single test covers key registration in `VALID_CONFIG_KEYS`, the key's namespace resolution in `KNOWN_TOP_LEVEL`, and value persistence — all behaviors that the source-grep test could not touch.
**Why this pattern broke at scale:** Commit `990c3e64` in this repo updated 5 source-grep tests in one pass when `VALID_CONFIG_KEYS` moved between files. Zero of those tests were testing behavior. If they had been behavioral tests, the migration would have been invisible.
**CI enforcement:** A linter (`scripts/lint-no-source-grep.cjs`, run as `npm run lint:tests`) detects violations. Any test file that calls `readFileSync` on a `.cjs` path in a source directory without the exemption annotation below will fail the `lint-tests` CI job.
### Exception: `allow-test-rule: <reason>`
Some tests legitimately read source files. There are six recognized categories:
| Reason | When to use |
|--------|-------------|
| `source-text-is-the-product` | Agent `.md`, workflow `.md`, command `.md` files — their text IS what the runtime loads. Testing text content tests the deployed contract. |
| `architectural-invariant` | Implementation must use a specific primitive (e.g., `Atomics.wait`, atomic file writes) that cannot be tested by observing outputs. |
| `structural-regression-guard` | A specific code pattern must (or must not) exist to prevent a class of bug (e.g., regex global-state misuse). Behavioral tests cannot distinguish which pattern was used. |
| `docs-parity` | A reference doc must stay in sync with source-defined constants (e.g., `CONFIG_DEFAULTS`). The source is the canonical list; there is no runtime API to enumerate it. |
| `integration-test-input` | A source file is used as a real fixture input to a transformation function under test — the file is not inspected for strings but passed as data. |
| `structural-implementation-guard` | A feature's interception or wiring point is not reachable end-to-end via `runGsdTools`. Used temporarily until a behavioral path exists. |
Annotate with a standalone `//` comment before the file's opening block comment:
```javascript
// allow-test-rule: architectural-invariant
// state.cjs locking must use Atomics.wait(), not a spin-loop. Behavioral tests
// cannot observe which sleep primitive was chosen — only source inspection can.
/**
* Regression tests for locking bugs #1909...
*/
```
The annotation **must** be a standalone `// allow-test-rule:` line, not inside a `/** */` block comment — the CI linter scans for the pattern `// allow-test-rule:`.
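As a rough illustration of what that scan amounts to (a sketch only; the real rules, including handling of the six exemption categories, live in `scripts/lint-no-source-grep.cjs`), the check reduces to: flag any test file that reads a `.cjs` path via `readFileSync` unless it carries the standalone annotation line.

```shell
# Illustrative approximation of the lint-tests scan. Not the real
# linter logic; the canonical rules live in
# scripts/lint-no-source-grep.cjs.
check_file() {
  f="$1"
  if grep -q 'readFileSync' "$f" && grep -q '\.cjs' "$f"; then
    # The linter scans for the literal standalone-comment pattern.
    grep -q '^// allow-test-rule:' "$f" || return 1
  fi
  return 0
}

# A file carrying the annotation passes:
tmp=$(mktemp)
printf '%s\n' \
  '// allow-test-rule: docs-parity' \
  "const src = fs.readFileSync('bin/lib/config-schema.cjs', 'utf-8');" \
  > "$tmp"
check_file "$tmp" && echo "exempt: pass"
rm -f "$tmp"
```

The same file without the annotation line would fail the check, which is what makes the annotation placement rule (standalone `//` comment, not inside a block comment) load-bearing.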
### Node.js Version Compatibility
**Node 22 is the minimum supported version.** Node 24 is the primary CI target. All tests must pass on both.
@@ -278,6 +345,16 @@ node --test tests/core.test.cjs
npm run test:coverage
```
### CI Test Quality Checks
The following checks run on every PR in addition to the test suite:
| Job | What it checks | How to pass |
|-----|----------------|-------------|
| `lint-tests` | No source-grep tests (see above) | Replace with `runGsdTools()` behavioral tests, or add `// allow-test-rule: <reason>` |
Run locally before pushing: `npm run lint:tests`
### Test Requirements by Contribution Type
The required tests differ depending on what you are contributing:
@@ -314,6 +391,15 @@ bin/install.js — Installer (multi-runtime)
get-shit-done/
bin/lib/ — Core library modules (.cjs)
workflows/ — Workflow definitions (.md)
Large workflows split per progressive-disclosure
pattern: workflows/<name>/modes/*.md +
workflows/<name>/templates/*. Parent dispatches
to mode files. See workflows/discuss-phase/ as
the canonical example (#2551). New modes for
discuss-phase land in
workflows/discuss-phase/modes/<mode>.md.
Per-file budgets enforced by
tests/workflow-size-budget.test.cjs.
references/ — Reference documentation (.md)
templates/ — File templates
agents/ — Agent definitions (.md) — CANONICAL SOURCE


@@ -41,7 +41,7 @@ npx get-shit-done-cc@latest
**Trusted by engineers at Amazon, Google, Shopify, and Webflow.**
[Why I Built This](#why-i-built-this) · [How It Works](#how-it-works) · [Commands](#commands) · [Why It Works](#why-it-works) · [User Guide](docs/USER-GUIDE.md)
[Why I Built This](#why-i-built-this) · [How It Works](#how-it-works) · [Commands](#commands) · [Why It Works](#why-it-works) · [User Guide](docs/USER-GUIDE.md) · [Walkthrough](docs/USER-GUIDE.md#end-to-end-walkthrough)
</div>
@@ -197,6 +197,57 @@ The GSD SDK CLI (`gsd-sdk`) is installed automatically (required by `/gsd-*` com
</details>
<details>
<summary><strong>Minimal Install (local LLMs and token-billed APIs)</strong></summary>
GSD ships 86 skills and 33 subagents. Every runtime (Claude Code, OpenCode, etc.) eagerly enumerates skill and subagent descriptions into the system prompt on **every turn** — roughly **12k tokens** of fixed overhead before you've typed anything. Frontier models with large context (Sonnet 4.6, Opus 4.7 — 200K to 1M ctx) absorb that without a noticeable hit. **Local LLMs with 32K–128K context, and any model where you're paying per token, will feel it.**
Pass `--minimal` (alias `--core-only`) to install only the **main GSD loop**:
```bash
npx get-shit-done-cc --claude --global --minimal
# or any other runtime — works the same
npx get-shit-done-cc --opencode --global --minimal
```
What you get:
| Surface | Default install | `--minimal` install |
|---|---|---|
| Skills | 86 (`new-project`, `discuss-phase`, `plan-phase`, `execute-phase`, …82 more) | **6** (`new-project`, `discuss-phase`, `plan-phase`, `execute-phase`, `help`, `update`) |
| Subagents | 33 `gsd-*` agents | **0** |
| Cold-start system-prompt overhead | ~12k tokens | **~700 tokens** (≥94% reduction) |
| Manifest mode field | `"full"` | `"minimal"` |
The 6 core skills are exactly the ones you need to drive a project from zero: `new-project` to bootstrap, then the `discuss → plan → execute` loop, plus `help` for discovery and `update` to upgrade later.
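The ≥94% figure in the table is just arithmetic on the two overhead numbers above:

```shell
# Cold-start overhead drops from ~12k tokens (full) to ~700 (minimal).
awk 'BEGIN { printf "reduction: %.1f%%\n", (1 - 700 / 12000) * 100 }'
# → reduction: 94.2%
```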
**This is a hard floor, not a ceiling.** Each `/gsd-*` command you start using and each subagent it dispatches loads its body content into the conversation for that turn — that's normal token use, not eager overhead. But:
> [!IMPORTANT]
> **The savings disappear the moment you re-install without `--minimal`.** Running `npx get-shit-done-cc@latest` (or `gsd update` from inside a session) without the flag puts the full 86-skill / 33-agent surface back on disk, and every subsequent session pays the full ~12k-token floor again. If you want to stay minimal, **always pass `--minimal` when updating**:
>
> ```bash
> npx get-shit-done-cc@latest --claude --global --minimal
> ```
>
> Need a specific skill that isn't in the core set (e.g., `gsd-autonomous`, `gsd-ship`, `gsd-debug`)? You have two options:
> 1. **Permanent expand:** re-install without `--minimal` to get the full surface (and the full token floor).
> 2. **One-shot:** run the slash command's underlying logic by reading the source from `commands/gsd/<name>.md` in the GSD package and executing it manually — no install change needed.
>
> Tip: `cat ~/.claude/get-shit-done/.gsd-manifest.json | jq .mode` (or `gsd-file-manifest.json` depending on layout) confirms which mode you're in.
When to use `--minimal`:
- Local model with 32K–128K context (Qwen3, Llama, Mistral, etc.)
- Token-metered API where every turn matters
- Throwaway directory or non-GSD project where you want `/gsd-new-project` available without paying for the rest
- CI runners or ephemeral containers where install footprint matters
When **not** to use `--minimal`:
- Active GSD project where you regularly invoke the broader command set (`autonomous`, `ship`, `code-review`, `debug`, etc.) — re-installing each time is friction without payoff.
- Frontier models with 200K–1M context — the savings are noise.
</details>
<details>
<summary><strong>Development Installation</strong></summary>
@@ -263,6 +314,8 @@ If you prefer not to use that flag, add this to your project's `.claude/settings
## How It Works
> **New to GSD?** See the [end-to-end walkthrough](docs/USER-GUIDE.md#end-to-end-walkthrough) in the User Guide — it shows a complete project from `/gsd-new-project` through `/gsd-verify-work` with concrete example outputs.
> **Already have code?** Run `/gsd-map-codebase` first. It spawns parallel agents to analyze your stack, architecture, conventions, and concerns. Then `/gsd-new-project` knows your codebase — questions focus on what you're adding, and planning automatically loads your patterns.
### 1. Initialize Project


@@ -209,6 +209,96 @@ If a finding references multiple files (in Fix section or Issue section):
<execution_flow>
<step name="setup_worktree">
**Isolation: create a dedicated git worktree BEFORE touching any files.**
This agent runs as a background process that makes commits. Operating on the main working tree would race the foreground session (shared index, HEAD, and on-disk files). Instead, every instance runs in its own isolated worktree.
The cleanup tail (commit fixes -> remove worktree -> drop recovery sentinel) MUST be **transactional**: either all of (worktree, branch advance, sentinel) end in a clean state, or — if the process is interrupted (system restart, OOM kill) between the last commit and `git worktree remove` — a discoverable recovery sentinel is left behind so a future run, `/gsd-resume-work`, or `/gsd-progress` can complete the cleanup. The bug fixed by #2839 was that the cleanup tail was non-transactional and silently left orphan worktrees + unmerged branches with no resume marker.
```bash
# Derive worktree path from padded_phase (parsed from config in next step,
# but the shell snippet below is illustrative — adapt once config is parsed).
# In practice: parse padded_phase from config first, then run:
branch=$(git branch --show-current)
test -n "$branch" || { echo "Detached HEAD is not supported for review-fix (#2686)"; exit 1; }
# Recovery-sentinel handling (#2839):
# Path is ${phase_dir}/.review-fix-recovery-pending.json. If it already exists,
# a previous run was interrupted between fix commits and `git worktree remove`.
# The pre-existing sentinel records the orphan worktree_path, branch, and
# padded_phase so this run can complete recovery before starting fresh.
sentinel="${phase_dir}/.review-fix-recovery-pending.json"
if [ -f "$sentinel" ]; then
echo "Detected pre-existing recovery sentinel from a prior interrupted run: $sentinel"
prior_wt=$(node -e '
const fs = require("fs");
try {
const parsed = JSON.parse(fs.readFileSync(process.argv[1], "utf-8"));
process.stdout.write(parsed.worktree_path || "");
} catch (err) {
process.stderr.write(`Warning: malformed recovery sentinel ${process.argv[1]}: ${err.message}\n`);
process.stdout.write("");
}
' "$sentinel")
if [ -n "$prior_wt" ] && git worktree list --porcelain | grep -q "^worktree $prior_wt$"; then
echo "Removing orphan worktree from prior run: $prior_wt"
git worktree remove "$prior_wt" --force || true
fi
rm -f "$sentinel"
fi
wt=$(mktemp -d "/tmp/sv-${padded_phase}-reviewfix-XXXXXX")
git worktree add "$wt" "$branch"
# Write the recovery sentinel ONLY AFTER `git worktree add` succeeds.
# Writing it before would leave a sentinel pointing at a worktree that does
# not exist if `git worktree add` itself failed.
node -e '
const fs = require("fs");
const [sentinelPath, worktree_path, branch, padded_phase] = process.argv.slice(1);
fs.writeFileSync(sentinelPath, JSON.stringify({
worktree_path,
branch,
padded_phase,
started_at: new Date().toISOString()
}, null, 2));
' "$sentinel" "$wt" "$branch" "$padded_phase"
cd "$wt"
```
Concrete steps:
1. Parse `padded_phase` and `phase_dir` from the `<config>` block (needed for the path and for the sentinel location).
2. Resolve the current branch: `branch=$(git branch --show-current)`. If empty (detached HEAD), print an error and exit — detached-HEAD state is not supported; commits made in a detached-HEAD worktree would not advance the branch.
3. **Recovery check (#2839):** If `${phase_dir}/.review-fix-recovery-pending.json` already exists, a prior run was interrupted. Parse the JSON, attempt to remove the orphan worktree it points at (best-effort, with `--force`), then delete the stale sentinel before continuing. This makes a re-run of `/gsd-code-review-fix` self-healing.
4. Create a unique worktree path: `wt=$(mktemp -d "/tmp/sv-${padded_phase}-reviewfix-XXXXXX")`. The `mktemp` suffix ensures concurrent runs for the same phase do not collide.
5. Run `git worktree add "$wt" "$branch"` — this attaches the worktree to the current branch so commits advance it.
6. **Write the recovery sentinel** at `${phase_dir}/.review-fix-recovery-pending.json` containing `{worktree_path, branch, padded_phase, started_at}`. Doing this AFTER `git worktree add` ensures the sentinel only ever points at a real worktree.
7. All subsequent file reads, edits, and commits happen inside `$wt`.
**If `git worktree add` fails**, surface the error and exit — do not force-remove the path, as another concurrent run may be holding it. Do not write the sentinel (the worktree does not exist).
**Cleanup tail (transactional, ALWAYS — even on failure):** After writing REVIEW-FIX.md and before returning to the orchestrator, run the two-step cleanup in this exact order:
```bash
# Step 1: drop the worktree FIRST. If this succeeds and the process is then
# killed, the next run finds a sentinel pointing at a worktree that no longer
# exists — the recovery branch handles this gracefully (best-effort remove +
# sentinel delete). If we reversed the order (sentinel removed first, then
# worktree remove), an interruption between the two steps would leave NO
# sentinel and an orphan worktree — exactly the bug from #2839.
git worktree remove "$wt" --force
# Step 2: drop the recovery sentinel ONLY after `git worktree remove` returns
# successfully. This atomic-ish ordering is what makes the cleanup tail
# transactional from the orchestrator's perspective.
rm -f "$sentinel"
```
This cleanup is unconditional — register it mentally as a finally-block obligation. If the agent exits early (config error, no findings, etc.), still run the two-step cleanup tail (`git worktree remove "$wt" --force` followed by `rm -f "$sentinel"`) before exit. The sentinel must NEVER be removed before `git worktree remove` succeeds.
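A sketch of how that finally-block obligation could be made mechanical (illustrative only, not part of the agent spec; assumes the `$wt` and `$sentinel` variables from the setup snippet above): register the ordered tail as an EXIT trap immediately after the sentinel is written.

```shell
# Illustrative: encode the cleanup tail as an EXIT trap so it runs on
# every exit path, including early config-error exits. $wt and
# $sentinel are assumed to have been set by the setup_worktree step.
wt="${wt:-/tmp/nonexistent-worktree-example}"
sentinel="${sentinel:-/tmp/nonexistent-sentinel-example}"

cleanup() {
  # Ordering from #2839: the sentinel is dropped only after the
  # worktree removal actually succeeds, never before.
  if git worktree remove "$wt" --force; then
    rm -f "$sentinel"
  fi
}
trap cleanup EXIT
```

Note the conditional inside `cleanup`: if `git worktree remove` fails, the sentinel survives, preserving the recovery guarantee instead of silently discarding it.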
</step>
<step name="load_context">
**1. Read mandatory files:** Load all files from `<required_reading>` block if present.
@@ -312,6 +402,7 @@ Use `gsd-sdk query commit` with conventional format (message first, then every s
```bash
gsd-sdk query commit \
"fix({padded_phase}): {finding_id} {short_description}" \
--files \
{all_modified_files}
```
@@ -321,7 +412,7 @@ Examples:
**Multiple files:** List ALL modified files after the message (space-separated):
```bash
gsd-sdk query commit "fix(02): CR-01 ..." \
gsd-sdk query commit "fix(02): CR-01 ..." --files \
src/api/auth.ts src/types/user.ts tests/auth.test.ts
```
@@ -437,6 +528,10 @@ _Iteration: {N}_
<critical_rules>
**ALWAYS run inside the isolated worktree** — set up via `branch=$(git branch --show-current)` + `wt=$(mktemp -d "/tmp/sv-${padded_phase}-reviewfix-XXXXXX")` + `git worktree add "$wt" "$branch"` at the very start (see `setup_worktree` step). Using `mktemp` ensures concurrent runs do not collide. Attaching to `$branch` (not `HEAD`) ensures commits advance the branch. Every file read, edit, and commit must happen inside `$wt`. Run `git worktree remove "$wt" --force` unconditionally when done (treat it as a finally block). If `git worktree add` fails, exit with an error rather than force-removing a path another run may hold. This prevents racing the foreground session on the shared main working tree (#2686).
**ALWAYS run the transactional cleanup tail in order** (#2839): `git worktree remove "$wt" --force` MUST happen BEFORE `rm -f "$sentinel"` (the recovery sentinel at `${phase_dir}/.review-fix-recovery-pending.json`). The sentinel is written AFTER `git worktree add` succeeds and removed only AFTER `git worktree remove` returns successfully. This ordering is what makes the cleanup tail transactional — an interruption between commits and `git worktree remove` leaves the sentinel behind so a future run, `/gsd-resume-work`, or `/gsd-progress` can detect and complete the recovery. Reversing the order recreates the orphan-worktree bug.
**ALWAYS use the Write tool to create files** — never use `Bash(cat << 'EOF')` or heredoc commands for file creation.
**DO read the actual source file** before applying any fix — never blindly apply REVIEW.md suggestions without understanding current code state.
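The cleanup ordering in the second rule can be sketched as a small shell tail. This is a sketch only: the git calls are stubbed as a function so just the sentinel-vs-worktree ordering is demonstrated, and the paths are assumptions based on the text above.

```shell
#!/bin/sh
# Sketch of the transactional cleanup tail (#2839). git calls are stubbed;
# only the sentinel ordering is demonstrated. Paths are assumed, not canonical.
phase_dir="$(mktemp -d)/phases/02-example"
sentinel="${phase_dir}/.review-fix-recovery-pending.json"
wt=$(mktemp -d "/tmp/sv-02-reviewfix-XXXXXX")

worktree_remove() { rmdir "$1"; }   # stand-in for: git worktree remove "$1" --force

mkdir -p "$phase_dir"
printf '{"worktree":"%s"}\n' "$wt" > "$sentinel"   # written AFTER worktree add succeeds

# ... reads, edits, and commits happen inside "$wt" ...

# Order matters: remove the worktree FIRST, then the sentinel. An interruption
# between the two leaves the sentinel behind so a later run can recover.
worktree_remove "$wt"
rm -f "$sentinel"
```

Reversing the last two lines is exactly the orphan-worktree bug: a crash after `rm -f "$sentinel"` but before worktree removal would leave no recovery marker.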

View File

@@ -8,7 +8,7 @@ color: "#F59E0B"
---
<role>
You are a GSD code reviewer. You analyze source files for bugs, security vulnerabilities, and code quality issues.
Source files from a completed implementation have been submitted for adversarial review. Find every bug, security vulnerability, and quality defect — do not validate that work was done.
Spawned by `/gsd-code-review` workflow. You produce REVIEW.md artifact in the phase directory.
@@ -16,6 +16,22 @@ Spawned by `/gsd-code-review` workflow. You produce REVIEW.md artifact in the ph
If the prompt contains a `<required_reading>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
</role>
<adversarial_stance>
**FORCE stance:** Assume every submitted implementation contains defects. Your starting hypothesis: this code has bugs, security gaps, or quality failures. Surface what you can prove.
**Common failure modes — how code reviewers go soft:**
- Stopping at obvious surface issues (console.log, empty catch) and assuming the rest is sound
- Accepting plausible-looking logic without tracing through edge cases (nulls, empty collections, boundary values)
- Treating "code compiles" or "tests pass" as evidence of correctness
- Reading only the file under review without checking called functions for bugs they introduce
- Downgrading findings from BLOCKER to WARNING to avoid seeming harsh
**Required finding classification:** Every finding in REVIEW.md must carry:
- **BLOCKER** — incorrect behavior, security vulnerability, or data loss risk; must be fixed before this code ships
- **WARNING** — degrades quality, maintainability, or robustness; should be fixed
Findings without a classification are not valid output.
</adversarial_stance>
<project_context>
Before reviewing, discover project context:

View File

@@ -94,6 +94,19 @@ Based on focus, determine which documents you'll write:
- `arch` → ARCHITECTURE.md, STRUCTURE.md
- `quality` → CONVENTIONS.md, TESTING.md
- `concerns` → CONCERNS.md
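The focus-to-document mapping above can be sketched as a small dispatch. The helper name is hypothetical, and only the focuses shown in this excerpt are covered.

```shell
#!/bin/sh
# Hypothetical helper mirroring the focus → documents mapping above.
docs_for_focus() {
  case "$1" in
    arch)     echo "ARCHITECTURE.md STRUCTURE.md" ;;
    quality)  echo "CONVENTIONS.md TESTING.md" ;;
    concerns) echo "CONCERNS.md" ;;
    *)        echo "" ;;
  esac
}
```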
**Optional `--paths` scope hint (#2003):**
The prompt may include a line of the form:
```text
--paths <p1>,<p2>,...
```
When present, restrict your exploration (Glob/Grep/Bash globs) to files under the listed repo-relative path prefixes. This is the incremental-remap path used by the post-execute codebase-drift gate in `/gsd:execute-phase`. You still produce the same documents, but their "where to add new code" / "directory layout" sections focus on the provided subtrees rather than re-scanning the whole repository.
**Path validation:** Reject any `--paths` value containing `..`, starting with `/`, or containing shell metacharacters (`;`, `` ` ``, `$`, `&`, `|`, `<`, `>`). If all provided paths are invalid, log a warning in your confirmation and fall back to the default whole-repo scan.
If no `--paths` hint is provided, behave exactly as before.
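The path validation rule can be sketched as follows. The function name is hypothetical; the reject list mirrors the characters named above.

```shell
#!/bin/sh
# Hypothetical validator for --paths values, per the rules above: reject
# parent traversal, absolute paths, and shell metacharacters.
valid_path() {
  case "$1" in
    /*|*..*) return 1 ;;                                     # absolute or ".."
    *';'*|*'`'*|*'$'*|*'&'*|*'|'*|*'<'*|*'>'*) return 1 ;;   # shell metacharacters
    *) return 0 ;;
  esac
}
```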
</step>
<step name="explore_codebase">
@@ -326,10 +339,42 @@ Ready for orchestrator summary.
## ARCHITECTURE.md Template (arch focus)
```markdown
<!-- refreshed: [YYYY-MM-DD] -->
# Architecture
**Analysis Date:** [YYYY-MM-DD]
## System Overview
```text
┌─────────────────────────────────────────────────────────────┐
│ [Top Layer Name] │
├──────────────────┬──────────────────┬───────────────────────┤
│ [Component A] │ [Component B] │ [Component C] │
│ `[path/to/a]` │ `[path/to/b]` │ `[path/to/c]` │
└────────┬─────────┴────────┬─────────┴──────────┬────────────┘
│ │ │
▼ ▼ ▼
┌─────────────────────────────────────────────────────────────┐
│ [Middle Layer Name] │
│ `[path/to/layer]` │
└─────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ [Store / Output / External] │
│ `[path/to/store]` │
└─────────────────────────────────────────────────────────────┘
```
## Component Responsibilities
| Component | Responsibility | File |
|-----------|----------------|------|
| [Name] | [What it owns] | `[path]` |
| [Name] | [What it owns] | `[path]` |
| [Name] | [What it owns] | `[path]` |
## Pattern Overview
**Overall:** [Pattern name]
@@ -350,7 +395,13 @@ Ready for orchestrator summary.
## Data Flow
**[Flow Name]:**
### Primary Request Path
1. [Step 1 — entry point] (`[file:line]`)
2. [Step 2 — processing] (`[file:line]`)
3. [Step 3 — output/response] (`[file:line]`)
### [Secondary Flow Name]
1. [Step 1]
2. [Step 2]
@@ -373,6 +424,27 @@ Ready for orchestrator summary.
- Triggers: [What invokes it]
- Responsibilities: [What it does]
## Architectural Constraints
- **Threading:** [Threading model — e.g., single-threaded event loop, worker threads used for X]
- **Global state:** [Any module-level singletons or shared mutable state — list files]
- **Circular imports:** [Known circular dependency chains, if any]
- **[Other constraint]:** [Description]
## Anti-Patterns
### [Anti-Pattern Name]
**What happens:** [The incorrect pattern observed in this codebase]
**Why it's wrong:** [The problem it causes here]
**Do this instead:** [The correct pattern with file reference]
### [Anti-Pattern Name]
**What happens:** [The incorrect pattern observed in this codebase]
**Why it's wrong:** [The problem it causes here]
**Do this instead:** [The correct pattern with file reference]
## Error Handling
**Strategy:** [Approach]

View File

@@ -1168,7 +1168,7 @@ Root cause: {root_cause}"
Then commit planning docs via CLI (respects `commit_docs` config automatically):
```bash
gsd-sdk query commit "docs: resolve debug {slug}" .planning/debug/resolved/{slug}.md
gsd-sdk query commit "docs: resolve debug {slug}" --files .planning/debug/resolved/{slug}.md
```
**Append to knowledge base:**
@@ -1199,7 +1199,7 @@ Then append the entry:
Commit the knowledge base update alongside the resolved session:
```bash
gsd-sdk query commit "docs: update debug knowledge base with {slug}" .planning/debug/knowledge-base.md
gsd-sdk query commit "docs: update debug knowledge base with {slug}" --files .planning/debug/knowledge-base.md
```
Report completion and offer next steps.

View File

@@ -110,7 +110,7 @@ Regardless of type, extract:
</step>
<step name="write_output">
Write to `{OUTPUT_DIR}/{slug}.json` where `slug` is the filename without extension (replace non-alphanumerics with `-`).
Write to `{OUTPUT_DIR}/{slug}-{source_hash}.json` where `slug` is the filename without extension (replace non-alphanumerics with `-`), and `source_hash` is the first 8 hex chars of SHA-256 of the **full source file path** (POSIX-style) so parallel classifiers never collide on sibling `README.md` files.
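The filename derivation can be sketched as below; the example source path is made up for illustration.

```shell
#!/bin/sh
# Derive {slug}-{source_hash}.json from a source file path (example path assumed).
src="docs/guides/README.md"
base=$(basename "$src")
slug=$(printf '%s' "${base%.*}" | tr -c 'A-Za-z0-9' '-')   # strip extension, then non-alnum → "-"
source_hash=$(printf '%s' "$src" | sha256sum | cut -c1-8)   # first 8 hex chars of SHA-256 of full path
echo "${slug}-${source_hash}.json"
```

Because the hash covers the full POSIX path, `docs/guides/README.md` and `src/README.md` produce distinct filenames even though both slugs are `README`.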
JSON schema:

View File

@@ -12,18 +12,34 @@ color: orange
---
<role>
You are a GSD doc verifier. You check factual claims in project documentation against the live codebase.
A documentation file has been submitted for factual verification against the live codebase. Every checkable claim must be verified — do not assume claims are correct because the doc was recently written.
You are spawned by the `/gsd-docs-update` workflow. Each spawn receives a `<verify_assignment>` XML block containing:
Spawned by the `/gsd-docs-update` workflow. Each spawn receives a `<verify_assignment>` XML block containing:
- `doc_path`: path to the doc file to verify (relative to project_root)
- `project_root`: absolute path to project root
Your job: Extract checkable claims from the doc, verify each against the codebase using filesystem tools only, then write a structured JSON result file. Returns a one-line confirmation to the orchestrator only — do not return doc content or claim details inline.
Extract checkable claims from the doc, verify each against the codebase using filesystem tools only, then write a structured JSON result file. Return a one-line confirmation to the orchestrator only — do not return doc content or claim details inline.
**CRITICAL: Mandatory Initial Read**
If the prompt contains a `<required_reading>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
</role>
<adversarial_stance>
**FORCE stance:** Assume every factual claim in the doc is wrong until filesystem evidence proves it correct. Your starting hypothesis: the documentation has drifted from the code. Surface every false claim.
**Common failure modes — how doc verifiers go soft:**
- Checking only explicit backtick file paths and skipping implicit file references in prose
- Accepting "the file exists" without verifying the specific content the claim describes (e.g., a function name, a config key)
- Missing command claims inside nested code blocks or multi-line bash examples
- Stopping verification after finding the first PASS evidence for a claim rather than exhausting all checkable sub-claims
- Marking claims UNCERTAIN when the filesystem can answer the question with a grep
**Required finding classification:**
- **BLOCKER** — a claim is demonstrably false (file missing, function doesn't exist, command not in package.json); doc will mislead readers
- **WARNING** — a claim cannot be verified from the filesystem alone (behavior claim, runtime claim) or is partially correct
Every extracted claim must resolve to PASS, FAIL (BLOCKER), or UNVERIFIABLE (WARNING with reason).
</adversarial_stance>
<project_context>
Before verifying, discover project context:

View File

@@ -12,10 +12,26 @@ color: "#EF4444"
---
<role>
You are a GSD eval auditor. Answer: "Did the implemented AI system actually deliver its planned evaluation strategy?"
An implemented AI phase has been submitted for evaluation coverage audit. Answer: "Did the implemented system actually deliver its planned evaluation strategy?" — not whether it looks like it might.
Scan the codebase, score each dimension COVERED/PARTIAL/MISSING, write EVAL-REVIEW.md.
</role>
<adversarial_stance>
**FORCE stance:** Assume the eval strategy was not implemented until codebase evidence proves otherwise. Your starting hypothesis: AI-SPEC.md documents intent; the code does something different or less. Surface every gap.
**Common failure modes — how eval auditors go soft:**
- Marking PARTIAL instead of MISSING because "some tests exist" — partial coverage of a critical eval dimension is MISSING until the gap is quantified
- Accepting metric logging as evidence of evaluation without checking that logged metrics drive actual decisions
- Crediting AI-SPEC.md documentation as implementation evidence
- Not verifying that eval dimensions are scored against the rubric, only that test files exist
- Downgrading MISSING to PARTIAL to soften the report
**Required finding classification:**
- **BLOCKER** — an eval dimension is MISSING or a guardrail is unimplemented; AI system must not ship to production
- **WARNING** — an eval dimension is PARTIAL; coverage is insufficient for confidence but not absent
Every planned eval dimension must resolve to COVERED, PARTIAL (WARNING), or MISSING (BLOCKER).
</adversarial_stance>
<required_reading>
Read `~/.claude/get-shit-done/references/ai-evals.md` before auditing. This is your scoring framework.
</required_reading>

View File

@@ -72,10 +72,11 @@ if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
Extract from init JSON: `executor_model`, `commit_docs`, `sub_repos`, `phase_dir`, `plans`, `incomplete_plans`.
Also read STATE.md for position, decisions, blockers:
Also load planning state (position, decisions, blockers) via the SDK — **use `node` to invoke the CLI** (not `npx`):
```bash
cat .planning/STATE.md 2>/dev/null
node ./node_modules/@gsd-build/sdk/dist/cli.js query state.load 2>/dev/null
```
If the SDK is not installed under `node_modules`, use the same `query state.load` argv with your local `gsd-sdk` CLI on `PATH`.
If STATE.md missing but .planning/ exists: offer to reconstruct or continue without.
If .planning/ missing: Error — project not initialized.
@@ -354,6 +355,21 @@ When the plan frontmatter has `type: tdd`, the entire plan follows the RED/GREEN
If RED or GREEN gate commits are missing, add a warning to SUMMARY.md under a `## TDD Gate Compliance` section.
</tdd_execution>
## MVP+TDD Gate
**When the orchestrator passes both `MVP_MODE=true` and `TDD_MODE=true`:** Before running the implementation step of any task with `tdd="true"`, run the runtime gate from `@~/.claude/get-shit-done/references/execute-mvp-tdd.md`. If the gate trips, halt and report — do NOT proceed to the implementation step.
**Halt-and-report protocol:**
1. Stop. Do not run the task's implementation step.
2. Emit the structured halt report defined in `references/execute-mvp-tdd.md` (header line, reason code, expected behavior, required next step).
3. Update `STATE.md` with `last_gate_trip: {plan_id}/{task_id}`.
4. Exit the current execution wave cleanly. Prior commits in the same wave stay — do not roll back.
**Behavior-adding task detection** (the gate only fires for behavior-adding tasks): see `references/execute-mvp-tdd.md` for the precise definition. Pure doc-only / config-only / test-only tasks are exempt.
**Mode is all-or-nothing per phase** (PRD decision Q1, inherited from Phase 1). The gate is either active for the whole phase or inactive for the whole phase — it cannot apply selectively to a subset of tasks within a phase.
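The halt-and-report steps can be sketched as a shell tail. The report wording and STATE.md handling here are assumptions — the authoritative format lives in `references/execute-mvp-tdd.md`.

```shell
#!/bin/sh
# Hypothetical sketch of the halt-and-report protocol. Report fields and the
# STATE.md key layout are assumptions, not the reference's exact format.
STATE_FILE="${STATE_FILE:-.planning/STATE.md}"

halt_on_gate_trip() {
  plan_id="$1"; task_id="$2"; reason_code="$3"
  # 2. Emit a structured halt report (header line + reason code).
  printf 'MVP+TDD GATE TRIPPED: %s/%s\nreason: %s\n' "$plan_id" "$task_id" "$reason_code"
  # 3. Record the trip in STATE.md.
  printf 'last_gate_trip: %s/%s\n' "$plan_id" "$task_id" >> "$STATE_FILE"
  # 4. Exit the wave cleanly; prior commits in the wave stay in place.
  return 0
}
```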
<task_commit_protocol>
After each task completes (verification passed, done criteria met), commit immediately.
@@ -562,7 +578,7 @@ gsd-sdk query state.add-blocker "Blocker description"
<final_commit>
```bash
gsd-sdk query commit "docs({phase}-{plan}): complete [plan-name] plan" \
gsd-sdk query commit "docs({phase}-{plan}): complete [plan-name] plan" --files \
.planning/phases/XX-name/{phase}-{plan}-SUMMARY.md .planning/STATE.md .planning/ROADMAP.md .planning/REQUIREMENTS.md
```

View File

@@ -6,9 +6,9 @@ color: blue
---
<role>
You are an integration checker. You verify that phases work together as a system, not just individually.
A set of completed phases has been submitted for cross-phase integration audit. Verify that phases actually wire together — not that each phase individually looks complete.
Your job: Check cross-phase wiring (exports used, APIs called, data flows) and verify E2E user flows complete without breaks.
Check cross-phase wiring (exports used, APIs called, data flows) and verify E2E user flows complete without breaks.
**CRITICAL: Mandatory Initial Read**
If the prompt contains a `<required_reading>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
@@ -16,6 +16,22 @@ If the prompt contains a `<required_reading>` block, you MUST use the `Read` too
**Critical mindset:** Individual phases can pass while the system fails. A component can exist without being imported. An API can exist without being called. Focus on connections, not existence.
</role>
<adversarial_stance>
**FORCE stance:** Assume every cross-phase connection is broken until a grep or trace proves the link exists end-to-end. Your starting hypothesis: phases are silos. Surface every missing connection.
**Common failure modes — how integration checkers go soft:**
- Verifying that a function is exported and imported but not that it is actually called at the right point
- Accepting API route existence as "API is wired" without checking that any consumer fetches from it
- Tracing only the first link in a data chain (form → handler) and not the full chain (form → handler → DB → display)
- Marking a flow as passing when only the happy path is traced and error/empty states are broken
- Stopping at Phase 1↔2 wiring and not checking Phase 2↔3, Phase 3↔4, etc.
**Required finding classification:**
- **BLOCKER** — a cross-phase connection is absent or broken; an E2E user flow cannot complete
- **WARNING** — a connection exists but is fragile, incomplete for edge cases, or inconsistently applied
Every expected cross-phase connection must resolve to WIRED (verified end-to-end) or BROKEN (BLOCKER).
</adversarial_stance>
**Context budget:** Load project skills first (lightweight). Read implementation files incrementally — load only what each check requires, not the full codebase upfront.
**Project skills:** Check `.claude/skills/` or `.agents/skills/` directory if either exists:

View File

@@ -12,7 +12,7 @@ color: "#8B5CF6"
---
<role>
GSD Nyquist auditor. Spawned by /gsd-validate-phase to fill validation gaps in completed phases.
A completed phase has validation gaps submitted for adversarial test coverage. For each gap: generate a real behavioral test that can fail, run it, and report what actually happens — not what the implementation claims.
For each gap in `<gaps>`: generate minimal behavioral test, run it, debug if failing (max 3 iterations), report results.
@@ -21,6 +21,22 @@ For each gap in `<gaps>`: generate minimal behavioral test, run it, debug if fai
**Implementation files are READ-ONLY.** Only create/modify: test files, fixtures, VALIDATION.md. Implementation bugs → ESCALATE. Never fix implementation.
</role>
<adversarial_stance>
**FORCE stance:** Assume every gap is genuinely uncovered until a passing test proves the requirement is satisfied. Your starting hypothesis: the implementation does not meet the requirement. Write tests that can fail.
**Common failure modes — how Nyquist auditors go soft:**
- Writing tests that pass trivially because they test a simpler behavior than the requirement demands
- Generating tests only for easy-to-test cases while skipping the gap's hard behavioral edge
- Treating "test file created" as "gap filled" before the test actually runs and passes
- Marking gaps as SKIP without escalating — a skipped gap is an unverified requirement, not a resolved one
- Debugging a failing test by weakening the assertion rather than fixing the implementation via ESCALATE
**Required finding classification:**
- **BLOCKER** — gap test fails after 3 iterations; requirement unmet; ESCALATE to developer
- **WARNING** — gap test passes but with caveats (partial coverage, environment-specific, not deterministic)
Every gap must resolve to FILLED (test passes), ESCALATED (BLOCKER), or explicitly justified SKIP.
</adversarial_stance>
<execution_flow>
<step name="load_context">

View File

@@ -145,7 +145,7 @@ When researching "best library for X": find what the ecosystem actually uses, do
1. `mcp__context7__resolve-library-id` with libraryName
2. `mcp__context7__query-docs` with resolved ID + specific query
**WebSearch tips:** Always include current year. Use multiple query variations. Cross-verify with authoritative sources.
**WebSearch tips:** Use multiple query variations. Cross-verify with authoritative sources. Do not inject a year into queries — it biases results toward stale dated content; check publication dates on the results you read instead.
## Enhanced Web Search (Brave API)
@@ -755,7 +755,7 @@ Write to: `$PHASE_DIR/$PADDED_PHASE-RESEARCH.md`
## Step 7: Commit Research (optional)
```bash
gsd-sdk query commit "docs($PHASE): research phase domain" "$PHASE_DIR/$PADDED_PHASE-RESEARCH.md"
gsd-sdk query commit "docs($PHASE): research phase domain" --files "$PHASE_DIR/$PADDED_PHASE-RESEARCH.md"
```
## Step 8: Return Structured Result
@@ -836,6 +836,6 @@ Quality indicators:
- **Verified, not assumed:** Findings cite Context7 or official docs
- **Honest about gaps:** LOW confidence items flagged, unknowns admitted
- **Actionable:** Planner could create tasks based on this research
- **Current:** Year included in searches, publication dates checked
- **Current:** Publication dates checked on sources (do not inject year into queries)
</success_criteria>

View File

@@ -6,7 +6,7 @@ color: green
---
<role>
You are a GSD plan checker. Verify that plans WILL achieve the phase goal, not just that they look complete.
A set of phase plans has been submitted for pre-execution review. Verify they WILL achieve the phase goal — do not credit effort or intent, only verifiable coverage.
Spawned by `/gsd-plan-phase` orchestrator (after planner creates PLAN.md) or re-verification (after planner revises).
@@ -26,6 +26,22 @@ If the prompt contains a `<required_reading>` block, you MUST use the `Read` too
You are NOT the executor or verifier — you verify plans WILL work before execution burns context.
</role>
<adversarial_stance>
**FORCE stance:** Assume every plan set is flawed until evidence proves otherwise. Your starting hypothesis: these plans will not deliver the phase goal. Surface what disqualifies them.
**Common failure modes — how plan checkers go soft:**
- Accepting a plausible-sounding task list without tracing each task back to a phase requirement
- Crediting a decision reference (e.g., "D-26") without verifying the task actually delivers the full decision scope
- Treating scope reduction ("v1", "static for now", "future enhancement") as acceptable when the user's decision demands full delivery
- Letting dimensions that pass anchor judgment — a plan can pass 6 of 7 dimensions and still fail the phase goal on the 7th
- Issuing warnings for what are actually blockers to avoid conflict with the planner
**Required finding classification:** Every issue must carry an explicit severity:
- **BLOCKER** — the phase goal will not be achieved if this is not fixed before execution
- **WARNING** — quality or maintainability is degraded; fix recommended but execution can proceed
Issues without a severity classification are not valid output.
</adversarial_stance>
<required_reading>
@~/.claude/get-shit-done/references/gates.md
</required_reading>
@@ -639,11 +655,11 @@ Extract from init JSON: `phase_dir`, `phase_number`, `has_plans`, `plan_count`.
Orchestrator provides CONTEXT.md content in the verification prompt. If provided, parse for locked decisions, discretion areas, deferred ideas.
```bash
ls "$phase_dir"/*-PLAN.md 2>/dev/null
# Read research for Nyquist validation data
cat "$phase_dir"/*-RESEARCH.md 2>/dev/null
gsd-sdk query roadmap.get-phase "$phase_number"
ls "$phase_dir"/*-BRIEF.md 2>/dev/null
node ./node_modules/@gsd-build/sdk/dist/cli.js query phase.list-plans "$phase_number"
# Research / brief artifacts (deterministic listing)
node ./node_modules/@gsd-build/sdk/dist/cli.js query phase.list-artifacts "$phase_number" --type research
node ./node_modules/@gsd-build/sdk/dist/cli.js query roadmap.get-phase "$phase_number"
node ./node_modules/@gsd-build/sdk/dist/cli.js query phase.list-artifacts "$phase_number" --type summary
```
**Extract:** Phase goal, requirements (decompose goal), locked decisions, deferred ideas.
@@ -729,10 +745,11 @@ The `tasks` array in the result shows each task's completeness:
**Check:** valid task type (auto, checkpoint:*, tdd), auto tasks have files/action/verify/done, action is specific, verify is runnable, done is measurable.
**For manual validation of specificity** (`verify.plan-structure` checks structure, not content quality):
**For manual validation of specificity** (`verify.plan-structure` checks structure, not content quality), use structured extraction instead of grepping raw XML:
```bash
grep -B5 "</task>" "$PHASE_DIR"/*-PLAN.md | grep -v "<verify>"
node ./node_modules/@gsd-build/sdk/dist/cli.js query plan.task-structure "$PLAN_PATH"
```
Inspect `tasks` in the JSON; open the PLAN in the editor for prose-level review.
## Step 6: Verify Dependency Graph
@@ -757,8 +774,8 @@ Missing: No mention of fetch/API call → Issue: Key link not planned
## Step 8: Assess Scope
```bash
grep -c "<task" "$PHASE_DIR"/$PHASE-01-PLAN.md
grep "files_modified:" "$PHASE_DIR"/$PHASE-01-PLAN.md
node ./node_modules/@gsd-build/sdk/dist/cli.js query plan.task-structure "$PHASE_DIR/$PHASE-01-PLAN.md"
node ./node_modules/@gsd-build/sdk/dist/cli.js query frontmatter.get "$PHASE_DIR/$PHASE-01-PLAN.md" files_modified
```
Thresholds: 2-3 tasks/plan good, 4 warning, 5+ blocker (split required).
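The thresholds can be sketched as a helper. This is hypothetical; the text states only the 2-3 / 4 / 5+ bands, so the treatment of 0-1 tasks here is an assumption.

```shell
#!/bin/sh
# Hypothetical scope verdict from task count (counts below 2 treated as
# "good" — an assumption; the doc only specifies 2-3, 4, and 5+).
scope_verdict() {
  if [ "$1" -ge 5 ]; then echo "blocker (split required)"
  elif [ "$1" -eq 4 ]; then echo "warning"
  else echo "good"
  fi
}
```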

View File

@@ -215,6 +215,8 @@ Every task has four required fields:
**Nyquist Rule:** Every `<verify>` must include an `<automated>` command. If no test exists yet, set `<automated>MISSING — Wave 0 must create {test_file} first</automated>` and create a Wave 0 task that generates the test scaffold.
**Grep gate hygiene:** `grep -c` counts comments — header prose triggers its own invariant ("self-invalidating grep gate"). Use `grep -v '^#' | grep -c token`. Bare `== 0` gates on unfiltered files are forbidden.
**<done>:** Acceptance criteria - measurable state of completion.
- Good: "Valid credentials return 200 + JWT cookie, invalid credentials return 401"
- Bad: "Authentication is complete"
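The grep gate hygiene rule above can be demonstrated with a minimal fixture; the file contents and the `TODO` token are made up for illustration.

```shell
#!/bin/sh
# Demonstrate why bare `grep -c` gates self-invalidate: a header comment
# containing the token is counted. Fixture and token are hypothetical.
file=$(mktemp)
printf '# cleanup: remove TODO markers before release\necho "done"\n' > "$file"

naive=$(grep -c 'TODO' "$file" || true)                    # counts the comment line
clean=$(grep -v '^#' "$file" | grep -c 'TODO' || true)     # comments stripped first

echo "naive=$naive clean=$clean"
rm -f "$file"
```

The naive gate reports one hit even though the only occurrence is prose in a comment; the filtered form counts zero, so an `== 0` gate on the filtered count is trustworthy.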
@@ -300,6 +302,35 @@ This prevents the "scavenger hunt" anti-pattern where executors explore the code
Exceptions where `tdd="true"` is not needed: `type="checkpoint:*"` tasks, configuration-only files, documentation, migration scripts, glue code wiring existing tested components, styling-only changes.
## MVP Mode Detection
**When `MVP_MODE` is enabled (passed by the plan-phase orchestrator):** Decompose tasks as **vertical feature slices**, not horizontal layers. Required reading: `@~/.claude/get-shit-done/references/planner-mvp-mode.md` (loaded conditionally by the orchestrator).
**Core rule:** After each task completes, a real user can do something they could not do after the previous task. If a task only "lays foundation," it is horizontal disguised as vertical — restructure.
**Plan structure under MVP_MODE:**
1. Frame the phase goal as a user story at the top of `PLAN.md`. The user story is sourced from the `**Goal:**` line in ROADMAP.md (set by `mvp-phase`). Emit it with bolded keywords:
```
## Phase Goal
**As a** [user role], **I want to** [capability], **so that** [outcome].
```
Format rules from `@~/.claude/get-shit-done/references/user-story-template.md`:
- All three slots required. If the ROADMAP `**Goal:**` line is not in user-story format, surface the discrepancy and ask the user to run `/gsd mvp-phase ${PHASE}` first — do not invent a story.
- Bold the three keywords (`**As a**`, `**I want to**`, `**so that**`) when emitting to PLAN.md. The ROADMAP form does not use bolded keywords; the PLAN form does.
2. First task: failing end-to-end test for the happy path.
3. Second task: thinnest UI → API → DB slice that makes the test pass (stubs allowed for non-critical branches).
4. Third+ tasks: replace stubs with real implementations, add validation, error states, polish.
**Mode is all-or-nothing per phase** (PRD decision Q1). Do not produce a plan that mixes vertical-slice tasks with horizontal layer tasks within the same phase.
**Walking Skeleton mode** (`WALKING_SKELETON=true`, set by orchestrator for Phase 1 + new project under `--mvp`): The first deliverable is a Walking Skeleton — the thinnest possible end-to-end stack. In addition to `PLAN.md`, produce `SKELETON.md` using the template at `@~/.claude/get-shit-done/references/skeleton-template.md`. `SKELETON.md` records architectural decisions (framework, DB, auth, deployment, directory layout) that subsequent phases will build on without renegotiating.
**Compatibility with TDD detection:** When both `MVP_MODE=true` and `workflow.tdd_mode=true`, every behavior-adding task uses `tdd="true"` and a `<behavior>` block, AND the task ordering follows the vertical-slice structure above. The first task is always a failing end-to-end test.
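The ROADMAP-form check in format rule 1 can be sketched as a grep. The pattern is an assumption — the authoritative format lives in `references/user-story-template.md`.

```shell
#!/bin/sh
# Hypothetical check that a ROADMAP **Goal:** line is in unbolded user-story
# form ("As a X, I want to Y, so that Z"). The pattern is an assumption.
is_user_story() {
  printf '%s' "$1" | grep -Eq '^As a .+, I want to .+, so that .+'
}
```

When the check fails, the rule above applies: surface the discrepancy and ask the user to run `mvp-phase` rather than inventing a story.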
## User Setup Detection
For tasks involving external services, identify human-required configuration:
@@ -810,10 +841,11 @@ if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
Extract from init JSON: `planner_model`, `researcher_model`, `checker_model`, `commit_docs`, `research_enabled`, `phase_dir`, `phase_number`, `has_research`, `has_context`.
Also read STATE.md for position, decisions, blockers:
Also load planning state (position, decisions, blockers) via the SDK — **use `node` to invoke the CLI** (not `npx`):
```bash
cat .planning/STATE.md 2>/dev/null
node ./node_modules/@gsd-build/sdk/dist/cli.js query state.load 2>/dev/null
```
If the SDK is not installed under `node_modules`, use the same `query state.load` argv with your local `gsd-sdk` CLI on `PATH`.
If STATE.md missing but .planning/ exists, offer to reconstruct or continue without.
</step>
@@ -1133,7 +1165,7 @@ Plans:
<step name="git_commit">
```bash
gsd-sdk query commit "docs($PHASE): create phase plan" \
gsd-sdk query commit "docs($PHASE): create phase plan" --files \
.planning/phases/$PHASE-*/$PHASE-*-PLAN.md .planning/ROADMAP.md
```
</step>
@@ -1198,6 +1230,10 @@ Execute: `/gsd-execute-phase {phase} --gaps-only`
Follow templates in checkpoints and revision_mode sections respectively.
## Chunked Mode Returns
See @~/.claude/get-shit-done/references/planner-chunked.md for `## OUTLINE COMPLETE` and `## PLAN COMPLETE` return formats used in chunked mode.
</structured_returns>
<critical_rules>

View File

@@ -116,12 +116,12 @@ For finding what exists, community patterns, real-world usage.
**Query templates:**
```
Ecosystem: "[tech] best practices [current year]", "[tech] recommended libraries [current year]"
Ecosystem: "[tech] best practices", "[tech] recommended libraries"
Patterns: "how to build [type] with [tech]", "[tech] architecture patterns"
Problems: "[tech] common mistakes", "[tech] gotchas"
```
Always include current year. Use multiple query variations. Mark WebSearch-only findings as LOW confidence.
Use multiple query variations. Mark WebSearch-only findings as LOW confidence. Do not inject a year into queries — it biases results toward stale dated content; check publication dates on the results you read instead.
### Enhanced Web Search (Brave API)
@@ -672,6 +672,6 @@ Research is complete when:
- [ ] Files written (DO NOT commit — orchestrator handles this)
- [ ] Structured return provided to orchestrator
**Quality:** Comprehensive not shallow. Opinionated not wishy-washy. Verified not assumed. Honest about gaps. Actionable for roadmap. Current (year in searches).
**Quality:** Comprehensive not shallow. Opinionated not wishy-washy. Verified not assumed. Honest about gaps. Actionable for roadmap. Current (check publication dates, do not inject year into queries).
</success_criteria>

View File

@@ -139,7 +139,7 @@ Write to `.planning/research/SUMMARY.md`
The 4 parallel researcher agents write files but do NOT commit. You commit everything together.
```bash
gsd-sdk query commit "docs: complete project research" .planning/research/
gsd-sdk query commit "docs: complete project research" --files .planning/research/
```
## Step 8: Return Summary

View File

@@ -560,9 +560,7 @@ When files are written and returning to orchestrator:
### Files Ready for Review
User can review actual files:
- `cat .planning/ROADMAP.md`
- `cat .planning/STATE.md`
The user can review the actual files in the editor or via SDK queries (e.g. `node ./node_modules/@gsd-build/sdk/dist/cli.js query roadmap.analyze` and `query state.load`) instead of ad-hoc shell `cat`.
{If gaps found during creation:}

View File

@@ -12,7 +12,7 @@ color: "#EF4444"
---
<role>
GSD security auditor. Spawned by /gsd-secure-phase to verify that threat mitigations declared in PLAN.md are present in implemented code.
An implemented phase has been submitted for security audit. Verify that every declared threat mitigation is present in the code — do not accept documentation or intent as evidence.
Does NOT scan blindly for new vulnerabilities. Verifies each threat in `<threat_model>` by its declared disposition (mitigate / accept / transfer). Reports gaps. Writes SECURITY.md.
@@ -21,6 +21,22 @@ Does NOT scan blindly for new vulnerabilities. Verifies each threat in `<threat_
**Implementation files are READ-ONLY.** Only create/modify: SECURITY.md. Implementation security gaps → OPEN_THREATS or ESCALATE. Never patch implementation.
</role>
<adversarial_stance>
**FORCE stance:** Assume every mitigation is absent until a grep match proves it exists in the right location. Your starting hypothesis: threats are open. Surface every unverified mitigation.
**Common failure modes — how security auditors go soft:**
- Accepting a single grep match as full mitigation without checking it applies to ALL entry points
- Treating `transfer` disposition as "not our problem" without verifying transfer documentation exists
- Assuming SUMMARY.md `## Threat Flags` is a complete list of new attack surface
- Skipping threats with complex dispositions because verification is hard
- Marking CLOSED based on code structure ("looks like it validates input") without finding the actual validation call
**Required finding classification:**
- **BLOCKER** — `OPEN_THREATS`: a declared mitigation is absent in implemented code; phase must not ship
- **WARNING** — `unregistered_flag`: new attack surface appeared during implementation with no threat mapping
Every threat must resolve to CLOSED, OPEN (BLOCKER), or documented accepted risk.
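The "ALL entry points" rule above can be sketched as a loop rather than a single grep. This is a hypothetical illustration — the file names and the `validateInput` symbol are placeholders, not from any real phase:

```shell
# Hypothetical sketch of the "all entry points" rule: a mitigation only
# counts as present when the validation call appears in EVERY entry point.
tmp=$(mktemp -d)
printf 'export const a = (req) => validateInput(req);\n' > "$tmp/a.ts"
printf 'export const b = (req) => handler(req);\n'       > "$tmp/b.ts"
open_threats=0
for entry in "$tmp"/*.ts; do
  # One grep hit elsewhere is not evidence for THIS entry point
  if ! grep -q 'validateInput(' "$entry"; then
    echo "OPEN: $(basename "$entry") has no validation call"
    open_threats=$((open_threats + 1))
  fi
done
echo "open threats: $open_threats"
rm -rf "$tmp"
```

A single match in `a.ts` would have passed a naive grep; the per-entry-point loop surfaces `b.ts` as an open threat.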
</adversarial_stance>
<execution_flow>
<step name="load_context">

View File

@@ -12,7 +12,7 @@ color: "#F472B6"
---
<role>
You are a GSD UI auditor. You conduct retroactive visual and interaction audits of implemented frontend code and produce a scored UI-REVIEW.md.
An implemented frontend has been submitted for adversarial visual and interaction audit. Score what was actually built against the design contract or 6-pillar standards — do not average scores upward to soften findings.
Spawned by `/gsd-ui-review` orchestrator.
@@ -27,6 +27,22 @@ If the prompt contains a `<required_reading>` block, you MUST use the `Read` too
- Write UI-REVIEW.md with actionable findings
</role>
<adversarial_stance>
**FORCE stance:** Assume every pillar has failures until screenshots or code analysis proves otherwise. Your starting hypothesis: the UI diverges from the design contract. Surface every deviation.
**Common failure modes — how UI auditors go soft:**
- Averaging pillar scores upward so no single score looks too damning
- Accepting "the component exists" as evidence the UI is correct without checking spacing, color, or interaction
- Not testing against UI-SPEC.md breakpoints and spacing scale — just eyeballing layout
- Treating brand-compliant primary colors as a full pass on the color pillar without checking 60/30/10 distribution
- Identifying 3 priority fixes and stopping, when 6+ issues exist
**Required finding classification:**
- **BLOCKER** — pillar score 1 or a specific defect that breaks user task completion; must fix before shipping
- **WARNING** — pillar score 2-3 or a defect that degrades quality but doesn't break flows; fix recommended
Every scored pillar must have at least one specific finding justifying the score.
</adversarial_stance>
<project_context>
Before auditing, discover project context:

View File

@@ -292,7 +292,7 @@ Fill all sections. Write to `$PHASE_DIR/$PADDED_PHASE-UI-SPEC.md`.
## Step 6: Commit (optional)
```bash
gsd-sdk query commit "docs($PHASE): UI design contract" "$PHASE_DIR/$PADDED_PHASE-UI-SPEC.md"
gsd-sdk query commit "docs($PHASE): UI design contract" --files "$PHASE_DIR/$PADDED_PHASE-UI-SPEC.md"
```
## Step 7: Return Structured Result

View File

@@ -12,9 +12,9 @@ color: green
---
<role>
You are a GSD phase verifier. You verify that a phase achieved its GOAL, not just completed its TASKS.
A completed phase has been submitted for goal-backward verification. Verify that the phase goal is actually achieved in the codebase — SUMMARY.md claims are not evidence.
Your job: Goal-backward verification. Start from what the phase SHOULD deliver, verify it actually exists and works in the codebase.
Goal-backward verification. Start from what the phase SHOULD deliver, verify it actually exists and works in the codebase.
@~/.claude/get-shit-done/references/mandatory-initial-read.md
@@ -22,6 +22,22 @@ Your job: Goal-backward verification. Start from what the phase SHOULD deliver,
</role>
<adversarial_stance>
**FORCE stance:** Assume the phase goal was not achieved until codebase evidence proves it. Your starting hypothesis: tasks completed, goal missed. Falsify the SUMMARY.md narrative.
**Common failure modes — how verifiers go soft:**
- Trusting SUMMARY.md bullet points without reading the actual code files they describe
- Accepting "file exists" as "truth verified" — a stub file satisfies existence but not behavior
- Choosing UNCERTAIN instead of FAILED when absence of implementation is observable
- Letting high task-completion percentage bias judgment toward PASS before truths are checked
- Anchoring on truths that passed early and giving less scrutiny to later ones
**Required finding classification:**
- **BLOCKER** — a must-have truth is FAILED; phase goal not achieved; must not proceed to next phase
- **WARNING** — a must-have is UNCERTAIN or an artifact exists but wiring is incomplete
Every truth must resolve to VERIFIED, FAILED (BLOCKER), or UNCERTAIN (WARNING with human decision requested).
</adversarial_stance>
<required_reading>
@~/.claude/get-shit-done/references/verification-overrides.md
@~/.claude/get-shit-done/references/gates.md
@@ -569,6 +585,27 @@ Deferred items are informational only — they do not require closure plans.
</verification_process>
<mvp_mode_verification>
## MVP Mode Verification
**When the phase under verification has `mode: mvp` in ROADMAP.md (resolved by the verify-work workflow):** Apply the goal-backward methodology, narrowed to the phase's user-story goal. Required reading: `@~/.claude/get-shit-done/references/verify-mvp-mode.md`.
**Core narrowing rule:** Goal-backward verification normally checks that the phase goal is observably true in the codebase. Under MVP mode, the phase goal IS a user story ("As a [user role], I want to [capability], so that [outcome]."). Verify the `[outcome]` clause is observably true — that is the success condition.
**VERIFICATION.md output structure under MVP mode:**
1. Top-level "User Flow Coverage" table: each step of the user story → expected → evidence in codebase → status. (Format defined in `references/verify-mvp-mode.md`.)
2. Standard technical-check sections (API verification, error handling, etc.) follow below — only if the user flow coverage is complete.
**User-story-format guard:** If the phase has `mode: mvp` but the `**Goal:**` line is not in user-story format, refuse to verify. Surface the discrepancy and ask the user to run `/gsd mvp-phase ${PHASE}` to set a proper user-story goal. Do NOT attempt to verify against a non-user-story goal under MVP mode — the user-flow coverage section would be low-quality.
**Mode is all-or-nothing per phase** (PRD decision Q1, inherited from Phase 1). The MVP Mode Verification rules apply to the whole phase or not at all.
**Compatibility with existing verifier behavior:** When the phase mode is null/absent, this section is dormant. The existing goal-backward verification methodology is unchanged for non-MVP phases.
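The user-story-format guard described above reduces to a pattern check. The regex here is an assumption — a loose approximation of the "As a / I want to / so that" template, not the canonical one from `references/user-story-template.md`:

```shell
# Hypothetical format guard: the regex is a loose approximation of the
# user-story template, for illustration only.
goal_is_user_story() {
  printf '%s' "$1" | grep -Eiq '^As an? .+, I want( to)? .+, so that .+'
}
goal_is_user_story "As a reviewer, I want to see diffs, so that I can audit changes." \
  && echo "ok: user-story format"
goal_is_user_story "Build the diff viewer" || echo "refuse: not a user story"
```

A goal that fails the check triggers the refusal path: surface the discrepancy and route the user to `/gsd mvp-phase`.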
</mvp_mode_verification>
<output>
## Create VERIFICATION.md

bin/gsd-sdk.js (new executable file, 37 lines)
View File

@@ -0,0 +1,37 @@
#!/usr/bin/env node
/**
* bin/gsd-sdk.js — back-compat shim for external callers of `gsd-sdk`.
*
* When the parent package is installed globally (`npm install -g get-shit-done-cc`)
* npm creates a `gsd-sdk` symlink in the global bin directory pointing at this
* file. npm correctly chmods bin entries from a tarball, so the execute-bit
* problem that afflicted the sub-install approach (issue #2453) cannot occur here.
*
* NOTE (#2775): `npx get-shit-done-cc` does NOT link this shim — npx only
* exposes the package's primary bin (`get-shit-done-cc`). For npx-based usage,
* the installer (`bin/install.js#installSdkIfNeeded`) self-symlinks `gsd-sdk`
* into `~/.local/bin` when needed and verifies PATH callability before
* reporting `✓ GSD SDK ready`.
*
* This shim resolves sdk/dist/cli.js relative to its own location and delegates
* to it via `node`, so `gsd-sdk <args>` behaves identically to
* `node <packageDir>/sdk/dist/cli.js <args>`.
*
* Call sites (slash commands, agent prompts, hook scripts) continue to work without
* changes because `gsd-sdk` still resolves on PATH — it just comes from this shim
* in the parent package rather than from a separately installed @gsd-build/sdk.
*/
'use strict';
const path = require('path');
const { spawnSync } = require('child_process');
const cliPath = path.resolve(__dirname, '..', 'sdk', 'dist', 'cli.js');
const result = spawnSync(process.execPath, [cliPath, ...process.argv.slice(2)], {
  stdio: 'inherit',
  env: process.env,
});
process.exit(result.status ?? 1);

File diff suppressed because one or more lines are too long

View File

@@ -54,7 +54,7 @@ the normal phase sequence and accumulate context over time.
5. **Commit:**
```bash
gsd-sdk query commit "docs: add backlog item ${NEXT} — ${ARGUMENTS}" .planning/ROADMAP.md ".planning/phases/${NEXT}-${SLUG}/.gitkeep"
gsd-sdk query commit "docs: add backlog item ${NEXT} — ${ARGUMENTS}" --files .planning/ROADMAP.md ".planning/phases/${NEXT}-${SLUG}/.gitkeep"
```
6. **Report:**

View File

@@ -1,6 +1,6 @@
---
name: gsd:ai-integration-phase
description: Generate AI design contract (AI-SPEC.md) for phases that involve building AI systems — framework selection, implementation guidance from official docs, and evaluation strategy
description: Generate an AI-SPEC.md design contract for phases that involve building AI systems.
argument-hint: "[phase number]"
allowed-tools:
- Read

View File

@@ -1,6 +1,6 @@
---
name: gsd:code-review-fix
description: Auto-fix issues found by code review in REVIEW.md. Spawns fixer agent, commits each fix atomically, produces REVIEW-FIX.md summary.
description: Auto-fix issues found by code review in REVIEW.md; commits each fix atomically.
argument-hint: "<phase-number> [--all] [--auto]"
allowed-tools:
- Read

View File

@@ -1,6 +1,6 @@
---
name: gsd:discuss-phase
description: Gather phase context through adaptive questioning before planning. Use --all to skip area selection and discuss all gray areas interactively. Use --auto to skip interactive questions (Claude picks recommended defaults). Use --chain for interactive discuss followed by automatic plan+execute. Use --power for bulk question generation into a file-based UI (answer at your own pace).
description: Gather phase context through adaptive questioning before planning.
argument-hint: "<phase> [--all] [--auto] [--chain] [--batch] [--analyze] [--text] [--power]"
allowed-tools:
- Read
@@ -29,10 +29,8 @@ Extract implementation decisions that downstream agents need — researcher and
</objective>
<execution_context>
@~/.claude/get-shit-done/workflows/discuss-phase.md
@~/.claude/get-shit-done/workflows/discuss-phase-assumptions.md
@~/.claude/get-shit-done/workflows/discuss-phase-power.md
@~/.claude/get-shit-done/templates/context.md
Workflow files are loaded on-demand in the <process> section below — not upfront.
Do not pre-load any workflow files before reading the mode routing instructions.
</execution_context>
<runtime_note>
@@ -51,11 +49,15 @@ Context files are resolved in-workflow using `init phase-op` and roadmap/state t
DISCUSS_MODE=$(gsd-sdk query config-get workflow.discuss_mode 2>/dev/null || echo "discuss")
```
If `DISCUSS_MODE` is `"assumptions"`: Read and execute @~/.claude/get-shit-done/workflows/discuss-phase-assumptions.md end-to-end.
If `DISCUSS_MODE` is `"assumptions"`:
Read and execute `~/.claude/get-shit-done/workflows/discuss-phase-assumptions.md` end-to-end.
If `DISCUSS_MODE` is `"discuss"` (or unset, or any other value): Read and execute @~/.claude/get-shit-done/workflows/discuss-phase.md end-to-end.
If `DISCUSS_MODE` is `"discuss"` (or unset, or any other value):
Read and execute `~/.claude/get-shit-done/workflows/discuss-phase.md` end-to-end.
**MANDATORY:** The execution_context files listed above ARE the instructions. Read the workflow file BEFORE taking any action. The objective and success_criteria sections in this command file are summaries — the workflow file contains the complete step-by-step process with all required behaviors, config checks, and interaction patterns. Do not improvise from the summary.
**MANDATORY:** Read the appropriate workflow file BEFORE taking any action. The objective and success_criteria sections in this command file are summaries — the workflow file contains the complete step-by-step process with all required behaviors, config checks, and interaction patterns. Do not improvise from the summary.
**Lazy loading:** `templates/context.md` is loaded inside the `write_context` step of the active workflow. `discuss-phase-power.md` is loaded inside `discuss-phase.md` when `--power` is detected. Do not load either here.
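The mode routing above reduces to a case statement — same paths as the text, with anything other than `"assumptions"` falling through to the default workflow:

```shell
# Routing sketch: "assumptions" selects the assumptions workflow; unset or
# any other value falls through to the default discuss workflow.
route_discuss() {
  case "${1:-discuss}" in
    assumptions) echo "$HOME/.claude/get-shit-done/workflows/discuss-phase-assumptions.md" ;;
    *)           echo "$HOME/.claude/get-shit-done/workflows/discuss-phase.md" ;;
  esac
}
route_discuss assumptions
route_discuss ""            # unset: default
route_discuss weird-value   # any other value: default
```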
</process>
<success_criteria>

View File

@@ -0,0 +1,35 @@
---
name: gsd:edit-phase
description: Edit any field of an existing roadmap phase in place, preserving number and position
argument-hint: <phase-number> [--force]
allowed-tools:
- Read
- Write
- Bash
---
<objective>
Modify any field of an existing phase in ROADMAP.md in place.
Supports:
- Editing individual fields (title, description/goal, requirements, success criteria, depends_on)
- Full regeneration of all fields from a clarified intent
- Guarded edits: refuses in_progress/completed phases unless --force is passed
- Depends-on validation: blocks invalid references with a clear error
- Diff + confirmation before writing
</objective>
<execution_context>
@~/.claude/get-shit-done/workflows/edit-phase.md
</execution_context>
<context>
Arguments: $ARGUMENTS (format: <phase-number> [--force])
Roadmap and state are resolved in-workflow via `init phase-op` and targeted reads.
</context>
<process>
Execute the edit-phase workflow from @~/.claude/get-shit-done/workflows/edit-phase.md end-to-end.
Preserve all validation gates (phase existence, status guard, depends_on validation, diff + confirmation).
</process>

View File

@@ -1,6 +1,6 @@
---
name: gsd:eval-review
description: Retroactively audit an executed AI phase's evaluation coverage — scores each eval dimension as COVERED/PARTIAL/MISSING and produces an actionable EVAL-REVIEW.md with remediation plan
description: Audit an executed AI phase's evaluation coverage and produce an EVAL-REVIEW.md remediation plan.
argument-hint: "[phase number]"
allowed-tools:
- Read

View File

@@ -1,7 +1,7 @@
---
type: prompt
name: gsd:forensics
description: Post-mortem investigation for failed GSD workflows — analyzes git history, artifacts, and state to diagnose what went wrong
description: Post-mortem investigation for failed GSD workflows — diagnoses what went wrong.
argument-hint: "[problem description]"
allowed-tools:
- Read

View File

@@ -153,7 +153,7 @@ gsd-tools path: $HOME/.claude/get-shit-done/bin/gsd-tools.cjs
1. **Invoke graphify:**
Run from the project root:
```
graphify . --update
graphify update .
```
This builds the knowledge graph with SHA256 incremental caching.
Timeout: up to 5 minutes (or as configured via graphify.build_timeout).
@@ -193,6 +193,19 @@ Wait for the agent to complete.
---
## MVP-Mode Node Rendering
**MVP-mode rendering.** When a phase has `**Mode:** mvp` in ROADMAP.md (resolved via `gsd-sdk query roadmap.get-phase --pick mode`), render its graph node with two distinct visual signals:
1. **Distinct fill color.** Use `#22c55e` (green) for MVP-mode phase nodes. Standard phases keep the default fill color. Two-channel signaling (color + label) handles color-blind and grayscale renders.
2. **`MVP` label suffix.** Append ` (MVP)` to the node's label text. Example: a phase originally labeled `Phase 1: User Auth` renders as `Phase 1: User Auth (MVP)`.
Both signals fire together — never just one. Per PRD Q5 decision, the goal is unambiguous visual distinction in any render context.
When the phase mode is null/absent, render with the standard color and label — no behavioral change for non-MVP phases.
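The two-channel signal can be sketched as a single decision point so the signals cannot drift apart. `DEFAULT_FILL` is a placeholder — the real default comes from the renderer:

```shell
# Two-channel sketch: color and label suffix fire together for mvp,
# neither fires otherwise. DEFAULT_FILL is a placeholder value.
DEFAULT_FILL="#9ca3af"
node_style() {  # $1 = mode (may be empty), $2 = label
  if [ "$1" = "mvp" ]; then
    echo "fill=#22c55e label=$2 (MVP)"
  else
    echo "fill=$DEFAULT_FILL label=$2"
  fi
}
node_style mvp "Phase 1: User Auth"   # both color and suffix
node_style ""  "Phase 2: Billing"     # unchanged for non-MVP phases
```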
---
## Anti-Patterns
1. DO NOT spawn an agent for query/status/diff operations -- these are inline CLI calls

View File

@@ -1,6 +1,6 @@
---
name: gsd:inbox
description: Triage and review all open GitHub issues and PRs against project templates and contribution guidelines
description: Triage and review open GitHub issues and PRs against project templates and contribution guidelines.
argument-hint: "[--issues] [--prs] [--label] [--close-incomplete] [--repo owner/repo]"
allowed-tools:
- Read

View File

@@ -1,6 +1,6 @@
---
name: gsd:ingest-docs
description: Scan a repo for mixed ADRs, PRDs, SPECs, and DOCs and bootstrap or merge the full .planning/ setup from them. Classifies each doc in parallel, synthesizes a consolidated context with a conflicts report, and routes to new-project or merge-milestone depending on whether .planning/ already exists.
description: Bootstrap or merge a .planning/ setup from existing ADRs, PRDs, SPECs, and docs in a repo.
argument-hint: "[path] [--mode new|merge] [--manifest <file>] [--resolve auto|interactive]"
allowed-tools:
- Read

View File

@@ -4,7 +4,6 @@ description: Insert urgent work as decimal phase (e.g., 72.1) between existing p
argument-hint: <after> <description>
allowed-tools:
- Read
- Write
- Bash
---

commands/gsd/mvp-phase.md (new file, 45 lines)
View File

@@ -0,0 +1,45 @@
---
name: gsd:mvp-phase
description: Plan a phase as a vertical MVP slice — user story, SPIDR splitting, then plan-phase
argument-hint: "<phase-number>"
agent: gsd-planner
allowed-tools:
- Read
- Write
- Bash
- Glob
- Grep
- Task
- AskUserQuestion
---
<objective>
Guide the user through MVP-mode planning for a phase. The command:
1. Prompts for an "As a / I want to / So that" user story (three structured questions)
2. Runs SPIDR splitting check — if the story is too large, walks through Spike/Paths/Interfaces/Data/Rules and offers to split into multiple phases
3. Writes `**Mode:** mvp` and the reformatted `**Goal:**` to the phase's ROADMAP.md section
4. Delegates to `/gsd plan-phase <N>` which auto-detects MVP mode via the roadmap field
Phase 1 of the vertical-mvp-slice PRD shipped the planner-side machinery; this command is the user entry point for it.
</objective>
<execution_context>
@~/.claude/get-shit-done/workflows/mvp-phase.md
@~/.claude/get-shit-done/references/spidr-splitting.md
@~/.claude/get-shit-done/references/user-story-template.md
</execution_context>
<runtime_note>
**Copilot (VS Code):** Use `vscode_askquestions` wherever this workflow calls `AskUserQuestion`. Equivalent API.
</runtime_note>
<context>
Phase number: $ARGUMENTS (required — integer or decimal like `2.1`)
The phase must already exist in ROADMAP.md (created via `/gsd new-project`, `/gsd add-phase`, or `/gsd insert-phase`). This command does not create new phases — it converts an existing phase to MVP mode.
</context>
<process>
Execute the mvp-phase workflow from @~/.claude/get-shit-done/workflows/mvp-phase.md end-to-end.
Preserve all gates: phase existence, status guard (refuse in_progress/completed), user-story format validation, SPIDR splitting check, ROADMAP write confirmation, plan-phase delegation.
</process>

View File

@@ -1,7 +1,7 @@
---
name: gsd:plan-phase
description: Create detailed phase plan (PLAN.md) with verification loop
argument-hint: "[phase] [--auto] [--research] [--skip-research] [--gaps] [--skip-verify] [--prd <file>] [--reviews] [--text] [--tdd]"
argument-hint: "[phase] [--auto] [--research] [--skip-research] [--gaps] [--skip-verify] [--prd <file>] [--reviews] [--text] [--tdd] [--mvp]"
agent: gsd-planner
allowed-tools:
- Read
@@ -42,6 +42,7 @@ Phase number: $ARGUMENTS (optional — auto-detects next unplanned phase if omit
- `--prd <file>` — Use a PRD/acceptance criteria file instead of discuss-phase. Parses requirements into CONTEXT.md automatically. Skips discuss-phase entirely.
- `--reviews` — Replan incorporating cross-AI review feedback from REVIEWS.md (produced by `/gsd-review`)
- `--text` — Use plain-text numbered lists instead of TUI menus (required for `/rc` remote sessions)
- `--mvp` — Vertical MVP mode. Planner organizes tasks as feature slices (UI→API→DB) instead of horizontal layers. On Phase 1 of a new project, also emits `SKELETON.md` (Walking Skeleton). Can be persisted on a phase via `**Mode:** mvp` in ROADMAP.md.
Normalize phase input in step 2 before any directory lookups.
</context>

View File

@@ -1,7 +1,7 @@
---
name: gsd:plan-review-convergence
description: "Cross-AI plan convergence loop — replan with review feedback until no HIGH concerns remain (max 3 cycles)"
argument-hint: "<phase> [--codex] [--gemini] [--claude] [--opencode] [--text] [--ws <name>] [--all] [--max-cycles N]"
description: "Cross-AI plan convergence loop — replan with review feedback until no HIGH concerns remain."
argument-hint: "<phase> [--codex] [--gemini] [--claude] [--opencode] [--ollama] [--lm-studio] [--llama-cpp] [--text] [--ws <name>] [--all] [--max-cycles N]"
allowed-tools:
- Read
- Write
@@ -42,8 +42,14 @@ Phase number: extracted from $ARGUMENTS (required)
- `--gemini` — Use Gemini CLI as reviewer
- `--claude` — Use Claude CLI as reviewer (separate session)
- `--opencode` — Use OpenCode as reviewer
- `--all` — Use all available CLIs
- `--ollama` — Use local Ollama server as reviewer (OpenAI-compatible, default host `http://localhost:11434`; configure model via `review.models.ollama`)
- `--lm-studio` — Use local LM Studio server as reviewer (OpenAI-compatible, default host `http://localhost:1234`; configure model via `review.models.lm_studio`)
- `--llama-cpp` — Use local llama.cpp server as reviewer (OpenAI-compatible, default host `http://localhost:8080`; configure model via `review.models.llama_cpp`)
- `--all` — Use all available CLIs and running local model servers
- `--max-cycles N` — Maximum replan→review cycles (default: 3)
**Feature gate:** This command requires `workflow.plan_review_convergence=true`. Enable with:
`gsd config-set workflow.plan_review_convergence true`
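A reachability probe for the local reviewers might look like the sketch below. The `/v1/models` endpoint is an assumption of the OpenAI-compatible surface; the hosts are the defaults from the flag descriptions above:

```shell
# Hypothetical probe: check whether a local OpenAI-compatible server is up.
# Endpoint path /v1/models is an assumption; hosts are the documented defaults.
check_local_reviewer() {  # $1 = name, $2 = base URL
  if curl -sf --max-time 2 "$2/v1/models" >/dev/null 2>&1; then
    echo "$1: reachable"
  else
    echo "$1: not running"
  fi
}
check_local_reviewer ollama    http://localhost:11434
check_local_reviewer lm-studio http://localhost:1234
```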
</context>
<process>

View File

@@ -1,6 +1,6 @@
---
name: gsd:plant-seed
description: Capture a forward-looking idea with trigger conditions — surfaces automatically at the right milestone
description: Capture a forward-looking idea that surfaces automatically at the right milestone.
argument-hint: "[idea summary]"
allowed-tools:
- Read

View File

@@ -1,6 +1,6 @@
---
name: gsd:progress
description: Check project progress, show context, and route to next action (execute or plan). Use --forensic to append a 6-check integrity audit after the standard report.
description: Check project progress, show context, and route to the next action (execute or plan).
argument-hint: "[--forensic]"
allowed-tools:
- Read

View File

@@ -71,7 +71,7 @@ For each directory found:
- Check if PLAN.md exists
- Check if SUMMARY.md exists; if so, read `status` from its frontmatter via:
```bash
gsd-sdk query frontmatter.get .planning/quick/{dir}/SUMMARY.md status 2>/dev/null
gsd-sdk query frontmatter.get .planning/quick/{dir}/SUMMARY.md status
```
- Determine directory creation date: `stat -f "%SB" -t "%Y-%m-%d"` (macOS) or `stat -c "%w"` (Linux); fall back to the date prefix in the directory name (format: `YYYYMMDD-` prefix)
- Derive display status:

View File

@@ -129,7 +129,7 @@ The quality of the merge depends on having a **pristine baseline** — the origi
Check for baseline sources in priority order:
### Option A: Git history (most reliable)
### Option A: Pristine hash from backup-meta.json + git history (most reliable)
If the config directory is a git repository:
```bash
CONFIG_DIR=$(dirname "$PATCHES_DIR")
@@ -137,15 +137,35 @@ if git -C "$CONFIG_DIR" rev-parse --git-dir >/dev/null 2>&1; then
HAS_GIT=true
fi
```
When `HAS_GIT=true`, use `git log` to find the commit where GSD was originally installed (before user edits). For each file, the pristine baseline can be extracted with:
When `HAS_GIT=true`, use the `pristine_hashes` recorded in `backup-meta.json` to locate the correct baseline commit. For each file, iterate the commits that touched it and find the one whose file content has the recorded pristine SHA-256:
```bash
git -C "$CONFIG_DIR" log --diff-filter=A --format="%H" -- "{file_path}"
# Get the expected pristine SHA-256 from backup-meta.json
PRISTINE_HASH=$(jq -r ".pristine_hashes[\"${file_path}\"] // empty" "$PATCHES_DIR/backup-meta.json")
BASELINE_COMMIT=""
if [ -n "$PRISTINE_HASH" ]; then
  # Walk commits that touched this file, pick the one matching the pristine hash
  while IFS= read -r commit_hash; do
    blob_hash=$(git -C "$CONFIG_DIR" show "${commit_hash}:${file_path}" 2>/dev/null | sha256sum | cut -d' ' -f1)
    if [ "$blob_hash" = "$PRISTINE_HASH" ]; then
      BASELINE_COMMIT="$commit_hash"
      break
    fi
  done < <(git -C "$CONFIG_DIR" log --format="%H" -- "${file_path}")
fi
# Fallback: if no pristine hash in backup-meta (older installer), use first-add commit
if [ -z "$BASELINE_COMMIT" ]; then
  BASELINE_COMMIT=$(git -C "$CONFIG_DIR" log --diff-filter=A --format="%H" -- "${file_path}" | tail -1)
fi
```
This gives the commit that first added the file (the install commit). Extract the pristine version:
Extract the pristine version from the matched commit:
```bash
git -C "$CONFIG_DIR" show {install_commit}:{file_path}
git -C "$CONFIG_DIR" show "${BASELINE_COMMIT}:${file_path}"
```
**Why this matters:** `git log --diff-filter=A` returns the commit that *first added* the file, which is the wrong baseline on repos that have been through multiple GSD update cycles. The `pristine_hashes` field in `backup-meta.json` records the SHA-256 of the file as it existed in the pre-update GSD release — matching against it finds the correct baseline regardless of how many updates have occurred.
### Option B: Pristine snapshot directory
Check if a `gsd-pristine/` directory exists alongside `gsd-local-patches/`:
```bash

View File

@@ -47,7 +47,7 @@ milestone sequence or remove stale entries.
6. **Commit changes:**
```bash
gsd-sdk query commit "docs: review backlog — promoted N, removed M" .planning/ROADMAP.md
gsd-sdk query commit "docs: review backlog — promoted N, removed M" --files .planning/ROADMAP.md
```
7. **Report summary:**

View File

@@ -0,0 +1,39 @@
---
name: gsd:settings-advanced
description: Power-user configuration for plan bounce, timeouts, branch templates, and cross-AI execution.
allowed-tools:
- Read
- Write
- Bash
- AskUserQuestion
---
<objective>
Interactive configuration of GSD power-user knobs that don't belong in the common-case `/gsd-settings` prompt.
Routes to the settings-advanced workflow which handles:
- Ensuring the config file exists (workstream-aware path resolution)
- Reading and parsing the current settings
- Sectioned prompts: Planning Tuning, Execution Tuning, Discussion Tuning, Cross-AI Execution, Git Customization, Runtime / Output
- Config merging that preserves every unrelated key
- Confirmation table display
Use `/gsd-settings` for the common-case toggles (model profile, research/plan_check/verifier, branching strategy, context warnings). Use `/gsd-settings-advanced` once those are set and you want to tune the internals.
</objective>
<execution_context>
@~/.claude/get-shit-done/workflows/settings-advanced.md
</execution_context>
<process>
**Follow the settings-advanced workflow** from `@~/.claude/get-shit-done/workflows/settings-advanced.md`.
The workflow handles all logic including:
1. Config file creation with defaults if missing (via `gsd-sdk query config-ensure-section`)
2. Reading the current config
3. Six sectioned AskUserQuestion batches with current values pre-selected
4. Numeric-input validation (non-numeric rejected, empty input keeps current)
5. Answer parsing and config merging (preserves unrelated keys)
6. File writing (atomic)
7. Confirmation table display
</process>

View File

@@ -0,0 +1,44 @@
---
name: gsd:settings-integrations
description: Configure third-party API keys, code-review CLI routing, and agent-skill injection
allowed-tools:
- Read
- Write
- Bash
- AskUserQuestion
---
<objective>
Interactive configuration of GSD's third-party integration surface:
- Search API keys: `brave_search`, `firecrawl`, `exa_search`, and
the `search_gitignored` toggle
- Code-review CLI routing: `review.models.{claude,codex,gemini,opencode}`
- Agent-skill injection: `agent_skills.<agent-type>`
API keys are stored plaintext in `.planning/config.json` but are masked
(`****<last-4>`) in every piece of interactive output. The workflow never
echoes plaintext to stdout, stderr, or any log.
This command is deliberately distinct from `/gsd-settings` (workflow toggles)
and any `/gsd-settings-advanced` tuning surface. It handles *connectivity*,
not pipeline shape.
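A minimal sketch of the masking rule above (the helper name is illustrative):

```shell
# Masking sketch matching the ****<last-4> display rule: never echo the
# full key; show only the last four characters.
mask_key() {
  key="$1"
  if [ -z "$key" ]; then echo "(unset)"; return; fi
  last4=$(printf '%s' "$key" | tail -c 4)
  printf '****%s\n' "$last4"
}
mask_key "sk-live-abc123XYZ9"   # → ****XYZ9
mask_key ""                     # → (unset)
```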
</objective>
<execution_context>
@~/.claude/get-shit-done/workflows/settings-integrations.md
</execution_context>
<process>
**Follow the settings-integrations workflow** from
`@~/.claude/get-shit-done/workflows/settings-integrations.md`.
The workflow handles:
1. Resolving `$GSD_CONFIG_PATH` (flat vs workstream)
2. Reading current integration values (masked for display)
3. Section 1 — Search Integrations: Brave / Firecrawl / Exa / search_gitignored
4. Section 2 — Review CLI Routing: review.models.{claude,codex,gemini,opencode}
5. Section 3 — Agent Skills Injection: agent_skills.<agent-type>
6. Writing values via `gsd-sdk query config-set` (which merges, preserving
unrelated keys)
7. Masked confirmation display
</process>

View File

@@ -1,7 +1,7 @@
---
name: gsd:sketch
description: Rapidly sketch UI/design ideas using throwaway HTML mockups with multi-variant exploration
argument-hint: "<design idea to explore> [--quick]"
description: Sketch UI/design ideas with throwaway HTML mockups, or propose what to sketch next (frontier mode)
argument-hint: "[design idea to explore] [--quick] [--text] or [frontier]"
allowed-tools:
- Read
- Write
@@ -10,11 +10,20 @@ allowed-tools:
- Grep
- Glob
- AskUserQuestion
- WebSearch
- WebFetch
- mcp__context7__resolve-library-id
- mcp__context7__query-docs
---
<objective>
Explore design directions through throwaway HTML mockups before committing to implementation.
Each sketch produces 2-3 variants for comparison. Sketches live in `.planning/sketches/` and
integrate with GSD commit patterns, state tracking, and handoff workflows.
integrate with GSD commit patterns, state tracking, and handoff workflows. Loads spike
findings to ground mockups in real data shapes and validated interaction patterns.
Two modes:
- **Idea mode** (default) — describe a design idea to sketch
- **Frontier mode** (no argument or "frontier") — analyzes existing sketch landscape and proposes consistency and frontier sketches
Does not require `/gsd-new-project` — auto-creates `.planning/sketches/` if needed.
</objective>
@@ -41,5 +50,5 @@ Design idea: $ARGUMENTS
<process>
Execute the sketch workflow from @~/.claude/get-shit-done/workflows/sketch.md end-to-end.
Preserve all workflow gates (intake, decomposition, variant evaluation, MANIFEST updates, commit patterns).
Preserve all workflow gates (intake, decomposition, target stack research, variant evaluation, MANIFEST updates, commit patterns).
</process>

View File

@@ -1,6 +1,6 @@
---
name: gsd:spec-phase
description: Socratic spec refinement — clarify WHAT a phase delivers with ambiguity scoring before discuss-phase. Produces a SPEC.md with falsifiable requirements locked before implementation decisions begin.
description: Clarify WHAT a phase delivers with ambiguity scoring; produces a SPEC.md before discuss-phase.
argument-hint: "<phase> [--auto] [--text]"
allowed-tools:
- Read

View File

@@ -27,5 +27,5 @@ project history. Output skill goes to `./.claude/skills/spike-findings-[project]
<process>
Execute the spike-wrap-up workflow from @~/.claude/get-shit-done/workflows/spike-wrap-up.md end-to-end.
Preserve all curation gates (per-spike review, grouping approval, CLAUDE.md routing line).
Preserve all workflow gates (auto-include, feature-area grouping, skill synthesis, CLAUDE.md routing line, intelligent next-step routing).
</process>

View File

@@ -1,7 +1,7 @@
---
name: gsd:spike
description: Rapidly spike an idea with throwaway experiments to validate feasibility before planning
argument-hint: "<idea to validate> [--quick]"
description: Spike an idea through experiential exploration, or propose what to spike next (frontier mode)
argument-hint: "[idea to validate] [--quick] [--text] or [frontier]"
allowed-tools:
- Read
- Write
@@ -10,11 +10,20 @@ allowed-tools:
- Grep
- Glob
- AskUserQuestion
- WebSearch
- WebFetch
- mcp__context7__resolve-library-id
- mcp__context7__query-docs
---
<objective>
Rapid feasibility validation through focused, throwaway experiments. Each spike answers one
specific question with observable evidence. Spikes live in `.planning/spikes/` and integrate
with GSD commit patterns, state tracking, and handoff workflows.
Spike an idea through experiential exploration — build focused experiments to feel the pieces
of a future app, validate feasibility, and produce verified knowledge for the real build.
Spikes live in `.planning/spikes/` and integrate with GSD commit patterns, state tracking,
and handoff workflows.
Two modes:
- **Idea mode** (default) — describe an idea to spike
- **Frontier mode** (no argument or "frontier") — analyzes existing spike landscape and proposes integration and frontier spikes
Does not require `/gsd-new-project` — auto-creates `.planning/spikes/` if needed.
</objective>
@@ -33,9 +42,10 @@ Idea: $ARGUMENTS
**Available flags:**
- `--quick` — Skip decomposition/alignment, jump straight to building. Use when you already know what to spike.
- `--text` — Use plain-text numbered lists instead of AskUserQuestion (for non-Claude runtimes).
</context>
<process>
Execute the spike workflow from @~/.claude/get-shit-done/workflows/spike.md end-to-end.
Preserve all workflow gates (decomposition, risk ordering, verification, MANIFEST updates, commit patterns).
Preserve all workflow gates (prior spike check, decomposition, research, risk ordering, observability assessment, verification, MANIFEST updates, commit patterns).
</process>


@@ -0,0 +1,19 @@
---
name: gsd:sync-skills
description: Sync managed GSD skills across runtime roots so multi-runtime users stay aligned after an update
allowed-tools:
- Bash
- AskUserQuestion
---
<objective>
Sync managed `gsd-*` skill directories from one canonical runtime's skills root to one or more destination runtime skills roots.
Routes to the sync-skills workflow which handles:
- Argument parsing (--from, --to, --dry-run, --apply)
- Runtime skills root resolution via install.js --skills-root
- Diff computation (CREATE / UPDATE / REMOVE per destination)
- Dry-run reporting (default — no writes)
- Apply execution (copy and remove with idempotency)
- Non-GSD skill preservation (only gsd-* dirs are touched)
</objective>
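The per-destination diff computation described above could be sketched as follows. This is an assumed shape, not the workflow's actual implementation; note that only `gsd-*` directories are considered, matching the non-GSD preservation rule:

```javascript
// Illustrative sketch of CREATE / UPDATE / REMOVE for one destination root.
// diffSkills is a hypothetical name; inputs are directory-name lists.
function diffSkills(sourceDirs, destDirs) {
  // Preservation rule: anything not prefixed gsd- is never touched
  const src = new Set(sourceDirs.filter((d) => d.startsWith('gsd-')));
  const dst = new Set(destDirs.filter((d) => d.startsWith('gsd-')));
  return {
    CREATE: [...src].filter((d) => !dst.has(d)), // in source, missing at dest
    UPDATE: [...src].filter((d) => dst.has(d)),  // present in both (content compare decides no-op)
    REMOVE: [...dst].filter((d) => !src.has(d)), // gsd-* at dest with no source counterpart
  };
}
```

In dry-run mode (the default) this report is all that gets printed; `--apply` would act on it.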


@@ -38,7 +38,7 @@ ls .planning/threads/*.md 2>/dev/null
For each thread file found:
- Read frontmatter `status` field via:
```bash
gsd-sdk query frontmatter.get .planning/threads/{file} status 2>/dev/null
gsd-sdk query frontmatter.get .planning/threads/{file} status
```
- If frontmatter `status` field is missing, fall back to reading markdown heading `## Status: OPEN` (or IN PROGRESS / RESOLVED) from the file body
- Read frontmatter `updated` field for the last-updated date
@@ -83,7 +83,7 @@ When SUBCMD=close and SLUG is set (already sanitized):
3. Commit:
```bash
gsd-sdk query commit "docs: resolve thread — {SLUG}" ".planning/threads/{SLUG}.md"
gsd-sdk query commit "docs: resolve thread — {SLUG}" --files ".planning/threads/{SLUG}.md"
```
4. Print:
@@ -191,7 +191,7 @@ updated: {today ISO date}
5. Commit:
```bash
gsd-sdk query commit "docs: create thread — ${ARGUMENTS}" ".planning/threads/${SLUG}.md"
gsd-sdk query commit "docs: create thread — ${ARGUMENTS}" --files ".planning/threads/${SLUG}.md"
```
6. Report:


@@ -1,6 +1,6 @@
---
name: gsd:ultraplan-phase
description: "[BETA] Offload plan phase to Claude Code's ultraplan cloud — drafts remotely while terminal stays free, review in browser with inline comments, import back via /gsd-import. Claude Code only."
description: "[BETA] Offload plan phase to Claude Code's ultraplan cloud; review in browser and import back."
argument-hint: "[phase-number]"
allowed-tools:
- Read


@@ -343,18 +343,26 @@ GSD uses a multi-agent architecture where thin orchestrators (workflow files) sp
| Property | Value |
|----------|-------|
| **Spawned by** | `/gsd-map-codebase` |
| **Spawned by** | `/gsd-map-codebase`, post-execute drift gate in `/gsd:execute-phase` |
| **Parallelism** | 4 instances (tech, architecture, quality, concerns) |
| **Tools** | Read, Bash, Grep, Glob, Write |
| **Model (balanced)** | Haiku |
| **Color** | Cyan |
| **Produces** | `.planning/codebase/*.md` (7 documents) |
| **Produces** | `.planning/codebase/*.md` (7 documents, with `last_mapped_commit` frontmatter) |
**Key behaviors:**
- Read-only exploration + structured output
- Writes documents directly to disk
- No reasoning required — pattern extraction from file contents
**`--paths <p1,p2,...>` scope hint (#2003):**
Accepts an optional `--paths` directive in its prompt. When present, the
mapper restricts Glob/Grep/Bash exploration to the listed repo-relative path
prefixes — this is the incremental-remap path used by the post-execute
codebase-drift gate. Path values that contain `..`, start with `/`, or
include shell metacharacters are rejected. Without the hint, the mapper
runs its default whole-repo scan.
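The rejection rules above can be sketched as a small validator. This is a hypothetical illustration; `isSafePathHint` is not the mapper's actual function name:

```javascript
// Sketch of the --paths rejection rules: no absolute paths, no traversal,
// no shell metacharacters. Illustrative only.
function isSafePathHint(p) {
  if (p.startsWith('/')) return false;            // absolute paths rejected
  if (p.split('/').includes('..')) return false;  // `..` traversal rejected
  if (/[;&|$`<>(){}\s*?!\[\]'"\\]/.test(p)) return false; // shell metacharacters rejected
  return true;
}

// Only repo-relative, metacharacter-free prefixes pass:
['packages/api/src', '../etc', '/etc', 'a;rm -rf'].map(isSafePathHint);
// → [true, false, false, false]
```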
---
### gsd-debugger


@@ -76,6 +76,7 @@ Every agent spawned by an orchestrator gets a clean context window (up to 200K t
### 2. Thin Orchestrators
Workflow files (`get-shit-done/workflows/*.md`) never do heavy lifting. They:
- Load context via `gsd-sdk query init.<workflow>` (or legacy `gsd-tools.cjs init <workflow>`)
- Spawn specialized agents with focused prompts
- Collect results and route to the next step
@@ -84,6 +85,7 @@ Workflow files (`get-shit-done/workflows/*.md`) never do heavy lifting. They:
### 3. File-Based State
All state lives in `.planning/` as human-readable Markdown and JSON. No database, no server, no external dependencies. This means:
- State survives context resets (`/clear`)
- State is inspectable by both humans and agents
- State can be committed to git for team visibility
@@ -95,6 +97,7 @@ Workflow feature flags follow the **absent = enabled** pattern. If a key is miss
### 5. Defense in Depth
Multiple layers prevent common failure modes:
- Plans are verified before execution (plan-checker agent)
- Execution produces atomic commits per task
- Post-execution verification checks against phase goals
@@ -107,6 +110,7 @@ Multiple layers prevent common failure modes:
### Commands (`commands/gsd/*.md`)
User-facing entry points. Each file contains YAML frontmatter (name, description, allowed-tools) and a prompt body that bootstraps the workflow. Commands are installed as:
- **Claude Code:** Custom slash commands (`/gsd-command-name`)
- **OpenCode / Kilo:** Slash commands (`/gsd-command-name`)
- **Codex:** Skills (`$gsd-command-name`)
@@ -118,6 +122,7 @@ User-facing entry points. Each file contains YAML frontmatter (name, description
### Workflows (`get-shit-done/workflows/*.md`)
Orchestration logic that commands reference. Contains the step-by-step process including:
- Context loading via `gsd-sdk query` init handlers (or legacy `gsd-tools.cjs init`)
- Agent spawn instructions with model resolution
- Gate/checkpoint definitions
@@ -126,9 +131,37 @@ Orchestration logic that commands reference. Contains the step-by-step process i
**Total workflows:** see [`docs/INVENTORY.md`](INVENTORY.md#workflows) for the authoritative count and full roster.
#### Progressive disclosure for workflows
Workflow files are loaded verbatim into Claude's context every time the
corresponding `/gsd:*` command is invoked. To keep that cost bounded, the
workflow size budget enforced by `tests/workflow-size-budget.test.cjs`
mirrors the agent budget from #2361:
| Tier | Per-file line limit |
|-----------|--------------------|
| `XL` | 1700 — top-level orchestrators (`execute-phase`, `plan-phase`, `new-project`) |
| `LARGE` | 1500 — multi-step planners and large feature workflows |
| `DEFAULT` | 1000 — focused single-purpose workflows (the target tier) |
`workflows/discuss-phase.md` is held to a stricter <500-line ceiling per
issue #2551. When a workflow grows beyond its tier, extract per-mode bodies
into `workflows/<workflow>/modes/<mode>.md`, templates into
`workflows/<workflow>/templates/`, and shared knowledge into
`get-shit-done/references/`. The parent file becomes a thin dispatcher that
Reads only the mode and template files needed for the current invocation.
`workflows/discuss-phase/` is the canonical example of this pattern —
parent dispatches, modes/ holds per-flag behavior (`power.md`, `all.md`,
`auto.md`, `chain.md`, `text.md`, `batch.md`, `analyze.md`, `default.md`,
`advisor.md`), and templates/ holds CONTEXT.md, DISCUSSION-LOG.md, and
checkpoint.json schemas that are read only when the corresponding output
file is being written.
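The tier lookup above can be sketched as follows. This is illustrative only; the real enforcement lives in `tests/workflow-size-budget.test.cjs` and its names may differ:

```javascript
// Tier budgets from the table, plus the per-file override from #2551.
const TIER_LIMITS = { XL: 1700, LARGE: 1500, DEFAULT: 1000 };
const OVERRIDES = { 'workflows/discuss-phase.md': 500 }; // stricter <500-line ceiling

function lineLimitFor(file, tier = 'DEFAULT') {
  return OVERRIDES[file] ?? TIER_LIMITS[tier];
}

function checkBudget(file, lineCount, tier) {
  const limit = lineLimitFor(file, tier);
  return lineCount < limit
    ? { ok: true }
    : { ok: false, hint: 'extract modes/ and templates/, make the parent a dispatcher' };
}
```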
### Agents (`agents/*.md`)
Specialized agent definitions with frontmatter specifying:
- `name` — Agent identifier
- `description` — Role and purpose
- `tools` — Allowed tool access (Read, Write, Edit, Bash, Grep, Glob, WebSearch, etc.)
@@ -141,6 +174,7 @@ Specialized agent definitions with frontmatter specifying:
Shared knowledge documents that workflows and agents `@-reference` (see [`docs/INVENTORY.md`](INVENTORY.md#references-41-shipped) for the authoritative count and full roster):
**Core references:**
- `checkpoints.md` — Checkpoint type definitions and interaction patterns
- `gates.md` — 4 canonical gate types (Confirm, Quality, Safety, Transition) wired into plan-checker and verifier
- `model-profiles.md` — Per-agent model tier assignments
@@ -156,6 +190,7 @@ Shared knowledge documents that workflows and agents `@-reference` (see [`docs/I
- `common-bug-patterns.md` — Common bug patterns for code review and verification
**Workflow references:**
- `agent-contracts.md` — Formal interface between orchestrators and agents
- `context-budget.md` — Context window budget allocation rules
- `continuation-format.md` — Session continuation/resume format
@@ -190,7 +225,7 @@ The planner agent (`agents/gsd-planner.md`) was decomposed from a single monolit
### Templates (`get-shit-done/templates/`)
Markdown templates for all planning artifacts. Used by `gsd-tools.cjs template fill` and `scaffold` commands to create pre-structured files:
Markdown templates for all planning artifacts. Used by `gsd-sdk query template.fill` / `phase.scaffold` (and legacy `gsd-tools.cjs template fill` / top-level `scaffold`) to create pre-structured files:
- `project.md`, `requirements.md`, `roadmap.md`, `state.md` — Core project files
- `phase-prompt.md` — Phase execution prompt template
- `summary.md` (+ `summary-minimal.md`, `summary-standard.md`, `summary-complex.md`) — Granularity-aware summary templates
@@ -224,27 +259,29 @@ See [`docs/INVENTORY.md`](INVENTORY.md#hooks-11-shipped) for the authoritative 1
Node.js CLI utility (`gsd-tools.cjs`) with domain modules split across `get-shit-done/bin/lib/` (see [`docs/INVENTORY.md`](INVENTORY.md#cli-modules-24-shipped) for the authoritative roster):
| Module | Responsibility |
|--------|---------------|
| `core.cjs` | Error handling, output formatting, shared utilities |
| `state.cjs` | STATE.md parsing, updating, progression, metrics |
| `phase.cjs` | Phase directory operations, decimal numbering, plan indexing |
| `roadmap.cjs` | ROADMAP.md parsing, phase extraction, plan progress |
| `config.cjs` | config.json read/write, section initialization |
| `verify.cjs` | Plan structure, phase completeness, reference, commit validation |
| `template.cjs` | Template selection and filling with variable substitution |
| `frontmatter.cjs` | YAML frontmatter CRUD operations |
| `init.cjs` | Compound context loading for each workflow type |
| `milestone.cjs` | Milestone archival, requirements marking |
| `commands.cjs` | Misc commands (slug, timestamp, todos, scaffolding, stats) |
| `model-profiles.cjs` | Model profile resolution table |
| `security.cjs` | Path traversal prevention, prompt injection detection, safe JSON parsing, shell argument validation |
| `uat.cjs` | UAT file parsing, verification debt tracking, audit-uat support |
| `docs.cjs` | Docs-update workflow init, Markdown scanning, monorepo detection |
| `workstream.cjs` | Workstream CRUD, migration, session-scoped active pointer |
| `schema-detect.cjs` | Schema-drift detection for ORM patterns (Prisma, Drizzle, etc.) |
| `profile-pipeline.cjs` | User behavioral profiling data pipeline, session file scanning |
| `profile-output.cjs` | Profile rendering, USER-PROFILE.md and dev-preferences.md generation |
| Module | Responsibility |
| ---------------------- | --------------------------------------------------------------------------------------------------- |
| `core.cjs` | Error handling, output formatting, shared utilities |
| `state.cjs` | STATE.md parsing, updating, progression, metrics |
| `phase.cjs` | Phase directory operations, decimal numbering, plan indexing |
| `roadmap.cjs` | ROADMAP.md parsing, phase extraction, plan progress |
| `config.cjs` | config.json read/write, section initialization |
| `verify.cjs` | Plan structure, phase completeness, reference, commit validation |
| `template.cjs` | Template selection and filling with variable substitution |
| `frontmatter.cjs` | YAML frontmatter CRUD operations |
| `init.cjs` | Compound context loading for each workflow type |
| `milestone.cjs` | Milestone archival, requirements marking |
| `commands.cjs` | Misc commands (slug, timestamp, todos, scaffolding, stats) |
| `model-profiles.cjs` | Model profile resolution table |
| `security.cjs` | Path traversal prevention, prompt injection detection, safe JSON parsing, shell argument validation |
| `uat.cjs` | UAT file parsing, verification debt tracking, audit-uat support |
| `docs.cjs` | Docs-update workflow init, Markdown scanning, monorepo detection |
| `workstream.cjs` | Workstream CRUD, migration, session-scoped active pointer |
| `schema-detect.cjs` | Schema-drift detection for ORM patterns (Prisma, Drizzle, etc.) |
| `profile-pipeline.cjs` | User behavioral profiling data pipeline, session file scanning |
| `profile-output.cjs` | Profile rendering, USER-PROFILE.md and dev-preferences.md generation |
---
@@ -255,10 +292,10 @@ Node.js CLI utility (`gsd-tools.cjs`) with domain modules split across `get-shit
```
Orchestrator (workflow .md)
├── Load context: gsd-tools.cjs init <workflow> <phase>
├── Load context: gsd-sdk query init.<workflow> <phase> (or legacy gsd-tools.cjs init)
│ Returns JSON with: project info, config, state, phase details
├── Resolve model: gsd-tools.cjs resolve-model <agent-name>
├── Resolve model: gsd-sdk query resolve-model <agent-name>
│ Returns: opus | sonnet | haiku | inherit
├── Spawn Agent (Task/SubAgent call)
@@ -269,27 +306,29 @@ Orchestrator (workflow .md)
├── Collect result
└── Update state: gsd-tools.cjs state update/patch/advance-plan
└── Update state: gsd-sdk query state.update / state.patch / state.advance-plan (or legacy gsd-tools.cjs)
```
### Primary Agent Spawn Categories
Conceptual spawn-pattern taxonomy for the 21 primary agents. For the authoritative 31-agent roster (including the 10 advanced/specialized agents such as `gsd-pattern-mapper`, `gsd-code-reviewer`, `gsd-code-fixer`, `gsd-ai-researcher`, `gsd-domain-researcher`, `gsd-eval-planner`, `gsd-eval-auditor`, `gsd-framework-selector`, `gsd-debug-session-manager`, `gsd-intel-updater`), see [`docs/INVENTORY.md`](INVENTORY.md#agents-31-shipped).
| Category | Agents | Parallelism |
|----------|--------|-------------|
| **Researchers** | gsd-project-researcher, gsd-phase-researcher, gsd-ui-researcher, gsd-advisor-researcher | 4 parallel (stack, features, architecture, pitfalls); advisor spawns during discuss-phase |
| **Synthesizers** | gsd-research-synthesizer | Sequential (after researchers complete) |
| **Planners** | gsd-planner, gsd-roadmapper | Sequential |
| **Checkers** | gsd-plan-checker, gsd-integration-checker, gsd-ui-checker, gsd-nyquist-auditor | Sequential (verification loop, max 3 iterations) |
| **Executors** | gsd-executor | Parallel within waves, sequential across waves |
| **Verifiers** | gsd-verifier | Sequential (after all executors complete) |
| **Mappers** | gsd-codebase-mapper | 4 parallel (tech, arch, quality, concerns) |
| **Debuggers** | gsd-debugger | Sequential (interactive) |
| **Auditors** | gsd-ui-auditor, gsd-security-auditor | Sequential |
| **Doc Writers** | gsd-doc-writer, gsd-doc-verifier | Sequential (writer then verifier) |
| **Profilers** | gsd-user-profiler | Sequential |
| **Analyzers** | gsd-assumptions-analyzer | Sequential (during discuss-phase) |
| Category | Agents | Parallelism |
| ---------------- | --------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------- |
| **Researchers** | gsd-project-researcher, gsd-phase-researcher, gsd-ui-researcher, gsd-advisor-researcher | 4 parallel (stack, features, architecture, pitfalls); advisor spawns during discuss-phase |
| **Synthesizers** | gsd-research-synthesizer | Sequential (after researchers complete) |
| **Planners** | gsd-planner, gsd-roadmapper | Sequential |
| **Checkers** | gsd-plan-checker, gsd-integration-checker, gsd-ui-checker, gsd-nyquist-auditor | Sequential (verification loop, max 3 iterations) |
| **Executors** | gsd-executor | Parallel within waves, sequential across waves |
| **Verifiers** | gsd-verifier | Sequential (after all executors complete) |
| **Mappers** | gsd-codebase-mapper | 4 parallel (tech, arch, quality, concerns) |
| **Debuggers** | gsd-debugger | Sequential (interactive) |
| **Auditors** | gsd-ui-auditor, gsd-security-auditor | Sequential |
| **Doc Writers** | gsd-doc-writer, gsd-doc-verifier | Sequential (writer then verifier) |
| **Profilers** | gsd-user-profiler | Sequential |
| **Analyzers** | gsd-assumptions-analyzer | Sequential (during discuss-phase) |
### Wave Execution Model
@@ -305,6 +344,7 @@ Wave Analysis:
```
Each executor gets:
- Fresh 200K context window (or up to 1M for models that support it)
- The specific PLAN.md to execute
- Project context (PROJECT.md, STATE.md)
@@ -317,14 +357,13 @@ When the context window is 500K+ tokens (1M-class models like Opus 4.6, Sonnet 4
- **Executor agents** receive prior wave SUMMARY.md files and the phase CONTEXT.md/RESEARCH.md, enabling cross-plan awareness within a phase
- **Verifier agents** receive all PLAN.md, SUMMARY.md, CONTEXT.md files plus REQUIREMENTS.md, enabling history-aware verification
The orchestrator reads `context_window` from config (`gsd-tools.cjs config-get context_window`) and conditionally includes richer context when the value is >= 500,000. For standard 200K windows, prompts use truncated versions with cache-friendly ordering to maximize context efficiency.
The orchestrator reads `context_window` from config (`gsd-sdk query config-get context_window`, or legacy `gsd-tools.cjs config-get`) and conditionally includes richer context when the value is >= 500,000. For standard 200K windows, prompts use truncated versions with cache-friendly ordering to maximize context efficiency.
#### Parallel Commit Safety
When multiple executors run within the same wave, two mechanisms prevent conflicts:
1. **`--no-verify` commits** — Parallel agents skip pre-commit hooks (which can cause build lock contention, e.g., cargo lock fights in Rust projects). The orchestrator runs `git hook run pre-commit` once after each wave completes.
1. `--no-verify` commits — Parallel agents skip pre-commit hooks (which can cause build lock contention, e.g., cargo lock fights in Rust projects). The orchestrator runs `git hook run pre-commit` once after each wave completes.
2. **STATE.md file locking** — All `writeStateMd()` calls use lockfile-based mutual exclusion (`STATE.md.lock` with `O_EXCL` atomic creation). This prevents the read-modify-write race condition where two agents read STATE.md, modify different fields, and the last writer overwrites the other's changes. Includes stale lock detection (10s timeout) and spin-wait with jitter.
---
@@ -372,7 +411,9 @@ plan-phase
├── Research gate (blocks if RESEARCH.md has unresolved open questions)
├── Phase Researcher → RESEARCH.md
├── Planner (with reachability check) → PLAN.md files
└── Plan Checker → Verify loop (max 3x)
├── Plan Checker → Verify loop (max 3x)
├── Requirements coverage gate (REQ-IDs → plans)
└── Decision coverage gate (CONTEXT.md `<decisions>` → plans, BLOCKING — #2492)
state planned-phase → STATE.md (Planned/Ready to execute)
@@ -383,6 +424,7 @@ execute-phase (context reduction: truncated prompts, cache-friendly ordering)
├── Executor per plan → code + atomic commits
├── SUMMARY.md per plan
└── Verifier → VERIFICATION.md
└── Decision coverage gate (CONTEXT.md decisions → shipped artifacts, NON-BLOCKING — #2492)
verify-work → UAT.md (user acceptance testing)
@@ -430,6 +472,7 @@ UI-SPEC.md (per phase) ───────────────────
```
Equivalent paths for other runtimes:
- **OpenCode:** `~/.config/opencode/` or `~/.opencode/`
- **Kilo:** `~/.config/kilo/` or `~/.kilo/`
- **Gemini CLI:** `~/.gemini/`
@@ -454,8 +497,8 @@ Equivalent paths for other runtimes:
│ ├── ARCHITECTURE.md
│ └── PITFALLS.md
├── codebase/ # Brownfield mapping (from /gsd-map-codebase)
│ ├── STACK.md
│ ├── ARCHITECTURE.md
│ ├── STACK.md # YAML frontmatter carries `last_mapped_commit`
│ ├── ARCHITECTURE.md # for the post-execute drift gate (#2003)
│ ├── CONVENTIONS.md
│ ├── CONCERNS.md
│ ├── STRUCTURE.md
@@ -489,6 +532,30 @@ Equivalent paths for other runtimes:
└── continue-here.md # Context handoff (from pause-work)
```
### Post-Execute Codebase Drift Gate (#2003)
After the last wave of `/gsd:execute-phase` commits, the workflow runs a
non-blocking `codebase_drift_gate` step (between `schema_drift_gate` and
`verify_phase_goal`). It compares the diff `last_mapped_commit..HEAD`
against `.planning/codebase/STRUCTURE.md` and counts four kinds of
structural elements:
1. New directories outside mapped paths
2. New barrel exports at `(packages|apps)/<name>/src/index.*`
3. New migration files
4. New route modules under `routes/` or `api/`
If the count meets `workflow.drift_threshold` (default 3), the gate either
**warns** (default) with the suggested `/gsd:map-codebase --paths …` command,
or **auto-remaps** (`workflow.drift_action = auto-remap`) by spawning
`gsd-codebase-mapper` scoped to the affected paths. Any error in detection
or remap is logged and the phase continues — drift detection cannot fail
verification.
`last_mapped_commit` lives in YAML frontmatter at the top of each
`.planning/codebase/*.md` file; `bin/lib/drift.cjs` provides
`readMappedCommit` and `writeMappedCommit` round-trip helpers.
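A minimal round-trip for that frontmatter field might look like this. Hedged sketch only: the shipped helpers live in `bin/lib/drift.cjs` and may be implemented differently:

```javascript
// Read last_mapped_commit from a YAML frontmatter block, or null if absent.
function readMappedCommit(md) {
  const fm = md.match(/^---\n([\s\S]*?)\n---/);
  if (!fm) return null;
  const line = fm[1].split('\n').find((l) => l.startsWith('last_mapped_commit:'));
  return line ? line.split(':')[1].trim() : null;
}

// Write (or update) last_mapped_commit, creating a frontmatter block if needed.
function writeMappedCommit(md, sha) {
  if (readMappedCommit(md) !== null) {
    return md.replace(/^(last_mapped_commit:).*$/m, `$1 ${sha}`);
  }
  return /^---\n/.test(md)
    ? md.replace(/^---\n/, `---\nlast_mapped_commit: ${sha}\n`)
    : `---\nlast_mapped_commit: ${sha}\n---\n${md}`;
}
```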
---
## Installer Architecture
@@ -499,16 +566,16 @@ The installer (`bin/install.js`, ~3,000 lines) handles:
2. **Location selection** — Global (`--global`) or local (`--local`)
3. **File deployment** — Copies commands, workflows, references, templates, agents, hooks
4. **Runtime adaptation** — Transforms file content per runtime:
- Claude Code: Uses as-is
- OpenCode: Converts commands/agents to OpenCode-compatible flat command + subagent format
- Kilo: Reuses the OpenCode conversion pipeline with Kilo config paths
- Codex: Generates TOML config + skills from commands
- Copilot: Maps tool names (Read→read, Bash→execute, etc.)
- Gemini: Adjusts hook event names (`AfterTool` instead of `PostToolUse`)
- Antigravity: Skills-first with Google model equivalents
- Trae: Skills-first install to `~/.trae` / `./.trae` with no `settings.json` or hook integration
- Cline: Writes `.clinerules` for rule-based integration
- Augment Code: Skills-first with full skill conversion and config management
- Claude Code: Uses as-is
- OpenCode: Converts commands/agents to OpenCode-compatible flat command + subagent format
- Kilo: Reuses the OpenCode conversion pipeline with Kilo config paths
- Codex: Generates TOML config + skills from commands
- Copilot: Maps tool names (Read→read, Bash→execute, etc.)
- Gemini: Adjusts hook event names (`AfterTool` instead of `PostToolUse`)
- Antigravity: Skills-first with Google model equivalents
- Trae: Skills-first install to `~/.trae` / `./.trae` with no `settings.json` or hook integration
- Cline: Writes `.clinerules` for rule-based integration
- Augment Code: Skills-first with full skill conversion and config management
5. **Path normalization** — Replaces `~/.claude/` paths with runtime-specific paths
6. **Settings integration** — Registers hooks in runtime's `settings.json`
7. **Patch backup** — Since v1.17, backs up locally modified files to `gsd-local-patches/` for `/gsd-reapply-patches`
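Step 5 (path normalization) amounts to a string rewrite per runtime. A sketch with assumed names (`normalizePaths` and the root table shape are hypothetical; the root paths themselves come from this document):

```javascript
// Rewrite Claude Code's canonical ~/.claude/ prefix to the target runtime's root.
const RUNTIME_ROOTS = {
  'claude-code': '~/.claude/',
  opencode: '~/.config/opencode/',
  kilo: '~/.config/kilo/',
  gemini: '~/.gemini/',
};

function normalizePaths(content, runtime) {
  const root = RUNTIME_ROOTS[runtime] ?? '~/.claude/';
  // split/join is a portable global replace for a literal substring
  return content.split('~/.claude/').join(root);
}
```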
@@ -545,11 +612,13 @@ Runtime Engine (Claude Code / Gemini CLI)
### Context Monitor Thresholds
| Remaining Context | Level | Agent Behavior |
|-------------------|-------|----------------|
| > 35% | Normal | No warning injected |
| 35% | WARNING | "Avoid starting new complex work" |
| ≤ 25% | CRITICAL | "Context nearly exhausted, inform user" |
| Remaining Context | Level | Agent Behavior |
| ----------------- | -------- | --------------------------------------- |
| > 35% | Normal | No warning injected |
| ≤ 35% | WARNING | "Avoid starting new complex work" |
| ≤ 25% | CRITICAL | "Context nearly exhausted, inform user" |
Debounce: 5 tool uses between repeated warnings. Severity escalation (WARNING→CRITICAL) bypasses debounce.
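The threshold table and debounce rule can be sketched as follows. Names and shapes are illustrative, not the hook's actual API:

```javascript
// Classify remaining-context percentage per the table: most severe match wins.
const LEVELS = [
  { max: 25, level: 'CRITICAL', msg: 'Context nearly exhausted, inform user' },
  { max: 35, level: 'WARNING',  msg: 'Avoid starting new complex work' },
];

function classify(remainingPct) {
  return LEVELS.find((l) => remainingPct <= l.max)?.level ?? 'Normal';
}

// Debounce: suppress repeats within 5 tool uses, unless severity escalates.
function shouldWarn(level, lastLevel, toolUsesSinceWarn) {
  if (level === 'Normal') return false;
  if (level === 'CRITICAL' && lastLevel === 'WARNING') return true; // escalation bypasses debounce
  return toolUsesSinceWarn >= 5;
}
```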
@@ -564,12 +633,14 @@ Debounce: 5 tool uses between repeated warnings. Severity escalation (WARNING→
### Security Hooks (v1.27)
**Prompt Guard** (`gsd-prompt-guard.js`):
- Triggers on Write/Edit to `.planning/` files
- Scans content for prompt injection patterns (role override, instruction bypass, system tag injection)
- Advisory-only — logs detection, does not block
- Patterns are inlined (subset of `security.cjs`) for hook independence
**Workflow Guard** (`gsd-workflow-guard.js`):
- Triggers on Write/Edit to non-`.planning/` files
- Detects edits outside GSD workflow context (no active `/gsd-` command or Task subagent)
- Advises using `/gsd-quick` or `/gsd-fast` for state-tracked changes
@@ -581,18 +652,20 @@ Debounce: 5 tool uses between repeated warnings. Severity escalation (WARNING→
GSD supports multiple AI coding runtimes through a unified command/workflow architecture:
| Runtime | Command Format | Agent System | Config Location |
|---------|---------------|--------------|-----------------|
| Claude Code | `/gsd-command` | Task spawning | `~/.claude/` |
| OpenCode | `/gsd-command` | Subagent mode | `~/.config/opencode/` |
| Kilo | `/gsd-command` | Subagent mode | `~/.config/kilo/` |
| Gemini CLI | `/gsd-command` | Task spawning | `~/.gemini/` |
| Codex | `$gsd-command` | Skills | `~/.codex/` |
| Copilot | `/gsd-command` | Agent delegation | `~/.github/` |
| Antigravity | Skills | Skills | `~/.gemini/antigravity/` |
| Trae | Skills | Skills | `~/.trae/` |
| Cline | Rules | Rules | `.clinerules` |
| Augment Code | Skills | Skills | Augment config |
| Runtime | Command Format | Agent System | Config Location |
| ------------ | -------------- | ---------------- | ------------------------ |
| Claude Code | `/gsd-command` | Task spawning | `~/.claude/` |
| OpenCode | `/gsd-command` | Subagent mode | `~/.config/opencode/` |
| Kilo | `/gsd-command` | Subagent mode | `~/.config/kilo/` |
| Gemini CLI | `/gsd-command` | Task spawning | `~/.gemini/` |
| Codex | `$gsd-command` | Skills | `~/.codex/` |
| Copilot | `/gsd-command` | Agent delegation | `~/.github/` |
| Antigravity | Skills | Skills | `~/.gemini/antigravity/` |
| Trae | Skills | Skills | `~/.trae/` |
| Cline | Rules | Rules | `.clinerules` |
| Augment Code | Skills | Skills | Augment config |
### Abstraction Points
@@ -602,4 +675,4 @@ GSD supports multiple AI coding runtimes through a unified command/workflow arch
4. **Path conventions** — Each runtime stores config in different directories
5. **Model references** — `inherit` profile lets GSD defer to the runtime's model selection
The installer handles all translation at install time. Workflows and agents are written in Claude Code's native format and transformed during deployment.

docs/CANARY.md Normal file

@@ -0,0 +1,66 @@
# Canary Stream
The **canary** dist-tag is GSD's earliest preview channel. It exists so contributors and willing early adopters can exercise in-flight features against the long-lived `dev` integration branch before they have any expectation of stability.
## Stream policy
GSD ships through three npm dist-tags, each fed by exactly one git branch. **Streams do not mix.**
| Branch | dist-tag | Audience | Stability |
|---|---|---|---|
| `dev` | `canary` | Contributors, willing early adopters | Best-effort. May regress between cuts. Roll-forward only. |
| `main` | `next` | Maintainers, RC testers | Release-candidate quality. Bug-bar enforced. |
| `main` | `latest` | Everyone else | Production stable. The default `npm install` target. |
`dev` is the integration branch for in-flight feature work (typically multi-PR vertical slices like the MVP/TDD/UAT track in 1.50.0). When the dev work stabilizes, it promotes to `main` as an RC train (`vX.Y.Z-rc.N` published to `next`), and after the RC train bakes, the same train promotes again to `latest`.
A canary build NEVER becomes a `next` build directly, and a `next` build NEVER becomes a `latest` build directly — every promotion goes through a fresh tag and a fresh release.
## Installing canary
```bash
# One-off invocation (npx)
npx get-shit-done-cc@canary
# Pin to the canary dist-tag globally
npm install -g get-shit-done-cc@canary
# Pin to an exact canary version
npm install -g get-shit-done-cc@1.50.0-canary.1
```
The CC installer's defensive purge rewrites stale config blocks left by older GSD versions, so reinstalling on top of an existing project is safe.
## When to install canary
**Do** install canary when you want to:
- Exercise in-flight planning/execution/verification features early and report findings
- Validate a fix you've contributed to `dev` is reachable end-to-end
- Help shake out canary-bake items (rough edges that won't ship to `next` until resolved)
**Do NOT** install canary on:
- Production projects you depend on for delivery
- A machine where rolling back means recreating GSD state (use a profile or a workspace instead)
- A demo or onboarding setup — pin to `@latest` so audiences see the stable surface
## Rolling back from canary
```bash
# Back to the current stable
npm install -g get-shit-done-cc@latest
# Or to the next/RC train
npm install -g get-shit-done-cc@next
```
If you have a local project that interacted with canary-only features (for instance, an MVP-mode phase planned by 1.50.0-canary), the planner artifacts in `.planning/` remain valid — older GSD versions will just ignore the `**Mode:** mvp` field on phases.
## Reporting issues against canary
File against the [issue tracker](https://github.com/gsd-build/get-shit-done/issues) with the `bug` template. Include the exact canary version (`get-shit-done-cc --version` reports it) so triage can route the report back into the `dev` stream rather than the stable stream.
## Where to look next
- Active canary release notes: [`docs/RELEASE-v1.50.0-canary.1.md`](RELEASE-v1.50.0-canary.1.md)
- Stable release notes: [`CHANGELOG.md`](../CHANGELOG.md)
- Stream architecture rationale: discussed across [#2727](https://github.com/gsd-build/get-shit-done/issues/2727), [#2773](https://github.com/gsd-build/get-shit-done/issues/2773) (codex schema-break and the resulting promotion bottleneck that motivated explicit stream isolation)


@@ -1,29 +1,71 @@
# GSD CLI Tools Reference
> Programmatic API reference for `gsd-tools.cjs`. Used by workflows and agents internally. For user-facing commands, see [Command Reference](COMMANDS.md).
> Surface-area reference for `get-shit-done/bin/gsd-tools.cjs` (legacy Node CLI). Workflows and agents should prefer `gsd-sdk query` or `@gsd-build/sdk` where a handler exists — see [SDK and programmatic access](#sdk-and-programmatic-access). For slash commands and user flows, see [Command Reference](COMMANDS.md).
---
## Overview
`gsd-tools.cjs` is a Node.js CLI utility that replaces repetitive inline bash patterns across GSD's ~50 command, workflow, and agent files. It centralizes: config parsing, model resolution, phase lookup, git commits, summary verification, state management, and template operations.
`gsd-tools.cjs` centralizes config parsing, model resolution, phase lookup, git commits, summary verification, state management, and template operations across GSD commands, workflows, and agents.
**Preferred for new orchestration:** Many of the same operations are available as `gsd-sdk query <command>` (see `sdk/src/query/index.ts` and `docs/QUERY-HANDLERS.md`). Use that in workflows and examples where the handler exists; keep `node … gsd-tools.cjs` for commands not yet in the registry (for example graphify) or when you need CJS-only flags.
**Location:** `get-shit-done/bin/gsd-tools.cjs`
**Modules:** see the [Module Architecture](#module-architecture) table; the `get-shit-done/bin/lib/` directory is authoritative.
| | |
| ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **Shipped path** | `get-shit-done/bin/gsd-tools.cjs` |
| **Implementation** | 20 domain modules under `get-shit-done/bin/lib/` (the directory is authoritative) |
| **Status** | Maintained for parity tests and CJS-only entrypoints; `gsd-sdk query` / SDK registry are the supported path for new orchestration (see [QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md)). |
**Usage (CJS):**
**Usage:**
```bash
node gsd-tools.cjs <command> [args] [--raw] [--cwd <path>]
```
**Global Flags:**
| Flag | Description |
|------|-------------|
| `--raw` | Machine-readable output (JSON or plain text, no formatting) |
| `--cwd <path>` | Override working directory (for sandboxed subagents) |
| `--ws <name>` | Target a specific workstream context (SDK only) |
**Global flags (CJS):**
| Flag | Description |
| -------------- | ---------------------------------------------------------------------------- |
| `--raw` | Machine-readable output (JSON or plain text, no formatting) |
| `--cwd <path>` | Override working directory (for sandboxed subagents) |
| `--ws <name>` | Workstream context (also honored when the SDK spawns this binary; see below) |
---
## SDK and programmatic access
Read this section when authoring workflows; skip it if you only need the command list below.
**1. CLI — `gsd-sdk query <argv…>`**
- Resolves argv with the same **longest-prefix** rules as the typed registry (`resolveQueryArgv` in `sdk/src/query/registry.ts`). Unregistered commands **fail fast** — use `node …/gsd-tools.cjs` only for handlers not in the registry.
- Full matrix (CJS command → registry key, CLI-only tools, aliases, golden tiers): [sdk/src/query/QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md).
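The longest-prefix rule can be sketched as follows — a minimal illustration only, with a made-up registry; the real `resolveQueryArgv` in `sdk/src/query/registry.ts` is the authority:

```javascript
// Hypothetical registry keys for illustration — not the real registry contents.
const REGISTRY = new Set(['state json', 'state load', 'phase-plan-index', 'roadmap analyze']);

// Match the longest leading run of argv words against registered keys;
// whatever is left over passes through as positional arguments.
function resolveQueryArgv(argv) {
  for (let n = argv.length; n > 0; n--) {
    const key = argv.slice(0, n).join(' ');
    if (REGISTRY.has(key)) return { key, rest: argv.slice(n) };
  }
  throw new Error(`unregistered command: ${argv.join(' ')}`); // fail fast
}
```

So `state json` resolves as a two-word key, while `phase-plan-index 12` resolves to the one-word key with `12` left as an argument, and an unregistered command throws immediately.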
**2. TypeScript — `@gsd-build/sdk` (`GSDTools`, `createRegistry`)**
- `GSDTools` (used by `PhaseRunner`, `InitRunner`, and `GSD.createTools()`) always shells out to `gsd-tools.cjs` via `execFile` — there is no in-process registry path on this class. For typed, in-process dispatch use `createRegistry()` from `sdk/src/query/index.ts`, or invoke `gsd-sdk query` (see [QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md)).
- Conventions: mutation event wiring, `GSDError` vs `{ data: { error } }`, locks, and stubs — [QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md).
**CJS → SDK examples (same project directory):**
| Legacy CJS | Preferred `gsd-sdk query` (examples) |
| ---------------------------------------- | ------------------------------------ |
| `node gsd-tools.cjs init phase-op 12` | `gsd-sdk query init phase-op 12` |
| `node gsd-tools.cjs phase-plan-index 12` | `gsd-sdk query phase-plan-index 12` |
| `node gsd-tools.cjs state json` | `gsd-sdk query state json` |
| `node gsd-tools.cjs roadmap analyze` | `gsd-sdk query roadmap analyze` |
**SDK state reads:** `gsd-sdk query state json` / `state.json` and `gsd-sdk query state load` / `state.load` currently share one native handler (rebuilt STATE.md frontmatter — CJS `cmdStateJson`). The legacy CJS `state load` payload (`config`, `state_raw`, existence flags) is still **CLI-only** via `node …/gsd-tools.cjs state load` until a separate registry handler exists. Full routing and golden rules: [QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md).
**CLI-only (not in registry):** e.g. **graphify**, **from-gsd2** / **gsd2-import** — call `gsd-tools.cjs` until registered.
**Mutation events (SDK):** `QUERY_MUTATION_COMMANDS` in `sdk/src/query/index.ts` lists commands that may emit structured events after a successful dispatch. Exceptions called out in QUERY-HANDLERS: `state validate` (read-only), `skill-manifest` (writes only with `--write`), `intel update` (stub).
**Golden parity:** Policy and CJS↔SDK test categories are documented under **Golden parity** in [QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md).
---
@@ -373,7 +415,7 @@ node gsd-tools.cjs from-gsd2 [--path <dir>] [--force] [--dry-run]
node gsd-tools.cjs commit <message> [--files f1 f2] [--amend] [--no-verify]
```
> **`--no-verify`**: Skips pre-commit hooks. Used by parallel executor agents during wave-based execution to avoid build lock contention (e.g., cargo lock fights in Rust projects). The orchestrator runs hooks once after each wave completes. Do not use `--no-verify` during sequential execution — let hooks run normally.
> `--no-verify`: Skips pre-commit hooks. Used by parallel executor agents during wave-based execution to avoid build lock contention (e.g., cargo lock fights in Rust projects). The orchestrator runs hooks once after each wave completes. Do not use `--no-verify` during sequential execution — let hooks run normally.
```bash
# Web search (requires Brave API key)
node gsd-tools.cjs websearch <query> [--limit N] [--freshness day|week|month]
```
@@ -430,3 +472,30 @@ User-facing entry point: `/gsd-graphify` (see [Command Reference](COMMANDS.md#gs
| Audit | `lib/audit.cjs` | Phase/milestone audit queue handlers; `audit-open` helper |
| GSD2 Import | `lib/gsd2-import.cjs` | Reverse-migration importer from GSD-2 projects (backs `/gsd-from-gsd2`) |
| Intel | `lib/intel.cjs` | Queryable codebase intelligence index (backs `/gsd-intel`) |
---
## Reviewer CLI Routing
`review.models.<cli>` maps a reviewer flavor to a shell command invoked by the code-review workflow. Set via [`/gsd-settings-integrations`](COMMANDS.md#gsd-settings-integrations) or directly:
```bash
gsd-sdk query config-set review.models.codex "codex exec --model gpt-5"
gsd-sdk query config-set review.models.gemini "gemini -m gemini-2.5-pro"
gsd-sdk query config-set review.models.opencode "opencode run --model claude-sonnet-4"
gsd-sdk query config-set review.models.claude "" # clear — fall back to session model
```
Slugs are validated against `[a-zA-Z0-9_-]+`; empty or path-containing slugs are rejected. See [`docs/CONFIGURATION.md`](CONFIGURATION.md#code-review-cli-routing) for the full field reference.
## Secret Handling
API keys configured via `/gsd-settings-integrations` (`brave_search`, `firecrawl`, `exa_search`) are written plaintext to `.planning/config.json` but are masked (`****<last-4>`) in every `config-set` / `config-get` output, confirmation table, and interactive prompt. See `get-shit-done/bin/lib/secrets.cjs` for the masking implementation. The `config.json` file itself is the security boundary — protect it with filesystem permissions and keep it out of git (`.planning/` is gitignored by default).
---
## See also
- [sdk/src/query/QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md) — registry matrix, routing, golden parity, intentional CJS differences
- [Architecture](ARCHITECTURE.md) — where `gsd-sdk query` fits in orchestration
- [Command Reference](COMMANDS.md) — user-facing `/gsd:` commands


@@ -90,7 +90,7 @@ Remove a workspace and clean up git worktrees.
### `/gsd-discuss-phase`
Capture implementation decisions before planning.
Gather phase context through adaptive questioning before planning.
| Argument | Required | Description |
|----------|----------|-------------|
@@ -171,7 +171,7 @@ Research, plan, and verify a phase.
### `/gsd-plan-review-convergence`
Cross-AI plan convergence loop. Runs `plan-phase → review → replan → re-review` cycles until no HIGH concerns remain (max 3 cycles by default). Spawns isolated agents for planning and review; orchestrator handles loop control, HIGH-concern counting, stall detection, and escalation.
Cross-AI plan convergence loop — replan with review feedback until no HIGH concerns remain. Runs `plan-phase → review → replan → re-review` cycles (max 3 cycles by default). Spawns isolated agents for planning and review; orchestrator handles loop control, HIGH-concern counting, stall detection, and escalation.
| Argument / Flag | Required | Description |
|-----------------|----------|-------------|
@@ -192,7 +192,7 @@ Cross-AI plan convergence loop. Runs `plan-phase → review → replan → re-re
### `/gsd-ultraplan-phase`
**[BETA — Claude Code only.]** Offload plan-phase work to Claude Code's ultraplan cloud. The plan drafts remotely so the terminal stays free; review inline comments in a browser, then import the finalized plan back into `.planning/` via `/gsd-import`.
**[BETA]** Offload plan phase to Claude Code's ultraplan cloud; review in browser and import back. The plan drafts remotely so the terminal stays free; review inline comments in a browser, then import the finalized plan back into `.planning/` via `/gsd-import`.
| Flag | Required | Description |
|------|----------|-------------|
@@ -425,6 +425,28 @@ Append new phase to roadmap.
/gsd-add-phase # Interactive — describe the phase
```
### `/gsd-edit-phase`
Edit any field of an existing roadmap phase in place.
| Argument | Required | Description |
|----------|----------|-------------|
| `N` | Yes | Phase number to edit |
| Flag | Description |
|------|-------------|
| `--force` | Allow editing in-progress or completed phases |
**Prerequisites:** `.planning/ROADMAP.md` exists, phase N must exist
**Produces:** Updated phase section in ROADMAP.md (in place, number and position preserved)
```bash
/gsd-edit-phase 5 # Edit any field of phase 5 (future phases only)
/gsd-edit-phase 5 --force # Edit phase 5 even if in-progress or completed
```
---
### `/gsd-insert-phase`
Insert urgent work between phases using decimal numbering.
@@ -519,7 +541,7 @@ Retroactively audit and fill Nyquist validation gaps.
### `/gsd-progress`
Show status and next steps.
Check project progress, show context, and route to the next action (execute or plan).
| Flag | Description |
|------|-------------|
@@ -562,6 +584,24 @@ Interactive command center for managing multiple phases from one terminal.
/gsd-manager # Open command center dashboard
```
**Checkpoint Heartbeats (#2410):**
Background `execute-phase` runs emit `[checkpoint]` markers at every wave and plan
boundary so the Claude API SSE stream never idles long enough to trigger
`Stream idle timeout - partial response received` on multi-plan phases. The
format is:
```
[checkpoint] phase {N} wave {W}/{M} starting, {count} plan(s), {P}/{Q} plans done
[checkpoint] phase {N} wave {W}/{M} plan {plan_id} starting ({P}/{Q} plans done)
[checkpoint] phase {N} wave {W}/{M} plan {plan_id} complete ({P}/{Q} plans done)
[checkpoint] phase {N} wave {W}/{M} complete, {P}/{Q} plans done ({ok}/{count} ok)
```
If a background phase fails partway through, grep the transcript for `[checkpoint]`
to see the last confirmed boundary. The manager's background-completion handler
uses these markers to report partial progress when an agent errors out.
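A marker line under the format above can be produced, or recovered from a transcript, with a sketch like this — the function names are illustrative, not part of the shipped manager:

```javascript
// Build a plan-complete checkpoint line from its fields. The placeholder
// names mirror the format strings above ({N}, {W}/{M}, {plan_id}, {P}/{Q}).
function checkpointLine(phase, wave, waves, planId, done, total) {
  return `[checkpoint] phase ${phase} wave ${wave}/${waves} plan ${planId} complete (${done}/${total} plans done)`;
}

// Recover the last confirmed boundary from a transcript, as the manager's
// background-completion handler does when an agent errors out.
function lastCheckpoint(transcript) {
  const lines = transcript.split('\n').filter((l) => l.startsWith('[checkpoint]'));
  return lines.length ? lines[lines.length - 1] : null;
}
```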
**Manager Passthrough Flags:**
Configure per-step flags in `.planning/config.json` under `manager.flags`. These flags are appended to each dispatched command:
@@ -645,7 +685,7 @@ Ingest an external plan file into the GSD planning system with conflict detectio
### `/gsd-ingest-docs`
Scan a repo containing mixed ADRs, PRDs, SPECs, and DOCs and bootstrap or merge the full `.planning/` setup from them in a single pass. Parallel classification (`gsd-doc-classifier`) plus synthesis with precedence rules and cycle detection (`gsd-doc-synthesizer`). Produces a three-bucket conflicts report (`INGEST-CONFLICTS.md`: auto-resolved, competing-variants, unresolved-blockers) and hard-blocks on LOCKED-vs-LOCKED ADR contradictions.
Bootstrap or merge a .planning/ setup from existing ADRs, PRDs, SPECs, and docs in a repo. Runs parallel classification (`gsd-doc-classifier`) plus synthesis with precedence rules and cycle detection (`gsd-doc-synthesizer`). Produces a three-bucket conflicts report (`INGEST-CONFLICTS.md`: auto-resolved, competing-variants, unresolved-blockers) and hard-blocks on LOCKED-vs-LOCKED ADR contradictions.
| Argument / Flag | Required | Description |
|-----------------|----------|-------------|
@@ -946,7 +986,7 @@ Package winning sketch decisions into a reusable project-local skill so future s
### `/gsd-forensics`
Post-mortem investigation of failed or stuck GSD workflows.
Post-mortem investigation for failed GSD workflows — diagnoses what went wrong.
| Argument | Required | Description |
|----------|----------|-------------|
@@ -1037,12 +1077,73 @@ Manage parallel workstreams for concurrent work on different milestone areas.
### `/gsd-settings`
Interactive configuration of workflow toggles and model profile.
Interactive configuration of workflow toggles and model profile. Questions are grouped into six visual sections:
- **Planning** — Research, Plan Checker, Pattern Mapper, Nyquist, UI Phase, UI Gate, AI Phase
- **Execution** — Verifier, TDD Mode, Code Review, Code Review Depth _(conditional — only when Code Review is on)_, UI Review
- **Docs & Output** — Commit Docs, Skip Discuss, Worktrees
- **Features** — Intel, Graphify
- **Model & Pipeline** — Model Profile, Auto-Advance, Branching
- **Misc** — Context Warnings, Research Qs
All answers are merged via `gsd-sdk query config-set` into the resolved project config path (`.planning/config.json` for a standard install, or `.planning/workstreams/<active>/config.json` when a workstream is active), preserving unrelated keys. After confirmation, the user may save the full settings object to `~/.gsd/defaults.json` so future `/gsd-new-project` runs start from the same baseline.
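The "preserving unrelated keys" write path amounts to setting one dotted key without touching siblings — a minimal sketch, not the real `config-set` implementation:

```javascript
// Set one dotted key in a config object, copying only the path that changes
// so every unrelated key (and the input object) is left untouched.
function configSet(config, dottedKey, value) {
  const parts = dottedKey.split('.');
  const out = { ...config };
  let node = out;
  for (let i = 0; i < parts.length - 1; i++) {
    node[parts[i]] = { ...(node[parts[i]] || {}) };
    node = node[parts[i]];
  }
  node[parts[parts.length - 1]] = value;
  return out;
}
```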
```bash
/gsd-settings # Interactive config
```
### `/gsd-settings-advanced`
Power-user configuration for plan bounce, timeouts, branch templates, and cross-AI execution. Use after `/gsd-settings` once the common-case toggles are dialed in.
Six sections, each a focused prompt batch:
| Section | Keys |
|---------|------|
| Planning Tuning | `workflow.plan_bounce`, `workflow.plan_bounce_passes`, `workflow.plan_bounce_script`, `workflow.subagent_timeout`, `workflow.inline_plan_threshold` |
| Execution Tuning | `workflow.node_repair`, `workflow.node_repair_budget`, `workflow.auto_prune_state` |
| Discussion Tuning | `workflow.max_discuss_passes` |
| Cross-AI Execution | `workflow.cross_ai_execution`, `workflow.cross_ai_command`, `workflow.cross_ai_timeout` |
| Git Customization | `git.base_branch`, `git.phase_branch_template`, `git.milestone_branch_template` |
| Runtime / Output | `response_language`, `context_window`, `search_gitignored`, `graphify.build_timeout` |
Current values are pre-selected; an empty input keeps the existing value. Numeric fields reject non-numeric input and re-prompt. Null-allowed fields (`plan_bounce_script`, `cross_ai_command`, `response_language`) accept an empty input as a clear. Writes route through `gsd-sdk query config-set`, which preserves every unrelated key.
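The empty-input semantics can be sketched as a single helper — hypothetical, purely to pin down the keep-vs-clear rule:

```javascript
// Null-allowed fields, per the table above.
const NULLABLE = new Set(['workflow.plan_bounce_script', 'workflow.cross_ai_command', 'response_language']);

// Fold one prompt answer into the config: '' keeps the current value,
// except on null-allowed keys where it clears instead.
function applyAnswer(key, current, input) {
  if (input === '') return NULLABLE.has(key) ? null : current;
  return input;
}
```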
```bash
/gsd-settings-advanced # Six-section interactive config
```
See [CONFIGURATION.md](CONFIGURATION.md) for the full schema and defaults.
### `/gsd-settings-integrations`
Interactive configuration of third-party integrations and cross-tool routing.
Distinct from `/gsd-settings` (workflow toggles) — this command handles
connectivity: API keys, reviewer CLI routing, and agent-skill injection.
Covers:
- **Search integrations:** `brave_search`, `firecrawl`, `exa_search` API keys,
and the `search_gitignored` toggle.
- **Code-review CLI routing:** `review.models.{claude,codex,gemini,opencode}`
— a shell command per reviewer flavor.
- **Agent-skill injection:** `agent_skills.<agent-type>` — skill names
injected into an agent's spawn frontmatter. Agent-type slugs are validated
against `[a-zA-Z0-9_-]+` so path separators and shell metacharacters are
rejected.
API keys are stored plaintext in `.planning/config.json` but displayed masked
(`****<last-4>`) in every interactive output, confirmation table, and
`config-set` stdout/stderr line. Plaintext is never echoed, never logged,
and never written to any file outside `config.json` by this workflow.
```bash
/gsd-settings-integrations # Interactive config (three sections)
```
See [`docs/CONFIGURATION.md`](CONFIGURATION.md) for the per-field reference and
[`docs/CLI-TOOLS.md`](CLI-TOOLS.md) for the reviewer-CLI routing contract.
### `/gsd-set-profile`
Quick profile switch.
@@ -1141,7 +1242,7 @@ Build, query, and inspect the project knowledge graph stored in `.planning/graph
### `/gsd-ai-integration-phase`
AI framework selection wizard for integrating AI/LLM capabilities into a project phase. Presents an interactive decision matrix, surfaces domain-specific failure modes and eval criteria, and produces `AI-SPEC.md` with a framework recommendation, implementation guidance, and evaluation strategy.
Generate an AI-SPEC.md design contract for phases that involve building AI systems. Presents an interactive decision matrix, surfaces domain-specific failure modes and eval criteria, and produces `AI-SPEC.md` with a framework recommendation, implementation guidance, and evaluation strategy.
**Produces:** `{phase}-AI-SPEC.md` in the phase directory
@@ -1156,7 +1257,7 @@ AI framework selection wizard for integrating AI/LLM capabilities into a project
### `/gsd-eval-review`
Retroactive audit of an implemented AI phase's evaluation coverage. Checks implementation against the `AI-SPEC.md` evaluation plan produced by `/gsd-ai-integration-phase`. Scores each eval dimension as COVERED/PARTIAL/MISSING.
Audit an executed AI phase's evaluation coverage and produce an EVAL-REVIEW.md remediation plan. Checks implementation against the `AI-SPEC.md` evaluation plan produced by `/gsd-ai-integration-phase`. Scores each eval dimension as COVERED/PARTIAL/MISSING.
**Prerequisites:** Phase has been executed and has an `AI-SPEC.md`
**Produces:** `{phase}-EVAL-REVIEW.md` with findings, gaps, and remediation guidance
@@ -1214,7 +1315,7 @@ Review source files changed during a phase for bugs, security vulnerabilities, a
### `/gsd-code-review-fix`
Auto-fix issues found by `/gsd-code-review`. Reads `REVIEW.md`, spawns a fixer agent, commits each fix atomically, and produces a `REVIEW-FIX.md` summary.
Auto-fix issues found by code review in REVIEW.md; commits each fix atomically. Reads `REVIEW.md`, spawns a fixer agent, and produces a `REVIEW-FIX.md` summary.
| Argument | Required | Description |
|----------|----------|-------------|
@@ -1413,7 +1514,7 @@ Review and promote backlog items to active milestone.
### `/gsd-plant-seed`
Capture a forward-looking idea with trigger conditions — surfaces automatically at the right milestone.
Capture a forward-looking idea that surfaces automatically at the right milestone.
| Argument | Required | Description |
|----------|----------|-------------|
@@ -1535,3 +1636,19 @@ Open Discord community invite.
```bash
/gsd-join-discord
```
---
## Contributing: Skill Description Standards
Skill descriptions (the `description:` field in each `commands/gsd/*.md` frontmatter) are
injected into every session's system prompt. To keep per-session overhead low, descriptions
must be ≤ 100 chars and must not duplicate flag documentation already in `argument-hint:`.
A lint gate enforces the budget:
```bash
npm run lint:descriptions
```
The check is also run as part of `npm test` via `tests/enh-2789-description-budget.test.cjs`.
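The budget rule itself is simple enough to state as code — a deliberately naive sketch (the real gate is `tests/enh-2789-description-budget.test.cjs`, and its frontmatter parsing is more careful than this):

```javascript
// Pull `description:` out of a command file's frontmatter and enforce the
// 100-char ceiling. A missing description also fails the gate.
function descriptionWithinBudget(frontmatter, budget = 100) {
  const m = frontmatter.match(/^description:\s*(.*)$/m);
  if (!m) return false;
  return m[1].trim().length <= budget;
}
```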


@@ -21,7 +21,7 @@ GSD stores project settings in `.planning/config.json`. Created during `/gsd-new
"search_gitignored": false,
"sub_repos": []
},
"context_profile": null,
"context": null,
"workflow": {
"research": true,
"plan_check": true,
@@ -30,10 +30,12 @@ GSD stores project settings in `.planning/config.json`. Created during `/gsd-new
"nyquist_validation": true,
"ui_phase": true,
"ui_safety_gate": true,
"ui_review": true,
"node_repair": true,
"node_repair_budget": 2,
"research_before_questions": false,
"discuss_mode": "discuss",
"max_discuss_passes": 3,
"skip_discuss": false,
"tdd_mode": false,
"text_mode": false,
@@ -43,13 +45,17 @@ GSD stores project settings in `.planning/config.json`. Created during `/gsd-new
"plan_bounce": false,
"plan_bounce_script": null,
"plan_bounce_passes": 2,
"plan_chunked": false,
"code_review_command": null,
"cross_ai_execution": false,
"cross_ai_command": null,
"cross_ai_timeout": 300,
"security_enforcement": true,
"security_asvs_level": 1,
"security_block_on": "high"
"security_block_on": "high",
"post_planning_gaps": true,
"build_command": null,
"test_command": null
},
"hooks": {
"context_warnings": true,
@@ -108,11 +114,15 @@ GSD stores project settings in `.planning/config.json`. Created during `/gsd-new
|---------|------|---------|---------|-------------|
| `mode` | enum | `interactive`, `yolo` | `interactive` | `yolo` auto-approves decisions; `interactive` confirms at each step |
| `granularity` | enum | `coarse`, `standard`, `fine` | `standard` | Controls phase count: `coarse` (3-5), `standard` (5-8), `fine` (8-12) |
| `model_profile` | enum | `quality`, `balanced`, `budget`, `inherit` | `balanced` | Model tier for each agent (see [Model Profiles](#model-profiles)) |
| `model_profile` | enum | `quality`, `balanced`, `budget`, `adaptive`, `inherit` | `balanced` | Model tier for each agent (see [Model Profiles](#model-profiles)). `adaptive` was added per [#1713](https://github.com/gsd-build/get-shit-done/issues/1713) / [#1806](https://github.com/gsd-build/get-shit-done/issues/1806) and resolves the same way as the other tiers under runtime-aware profiles. |
| `runtime` | string | `claude`, `codex`, or any string | (none) | Active runtime for [runtime-aware profile resolution](#runtime-aware-profiles-2517). When set, profile tiers (opus/sonnet/haiku) resolve to runtime-native model IDs. Today only the Codex install path emits per-agent model IDs from this resolver; other runtimes (`opencode`, `gemini`, `qwen`, `copilot`, …) consume the resolver at spawn time and gain dedicated install-path support in [#2612](https://github.com/gsd-build/get-shit-done/issues/2612). When unset (default), behavior is unchanged from prior versions. Added in v1.39 |
| `model_profile_overrides.<runtime>.<tier>` | string \| object | per-runtime tier override | (none) | Override the runtime-aware tier mapping for a specific `(runtime, tier)`. Tier is one of `opus`, `sonnet`, `haiku`. Value is either a model ID string (e.g. `"gpt-5-pro"`) or `{ model, reasoning_effort }`. See [Runtime-Aware Profiles](#runtime-aware-profiles-2517). Added in v1.39 |
| `project_code` | string | any short string | (none) | Prefix for phase directory names (e.g., `"ABC"` produces `ABC-01-setup/`). Added in v1.31 |
| `response_language` | string | language code | (none) | Language for agent responses (e.g., `"pt"`, `"ko"`, `"ja"`). Propagates to all spawned agents for cross-phase language consistency. Added in v1.32 |
| `context_window` | number | any integer | `200000` | Context window size in tokens. Set `1000000` for 1M-context models (e.g., `claude-opus-4-7[1m]`). Values `>= 500000` enable adaptive context enrichment (full-body reads of prior SUMMARY.md, deeper anti-pattern reads). Configured via `/gsd-settings-advanced`. |
| `context_profile` | string | `dev`, `research`, `review` | (none) | Execution context preset that applies a pre-configured bundle of mode, model, and workflow settings for the current type of work. Added in v1.34 |
| `claude_md_path` | string | any file path | `./CLAUDE.md` | Custom output path for the generated CLAUDE.md file. Useful for monorepos or projects that need CLAUDE.md in a non-root location. Defaults to `./CLAUDE.md` at the project root. Added in v1.36 |
| `claude_md_assembly.mode` | enum | `embed`, `link` | `embed` | Controls how managed sections are written into CLAUDE.md. `embed` (default) inlines content between GSD markers. `link` writes `@.planning/<source-path>` instead — Claude Code expands the reference at runtime, reducing CLAUDE.md size by ~65% on typical projects. `link` only applies to sections that have a real source file; `workflow` and fallback sections always embed. Per-block overrides: `claude_md_assembly.blocks.<section>` (e.g. `claude_md_assembly.blocks.architecture: link`). Added in v1.38 |
| `context` | string | any text | (none) | Custom context string injected into every agent prompt for the project. Use to provide persistent project-specific guidance (e.g., coding conventions, team practices) that every agent should be aware of |
| `phase_naming` | string | any string | (none) | Custom prefix for phase directory names. When set, overrides the auto-generated phase slug (e.g., `"feature"` produces `feature-01-setup/` instead of the roadmap-derived slug) |
| `brave_search` | boolean | `true`/`false` | auto-detected | Override auto-detection of Brave Search API availability. When unset, GSD checks for `BRAVE_API_KEY` env var or `~/.gsd/brave_api_key` file |
@@ -124,6 +134,41 @@ GSD stores project settings in `.planning/config.json`. Created during `/gsd-new
---
## Integration Settings
Configured interactively via [`/gsd-settings-integrations`](COMMANDS.md#gsd-settings-integrations). These are *connectivity* settings — API keys and cross-tool routing — and are intentionally kept separate from `/gsd-settings` (workflow toggles).
### Search API keys
API key fields accept a string value (the key itself). They can also be set to the sentinels `true`/`false`/`null` to override auto-detection from env vars / `~/.gsd/*_api_key` files (legacy behavior, see rows above).
| Setting | Type | Default | Description |
|---------|------|---------|-------------|
| `brave_search` | string \| boolean \| null | `null` | Brave Search API key used for web research. Displayed as `****<last-4>` in all UI / `config-set` output; never echoed plaintext |
| `firecrawl` | string \| boolean \| null | `null` | Firecrawl API key for deep-crawl scraping. Masked in display |
| `exa_search` | string \| boolean \| null | `null` | Exa Search API key for semantic search. Masked in display |
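As a `.planning/config.json` fragment, the three accepted shapes look like this (the string value is a placeholder, not a real key):

```json
{
  "brave_search": "<your-brave-api-key>",
  "firecrawl": false,
  "exa_search": null
}
```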
**Masking convention (`get-shit-done/bin/lib/secrets.cjs`):** keys 8+ characters render as `****<last-4>`; shorter keys render as `****`; `null`/empty renders as `(unset)`. Plaintext is written as-is to `.planning/config.json` — that file is the security boundary — but the CLI, confirmation tables, logs, and `AskUserQuestion` descriptions never display the plaintext. This applies to the `config-set` command output itself: `config-set brave_search <key>` returns a JSON payload with the value masked.
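A sketch of that convention as code — this mirrors the documented rules, not the exact implementation in `get-shit-done/bin/lib/secrets.cjs`:

```javascript
// Render a secret for display: 8+ chars show the last 4, shorter keys are
// fully masked, and unset values render as "(unset)".
function maskSecret(value) {
  if (value === null || value === undefined || value === '') return '(unset)';
  if (value.length >= 8) return '****' + value.slice(-4);
  return '****';
}
```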
### Code-review CLI routing
`review.models.<cli>` maps a reviewer flavor to a shell command. The code-review workflow shells out using this command when a matching flavor is requested.
| Setting | Type | Default | Description |
|---------|------|---------|-------------|
| `review.models.claude` | string | (session model) | Command for Claude-flavored review. Defaults to the session model when unset |
| `review.models.codex` | string | `null` | Command for Codex review, e.g. `"codex exec --model gpt-5"` |
| `review.models.gemini` | string | `null` | Command for Gemini review, e.g. `"gemini -m gemini-2.5-pro"` |
| `review.models.opencode` | string | `null` | Command for OpenCode review, e.g. `"opencode run --model claude-sonnet-4"` |
The `<cli>` slug is validated against `[a-zA-Z0-9_-]+`. Empty or path-containing slugs are rejected by `config-set`.
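The documented slug rule as an anchored regex — illustrative; `config-set` performs the real validation:

```javascript
// Anchoring the character class rejects empty strings, path separators,
// whitespace, and shell metacharacters in one check.
const SLUG_RE = /^[a-zA-Z0-9_-]+$/;
const isValidSlug = (slug) => SLUG_RE.test(slug);
```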
### Agent-skill injection (dynamic)
`agent_skills.<agent-type>` extends the `agent_skills` map documented below. Slug is validated against `[a-zA-Z0-9_-]+` — no path separators, no whitespace, no shell metacharacters. Configured interactively via `/gsd-settings-integrations`.
---
## Workflow Toggles
All workflow toggles follow the **absent = enabled** pattern. If a key is missing from config, it defaults to `true`.
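The absent = enabled pattern reduces to one comparison — a toggle is off only when the key is explicitly `false` (sketch; hypothetical helper name):

```javascript
// Missing keys — and a missing workflow object entirely — default to enabled.
function toggleEnabled(config, key) {
  return (config.workflow || {})[key] !== false;
}
```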
@@ -137,10 +182,12 @@ All workflow toggles follow the **absent = enabled** pattern. If a key is missin
| `workflow.nyquist_validation` | boolean | `true` | Test coverage mapping during plan-phase research |
| `workflow.ui_phase` | boolean | `true` | Generate UI design contracts for frontend phases |
| `workflow.ui_safety_gate` | boolean | `true` | Prompt to run /gsd-ui-phase for frontend phases during plan-phase |
| `workflow.ui_review` | boolean | `true` | Run visual quality audit (`/gsd-ui-review`) after phase execution in autonomous mode. When `false`, the UI audit step is skipped. |
| `workflow.node_repair` | boolean | `true` | Autonomous task repair on verification failure |
| `workflow.node_repair_budget` | number | `2` | Max repair attempts per failed task |
| `workflow.research_before_questions` | boolean | `false` | Run research before discussion questions instead of after |
| `workflow.discuss_mode` | string | `'discuss'` | Controls how `/gsd-discuss-phase` gathers context. `'discuss'` (default) asks questions one-by-one. `'assumptions'` reads the codebase first, generates structured assumptions with confidence levels, and only asks you to correct what's wrong. Added in v1.28 |
| `workflow.max_discuss_passes` | number | `3` | Maximum number of question rounds in discuss-phase before the workflow stops asking. Useful in headless/auto mode to prevent infinite discussion loops. |
| `workflow.skip_discuss` | boolean | `false` | When `true`, `/gsd-autonomous` bypasses the discuss-phase entirely, writing minimal CONTEXT.md from the ROADMAP phase goal. Useful for projects where developer preferences are fully captured in PROJECT.md/REQUIREMENTS.md. Added in v1.28 |
| `workflow.text_mode` | boolean | `false` | Replaces AskUserQuestion TUI menus with plain-text numbered lists. Required for Claude Code remote sessions (`/rc` mode) where TUI menus don't render. Can also be set per-session with `--text` flag on discuss-phase. Added in v1.28 |
| `workflow.use_worktrees` | boolean | `true` | When `false`, disables git worktree isolation for parallel execution. Users who prefer sequential execution or whose environment does not support worktrees can disable this. Added in v1.31 |
@@ -149,6 +196,9 @@ All workflow toggles follow the **absent = enabled** pattern. If a key is missin
| `workflow.plan_bounce` | boolean | `false` | Run external validation script against generated plans. When enabled, the plan-phase orchestrator pipes each PLAN.md through the script specified by `plan_bounce_script` and blocks on non-zero exit. Added in v1.36 |
| `workflow.plan_bounce_script` | string | (none) | Path to the external script invoked for plan bounce validation. Receives the PLAN.md path as its first argument. Required when `plan_bounce` is `true`. Added in v1.36 |
| `workflow.plan_bounce_passes` | number | `2` | Number of sequential bounce passes to run. Each pass feeds the previous pass's output back into the validator. Higher values increase rigor at the cost of latency. Added in v1.36 |
| `workflow.post_planning_gaps` | boolean | `true` | Unified post-planning gap report (#2493). After all plans are generated and committed, scans REQUIREMENTS.md and CONTEXT.md `<decisions>` against every PLAN.md in the phase directory, then prints one `Source \| Item \| Status` table. Uses word-boundary matching (so `REQ-1` never matches inside `REQ-10`) and natural sort (`REQ-02` sorts before `REQ-10`). Non-blocking — informational report only. Set to `false` to skip Step 13e of plan-phase. |
| `workflow.plan_review_convergence` | boolean | `false` | Enable the `/gsd-plan-review-convergence` command. Disabled by default — the command exits with an enable instruction when this key is `false`. The command automates the manual plan→review→replan loop: it spawns configured reviewers (Codex, Gemini, Claude, OpenCode, Ollama, LM Studio, llama.cpp), counts unresolved HIGH concerns via the CYCLE_SUMMARY contract, replans with `--reviews` feedback, and repeats until converged or max cycles reached. Enable with `gsd config-set workflow.plan_review_convergence true`. Added in v1.39 |
| `workflow.plan_chunked` | boolean | `false` | Enable chunked planning mode. When `true` (or when `--chunked` flag is passed to `/gsd-plan-phase`), the orchestrator splits the single long-lived planner Task into a short outline Task followed by N short per-plan Tasks (~3-5 min each). Each plan is committed individually for crash resilience. If a Task hangs and the terminal is force-killed, rerunning with `--chunked` resumes from the last completed plan. Particularly useful on Windows where long-lived Tasks may hang on stdio. Added in v1.38 |
| `workflow.code_review_command` | string | (none) | Shell command for external code review integration in `/gsd-ship`. Receives changed file paths via stdin. Non-zero exit blocks the ship workflow. Added in v1.36 |
| `workflow.tdd_mode` | boolean | `false` | Enable TDD pipeline as a first-class execution mode. When `true`, the planner aggressively applies `type: tdd` to eligible tasks (business logic, APIs, validations, algorithms) and the executor enforces RED/GREEN/REFACTOR gate sequence. An end-of-phase collaborative review checkpoint verifies gate compliance. Added in v1.36 |
| `workflow.cross_ai_execution` | boolean | `false` | Delegate phase execution to an external AI CLI instead of spawning local executor agents. Useful for leveraging a different model's strengths for specific phases. Added in v1.36 |
| `workflow.pattern_mapper` | boolean | `true` | Run the `gsd-pattern-mapper` agent between research and planning to map new files to existing codebase analogs |
| `workflow.subagent_timeout` | number | `600` | Timeout in seconds for individual subagent invocations. Increase for long-running research or execution phases |
| `workflow.inline_plan_threshold` | number | `3` | Maximum number of tasks in a phase before the planner generates a separate PLAN.md file instead of inlining tasks in the prompt |
| `workflow.drift_threshold` | number | `3` | Minimum number of new structural elements (new directories, barrel exports, migrations, route modules) introduced during a phase before the post-execute codebase-drift gate takes action. See [#2003](https://github.com/gsd-build/get-shit-done/issues/2003). Added in v1.39 |
| `workflow.drift_action` | string | `warn` | What to do when `workflow.drift_threshold` is exceeded after `/gsd-execute-phase`. `warn` prints a message suggesting `/gsd-map-codebase --paths …`; `auto-remap` spawns `gsd-codebase-mapper` scoped to the affected paths. Added in v1.39 |
| `workflow.build_command` | string | (none) | Shell command to build the project in the post-merge build gate (Step A of step 5.6 in execute-phase). When unset, the gate auto-detects: Xcode (`.xcodeproj` present) → `xcodebuild build`, `Makefile` with `build:` target → `make build`, Justfile → `just build`, `Cargo.toml` → `cargo build`, `go.mod` → `go build ./...`, Python → `python -m py_compile`, `package.json` with `build` script → `npm run build`. Runs with a 5-minute timeout; failure increments `WAVE_FAILURE_COUNT`. Added in v1.39 |
| `workflow.test_command` | string | (none) | Shell command to run the project's test suite in the post-merge test gate (Step B of step 5.6 in execute-phase) and the regression gate. When unset, the gate auto-detects: Xcode (`.xcodeproj` present) → `xcodebuild test`, `Makefile` with `test:` target → `make test`, Justfile → `just test`, `package.json` → `npm test`, `Cargo.toml` → `cargo test`, `go.mod` → `go test ./...`, Python → `python -m pytest`. Runs with a 5-minute timeout; failure increments `WAVE_FAILURE_COUNT`. Added in v1.39 |
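The auto-detection chain amounts to a marker-file probe in priority order. A minimal sketch, assuming simplified detection conditions (the function name and the Python probe are illustrative, not the shipped gate's implementation):

```shell
# Hypothetical sketch of the build-command auto-detection order (simplified).
detect_build() {
  if ls ./*.xcodeproj >/dev/null 2>&1; then echo "xcodebuild build"
  elif [ -f Makefile ] && grep -qE '^build:' Makefile; then echo "make build"
  elif [ -f Justfile ]; then echo "just build"
  elif [ -f Cargo.toml ]; then echo "cargo build"
  elif [ -f go.mod ]; then echo "go build ./..."
  elif ls ./*.py >/dev/null 2>&1; then echo "python -m py_compile"  # Python probe is a guess
  elif [ -f package.json ] && grep -q '"build"' package.json; then echo "npm run build"
  fi
}
```

Setting `workflow.build_command` explicitly bypasses the probe entirely, which is the right call for monorepos where several marker files coexist.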
### Recommended Presets
| `planning.search_gitignored` | boolean | `false` | Add `--no-ignore` to broad searches to include `.planning/` |
| `planning.sub_repos` | array of strings | `[]` | Paths of nested sub-repos relative to the project root. When set, GSD-aware tooling scopes phase-lookup, path-resolution, and commit operations per sub-repo instead of treating the outer repo as a monorepo |
### Project-Root Resolution in Multi-Repo Workspaces
When `sub_repos` is set and `gsd-tools.cjs` or `gsd-sdk query` is invoked from inside a listed child repo, both CLIs walk up to the parent workspace that owns `.planning/` before dispatching handlers. Resolution order (checked at each ancestor up to 10 levels, never above `$HOME`):
1. If the starting directory already has its own `.planning/`, it is the project root (no walk-up).
2. Parent has `.planning/config.json` listing the starting directory's top-level segment in `sub_repos` (or the legacy `planning.sub_repos` shape).
3. Parent has `.planning/config.json` with legacy `multiRepo: true` and the starting directory is inside a git repo.
4. Parent has `.planning/` and an ancestor up to the candidate parent contains `.git` (heuristic fallback).
If none match, the starting directory is returned unchanged. An explicit `--project-dir /path/to/workspace` is idempotent under this resolution: an already-resolved workspace root resolves to itself.
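The walk-up can be sketched as below. This covers only rules 1 and 4 (the config-reading rules 2 and 3 are omitted) and stops at the filesystem root rather than `$HOME`, so it is a simplified illustration, not the real resolver:

```shell
# Minimal sketch of project-root resolution: rule 1, then the rule-4 heuristic.
resolve_root() {
  start=$1 dir=$1 depth=0
  [ -d "$start/.planning" ] && { echo "$start"; return; }  # rule 1: own .planning
  dir=$(dirname "$dir")
  while [ "$dir" != "/" ] && [ "$depth" -lt 10 ]; do
    # rule 4 heuristic: an ancestor owns .planning and the child is a git repo
    if [ -d "$dir/.planning" ] && [ -d "$start/.git" ]; then echo "$dir"; return; fi
    dir=$(dirname "$dir"); depth=$((depth + 1))
  done
  echo "$start"  # no match: starting directory returned unchanged
}
```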
### Auto-Detection
If `.planning/` is in `.gitignore`, `commit_docs` is automatically `false` regardless of config.json. This prevents git errors.
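The rule can be sketched as a simple ignore-file probe. This assumes a literal `.planning/` entry; the real check honors full gitignore pattern semantics:

```shell
# Sketch of the commit_docs auto-detection rule (simplified gitignore matching).
commit_docs_for() {
  if grep -qE '^\.planning/?$' "$1/.gitignore" 2>/dev/null
  then echo false   # .planning/ is ignored: committing it would error
  else echo true
  fi
}
```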
|---------|------|---------|-------------|
| `hooks.context_warnings` | boolean | `true` | Show context window usage warnings via context monitor hook |
| `hooks.workflow_guard` | boolean | `false` | Warn when file edits happen outside GSD workflow context (advises using `/gsd-quick` or `/gsd-fast`) |
| `statusline.show_last_command` | boolean | `false` | Append `last: /<cmd>` suffix to the statusline showing the most recently invoked slash command. Opt-in; reads the active session transcript to extract the latest `<command-name>` tag (closes #2538) |
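Extracting the most recent command tag from a transcript can be sketched as follows. The `<command-name>` tag shape is taken from the description above; the function name is illustrative:

```shell
# Sketch: pull the latest <command-name> tag from a session transcript.
last_command() {
  grep -o '<command-name>[^<]*</command-name>' "$1" | tail -n 1 | sed 's/<[^>]*>//g'
}
```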
The prompt injection guard hook (`gsd-prompt-guard.js`) is always active and cannot be disabled — it's a security feature, not a workflow toggle.
### How It Works
At spawn time, workflows call `gsd-sdk query agent-skills <type>` (or legacy `node gsd-tools.cjs agent-skills <type>`) to load configured skills. If skills exist for the agent type, they are injected as an `<agent_skills>` block in the Task() prompt:
```xml
<agent_skills>
...
</agent_skills>
```

If no skills are configured, the block is omitted (zero overhead).
Set skills via the CLI:
```bash
gsd-sdk query config-set agent_skills.gsd-executor '["skills/my-skill"]'
```
---
Toggle optional capabilities via the `features.*` config namespace.
```bash
# Enable a feature
gsd-sdk query config-set features.global_learnings true
# Disable a feature
gsd-sdk query config-set features.thinking_partner false
```
The `features.*` namespace is a dynamic key pattern — new feature flags can be added without modifying `VALID_CONFIG_KEYS`. Any key matching `features.<name>` is accepted by the config system.
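The acceptance rule can be sketched as a pattern check. The exact character class is an assumption; the point is that the key name segment is open-ended:

```shell
# Sketch: any features.<name> key passes validation (pattern is illustrative).
is_valid_feature_key() { echo "$1" | grep -qE '^features\.[a-z0-9_]+$'; }
```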
Settings for the security enforcement feature (v1.31). All follow the **absent = enabled** pattern. These keys live under `workflow.*` in `.planning/config.json`, matching the shipped template and the runtime reads in `workflows/plan-phase.md`, `workflows/execute-phase.md`, `workflows/secure-phase.md`, and `workflows/verify-work.md`. That namespace is where the workflows and installer read and write them; setting these keys at the top level of `config.json` is silently ignored.
| Setting | Type | Default | Description |
|---------|------|---------|-------------|
| `workflow.security_enforcement` | boolean | `true` | Enable threat-model-anchored security verification via `/gsd-secure-phase`. When `false`, security checks are skipped entirely |
---
## Decision Coverage Gates (`workflow.context_coverage_gate`)
When `discuss-phase` writes implementation decisions into CONTEXT.md
`<decisions>`, two gates ensure those decisions survive the trip into
plans and shipped code (issue #2492).
| Setting | Type | Default | Description |
|---------|------|---------|-------------|
| `workflow.context_coverage_gate` | boolean | `true` | Toggle for both decision-coverage gates. When `false`, both the plan-phase translation gate and the verify-phase validation gate skip silently. |
### What the gates do
**Plan-phase translation gate (BLOCKING).** Runs immediately after the
existing requirements coverage gate, before plans are committed. For each
trackable decision in `<decisions>`, it checks that the decision id
(`D-NN`) or its text appears in at least one plan's `must_haves`,
`truths`, or body. A miss surfaces the missing decision by id and refuses
to mark the phase planned.
**Verify-phase validation gate (NON-BLOCKING).** Runs alongside the other
verify steps. Searches every shipped artifact (PLAN.md, SUMMARY.md, files
modified, recent commit subjects) for each trackable decision. Misses are
written to VERIFICATION.md as a warning section but do **not** flip the
overall verification status. The asymmetry is deliberate — by verify time
the work is done, and a fuzzy substring miss should not fail an otherwise
green phase.
### How to write decisions the gates accept
The discuss-phase template already produces `D-NN`-numbered decisions.
The gate is happiest when:
1. Every plan that implements a decision **cites the id** somewhere —
`must_haves.truths: ["D-12: bit offsets exposed"]` or a `D-12:` mention
in the plan body. Strict id match is the cheapest, deterministic path.
2. Soft phrase matching is a fallback for paraphrases — if a 6+-word slice
of the decision text appears verbatim in a plan/summary, it counts.
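The two matching paths can be sketched as a strict id check with a verbatim-phrase fallback. This simplification takes the fallback phrase as an argument rather than deriving 6-word slices automatically; the function name is illustrative:

```shell
# Sketch: strict D-NN id match first, verbatim phrase fallback second.
decision_covered() {  # $1=decision id  $2=fallback phrase  $3=plan/summary file
  grep -qw "$1" "$3" && return 0  # -w gives the word-boundary match (D-1 vs D-12)
  grep -qF "$2" "$3"              # fixed-string fallback for paraphrase coverage
}
```

The `-w` flag is what keeps `D-1` from falsely matching `D-12`: the digit after `1` is a word character, so no whole-word match occurs.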
### Opt-outs
A decision is **not** subject to the gates when any of the following
apply:
- It lives under the `### Claude's Discretion` heading inside `<decisions>`.
- It is tagged `[informational]`, `[folded]`, or `[deferred]` in its
bullet (e.g., `- **D-08 [informational]:** Naming style for internal
helpers`).
Use these escape hatches when a decision genuinely doesn't need plan
coverage — implementation discretion, future ideas captured for the
record, or items already deferred to a later phase.
---
## Review Settings
Configure per-CLI model selection for `/gsd-review`. When set, overrides the CLI's default model for that reviewer.
| `review.models.opencode` | string | (CLI default) | Model used when `--opencode` reviewer is invoked |
| `review.models.qwen` | string | (CLI default) | Model used when `--qwen` reviewer is invoked |
| `review.models.cursor` | string | (CLI default) | Model used when `--cursor` reviewer is invoked |
| `review.models.ollama` | string | (server default) | Model name passed to Ollama when `--ollama` reviewer is invoked. If unset, the first available model reported by the server is used (e.g. `llama3`). Set to a specific tag: `gsd config-set review.models.ollama codellama` |
| `review.models.lm_studio` | string | (server default) | Model name passed to LM Studio when `--lm-studio` reviewer is invoked. If unset, the first available model reported by the server is used. |
| `review.models.llama_cpp` | string | (server default) | Model name passed to llama.cpp when `--llama-cpp` reviewer is invoked. If unset, the first model reported by `/v1/models` is used. |
| `review.ollama_host` | string | `http://localhost:11434` | Base URL of the Ollama server. Override when running Ollama on a non-default port or remote host: `gsd config-set review.ollama_host http://192.168.1.10:11434` |
| `review.lm_studio_host` | string | `http://localhost:1234` | Base URL of the LM Studio local server. Override when using a non-default port. |
| `review.llama_cpp_host` | string | `http://localhost:8080` | Base URL of the llama.cpp server (`llama-server`). Override when using a non-default port. |
### Example
Valid override values: `opus`, `sonnet`, `haiku`, `inherit`, or any fully-qualified model ID (e.g., `"openai/o3"`, `"google/gemini-2.5-pro"`).
`model_overrides` can be set in either `.planning/config.json` (per-project)
or `~/.gsd/defaults.json` (global). Per-project entries win on conflict and
non-conflicting global entries are preserved, so you can tune a single
agent's model in one repo without re-setting global defaults. This applies
uniformly across Claude Code, Codex, OpenCode, Kilo, and the other
supported runtimes. On Codex and OpenCode, the resolved model is embedded
into each agent's static config at install time — `spawn_agent` and
OpenCode's `task` interface do not accept an inline `model` parameter, so
running `gsd install <runtime>` after editing `model_overrides` is required
for the change to take effect. See issue #2256.
### Non-Claude Runtimes (Codex, OpenCode, Gemini CLI, Kilo)
When GSD is installed for a non-Claude runtime, the installer automatically sets `resolve_model_ids: "omit"` in `~/.gsd/defaults.json`. This causes GSD to return an empty model parameter for all agents, so each agent uses whatever model the runtime is configured with. No additional setup is needed for the default case.
| `true` | Maps aliases to full Claude model IDs (`claude-opus-4-6`) | Claude Code with API that requires full IDs |
| `"omit"` | Returns empty string (runtime picks its default) | Non-Claude runtimes (Codex, OpenCode, Gemini CLI, Kilo) |
### Runtime-Aware Profiles (#2517)
When `runtime` is set, profile tiers (`opus`/`sonnet`/`haiku`) resolve to runtime-native model IDs instead of Claude aliases. This lets a single shared `.planning/config.json` work cleanly across Claude and Codex.
**Built-in tier maps:**
| Runtime | `opus` | `sonnet` | `haiku` | reasoning_effort |
|---------|--------|----------|---------|------------------|
| `claude` | `claude-opus-4-6` | `claude-sonnet-4-6` | `claude-haiku-4-5` | (not used) |
| `codex` | `gpt-5.4` | `gpt-5.3-codex` | `gpt-5.4-mini` | `xhigh` / `medium` / `medium` |
**Codex example** — one config, tiered models, no large `model_overrides` block:
```json
{
"runtime": "codex",
"model_profile": "balanced"
}
```
This resolves `gsd-planner` → `gpt-5.4` (xhigh), `gsd-executor` → `gpt-5.3-codex` (medium), `gsd-codebase-mapper` → `gpt-5.4-mini` (medium). The Codex installer embeds `model = "..."` and `model_reasoning_effort = "..."` in each generated agent TOML.
**Claude example** — explicit opt-in resolves to full Claude IDs (no `resolve_model_ids: true` needed):
```json
{
"runtime": "claude",
"model_profile": "quality"
}
```
**Per-runtime overrides** — replace one or more tier defaults:
```json
{
"runtime": "codex",
"model_profile": "quality",
"model_profile_overrides": {
"codex": {
"opus": "gpt-5-pro",
"haiku": { "model": "gpt-5-nano", "reasoning_effort": "low" }
}
}
}
```
**Precedence (highest to lowest):**
1. `model_overrides[<agent>]` — explicit per-agent ID always wins.
2. **Runtime-aware tier resolution** (this section) — when `runtime` is set and profile is not `inherit`.
3. `resolve_model_ids: "omit"` — returns empty string when no `runtime` is set.
4. Claude-native default — `model_profile` tier as alias (current default).
5. `inherit` — propagates literal `inherit` for `Task(model="inherit")` semantics.
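The first four precedence rules can be sketched as a cascade. This is an illustrative simplification, not the real resolver: the `inherit` rule is omitted, and the `sonnet` fallback alias stands in for whatever tier the active `model_profile` selects:

```shell
# Sketch of model-resolution precedence (rules 1-4, simplified).
resolve_model() {  # $1=per-agent override  $2=runtime tier id  $3=resolve_model_ids
  if [ -n "$1" ]; then echo "$1"          # 1. model_overrides[<agent>] always wins
  elif [ -n "$2" ]; then echo "$2"        # 2. runtime-aware tier resolution
  elif [ "$3" = "omit" ]; then echo ""    # 3. omit: empty string, runtime default
  else echo "sonnet"                      # 4. Claude-native alias default
  fi
}
```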
**Backwards compatibility.** Setups without `runtime` set see zero behavior change — every existing config continues to work identically. Codex installs that auto-set `resolve_model_ids: "omit"` continue to omit the model field unless the user opts in by setting `runtime: "codex"`.
**Unknown runtimes.** If `runtime` is set to a value with no built-in tier map and no `model_profile_overrides[<runtime>]`, GSD falls back to the Claude-alias safe default rather than emit a model ID the runtime cannot accept. To support a new runtime, populate `model_profile_overrides.<runtime>.{opus,sonnet,haiku}` with valid IDs.
### Profile Philosophy
| Profile | Philosophy | When to Use |
| `TESTING.md` | Test infrastructure, coverage, patterns |
| `INTEGRATIONS.md` | External services, APIs, third-party dependencies |
**Incremental remap — `--paths` (#2003):** The mapper accepts an optional
`--paths <p1,p2,...>` scope hint. When provided, it restricts exploration
to the listed repo-relative prefixes instead of scanning the whole tree.
This is the pathway used by the post-execute codebase-drift gate to refresh
only the subtrees the phase actually changed. Each produced document carries
`last_mapped_commit` in its YAML frontmatter so drift can be measured
against the mapping point, not HEAD.
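Reading the mapping point back out is a one-line frontmatter lookup. The key name comes from the paragraph above; the exact frontmatter layout is assumed:

```shell
# Sketch: read last_mapped_commit from a mapped doc's YAML frontmatter.
last_mapped() { awk '/^last_mapped_commit:/ { print $2; exit }' "$1"; }
```

Drift is then measured as `git diff --name-status $(last_mapped doc.md)..HEAD` rather than against an arbitrary baseline.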
### 27a. Post-Execute Codebase Drift Detection
**Introduced by:** #2003
**Trigger:** Runs automatically at the end of every `/gsd:execute-phase`
**Configuration:**
- `workflow.drift_threshold` (integer, default `3`) — minimum new
structural elements before the gate acts.
- `workflow.drift_action` (`warn` | `auto-remap`, default `warn`) —
warn-only or spawn `gsd-codebase-mapper` with `--paths` scoped to
affected subtrees.
**What counts as drift:**
- New directory outside mapped paths
- New barrel export at `(packages|apps)/*/src/index.*`
- New migration file (supabase/prisma/drizzle/src/migrations/…)
- New route module under `routes/` or `api/`
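Counting drift elements from a `--name-status` diff can be sketched with one pattern over added paths. The regex is illustrative: it covers three of the four categories (barrel exports, migrations, route modules); the "new directory outside mapped paths" check needs the mapped-path list and is omitted here:

```shell
# Sketch: count added paths that match the drift categories (patterns illustrative).
count_drift() {
  grep -cE '^A[[:space:]]+((packages|apps)/[^/]+/src/index\.|.*migrations/|(routes|api)/)'
}
```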
**Non-blocking guarantee:** any internal failure (missing STRUCTURE.md,
git errors, mapper spawn failure) logs a single line and the phase
continues. Drift detection cannot fail verification.
**Requirements:**
- REQ-DRIFT-01: System MUST detect the four drift categories from `git diff
--name-status last_mapped_commit..HEAD`
- REQ-DRIFT-02: Action fires only when element count ≥ `workflow.drift_threshold`
- REQ-DRIFT-03: `warn` action MUST NOT spawn any agent
- REQ-DRIFT-04: `auto-remap` action MUST pass sanitized `--paths` to the mapper
- REQ-DRIFT-05: Detection/remap failure MUST be non-blocking for `/gsd:execute-phase`
- REQ-DRIFT-06: `last_mapped_commit` round-trip through YAML frontmatter
on each `.planning/codebase/*.md` file
---
## Utility Features
- REQ-CTXRED-01: System MUST truncate oversized markdown artifacts to fit within context budgets
- REQ-CTXRED-02: System MUST order prompts for cache-friendly assembly (stable prefixes first)
- REQ-CTXRED-03: Reduction MUST preserve essential information (headings, requirements, task structure)
- REQ-CTXRED-04: Skill `description:` fields MUST be ≤ 100 chars; enforced by `npm run lint:descriptions` (see `scripts/lint-descriptions.cjs` and `tests/enh-2789-description-budget.test.cjs`)
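The REQ-CTXRED-04 budget check reduces to a length comparison. A minimal sketch of the rule the lint script enforces (the shipped script also walks the skill files and parses frontmatter, which is omitted here):

```shell
# Sketch of the 100-char description budget from REQ-CTXRED-04.
check_budget() { [ "${#1}" -le 100 ]; }
```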
**Process:**
1. **Measure** — Calculate total prompt size for the workflow
{
"generated": "2026-04-20",
"generated": "2026-04-30",
"families": {
"agents": [
"gsd-advisor-researcher",
@@ -56,6 +56,7 @@
"/gsd-discuss-phase",
"/gsd-do",
"/gsd-docs-update",
"/gsd-edit-phase",
"/gsd-eval-review",
"/gsd-execute-phase",
"/gsd-explore",
"/gsd-manager",
"/gsd-map-codebase",
"/gsd-milestone-summary",
"/gsd-mvp-phase",
"/gsd-new-milestone",
"/gsd-new-project",
"/gsd-new-workspace",
"/gsd-session-report",
"/gsd-set-profile",
"/gsd-settings",
"/gsd-settings-advanced",
"/gsd-settings-integrations",
"/gsd-ship",
"/gsd-sketch",
"/gsd-sketch-wrap-up",
"/gsd-spike",
"/gsd-spike-wrap-up",
"/gsd-stats",
"/gsd-sync-skills",
"/gsd-thread",
"/gsd-ui-phase",
"/gsd-ui-review",
"discuss-phase.md",
"do.md",
"docs-update.md",
"edit-phase.md",
"eval-review.md",
"execute-phase.md",
"execute-plan.md",
"extract_learnings.md",
"fast.md",
"forensics.md",
"graduation.md",
"health.md",
"help.md",
"import.md",
"manager.md",
"map-codebase.md",
"milestone-summary.md",
"mvp-phase.md",
"new-milestone.md",
"new-project.md",
"new-workspace.md",
"scan.md",
"secure-phase.md",
"session-report.md",
"settings-advanced.md",
"settings-integrations.md",
"settings.md",
"ship.md",
"sketch-wrap-up.md",
"spike-wrap-up.md",
"spike.md",
"stats.md",
"sync-skills.md",
"transition.md",
"ui-phase.md",
"ui-review.md",
"decimal-phase-calculation.md",
"doc-conflict-engine.md",
"domain-probes.md",
"execute-mvp-tdd.md",
"executor-examples.md",
"gate-prompts.md",
"gates.md",
"model-profiles.md",
"phase-argument-parsing.md",
"planner-antipatterns.md",
"planner-chunked.md",
"planner-gap-closure.md",
"planner-mvp-mode.md",
"planner-reviews.md",
"planner-revision.md",
"planner-source-audit.md",
"project-skills-discovery.md",
"questioning.md",
"revision-loop.md",
"scout-codebase.md",
"skeleton-template.md",
"sketch-interactivity.md",
"sketch-theme-system.md",
"sketch-tooling.md",
"sketch-variant-patterns.md",
"spidr-splitting.md",
"tdd.md",
"thinking-models-debug.md",
"thinking-models-execution.md",
"ui-brand.md",
"universal-anti-patterns.md",
"user-profiling.md",
"user-story-template.md",
"verification-overrides.md",
"verification-patterns.md",
"verify-mvp-mode.md",
"workstream-flag.md"
],
"cli_modules": [
"artifacts.cjs",
"audit.cjs",
"commands.cjs",
"config-schema.cjs",
"config.cjs",
"core.cjs",
"decisions.cjs",
"docs.cjs",
"drift.cjs",
"frontmatter.cjs",
"gap-checker.cjs",
"graphify.cjs",
"gsd2-import.cjs",
"init.cjs",
"install-profiles.cjs",
"intel.cjs",
"learnings.cjs",
"milestone.cjs",
"profile-pipeline.cjs",
"roadmap.cjs",
"schema-detect.cjs",
"secrets.cjs",
"security.cjs",
"state.cjs",
"template.cjs",
---
## Commands (87 shipped)
Full roster at `commands/gsd/*.md`. The groupings below mirror `docs/COMMANDS.md` section order; each row carries the command name, a one-line role derived from the command's frontmatter `description:`, and a link to the source file. `tests/command-count-sync.test.cjs` locks the count against the filesystem.
| `/gsd-list-workspaces` | List active GSD workspaces and their status. | [commands/gsd/list-workspaces.md](../commands/gsd/list-workspaces.md) |
| `/gsd-remove-workspace` | Remove a GSD workspace and clean up worktrees. | [commands/gsd/remove-workspace.md](../commands/gsd/remove-workspace.md) |
| `/gsd-discuss-phase` | Gather phase context through adaptive questioning before planning. | [commands/gsd/discuss-phase.md](../commands/gsd/discuss-phase.md) |
| `/gsd-mvp-phase` | Plan a phase as a vertical MVP slice — user story, SPIDR splitting, then plan-phase. | [commands/gsd/mvp-phase.md](../commands/gsd/mvp-phase.md) |
| `/gsd-spec-phase` | Socratic spec refinement producing a SPEC.md with falsifiable requirements. | [commands/gsd/spec-phase.md](../commands/gsd/spec-phase.md) |
| `/gsd-ui-phase` | Generate UI design contract (UI-SPEC.md) for frontend phases. | [commands/gsd/ui-phase.md](../commands/gsd/ui-phase.md) |
| `/gsd-ai-integration-phase` | Generate AI design contract (AI-SPEC.md) via framework selection, research, and eval planning. | [commands/gsd/ai-integration-phase.md](../commands/gsd/ai-integration-phase.md) |
| Command | Role | Source |
|---------|------|--------|
| `/gsd-add-phase` | Add phase to end of current milestone in roadmap. | [commands/gsd/add-phase.md](../commands/gsd/add-phase.md) |
| `/gsd-edit-phase` | Edit any field of an existing roadmap phase in place, preserving number and position. | [commands/gsd/edit-phase.md](../commands/gsd/edit-phase.md) |
| `/gsd-insert-phase` | Insert urgent work as decimal phase (e.g., 72.1) between existing phases. | [commands/gsd/insert-phase.md](../commands/gsd/insert-phase.md) |
| `/gsd-remove-phase` | Remove a future phase from roadmap and renumber subsequent phases. | [commands/gsd/remove-phase.md](../commands/gsd/remove-phase.md) |
| `/gsd-add-tests` | Generate tests for a completed phase based on UAT criteria and implementation. | [commands/gsd/add-tests.md](../commands/gsd/add-tests.md) |
| `/gsd-sketch-wrap-up` | Package sketch design findings into a persistent project skill for future build conversations. | [commands/gsd/sketch-wrap-up.md](../commands/gsd/sketch-wrap-up.md) |
| `/gsd-profile-user` | Generate developer behavioral profile and Claude-discoverable artifacts. | [commands/gsd/profile-user.md](../commands/gsd/profile-user.md) |
| `/gsd-settings` | Configure GSD workflow toggles and model profile. | [commands/gsd/settings.md](../commands/gsd/settings.md) |
| `/gsd-settings-advanced` | Power-user configuration — plan bounce, timeouts, branch templates, cross-AI execution, runtime knobs. | [commands/gsd/settings-advanced.md](../commands/gsd/settings-advanced.md) |
| `/gsd-settings-integrations` | Configure third-party API keys, code-review CLI routing, and agent-skill injection. | [commands/gsd/settings-integrations.md](../commands/gsd/settings-integrations.md) |
| `/gsd-set-profile` | Switch model profile for GSD agents (quality/balanced/budget/inherit). | [commands/gsd/set-profile.md](../commands/gsd/set-profile.md) |
| `/gsd-pr-branch` | Create a clean PR branch by filtering out `.planning/` commits. | [commands/gsd/pr-branch.md](../commands/gsd/pr-branch.md) |
| `/gsd-sync-skills` | Sync managed GSD skill directories across runtime roots for multi-runtime users. | [commands/gsd/sync-skills.md](../commands/gsd/sync-skills.md) |
| `/gsd-update` | Update GSD to latest version with changelog display. | [commands/gsd/update.md](../commands/gsd/update.md) |
| `/gsd-reapply-patches` | Reapply local modifications after a GSD update. | [commands/gsd/reapply-patches.md](../commands/gsd/reapply-patches.md) |
| `/gsd-help` | Show available GSD commands and usage guide. | [commands/gsd/help.md](../commands/gsd/help.md) |
---
## Workflows (85 shipped)
Full roster at `get-shit-done/workflows/*.md`. Workflows are thin orchestrators that commands reference internally; most are not read directly by end users. Rows below map each workflow file to its role (derived from the `<purpose>` block) and, where applicable, to the command that invokes it.
| `discuss-phase-assumptions.md` | Assumptions-mode discuss — extract implementation decisions via codebase-first analysis. | `/gsd-discuss-phase` (when `discuss_mode=assumptions`) |
| `discuss-phase-power.md` | Power-user discuss — pre-generate all questions into a JSON state file + HTML UI. | `/gsd-discuss-phase --power` |
| `discuss-phase.md` | Extract implementation decisions through iterative gray-area discussion. | `/gsd-discuss-phase` |
| `mvp-phase.md` | Plan a phase as a vertical MVP slice — user story, SPIDR splitting, then plan-phase. | `/gsd-mvp-phase` |
| `do.md` | Route freeform text from the user to the best matching GSD command. | `/gsd-do` |
| `docs-update.md` | Generate, update, and verify canonical and hand-written project documentation. | `/gsd-docs-update` |
| `edit-phase.md` | Edit any field of an existing phase in ROADMAP.md in place, preserving number and position. | `/gsd-edit-phase` |
| `eval-review.md` | Retroactive audit of an implemented AI phase's evaluation coverage. | `/gsd-eval-review` |
| `execute-phase.md` | Execute all plans in a phase using wave-based parallel execution. | `/gsd-execute-phase` |
| `execute-plan.md` | Execute a phase prompt (PLAN.md) and create the outcome summary (SUMMARY.md). | `execute-phase.md` (per-plan subagent) |
| `extract_learnings.md` | Extract decisions, lessons, patterns, and surprises from completed phase artifacts. | `/gsd-extract-learnings` |
| `fast.md` | Execute a trivial task inline without subagent overhead. | `/gsd-fast` |
| `forensics.md` | Forensics investigation of failed workflows — git, artifacts, and state analysis. | `/gsd-forensics` |
| `graduation.md` | Cluster recurring LEARNINGS.md items across phases and surface HITL promotion candidates. | `transition.md` (graduation_scan step) |
| `health.md` | Validate `.planning/` directory integrity and report actionable issues. | `/gsd-health` |
| `help.md` | Display the complete GSD command reference. | `/gsd-help` |
| `import.md` | Ingest external plans with conflict detection against existing project decisions. | `/gsd-import` |
| `secure-phase.md` | Retroactive threat-mitigation audit for a completed phase. | `/gsd-secure-phase` |
| `session-report.md` | Session report — token usage, work summary, outcomes. | `/gsd-session-report` |
| `settings.md` | Configure GSD workflow toggles and model profile. | `/gsd-settings`, `/gsd-set-profile` |
| `settings-advanced.md` | Configure GSD power-user knobs — plan bounce, timeouts, branch templates, cross-AI execution, runtime knobs. | `/gsd-settings-advanced` |
| `settings-integrations.md` | Configure third-party API keys (Brave/Firecrawl/Exa), `review.models.<cli>` CLI routing, and `agent_skills.<agent-type>` injection with masked (`****<last-4>`) display. | `/gsd-settings-integrations` |
| `ship.md` | Create PR, run review, and prepare for merge after verification. | `/gsd-ship` |
| `sketch.md` | Explore design directions through throwaway HTML mockups with 2-3 variants per sketch. | `/gsd-sketch` |
| `sketch-wrap-up.md` | Curate sketch findings and package them as a persistent `sketch-findings-[project]` skill. | `/gsd-sketch-wrap-up` |
| `spike.md` | Rapid feasibility validation through focused, throwaway experiments. | `/gsd-spike` |
| `spike-wrap-up.md` | Curate spike findings and package them as a persistent `spike-findings-[project]` skill. | `/gsd-spike-wrap-up` |
| `stats.md` | Project statistics rendering — phases, plans, requirements, git metrics. | `/gsd-stats` |
| `sync-skills.md` | Cross-runtime GSD skill sync — diff and apply `gsd-*` skill directories across runtime roots. | `/gsd-sync-skills` |
| `transition.md` | Phase-boundary transition workflow — workstream checks, state advancement. | `execute-phase.md`, `/gsd-next` |
| `ui-phase.md` | Generate UI-SPEC.md design contract via gsd-ui-researcher. | `/gsd-ui-phase` |
| `ui-review.md` | Retroactive 6-pillar visual audit via gsd-ui-auditor. | `/gsd-ui-review` |
@@ -262,7 +273,7 @@ Full roster at `get-shit-done/workflows/*.md`. Workflows are thin orchestrators
---
## References (57 shipped)
Full roster at `get-shit-done/references/*.md`. References are shared knowledge documents that workflows and agents `@-reference`. The groupings below match [`docs/ARCHITECTURE.md`](ARCHITECTURE.md#references-get-shit-donereferencesmd) — core, workflow, thinking-model clusters, and the modular planner decomposition.
@@ -296,6 +307,7 @@ Full roster at `get-shit-done/references/*.md`. References are shared knowledge
| `continuation-format.md` | Session continuation/resume format. |
| `domain-probes.md` | Domain-specific probing questions for discuss-phase. |
| `gate-prompts.md` | Gate/checkpoint prompt templates. |
| `scout-codebase.md` | Phase-type→codebase-map selection table for discuss-phase scout step (extracted via #2551). |
| `revision-loop.md` | Plan revision iteration patterns. |
| `universal-anti-patterns.md` | Universal anti-patterns to detect and avoid. |
| `artifact-types.md` | Planning artifact type definitions. |
@@ -310,6 +322,8 @@ Full roster at `get-shit-done/references/*.md`. References are shared knowledge
| `ai-frameworks.md` | AI framework decision-matrix reference for `gsd-framework-selector`. |
| `executor-examples.md` | Worked examples for the gsd-executor agent. |
| `doc-conflict-engine.md` | Shared conflict-detection contract for ingest/import workflows. |
| `execute-mvp-tdd.md` | Runtime gate semantics for execute-phase under MVP+TDD — pre-task failing-test verification, end-of-phase blocking review. |
| `verify-mvp-mode.md` | UAT framing rules for MVP-mode phases — user-flow-first ordering, deferred technical checks, user-story-format guard. |
### Sketch References
@@ -341,31 +355,41 @@ The `gsd-planner` agent is decomposed into a core agent plus reference modules t
| Reference | Role |
|-----------|------|
| `planner-antipatterns.md` | Planner anti-patterns and specificity examples. |
| `planner-chunked.md` | Chunked mode return formats (`## OUTLINE COMPLETE`, `## PLAN COMPLETE`) for Windows stdio hang mitigation. |
| `planner-gap-closure.md` | Gap-closure mode behavior (reads VERIFICATION.md, targeted replanning). |
| `planner-reviews.md` | Cross-AI review integration (reads REVIEWS.md from `/gsd-review`). |
| `planner-revision.md` | Plan revision patterns for iterative refinement. |
| `planner-source-audit.md` | Planner source-audit and authority-limit rules. |
| `planner-mvp-mode.md` | Vertical-slice planning rules for MVP mode. |
| `skeleton-template.md` | SKELETON.md template emitted for new-project Walking Skeleton (Phase 1 + `--mvp`). |
| `user-story-template.md` | User story format for MVP planning — "As a / I want to / So that" structured fields. |
| `spidr-splitting.md` | SPIDR splitting decomposition rules for handling large user stories in MVP mode. |
> **Subdirectory:** `get-shit-done/references/few-shot-examples/` contains additional few-shot examples (`plan-checker.md`, `verifier.md`) that are referenced from specific agents. These are not counted in the 53 top-level references.
---
## CLI Modules (31 shipped)
Full listing: `get-shit-done/bin/lib/*.cjs`.
| Module | Responsibility |
|--------|----------------|
| `artifacts.cjs` | Canonical artifact registry — known `.planning/` root file names; used by `gsd-health` W019 lint |
| `audit.cjs` | Audit dispatch, audit open sessions, audit storage helpers |
| `commands.cjs` | Misc CLI commands (slug, timestamp, todos, scaffolding, stats) |
| `config-schema.cjs` | Single source of truth for `VALID_CONFIG_KEYS` and dynamic key patterns; imported by both the validator and the config-schema-docs parity test |
| `config.cjs` | `config.json` read/write, section initialization; imports validator from `config-schema.cjs` |
| `core.cjs` | Error handling, output formatting, shared utilities, runtime fallbacks |
| `decisions.cjs` | Shared parser for CONTEXT.md `<decisions>` blocks (D-NN entries); used by `gap-checker.cjs` and intended for #2492 plan/verify decision gates |
| `docs.cjs` | Docs-update workflow init, Markdown scanning, monorepo detection |
| `drift.cjs` | Post-execute codebase structural drift detector (#2003): classifies file changes into new-dir/barrel/migration/route categories and round-trips `last_mapped_commit` frontmatter |
| `frontmatter.cjs` | YAML frontmatter CRUD operations |
| `gap-checker.cjs` | Post-planning gap analysis (#2493): unified REQUIREMENTS.md + CONTEXT.md decisions vs PLAN.md coverage report (`gsd-tools gap-analysis`) |
| `graphify.cjs` | Knowledge-graph build/query/status/diff for `/gsd-graphify` |
| `gsd2-import.cjs` | External-plan ingest for `/gsd-from-gsd2` |
| `init.cjs` | Compound context loading for each workflow type |
| `install-profiles.cjs` | Install profile allowlist + skill staging for `--minimal` install (#2762); single source of truth for which `gsd-*` skills/agents land in runtime config dirs |
| `intel.cjs` | Codebase intel store backing `/gsd-intel` and `gsd-intel-updater` |
| `learnings.cjs` | Cross-phase learnings extraction for `/gsd-extract-learnings` |
| `milestone.cjs` | Milestone archival, requirements marking |
@@ -375,6 +399,7 @@ Full listing: `get-shit-done/bin/lib/*.cjs`.
| `profile-pipeline.cjs` | User behavioral profiling data pipeline, session file scanning |
| `roadmap.cjs` | ROADMAP.md parsing, phase extraction, plan progress |
| `schema-detect.cjs` | Schema-drift detection for ORM patterns (Prisma, Drizzle, etc.) |
| `secrets.cjs` | Secret-config masking convention (`****<last-4>`) for integration keys managed by `/gsd-settings-integrations` — keeps plaintext out of `config-set` output |
| `security.cjs` | Path traversal prevention, prompt injection detection, safe JSON/shell helpers |
| `state.cjs` | STATE.md parsing, updating, progression, metrics |
| `template.cjs` | Template selection and filling with variable substitution |


@@ -17,13 +17,15 @@ Language versions: [English](README.md) · [Português (pt-BR)](pt-BR/README.md)
| [User Guide](USER-GUIDE.md) | All users | Workflow walkthroughs, troubleshooting, and recovery |
| [Context Monitor](context-monitor.md) | All users | Context window monitoring hook architecture |
| [Discuss Mode](workflow-discuss-mode.md) | All users | Assumptions vs interview mode for discuss-phase |
| [Canary Stream](CANARY.md) | Contributors, early adopters | `dev` → `@canary` dist-tag policy, when to install, rollback path |
## Quick Links
- **What's new:** see [CHANGELOG](../CHANGELOG.md) for current release notes, and upstream [README](../README.md) for release highlights
- **Canary preview:** [`docs/CANARY.md`](CANARY.md) — opt into the early-preview stream from `dev`. Active cut: [`v1.50.0-canary.1`](RELEASE-v1.50.0-canary.1.md)
- **Getting started:** [README](../README.md) → install → `/gsd-new-project`
- **Full workflow walkthrough:** [User Guide](USER-GUIDE.md)
- **All commands at a glance:** [Command Reference](COMMANDS.md)
- **Configuring GSD:** [Configuration Reference](CONFIGURATION.md)
- **How the system works internally:** [Architecture](ARCHITECTURE.md)
- **Contributing or extending:** [CLI Tools Reference](CLI-TOOLS.md) + [Agent Reference](AGENTS.md)


@@ -0,0 +1,84 @@
# v1.39.0-rc.4 Release Notes
Pre-release candidate. Published to npm under the `next` tag.
```bash
npx get-shit-done-cc@next
```
---
## What's in this release
### Added
**`--minimal` install flag** (alias `--core-only`) (#2762)
Writes only the six core skills needed to run the main workflow loop:
`new-project`, `discuss-phase`, `plan-phase`, `execute-phase`, `help`, `update`.
No `gsd-*` subagents are installed.
| Mode | Cold-start system-prompt overhead |
|------|-----------------------------------|
| full (default) | ~12k tokens |
| minimal | ~700 tokens |
Useful for local LLMs with 32K–128K context windows. Sonnet 4.6 / Opus 4.7 users
don't need it — the full surface is the right default for cloud models.
The install manifest records `mode: "minimal" | "full"`. Run `gsd update` without
`--minimal` at any time to expand to the full skill set.
---
### Fixed
**Codex install no longer corrupts `~/.codex/config.toml`** (#2760)
Four users confirmed the same breakage: the previous installer left
`~/.codex/config.toml` in a state that Codex rejected on launch, with manual file
cleanup as the only workaround.
The installer now:
- Strips legacy `[agents]` (single-bracket) and `[[agents]]` (sequence) blocks
unconditionally — both are invalid in the current Codex TOML schema, regardless of
whether a GSD marker is present.
- Emits the GSD-managed hook in the shape the user's config already uses:
`[[hooks.<Event>]]` namespaced AoT if any existing hook uses that form, otherwise
top-level `[[hooks]]`.
- Migrates any legacy `[hooks.<Event>]` (map format) to `[[hooks.<Event>]]` (array
format) during write.
- Writes atomically via a temp file + `renameSync` — no partial writes.
- Validates the post-write bytes with a strict TOML parser that rejects duplicate
keys, repeated table headers, trailing bytes after values, and unsupported value
types.
- On any pre-write or write-time failure, restores the pre-install snapshot and aborts
with a clear error instead of warn-and-continue.
---
## Installing the pre-release
```bash
# npm
npm install -g get-shit-done-cc@next
# npx (one-shot)
npx get-shit-done-cc@next
```
To pin to this exact RC:
```bash
npm install -g get-shit-done-cc@1.39.0-rc.4
```
---
## What's next
- Run `rc` again on the release branch to publish rc.5 if further fixes land before
finalization.
- Run `finalize` on the release workflow to promote `1.39.0` to `latest` when the RC
is stable.


@@ -0,0 +1,99 @@
# v1.39.0-rc.5 Release Notes
Pre-release candidate. Published to npm under the `next` tag.
```bash
npx get-shit-done-cc@next
```
---
## What's in this release
All fixes from rc.4, plus:
### Fixed
**Codex hooks migrator correctness hardening** (#2809)
Five edge-cases in the `[[hooks.<Event>]]` → `[[hooks.<Event>.hooks]]` two-level nested
schema migration path, discovered across five rounds of code review:
| Finding | Fix |
|---------|-----|
| `parseHooksBody` used a bare regex (`/^([\w.]+)\s*=/`) that silently dropped hyphenated keys such as `status-message` and any quoted TOML key | Replaced with `parseTomlKey()`, the existing full TOML key parser |
| `buildNestedBlock` unconditionally emitted `[[hooks.TYPE.hooks]]` even when no handler fields were present, producing an entry with `type = "command"` but no `command` | Added guard: matcher-only / handler-field-free sections emit only the event-entry block |
| `legacyMapSections` filter used `section.path.startsWith('hooks.')` without checking the segment count, so three-segment tables like `[hooks.SessionStart.hooks]` were misclassified as event entries and re-emitted as bogus nested events | Now uses `section.segments.length === 2` (same fix previously applied to `staleNamespacedAotSections`) |
| No regression test for quoted event names containing dots — `[[hooks."before.tool"]]` has a 2-segment path but 3 dot-parts, and a `split('.')` check would misclassify it | Regression test added; quoted-dot names are correctly treated as a single two-segment namespace |
| Handler command path assertion in install tests used a regex (`/gsd-check-update\.js/`) rather than the exact absolute path | Strengthened to `assert.strictEqual` with `path.join(codexHome, 'hooks', 'gsd-check-update.js')` |
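The first finding reproduces in two lines — `\w` matches only `[A-Za-z0-9_]`, so a hyphen or a quote terminates the key match (the regex is quoted from the table above; `parseHooksBody` itself is not reproduced here):

```javascript
// The bare key regex from the old parseHooksBody. A hyphen is not a
// word character, so hyphenated keys never reach the '=' and the
// line silently fails to parse as a key assignment.
const bareKeyRe = /^([\w.]+)\s*=/;

console.log(bareKeyRe.test('command = "run"'));        // true
console.log(bareKeyRe.test('status-message = "ok"'));  // false — key dropped
console.log(bareKeyRe.test('"quoted.key" = 1'));       // false — quoted keys dropped
```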
---
## What was in rc.4
### Added
**`--minimal` install flag** (alias `--core-only`) (#2762)
Writes only the six core skills needed to run the main workflow loop:
`new-project`, `discuss-phase`, `plan-phase`, `execute-phase`, `help`, `update`.
No `gsd-*` subagents are installed.
| Mode | Cold-start system-prompt overhead |
|------|-----------------------------------|
| full (default) | ~12k tokens |
| minimal | ~700 tokens |
Useful for local LLMs with 32K–128K context windows. Sonnet 4.6 / Opus 4.7 users
don't need it — the full surface is the right default for cloud models.
The install manifest records `mode: "minimal" | "full"`. Run `gsd update` without
`--minimal` at any time to expand to the full skill set.
### Fixed (rc.4)
**Codex install no longer corrupts `~/.codex/config.toml`** (#2760)
The installer now:
- Strips legacy `[agents]` (single-bracket) and `[[agents]]` (sequence) blocks
unconditionally — both are invalid in the current Codex TOML schema, regardless of
whether a GSD marker is present.
- Emits the GSD-managed hook in the shape the user's config already uses:
`[[hooks.<Event>]]` namespaced AoT if any existing hook uses that form, otherwise
top-level `[[hooks]]`.
- Migrates any legacy `[hooks.<Event>]` (map format) to `[[hooks.<Event>]]` (array
format) during write.
- Writes atomically via a temp file + `renameSync` — no partial writes.
- Validates the post-write bytes with a strict TOML parser that rejects duplicate
keys, repeated table headers, trailing bytes after values, and unsupported value
types.
- On any pre-write or write-time failure, restores the pre-install snapshot and aborts
with a clear error instead of warn-and-continue.
---
## Installing the pre-release
```bash
# npm
npm install -g get-shit-done-cc@next
# npx (one-shot)
npx get-shit-done-cc@next
```
To pin to this exact RC:
```bash
npm install -g get-shit-done-cc@1.39.0-rc.5
```
---
## What's next
- Run `rc` again on the release branch to publish rc.6 if further fixes land before
finalization.
- Run `finalize` on the release workflow to promote `1.39.0` to `latest` when the RC
is stable.


@@ -0,0 +1,116 @@
# v1.39.0-rc.6 Release Notes
Pre-release candidate. Published to npm under the `next` tag.
```bash
npx get-shit-done-cc@next
```
---
## What's in this release
**rc.6 is a republish of rc.5.** No new fixes were rolled in — `release/1.39.0`
was bumped from `1.39.0-rc.5` to `1.39.0-rc.6` without first being merged with
`main`, so the branch contents at the time of tag are byte-for-byte equivalent
to rc.5 plus the version-bump commit.
```bash
$ git log v1.39.0-rc.5..v1.39.0-rc.6 --pretty='%h %s'
388118d8 chore: bump to 1.39.0-rc.6
```
If you are already on `1.39.0-rc.5`, there is nothing new to install in rc.6.
The expected next step is an rc.7 cut that first merges `main` into
`release/1.39.0` so the eight fixes that landed after rc.5 reach the registry.
---
## What was in rc.5
### Fixed
**Codex hooks migrator correctness hardening** (#2809)
Five edge-cases in the `[[hooks.<Event>]]` → `[[hooks.<Event>.hooks]]` two-level
nested schema migration path, discovered across five rounds of code review:
| Finding | Fix |
|---------|-----|
| `parseHooksBody` used a bare regex (`/^([\w.]+)\s*=/`) that silently dropped hyphenated keys such as `status-message` and any quoted TOML key | Replaced with `parseTomlKey()`, the existing full TOML key parser |
| `buildNestedBlock` unconditionally emitted `[[hooks.TYPE.hooks]]` even when no handler fields were present, producing an entry with `type = "command"` but no `command` | Added guard: matcher-only / handler-field-free sections emit only the event-entry block |
| `legacyMapSections` filter used `section.path.startsWith('hooks.')` without checking the segment count, so three-segment tables like `[hooks.SessionStart.hooks]` were misclassified as event entries and re-emitted as bogus nested events | Now uses `section.segments.length === 2` (same fix previously applied to `staleNamespacedAotSections`) |
| No regression test for quoted event names containing dots — `[[hooks."before.tool"]]` has a 2-segment path but 3 dot-parts, and a `split('.')` check would misclassify it | Regression test added; quoted-dot names are correctly treated as a single two-segment namespace |
| Handler command path assertion in install tests used a regex (`/gsd-check-update\.js/`) rather than the exact absolute path | Strengthened to `assert.strictEqual` with `path.join(codexHome, 'hooks', 'gsd-check-update.js')` |
---
## What was in rc.4
### Added
**`--minimal` install flag** (alias `--core-only`) (#2762)
Writes only the six core skills needed to run the main workflow loop:
`new-project`, `discuss-phase`, `plan-phase`, `execute-phase`, `help`, `update`.
No `gsd-*` subagents are installed.
| Mode | Cold-start system-prompt overhead |
|------|-----------------------------------|
| full (default) | ~12k tokens |
| minimal | ~700 tokens |
Useful for local LLMs with 32K–128K context windows. Sonnet 4.6 / Opus 4.7 users
don't need it — the full surface is the right default for cloud models.
The install manifest records `mode: "minimal" | "full"`. Run `gsd update` without
`--minimal` at any time to expand to the full skill set.
### Fixed (rc.4)
**Codex install no longer corrupts `~/.codex/config.toml`** (#2760)
The installer now:
- Strips legacy `[agents]` (single-bracket) and `[[agents]]` (sequence) blocks
unconditionally — both are invalid in the current Codex TOML schema, regardless of
whether a GSD marker is present.
- Emits the GSD-managed hook in the shape the user's config already uses:
`[[hooks.<Event>]]` namespaced AoT if any existing hook uses that form, otherwise
top-level `[[hooks]]`.
- Migrates any legacy `[hooks.<Event>]` (map format) to `[[hooks.<Event>]]` (array
format) during write.
- Writes atomically via a temp file + `renameSync` — no partial writes.
- Validates the post-write bytes with a strict TOML parser that rejects duplicate
keys, repeated table headers, trailing bytes after values, and unsupported value
types.
- On any pre-write or write-time failure, restores the pre-install snapshot and aborts
with a clear error instead of warn-and-continue.
---
## Installing the pre-release
```bash
# npm
npm install -g get-shit-done-cc@next
# npx (one-shot)
npx get-shit-done-cc@next
```
To pin to this exact RC:
```bash
npm install -g get-shit-done-cc@1.39.0-rc.6
```
---
## What's next
- **rc.7** — cut from `release/1.39.0` after merging `main` into the release branch,
so the eight fixes that landed after rc.5 (#2828, #2829, #2831, #2832, #2835,
#2836, #2838, #2839) actually reach the registry.
- Run `finalize` on the release workflow to promote `1.39.0` to `latest` once an RC
with the full main-branch contents is stable.


@@ -0,0 +1,185 @@
# v1.39.0-rc.7 Release Notes
Pre-release candidate. Published to npm under the `next` tag.
```bash
npx get-shit-done-cc@next
```
---
## What's in this release
rc.7 is the first RC in the 1.39.0 train that rolls in the post-rc.5 fixes from
`main`. rc.6 was content-identical to rc.5 (`release/1.39.0` was bumped without
first being merged with `main` — see [#2856](https://github.com/gsd-build/get-shit-done/issues/2856)).
rc.7 syncs the release branch with `main` so all of the work below actually
reaches the registry.
### Added
- **Manual canary release workflow** — `.github/workflows/canary.yml` publishes
`{base}-canary.{N}` builds of `get-shit-done-cc` under the `canary` dist-tag on
demand via `workflow_dispatch` (manual trigger only). Optional `dry_run` boolean.
([#2828](https://github.com/gsd-build/get-shit-done/issues/2828))
### Fixed
- **`extractCurrentMilestone` no longer truncates ROADMAP.md at heading-like lines
inside fenced code blocks** — the milestone-end search now scans line-by-line while
tracking ` ``` ` / `~~~` fence state, so a line like `# Ops runbook (v1.0 compat)`
inside a code block no longer acts as a milestone boundary.
([#2787](https://github.com/gsd-build/get-shit-done/issues/2787))
- **`audit-uat` parser reads `human_verification:` from frontmatter array** — the
previous body-only regex was too strict and missed valid UAT items declared in
YAML frontmatter, surfacing false-positive open gaps at every milestone-completion
audit. ([#2788](https://github.com/gsd-build/get-shit-done/issues/2788))
- **Skill description anti-patterns trimmed; ≤ 100-char budget enforced** — three
anti-patterns eliminated across `commands/gsd/*.md`: flag documentation already in
`argument-hint:`, `Triggers:` keyword-stuffing lists, and numbered enumeration. New
CI lint gate `npm run lint:descriptions` fails if any description exceeds 100
chars. ([#2789](https://github.com/gsd-build/get-shit-done/issues/2789))
- **`gsd-sdk` binary collision with `@gsd-build/sdk` resolved** — workstream-aware
query registry now respects the `GSD_WORKSTREAM` env var; `gsd-tools` bin alias
added. ([#2791](https://github.com/gsd-build/get-shit-done/issues/2791))
- **`OpenCode` agents embed `model_profile_overrides.opencode.<tier>`** — per-tier
model overrides set via `/gsd-settings-advanced` are now propagated into generated
agent files. ([#2794](https://github.com/gsd-build/get-shit-done/issues/2794))
- **`roadmap update-plan-progress` accepts `--phase` flag form** — SDK arg-parsing
regression in v0.1.0 silently dropped `--phase`/`--name`/`--plans` flags, causing
STATE.md corruption. ([#2796](https://github.com/gsd-build/get-shit-done/issues/2796))
- **`context_window` added to `VALID_CONFIG_KEYS` allowlist** —
`/gsd-settings-advanced` could not set `context_window` because the key was missing
from the allowlist used by `config-set` validation.
([#2798](https://github.com/gsd-build/get-shit-done/issues/2798))
- **`gsd-tools init` dispatches `ingest-docs` handler** — `/gsd-ingest-docs` was
broken in v1.38.5 because the workflow called the new tool but no `ingest-docs`
init handler was registered. ([#2801](https://github.com/gsd-build/get-shit-done/issues/2801))
- **`config-get` honors `--default <value>` flag** — fallback for missing keys
ported from CJS into the SDK. ([#2803](https://github.com/gsd-build/get-shit-done/issues/2803))
- **`find-phase` returns `null` for archived phases** — when the current-milestone
phase had no directory yet, `init.plan-phase` / `init.execute-phase` returned the
archived prior-milestone directory instead of `null`, causing wrong-phase work.
([#2805](https://github.com/gsd-build/get-shit-done/issues/2805))
- **SKILL.md frontmatter `name:` migrated to hyphen form** — files that still used
the deprecated colon form (`gsd:cmd`) caused autocomplete to suggest `/gsd:command`.
([#2808](https://github.com/gsd-build/get-shit-done/issues/2808))
- **`gsd-sdk` resolvable in local-mode installs** — the previous `isLocal`
short-circuit returned before the PATH probe + self-link could run. When
`sdk/dist/cli.js` is present, local installs now run the same probe-and-link flow
as global installs. ([#2829](https://github.com/gsd-build/get-shit-done/issues/2829))
- **OpenCode `@file` references use absolute paths on all platforms** — OpenCode
does not shell-expand `$HOME` in `@file` references on any platform; the
Windows-only guard from #2376 left macOS/Linux producing literal `@$HOME/...`
strings. Guard now applies unconditionally for OpenCode.
([#2831](https://github.com/gsd-build/get-shit-done/issues/2831))
- **`gsd-sdk auto` detects Codex runtime correctly** — `auto` mode ignored
`runtime: codex` and routed through `@anthropic-ai/claude-agent-sdk`, producing
the `[FAILED] $0.00 0.1s` symptom on autonomous runs. New `runtime-gate` raises a
clear error for non-Claude runtimes; `resolveModel()` honours `GSD_RUNTIME` env
precedence and never injects a Claude profile id under non-Claude runtimes.
([#2832](https://github.com/gsd-build/get-shit-done/issues/2832))
- **CR-INTEGRATION tests aligned with hyphen-form skill names** — tests now parse
`Skill(skill="...")` invocations structurally and reject the legacy colon form.
([#2835](https://github.com/gsd-build/get-shit-done/issues/2835))
- **`audit-open` quick-task scanner accepts `${quick_id}-SUMMARY.md`** — the
bare-`SUMMARY.md` check produced false-positive `status: missing` for every
documented quick task. UAT terminal-status enum also adds `resolved` (matches
`execute-phase.md`'s post-gap-closure terminal).
([#2836](https://github.com/gsd-build/get-shit-done/issues/2836))
- **`quick.md` / `execute-phase.md` SUMMARY rescue handles gitignored `.planning/`** —
rescue blocks used `git ls-files --exclude-standard`, silently no-op'ing when
`.planning/` was excluded; the worktree was then deleted with the SUMMARY.
Replaced with filesystem-level `find` + idempotent `cp`.
([#2838](https://github.com/gsd-build/get-shit-done/issues/2838))
- **`/gsd-code-review-fix` cleanup tail is transactional** — JSON recovery sentinel
at `${phase_dir}/.review-fix-recovery-pending.json` is written after `git worktree
add` succeeds and removed only after `git worktree remove` returns. New runs that
find a pre-existing sentinel force-remove the orphan worktree, making the agent
self-healing across crashes. ([#2839](https://github.com/gsd-build/get-shit-done/issues/2839))
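The fence-state scan behind the `extractCurrentMilestone` fix ([#2787](https://github.com/gsd-build/get-shit-done/issues/2787)) can be sketched like this — a simplified, hypothetical version, not the shipped code:

```javascript
// Scan markdown line-by-line, tracking whether we are inside a fenced
// code block, so heading-like lines inside fences cannot act as a
// milestone boundary.
function findMilestoneEnd(lines, startIdx) {
  let fence = null; // open fence char ('`' or '~'), or null when outside
  for (let i = startIdx; i < lines.length; i++) {
    const fenceMatch = lines[i].match(/^(`{3,}|~{3,})/);
    if (fenceMatch) {
      if (fence === null) fence = fenceMatch[1][0];      // opening fence
      else if (fenceMatch[1][0] === fence) fence = null; // matching close
      continue;
    }
    // Only a heading OUTSIDE any fence ends the milestone section.
    if (fence === null && /^#\s/.test(lines[i])) return i;
  }
  return lines.length;
}
```

With this scan, `# Ops runbook (v1.0 compat)` inside a code block is skipped, while the same line at the top level still terminates the milestone.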
---
## What was in rc.6
```bash
$ git log v1.39.0-rc.5..v1.39.0-rc.6 --pretty='%h %s'
388118d8 chore: bump to 1.39.0-rc.6
```
rc.6 was a republish of rc.5 with no new content — `release/1.39.0` was bumped
without first being merged with `main`. See
[`RELEASE-v1.39.0-rc.6.md`](RELEASE-v1.39.0-rc.6.md) for the full context.
---
## What was in rc.5
### Fixed
**Codex hooks migrator correctness hardening** ([#2809](https://github.com/gsd-build/get-shit-done/issues/2809))
Five edge-cases in the `[[hooks.<Event>]]` → `[[hooks.<Event>.hooks]]` two-level
nested schema migration path, discovered across five rounds of code review:
| Finding | Fix |
|---------|-----|
| `parseHooksBody` used a bare regex (`/^([\w.]+)\s*=/`) that silently dropped hyphenated keys such as `status-message` and any quoted TOML key | Replaced with `parseTomlKey()`, the existing full TOML key parser |
| `buildNestedBlock` unconditionally emitted `[[hooks.TYPE.hooks]]` even when no handler fields were present, producing an entry with `type = "command"` but no `command` | Added guard: matcher-only / handler-field-free sections emit only the event-entry block |
| `legacyMapSections` filter used `section.path.startsWith('hooks.')` without checking the segment count, so three-segment tables like `[hooks.SessionStart.hooks]` were misclassified as event entries and re-emitted as bogus nested events | Now uses `section.segments.length === 2` (same fix previously applied to `staleNamespacedAotSections`) |
| No regression test for quoted event names containing dots — `[[hooks."before.tool"]]` has a 2-segment path but 3 dot-parts, and a `split('.')` check would misclassify it | Regression test added; quoted-dot names are correctly treated as a single two-segment namespace |
| Handler command path assertion in install tests used a regex (`/gsd-check-update\.js/`) rather than the exact absolute path | Strengthened to `assert.strictEqual` with `path.join(codexHome, 'hooks', 'gsd-check-update.js')` |
---
## What was in rc.4
### Added
**`--minimal` install flag** (alias `--core-only`) ([#2762](https://github.com/gsd-build/get-shit-done/issues/2762))
Writes only the six core skills needed to run the main workflow loop:
`new-project`, `discuss-phase`, `plan-phase`, `execute-phase`, `help`, `update`.
No `gsd-*` subagents are installed.
| Mode | Cold-start system-prompt overhead |
|------|-----------------------------------|
| full (default) | ~12k tokens |
| minimal | ~700 tokens |
The install manifest records `mode: "minimal" | "full"`. Run `gsd update` without
`--minimal` at any time to expand to the full skill set.
### Fixed (rc.4)
**Codex install no longer corrupts `~/.codex/config.toml`** ([#2760](https://github.com/gsd-build/get-shit-done/issues/2760))
The installer now strips legacy `[agents]` blocks, emits hooks in the user's
existing shape, migrates legacy `[hooks.<Event>]` map format to `[[hooks.<Event>]]`,
writes atomically via temp-file + `renameSync`, and validates post-write bytes
with a strict TOML parser.
---
## Installing the pre-release
```bash
# npm
npm install -g get-shit-done-cc@next
# npx (one-shot)
npx get-shit-done-cc@next
```
To pin to this exact RC:
```bash
npm install -g get-shit-done-cc@1.39.0-rc.7
```
---
## What's next
- Run `finalize` on the release workflow to promote `1.39.0` to `latest` once
rc.7 has soaked.


@@ -0,0 +1,94 @@
# v1.50.0-canary.1 Release Notes
First canary cut for the **1.50.0** train. Published to npm under the `canary` dist-tag.
```bash
npx get-shit-done-cc@canary
# or pin exact:
npm install -g get-shit-done-cc@1.50.0-canary.1
```
> **Canary stream caveat.** Canary builds come from the long-lived `dev` integration branch and may carry rough edges that the `next` (RC) and `latest` (stable) channels never see. Use canary when you want to exercise in-flight features early and report findings; do NOT pin production projects to it. See [CANARY.md](CANARY.md) for the stream policy and rollback path.
---
## Headline: Vertical MVP / TDD / UAT planning track
The 1.50.0 train opens with a four-phase vertical slice that adds an end-to-end "MVP mode" to the GSD planning pipeline — from project kickoff, through phase planning, through execution, through verification. Issue [#2826](https://github.com/gsd-build/get-shit-done/issues/2826) is the umbrella PRD.
### What's new
#### `/gsd plan-phase --mvp` — vertical-slice planning ([#2867](https://github.com/gsd-build/get-shit-done/pull/2867))
`/gsd plan-phase` learns a `--mvp` flag that flips the planner into vertical-slice mode. The planner reads MVP mode from an explicit `--mvp` CLI override, a phase's `**Mode:** mvp` ROADMAP entry, or `workflow.mvp_mode` in `.planning/config.json`, in that order of precedence (the CLI flag wins). Under MVP mode the planner:
- Surfaces a "Walking Skeleton" template for the very first phase of a new project — a thin end-to-end vertical slice that proves the wiring before any horizontal layer is built
- Suppresses horizontal-layer language ("data layer first, then business logic, then UI") in favor of user-flow-driven decomposition
- Emits the user story as a header at the top of `PLAN.md`
New required-reading injection: `references/planner-mvp-mode.md`. New parser surface: `roadmap.cjs` extracts a `mode` field on every phase lookup.
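The mode resolution can be sketched as follows — a hypothetical helper, assuming the `--mvp` CLI flag outranks the ROADMAP entry, which outranks the config key:

```javascript
// Resolve whether a phase is planned in MVP (vertical-slice) mode.
// Precedence: explicit --mvp/--no-mvp CLI flag > the phase's ROADMAP
// `**Mode:** mvp` entry > `workflow.mvp_mode` in .planning/config.json.
function resolveMvpMode({ cliFlag, roadmapMode, config }) {
  if (cliFlag !== undefined) return cliFlag;              // CLI override wins
  if (roadmapMode !== undefined) return roadmapMode === 'mvp';
  return Boolean(config && config.workflow && config.workflow.mvp_mode);
}
```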
#### `/gsd mvp-phase <N>` — guided user-story phase framing ([#2874](https://github.com/gsd-build/get-shit-done/pull/2874))
A new top-level command that walks the user through framing a phase as a vertical MVP slice before planning. Three structured prompts capture an "As a / I want to / So that" user story. If the story is too large, an interactive SPIDR (Spike / Path / Interface / Data / Rule) splitting flow surfaces a list of `/gsd add-phase` invocations to break the work apart. The command then:
- Mutates the ROADMAP entry to set `**Mode:** mvp` and replaces `**Goal:**` with the assembled user story
- Delegates to `/gsd plan-phase --mvp <N>` to produce the plan
Two new references: [`spidr-splitting.md`](../get-shit-done/references/spidr-splitting.md), [`user-story-template.md`](../get-shit-done/references/user-story-template.md).
#### Execute-phase MVP+TDD runtime gate ([#2878](https://github.com/gsd-build/get-shit-done/pull/2878))
When `MVP_MODE` and `TDD_MODE` are both true at execution time, `execute-phase` adds a per-task gate that requires a `test(<phase>-<plan>):` commit to exist before the corresponding `feat(...)` commit. The reference [`execute-mvp-tdd.md`](../get-shit-done/references/execute-mvp-tdd.md) documents the contract; the executor agent (`agents/gsd-executor.md`) gains an MVP+TDD Gate section that explains when the gate trips, what evidence it expects, and how to escalate via the documented escape hatch.
> **Known canary-bake item.** The current bash gate snippet uses some workflow variables that aren't fully wired (`${PLAN_ID}`, `${TASK_TDD}`) and the documented `--force-mvp-gate` escape hatch is referenced in the user-facing error message but not yet implemented in the argument parser. These are tracked as canary-bake follow-ups; the gate itself is functional for the dominant code path.
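The gate's ordering invariant can be sketched against a commit-subject list (e.g. from `git log --reverse --pretty=%s`) — a hypothetical check, not the shipped bash snippet:

```javascript
// MVP+TDD gate invariant: given commit subjects oldest-first, a
// test(<phase>-<plan>): commit must precede the matching feat(...) commit.
function tddGateSatisfied(subjects, phase, plan) {
  const tag = `${phase}-${plan}`;
  const testIdx = subjects.findIndex(s => s.startsWith(`test(${tag}):`));
  const featIdx = subjects.findIndex(s => s.startsWith(`feat(${tag}):`));
  if (featIdx === -1) return true; // no feature commit yet — nothing to gate
  return testIdx !== -1 && testIdx < featIdx;
}
```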
#### Verify-work MVP-mode UAT framing ([#2880](https://github.com/gsd-build/get-shit-done/pull/2880))
Under MVP mode, `verify-work` flips the UAT script's framing so user-flow steps come **before** technical correctness checks — the inverse of the default order. The verifier agent gains a `mvp_mode_verification` section. New reference: [`verify-mvp-mode.md`](../get-shit-done/references/verify-mvp-mode.md).
A user-story format guard at the top of `extract_tests` will halt verification if a phase claims `**Mode:** mvp` but its `**Goal:**` doesn't parse as `As a … I want to … so that …` — pointing the user at `/gsd mvp-phase <N>` to repair.
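The guard is essentially a shape check on the goal line. A minimal sketch, assuming a regex of this flavor (the shipped `extract_tests` pattern may differ):

```shell
# Hypothetical format guard; the real pattern lives in extract_tests.
GOAL="As a shopper, I want to pay with a saved card so that reordering takes one step."
if printf '%s' "$GOAL" | grep -qiE '^As an? .+ I want to .+ so that .+'; then
  echo "goal parses as a user story"
else
  echo "Goal is not a user story; run /gsd mvp-phase <N> to repair" >&2
fi
```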
#### Discovery & progress surfaces ([#2883](https://github.com/gsd-build/get-shit-done/pull/2883))
The MVP slice closes out with read-side surfaces:
- **`/gsd new-project`** prompts up front for **Vertical MVP** vs **Horizontal Layers** mode and seeds the milestone accordingly
- **`/gsd-progress`** emits a "User-flow next up" panel for MVP-mode phases, surfacing user-visible task names ahead of internal scaffolding
- **`/gsd-stats`** adds an "MVP phases: N" summary line when the roadmap contains any MVP-mode phases
- **`/gsd-graphify`** visually differentiates MVP-mode phase nodes from horizontal-layer phases in the rendered graph
---
## Bonus fixes also in this canary
- **`/gsd-progress` no longer cites stale CLAUDE.md project blocks** as the source for the "Next Up" section ([#2912](https://github.com/gsd-build/get-shit-done/issues/2912)) — explicit context-authority directive added to the report step.
(Other recent main-stream fixes — agent-skills CLI JSON wrap, audit-open ReferenceError, execute-phase branching, Hermes runtime — target the `next` stream and will arrive in the canary when they land in `dev`.)
---
## Install / upgrade
```bash
# Try the canary
npx get-shit-done-cc@canary
# Or pin exact
npm install -g get-shit-done-cc@1.50.0-canary.1
```
On first run, the installer's defensive purge rewrites any stale config blocks left by older GSD versions; no manual cleanup is needed.
## Reporting issues
If something breaks on canary, file against [the issue tracker](https://github.com/gsd-build/get-shit-done/issues) with the `bug` template and mention `1.50.0-canary.1` so it gets routed back into the dev stream rather than the stable stream.
## What ships next in this train
Pending dev-stream merges that should land before promotion to `next`:
- Resolve canary-bake items in the MVP+TDD gate (variable wiring + `--force-mvp-gate` parser)
- Sync recent main-stream fixes (`#2918`, `#2919`, `#2921`, `#2917`, `#2920`) into dev
- Ride a few canary cycles for real-user MVP/TDD/UAT feedback
When the dev stream stabilizes, the train promotes to `main` as `v1.50.0-rc.1` (the `next` channel).

View File

@@ -6,6 +6,7 @@ A detailed reference for workflows, troubleshooting, and configuration. For quic
## Table of Contents
- [End-to-End Walkthrough](#end-to-end-walkthrough)
- [Workflow Diagrams](#workflow-diagrams)
- [UI Design Contract](#ui-design-contract)
- [Spiking & Sketching](#spiking--sketching)
@@ -19,6 +20,241 @@ A detailed reference for workflows, troubleshooting, and configuration. For quic
---
## End-to-End Walkthrough
This walkthrough shows how GSD phases connect for a typical single-phase project — a small Node.js REST API that validates webhook signatures. Follow it to understand what each command does, what it creates, and how the next command consumes it.
### 1. Create the project
```
/gsd-new-project
```
GSD asks questions about your idea, spawns parallel research agents, extracts requirements, and creates a roadmap. You approve the roadmap before any code is written.
**Example output (abridged):**
```
> What are you building?
A webhook signature validator middleware for Express apps.
> Who's the user?
Backend developers integrating third-party webhooks (Stripe, GitHub, Shopify).
[Research agents run in parallel...]
[Requirements extracted...]
Roadmap (1 phase):
Phase 1 — Core middleware: HMAC-SHA256 signature validation,
timing-safe compare, configurable tolerance window.
Approve? [y/n]
```
**What gets created:**
```
.planning/
PROJECT.md # "Webhook validator middleware — Express, HMAC-SHA256..."
REQUIREMENTS.md # REQ-001: Validate signature header; REQ-002: Timing-safe...
ROADMAP.md # Phase 1 status: pending
STATE.md # Session memory, current position
```
`ROADMAP.md` excerpt:
```markdown
## Phase 1 — Core middleware
**Status:** pending
**Goal:** HMAC-SHA256 signature validation with timing-safe compare and a
configurable replay-protection tolerance window.
**Requirements:** REQ-001, REQ-002, REQ-003
```
### 2. Discuss and plan the phase
```
/gsd-discuss-phase 1
```
GSD reads the phase goal and asks about your implementation preferences before any planning happens. This is where you shape *how* it builds — not just *what* it builds.
```
> How should invalid signatures be handled?
Reject immediately with 401, log the raw header for debugging.
> Should the tolerance window be configurable per-route or global?
Global config, but allow per-route override via middleware options.
> Any library preferences for HMAC?
Node built-in crypto only — no extra dependencies.
```
**What gets created:** `.planning/phases/01-core-middleware/CONTEXT.md`
`CONTEXT.md` excerpt:
```markdown
## Implementation Decisions
- Invalid signatures → 401, log raw header
- Tolerance window → global default, per-route override via options object
- HMAC library → Node built-in crypto (no external deps)
- Error format → { error: "invalid_signature", ts: <epoch> }
```
Now plan the phase:
```
/gsd-plan-phase 1
```
GSD spawns four parallel research agents (stack, features, architecture, pitfalls), then a planner reads `CONTEXT.md` + research findings and creates atomic task plans. A plan-checker verifies each plan achieves the phase goal before saving.
**What gets created:**
```
.planning/phases/01-core-middleware/
RESEARCH.md # Findings: crypto.timingSafeEqual docs, replay attack patterns...
01-01-PLAN.md # Task: create validateSignature() core function
01-02-PLAN.md # Task: Express middleware wrapper + error handling
```
`01-01-PLAN.md` excerpt:
```xml
<task type="auto">
<name>Create validateSignature core function</name>
<files>src/validate.js, src/validate.test.js</files>
<action>
Use crypto.createHmac('sha256', secret).update(rawBody).digest('hex').
Compare with crypto.timingSafeEqual() — never === or ==.
Accept tolerance window in ms; reject if |timestamp - now| exceeds it.
</action>
<verify>npm test -- --grep "validateSignature"</verify>
<done>All timing-safe comparison tests pass; replay outside window returns false</done>
</task>
```
### 3. Execute
```
/gsd-execute-phase 1
```
GSD groups plans into waves (parallel where independent, sequential where dependent), spawns a fresh 200k-context executor per plan, and commits each task atomically.
```
Wave 1 (parallel):
[Executor A] → 01-01-PLAN.md (core function) ✓ committed
[Executor B] → 01-02-PLAN.md (middleware) ✓ committed
[Verifier] Checking codebase against phase goals...
REQ-001 validateSignature() ✓
REQ-002 timing-safe compare ✓
REQ-003 tolerance window ✓
Status: PASS
```
**Git history after execution:**
```
a1b2c3d feat(01-01): implement validateSignature with timingSafeEqual
d4e5f6g feat(01-02): add Express middleware wrapper and 401 error format
h7i8j9k chore(01): phase 1 verification — all requirements met
```
**What gets created:**
```
.planning/phases/01-core-middleware/
01-01-SUMMARY.md # "Implemented validateSignature(), 47 lines, tests passing"
01-02-SUMMARY.md # "Middleware wraps validator, logs raw header on 401"
VERIFICATION.md # REQ-001 ✓, REQ-002 ✓, REQ-003 ✓ — PASS
```
`STATE.md` is updated automatically to reflect the new phase status:
```markdown
## Current Position
Phase 1 — Core middleware: executed, pending verify
```
### 4. Verify
```
/gsd-verify-work 1
```
GSD extracts testable deliverables from the phase goal and walks you through them one at a time.
```
[1/3] Can you require the middleware and pass it to an Express route?
> yes
[2/3] Does a request with a valid signature return 200?
> yes
[3/3] Does a request with an invalid signature return 401 with { error: "invalid_signature" }?
> no — I'm getting a 500 instead
[Diagnosing...]
Root cause: middleware catches crypto.timingSafeEqual TypeError when
buffers are different lengths. Fix: normalize to same length before compare.
Fix plan created: .planning/phases/01-core-middleware/01-03-PLAN.md
Run /gsd-execute-phase 1 to apply.
```
After re-running execute and re-verifying:
```
All 3 checks passed. Phase 1 verified.
```
**What gets created:** `.planning/phases/01-core-middleware/UAT.md`
### What's next
Once a phase is verified, ship it:
```
/gsd-ship 1 # Creates a PR with auto-generated body
```
For multi-phase projects, repeat the loop:
```
/gsd-discuss-phase 2
/gsd-plan-phase 2
/gsd-execute-phase 2
/gsd-verify-work 2
```
Or let GSD figure out the next step automatically:
```
/gsd-next
```
When all phases are done:
```
/gsd-audit-milestone # Verify all requirements shipped
/gsd-complete-milestone # Archive, tag release
```
**Relevant flags covered in this walkthrough:**
| Flag | Command | When to use |
| ---- | ------- | ----------- |
| `--auto` | `/gsd-new-project` | Skip interactive questions, ingest from a PRD file |
| `--research` | `/gsd-quick` | Add a research agent to an ad-hoc task |
| `--validate` | `/gsd-quick` | Add plan-checking and post-execution verification |
| `--chain` | `/gsd-discuss-phase` | Auto-chain discuss → plan → execute without stopping |
| `--skip-research` | `/gsd-plan-phase` | Skip research agents when the domain is already familiar |
| `--draft` | `/gsd-ship` | Create a draft PR instead of a ready-for-review one |
For the full command reference with all flags, see [`docs/COMMANDS.md`](COMMANDS.md). For configuration options (model profiles, workflow agents, git branching), see [`docs/CONFIGURATION.md`](CONFIGURATION.md).
---
## Workflow Diagrams
### Full Project Lifecycle
@@ -165,18 +401,61 @@ By default, `/gsd-discuss-phase` asks open-ended questions about your implementa
**Enable:** Set `workflow.discuss_mode` to `'assumptions'` via `/gsd-settings`.
**How it works:**
1. Reads PROJECT.md, codebase mapping, and existing conventions
2. Generates a structured list of assumptions (tech choices, patterns, file locations)
3. Presents assumptions for you to confirm, correct, or expand
4. Writes CONTEXT.md from confirmed assumptions
**When to use:**
- Experienced developers who already know their codebase well
- Rapid iteration where open-ended questions slow you down
- Projects where patterns are well-established and predictable
See [docs/workflow-discuss-mode.md](workflow-discuss-mode.md) for the full discuss-mode reference.
### Decision Coverage Gates
The discuss-phase captures implementation decisions in CONTEXT.md under a
`<decisions>` block as numbered bullets (`- **D-01:** …`). Two gates — added
for issue #2492 — ensure those decisions survive into plans and shipped
code.
**Plan-phase translation gate (blocking).** After planning, GSD refuses to
mark the phase planned until every trackable decision appears in at least
one plan's `must_haves`, `truths`, or body. The gate names each missed
decision by id (`D-07: …`) so you know exactly what to add, move, or
reclassify.
**Verify-phase validation gate (non-blocking).** During verification, GSD
searches plans, SUMMARY.md, modified files, and recent commit messages for
each trackable decision. Misses are logged to VERIFICATION.md as a warning
section; verification status is unchanged. The asymmetry is deliberate:
blocking is cheap at plan time, when plans are still easy to amend, but
hostile at verify time, when the work is already done.
**Writing decisions the gate can match.** Two match modes:
1. **Strict id match (recommended).** Cite the decision id anywhere in a
plan that implements it — `must_haves.truths: ["D-12: bit offsets
exposed"]`, a bullet in the plan body, a frontmatter comment. This is
deterministic and unambiguous.
2. **Soft phrase match (fallback).** If a 6+-word slice of the decision
text appears verbatim in any plan or shipped artifact, it counts. This
forgives paraphrasing but is less reliable.
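The soft match can be pictured as a sliding six-word window over the decision text. A sketch under that assumption (the real matcher's tokenization and normalization may differ):

```shell
# Sketch of the soft-phrase fallback (assumed mechanics, not the shipped matcher):
# slide a 6-word window over the decision; a verbatim hit in the plan text counts.
DECISION="expose bit offsets in the public header for downstream consumers"
PLAN_TEXT="Plan 01-02 will expose bit offsets in the public header for downstream consumers."
matched=no
set -- $DECISION                      # split the decision into words
while [ $# -ge 6 ]; do
  window="$1 $2 $3 $4 $5 $6"
  case "$PLAN_TEXT" in *"$window"*) matched=yes ;; esac
  shift                               # advance the window one word
done
echo "soft match: $matched"
```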
**Opting a decision out.** If a decision genuinely should not be tracked —
an implementation-discretion note, an informational capture, a decision
already deferred — mark it one of these ways:
- Move it under the `### Claude's Discretion` heading inside `<decisions>`.
- Tag it in its bullet: `- **D-08 [informational]:** …`,
`- **D-09 [folded]:** …`, `- **D-10 [deferred]:** …`.
**Disabling the gates.** Set
`workflow.context_coverage_gate: false` in `.planning/config.json` (or via
`/gsd-settings`) to skip both gates silently. Default is `true`.
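Assuming the dotted key maps onto nested JSON the way other workflow toggles do, the config entry is:

```json
{
  "workflow": {
    "context_coverage_gate": false
  }
}
```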
---
## UI Design Contract
@@ -189,16 +468,19 @@ AI-generated frontends are visually inconsistent not because Claude Code is bad
### Commands
| Command | Description |
| -------------------- | -------------------------------------------------------- |
| `/gsd-ui-phase [N]` | Generate UI-SPEC.md design contract for a frontend phase |
| `/gsd-ui-review [N]` | Retroactive 6-pillar visual audit of implemented UI |
### Workflow: `/gsd-ui-phase`
**When to run:** After `/gsd-discuss-phase`, before `/gsd-plan-phase` — for phases with frontend/UI work.
**Flow:**
1. Reads CONTEXT.md, RESEARCH.md, REQUIREMENTS.md for existing decisions
2. Detects design system state (shadcn components.json, Tailwind config, existing tokens)
3. shadcn initialization gate — offers to initialize if a React/Next.js/Vite project has none
@@ -216,6 +498,7 @@ AI-generated frontends are visually inconsistent not because Claude Code is bad
**Standalone:** Works on any project, not just GSD-managed ones. If no UI-SPEC.md exists, audits against abstract 6-pillar standards.
**6 Pillars (scored 1-4 each):**
1. Copywriting — CTA labels, empty states, error states
2. Visuals — focal points, visual hierarchy, icon accessibility
3. Color — accent usage discipline, 60/30/10 compliance
@@ -227,10 +510,12 @@ AI-generated frontends are visually inconsistent not because Claude Code is bad
### Configuration
| Setting | Default | Description |
| ------------------------- | ------- | ----------------------------------------------------------- |
| `workflow.ui_phase` | `true` | Generate UI design contracts for frontend phases |
| `workflow.ui_safety_gate` | `true` | plan-phase prompts to run /gsd-ui-phase for frontend phases |
Both follow the absent=enabled pattern. Disable via `/gsd-settings`.
@@ -248,6 +533,7 @@ The preset string becomes a first-class GSD planning artifact, reproducible acro
### Registry Safety Gate
Third-party shadcn registries can inject arbitrary code. The safety gate requires:
- `npx shadcn view {component}` — inspect before installing
- `npx shadcn diff {component}` — compare against official
@@ -365,12 +651,14 @@ Workstreams let you work on multiple milestone areas concurrently without state
### Commands
| Command | Purpose |
| ---------------------------------- | ---------------------------------------------------- |
| `/gsd-workstreams create <name>` | Create a new workstream with isolated planning state |
| `/gsd-workstreams switch <name>` | Switch active context to a different workstream |
| `/gsd-workstreams list` | Show all workstreams and which is active |
| `/gsd-workstreams complete <name>` | Mark a workstream as done and archive its state |
### How It Works
@@ -393,6 +681,7 @@ All user-supplied file paths (`--text-file`, `--prd`) are validated to resolve w
The `security.cjs` module scans for known injection patterns (role overrides, instruction bypasses, system tag injections) in user-supplied text before it enters planning artifacts.
**Runtime Hooks:**
- `gsd-prompt-guard.js` — Scans Write/Edit calls to `.planning/` for injection patterns (always active, advisory-only)
- `gsd-workflow-guard.js` — Warns on file edits outside GSD workflow context (opt-in via `hooks.workflow_guard`)
@@ -573,6 +862,20 @@ claude --dangerously-skip-permissions
# (normal phase workflow from here)
```
**Post-execute drift detection (#2003).** After every `/gsd:execute-phase`,
GSD checks whether the phase introduced enough structural change
(new directories, barrel exports, migrations, or route modules) to make
`.planning/codebase/STRUCTURE.md` stale. If it did, the default behavior is
to print a one-shot warning suggesting the exact `/gsd:map-codebase --paths …`
invocation to refresh just the affected subtrees. Flip the behavior with:
```bash
/gsd:settings workflow.drift_action auto-remap # remap automatically
/gsd:settings workflow.drift_threshold 5 # tune sensitivity
```
The gate is non-blocking: any internal failure logs and the phase continues.
### Quick Bug Fix
```bash
@@ -598,11 +901,13 @@ claude --dangerously-skip-permissions
### Speed vs Quality Presets
| Scenario | Mode | Granularity | Profile | Research | Plan Check | Verifier |
| ----------- | ------------- | ----------- | ---------- | -------- | ---------- | -------- |
| Prototyping | `yolo` | `coarse` | `budget` | off | off | off |
| Normal dev | `interactive` | `standard` | `balanced` | on | on | on |
| Production | `interactive` | `fine` | `quality` | on | on | on |
**Skipping discuss-phase in autonomous mode:** When running in `yolo` mode with well-established preferences already captured in PROJECT.md, set `workflow.skip_discuss: true` via `/gsd-settings`. This bypasses the discuss-phase entirely and writes a minimal CONTEXT.md derived from the ROADMAP phase goal. Useful when your PROJECT.md and conventions are comprehensive enough that discussion adds no new information.
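Assuming the same nesting as other `workflow.*` settings, the resulting config fragment is:

```json
{
  "workflow": {
    "skip_discuss": true
  }
}
```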
@@ -637,6 +942,7 @@ cd ~/gsd-workspaces/feature-b
```
Each workspace gets:
- Its own `.planning/` directory (fully independent from source repos)
- Git worktrees (default) or clones of specified repos
- A `WORKSPACE.md` manifest tracking member repos
@@ -647,9 +953,9 @@ Each workspace gets:
### Programmatic CLI (`gsd-sdk query` vs `gsd-tools.cjs`)
For automation and copy-paste from docs, prefer **`gsd-sdk query`** with a registered subcommand (see [CLI-TOOLS.md — SDK and programmatic access](CLI-TOOLS.md#sdk-and-programmatic-access) and [QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md)). The legacy `node $HOME/.claude/get-shit-done/bin/gsd-tools.cjs` CLI remains supported for dual-mode operation.
**Not yet on `gsd-sdk query` (use CJS):** `state validate`, `state sync`, `audit-open`, `graphify`, `from-gsd2`, and any subcommand not listed in the registry.
- **CLI-only (not in the query registry):** **graphify**, **from-gsd2** / **gsd2-import**; call `gsd-tools.cjs` (see [QUERY-HANDLERS.md](../sdk/src/query/QUERY-HANDLERS.md)).
- **Two different `state` JSON shapes in the legacy CLI:** `state json` (frontmatter rebuild) vs `state load` (`config` + `state_raw` + flags).
- **`gsd-sdk query` today:** both `state.json` and `state.load` resolve to the frontmatter-rebuild handler; use `node …/gsd-tools.cjs state load` when you need the CJS `state load` shape. See [CLI-TOOLS.md](CLI-TOOLS.md#sdk-and-programmatic-access) and QUERY-HANDLERS.md.
### STATE.md Out of Sync
@@ -725,6 +1031,19 @@ To assign different models to different agents on a non-Claude runtime, add `mod
The installer auto-configures `resolve_model_ids: "omit"` for Gemini CLI, OpenCode, Kilo, and Codex. If you're manually setting up a non-Claude runtime, add it to `.planning/config.json` yourself.
#### Switching from Claude to Codex with one config change (#2517)
If you want tiered models on Codex without writing a large `model_overrides` block, set `runtime: "codex"` and pick a profile:
```json
{
"runtime": "codex",
"model_profile": "balanced"
}
```
GSD will resolve each agent's tier (`opus`/`sonnet`/`haiku`) to the Codex-native model and reasoning effort defined in the runtime tier map (`gpt-5.4` xhigh / `gpt-5.3-codex` medium / `gpt-5.4-mini` medium). The Codex installer embeds both `model` and `model_reasoning_effort` into each agent's TOML automatically. To override a single tier, add `model_profile_overrides.codex.<tier>`. See [Runtime-Aware Profiles](CONFIGURATION.md#runtime-aware-profiles-2517).
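For example, to keep the balanced profile but pin a single tier (the tier key and model string below are illustrative; the shape follows the `model_profile_overrides.codex.<tier>` path described above):

```json
{
  "runtime": "codex",
  "model_profile": "balanced",
  "model_profile_overrides": {
    "codex": {
      "haiku": "gpt-5.3-codex"
    }
  }
}
```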
See the [Configuration Reference](CONFIGURATION.md#non-claude-runtimes-codex-opencode-gemini-cli-kilo) for the full explanation.
### Installing for Cline
@@ -782,6 +1101,7 @@ If `npx get-shit-done-cc` fails due to npm outages or network restrictions, see
When a workflow fails in a way that isn't obvious -- plans reference nonexistent files, execution produces unexpected results, or state seems corrupted -- run `/gsd-forensics` to generate a diagnostic report.
**What it checks:**
- Git history anomalies (orphaned commits, unexpected branch state, rebase artifacts)
- Artifact integrity (missing or malformed planning files, broken cross-references)
- State inconsistencies (ROADMAP status vs. actual file presence, config drift)
@@ -916,22 +1236,24 @@ If the installer crashes with `EPERM: operation not permitted, scandir` on Windo
## Recovery Quick Reference
| Problem | Solution |
| ------------------------------------ | ------------------------------------------------------------------------ |
| Lost context / new session | `/gsd-resume-work` or `/gsd-progress` |
| Phase went wrong | `git revert` the phase commits, then re-plan |
| Need to change scope | `/gsd-add-phase`, `/gsd-insert-phase`, or `/gsd-remove-phase` |
| Milestone audit found gaps | `/gsd-plan-milestone-gaps` |
| Something broke | `/gsd-debug "description"` (add `--diagnose` for analysis without fixes) |
| STATE.md out of sync | `state validate` then `state sync` |
| Workflow state seems corrupted | `/gsd-forensics` |
| Quick targeted fix | `/gsd-quick` |
| Plan doesn't match your vision | `/gsd-discuss-phase [N]` then re-plan |
| Costs running high | `/gsd-set-profile budget` and `/gsd-settings` to toggle agents off |
| Update broke local changes | `/gsd-reapply-patches` |
| Want session summary for stakeholder | `/gsd-session-report` |
| Don't know what step is next | `/gsd-next` |
| Parallel execution build errors | Update GSD or set `parallelization.enabled: false` |
---
@@ -975,3 +1297,4 @@ For reference, here is what GSD creates in your project:
XX-UI-REVIEW.md # Visual audit scores (from /gsd-ui-review)
ui-reviews/ # Screenshots from /gsd-ui-review (gitignored)
```

View File

@@ -4,7 +4,7 @@ Copy-paste friendly for Discord and GitHub comments.
---
**@gsd-build/sdk** replaces the untyped, monolithic `gsd-tools.cjs` subprocess with a typed, tested, registry-based query system and **`gsd-sdk query`**, giving GSD structured results, classified errors (`GSDError` with `ErrorClassification`), and golden-verified parity with the old CLI. That gives the framework one stable contract instead of a fragile, very large CLI that every workflow had to spawn and parse by hand.
**What users can expect**

View File

@@ -10,7 +10,7 @@ Get Shit DoneGSDフレームワークの包括的なドキュメントで
| [Feature Reference](FEATURES.md) | All users | Detailed documentation and requirements for every feature |
| [Command Reference](COMMANDS.md) | All users | Syntax, flags, options, and examples for every command |
| [Configuration Reference](CONFIGURATION.md) | All users | Config schema, workflow toggles, model profiles, git branching |
| [CLI Tools Reference](CLI-TOOLS.md) | Contributors, agent authors | Guide to the CJS `gsd-tools.cjs` and **`gsd-sdk query` / SDK** |
| [Agent Reference](AGENTS.md) | Contributors, advanced users | All 18 specialized agents: roles, tools, spawn patterns |
| [User Guide](USER-GUIDE.md) | All users | Workflow walkthroughs, troubleshooting, recovery |
| [Context Monitor](context-monitor.md) | All users | Architecture of the context-window monitoring hook |

View File

@@ -12,7 +12,7 @@ Get Shit Done (GSD) 프레임워크의 종합 문서입니다. GSD는 AI 코딩
| [Feature Reference](FEATURES.md) | All users | Full feature and function documentation with requirements |
| [Command Reference](COMMANDS.md) | All users | Syntax, flags, options, and examples for every command |
| [Configuration Reference](CONFIGURATION.md) | All users | Full config schema, workflow toggles, model profiles, git branching |
| [CLI Tools Reference](CLI-TOOLS.md) | Contributors, agent authors | Guide to the CJS `gsd-tools.cjs` plus **`gsd-sdk query`/SDK** |
| [Agent Reference](AGENTS.md) | Contributors, advanced users | Roles, tools, and spawn patterns for the 18 specialized agents |
| [User Guide](USER-GUIDE.md) | All users | Workflow walkthroughs, troubleshooting, recovery |
| [Context Monitor](context-monitor.md) | All users | Context-window monitoring hook architecture |

View File

@@ -1,7 +1,7 @@
# CLI Tools Reference
A Portuguese-language summary of the GSD CLI tools.
For the complete API (signatures, arguments, and detailed behavior), see the [English CLI-TOOLS.md](../CLI-TOOLS.md), which includes the **SDK and programmatic access** section (`gsd-sdk query`, `@gsd-build/sdk`).
---

View File

@@ -12,7 +12,7 @@ Documentação abrangente do framework Get Shit Done (GSD) — um sistema de met
| [Configuration Reference](CONFIGURATION.md) | All users | Full configuration schema, toggles, and profiles |
| [Feature Reference](FEATURES.md) | All users | Detailed features and requirements |
| [Agent Reference](AGENTS.md) | Contributors, advanced users | Specialized agents, roles, and orchestration patterns |
| [CLI Tools](CLI-TOOLS.md) | Contributors, agent authors | CJS `gsd-tools.cjs` surface plus the **`gsd-sdk query`/SDK** guide |
| [Context Monitor](context-monitor.md) | All users | Context-window monitoring architecture |
| [Discuss Mode](workflow-discuss-mode.md) | All users | Assumptions vs. interview mode in `discuss-phase` |
| [References](references/) | All users | Supplementary guides on decisions, verification, and patterns |

View File

@@ -2,11 +2,11 @@
Computes the next decimal phase number for urgent insertions.
## Using gsd-sdk query
```bash
# Get the next decimal phase after phase 6
gsd-sdk query phase.next-decimal 6
```
Output:
@@ -32,14 +32,13 @@ node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" phase next-decimal 6
## Extracting values
```bash
DECIMAL_PHASE=$(gsd-sdk query phase.next-decimal "${AFTER_PHASE}" --pick next)
BASE_PHASE=$(gsd-sdk query phase.next-decimal "${AFTER_PHASE}" --pick base_phase)
```
Or use the --raw flag:
```bash
DECIMAL_PHASE=$(gsd-sdk query phase.next-decimal "${AFTER_PHASE}" --raw)
# Returns: 06.1
```
@@ -57,9 +56,9 @@ DECIMAL_PHASE=$(node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" phase next-
Decimal phase directories use the full decimal number:
```bash
SLUG=$(gsd-sdk query generate-slug "$DESCRIPTION" --raw)
PHASE_DIR=".planning/phases/${DECIMAL_PHASE}-${SLUG}"
mkdir -p "$PHASE_DIR"
```
Example: `.planning/phases/06.1-fix-critical-auth-bug/`

View File

@@ -51,7 +51,7 @@ Phases:
Commit:
```bash
gsd-sdk query commit "docs: initialize [project-name] ([N] phases)" --files .planning/
```
</format>
@@ -129,7 +129,7 @@ SUMMARY: .planning/phases/XX-name/{phase}-{plan}-SUMMARY.md
Commit:
```bash
gsd-sdk query commit "docs({phase}-{plan}): complete [plan-name] plan" --files .planning/phases/XX-name/{phase}-{plan}-PLAN.md .planning/phases/XX-name/{phase}-{plan}-SUMMARY.md .planning/STATE.md .planning/ROADMAP.md
```
**Note:** Code files are not included; they were already committed per task.
@@ -149,7 +149,7 @@ Current: [task name]
Commit:
```bash
gsd-sdk query commit "wip: [phase-name] paused at task [X]/[Y]" --files .planning/
```
</format>

View File

@@ -1,13 +1,15 @@
# Git Planning Commits
Commit planning artifacts with `gsd-sdk query commit`, which automatically checks the `commit_docs` config and gitignore status (same behavior as the legacy `gsd-tools.cjs commit`).
## Committing via the CLI
Pass the commit message first, then the file paths explicitly with `--files`. Both `commit` and `commit-to-subrepo` should use `--files` to declare the paths to commit.
Always use this for `.planning/` files; it handles the `commit_docs` and gitignore checks automatically:
```bash
gsd-sdk query commit "docs({scope}): {description}" --files .planning/STATE.md .planning/ROADMAP.md
```
If `commit_docs` is `false` or `.planning/` is gitignored, the CLI returns `skipped` with a reason. No manual conditional checks are needed.
@@ -17,7 +19,7 @@ node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" commit "docs({scope}): {des
To fold `.planning/` file changes into the previous commit:
```bash
node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" commit "" --files .planning/codebase/*.md --amend
gsd-sdk query commit "" --files .planning/codebase/*.md --amend
```
## Commit Message Patterns
@@ -35,4 +37,4 @@ node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" commit "" --files .planning
- `commit_docs: false` in config
- `.planning/` is gitignored
- No changes to commit (check with `git status --porcelain .planning/`)

View File

@@ -36,19 +36,19 @@
- The user must add `.planning/` to `.gitignore`
- Use cases: OSS contributions, client projects, keeping planning private
**Using gsd-tools.cjs (recommended):**
**Using `gsd-sdk query` (recommended):**
```bash
# Automatically checks commit_docs + gitignore on commit
node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" commit "docs: update state" --files .planning/STATE.md
gsd-sdk query commit "docs: update state" --files .planning/STATE.md
# Load config via state load (returns JSON)
INIT=$(node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" state load)
INIT=$(gsd-sdk query state.load)
if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
# commit_docs is available in the JSON output
# Or use an init command, whose output includes commit_docs:
INIT=$(node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" init execute-phase "1")
INIT=$(gsd-sdk query init.execute-phase "1")
if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
# commit_docs is included in the output of every init command
```
@@ -58,7 +58,7 @@ if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
**Commit via the CLI (checks handled automatically):**
```bash
node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" commit "docs: update state" --files .planning/STATE.md
gsd-sdk query commit "docs: update state" --files .planning/STATE.md
```
The CLI checks the `commit_docs` setting and gitignore status internally; no manual conditionals are needed.
@@ -146,14 +146,14 @@ The CLI checks the `commit_docs` setting and gitignore status internally; no man
Use `init execute-phase` to get all config values as JSON:
```bash
INIT=$(node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" init execute-phase "1")
INIT=$(gsd-sdk query init.execute-phase "1")
if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
# JSON output includes: branching_strategy, phase_branch_template, milestone_branch_template
```
Or use `state load` to read config values:
```bash
INIT=$(node "$HOME/.claude/get-shit-done/bin/gsd-tools.cjs" state load)
INIT=$(gsd-sdk query state.load)
if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
# Parse branching_strategy, phase_branch_template, milestone_branch_template from the JSON
```

View File

@@ -49,6 +49,7 @@
* roadmap get-phase <phase> Extract phase section from ROADMAP.md
* roadmap analyze Full roadmap parse with disk status
* roadmap update-plan-progress <N> Update progress table row from disk (PLAN vs SUMMARY counts)
* roadmap annotate-dependencies <N> Add wave dependency notes + cross-cutting constraints to ROADMAP.md
*
* Requirements Operations:
* requirements mark-complete <ids> Mark requirement IDs as complete in REQUIREMENTS.md
@@ -111,6 +112,7 @@
* verify artifacts <plan-file> Check must_haves.artifacts
* verify key-links <plan-file> Check must_haves.key_links
* verify schema-drift <phase> [--skip] Detect schema file changes without push
* verify codebase-drift Detect structural drift since last codebase map (#2003)
*
* Template Fill:
* template fill summary --phase N Create pre-filled SUMMARY.md
@@ -186,6 +188,7 @@ const profileOutput = require('./lib/profile-output.cjs');
const workstream = require('./lib/workstream.cjs');
const docs = require('./lib/docs.cjs');
const learnings = require('./lib/learnings.cjs');
const gapChecker = require('./lib/gap-checker.cjs');
// ─── Arg parsing helpers ──────────────────────────────────────────────────────
@@ -480,8 +483,18 @@ async function runCommand(command, args, cwd, raw, defaultValue) {
} else if (subcommand === 'prune') {
const { 'keep-recent': keepRecent, 'dry-run': dryRun } = parseNamedArgs(args, ['keep-recent'], ['dry-run']);
state.cmdStatePrune(cwd, { keepRecent: keepRecent || '3', dryRun: !!dryRun }, raw);
} else {
} else if (subcommand === 'complete-phase') {
state.cmdStateCompletePhase(cwd, raw);
} else if (subcommand === 'milestone-switch') {
// Bug #2630: reset STATE.md frontmatter + Current Position for new milestone.
// NB: the flag is `--milestone`, not `--version` — gsd-tools reserves
// `--version` as a globally-invalid help flag (see NEVER_VALID_FLAGS above).
const { milestone, name } = parseNamedArgs(args, ['milestone', 'name']);
state.cmdStateMilestoneSwitch(cwd, milestone, name, raw);
} else if (subcommand === undefined || subcommand === 'load') {
state.cmdStateLoad(cwd, raw);
} else {
error(`Unknown state subcommand: "${subcommand}". Available: load, json, get, patch, update, advance-plan, record-metric, update-progress, add-decision, add-blocker, resolve-blocker, record-session, begin-phase, signal-waiting, signal-resume, planned-phase, validate, sync, prune, complete-phase, milestone-switch`);
}
break;
}
@@ -592,8 +605,10 @@ async function runCommand(command, args, cwd, raw, defaultValue) {
} else if (subcommand === 'schema-drift') {
const skipFlag = args.includes('--skip');
verify.cmdVerifySchemaDrift(cwd, args[2], skipFlag, raw);
} else if (subcommand === 'codebase-drift') {
verify.cmdVerifyCodebaseDrift(cwd, raw);
} else {
error('Unknown verify subcommand. Available: plan-structure, phase-completeness, references, commits, artifacts, key-links, schema-drift');
error('Unknown verify subcommand. Available: plan-structure, phase-completeness, references, commits, artifacts, key-links, schema-drift, codebase-drift');
}
break;
}
@@ -690,8 +705,10 @@ async function runCommand(command, args, cwd, raw, defaultValue) {
roadmap.cmdRoadmapAnalyze(cwd, raw);
} else if (subcommand === 'update-plan-progress') {
roadmap.cmdRoadmapUpdatePlanProgress(cwd, args[2], raw);
} else if (subcommand === 'annotate-dependencies') {
roadmap.cmdRoadmapAnnotateDependencies(cwd, args[2], raw);
} else {
error('Unknown roadmap subcommand. Available: get-phase, analyze, update-plan-progress');
error('Unknown roadmap subcommand. Available: get-phase, analyze, update-plan-progress, annotate-dependencies');
}
break;
}
@@ -706,6 +723,13 @@ async function runCommand(command, args, cwd, raw, defaultValue) {
break;
}
case 'gap-analysis': {
// Post-planning gap checker (#2493) — unified REQUIREMENTS.md +
// CONTEXT.md <decisions> coverage report against PLAN.md files.
gapChecker.cmdGapAnalysis(cwd, args.slice(1), raw);
break;
}
case 'phase': {
const subcommand = args[1];
if (subcommand === 'next-decimal') {
@@ -764,7 +788,8 @@ async function runCommand(command, args, cwd, raw, defaultValue) {
verify.cmdValidateConsistency(cwd, raw);
} else if (subcommand === 'health') {
const repairFlag = args.includes('--repair');
verify.cmdValidateHealth(cwd, { repair: repairFlag }, raw);
const backfillFlag = args.includes('--backfill');
verify.cmdValidateHealth(cwd, { repair: repairFlag, backfill: backfillFlag }, raw);
} else if (subcommand === 'agents') {
verify.cmdValidateAgents(cwd, raw);
} else {
@@ -859,6 +884,9 @@ async function runCommand(command, args, cwd, raw, defaultValue) {
case 'quick':
init.cmdInitQuick(cwd, args.slice(2).join(' '), raw);
break;
case 'ingest-docs':
init.cmdInitIngestDocs(cwd, raw);
break;
case 'resume':
init.cmdInitResume(cwd, raw);
break;
@@ -893,7 +921,7 @@ async function runCommand(command, args, cwd, raw, defaultValue) {
init.cmdInitRemoveWorkspace(cwd, args[2], raw);
break;
default:
error(`Unknown init workflow: ${workflow}\nAvailable: execute-phase, plan-phase, new-project, new-milestone, quick, resume, verify-work, phase-op, todos, milestone-op, map-codebase, progress, manager, new-workspace, list-workspaces, remove-workspace`);
error(`Unknown init workflow: ${workflow}\nAvailable: execute-phase, plan-phase, new-project, new-milestone, quick, ingest-docs, resume, verify-work, phase-op, todos, milestone-op, map-codebase, progress, manager, new-workspace, list-workspaces, remove-workspace`);
}
break;
}
@@ -1200,10 +1228,6 @@ async function runCommand(command, args, cwd, raw, defaultValue) {
'agents',
path.join('commands', 'gsd'),
'hooks',
// OpenCode/Kilo flat command dir
'command',
// Codex/Copilot skills dir
'skills',
];
function walkDir(dir, baseDir) {

View File

@@ -0,0 +1,52 @@
/**
* Canonical GSD artifact registry.
*
* Enumerates the file names that gsd workflows officially produce at the
* .planning/ root level. Used by gsd-health (W019) to flag unrecognized files
* so stale or misnamed artifacts don't silently mislead agents or reviewers.
*
* Add entries here whenever a new workflow produces a .planning/ root file.
*/
'use strict';
// Exact-match canonical file names at .planning/ root
const CANONICAL_EXACT = new Set([
'PROJECT.md',
'ROADMAP.md',
'STATE.md',
'REQUIREMENTS.md',
'MILESTONES.md',
'BACKLOG.md',
'LEARNINGS.md',
'THREADS.md',
'config.json',
'CLAUDE.md',
]);
// Pattern-match canonical file names (regex tests on the basename)
// Each pattern includes the name of the workflow that produces it as a comment.
const CANONICAL_PATTERNS = [
/^v\d+\.\d+(?:\.\d+)?-MILESTONE-AUDIT\.md$/i, // gsd-complete-milestone (pre-archive)
/^v\d+\.\d+(?:\.\d+)?-.*\.md$/i, // other version-stamped planning docs
];
/**
* Return true if `filename` (basename only, no path) matches a canonical
* .planning/ root artifact — either an exact name or a known pattern.
*
* @param {string} filename - Basename of the file (e.g. "STATE.md")
*/
function isCanonicalPlanningFile(filename) {
if (CANONICAL_EXACT.has(filename)) return true;
for (const pattern of CANONICAL_PATTERNS) {
if (pattern.test(filename)) return true;
}
return false;
}
module.exports = {
CANONICAL_EXACT,
CANONICAL_PATTERNS,
isCanonicalPlanningFile,
};
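The registry's matching rules can be exercised standalone. This sketch re-inlines `isCanonicalPlanningFile` with a trimmed exact-name set (the full set lives in the module above); the filenames passed in are illustrative:

```javascript
// Minimal re-inlining of the registry check: exact names first, then patterns.
const CANONICAL_EXACT = new Set(['PROJECT.md', 'ROADMAP.md', 'STATE.md', 'config.json']);
const CANONICAL_PATTERNS = [
  /^v\d+\.\d+(?:\.\d+)?-MILESTONE-AUDIT\.md$/i, // gsd-complete-milestone (pre-archive)
  /^v\d+\.\d+(?:\.\d+)?-.*\.md$/i,              // other version-stamped planning docs
];

function isCanonicalPlanningFile(filename) {
  if (CANONICAL_EXACT.has(filename)) return true;
  return CANONICAL_PATTERNS.some((p) => p.test(filename));
}

console.log(isCanonicalPlanningFile('STATE.md'));                   // true (exact match)
console.log(isCanonicalPlanningFile('v1.50.0-MILESTONE-AUDIT.md')); // true (pattern match)
console.log(isCanonicalPlanningFile('notes.md'));                   // false, so W019 would flag it
```

Anything returning `false` here is what the W019 health check surfaces as an unrecognized `.planning/` root file.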

View File

@@ -105,12 +105,27 @@ function scanQuickTasks(planDir) {
continue;
}
const summaryPath = path.join(safeTaskDir, 'SUMMARY.md');
// workflows/quick.md mandates `${quick_id}-SUMMARY.md`; older flows used
// bare `SUMMARY.md`. Accept either to avoid false-positive "missing".
let summaryPath = null;
try {
const summaryFiles = fs.readdirSync(safeTaskDir, { withFileTypes: true })
.filter(e => e.isFile() && (e.name === 'SUMMARY.md' || e.name.endsWith('-SUMMARY.md')));
if (summaryFiles.length > 0) {
// Prefer the per-task `${quick_id}-SUMMARY.md` form when present.
const preferred = summaryFiles.find(e => e.name === `${dirName}-SUMMARY.md`)
|| summaryFiles.find(e => e.name.endsWith('-SUMMARY.md'))
|| summaryFiles[0];
summaryPath = path.join(safeTaskDir, preferred.name);
}
} catch {
// fall through with summaryPath = null → status: missing
}
let status = 'missing';
let description = '';
if (fs.existsSync(summaryPath)) {
if (summaryPath && fs.existsSync(summaryPath)) {
let safeSum;
try {
safeSum = requireSafePath(summaryPath, planDir, 'quick task summary', { allowAbsolute: true });
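The preference ladder above (exact `${quick_id}-SUMMARY.md`, then any `*-SUMMARY.md`, then bare `SUMMARY.md`) can be sketched in isolation. `pickSummary` and the filenames are illustrative, not shipped helpers:

```javascript
// Given the file names in a quick-task directory, return the summary file
// to read, following the same preference order as scanQuickTasks.
function pickSummary(names, dirName) {
  const candidates = names.filter(
    (n) => n === 'SUMMARY.md' || n.endsWith('-SUMMARY.md')
  );
  if (candidates.length === 0) return null; // no summary at all: status stays "missing"
  return (
    candidates.find((n) => n === `${dirName}-SUMMARY.md`) || // preferred per-task form
    candidates.find((n) => n.endsWith('-SUMMARY.md')) ||     // any id-prefixed form
    candidates[0]                                            // legacy bare SUMMARY.md
  );
}

console.log(pickSummary(['SUMMARY.md', '042-fix-SUMMARY.md'], '042-fix')); // '042-fix-SUMMARY.md'
console.log(pickSummary(['SUMMARY.md'], '042-fix'));                       // 'SUMMARY.md'
console.log(pickSummary(['PLAN.md'], '042-fix'));                          // null
```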
@@ -344,6 +359,11 @@ function scanSeeds(planDir) {
return results;
}
// Terminal UAT states: `complete` (legacy) and `resolved` (post-gap-closure
// per workflows/execute-phase.md). Hoisted outside scanUatGaps so the Set is
// not recreated on each loop iteration.
const TERMINAL_UAT_STATUSES = new Set(['complete', 'resolved']);
/**
* Scan .planning/phases for UAT gaps (UAT files with status != 'complete').
*/
@@ -394,8 +414,12 @@ function scanUatGaps(planDir) {
const fm = extractFrontmatter(content);
const status = (fm.status || 'unknown').toLowerCase();
const result = (fm.result || '').toString().toLowerCase();
if (status === 'complete') continue;
// Also accept `result: all_pass` as a fallback when status is absent
// — covers UATs that omit `status:`.
if (TERMINAL_UAT_STATUSES.has(status)) continue;
if (status === 'unknown' && result === 'all_pass') continue;
// Count open scenarios
const pendingMatches = (content.match(/result:\s*(?:pending|\[pending\])/gi) || []).length;
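The terminal-status gate above can be checked standalone. This sketch re-inlines the Set and the `result: all_pass` fallback; the frontmatter objects are illustrative:

```javascript
// A UAT file is "closed" when its frontmatter status is terminal, or when
// status is absent entirely but the result field says all_pass.
const TERMINAL_UAT_STATUSES = new Set(['complete', 'resolved']);

function isClosedUat(fm) {
  const status = (fm.status || 'unknown').toLowerCase();
  const result = (fm.result || '').toString().toLowerCase();
  if (TERMINAL_UAT_STATUSES.has(status)) return true;
  return status === 'unknown' && result === 'all_pass';
}

console.log(isClosedUat({ status: 'resolved' }));    // true  (post-gap-closure terminal state)
console.log(isClosedUat({ result: 'all_pass' }));    // true  (no status: field at all)
console.log(isClosedUat({ status: 'in_progress' })); // false, so it counts as a gap
```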

View File

@@ -25,7 +25,6 @@ const VALID_CONFIG_KEYS = new Set([
'workflow.discuss_mode',
'workflow.skip_discuss',
'workflow.auto_prune_state',
'workflow._auto_chain_active',
'workflow.use_worktrees',
'workflow.code_review',
'workflow.code_review_depth',
@@ -34,15 +33,26 @@ const VALID_CONFIG_KEYS = new Set([
'workflow.plan_bounce',
'workflow.plan_bounce_script',
'workflow.plan_bounce_passes',
'workflow.plan_chunked',
'workflow.plan_review_convergence',
'workflow.post_planning_gaps',
'workflow.security_enforcement',
'workflow.security_asvs_level',
'workflow.security_block_on',
'workflow.drift_threshold',
'workflow.drift_action',
'git.branching_strategy', 'git.base_branch', 'git.phase_branch_template', 'git.milestone_branch_template', 'git.quick_branch_template',
'planning.commit_docs', 'planning.search_gitignored', 'planning.sub_repos',
'review.ollama_host', 'review.lm_studio_host', 'review.llama_cpp_host',
'workflow.cross_ai_execution', 'workflow.cross_ai_command', 'workflow.cross_ai_timeout',
'workflow.subagent_timeout',
'workflow.inline_plan_threshold',
'hooks.context_warnings',
'hooks.workflow_guard',
'workflow.context_coverage_gate',
'statusline.show_last_command',
'workflow.ui_review',
'workflow.max_discuss_passes',
'features.thinking_partner',
'context',
'features.global_learnings',
@@ -50,10 +60,14 @@ const VALID_CONFIG_KEYS = new Set([
'project_code', 'phase_naming',
'manager.flags.discuss', 'manager.flags.plan', 'manager.flags.execute',
'response_language',
'context_window',
'intel.enabled',
'graphify.enabled',
'graphify.build_timeout',
'claude_md_path',
'claude_md_assembly.mode',
// #2517 — runtime-aware model profiles
'runtime',
]);
/**
@@ -61,9 +75,14 @@ const VALID_CONFIG_KEYS = new Set([
* Each entry has a `test` function and a human-readable `description`.
*/
const DYNAMIC_KEY_PATTERNS = [
{ test: (k) => /^agent_skills\.[a-zA-Z0-9_-]+$/.test(k), description: 'agent_skills.<agent-type>' },
{ test: (k) => /^review\.models\.[a-zA-Z0-9_-]+$/.test(k), description: 'review.models.<cli-name>' },
{ test: (k) => /^features\.[a-zA-Z0-9_]+$/.test(k), description: 'features.<feature_name>' },
{ topLevel: 'agent_skills', test: (k) => /^agent_skills\.[a-zA-Z0-9_-]+$/.test(k), description: 'agent_skills.<agent-type>' },
{ topLevel: 'review', test: (k) => /^review\.models\.[a-zA-Z0-9_-]+$/.test(k), description: 'review.models.<cli-name>' },
{ topLevel: 'features', test: (k) => /^features\.[a-zA-Z0-9_]+$/.test(k), description: 'features.<feature_name>' },
{ topLevel: 'claude_md_assembly', test: (k) => /^claude_md_assembly\.blocks\.[a-zA-Z0-9_]+$/.test(k), description: 'claude_md_assembly.blocks.<section>' },
// #2517 — runtime-aware model profile overrides: model_profile_overrides.<runtime>.<tier>
// <runtime> is a free string (so users can map non-built-in runtimes); <tier> is enum-restricted.
{ topLevel: 'model_profile_overrides', test: (k) => /^model_profile_overrides\.[a-zA-Z0-9_-]+\.(opus|sonnet|haiku)$/.test(k),
description: 'model_profile_overrides.<runtime>.<opus|sonnet|haiku>' },
];
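How a dotted key is validated against these patterns can be shown with just the `model_profile_overrides` entry re-inlined (the runtime segment is free-form; the tier segment is enum-restricted):

```javascript
// One entry from DYNAMIC_KEY_PATTERNS, exercised directly.
const DYNAMIC_KEY_PATTERNS = [
  {
    topLevel: 'model_profile_overrides',
    test: (k) => /^model_profile_overrides\.[a-zA-Z0-9_-]+\.(opus|sonnet|haiku)$/.test(k),
    description: 'model_profile_overrides.<runtime>.<opus|sonnet|haiku>',
  },
];

const matchesDynamic = (key) => DYNAMIC_KEY_PATTERNS.some((p) => p.test(key));

console.log(matchesDynamic('model_profile_overrides.codex.opus'));   // true (any runtime string)
console.log(matchesDynamic('model_profile_overrides.codex.banana')); // false (tier not in the enum)
```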
/**

View File

@@ -11,6 +11,7 @@ const {
formatAgentToModelMapAsTable,
} = require('./model-profiles.cjs');
const { VALID_CONFIG_KEYS, isValidConfigKey } = require('./config-schema.cjs');
const { isSecretKey, maskSecret } = require('./secrets.cjs');
const CONFIG_KEY_SUGGESTIONS = {
'workflow.nyquist_validation_enabled': 'workflow.nyquist_validation',
@@ -24,6 +25,8 @@ const CONFIG_KEY_SUGGESTIONS = {
'workflow.code_review_level': 'workflow.code_review_depth',
'workflow.review_depth': 'workflow.code_review_depth',
'review.model': 'review.models.<cli-name>',
'sub_repos': 'planning.sub_repos',
'plan_checker': 'workflow.plan_check',
};
function validateKnownConfigKeyPath(keyPath) {
@@ -117,6 +120,7 @@ function buildNewProjectConfig(userChoices) {
plan_bounce_script: null,
plan_bounce_passes: 2,
auto_prune_state: false,
post_planning_gaps: CONFIG_DEFAULTS.post_planning_gaps,
security_enforcement: CONFIG_DEFAULTS.security_enforcement,
security_asvs_level: CONFIG_DEFAULTS.security_asvs_level,
security_block_on: CONFIG_DEFAULTS.security_block_on,
@@ -331,7 +335,44 @@ function cmdConfigSet(cwd, keyPath, value, raw) {
error(`Invalid context value '${value}'. Valid values: ${VALID_CONTEXT_VALUES.join(', ')}`);
}
// Codebase drift detector (#2003)
const VALID_DRIFT_ACTIONS = ['warn', 'auto-remap'];
if (keyPath === 'workflow.drift_action' && !VALID_DRIFT_ACTIONS.includes(String(parsedValue))) {
error(`Invalid workflow.drift_action '${value}'. Valid values: ${VALID_DRIFT_ACTIONS.join(', ')}`);
}
if (keyPath === 'workflow.drift_threshold') {
if (typeof parsedValue !== 'number' || !Number.isInteger(parsedValue) || parsedValue < 1) {
error(`Invalid workflow.drift_threshold '${value}'. Must be a positive integer.`);
}
}
// Post-planning gap checker (#2493)
if (keyPath === 'workflow.post_planning_gaps') {
if (typeof parsedValue !== 'boolean') {
error(`Invalid workflow.post_planning_gaps '${value}'. Must be a boolean (true or false).`);
}
}
const setConfigValueResult = setConfigValue(cwd, keyPath, parsedValue);
// Mask secrets in both JSON and text output. The plaintext is written
// to config.json (that's where secrets live on disk); the CLI output
// must never echo it. See lib/secrets.cjs.
if (isSecretKey(keyPath)) {
const masked = maskSecret(parsedValue);
const maskedPrev = setConfigValueResult.previousValue === undefined
? undefined
: maskSecret(setConfigValueResult.previousValue);
const maskedResult = {
...setConfigValueResult,
value: masked,
previousValue: maskedPrev,
masked: true,
};
output(maskedResult, raw, `${keyPath}=${masked}`);
return;
}
output(setConfigValueResult, raw, `${keyPath}=${parsedValue}`);
}
@@ -374,6 +415,14 @@ function cmdConfigGet(cwd, keyPath, raw, defaultValue) {
error(`Key not found: ${keyPath}`);
}
// Never echo plaintext for sensitive keys via config-get. Plaintext lives
// in config.json on disk; the CLI surface always shows the masked form.
if (isSecretKey(keyPath)) {
const masked = maskSecret(current);
output(masked, raw, masked);
return;
}
output(current, raw, String(current));
}

View File

@@ -266,69 +266,131 @@ const CONFIG_DEFAULTS = {
security_enforcement: true, // workflow.security_enforcement — threat-model-anchored security verification via /gsd-secure-phase
security_asvs_level: 1, // workflow.security_asvs_level — OWASP ASVS verification level (1=opportunistic, 2=standard, 3=comprehensive)
security_block_on: 'high', // workflow.security_block_on — minimum severity that blocks phase advancement ('high' | 'medium' | 'low')
post_planning_gaps: true, // workflow.post_planning_gaps — unified post-planning gap report (#2493): scan REQUIREMENTS.md + CONTEXT.md decisions vs all PLAN.md files
};
/**
* Deep-merge two plain config objects. `overlay` wins on key conflict.
* Explicit `null` in overlay overrides base (null means "unset this key").
* Arrays are replaced, not merged. Non-object primitives use overlay value.
*
* Note: `undefined` in overlay is treated as "no value provided" and falls
* back to base (preserves inheritance). Explicit `null` overrides base.
*/
function _deepMergeConfig(base, overlay) {
if (overlay === null || overlay === undefined) return overlay;
if (typeof base !== 'object' || typeof overlay !== 'object') return overlay;
const result = { ...base };
for (const key of Object.keys(overlay)) {
if (overlay[key] !== null && typeof overlay[key] === 'object' && !Array.isArray(overlay[key])) {
result[key] = _deepMergeConfig(base[key] ?? {}, overlay[key]);
} else {
result[key] = overlay[key];
}
}
return result;
}
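The merge semantics can be seen by running the function on a root/workstream pair: nested objects merge key-by-key, arrays are replaced wholesale, and an explicit `null` in the overlay wins. The config shapes here are illustrative:

```javascript
// Copy of _deepMergeConfig from above, exercised standalone.
function _deepMergeConfig(base, overlay) {
  if (overlay === null || overlay === undefined) return overlay;
  if (typeof base !== 'object' || typeof overlay !== 'object') return overlay;
  const result = { ...base };
  for (const key of Object.keys(overlay)) {
    if (overlay[key] !== null && typeof overlay[key] === 'object' && !Array.isArray(overlay[key])) {
      result[key] = _deepMergeConfig(base[key] ?? {}, overlay[key]);
    } else {
      result[key] = overlay[key];
    }
  }
  return result;
}

const root = { workflow: { research: true, verifier: true }, planning: { sub_repos: ['a'] } };
const ws = { workflow: { verifier: null }, planning: { sub_repos: ['b'] } };
console.log(_deepMergeConfig(root, ws));
// workflow.research is inherited, workflow.verifier is explicitly nulled,
// and planning.sub_repos is replaced (not concatenated) with ['b'].
```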
function loadConfig(cwd) {
// When GSD_WORKSTREAM is set, load root config first so workstream config
// can inherit from it. This prevents users from duplicating model_overrides,
// workflow.*, etc. across every workstream config (#2714).
const ws = process.env.GSD_WORKSTREAM || null;
let rootParsed = null;
if (ws) {
const rootConfigPath = path.join(planningRoot(cwd), 'config.json');
try {
const raw = fs.readFileSync(rootConfigPath, 'utf-8');
rootParsed = JSON.parse(raw);
} catch {
// Root config missing or unparseable — workstream config stands alone
}
}
const configPath = path.join(planningDir(cwd), 'config.json');
const defaults = CONFIG_DEFAULTS;
try {
const raw = fs.readFileSync(configPath, 'utf-8');
const parsed = JSON.parse(raw);
// `fileData` is the parsed content of the config.json file on disk — used
// for migrations and writes so we never persist merged values back to disk.
const fileData = JSON.parse(raw);
// Migrate deprecated "depth" key to "granularity" with value mapping
if ('depth' in parsed && !('granularity' in parsed)) {
if ('depth' in fileData && !('granularity' in fileData)) {
const depthToGranularity = { quick: 'coarse', standard: 'standard', comprehensive: 'fine' };
parsed.granularity = depthToGranularity[parsed.depth] || parsed.depth;
delete parsed.depth;
try { fs.writeFileSync(configPath, JSON.stringify(parsed, null, 2), 'utf-8'); } catch { /* intentionally empty */ }
fileData.granularity = depthToGranularity[fileData.depth] || fileData.depth;
delete fileData.depth;
try { fs.writeFileSync(configPath, JSON.stringify(fileData, null, 2), 'utf-8'); } catch { /* intentionally empty */ }
}
// Auto-detect and sync sub_repos: scan for child directories with .git
let configDirty = false;
// Migrate legacy "multiRepo: true" boolean → sub_repos array
if (parsed.multiRepo === true && !parsed.sub_repos && !parsed.planning?.sub_repos) {
// Migrate legacy "multiRepo: true" boolean → planning.sub_repos array.
// Canonical location is planning.sub_repos (#2561); writing to top-level
// would be flagged as unknown by the validator below (#2638).
if (fileData.multiRepo === true && !fileData.sub_repos && !fileData.planning?.sub_repos) {
const detected = detectSubRepos(cwd);
if (detected.length > 0) {
parsed.sub_repos = detected;
if (!parsed.planning) parsed.planning = {};
parsed.planning.commit_docs = false;
delete parsed.multiRepo;
if (!fileData.planning) fileData.planning = {};
fileData.planning.sub_repos = detected;
fileData.planning.commit_docs = false;
delete fileData.multiRepo;
configDirty = true;
}
}
// Keep sub_repos in sync with actual filesystem
const currentSubRepos = parsed.sub_repos || parsed.planning?.sub_repos || [];
// Self-heal legacy/buggy installs: strip any stale top-level sub_repos,
// preserving its value as the planning.sub_repos seed if that slot is empty.
if (Object.prototype.hasOwnProperty.call(fileData, 'sub_repos')) {
if (!fileData.planning) fileData.planning = {};
if (!fileData.planning.sub_repos) {
fileData.planning.sub_repos = fileData.sub_repos;
}
delete fileData.sub_repos;
configDirty = true;
}
// Keep planning.sub_repos in sync with actual filesystem
const currentSubRepos = fileData.planning?.sub_repos || [];
if (Array.isArray(currentSubRepos) && currentSubRepos.length > 0) {
const detected = detectSubRepos(cwd);
if (detected.length > 0) {
const sorted = [...currentSubRepos].sort();
if (JSON.stringify(sorted) !== JSON.stringify(detected)) {
parsed.sub_repos = detected;
if (!fileData.planning) fileData.planning = {};
fileData.planning.sub_repos = detected;
configDirty = true;
}
}
}
// Persist sub_repos changes (migration or sync)
// Persist sub_repos changes (migration or sync) — write only the on-disk
// file contents, never the merged result, to avoid polluting workstream configs.
if (configDirty) {
try { fs.writeFileSync(configPath, JSON.stringify(parsed, null, 2), 'utf-8'); } catch {}
try { fs.writeFileSync(configPath, JSON.stringify(fileData, null, 2), 'utf-8'); } catch {}
}
// Now apply root→workstream inheritance. `parsed` is the effective config
// used for value extraction below; fileData is kept for disk writes only.
const parsed = rootParsed ? _deepMergeConfig(rootParsed, fileData) : fileData;
// Warn about unrecognized top-level keys so users don't silently lose config.
// Derived from config-set's VALID_CONFIG_KEYS (canonical source) plus internal-only
// keys that loadConfig handles but config-set doesn't expose. This avoids maintaining
// a hardcoded duplicate that drifts when new config keys are added.
const { VALID_CONFIG_KEYS } = require('./config.cjs');
// DYNAMIC_KEY_PATTERNS supplies topLevel for each pattern so adding a new
// dynamic-pattern namespace to config-schema.cjs automatically updates this set
// — no more drift between the read side and the write side (#2687).
const { VALID_CONFIG_KEYS, DYNAMIC_KEY_PATTERNS } = require('./config-schema.cjs');
const KNOWN_TOP_LEVEL = new Set([
// Extract top-level key names from dot-notation paths (e.g., 'workflow.research' → 'workflow')
...[...VALID_CONFIG_KEYS].map(k => k.split('.')[0]),
// Section containers that hold nested sub-keys
'git', 'workflow', 'planning', 'hooks', 'features',
// Dynamic-pattern top-level containers (e.g. review, model_profile_overrides)
...DYNAMIC_KEY_PATTERNS.map(p => p.topLevel),
// Internal keys loadConfig reads but config-set doesn't expose
'model_overrides', 'agent_skills', 'context_window', 'resolve_model_ids', 'claude_md_path',
'model_overrides', 'context_window', 'resolve_model_ids', 'claude_md_path',
// Deprecated keys (still accepted for migration, not in config-set)
'depth', 'multiRepo',
]);
@@ -339,6 +401,13 @@ function loadConfig(cwd) {
);
}
// #2517 — Validate runtime/tier values for keys that loadConfig handles but
// can be edited directly into config.json (bypassing config-set's enum check).
// This catches typos like `runtime: "codx"` and `model_profile_overrides.codex.banana`
// at read time without rejecting back-compat values from new runtimes
// (review findings #10, #13).
_warnUnknownProfileOverrides(parsed, '.planning/config.json');
const get = (key, nested) => {
if (parsed[key] !== undefined) return parsed[key];
if (nested && parsed[nested.section] && parsed[nested.section][nested.field] !== undefined) {
@@ -374,6 +443,7 @@ function loadConfig(cwd) {
plan_checker: get('plan_checker', { section: 'workflow', field: 'plan_check' }) ?? defaults.plan_checker,
verifier: get('verifier', { section: 'workflow', field: 'verifier' }) ?? defaults.verifier,
nyquist_validation: get('nyquist_validation', { section: 'workflow', field: 'nyquist_validation' }) ?? defaults.nyquist_validation,
post_planning_gaps: get('post_planning_gaps', { section: 'workflow', field: 'post_planning_gaps' }) ?? defaults.post_planning_gaps,
parallelization,
brave_search: get('brave_search') ?? defaults.brave_search,
firecrawl: get('firecrawl') ?? defaults.firecrawl,
@@ -390,15 +460,42 @@ function loadConfig(cwd) {
project_code: get('project_code') ?? defaults.project_code,
subagent_timeout: get('subagent_timeout', { section: 'workflow', field: 'subagent_timeout' }) ?? defaults.subagent_timeout,
model_overrides: parsed.model_overrides || null,
// #2517 — runtime-aware profiles. `runtime` defaults to null (back-compat).
// When null, resolveModelInternal preserves today's Claude-native behavior.
// NOTE: `runtime` and `model_profile_overrides` are intentionally read
// flat-only (not via `get()` with a workflow.X fallback) — they are
// top-level keys per docs/CONFIGURATION.md. The lighter-touch decision
// here was to document the constraint rather than introduce nested
// resolution edge cases for two new keys (review finding #9). The
// schema validation in `_warnUnknownProfileOverrides` runs against the
// raw `parsed` blob, so direct `.planning/config.json` edits surface
// unknown runtime/tier names at load time, not silently (review finding #10).
runtime: parsed.runtime || null,
model_profile_overrides: parsed.model_profile_overrides || null,
agent_skills: parsed.agent_skills || {},
manager: parsed.manager || {},
response_language: get('response_language') || null,
claude_md_path: get('claude_md_path') || null,
claude_md_assembly: parsed.claude_md_assembly || null,
};
} catch {
// Fall back to ~/.gsd/defaults.json only for truly pre-project contexts (#1683)
// If .planning/ exists, the project is initialized — just missing config.json
// If .planning/ exists, the project is initialized — just missing config.json.
// When GSD_WORKSTREAM is set and root config was loaded, the workstream config
// doesn't exist — treat root config as the effective config for this workstream.
if (fs.existsSync(planningDir(cwd))) {
if (rootParsed) {
// Workstream has no config.json: re-parse using root config as the sole source.
// Temporarily clear GSD_WORKSTREAM so planningDir() returns root .planning/,
// then reload. This is safe: rootParsed is already the root config object.
const savedWs = process.env.GSD_WORKSTREAM;
delete process.env.GSD_WORKSTREAM;
try {
return loadConfig(cwd);
} finally {
process.env.GSD_WORKSTREAM = savedWs;
}
}
return defaults;
}
try {
@@ -414,6 +511,9 @@ function loadConfig(cwd) {
plan_checker: globalDefaults.plan_checker ?? defaults.plan_checker,
verifier: globalDefaults.verifier ?? defaults.verifier,
nyquist_validation: globalDefaults.nyquist_validation ?? defaults.nyquist_validation,
post_planning_gaps: globalDefaults.post_planning_gaps
?? globalDefaults.workflow?.post_planning_gaps
?? defaults.post_planning_gaps,
parallelization: globalDefaults.parallelization ?? defaults.parallelization,
text_mode: globalDefaults.text_mode ?? defaults.text_mode,
resolve_model_ids: globalDefaults.resolve_model_ids ?? defaults.resolve_model_ids,
@@ -1280,21 +1380,42 @@ function extractCurrentMilestone(content, cwd) {
const sectionStart = sectionMatch.index;
// Find the end: next milestone heading at same or higher level, or EOF
// Find the end: next milestone heading at same or higher level, or EOF.
// Milestone headings look like: ## v2.0, ## Roadmap v2.0, ## ✅ v1.0, etc.
// Scan line-by-line so that heading-like lines inside fenced code blocks
// (``` or ~~~) are not mistaken for milestone boundaries. See #2787.
const headingLevel = sectionMatch[1].match(/^(#{1,3})\s/)[1].length;
const restContent = content.slice(sectionStart + sectionMatch[0].length);
// Exclude phase headings (e.g. "### Phase 12: v1.0 Tech-Debt Closure") from
// being treated as milestone boundaries just because they mention vX.Y in
// the title. Phase headings always start with the literal `Phase `. See #2619.
const nextMilestonePattern = new RegExp(
`^#{1,${headingLevel}}\\s+(?:.*v\\d+\\.\\d+|✅|📋|🚧)`,
'mi'
`^#{1,${headingLevel}}\\s+(?!Phase\\s+\\S)(?:.*v\\d+\\.\\d+|✅|📋|🚧)`,
'i'
);
const nextMatch = restContent.match(nextMilestonePattern);
let sectionEnd;
if (nextMatch) {
sectionEnd = sectionStart + sectionMatch[0].length + nextMatch.index;
} else {
sectionEnd = content.length;
let sectionEnd = content.length;
let fenceChar = null;
let fenceLen = 0;
let charOffset = 0;
for (const line of restContent.split('\n')) {
const fenceMatch = line.match(/^\s{0,3}((?:`{3,}|~{3,}))(.*)/);
if (fenceMatch) {
const char = fenceMatch[1][0];
const len = fenceMatch[1].length;
const trailing = fenceMatch[2] || '';
if (!fenceChar) {
fenceChar = char;
fenceLen = len;
} else if (char === fenceChar && len >= fenceLen && /^\s*$/.test(trailing)) {
fenceChar = null;
fenceLen = 0;
}
} else if (!fenceChar && nextMilestonePattern.test(line)) {
sectionEnd = sectionStart + sectionMatch[0].length + charOffset;
break;
}
charOffset += line.length + 1;
}
// Return everything before the current milestone section (non-milestone content
@@ -1334,9 +1455,19 @@ function getRoadmapPhaseInternal(cwd, phaseNum) {
try {
const content = extractCurrentMilestone(fs.readFileSync(roadmapPath, 'utf-8'), cwd);
const escapedPhase = escapeRegex(phaseNum.toString());
// Match both numeric (Phase 1:) and custom (Phase PROJ-42:) headers
const phasePattern = new RegExp(`#{2,4}\\s*Phase\\s+${escapedPhase}:\\s*([^\\n]+)`, 'i');
// Strip leading zeros from purely numeric phase numbers so "03" matches "Phase 3:"
// in canonical ROADMAP headings. Non-numeric IDs (e.g. "PROJ-42") are kept as-is.
const normalized = /^\d+$/.test(String(phaseNum))
? String(phaseNum).replace(/^0+(?=\d)/, '')
: String(phaseNum);
const escapedPhase = escapeRegex(normalized);
// Match both numeric and custom (Phase PROJ-42:) headers.
// For purely numeric phases allow optional leading zeros so both "Phase 1:" and
// "Phase 01:" are matched regardless of whether the ROADMAP uses padded numbers.
const isNumeric = /^\d+$/.test(String(phaseNum));
const phasePattern = isNumeric
? new RegExp(`#{2,4}\\s*Phase\\s+0*${escapedPhase}:\\s*([^\\n]+)`, 'i')
: new RegExp(`#{2,4}\\s*Phase\\s+${escapedPhase}:\\s*([^\\n]+)`, 'i');
const headerMatch = content.match(phasePattern);
if (!headerMatch) return null;
@@ -1433,37 +1564,243 @@ function checkAgentsInstalled() {
* Users can override with model_overrides in config.json for custom/latest models.
*/
const MODEL_ALIAS_MAP = {
'opus': 'claude-opus-4-7',
'sonnet': 'claude-sonnet-4-6',
'haiku': 'claude-haiku-4-5',
};
/**
* #2517 — runtime-aware tier resolution.
* Maps `model_profile` tiers (opus/sonnet/haiku) to runtime-native model IDs and
* (where supported) reasoning_effort settings.
*
* Each entry: { model: <id>, reasoning_effort?: <level> }
*
* `claude` mirrors MODEL_ALIAS_MAP — present for symmetry so `runtime: "claude"`
* resolves through the same code path. `codex` defaults are taken from the spec
* in #2517. Unknown runtimes fall back to the Claude alias to avoid emitting
* provider-specific IDs the runtime cannot accept.
*/
const RUNTIME_PROFILE_MAP = {
claude: Object.fromEntries(
Object.entries(MODEL_ALIAS_MAP).map(([tier, model]) => [tier, { model }])
),
codex: {
opus: { model: 'gpt-5.4', reasoning_effort: 'xhigh' },
sonnet: { model: 'gpt-5.3-codex', reasoning_effort: 'medium' },
haiku: { model: 'gpt-5.4-mini', reasoning_effort: 'medium' },
},
gemini: {
opus: { model: 'gemini-3-pro' },
sonnet: { model: 'gemini-3-flash' },
haiku: { model: 'gemini-2.5-flash-lite' },
},
qwen: {
opus: { model: 'qwen3-max-2026-01-23' },
sonnet: { model: 'qwen3-coder-plus' },
haiku: { model: 'qwen3-coder-next' },
},
opencode: {
opus: { model: 'anthropic/claude-opus-4-7' },
sonnet: { model: 'anthropic/claude-sonnet-4-6' },
haiku: { model: 'anthropic/claude-haiku-4-5' },
},
copilot: {
opus: { model: 'claude-opus-4-7' },
sonnet: { model: 'claude-sonnet-4-6' },
haiku: { model: 'claude-haiku-4-5' },
},
};
const RUNTIMES_WITH_REASONING_EFFORT = new Set(['codex']);
/**
* Tier enum allowed under `model_profile_overrides[runtime][tier]`. Mirrors the
* regex in `config-schema.cjs` (DYNAMIC_KEY_PATTERNS) so loadConfig surfaces the
* same constraint at read time, not only at config-set time (review finding #10).
*/
const RUNTIME_OVERRIDE_TIERS = new Set(['opus', 'sonnet', 'haiku']);
/**
* Allowlist of runtime names the install pipeline currently knows how to emit
* native model IDs for. Synced with `getDirName` in `bin/install.js` and the
* runtime list in `docs/CONFIGURATION.md`. Free-string runtimes outside this
* set are still accepted (#2517 deliberately leaves the runtime field open) —
* a warning fires once at loadConfig so a typo like `runtime: "codx"` does not
* silently fall back to Claude defaults (review findings #10, #13).
*/
const KNOWN_RUNTIMES = new Set([
'claude', 'codex', 'opencode', 'kilo', 'gemini', 'qwen',
'copilot', 'cursor', 'windsurf', 'augment', 'trae', 'codebuddy',
'antigravity', 'cline',
]);
const _warnedConfigKeys = new Set();
/**
* Emit a one-time stderr warning for unknown runtime/tier keys in a parsed
* config blob. Idempotent across calls — the same (file, key) pair only warns
* once per process so loadConfig can be called repeatedly without spamming.
*
* Does NOT reject — preserves back-compat for users on a runtime not yet in the
* allowlist (the new-runtime case must always be possible without code changes).
*/
function _warnUnknownProfileOverrides(parsed, configLabel) {
if (!parsed || typeof parsed !== 'object') return;
const runtime = parsed.runtime;
if (runtime && typeof runtime === 'string' && !KNOWN_RUNTIMES.has(runtime)) {
const key = `${configLabel}::runtime::${runtime}`;
if (!_warnedConfigKeys.has(key)) {
_warnedConfigKeys.add(key);
try {
process.stderr.write(
`gsd: warning — config key "runtime" has unknown value "${runtime}". ` +
`Known runtimes: ${[...KNOWN_RUNTIMES].sort().join(', ')}. ` +
`Resolution will fall back to safe defaults. (#2517)\n`
);
} catch { /* stderr might be closed in some test harnesses */ }
}
}
const overrides = parsed.model_profile_overrides;
if (!overrides || typeof overrides !== 'object') return;
for (const [overrideRuntime, tierMap] of Object.entries(overrides)) {
if (!KNOWN_RUNTIMES.has(overrideRuntime)) {
const key = `${configLabel}::override-runtime::${overrideRuntime}`;
if (!_warnedConfigKeys.has(key)) {
_warnedConfigKeys.add(key);
try {
process.stderr.write(
`gsd: warning — model_profile_overrides.${overrideRuntime}.* uses ` +
`unknown runtime "${overrideRuntime}". Known runtimes: ` +
`${[...KNOWN_RUNTIMES].sort().join(', ')}. (#2517)\n`
);
} catch { /* ok */ }
}
}
if (!tierMap || typeof tierMap !== 'object') continue;
for (const tierName of Object.keys(tierMap)) {
if (!RUNTIME_OVERRIDE_TIERS.has(tierName)) {
const key = `${configLabel}::override-tier::${overrideRuntime}.${tierName}`;
if (!_warnedConfigKeys.has(key)) {
_warnedConfigKeys.add(key);
try {
process.stderr.write(
`gsd: warning — model_profile_overrides.${overrideRuntime}.${tierName} ` +
`uses unknown tier "${tierName}". Allowed tiers: opus, sonnet, haiku. (#2517)\n`
);
} catch { /* ok */ }
}
}
}
}
}
// Internal helper exposed for tests so per-process warning state can be reset
// between cases that intentionally exercise the warning path repeatedly.
function _resetRuntimeWarningCacheForTests() {
_warnedConfigKeys.clear();
}
/**
* #2517 — Resolve the runtime-aware tier entry for (runtime, tier).
*
* Single source of truth shared by core.cjs (resolveModelInternal /
* resolveReasoningEffortInternal) and bin/install.js (Codex/OpenCode TOML emit
* paths). Always merges built-in defaults with user overrides at the field
* level so partial overrides keep the unspecified fields:
*
* `{ codex: { opus: "gpt-5-pro" } }` keeps reasoning_effort: 'xhigh'
* `{ codex: { opus: { reasoning_effort: 'low' } } }` keeps model: 'gpt-5.4'
*
* Without this field-merge, the documented string-shorthand example silently
* dropped reasoning_effort and a partial-object override silently dropped the
* model — both reported as critical findings in the #2609 review.
*
* Inputs:
* - runtime: string (e.g. 'codex', 'claude', 'opencode')
* - tier: 'opus' | 'sonnet' | 'haiku'
* - overrides: optional `model_profile_overrides` blob (may be null/undefined)
*
* Returns `{ model: string, reasoning_effort?: string } | null`.
*/
function resolveTierEntry({ runtime, tier, overrides }) {
if (!runtime || !tier) return null;
const builtin = RUNTIME_PROFILE_MAP[runtime]?.[tier] || null;
const userRaw = overrides?.[runtime]?.[tier];
// String shorthand from CONFIGURATION.md examples — `{ codex: { opus: "gpt-5-pro" } }`.
// Treat as `{ model: "gpt-5-pro" }` so the field-merge below still preserves
// reasoning_effort from the built-in defaults.
let userEntry = null;
if (userRaw) {
userEntry = typeof userRaw === 'string' ? { model: userRaw } : userRaw;
}
if (!builtin && !userEntry) return null;
// Field-merge: user fields win, built-in fills the gaps.
return { ...(builtin || {}), ...(userEntry || {}) };
}
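// Worked examples of the field-merge (built-in values taken from
// RUNTIME_PROFILE_MAP above; the override blobs are illustrative):
//
//   resolveTierEntry({ runtime: 'codex', tier: 'opus', overrides: null })
//     → { model: 'gpt-5.4', reasoning_effort: 'xhigh' }
//
//   resolveTierEntry({ runtime: 'codex', tier: 'opus',
//                      overrides: { codex: { opus: 'gpt-5-pro' } } })  // string shorthand
//     → { model: 'gpt-5-pro', reasoning_effort: 'xhigh' }             // effort preserved
//
//   resolveTierEntry({ runtime: 'codex', tier: 'opus',
//                      overrides: { codex: { opus: { reasoning_effort: 'low' } } } })
//     → { model: 'gpt-5.4', reasoning_effort: 'low' }                 // model preserved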
/**
* Convenience wrapper used by resolveModelInternal / resolveReasoningEffortInternal.
* Pulls runtime + overrides out of a loaded config and delegates to resolveTierEntry.
*/
function _resolveRuntimeTier(config, tier) {
return resolveTierEntry({
runtime: config.runtime,
tier,
overrides: config.model_profile_overrides,
});
}
function resolveModelInternal(cwd, agentType) {
const config = loadConfig(cwd);
// 1. Per-agent override — always respected; highest precedence.
// Users who set fully-qualified model IDs (e.g., "openai/gpt-5.4") get exactly that.
const override = config.model_overrides?.[agentType];
if (override) {
return override;
}
// 2. Compute the tier (opus/sonnet/haiku) for this agent under the active profile.
const profile = String(config.model_profile || 'balanced').toLowerCase();
const agentModels = MODEL_PROFILES[agentType];
const tier = agentModels ? (agentModels[profile] || agentModels['balanced']) : null;
// 3. Runtime-aware resolution (#2517) — only when `runtime` is explicitly set
// to a non-Claude runtime. `runtime: "claude"` is the implicit default and is
// treated as a no-op here so it does not silently override `resolve_model_ids:
// "omit"` (review finding #4). Deliberate ordering for non-Claude runtimes:
// explicit opt-in beats `resolve_model_ids: "omit"` so users on Codex installs
// that auto-set "omit" can still flip on tiered behavior by setting runtime
// alone. The 'inherit' profile is preserved verbatim.
if (config.runtime && config.runtime !== 'claude' && profile !== 'inherit' && tier) {
const entry = _resolveRuntimeTier(config, tier);
if (entry?.model) return entry.model;
// Unknown runtime with no user-supplied overrides — fall through to Claude-safe
// default rather than emit an ID the runtime can't accept.
}
// 4. resolve_model_ids: "omit" — return empty string so the runtime uses its
// configured default model. For non-Claude runtimes (OpenCode, Codex, etc.) that
// don't recognize Claude aliases. Set automatically during install. See #1156.
if (config.resolve_model_ids === 'omit') {
return '';
}
// 5. Profile lookup (Claude-native default).
if (!agentModels) return 'sonnet';
if (profile === 'inherit') return 'inherit';
// `tier` is guaranteed truthy here: agentModels exists, and MODEL_PROFILES
// entries always define `balanced`, so `agentModels[profile] || agentModels.balanced`
// resolves to a string. Keep the local for readability — no defensive fallback.
const alias = tier;
// resolve_model_ids: true — map alias to full Claude model ID.
// Prevents 404s when the Task tool passes aliases directly to the API.
if (config.resolve_model_ids) {
return MODEL_ALIAS_MAP[alias] || alias;
}
@@ -1471,6 +1808,41 @@ function resolveModelInternal(cwd, agentType) {
return alias;
}
/**
* #2517 — Resolve runtime-specific reasoning_effort for an agent.
* Returns null unless:
* - `runtime` is explicitly set in config,
* - the runtime supports reasoning_effort (currently: codex),
* - profile is not 'inherit',
* - the resolved tier entry has a `reasoning_effort` value.
*
* Never returns a value for Claude — keeps reasoning_effort out of Claude spawn paths.
*/
function resolveReasoningEffortInternal(cwd, agentType) {
const config = loadConfig(cwd);
if (!config.runtime) return null;
// Strict allowlist: reasoning_effort only propagates for runtimes whose
// install path actually accepts it. Adding a new runtime here is the only
// way to enable effort propagation — overrides cannot bypass the gate.
// Without this, a typo in `runtime` (e.g. `"codx"`) plus a user override
// for that typo would leak `xhigh` into a Claude or unknown install
// (review finding #3).
if (!RUNTIMES_WITH_REASONING_EFFORT.has(config.runtime)) return null;
// Per-agent override means user supplied a fully-qualified ID; reasoning_effort
// for that case must be set via per-agent mechanism, not tier inference.
if (config.model_overrides?.[agentType]) return null;
const profile = String(config.model_profile || 'balanced').toLowerCase();
if (profile === 'inherit') return null;
const agentModels = MODEL_PROFILES[agentType];
if (!agentModels) return null;
const tier = agentModels[profile] || agentModels['balanced'];
if (!tier) return null;
const entry = _resolveRuntimeTier(config, tier);
return entry?.reasoning_effort || null;
}
// ─── Summary body helpers ─────────────────────────────────────────────────
/**
@@ -1481,11 +1853,28 @@ function resolveModelInternal(cwd, agentType) {
*/
function extractOneLinerFromBody(content) {
if (!content) return null;
// Normalize EOLs so matching works for LF and CRLF files.
const normalized = content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
// Strip frontmatter first
const body = normalized.replace(/^---\n[\s\S]*?\n---\n*/, '');
// Find the first **...** span on a line after a # heading.
// Two supported template forms:
// 1) Labeled: **One-liner:** Real prose here. (bug #2660 — new template)
// 2) Bare: **Real prose here.** (legacy template)
// For (1), the first bold span ends in a colon and the prose that follows
// on the same line is the one-liner. For (2), the bold span itself is the
// one-liner.
const match = body.match(/^#[^\n]*\n+\*\*([^*\n]+)\*\*([^\n]*)/m);
if (!match) return null;
const boldInner = match[1].trim();
const afterBold = match[2];
// Labeled form: bold span is a "Label:" prefix — capture prose after it.
if (/:\s*$/.test(boldInner)) {
const prose = afterBold.trim();
return prose.length > 0 ? prose : null;
}
// Bare form: the bold content itself is the one-liner.
return boldInner.length > 0 ? boldInner : null;
}
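// Illustrative inputs (hypothetical documents) and results:
//   '# Title\n\n**One-liner:** Ship the MVP.'  → 'Ship the MVP.'   (labeled form)
//   '# Title\n\n**Ship the MVP.**'             → 'Ship the MVP.'   (bare form)
//   '# Title\n\n**One-liner:**'                → null              (label with no prose)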
// ─── Misc utilities ───────────────────────────────────────────────────────────
@@ -1509,6 +1898,50 @@ function getMilestoneInfo(cwd) {
try {
const roadmap = fs.readFileSync(path.join(planningDir(cwd), 'ROADMAP.md'), 'utf-8');
// 0. Prefer STATE.md milestone: frontmatter as the authoritative source.
// This prevents falling through to a regex that may match an old heading
// when the active milestone's 🚧 marker is inside a <summary> tag without
// **bold** formatting (bug #2409).
let stateVersion = null;
if (cwd) {
try {
const statePath = path.join(planningDir(cwd), 'STATE.md');
if (fs.existsSync(statePath)) {
const stateRaw = fs.readFileSync(statePath, 'utf-8');
const m = stateRaw.match(/^milestone:\s*(.+)/m);
if (m) stateVersion = m[1].trim();
}
} catch { /* intentionally empty */ }
}
if (stateVersion) {
// Look up the name for this version in ROADMAP.md
const escapedVer = escapeRegex(stateVersion);
// Match heading-format: ## Roadmap v2.9: Name or ## v2.9 Name
const headingMatch = roadmap.match(
new RegExp(`##[^\\n]*${escapedVer}[:\\s]+([^\\n(]+)`, 'i')
);
if (headingMatch) {
// If the heading line contains ✅ the milestone is already shipped.
// Fall through to normal detection so the NEW active milestone is returned
// instead of the stale shipped one still recorded in STATE.md.
if (!headingMatch[0].includes('✅')) {
return { version: stateVersion, name: headingMatch[1].trim() };
}
// Shipped milestone — do not early-return; fall through to normal detection below.
} else {
// Match list-format: 🚧 **v2.9 Name** or 🚧 v2.9 Name
const listMatch = roadmap.match(
new RegExp(`🚧\\s*\\*?\\*?${escapedVer}\\s+([^*\\n]+)`, 'i')
);
if (listMatch) {
return { version: stateVersion, name: listMatch[1].trim() };
}
// Version found in STATE.md but no name match in ROADMAP — return bare version
return { version: stateVersion, name: 'milestone' };
}
}
// First: check for list-format roadmaps using 🚧 (in-progress) marker
// e.g. "- 🚧 **v2.1 Belgium** — Phases 24-28 (in progress)"
// e.g. "- 🚧 **v1.2.1 Tech Debt** — Phases 1-8 (in progress)"
@@ -1520,11 +1953,14 @@ function getMilestoneInfo(cwd) {
};
}
// Second: heading-format roadmaps — strip shipped milestones.
// <details> blocks are stripped by stripShippedMilestones; heading-format ✅ markers
// are excluded by the negative lookahead below so a stale STATE.md version (or any
// shipped ✅ heading) never wins over the first non-shipped milestone heading.
const cleaned = stripShippedMilestones(roadmap);
// Extract version and name from the same ## heading for consistency
// Negative lookahead skips headings that contain ✅ (shipped milestone marker).
// Supports 2+ segment versions: v1.2, v1.2.1, v2.0.1, etc.
const headingMatch = cleaned.match(/## (?!.*✅).*v(\d+(?:\.\d+)+)[:\s]+([^\n(]+)/);
if (headingMatch) {
return {
version: 'v' + headingMatch[1],
@@ -1566,7 +2002,7 @@ function getMilestonePhaseFilter(cwd) {
}
const normalized = new Set(
[...milestonePhaseNums].map(n => (n.replace(/^0+(?=\d)/, '') || '0').toLowerCase())
);
function isDirInMilestone(dirName) {
@@ -1702,6 +2138,13 @@ module.exports = {
getArchivedPhaseDirs,
getRoadmapPhaseInternal,
resolveModelInternal,
resolveReasoningEffortInternal,
RUNTIME_PROFILE_MAP,
RUNTIMES_WITH_REASONING_EFFORT,
KNOWN_RUNTIMES,
RUNTIME_OVERRIDE_TIERS,
resolveTierEntry,
_resetRuntimeWarningCacheForTests,
pathExistsInternal,
generateSlugInternal,
getMilestoneInfo,

View File

@@ -0,0 +1,48 @@
'use strict';
/**
* Shared parser for CONTEXT.md `<decisions>` blocks.
*
* Used by:
* - gap-checker.cjs (#2493 post-planning gap analysis)
* - intended for #2492 (plan-phase decision gate, verify-phase decision validator)
*
* Format produced by discuss-phase.md:
*
* <decisions>
* ## Implementation Decisions
*
* ### Category
* - **D-01:** Decision text
* - **D-02:** Another decision
* </decisions>
*
* D-IDs outside the <decisions> block are ignored. Missing block returns [].
*/
/**
* Parse the <decisions> section of a CONTEXT.md string.
*
* @param {string|null|undefined} contextMd - File contents, may be empty/missing.
* @returns {Array<{id: string, text: string}>}
*/
function parseDecisions(contextMd) {
if (!contextMd || typeof contextMd !== 'string') return [];
const blockMatch = contextMd.match(/<decisions>([\s\S]*?)<\/decisions>/);
if (!blockMatch) return [];
const block = blockMatch[1];
const decisionRe = /^\s*-\s*\*\*(D-[A-Za-z0-9_-]+):\*\*\s*(.+?)\s*$/gm;
const out = [];
const seen = new Set();
let m;
while ((m = decisionRe.exec(block)) !== null) {
const id = m[1];
if (seen.has(id)) continue;
seen.add(id);
out.push({ id, text: m[2] });
}
return out;
}
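// Example (illustrative CONTEXT.md fragment; duplicate D-IDs are dropped):
//   parseDecisions('<decisions>\n- **D-01:** Use SQLite\n- **D-01:** dup ignored\n</decisions>')
//     → [{ id: 'D-01', text: 'Use SQLite' }]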
module.exports = { parseDecisions };

View File

@@ -0,0 +1,378 @@
/**
* Codebase Drift Detection (#2003)
*
* Detects structural drift between a committed codebase and the
* `.planning/codebase/STRUCTURE.md` map produced by `gsd-codebase-mapper`.
*
* Four categories of drift element:
* - new_dir → a newly-added file whose directory prefix does not appear
* in STRUCTURE.md
* - barrel → a newly-added barrel export at
* (packages|apps)/<name>/src/index.(ts|tsx|js|mjs|cjs)
* - migration → a newly-added migration file under one of the recognized
* migration directories (supabase, prisma, drizzle, src/migrations, …)
* - route → a newly-added route module under a `routes/` or `api/` dir
*
* Each file is counted at most once; when a file matches multiple categories
* the most specific category wins (migration > route > barrel > new_dir).
*
* Design decisions (see PR for full rubber-duck):
* - The library is pure. It takes parsed git diff output and returns a
* structured result. The CLI/workflow layer is responsible for running
* git and for spawning mappers.
* - `last_mapped_commit` is stored as YAML-style frontmatter at the top of
* each `.planning/codebase/*.md` file. This keeps the baseline attached
* to the file, survives git moves, and avoids a sidecar JSON.
* - The detector NEVER throws on malformed input — it returns a
* `{ skipped: true }` result. The phase workflow depends on this
* non-blocking guarantee.
*/
'use strict';
const fs = require('node:fs');
// ─── Constants ───────────────────────────────────────────────────────────────
const DRIFT_CATEGORIES = Object.freeze(['new_dir', 'barrel', 'migration', 'route']);
// Category priority when a single file matches multiple rules.
// Higher index = more specific = wins.
const CATEGORY_PRIORITY = { new_dir: 0, barrel: 1, route: 2, migration: 3 };
const BARREL_RE = /^(packages|apps)\/[^/]+\/src\/index\.(ts|tsx|js|mjs|cjs)$/;
const MIGRATION_RES = [
/^supabase\/migrations\/.+\.sql$/,
/^prisma\/migrations\/.+/,
/^drizzle\/meta\/.+/,
/^drizzle\/migrations\/.+/,
/^src\/migrations\/.+\.(ts|js|sql)$/,
/^db\/migrations\/.+\.(sql|ts|js)$/,
/^migrations\/.+\.(sql|ts|js)$/,
];
const ROUTE_RES = [
/^(apps|packages)\/[^/]+\/src\/routes\/.+\.(ts|tsx|js|jsx|mjs|cjs)$/,
/^src\/routes\/.+\.(ts|tsx|js|jsx|mjs|cjs)$/,
/^src\/api\/.+\.(ts|tsx|js|jsx|mjs|cjs)$/,
/^(apps|packages)\/[^/]+\/src\/api\/.+\.(ts|tsx|js|jsx|mjs|cjs)$/,
];
// A conservative allowlist for `--paths` arguments passed to the mapper:
// repo-relative path components separated by /, containing only
// alphanumerics, dash, underscore, and dot (no `..`, no `/..`).
const SAFE_PATH_RE = /^(?!.*\.\.)(?:[A-Za-z0-9_.][A-Za-z0-9_.\-]*)(?:\/[A-Za-z0-9_.][A-Za-z0-9_.\-]*)*$/;
// ─── Classification ──────────────────────────────────────────────────────────
/**
* Classify a single file path into a drift category or null.
*
* @param {string} file - repo-relative path, forward slashes.
* @returns {'barrel'|'migration'|'route'|null}
*/
function classifyFile(file) {
if (typeof file !== 'string' || !file) return null;
const norm = file.replace(/\\/g, '/');
if (MIGRATION_RES.some((r) => r.test(norm))) return 'migration';
if (ROUTE_RES.some((r) => r.test(norm))) return 'route';
if (BARREL_RE.test(norm)) return 'barrel';
return null;
}
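// Illustrative classifications (paths are hypothetical):
//   classifyFile('supabase/migrations/0001_init.sql')  → 'migration'
//   classifyFile('packages/core/src/index.ts')         → 'barrel'
//   classifyFile('src/routes/login.ts')                → 'route'
//   classifyFile('README.md')                          → null (ordinary file)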
/**
* True iff any prefix of `file` (dir1, dir1/dir2, …) appears as a substring
* of `structureMd`. Used to decide whether a file is in "mapped territory".
*
* Matching is deliberately substring-based — STRUCTURE.md is free-form
* markdown, not a structured manifest. If the map mentions `src/lib/` the
* check `structureMd.includes('src/lib')` holds.
*/
function isPathMapped(file, structureMd) {
const norm = file.replace(/\\/g, '/');
const parts = norm.split('/');
// Check prefixes from longest to shortest; any hit means "mapped".
for (let i = parts.length - 1; i >= 1; i--) {
const prefix = parts.slice(0, i).join('/');
if (structureMd.includes(prefix)) return true;
}
// Fallback: count as mapped when the top-level name itself is mentioned,
// either as a `dir/` path or in backticks (covers root-level files, which
// have no prefix and skip the loop above).
if (parts.length > 0 && structureMd.includes(parts[0] + '/')) return true;
if (parts.length > 0 && structureMd.includes('`' + parts[0] + '`')) return true;
return false;
}
// ─── Main detection ──────────────────────────────────────────────────────────
/**
* Detect codebase drift.
*
* @param {object} input
* @param {string[]} input.addedFiles - files with git status A (new)
* @param {string[]} input.modifiedFiles - files with git status M
* @param {string[]} input.deletedFiles - files with git status D
* @param {string|null|undefined} input.structureMd - contents of STRUCTURE.md
* @param {number} [input.threshold=3] - min number of drift elements that triggers action
* @param {'warn'|'auto-remap'} [input.action='warn']
* @returns {object} result
*/
function detectDrift(input) {
try {
if (!input || typeof input !== 'object') {
return skipped('invalid-input');
}
const {
addedFiles,
modifiedFiles,
deletedFiles,
structureMd,
} = input;
const threshold = Number.isInteger(input.threshold) && input.threshold >= 1
? input.threshold
: 3;
const action = input.action === 'auto-remap' ? 'auto-remap' : 'warn';
if (structureMd === null || structureMd === undefined) {
return skipped('missing-structure-md');
}
if (typeof structureMd !== 'string') {
return skipped('invalid-structure-md');
}
const added = Array.isArray(addedFiles) ? addedFiles.filter((x) => typeof x === 'string') : [];
const modified = Array.isArray(modifiedFiles) ? modifiedFiles : [];
const deleted = Array.isArray(deletedFiles) ? deletedFiles : [];
// Build elements. One element per file, highest-priority category wins.
/** @type {{category: string, path: string}[]} */
const elements = [];
const seen = new Map();
for (const rawFile of added) {
const file = rawFile.replace(/\\/g, '/');
const specific = classifyFile(file);
let category = specific;
if (!category) {
if (!isPathMapped(file, structureMd)) {
category = 'new_dir';
} else {
continue; // mapped, known, ordinary file — not drift
}
}
// Dedup: if we've already counted this path at higher-or-equal priority, skip
const prior = seen.get(file);
if (prior && CATEGORY_PRIORITY[prior] >= CATEGORY_PRIORITY[category]) continue;
seen.set(file, category);
}
for (const [file, category] of seen.entries()) {
elements.push({ category, path: file });
}
// Sort for stable output.
elements.sort((a, b) =>
a.category === b.category
? a.path.localeCompare(b.path)
: a.category.localeCompare(b.category),
);
const actionRequired = elements.length >= threshold;
let directive = 'none';
let spawnMapper = false;
let affectedPaths = [];
let message = '';
if (actionRequired) {
directive = action;
affectedPaths = chooseAffectedPaths(elements.map((e) => e.path));
if (action === 'auto-remap') {
spawnMapper = true;
}
message = buildMessage(elements, affectedPaths, action);
}
return {
skipped: false,
elements,
actionRequired,
directive,
spawnMapper,
affectedPaths,
threshold,
action,
message,
counts: {
added: added.length,
modified: modified.length,
deleted: deleted.length,
},
};
} catch (err) {
// Non-blocking: never throw from this function.
return skipped('exception:' + (err && err.message ? err.message : String(err)));
}
}
function skipped(reason) {
return {
skipped: true,
reason,
elements: [],
actionRequired: false,
directive: 'none',
spawnMapper: false,
affectedPaths: [],
message: '',
};
}
function buildMessage(elements, affectedPaths, action) {
const byCat = {};
for (const e of elements) {
(byCat[e.category] ||= []).push(e.path);
}
const lines = [
`Codebase drift detected: ${elements.length} structural element(s) since last mapping.`,
'',
];
const labels = {
new_dir: 'New directories',
barrel: 'New barrel exports',
migration: 'New migrations',
route: 'New route modules',
};
for (const cat of ['new_dir', 'barrel', 'migration', 'route']) {
if (byCat[cat]) {
lines.push(`${labels[cat]}:`);
for (const p of byCat[cat]) lines.push(` - ${p}`);
}
}
lines.push('');
if (action === 'auto-remap') {
lines.push(`Auto-remap scheduled for paths: ${affectedPaths.join(', ')}`);
} else {
lines.push(
`Run /gsd-map-codebase --paths ${affectedPaths.join(',')} to refresh planning context.`,
);
}
return lines.join('\n');
}
// ─── Affected paths ──────────────────────────────────────────────────────────
/**
* Collapse a list of drifted file paths into a sorted, deduplicated list of
* the top-level directory prefixes (depth 2 when the repo uses an
* `<apps|packages>/<name>/…` layout; depth 1 otherwise).
*/
function chooseAffectedPaths(paths) {
const out = new Set();
for (const raw of paths || []) {
if (typeof raw !== 'string' || !raw) continue;
const file = raw.replace(/\\/g, '/');
const parts = file.split('/');
if (parts.length === 0) continue;
const top = parts[0];
if ((top === 'apps' || top === 'packages') && parts.length >= 2) {
out.add(`${top}/${parts[1]}`);
} else {
out.add(top);
}
}
return [...out].sort();
}
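// Example (hypothetical drifted files):
//   chooseAffectedPaths(['apps/web/src/routes/a.ts', 'migrations/001.sql'])
//     → ['apps/web', 'migrations']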
/**
* Filter `paths` to only those that are safe to splice into a mapper prompt.
* Any path that is absolute, contains traversal, or includes shell
* metacharacters is dropped.
*/
function sanitizePaths(paths) {
if (!Array.isArray(paths)) return [];
const out = [];
for (const p of paths) {
if (typeof p !== 'string') continue;
if (p.startsWith('/')) continue;
if (!SAFE_PATH_RE.test(p)) continue;
out.push(p);
}
return out;
}
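// Example (hypothetical inputs; traversal, absolute paths, and shell
// metacharacters are all dropped):
//   sanitizePaths(['src/lib', '../etc/passwd', '/abs/path', 'a;rm -rf'])
//     → ['src/lib']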
// ─── Frontmatter helpers ─────────────────────────────────────────────────────
const FRONTMATTER_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?/;
function parseFrontmatter(content) {
if (typeof content !== 'string') return { data: {}, body: '' };
const m = content.match(FRONTMATTER_RE);
if (!m) return { data: {}, body: content };
const data = {};
for (const line of m[1].split(/\r?\n/)) {
const kv = line.match(/^([A-Za-z0-9_][A-Za-z0-9_-]*):\s*(.*)$/);
if (!kv) continue;
data[kv[1]] = kv[2];
}
return { data, body: content.slice(m[0].length) };
}
function serializeFrontmatter(data, body) {
const keys = Object.keys(data);
if (keys.length === 0) return body;
const lines = ['---'];
for (const k of keys) lines.push(`${k}: ${data[k]}`);
lines.push('---');
return lines.join('\n') + '\n' + body;
}
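// Round-trip example (illustrative):
//   parseFrontmatter('---\nlast_mapped_commit: abc123\n---\nBody\n')
//     → { data: { last_mapped_commit: 'abc123' }, body: 'Body\n' }
//   serializeFrontmatter({ last_mapped_commit: 'abc123' }, 'Body\n')
//     → '---\nlast_mapped_commit: abc123\n---\nBody\n'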
/**
* Read `last_mapped_commit` from the frontmatter of a `.planning/codebase/*.md`
* file. Returns null if the file does not exist or has no frontmatter.
*/
function readMappedCommit(filePath) {
let content;
try {
content = fs.readFileSync(filePath, 'utf8');
} catch {
return null;
}
const { data } = parseFrontmatter(content);
const sha = data.last_mapped_commit;
return typeof sha === 'string' && sha.length > 0 ? sha : null;
}
/**
* Upsert `last_mapped_commit` and `last_mapped_at` into the frontmatter of
* the given file, preserving any other frontmatter keys and the body.
*/
function writeMappedCommit(filePath, commitSha, isoDate) {
// Symmetric with readMappedCommit (which returns null on missing files):
// tolerate a missing target by creating a minimal frontmatter-only file
// rather than throwing ENOENT. This matters when a mapper produces a new
// doc and the caller stamps it before any prior content existed.
let content = '';
try {
content = fs.readFileSync(filePath, 'utf8');
} catch (err) {
if (err.code !== 'ENOENT') throw err;
}
const { data, body } = parseFrontmatter(content);
data.last_mapped_commit = commitSha;
if (isoDate) data.last_mapped_at = isoDate;
fs.writeFileSync(filePath, serializeFrontmatter(data, body));
}
// ─── Exports ─────────────────────────────────────────────────────────────────
module.exports = {
DRIFT_CATEGORIES,
classifyFile,
detectDrift,
chooseAffectedPaths,
sanitizePaths,
readMappedCommit,
writeMappedCommit,
// Exposed for the CLI layer to reuse the same parser.
parseFrontmatter,
};

View File

@@ -242,17 +242,26 @@ function parseMustHavesBlock(content, blockName) {
// Only treat as a top-level list item if at the expected indent
if (indent === listItemIndent) {
if (current) items.push(current);
const afterDash = trimmed.slice(2);
const trimmedAfterDash = afterDash.trim();
// Check if it's a fully-quoted string (may contain ':' inside the quotes)
if ((trimmedAfterDash.startsWith('"') && trimmedAfterDash.endsWith('"')) ||
(trimmedAfterDash.startsWith("'") && trimmedAfterDash.endsWith("'"))) {
current = trimmedAfterDash.slice(1, -1);
} else if (!afterDash.includes(':')) {
// Simple string item: no colon means it's not a key-value pair
current = afterDash.replace(/^["']|["']$/g, '');
} else {
// Key-value on same line as dash: "- path: value"
// YAML KV always has at least one space after the colon: "key: value"
// Requiring \s+ rejects "Class::Method" and "db:seed" (no space after colon)
const kvMatch = afterDash.match(/^(\w+):\s+"?([^"]*)"?\s*$/);
if (kvMatch) {
current = {};
current[kvMatch[1]] = kvMatch[2];
} else {
// Looks like KV but doesn't match — treat as plain string (#2757)
current = afterDash.replace(/^["']|["']$/g, '');
}
}
continue;

View File

@@ -0,0 +1,183 @@
'use strict';
/**
* Post-planning gap analysis (#2493).
*
* Reads REQUIREMENTS.md (planning-root) and CONTEXT.md (per-phase) and compares
* each REQ-ID and D-ID against the concatenated text of all PLAN.md files in
* the phase directory. Emits a unified `Source | Item | Status` report.
*
* Gated on workflow.post_planning_gaps (default true). When false, returns
* { enabled: false } and does not scan.
*
* Coverage detection uses word-boundary regex matching to avoid false positives
* (REQ-1 must not match REQ-10).
*/
const fs = require('fs');
const path = require('path');
const { planningPaths, planningDir, escapeRegex, output, error } = require('./core.cjs');
const { parseDecisions } = require('./decisions.cjs');
/**
* Parse REQ-IDs from REQUIREMENTS.md content.
*
* Supports both checkbox (`- [ ] **REQ-NN** ...`) and traceability table
* (`| REQ-NN | ... |`) formats.
*/
function parseRequirements(reqMd) {
if (!reqMd || typeof reqMd !== 'string') return [];
const out = [];
const seen = new Set();
const checkboxRe = /^\s*-\s*\[[x ]\]\s*\*\*(REQ-[A-Za-z0-9_-]+)\*\*\s*(.*)$/gm;
let cm = checkboxRe.exec(reqMd);
while (cm !== null) {
const id = cm[1];
if (!seen.has(id)) {
seen.add(id);
out.push({ id, text: (cm[2] || '').trim() });
}
cm = checkboxRe.exec(reqMd);
}
const tableRe = /\|\s*(REQ-[A-Za-z0-9_-]+)\s*\|/g;
let tm = tableRe.exec(reqMd);
while (tm !== null) {
const id = tm[1];
if (!seen.has(id)) {
seen.add(id);
out.push({ id, text: '' });
}
tm = tableRe.exec(reqMd);
}
return out;
}
function detectCoverage(items, planText) {
return items.map(it => {
const re = new RegExp('\\b' + escapeRegex(it.id) + '\\b');
return {
source: it.source,
item: it.id,
status: re.test(planText) ? 'Covered' : 'Not covered',
};
});
}
function naturalKey(s) {
return String(s).replace(/(\d+)/g, (_, n) => n.padStart(8, '0'));
}
function sortRows(rows) {
const sourceOrder = { 'REQUIREMENTS.md': 0, 'CONTEXT.md': 1 };
return rows.slice().sort((a, b) => {
const so = (sourceOrder[a.source] ?? 99) - (sourceOrder[b.source] ?? 99);
if (so !== 0) return so;
return naturalKey(a.item).localeCompare(naturalKey(b.item));
});
}
function formatGapTable(rows) {
if (rows.length === 0) {
return '## Post-Planning Gap Analysis\n\nNo requirements or decisions to check.\n';
}
const header = '| Source | Item | Status |\n|--------|------|--------|';
const body = rows.map(r => {
const tick = r.status === 'Covered' ? '\u2713 Covered' : '\u2717 Not covered';
return `| ${r.source} | ${r.item} | ${tick} |`;
}).join('\n');
return `## Post-Planning Gap Analysis\n\n${header}\n${body}\n`;
}
function readGate(cwd) {
const cfgPath = path.join(planningDir(cwd), 'config.json');
try {
const raw = JSON.parse(fs.readFileSync(cfgPath, 'utf-8'));
if (raw && raw.workflow && typeof raw.workflow.post_planning_gaps === 'boolean') {
return raw.workflow.post_planning_gaps;
}
} catch { /* fall through */ }
return true;
}
function runGapAnalysis(cwd, phaseDir) {
if (!readGate(cwd)) {
return {
enabled: false,
rows: [],
table: '',
summary: 'workflow.post_planning_gaps disabled — skipping post-planning gap analysis',
counts: { total: 0, covered: 0, uncovered: 0 },
};
}
const absPhaseDir = path.isAbsolute(phaseDir) ? phaseDir : path.join(cwd, phaseDir);
const reqPath = planningPaths(cwd).requirements;
const reqMd = fs.existsSync(reqPath) ? fs.readFileSync(reqPath, 'utf-8') : '';
const reqItems = parseRequirements(reqMd).map(r => ({ ...r, source: 'REQUIREMENTS.md' }));
const ctxPath = path.join(absPhaseDir, 'CONTEXT.md');
const ctxMd = fs.existsSync(ctxPath) ? fs.readFileSync(ctxPath, 'utf-8') : '';
const dItems = parseDecisions(ctxMd).map(d => ({ ...d, source: 'CONTEXT.md' }));
const items = [...reqItems, ...dItems];
let planText = '';
try {
if (fs.existsSync(absPhaseDir)) {
const files = fs.readdirSync(absPhaseDir).filter(f => /-PLAN\.md$/.test(f));
planText = files.map(f => {
try { return fs.readFileSync(path.join(absPhaseDir, f), 'utf-8'); }
catch { return ''; }
}).join('\n');
}
} catch { /* unreadable */ }
if (items.length === 0) {
return {
enabled: true,
rows: [],
table: '## Post-Planning Gap Analysis\n\nNo requirements or decisions to check.\n',
summary: 'no requirements or decisions to check',
counts: { total: 0, covered: 0, uncovered: 0 },
};
}
const rows = sortRows(detectCoverage(items, planText));
const uncovered = rows.filter(r => r.status === 'Not covered').length;
const covered = rows.length - uncovered;
const summary = uncovered === 0
? `\u2713 All ${rows.length} items covered by plans`
: `\u26A0 ${uncovered} of ${rows.length} items not covered by any plan`;
return {
enabled: true,
rows,
table: formatGapTable(rows) + '\n' + summary + '\n',
summary,
counts: { total: rows.length, covered, uncovered },
};
}
function cmdGapAnalysis(cwd, args, raw) {
const idx = args.indexOf('--phase-dir');
if (idx === -1 || !args[idx + 1]) {
error('Usage: gap-analysis --phase-dir <path-to-phase-directory>');
}
const phaseDir = args[idx + 1];
const result = runGapAnalysis(cwd, phaseDir);
output(result, raw, result.table || result.summary);
}
module.exports = {
parseRequirements,
detectCoverage,
formatGapTable,
sortRows,
runGapAnalysis,
cmdGapAnalysis,
};
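The coverage rule documented in the header (REQ-1 must not match REQ-10) comes down to word-boundary matching; a self-contained sketch using the same approach as `detectCoverage`:

```javascript
// Escape regex metacharacters so IDs are matched literally.
function escapeRegex(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
// Covered iff the ID appears in the plan text as a whole token:
// \b after "REQ-1" fails when the next char is another digit.
function isCovered(id, planText) {
  return new RegExp('\\b' + escapeRegex(id) + '\\b').test(planText);
}
```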

View File

@@ -102,26 +102,55 @@ function checkGraphifyInstalled() {
}
/**
* Detect graphify version and check compatibility.
* Tested range: >=0.4.0,<1.0
*
* Detection strategy:
* 1. Try `graphify --version` (works for most CLI installations, incl. venv installs)
* 2. Fall back to python3 importlib.metadata (legacy / system Python path)
* 3. Return null version gracefully if both fail
*
* @returns {{ version: string|null, compatible: boolean|null, warning: string|null }}
*/
function checkGraphifyVersion() {
// Strategy 1: try `graphify --version` directly (2s timeout -- fast path)
const versionResult = childProcess.spawnSync('graphify', ['--version'], {
stdio: 'pipe',
encoding: 'utf-8',
timeout: 2000,
});
let versionStr = null;
if (!versionResult.error && versionResult.status === 0) {
const raw = (versionResult.stdout || '').trim();
// graphify --version may emit "graphify 0.4.23" or just "0.4.23"
const match = raw.match(/(\d+\.\d+(?:\.\d+)*)/);
if (match) {
versionStr = match[1];
}
}
// Strategy 2: fall back to python3 importlib.metadata
if (!versionStr) {
const pyResult = childProcess.spawnSync('python3', [
'-c',
'from importlib.metadata import version; print(version("graphifyy"))',
], {
stdio: 'pipe',
encoding: 'utf-8',
timeout: 5000,
});
if (!pyResult.error && pyResult.status === 0 && pyResult.stdout && pyResult.stdout.trim()) {
versionStr = pyResult.stdout.trim();
}
}
if (!versionStr) {
return { version: null, compatible: null, warning: 'Could not determine graphify version' };
}
const parts = versionStr.split('.').map(Number);
if (parts.length < 2 || parts.some(isNaN)) {

View File
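The two parsing steps the hunk above relies on can be isolated as pure functions (the helper names are hypothetical): pulling a dotted version out of `graphify --version` output, which may be `graphify 0.4.23` or bare `0.4.23`, and checking it against the tested range `>=0.4.0,<1.0`:

```javascript
// Extract the first dotted version token from CLI output, or null.
function extractVersion(raw) {
  const match = String(raw).trim().match(/(\d+\.\d+(?:\.\d+)*)/);
  return match ? match[1] : null;
}
// true/false for a well-formed version; null when it can't be parsed
// (mirrors the compatible: null "unknown" case in the hunk).
function isCompatible(versionStr) {
  const parts = versionStr.split('.').map(Number);
  if (parts.length < 2 || parts.some(isNaN)) return null;
  const [major, minor] = parts;
  return major === 0 && minor >= 4; // >=0.4.0,<1.0
}
```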

@@ -7,6 +7,11 @@ const path = require('path');
const { execSync } = require('child_process');
const { loadConfig, resolveModelInternal, findPhaseInternal, getRoadmapPhaseInternal, pathExistsInternal, generateSlugInternal, getMilestoneInfo, getMilestonePhaseFilter, stripShippedMilestones, extractCurrentMilestone, normalizePhaseName, planningPaths, planningDir, planningRoot, toPosixPath, output, error, checkAgentsInstalled, phaseTokenMatches } = require('./core.cjs');
// Accept all bold/colon variants of the Requirements header (#2769):
// **Requirements:** / **Requirements**: / **Requirements** : render the
// same in markdown but differ textually.
const REQUIREMENTS_HEADER_RE = /^\*\*Requirements:?\*\*[^\S\n]*:?[^\S\n]*([^\n]*)$/m;
function getLatestCompletedMilestone(cwd) {
const milestonesPath = path.join(planningRoot(cwd), 'MILESTONES.md');
if (!fs.existsSync(milestonesPath)) return null;
@@ -102,7 +107,7 @@ function cmdInitExecutePhase(cwd, phase, raw, options = {}) {
has_reviews: false,
};
}
const reqMatch = roadmapPhase?.section?.match(REQUIREMENTS_HEADER_RE);
const reqExtracted = reqMatch
? reqMatch[1].replace(/[\[\]]/g, '').split(',').map(s => s.trim()).filter(Boolean).join(', ')
: null;
@@ -235,7 +240,7 @@ function cmdInitPlanPhase(cwd, phase, raw, options = {}) {
has_reviews: false,
};
}
const reqMatch = roadmapPhase?.section?.match(REQUIREMENTS_HEADER_RE);
const reqExtracted = reqMatch
? reqMatch[1].replace(/[\[\]]/g, '').split(',').map(s => s.trim()).filter(Boolean).join(', ')
: null;
@@ -458,8 +463,11 @@ function cmdInitNewMilestone(cwd, raw) {
try {
if (fs.existsSync(phasesDir)) {
// Bug #2445: filter phase dirs to current milestone only so stale dirs
// from a prior milestone that were not archived don't inflate the count.
const isDirInMilestone = getMilestonePhaseFilter(cwd);
phaseDirCount = fs.readdirSync(phasesDir, { withFileTypes: true })
.filter(entry => entry.isDirectory() && isDirInMilestone(entry.name))
.length;
}
} catch {}
@@ -554,6 +562,25 @@ function cmdInitQuick(cwd, description, raw) {
output(withProjectRoot(cwd, result), raw);
}
/**
* Init handler for ingest-docs workflow (#2801).
*
* Returns the minimal set of fields that ingest-docs.md needs to detect
* whether a project/planning dir exists and choose new vs merge mode.
* Mirrors the initIngestDocs SDK handler in sdk/src/query/init.ts.
*/
function cmdInitIngestDocs(cwd, raw) {
const config = loadConfig(cwd);
const result = {
project_exists: pathExistsInternal(cwd, '.planning/PROJECT.md'),
planning_exists: fs.existsSync(planningRoot(cwd)),
has_git: fs.existsSync(path.join(cwd, '.git')),
project_path: '.planning/PROJECT.md',
commit_docs: config.commit_docs,
};
output(withProjectRoot(cwd, result), raw);
}
function cmdInitResume(cwd, raw) {
const config = loadConfig(cwd);
@@ -824,20 +851,70 @@ function cmdInitMilestoneOp(cwd, raw) {
let phaseCount = 0;
let completedPhases = 0;
const phasesDir = path.join(planningDir(cwd), 'phases');
// Bug #2633 — ROADMAP.md (current milestone section) is the authority for
// phase counts, NOT the on-disk `.planning/phases/` directory. After
// `phases clear` between milestones, on-disk dirs will be a subset of the
// roadmap until each phase is materialized; reading from disk causes
// `all_phases_complete: true` to fire prematurely.
let roadmapPhaseNumbers = [];
try {
const roadmapPath = path.join(planningDir(cwd), 'ROADMAP.md');
const roadmapRaw = fs.readFileSync(roadmapPath, 'utf-8');
const currentSection = extractCurrentMilestone(roadmapRaw, cwd);
const phasePattern = /#{2,4}\s*Phase\s+(\d+[A-Z]?(?:\.\d+)*)\s*:/gi;
let m;
while ((m = phasePattern.exec(currentSection)) !== null) {
roadmapPhaseNumbers.push(m[1]);
}
} catch { /* intentionally empty */ }
// Canonicalize a phase token by stripping leading zeros from the integer
// head while preserving any [A-Z]? suffix and dotted segments. So "03" →
// "3", "03A" → "3A", "03.1" → "3.1", "3A" → "3A". Disk dirs that pad
// ("03-alpha") then match roadmap tokens ("Phase 3") without ever
// collapsing distinct tokens like "3" / "3A" / "3.1" into the same bucket.
const canonicalizePhase = (tok) => {
const m = tok.match(/^(\d+)([A-Z]?(?:\.\d+)*)$/);
return m ? String(parseInt(m[1], 10)) + m[2] : tok;
};
const diskPhaseDirs = new Map();
try {
const entries = fs.readdirSync(phasesDir, { withFileTypes: true });
for (const e of entries) {
if (!e.isDirectory()) continue;
const m = e.name.match(/^(\d+[A-Z]?(?:\.\d+)*)/);
if (!m) continue;
diskPhaseDirs.set(canonicalizePhase(m[1]), e.name);
}
} catch { /* intentionally empty */ }
// Count phases with summaries (completed)
if (roadmapPhaseNumbers.length > 0) {
phaseCount = roadmapPhaseNumbers.length;
for (const num of roadmapPhaseNumbers) {
const dirName = diskPhaseDirs.get(canonicalizePhase(num));
if (!dirName) continue;
try {
const phaseFiles = fs.readdirSync(path.join(phasesDir, dirName));
const hasSummary = phaseFiles.some(f => f.endsWith('-SUMMARY.md') || f === 'SUMMARY.md');
if (hasSummary) completedPhases++;
} catch { /* intentionally empty */ }
}
} else {
// Fallback: no parseable ROADMAP — preserve legacy on-disk behavior.
try {
const entries = fs.readdirSync(phasesDir, { withFileTypes: true });
const dirs = entries.filter(e => e.isDirectory()).map(e => e.name);
phaseCount = dirs.length;
for (const dir of dirs) {
try {
const phaseFiles = fs.readdirSync(path.join(phasesDir, dir));
const hasSummary = phaseFiles.some(f => f.endsWith('-SUMMARY.md') || f === 'SUMMARY.md');
if (hasSummary) completedPhases++;
} catch { /* intentionally empty */ }
}
} catch { /* intentionally empty */ }
}
// Check archive
const archiveDir = path.join(planningRoot(cwd), 'archive');
@@ -1227,6 +1304,7 @@ function cmdInitProgress(cwd, raw) {
// Build set of phases defined in ROADMAP for the current milestone
const roadmapPhaseNums = new Set();
const roadmapPhaseNames = new Map();
const roadmapCheckboxStates = new Map();
try {
const roadmapContent = extractCurrentMilestone(
fs.readFileSync(path.join(planningDir(cwd), 'ROADMAP.md'), 'utf-8'), cwd
@@ -1237,6 +1315,13 @@ function cmdInitProgress(cwd, raw) {
roadmapPhaseNums.add(hm[1]);
roadmapPhaseNames.set(hm[1], hm[2].replace(/\(INSERTED\)/i, '').trim());
}
// #2646: parse `- [x] Phase N` checkbox states so ROADMAP-only phases
// inherit completion from the ROADMAP when no phase directory exists.
const cbPattern = /-\s*\[(x| )\]\s*.*Phase\s+(\d+[A-Z]?(?:\.\d+)*)[:\s]/gi;
let cbm;
while ((cbm = cbPattern.exec(roadmapContent)) !== null) {
roadmapCheckboxStates.set(cbm[2], cbm[1].toLowerCase() === 'x');
}
} catch { /* intentionally empty */ }
const isDirInMilestone = getMilestonePhaseFilter(cwd);
@@ -1292,21 +1377,27 @@ function cmdInitProgress(cwd, raw) {
}
} catch { /* intentionally empty */ }
// Add phases defined in ROADMAP but not yet scaffolded to disk. When the
// ROADMAP has a `- [x] Phase N` checkbox, honor it as 'complete' so
// completed_count and status reflect the ROADMAP source of truth (#2646).
for (const [num, name] of roadmapPhaseNames) {
const stripped = num.replace(/^0+/, '') || '0';
if (!seenPhaseNums.has(stripped)) {
const checkboxComplete =
roadmapCheckboxStates.get(num) === true ||
roadmapCheckboxStates.get(stripped) === true;
const status = checkboxComplete ? 'complete' : 'not_started';
const phaseInfo = {
number: num,
name: name.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, ''),
directory: null,
status,
plan_count: 0,
summary_count: 0,
has_research: false,
};
phases.push(phaseInfo);
if (!nextPhase && !currentPhase && status !== 'complete') {
nextPhase = phaseInfo;
}
}
@@ -1856,6 +1947,7 @@ module.exports = {
cmdInitNewProject,
cmdInitNewMilestone,
cmdInitQuick,
cmdInitIngestDocs,
cmdInitResume,
cmdInitVerifyWork,
cmdInitPhaseOp,

View File
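The `REQUIREMENTS_HEADER_RE` introduced above accepts all three bold/colon spellings; a quick check (`extractRequirements` is an illustrative wrapper reusing the hunk's regex and REQ-ID post-processing):

```javascript
// Matches **Requirements:** / **Requirements**: / **Requirements** :
// — identical rendering in markdown, textually distinct.
const REQUIREMENTS_HEADER_RE = /^\*\*Requirements:?\*\*[^\S\n]*:?[^\S\n]*([^\n]*)$/m;
function extractRequirements(section) {
  const m = section.match(REQUIREMENTS_HEADER_RE);
  if (!m) return null;
  // Strip brackets, split on commas, trim, drop empties
  return m[1].replace(/[\[\]]/g, '').split(',')
    .map(s => s.trim()).filter(Boolean).join(', ');
}
```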

@@ -0,0 +1,132 @@
/**
* Install profiles — single source of truth for which skills/agents
* are written to the runtime config dirs.
*
* Background: every installed `gsd-*` skill costs eager system-prompt
* tokens because runtimes (Claude Code, opencode, etc.) enumerate
* skill descriptions in `<available_skills>` on every turn. With 86
* skills + 33 agents the floor is ~12k tokens per turn, which is a
* meaningful tax for local LLMs with 32K-128K context. Frontier
* models (Sonnet 4.6 / Opus 4.7 with 200K-1M ctx) don't feel it.
*
* The `minimal` profile installs the main GSD loop only:
* new-project → discuss-phase → plan-phase → execute-phase
* plus `help` (discoverability) and `update` (upgrade path).
*
* Users opt into minimal via `--minimal` on the install CLI.
* Default install (`full`) is unchanged — back-compat preserved.
*/
const fs = require('fs');
const path = require('path');
const os = require('os');
const MINIMAL_SKILL_ALLOWLIST = Object.freeze([
'new-project',
'discuss-phase',
'plan-phase',
'execute-phase',
'help',
'update',
]);
const MINIMAL_ALLOWLIST_SET = new Set(MINIMAL_SKILL_ALLOWLIST);
function isMinimalMode(mode) {
return mode === 'minimal';
}
function shouldInstallSkill(skillBaseName, mode) {
if (!isMinimalMode(mode)) return true;
return MINIMAL_ALLOWLIST_SET.has(skillBaseName);
}
// Stage dirs created during this process — cleaned up on exit.
// 13 runtime dispatch sites in install.js can each call stageSkillsForMode,
// so accumulating them in a single set avoids leaks without forcing each
// site to track its own cleanup handle.
const STAGED_DIRS = new Set();
let exitHandlerRegistered = false;
function cleanupStagedSkills() {
for (const dir of STAGED_DIRS) {
try {
fs.rmSync(dir, { recursive: true, force: true });
} catch {
// Best-effort: missing dir or permission error shouldn't crash a
// successful install. The OS reaps tmpdir eventually.
}
}
STAGED_DIRS.clear();
}
// Signals we register a cleanup handler for in addition to the natural
// 'exit' event. `process.on('exit')` does NOT fire on these — an installer
// is exactly the kind of process users abort mid-run, so without explicit
// signal handling Ctrl+C would leave staged tmp dirs behind.
const CLEANUP_SIGNALS = ['SIGINT', 'SIGTERM', 'SIGHUP'];
function ensureExitCleanup() {
if (exitHandlerRegistered) return;
exitHandlerRegistered = true;
process.on('exit', cleanupStagedSkills);
for (const sig of CLEANUP_SIGNALS) {
// `once` so re-raising the signal below isn't intercepted by us a second
// time — the OS-default handler should take over and exit with the right
// status code (so CI sees the abort, scripts see 130 for SIGINT, etc.).
process.once(sig, () => {
cleanupStagedSkills();
process.kill(process.pid, sig);
});
}
}
/**
* Stage a filtered copy of the source commands/gsd directory when in
* minimal mode. All runtime-specific copy fns recurse a source dir,
* so filtering at the source point lets every copy fn stay unchanged
* (DRY: one filter, not 12).
*
* In full mode this is a no-op — the original srcDir is returned.
*
* Cleanup: the staged dir is automatically removed on process exit.
* If the copy loop throws mid-flight, the partially-populated dir is
* removed and the error re-raised, so callers never see an orphan.
*
* @param {string} srcDir absolute path to commands/gsd
* @param {string} mode 'full' | 'minimal'
* @returns {string} path to use (original or staged tmp)
*/
function stageSkillsForMode(srcDir, mode) {
if (!isMinimalMode(mode)) return srcDir;
if (!fs.existsSync(srcDir)) return srcDir;
const stageDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gsd-minimal-skills-'));
try {
const entries = fs.readdirSync(srcDir, { withFileTypes: true });
for (const entry of entries) {
if (!entry.isFile()) continue;
if (!entry.name.endsWith('.md')) continue;
const baseName = entry.name.replace(/\.md$/, '');
if (!shouldInstallSkill(baseName, mode)) continue;
fs.copyFileSync(
path.join(srcDir, entry.name),
path.join(stageDir, entry.name),
);
}
} catch (err) {
try { fs.rmSync(stageDir, { recursive: true, force: true }); } catch {}
throw err;
}
STAGED_DIRS.add(stageDir);
ensureExitCleanup();
return stageDir;
}
module.exports = {
MINIMAL_SKILL_ALLOWLIST,
isMinimalMode,
shouldInstallSkill,
stageSkillsForMode,
cleanupStagedSkills,
};
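An illustration of the allowlist filter applied at an install site, redefined inline so the sketch is self-contained (the names mirror the module above; the candidate file list is made up):

```javascript
const MINIMAL_SKILL_ALLOWLIST = new Set([
  'new-project', 'discuss-phase', 'plan-phase', 'execute-phase', 'help', 'update',
]);
// Full mode installs everything; minimal mode keeps only the allowlist.
function shouldInstallSkill(baseName, mode) {
  return mode !== 'minimal' || MINIMAL_SKILL_ALLOWLIST.has(baseName);
}
// Filter .md skill files the way stageSkillsForMode's copy loop does.
function filterSkillFiles(files, mode) {
  return files.filter(f => f.endsWith('.md') &&
    shouldInstallSkill(f.replace(/\.md$/, ''), mode));
}
```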

View File

@@ -26,7 +26,7 @@ const MODEL_PROFILES = {
'gsd-doc-writer': { quality: 'opus', balanced: 'sonnet', budget: 'haiku', adaptive: 'sonnet' },
'gsd-doc-verifier': { quality: 'sonnet', balanced: 'sonnet', budget: 'haiku', adaptive: 'haiku' },
};
const VALID_PROFILES = [...Object.keys(MODEL_PROFILES['gsd-planner']), 'inherit'];
/**
* Formats the agent-to-model mapping as a human-readable table, returned as a string.
@@ -58,7 +58,9 @@ function formatAgentToModelMapAsTable(agentToModelMap) {
function getAgentToModelMapForProfile(normalizedProfile) {
const agentToModelMap = {};
for (const [agent, profileToModelMap] of Object.entries(MODEL_PROFILES)) {
agentToModelMap[agent] = normalizedProfile === 'inherit'
? 'inherit'
: profileToModelMap[normalizedProfile];
}
return agentToModelMap;
}
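The `'inherit'` branch above maps every agent to the literal string `'inherit'` instead of a concrete model; a sketch with the profile table abridged to two agents:

```javascript
// Abridged copy of MODEL_PROFILES for illustration.
const MODEL_PROFILES = {
  'gsd-planner': { quality: 'opus', balanced: 'sonnet', budget: 'haiku' },
  'gsd-doc-writer': { quality: 'opus', balanced: 'sonnet', budget: 'haiku' },
};
function getAgentToModelMapForProfile(profile) {
  const map = {};
  for (const [agent, byProfile] of Object.entries(MODEL_PROFILES)) {
    // 'inherit' bypasses the per-agent lookup entirely
    map[agent] = profile === 'inherit' ? 'inherit' : byProfile[profile];
  }
  return map;
}
```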

View File

@@ -625,7 +625,7 @@ function renameIntegerPhases(phasesDir, removedInt) {
const m = dir.match(/^(\d+)([A-Z])?(?:\.(\d+))?-(.+)$/i);
if (!m) return null;
const dirInt = parseInt(m[1], 10);
return (dirInt > removedInt && dirInt < 999) ? { dir, oldInt: dirInt, letter: m[2] ? m[2].toUpperCase() : '', decimal: m[3] ? parseInt(m[3], 10) : null, slug: m[4] } : null;
})
.filter(Boolean)
.sort((a, b) => a.oldInt !== b.oldInt ? b.oldInt - a.oldInt : (b.decimal || 0) - (a.decimal || 0));
@@ -673,7 +673,7 @@ function updateRoadmapAfterPhaseRemoval(roadmapPath, targetPhase, isDecimal, rem
const oldPad = oldStr.padStart(2, '0'), newPad = newStr.padStart(2, '0');
content = content.replace(new RegExp(`(#{2,4}\\s*Phase\\s+)${oldStr}(\\s*:)`, 'gi'), `$1${newStr}$2`);
content = content.replace(new RegExp(`(Phase\\s+)${oldStr}([:\\s])`, 'g'), `$1${newStr}$2`);
content = content.replace(new RegExp(`(?<![0-9-])${oldPad}-(\\d{2})(?![0-9-])`, 'g'), `${newPad}-$1`);
content = content.replace(new RegExp(`(\\|\\s*)${oldStr}\\.\\s`, 'g'), `$1${newStr}. `);
content = content.replace(new RegExp(`(Depends on:\\*\\*\\s*Phase\\s+)${oldStr}\\b`, 'gi'), `$1${newStr}`);
}
@@ -868,11 +868,16 @@ function cmdPhaseComplete(cwd, phaseNum, raw) {
);
const sectionText = phaseSectionMatch ? phaseSectionMatch[1] : '';
// Accept all bold/colon variants (#2769) — the previous pattern only
// matched **Requirements:** (colon inside bold) and silently skipped
// **Requirements**: (colon outside), preventing the matching REQ-IDs
// from being ticked off in REQUIREMENTS.md on phase completion.
const reqMatch = sectionText.match(/\*\*Requirements:?\*\*[^\S\n]*:?[^\S\n]*([^\n]+)/i);
let reqContent = fs.readFileSync(reqPath, 'utf-8');
if (reqMatch) {
const reqIds = reqMatch[1].replace(/[\[\]]/g, '').split(/[,\s]+/).map(r => r.trim()).filter(Boolean);
for (const reqId of reqIds) {
const reqEscaped = escapeRegex(reqId);
@@ -887,10 +892,40 @@ function cmdPhaseComplete(cwd, phaseNum, raw) {
'$1 Complete $2'
);
}
}
// Scan body for all **REQ-ID** patterns, warn about any missing from the Traceability table.
// Always runs regardless of whether the roadmap has a Requirements: line.
const bodyReqIds = [];
const bodyReqPattern = /\*\*([A-Z][A-Z0-9]*-\d+)\*\*/g;
let bodyMatch;
while ((bodyMatch = bodyReqPattern.exec(reqContent)) !== null) {
const id = bodyMatch[1];
if (!bodyReqIds.includes(id)) bodyReqIds.push(id);
}
// Collect REQ-IDs present in the Traceability section only, to avoid
// picking up IDs from other tables in the document.
const traceabilityHeadingMatch = reqContent.match(/^#{1,6}\s+Traceability\b/im);
const traceabilitySection = traceabilityHeadingMatch
? reqContent.slice(traceabilityHeadingMatch.index)
: '';
const tableReqIds = new Set();
const tableRowPattern = /^\|\s*([A-Z][A-Z0-9]*-\d+)\s*\|/gm;
let tableMatch;
while ((tableMatch = tableRowPattern.exec(traceabilitySection)) !== null) {
tableReqIds.add(tableMatch[1]);
}
const unregistered = bodyReqIds.filter(id => !tableReqIds.has(id));
if (unregistered.length > 0) {
warnings.push(
`REQUIREMENTS.md: ${unregistered.length} REQ-ID(s) found in body but missing from Traceability table: ${unregistered.join(', ')} — add them manually to keep traceability in sync`
);
}
atomicWriteFileSync(reqPath, reqContent);
requirementsUpdated = true;
}
});
}
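The body-vs-table drift check added above can be exercised in isolation (`findUnregisteredReqIds` is an illustrative wrapper around the same two regex scans):

```javascript
// Return body REQ-IDs that are missing from the Traceability table.
function findUnregisteredReqIds(reqContent) {
  // Bold **REQ-ID** tokens anywhere in the document body.
  const bodyIds = [];
  const bodyRe = /\*\*([A-Z][A-Z0-9]*-\d+)\*\*/g;
  let m;
  while ((m = bodyRe.exec(reqContent)) !== null) {
    if (!bodyIds.includes(m[1])) bodyIds.push(m[1]);
  }
  // First-column IDs from the Traceability section only.
  const headingMatch = reqContent.match(/^#{1,6}\s+Traceability\b/im);
  const section = headingMatch ? reqContent.slice(headingMatch.index) : '';
  const tableIds = new Set();
  const rowRe = /^\|\s*([A-Z][A-Z0-9]*-\d+)\s*\|/gm;
  while ((m = rowRe.exec(section)) !== null) tableIds.add(m[1]);
  return bodyIds.filter(id => !tableIds.has(id));
}
```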

View File

@@ -285,7 +285,7 @@ function generateProjectSection(cwd) {
const projectPath = path.join(cwd, '.planning', 'PROJECT.md');
const content = safeReadFile(projectPath);
if (!content) {
return { content: CLAUDE_MD_FALLBACKS.project, source: 'PROJECT.md', linkPath: null, hasFallback: true };
}
const parts = [];
const h1Match = content.match(/^# (.+)$/m);
@@ -306,9 +306,9 @@ function generateProjectSection(cwd) {
if (body) parts.push(`### Constraints\n\n${body}`);
}
if (parts.length === 0) {
return { content: CLAUDE_MD_FALLBACKS.project, source: 'PROJECT.md', linkPath: null, hasFallback: true };
}
return { content: parts.join('\n\n'), source: 'PROJECT.md', linkPath: '.planning/PROJECT.md', hasFallback: false };
}
function generateStackSection(cwd) {
@@ -316,12 +316,14 @@ function generateStackSection(cwd) {
const researchPath = path.join(cwd, '.planning', 'research', 'STACK.md');
let content = safeReadFile(codebasePath);
let source = 'codebase/STACK.md';
let linkPath = '.planning/codebase/STACK.md';
if (!content) {
content = safeReadFile(researchPath);
source = 'research/STACK.md';
linkPath = '.planning/research/STACK.md';
}
if (!content) {
return { content: CLAUDE_MD_FALLBACKS.stack, source: 'STACK.md', linkPath: null, hasFallback: true };
}
const lines = content.split('\n');
const summaryLines = [];
@@ -336,14 +338,14 @@ function generateStackSection(cwd) {
if (line.startsWith('- ') || line.startsWith('* ')) summaryLines.push(line);
}
const summary = summaryLines.length > 0 ? summaryLines.join('\n') : content.trim();
return { content: summary, source, linkPath, hasFallback: false };
}
function generateConventionsSection(cwd) {
const conventionsPath = path.join(cwd, '.planning', 'codebase', 'CONVENTIONS.md');
const content = safeReadFile(conventionsPath);
if (!content) {
return { content: CLAUDE_MD_FALLBACKS.conventions, source: 'CONVENTIONS.md', linkPath: null, hasFallback: true };
}
const lines = content.split('\n');
const summaryLines = [];
@@ -352,14 +354,14 @@ function generateConventionsSection(cwd) {
if (line.startsWith('- ') || line.startsWith('* ') || line.startsWith('|')) summaryLines.push(line);
}
const summary = summaryLines.length > 0 ? summaryLines.join('\n') : content.trim();
return { content: summary, source: 'CONVENTIONS.md', linkPath: '.planning/codebase/CONVENTIONS.md', hasFallback: false };
}
function generateArchitectureSection(cwd) {
const architecturePath = path.join(cwd, '.planning', 'codebase', 'ARCHITECTURE.md');
const content = safeReadFile(architecturePath);
if (!content) {
return { content: CLAUDE_MD_FALLBACKS.architecture, source: 'ARCHITECTURE.md', linkPath: null, hasFallback: true };
}
const lines = content.split('\n');
const summaryLines = [];
@@ -368,13 +370,14 @@ function generateArchitectureSection(cwd) {
if (line.startsWith('- ') || line.startsWith('* ') || line.startsWith('|') || line.startsWith('```')) summaryLines.push(line);
}
const summary = summaryLines.length > 0 ? summaryLines.join('\n') : content.trim();
return { content: summary, source: 'ARCHITECTURE.md', linkPath: '.planning/codebase/ARCHITECTURE.md', hasFallback: false };
}
function generateWorkflowSection() {
return {
content: CLAUDE_MD_WORKFLOW_ENFORCEMENT,
source: 'GSD defaults',
linkPath: null,
hasFallback: false,
};
}
@@ -948,19 +951,35 @@ function cmdGenerateClaudeMd(cwd, options, raw) {
}
}
let assemblyConfig = {};
let configClaudeMdPath = './CLAUDE.md';
try {
const config = loadConfig(cwd);
if (config.claude_md_path) configClaudeMdPath = config.claude_md_path;
if (config.claude_md_assembly) assemblyConfig = config.claude_md_assembly;
} catch { /* use default */ }
let outputPath = options.output;
if (!outputPath) {
outputPath = path.isAbsolute(configClaudeMdPath) ? configClaudeMdPath : path.join(cwd, configClaudeMdPath);
} else if (!path.isAbsolute(outputPath)) {
outputPath = path.join(cwd, outputPath);
}
const globalAssemblyMode = assemblyConfig.mode || 'embed';
const blockModes = assemblyConfig.blocks || {};
// Return the assembled content for a section, respecting link vs embed mode.
// "link" mode writes `@<linkPath>` when the generator has a real source file.
// Falls back to "embed" for sections without a linkable source (workflow, fallbacks).
function buildSectionContent(name, gen, heading) {
const effectiveMode = blockModes[name] || globalAssemblyMode;
if (effectiveMode === 'link' && gen.linkPath && !gen.hasFallback) {
return buildSection(name, gen.source, `${heading}\n\n@${gen.linkPath}`);
}
return buildSection(name, gen.source, `${heading}\n\n${gen.content}`);
}
let existingContent = safeReadFile(outputPath);
let action;
@@ -969,8 +988,7 @@ function cmdGenerateClaudeMd(cwd, options, raw) {
for (const name of MANAGED_SECTIONS) {
const gen = generated[name];
const heading = sectionHeadings[name];
sections.push(buildSectionContent(name, gen, heading));
}
sections.push('');
sections.push(CLAUDE_MD_PROFILE_PLACEHOLDER);
@@ -985,13 +1003,15 @@ function cmdGenerateClaudeMd(cwd, options, raw) {
for (const name of MANAGED_SECTIONS) {
const gen = generated[name];
const heading = sectionHeadings[name];
const fullSection = buildSectionContent(name, gen, heading);
const hasMarkers = fileContent.indexOf(`<!-- GSD:${name}-start`) !== -1;
if (hasMarkers) {
if (options.auto) {
const effectiveMode = blockModes[name] || globalAssemblyMode;
const expectedBody = (effectiveMode === 'link' && gen.linkPath && !gen.hasFallback)
? `${heading}\n\n@${gen.linkPath}`
: `${heading}\n\n${gen.content}`;
if (detectManualEdit(fileContent, name, expectedBody)) {
sectionsSkipped.push(name);
const genIdx = sectionsGenerated.indexOf(name);

View File
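The link-vs-embed resolution in the hunk above condenses to one pure function (`resolveSectionBody` is illustrative; the real code additionally wraps the result in GSD section markers via `buildSection`):

```javascript
// Pick the section body: per-block override, then global mode, then 'embed'.
// 'link' emits an @path reference only when the generator found a real
// source file (linkPath set, no fallback content).
function resolveSectionBody(name, gen, heading, assemblyConfig) {
  const mode = (assemblyConfig.blocks || {})[name] || assemblyConfig.mode || 'embed';
  if (mode === 'link' && gen.linkPath && !gen.hasFallback) {
    return `${heading}\n\n@${gen.linkPath}`;
  }
  return `${heading}\n\n${gen.content}`;
}
```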

@@ -6,6 +6,35 @@ const fs = require('fs');
const path = require('path');
const { escapeRegex, normalizePhaseName, planningPaths, withPlanningLock, output, error, findPhaseInternal, stripShippedMilestones, extractCurrentMilestone, replaceInCurrentMilestone, phaseTokenMatches, atomicWriteFileSync } = require('./core.cjs');
/**
* Coerce an arbitrary YAML scalar/object into a string for cross-cutting
* truth aggregation. Handles:
* - strings (passthrough)
* - numbers / booleans (String() coercion — issue #2770: bare YAML ints
* like `- 3` must be surfaced, not silently skipped)
* - kv-shaped objects from parseMustHavesBlock continuation kv (issue
* #2757) — extract the first meaningful string field
*
* Returns the empty string when no usable text can be derived; callers should
* skip empty results.
*/
function coerceTruthToString(t) {
if (t === null || t === undefined) return '';
if (typeof t === 'string') return t;
if (typeof t === 'number' || typeof t === 'boolean' || typeof t === 'bigint') {
return String(t);
}
if (typeof t === 'object') {
// Prefer common title-bearing keys produced by parseMustHavesBlock
for (const k of ['title', 'text', 'name', 'rule', 'path', 'provides']) {
const v = t[k];
if (typeof v === 'string' && v.trim()) return v;
if (typeof v === 'number' || typeof v === 'boolean') return String(v);
}
}
return '';
}
/**
* Search for a phase header (and its section) within the given content string.
* Returns a result object if found (either a full match or a malformed_roadmap
@@ -56,6 +85,11 @@ function searchPhaseInContent(content, escapedPhase, phaseNum) {
const goalMatch = section.match(/\*\*Goal(?::\*\*|\*\*:)\s*([^\n]+)/i);
const goal = goalMatch ? goalMatch[1].trim() : null;
// Mode: vertical-MVP slice mode flag. Lowercased + trimmed for canonical
// comparison; unrecognized values are preserved verbatim for forward-compat.
const modeMatch = section.match(/\*\*Mode(?::\*\*|\*\*:)\s*([^\n]+)/i);
const mode = modeMatch ? modeMatch[1].trim().toLowerCase() : null;
// Extract success criteria as structured array
const criteriaMatch = section.match(/\*\*Success Criteria\*\*[^\n]*:\s*\n((?:\s*\d+\.\s*[^\n]+\n?)+)/i);
const success_criteria = criteriaMatch
@@ -67,6 +101,7 @@ function searchPhaseInContent(content, escapedPhase, phaseNum) {
phase_number: phaseNum,
phase_name: phaseName,
goal,
mode,
success_criteria,
section,
};
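The field regexes above tolerate both `**Goal:**` and `**Goal**:` punctuation orderings. A minimal extraction sketch against an invented phase section (the section text is hypothetical, not from a real roadmap):

```javascript
// Hypothetical phase section; both bold-colon orderings appear on purpose.
const section = [
  '### Phase 12: Canary stream',
  '**Goal:** Ship the dev-to-canary publish path',
  '**Mode**: Vertical-MVP',
].join('\n');

const goalMatch = section.match(/\*\*Goal(?::\*\*|\*\*:)\s*([^\n]+)/i);
const modeMatch = section.match(/\*\*Mode(?::\*\*|\*\*:)\s*([^\n]+)/i);
console.log(goalMatch ? goalMatch[1].trim() : null);               // → 'Ship the dev-to-canary publish path'
console.log(modeMatch ? modeMatch[1].trim().toLowerCase() : null); // → 'vertical-mvp'
```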
@@ -152,6 +187,9 @@ function cmdRoadmapAnalyze(cwd, raw) {
const goalMatch = section.match(/\*\*Goal(?::\*\*|\*\*:)\s*([^\n]+)/i);
const goal = goalMatch ? goalMatch[1].trim() : null;
const modeMatch = section.match(/\*\*Mode(?::\*\*|\*\*:)\s*([^\n]+)/i);
const mode = modeMatch ? modeMatch[1].trim().toLowerCase() : null;
const dependsMatch = section.match(/\*\*Depends on(?::\*\*|\*\*:)\s*([^\n]+)/i);
const depends_on = dependsMatch ? dependsMatch[1].trim() : null;
@@ -198,6 +236,7 @@ function cmdRoadmapAnalyze(cwd, raw) {
number: phaseNum,
name: phaseName,
goal,
mode,
depends_on,
plan_count: planCount,
summary_count: summaryCount,
@@ -353,8 +392,182 @@ function cmdRoadmapUpdatePlanProgress(cwd, phaseNum, raw) {
}, raw, `${summaryCount}/${planCount} ${status}`);
}
/**
* Annotate the ROADMAP.md plan list for a phase with wave dependency notes
* and a cross-cutting constraints subsection derived from PLAN frontmatter.
*
* Wave dependency notes: bold "**Wave N**" headers inserted before each wave
* group in the plan checklist; every wave after the first carries a
* "*(blocked on Wave N-1 completion)*" note.
*
* Cross-cutting constraints: must_haves.truths entries (coerced to strings)
* that appear in 2+ plans are surfaced in a "Cross-cutting constraints"
* subsection below the plan list.
*
* The operation is idempotent: if wave headers or a cross-cutting constraints
* marker are already present in the phase section, the function returns
* without modifying the file.
*/
function cmdRoadmapAnnotateDependencies(cwd, phaseNum, raw) {
if (!phaseNum) {
error('phase number required for roadmap annotate-dependencies');
}
const roadmapPath = planningPaths(cwd).roadmap;
if (!fs.existsSync(roadmapPath)) {
output({ updated: false, reason: 'ROADMAP.md not found' }, raw, 'no roadmap');
return;
}
const phaseInfo = findPhaseInternal(cwd, phaseNum);
if (!phaseInfo || phaseInfo.plans.length === 0) {
output({ updated: false, reason: 'no plans found for phase', phase: phaseNum }, raw, 'no plans');
return;
}
const { extractFrontmatter, parseMustHavesBlock } = require('./frontmatter.cjs');
// Read each PLAN.md and extract wave + must_haves.truths
const planData = [];
for (const planFile of phaseInfo.plans) {
const planPath = path.join(path.resolve(cwd, phaseInfo.directory), planFile);
try {
const content = fs.readFileSync(planPath, 'utf-8');
const fm = extractFrontmatter(content);
const wave = parseInt(fm.wave, 10) || 1;
const planId = planFile.replace(/-PLAN\.md$/i, '').replace(/PLAN\.md$/i, '');
const truths = parseMustHavesBlock(content, 'truths') || [];
planData.push({ planFile, planId, wave, truths });
} catch { /* skip unreadable plans */ }
}
if (planData.length === 0) {
output({ updated: false, reason: 'could not read plan frontmatter' }, raw, 'no frontmatter');
return;
}
// Group plans by wave (sorted)
const waveGroups = new Map();
for (const p of planData) {
if (!waveGroups.has(p.wave)) waveGroups.set(p.wave, []);
waveGroups.get(p.wave).push(p);
}
const waves = [...waveGroups.keys()].sort((a, b) => a - b);
// Find cross-cutting truths: appear in 2+ plans (de-duplicated, case-insensitive).
//
// Issue #2770: must **coerce, not skip**. A previous guard
// `if (typeof t !== 'string') continue` silently dropped numeric scalars
// (YAML ints like `- 3`) and kv-shaped truths (`- title: X`), so the
// cross-cutting analysis lost real constraints rather than crashing on
// `t.trim()`. We coerce primitives via `String(t)` and extract a sensible
// string field from object-shaped items produced by parseMustHavesBlock's
// continuation-kv path (issue #2757 produces those shapes for nested keys).
const truthCounts = new Map();
for (const { truths } of planData) {
const seen = new Set();
for (const t of truths) {
const text = coerceTruthToString(t);
if (!text) continue;
const trimmed = text.trim();
const key = trimmed.toLowerCase();
if (!key || seen.has(key)) continue;
seen.add(key);
if (!truthCounts.has(key)) truthCounts.set(key, { count: 0, text: trimmed });
truthCounts.get(key).count++;
}
}
const crossCuttingTruths = [...truthCounts.values()]
.filter(v => v.count >= 2)
.map(v => v.text);
// Patch ROADMAP.md
let updated = false;
withPlanningLock(cwd, () => {
let content = fs.readFileSync(roadmapPath, 'utf-8');
// Find the phase section
const phaseEscaped = escapeRegex(phaseNum);
const phaseHeaderPattern = new RegExp(`(#{2,4}\\s*Phase\\s+${phaseEscaped}:[^\\n]*)`, 'i');
const phaseMatch = content.match(phaseHeaderPattern);
if (!phaseMatch) return;
const phaseStart = phaseMatch.index;
const restAfterHeader = content.slice(phaseStart);
const nextPhaseOffset = restAfterHeader.slice(1).search(/\n#{2,4}\s+Phase\s+\d/i);
const phaseEnd = nextPhaseOffset >= 0 ? phaseStart + 1 + nextPhaseOffset : content.length;
const phaseSection = content.slice(phaseStart, phaseEnd);
// Idempotency: skip if annotation markers already present
if (
/\*\*Wave\s+\d+/i.test(phaseSection) ||
/\*\*Cross-cutting constraints:\*\*/i.test(phaseSection)
) return;
// Find the Plans: section within the phase section
const plansBlockMatch = phaseSection.match(/(Plans:\s*\n)((?:\s*-\s*\[[ x]\][^\n]*\n?)*)/i);
if (!plansBlockMatch) return;
const plansHeader = plansBlockMatch[1];
const existingList = plansBlockMatch[2];
const listLines = existingList.split('\n').filter(l => /^\s*-\s*\[/.test(l));
if (listLines.length === 0) return;
// Build wave-annotated plan list
const linesByWave = new Map();
for (const line of listLines) {
// Match plan ID from line: "- [ ] 01-01-PLAN.md — ..." or "- [ ] 01-01: ..."
const idMatch = line.match(/\[\s*[x ]\s*\]\s*([\w-]+?)(?:-PLAN\.md|\.md|:|\s—)/i);
const planId = idMatch ? idMatch[1] : null;
const planEntry = planId ? planData.find(p => p.planId === planId) : null;
const wave = planEntry ? planEntry.wave : 1;
if (!linesByWave.has(wave)) linesByWave.set(wave, []);
linesByWave.get(wave).push(line);
}
const annotatedLines = [];
const sortedWaves = [...linesByWave.keys()].sort((a, b) => a - b);
for (let i = 0; i < sortedWaves.length; i++) {
const w = sortedWaves[i];
const waveLines = linesByWave.get(w);
if (sortedWaves.length > 1) {
const dep = i > 0 ? ` *(blocked on Wave ${sortedWaves[i - 1]} completion)*` : '';
annotatedLines.push(`**Wave ${w}**${dep}`);
}
annotatedLines.push(...waveLines);
if (i < sortedWaves.length - 1) annotatedLines.push('');
}
// Append cross-cutting constraints subsection if any found
if (crossCuttingTruths.length > 0) {
annotatedLines.push('');
annotatedLines.push('**Cross-cutting constraints:**');
for (const t of crossCuttingTruths) {
annotatedLines.push(`- ${t}`);
}
}
const newListBlock = annotatedLines.join('\n') + '\n';
const newPhaseSection = phaseSection.replace(
plansBlockMatch[0],
plansHeader + newListBlock
);
const nextContent = content.slice(0, phaseStart) + newPhaseSection + content.slice(phaseEnd);
if (nextContent === content) return;
atomicWriteFileSync(roadmapPath, nextContent);
updated = true;
});
output({
updated,
phase: phaseNum,
waves: waves.length,
cross_cutting_constraints: crossCuttingTruths.length,
}, raw, updated ? `annotated ${waves.length} wave(s), ${crossCuttingTruths.length} constraint(s)` : 'skipped (already annotated or no plan list)');
}
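The dedupe-then-count step above is the heart of the cross-cutting analysis: each plan contributes a given truth at most once (the per-plan `Set`), and only truths seen in 2+ plans survive. A standalone sketch with hypothetical plan contents:

```javascript
// Standalone sketch of the cross-cutting count used above: case-insensitive
// keys, per-plan dedupe, threshold of 2 distinct plans.
function crossCutting(planTruths) {
  const counts = new Map();
  for (const truths of planTruths) {
    const seen = new Set();
    for (const t of truths) {
      const key = t.trim().toLowerCase();
      if (!key || seen.has(key)) continue;
      seen.add(key);
      if (!counts.has(key)) counts.set(key, { count: 0, text: t.trim() });
      counts.get(key).count++;
    }
  }
  return [...counts.values()].filter(v => v.count >= 2).map(v => v.text);
}

console.log(crossCutting([
  ['No breaking CLI changes', 'no breaking cli changes', 'Atomic writes'],
  ['No breaking CLI changes'],
  ['Atomic writes only in core'],
]));
// → ['No breaking CLI changes'] — the duplicate inside plan 1 counts once
```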
module.exports = {
cmdRoadmapGetPhase,
cmdRoadmapAnalyze,
cmdRoadmapUpdatePlanProgress,
cmdRoadmapAnnotateDependencies,
};
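The plan-ID pattern in the checklist matcher handles both line formats named in its comment ("- [ ] 01-01-PLAN.md — ..." and "- [ ] 01-01: ..."); lines that fit neither shape yield null and fall back to wave 1. A hypothetical standalone check:

```javascript
// Hypothetical checklist lines covering both supported formats plus a
// non-matching line, run against the same plan-ID regex used above.
const lines = [
  '- [ ] 01-01-PLAN.md — scaffold',
  '- [x] 01-02: tests',
  '- [ ] stray note without a plan id',
];
const ids = lines.map(l => {
  const m = l.match(/\[\s*[x ]\s*\]\s*([\w-]+?)(?:-PLAN\.md|\.md|:|\s—)/i);
  return m ? m[1] : null;
});
console.log(ids); // → ['01-01', '01-02', null]
```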
