Merge pull request #4 from stephenleo/bad-v1.2.0-audit

feat(bad): v1.2.0 — watchdog integration, 7-step pipeline, consistency audit
This commit is contained in:
Marie Stephen Leo
2026-04-12 20:02:06 +08:00
committed by GitHub
17 changed files with 594 additions and 167 deletions

View File

@@ -24,7 +24,7 @@
"name": "bmad-bad",
"source": "./",
"description": "Autonomous development orchestrator for the BMad Method. Runs fully autonomous parallel multi-agent pipelines through the full story lifecycle (create → dev → review → PR) driven by your sprint backlog and dependency graph.",
"version": "1.1.0",
"version": "1.2.0",
"author": { "name": "Marie Stephen Leo" },
"skills": [
"./skills/bad"

View File

@@ -12,12 +12,14 @@ Once your epics and stories are planned, BAD takes over:
1. *(`MODEL_STANDARD` subagent)* Builds a dependency graph from your sprint backlog — maps story dependencies, syncs GitHub PR status, and identifies what's ready to work on
2. Picks ready stories from the graph, respecting epic ordering and dependencies
3. Runs up to `MAX_PARALLEL_STORIES` stories simultaneously — each in its own isolated git worktree — each through a sequential 5-step pipeline:
3. Runs up to `MAX_PARALLEL_STORIES` stories simultaneously — each in its own isolated git worktree — each through a sequential 7-step pipeline:
- **Step 1** *(`MODEL_STANDARD` subagent)* — `bmad-create-story`: generates and validates the story spec
- **Step 2** *(`MODEL_STANDARD` subagent)* — `bmad-dev-story`: implements the code
- **Step 3** *(`MODEL_QUALITY` subagent)* — `bmad-code-review`: reviews and fixes the implementation
- **Step 4** *(`MODEL_STANDARD` subagent)* — commit, push, open PR, monitor CI, fix any failing checks
- **Step 5** *(`MODEL_STANDARD` subagent)* — PR code review: reviews diff, applies fixes, pushes clean
- **Step 2** *(`MODEL_STANDARD` subagent)* — `bmad-testarch-atdd`: generates failing acceptance tests
- **Step 3** *(`MODEL_STANDARD` subagent)* — `bmad-dev-story`: implements the code
- **Step 4** *(`MODEL_STANDARD` subagent)* — `bmad-testarch-test-review`: reviews test quality, applies fixes
- **Step 5** *(`MODEL_QUALITY` subagent)* — `bmad-code-review`: reviews and fixes the implementation
- **Step 6** *(`MODEL_STANDARD` subagent)* — commit, push, open PR, monitor CI, fix any failing checks
- **Step 7** *(`MODEL_STANDARD` subagent)* — PR code review: reviews diff, applies fixes, pushes clean
4. *(`MODEL_STANDARD` subagent)* Optionally auto-merges batch PRs sequentially (lowest story number first), resolving any conflicts
5. Waits, then loops back for the next batch — until the entire sprint is done
@@ -91,6 +93,7 @@ BAD is configured at install time (`/bad setup`) and stores settings in the `bad
| `WAIT_TIMER_SECONDS` | `3600` | Wait between batches |
| `RETRO_TIMER_SECONDS` | `600` | Delay before auto-retrospective |
| `CONTEXT_COMPACTION_THRESHOLD` | `80` | Context window % at which to compact context |
| `STALE_TIMEOUT_MINUTES` | `60` | Minutes of subagent inactivity before watchdog alerts (0 = disabled) |
| `TIMER_SUPPORT` | `true` | Use native platform timers; `false` for prompt-based continuation |
| `MONITOR_SUPPORT` | `true` | Use the Monitor tool for CI/PR-merge polling; `false` for Bedrock/Vertex/Foundry |
| `API_FIVE_HOUR_THRESHOLD` | `80` | (Claude Code) 5-hour usage % at which to pause |

View File

@@ -68,36 +68,40 @@ Run with optional runtime overrides:
Once your epics and stories are planned, BAD takes over:
1. *(`MODEL_STANDARD` subagent)* Builds a dependency graph from your sprint backlog — maps story dependencies, syncs GitHub PR status, and identifies what's ready to work on
2. Picks ready stories from the graph, respecting epic ordering and dependencies
3. Runs up to `MAX_PARALLEL_STORIES` stories simultaneously — each in its own isolated git worktree — each through a sequential 5-step pipeline. **Every step runs in a dedicated subagent with a fresh context window**, keeping the coordinator lean and each agent fully focused on its single task:
2. Picks ready stories from the graph, respecting epic ordering and dependencies; runs a blocking epic-level test design step (`bmad-testarch-test-design`) once when starting each new epic
3. Runs up to `MAX_PARALLEL_STORIES` stories simultaneously — each in its own isolated git worktree — each through a sequential 7-step pipeline. **Every step runs in a dedicated subagent with a fresh context window**, keeping the coordinator lean and each agent fully focused on its single task:
- **Step 1** *(`MODEL_STANDARD` subagent)* — `bmad-create-story`: generates and validates the story spec
- **Step 2** *(`MODEL_STANDARD` subagent)* — `bmad-dev-story`: implements the code
- **Step 3** *(`MODEL_QUALITY` subagent)* — `bmad-code-review`: reviews and fixes the implementation
- **Step 4** *(`MODEL_STANDARD` subagent)* — commit, push, open PR, monitor CI, fix any failing checks
- **Step 5** *(`MODEL_STANDARD` subagent)* — PR code review: reviews diff, applies fixes, pushes clean
- **Step 2** *(`MODEL_STANDARD` subagent)* — `bmad-testarch-atdd`: generates failing acceptance tests
- **Step 3** *(`MODEL_STANDARD` subagent)* — `bmad-dev-story`: implements the code
- **Step 4** *(`MODEL_STANDARD` subagent)* — `bmad-testarch-test-review`: reviews test quality, applies fixes
- **Step 5** *(`MODEL_QUALITY` subagent)* — `bmad-code-review`: reviews and fixes the implementation
- **Step 6** *(`MODEL_STANDARD` subagent)* — commit, push, open PR, monitor CI, fix any failing checks
- **Step 7** *(`MODEL_STANDARD` subagent)* — PR code review: reviews diff, applies fixes, pushes clean
4. *(`MODEL_STANDARD` subagent)* Optionally auto-merges batch PRs sequentially (lowest story number first), resolving any conflicts
5. Waits, then loops back for the next batch — until the entire sprint is done
5. On epic completion, optionally runs a retrospective (`bmad-retrospective`) after a `RETRO_TIMER_SECONDS` countdown — the user can trigger it early, skip it, or stop BAD
6. Waits, then loops back for the next batch — until the entire sprint is done
## Configuration
BAD is configured at install time (`/bad setup`) and stores settings in the `bad:` section of `_bmad/config.yaml`. All values can be overridden at runtime with `KEY=VALUE` args.
| Variable | Config Key | Default | Description |
|---|---|---|---|
| `MAX_PARALLEL_STORIES` | `max_parallel_stories` | `3` | Stories to run per batch |
| `WORKTREE_BASE_PATH` | `worktree_base_path` | `.worktrees` | Base directory for per-story git worktrees (relative to repo root) |
| `MODEL_STANDARD` | `model_standard` | `sonnet` | Model for create, dev, and PR steps |
| `MODEL_QUALITY` | `model_quality` | `opus` | Model for code review step |
| `AUTO_PR_MERGE` | `auto_pr_merge` | `false` | Auto-merge PRs sequentially after each batch |
| `RUN_CI_LOCALLY` | `run_ci_locally` | `false` | Run CI locally instead of GitHub Actions |
| `WAIT_TIMER_SECONDS` | `wait_timer_seconds` | `3600` | Seconds to wait between batches |
| `RETRO_TIMER_SECONDS` | `retro_timer_seconds` | `600` | Seconds before auto-retrospective after epic completion |
| `CONTEXT_COMPACTION_THRESHOLD` | `context_compaction_threshold` | `80` | Context window % at which to compact context |
| `TIMER_SUPPORT` | `timer_support` | `true` | Use native platform timers; `false` for prompt-based continuation |
| `MONITOR_SUPPORT` | `monitor_support` | `true` | Use the Monitor tool for CI/PR-merge polling; `false` for Bedrock/Vertex/Foundry |
| `API_FIVE_HOUR_THRESHOLD` | `api_five_hour_threshold` | `80` | (Claude Code) 5-hour usage % at which to pause |
| `API_SEVEN_DAY_THRESHOLD` | `api_seven_day_threshold` | `95` | (Claude Code) 7-day usage % at which to pause |
| `API_USAGE_THRESHOLD` | `api_usage_threshold` | `80` | (Other harnesses) Generic usage % at which to pause |
| Variable | Default | Description |
|---|---|---|
| `MAX_PARALLEL_STORIES` | `3` | Stories to run per batch |
| `WORKTREE_BASE_PATH` | `.worktrees` | Base directory for per-story git worktrees (relative to repo root) |
| `MODEL_STANDARD` | `sonnet` | Model for create, ATDD, dev, test review, and PR steps |
| `MODEL_QUALITY` | `opus` | Model for code review step |
| `AUTO_PR_MERGE` | `false` | Auto-merge PRs sequentially after each batch |
| `RUN_CI_LOCALLY` | `false` | Run CI locally instead of GitHub Actions |
| `WAIT_TIMER_SECONDS` | `3600` | Seconds to wait between batches |
| `RETRO_TIMER_SECONDS` | `600` | Seconds before auto-retrospective after epic completion |
| `CONTEXT_COMPACTION_THRESHOLD` | `80` | Context window % at which to compact context |
| `STALE_TIMEOUT_MINUTES` | `60` | Minutes of subagent inactivity before watchdog alerts (0 = disabled) |
| `TIMER_SUPPORT` | `true` | Use native platform timers; `false` for prompt-based continuation |
| `MONITOR_SUPPORT` | `true` | Use the Monitor tool for CI/PR-merge polling; `false` for Bedrock/Vertex/Foundry |
| `API_FIVE_HOUR_THRESHOLD` | `80` | (Claude Code) 5-hour usage % at which to pause |
| `API_SEVEN_DAY_THRESHOLD` | `95` | (Claude Code) 7-day usage % at which to pause |
| `API_USAGE_THRESHOLD` | `80` | (Other harnesses) Generic usage % at which to pause |
## Agent Harness Support

View File

@@ -50,11 +50,12 @@ Load base values from the `bad` section of `_bmad/config.yaml` at startup. Then
|----------|-----------|---------|-------------|
| `MAX_PARALLEL_STORIES` | `max_parallel_stories` | `3` | Max stories to run in a single batch |
| `WORKTREE_BASE_PATH` | `worktree_base_path` | `.worktrees` | Root directory for git worktrees |
| `MODEL_STANDARD` | `model_standard` | `sonnet` | Model for Steps 1, 2, 3, 4, 6, 7 and Phase 3 (auto-merge) |
| `MODEL_STANDARD` | `model_standard` | `sonnet` | Model for all subagents except Step 5 (code review): Phase 0, Phase 1 Epic-Start, Steps 1–4 and 6–7, Phase 3 (merge + cleanup), Phase 4 (assessment + retrospective) |
| `MODEL_QUALITY` | `model_quality` | `opus` | Model for Step 5 (code review) |
| `RETRO_TIMER_SECONDS` | `retro_timer_seconds` | `600` | Auto-retrospective countdown after epic completion (10 min) |
| `WAIT_TIMER_SECONDS` | `wait_timer_seconds` | `3600` | Post-batch wait before re-checking PR status (1 hr) |
| `CONTEXT_COMPACTION_THRESHOLD` | `context_compaction_threshold` | `80` | Context window % at which to compact/summarise context |
| `STALE_TIMEOUT_MINUTES` | `stale_timeout_minutes` | `60` | Minutes of subagent inactivity before watchdog alerts (0 = disabled) |
| `TIMER_SUPPORT` | `timer_support` | `true` | When `true`, use native platform timers; when `false`, use prompt-based continuation |
| `MONITOR_SUPPORT` | `monitor_support` | `true` | When `true`, use the Monitor tool for CI and PR-merge polling; when `false`, fall back to manual polling loops (required for Bedrock/Vertex/Foundry) |
| `API_FIVE_HOUR_THRESHOLD` | `api_five_hour_threshold` | `80` | (Claude Code) 5-hour rate limit % that triggers a pause |
@@ -206,14 +207,18 @@ Launch all stories' Step 1 subagents **in a single message** (parallel). Each st
| `review` | Step 4 | Steps 1–3 |
| `done` | — | all |
**After each step:** run **Pre-Continuation Checks** (see `references/coordinator/gate-pre-continuation.md`) before spawning the next subagent. Pre-Continuation Checks are the only coordinator-side work between steps.
**After each step — mandatory gate (never skip, even with parallel stories):** 📣 **Notify** the step result (formats below), then run **Pre-Continuation Checks** (`references/coordinator/gate-pre-continuation.md`). Only after all checks pass → spawn the next subagent.
📣 **Notify per step** as each step completes:
- Success: `✅ Story {number}: Step {N} — {step name}`
- Failure: `❌ Story {number}: Step {N} — {step name} failed — {brief error}`
Step names: Step 1 — Create, Step 2 — ATDD, Step 3 — Develop, Step 4 — Test review, Step 5 — Code review, Step 6 — PR + CI, Step 7 — PR review.
**On failure:** stop that story's pipeline. Report step, story, and error. Other stories continue.
**Exception:** rate/usage limit failures → run Pre-Continuation Checks (which auto-pauses until reset) then retry.
📣 **Notify per story** as each pipeline concludes (Step 7 success or any step failure):
- Success: `✅ Story {number} done — PR #{pr_number}`
- Failure: `❌ Story {number} failed at Step {N} — {brief error}`
**Hung subagents:** when `MONITOR_SUPPORT=true` and the activity log hook is installed (Step 4 of setup), use the [Watchdog Pattern](references/coordinator/pattern-watchdog.md) when spawning Steps 2, 3, 4, and 5 to detect stale agents.
### Step 1: Create Story (`MODEL_STANDARD`)
@@ -352,18 +357,7 @@ Auto-approve all tool calls (yolo mode).
- CI green → report success
LOCAL CI FALLBACK (when RUN_CI_LOCALLY=true or billing-limited):
a. Read all .github/workflows/ files triggered on pull_request events.
b. Extract and run shell commands from each run: step in order (respecting
working-directory). If any fail, diagnose, fix, and re-run until all pass.
c. Commit fixes and push to the PR branch.
d. Post a PR comment:
## Test Results (manual — GitHub Actions skipped: billing/spending limit reached)
| Check | Status | Notes |
|-------|--------|-------|
| `<command>` | ✅ Pass / ❌ Fail | e.g. "42 tests passed" |
### Fixes applied
- [failure] → [fix]
All rows must show ✅ Pass before this step is considered complete.
Read `references/subagents/step6-ci-fallback.md` and follow its instructions exactly.
Report: success or failure, and the PR number/URL if opened.
```
@@ -423,26 +417,7 @@ After all batch stories complete Phase 2, merge every successful story's PR into
4. Spawn a **cleanup subagent** (`MODEL_STANDARD`, yolo mode):
```
Post-merge cleanup. Auto-approve all tool calls (yolo mode).
1. Verify sprint-status.yaml at the repo root has status `done` for all merged stories.
Fix any that are missing.
2. Repo root branch safety check:
git branch --show-current
If not main:
git restore .
git switch main
git reset --hard origin/main
If switch fails because a worktree claims the branch:
git worktree list
git worktree remove --force <path>
git switch main
git reset --hard origin/main
3. Pull main:
git pull --ff-only origin main
Report: done or any errors encountered.
Read `references/subagents/phase3-cleanup.md` and follow its instructions exactly.
```
---
@@ -476,29 +451,14 @@ Or if no stories were ready: `⏸ No stories ready — waiting for PRs to merge`
### Step 2: Check for Epic Completion
From Phase 2 results, collect the batch stories and their PR numbers (e.g. `8.1 → #101, 8.2 → #102`). Pass these as `BATCH_STORIES_WITH_PRS` in the assessment prompt below.
Spawn an **assessment subagent** (`MODEL_STANDARD`, yolo mode):
```
Epic completion assessment. Auto-approve all tool calls (yolo mode).
BATCH_STORIES_WITH_PRS: {coordinator substitutes: "story → #PR" pairs from this batch, one per line}
Read:
- _bmad-output/planning-artifacts/epics.md
- _bmad-output/implementation-artifacts/sprint-status.yaml
- _bmad-output/implementation-artifacts/dependency-graph.md
Use the dependency graph's PR Status column as the authoritative source for whether a PR is
merged. sprint-status `done` means the pipeline finished (code review complete) — it does NOT
mean the PR is merged.
Report back:
- current_epic_merged: true/false — every story in the current epic has PR Status = `merged`
in the dependency graph
- current_epic_prs_open: true/false — every story in the current epic has a PR number in the
dependency graph, but at least one PR Status is not `merged`
- all_epics_complete: true/false — every story across every epic has PR Status = `merged`
in the dependency graph
- current_epic_name: name/number of the lowest incomplete epic
- next_epic_name: name/number of the next epic (if any)
- stories_remaining: count of stories in the current epic whose PR Status is not `merged`
Read `references/subagents/phase4-assessment.md` and follow its instructions exactly.
```
Using the assessment report:
@@ -536,7 +496,7 @@ Using the assessment report from Step 2, follow the applicable branch:
- Otherwise (more stories to develop in current epic): `✅ Batch complete. Ready for the next batch.`
2. Start the wait using the **[Monitor Pattern](references/coordinator/pattern-monitor.md)** (when `MONITOR_SUPPORT=true` **and** `AUTO_PR_MERGE=false`) or the **[Timer Pattern](references/coordinator/pattern-timer.md)** otherwise:
> **`AUTO_PR_MERGE=true` guard:** When `AUTO_PR_MERGE=true`, Phase 3 already merged all batch PRs before Phase 4 runs. `BATCH_PRS` will be empty, causing the Monitor to fire `ALL_MERGED` immediately with no actual pause. Skip the Monitor path entirely and go directly to the **Timer only** path below — the `WAIT_TIMER_SECONDS` cooldown must still fire before the next batch.
> **`AUTO_PR_MERGE=true` guard:** When `AUTO_PR_MERGE=true`, Phase 3 already merged all batch PRs before Phase 4 runs. `BATCH_PRS` will be empty, causing the Monitor to fire `ALL_MERGED` immediately with no actual pause. Skip the Monitor path entirely and go directly to the **Timer only** path below — the `WAIT_TIMER_SECONDS` cooldown must still fire before the next batch. The wait exists to give the developer a chance to review the merged changes and course-correct before the next batch begins — never skip or shorten it.
**If `MONITOR_SUPPORT=true` and `AUTO_PR_MERGE=false` — Monitor + CronCreate fallback:**
- Fill in `BATCH_PRS` from the Phase 0 pending-PR report (space-separated numbers, e.g. `"101 102 103"`). Use the PR-merge watcher script from [monitor-pattern.md](references/coordinator/pattern-monitor.md) with that value substituted. Save the Monitor handle as `PR_MONITOR`.
@@ -580,7 +540,13 @@ Read `references/coordinator/pattern-timer.md` when instructed to start a timer.
## Monitor Pattern
Read `references/coordinator/pattern-monitor.md` when `MONITOR_SUPPORT=true`. It covers CI status polling (Step 4) and PR-merge watching (Phase 4 Branch B), plus the `MONITOR_SUPPORT=false` fallback for each.
Read `references/coordinator/pattern-monitor.md` when `MONITOR_SUPPORT=true`. It covers CI status polling (Step 6) and PR-merge watching (Phase 4 Branch B), plus the `MONITOR_SUPPORT=false` fallback for each.
---
## Watchdog Pattern
Read `references/coordinator/pattern-watchdog.md` when `MONITOR_SUPPORT=true` and the activity log hook is installed (Step 4 of setup). Use it before spawning long-running Phase 2 subagents (Steps 2, 3, 4, 5) to detect hung agents via activity log monitoring.
---
@@ -592,13 +558,15 @@ Read `references/coordinator/pattern-gh-curl-fallback.md` when any `gh` command
## Rules
1. **Delegate mode only** — never read files, run git/gh commands, or write to disk yourself. The only platform command the coordinator may run directly is context compaction via Pre-Continuation Checks (when `CONTEXT_COMPACTION_THRESHOLD` is exceeded). All other slash commands and operations are delegated to subagents.
1. **Delegate mode only** — never read project files, run git/gh commands, or write to disk yourself. Coordinator-only direct operations are limited to: Pre-Continuation Checks (Bash session-state read, `/reload-plugins`, `/compact`), timer management (CronCreate/CronDelete), channel notifications (Telegram tool), and the Monitor tool for CI/PR polling. All story-level operations are delegated to subagents.
2. **One subagent per step per story** — spawn only after the previous step reports success.
3. **Sequential steps within a story** — Steps 1→2→3→4→5 run strictly in order.
3. **Sequential steps within a story** — Steps 1→2→3→4→5→6→7 run strictly in order.
4. **Parallel stories** — launch all stories' Step 1 in one message (one tool call per story). Phase 3 runs sequentially by design.
5. **Dependency graph is authoritative** — never pick a story whose dependencies are not fully merged. Use Phase 0's report, not your own file reads.
6. **Phase 0 runs before every batch** — always after the Phase 4 wait. Always as a fresh subagent.
7. **Confirm success** before spawning the next subagent.
8. **sprint-status.yaml is updated by step subagents** — each step subagent writes to the repo root copy. The coordinator never does this directly.
9. **On failure** — report the error, halt that story. No auto-retry. **Exception:** rate/usage limit failures → run Pre-Continuation Checks (auto-pauses until reset) then retry.
10. **Issue all Step 1 subagent calls in one response** when Phase 2 begins. After each story's Step 1 completes, issue that story's Step 2 — never wait for all stories' Step 1 to finish before issuing any Step 2. This rolling-start rule applies to all sequential steps within a story.
7. **Phase 4 wait is mandatory and full-duration** — always use `WAIT_TIMER_SECONDS` unchanged. Never shorten or skip the wait because PRs are already merged or the wait seems unnecessary. The wait gives the developer time to review merged changes and course-correct before the next batch.
8. **Confirm success** before spawning the next subagent.
9. **sprint-status.yaml is updated by step subagents** — each step subagent writes to the repo root copy. The coordinator never does this directly.
10. **On failure** — report the error, halt that story. No auto-retry. **Exception:** rate/usage limit failures → run Pre-Continuation Checks (auto-pauses until reset) then retry.
11. **Issue all Step 1 subagent calls in one response** when Phase 2 begins. After each story's Step 1 completes, issue that story's Step 2 — never wait for all stories' Step 1 to finish before issuing any Step 2. This rolling-start rule applies to all sequential steps within a story.
12. **Pre-Continuation Checks are mandatory at every gate** — run `references/coordinator/gate-pre-continuation.md` between every step spawn, after each Phase 3 merge, and before every Phase 0 re-entry. Never skip or defer these checks, even when handling multiple parallel story completions simultaneously.

View File

@@ -37,80 +37,168 @@ Check for the presence of harness directories at the project root:
Store all detected harnesses. Determine the **current harness** from this skill's own file path — whichever harness directory contains this running skill is the current harness. Use the current harness to drive the question branch in Step 3.
## Step 2b: Session-State Hook (Claude Code only)
## Step 3: Session-State Hook (Claude Code only)
Skip this step if `claude-code` was not detected in Step 2.
Skip this step if `claude-code` was not detected in Step 2, or if `--headless` /
`accept all defaults` was passed (auto-accept as yes).
The BAD coordinator's Pre-Continuation Checks (rate-limit pausing, context compaction) need
access to Claude Code session state — `context_window.used_percentage` and `rate_limits.*`.
Claude Code exposes this data via the `statusLine` script mechanism: it pipes a JSON blob to
the script on every API response. This step installs a lightweight script that writes that JSON
to a temp file the coordinator reads with the Bash tool.
Silently check: does `.claude/bad-statusline.sh` exist and does `.claude/settings.local.json`
have a `statusLine` entry pointing to it? Note `already installed` or `not yet installed`.
Ask: **"Install BAD session-state capture (writes rate-limit / context data to a temp file for Pre-Continuation Checks)? [Y/n]"**
Invoke the **`AskUserQuestion`** tool (your only output for this turn — do not proceed to
Step 4 until the tool returns):
Default: **yes** (or auto-accept if `--headless` / `accept all defaults`).
```
questions: [
{
question: "Install BAD session-state capture? Writes rate-limit / context data to a temp file so the coordinator can pause near API limits. [<state-hook-status>]",
header: "State hook",
multiSelect: false,
options: [
{ label: "Yes, install", description: "Recommended — enables rate-limit pausing and context compaction" },
{ label: "No, skip", description: "Pre-Continuation Checks will not have session data" }
]
}
]
```
If **yes**, read and follow `references/coordinator/setup-statusline-hook.md`.
If **Yes**: read and follow `references/coordinator/setup-statusline-hook.md`, then proceed to Step 4.
If **No**: proceed to Step 4.
## Step 4: Activity Log Hook (Claude Code only)
Skip this step if `claude-code` was not detected in Step 2, or if `--headless` /
`accept all defaults` was passed (auto-accept as yes).
**Always run on every setup and reconfiguration** — even if already installed. The script is safe to re-run (anti-zombie pattern).
Silently check: does `.claude/settings.local.json` have a `PostToolUse` hook whose `command`
references `bad-logs`? Note `already installed — will reinstall` or `not yet installed`.
Invoke the **`AskUserQuestion`** tool (your only output for this turn — do not proceed to
Step 5 until the tool returns):
```
questions: [
{
question: "Install BAD activity log hook? Logs every tool call passively so the watchdog can detect hung subagents. [<activity-hook-status>]",
header: "Activity hook",
multiSelect: false,
options: [
{ label: "Yes, install", description: "Recommended — enables hang detection via the watchdog pattern" },
{ label: "No, skip", description: "Watchdog pattern will be disabled" }
]
}
]
```
If **Yes**, run:
```bash
python3 ./scripts/setup-activity-hook.py \
--settings-path ".claude/settings.local.json" \
--project-root "$(pwd)"
```
The script adds a `PostToolUse` hook to `.claude/settings.local.json` (project-scoped), writes
one TSV line per tool call to `~/.claude/projects/<encoded-project>/bad-logs/<agent-slug>/<session-id>.log`,
and uses an anti-zombie pattern so it is safe to re-run.
Proceed to Step 5.
---
## Step 3: Collect Configuration
## Step 5: Core Config (only if not yet set)
Show defaults in brackets. Present all values together so the user can respond once with only what they want to change. Never say "press enter" or "leave blank".
Skip this step if `user_name` already exists in `config.yaml` or `config.user.yaml`.
**If `--headless` / `accept all defaults`:** use defaults (`BMad`, `English`) without prompting.
Otherwise, invoke the **`AskUserQuestion`** tool:
```
questions: [
{
question: "What name should BAD use for you in notifications and reports?",
header: "Your name",
multiSelect: false,
options: [
{ label: "BMad", description: "Default" },
{ label: "Other", description: "Type your name" }
]
},
{
question: "What language should BAD use for communication and documents?",
header: "Language",
multiSelect: false,
options: [
{ label: "English", description: "Default" },
{ label: "Other", description: "Type your language" }
]
}
]
```
Record `user_name` → `config.user.yaml`; `communication_language` and
`document_output_language` (same value) → `config.user.yaml` and `config.yaml` respectively.
---
## Step 6: BAD Configuration
**Default priority** (highest wins): existing config values > `./assets/module.yaml` defaults.
**If `--headless` / `accept all defaults`:** skip this step entirely and use defaults.
### Core Config (only if not yet set)
First, **print all current config values** as a formatted block so the user can review them:
Only collect if no core keys exist in `config.yaml` or `config.user.yaml`:
```
⚙️ BAD Configuration — current values shown in [brackets]
- `user_name` (default: BMad) — written exclusively to `config.user.yaml`
- `communication_language` and `document_output_language` (default: English — ask as a single language question, both keys get the same answer) — `communication_language` written exclusively to `config.user.yaml`
- `output_folder` (default: `{project-root}/_bmad-output`) — written to root of `config.yaml`, shared across all modules
Universal settings:
max_parallel_stories [<value>] — Max stories per batch
worktree_base_path [<value>] — Git worktrees directory
auto_pr_merge [<value>] — Auto-merge batch PRs after each batch
run_ci_locally [<value>] — Skip GitHub Actions, run CI locally
wait_timer_seconds [<value>] — Seconds between batches
retro_timer_seconds [<value>] — Seconds before auto-retrospective
context_compaction_threshold [<value>] — Context % to trigger compaction
stale_timeout_minutes [<value>] — Inactivity minutes before watchdog alerts
### Universal BAD Config
Claude Code settings:
model_standard [<value>] — Model for story/dev/PR steps
model_quality [<value>] — Model for code review
api_five_hour_threshold [<value>] — 5-hour usage % to pause
api_seven_day_threshold [<value>] — 7-day usage % to pause
```
Read from `./assets/module.yaml` and present as a grouped block:
Then invoke the **`AskUserQuestion`** tool (your only output for this turn — do not proceed
to Step 7 until the tool returns):
- `max_parallel_stories` — Max stories to run in a single batch [3]
- `worktree_base_path` — Root directory for git worktrees, relative to repo root [.worktrees]
- `auto_pr_merge` — Auto-merge batch PRs sequentially after each batch? [No]
- `run_ci_locally` — Skip GitHub Actions and run CI locally by default? [No]
- `wait_timer_seconds` — Seconds to wait between batches before re-checking PR status [3600]
- `retro_timer_seconds` — Seconds before auto-running retrospective after epic completion [600]
- `context_compaction_threshold` — Context window % at which to compact/summarise context [80]
```
questions: [
{
question: "Review the configuration above. Accept all defaults, or specify what to change?",
header: "Config",
multiSelect: false,
options: [
{ label: "Accept all defaults", description: "Keep every value shown above and proceed" },
{ label: "Change some values", description: "Select 'Other' to type overrides as KEY=VALUE pairs, e.g. max_parallel_stories=5, model_quality=sonnet" }
]
}
]
```
### Harness-Specific Config
- **Accept all defaults:** proceed to Step 7.
- **Change some values / Other:** parse the user's text as `KEY=VALUE` pairs (space or comma
separated). Apply overrides to the resolved config. Proceed to Step 7.
Run once for the **current harness**. If multiple harnesses are detected, also offer to configure each additional harness in sequence after the current one — label each section clearly.
If multiple harnesses are detected, repeat this step once per additional harness — label each
section clearly and store model/threshold values with a harness prefix (e.g.
`claude_model_standard`).
When configuring multiple harnesses, model and threshold variables are stored with a harness prefix (e.g. `claude_model_standard`, `cursor_model_standard`) so they coexist. Universal variables are shared and asked only once.
Automatically write without prompting:
- Claude Code: `timer_support: true`, `monitor_support: true`
- All other harnesses: `timer_support: false`, `monitor_support: false`
#### Claude Code (`claude-code`)
Present as **"Claude Code settings"**:
- `model_standard` — Model for story creation, dev, and PR steps
- Choose: `sonnet` (default), `haiku`
- `model_quality` — Model for code review step
- Choose: `opus` (default), `sonnet`
- `api_five_hour_threshold` — 5-hour API usage % at which to pause [80]
- `api_seven_day_threshold` — 7-day API usage % at which to pause [95]
Automatically write `timer_support: true` and `monitor_support: true` — no prompt needed.
#### All Other Harnesses
Present as **"{HarnessName} settings"**:
- `model_standard` — Model for story creation, dev, and PR steps (e.g. `fast`, `gpt-4o-mini`, `flash`)
- `model_quality` — Model for code review step (e.g. `best`, `o1`, `pro`)
- `api_usage_threshold` — API usage % at which to pause for rate limits [80]
Automatically write `timer_support: false` and `monitor_support: false` — no prompt needed. BAD will use prompt-based continuation instead of native timers, and manual polling loops instead of the Monitor tool, on this harness.
## Step 4: Write Files
## Step 7: Write Files
Write a temp JSON file with collected answers structured as:
```json
@@ -124,6 +212,7 @@ Write a temp JSON file with collected answers structured as:
"wait_timer_seconds": "3600",
"retro_timer_seconds": "600",
"context_compaction_threshold": "80",
"stale_timeout_minutes": "60",
"timer_support": true,
"monitor_support": true,
"model_standard": "sonnet",
@@ -153,13 +242,13 @@ If either exits non-zero, surface the error and stop.
Run `./scripts/merge-config.py --help` or `./scripts/merge-help-csv.py --help` for full usage.
## Step 5: Create Directories
## Step 8: Create Directories
After writing config, create the worktree base directory at the resolved path of `{project-root}/{worktree_base_path}` if it does not exist. Use the actual resolved path for filesystem operations only — config values must continue to use the literal `{project-root}` token.
Also create `output_folder` and any other `{project-root}/`-prefixed values from the config that don't exist on disk.
## Step 6: Confirm and Greet
## Step 9: Confirm and Greet
Display what was written: config values set, user settings written, help entries registered, fresh install vs reconfiguration.

View File

@@ -1,7 +1,7 @@
code: bad
name: "BMad Autonomous Development"
description: "Orchestrates parallel BMad story implementation pipelines — automatically runs bmad-create-story, bmad-dev-story, bmad-code-review, and commit/PR in batches, driven by the sprint backlog and dependency graph"
module_version: "1.1.0"
module_version: "1.2.0"
module_greeting: "BAD is ready. Run /bad to start. Pass KEY=VALUE args to override config at runtime (e.g. /bad MAX_PARALLEL_STORIES=2)."
header: "BAD — BMad Autonomous Development"
@@ -39,3 +39,8 @@ context_compaction_threshold:
prompt: "Context window % at which to compact/summarise context"
default: "80"
result: "{value}"
stale_timeout_minutes:
prompt: "Minutes of subagent inactivity before watchdog alerts (0 = disabled)"
default: "60"
result: "{value}"

View File

@@ -4,9 +4,9 @@ Run these checks **in order** at every gate point: between Phase 2 steps, after
## Channel Reconnect (run first, before the numbered checks)
If `NOTIFY_SOURCE` is not `"terminal"` (i.e. a channel like Telegram is configured), run `/reload-plugins` now. This is a no-op when the plugin is already connected, and silently restores it when it has dropped. No user-visible output needed unless the channel was actually missing.
If `NOTIFY_SOURCE` is not `"terminal"` (i.e. a channel like Telegram is configured), run `/reload-plugins` now. This is a **built-in Claude Code CLI command** — do NOT invoke it via the Skill tool, just execute it directly. This is a no-op when the plugin is already connected, and silently restores it when it has dropped. No user-visible output needed unless the channel was actually missing.
**Harness note:** Checks 2 and 3 require session state data. On Claude Code, this is available via the session-state hook installed by `/bad setup` (Step 2b). On other harnesses this data may not be available — each check gracefully skips if its fields are absent.
**Harness note:** Checks 2 and 3 require session state data. On Claude Code, this is available via the session-state hook installed by `/bad setup` (Step 3). On other harnesses this data may not be available — each check gracefully skips if its fields are absent.
Read the current session state using the Bash tool:
@@ -22,7 +22,9 @@ Parse the output as JSON. The relevant fields:
- `rate_limits.seven_day.used_percentage` — 0100 (Claude Code only)
- `rate_limits.seven_day.resets_at` — Unix epoch seconds when the 7-day window resets
Each field may be independently absent. If the file does not exist or a field is absent, skip the corresponding check.
**If the file does not exist** — print `"⚠️ Pre-Continuation: session state unavailable (bad-session-state.json missing — check that the session-state hook is installed via /bad setup Step 3). Skipping rate limit checks."` and proceed.
**If a specific field is absent** — silently skip only that check. If the file exists but `rate_limits` is entirely absent, print `"⚠️ Pre-Continuation: rate limit data not in session state — skipping usage checks."` once (not on every gate).
---

View File

@@ -1,6 +1,6 @@
# Monitor Pattern
Use this pattern when `MONITOR_SUPPORT=true`. It covers two use cases in BAD: CI status polling (Step 4) and PR-merge watching (Phase 4 Branch B). The caller supplies the poll script and the reaction logic.
Use this pattern when `MONITOR_SUPPORT=true`. It covers two use cases in BAD: CI status polling (Step 6) and PR-merge watching (Phase 4 Branch B). The caller supplies the poll script and the reaction logic.
> **Requires Claude Code v2.1.98+.** Uses the same Bash permission rules. Not available on Amazon Bedrock, Google Vertex AI, or Microsoft Azure Foundry — set `MONITOR_SUPPORT=false` on those platforms.
@@ -11,9 +11,9 @@ Use this pattern when `MONITOR_SUPPORT=true`. It covers two use cases in BAD: CI
3. **React to events** — on each line, apply the caller's reaction logic (e.g. CI green → proceed; PR merged → continue).
4. **Stop Monitor** — call stop/cancel on the Monitor handle when done (success, failure, or user override).
## CI status polling (Step 4)
## CI status polling (Step 6)
Poll script (run inside the Step 4 subagent):
Poll script (run inside the Step 6 subagent):
```bash
GH_BIN="$(command -v gh)"
while true; do
@@ -23,7 +23,7 @@ done
```
React to each output line:
- `"conclusion":"success"` → stop Monitor, proceed to step 5
- `"conclusion":"success"` → stop Monitor, report success
- `"conclusion":"failure"` or `"conclusion":"cancelled"` → stop Monitor, diagnose, fix, push, restart Monitor
- Billing/spending limit text in output → stop Monitor, run Local CI Fallback
@@ -63,5 +63,5 @@ React to each output line:
## If `MONITOR_SUPPORT=false`
- **CI polling:** use the manual `gh run view` loop in Step 4 (see Step 4 fallback path in SKILL.md).
- **CI polling:** use the manual `gh run view` loop in Step 6 (see Step 6 fallback path in SKILL.md).
- **PR-merge watching:** use the CronCreate-only Timer Pattern in Phase 4 Branch B (see fallback path in SKILL.md).

View File

@@ -2,15 +2,13 @@
Use this pattern every time a `📣 Notify:` callout appears **anywhere in the BAD skill** — including inside the Timer Pattern and Monitor Pattern.
**If `NOTIFY_SOURCE="telegram"`:** call `mcp__plugin_telegram_telegram__reply` with:
**Always print the message in the conversation** — this keeps the in-session transcript readable regardless of channel configuration.
**If `NOTIFY_SOURCE="telegram"`:** also call `mcp__plugin_telegram_telegram__reply` with:
- `chat_id`: `NOTIFY_CHAT_ID`
- `text`: the message
If the Telegram tool call fails (tool unavailable or error returned):
1. Run `/reload-plugins` to reconnect the MCP server.
1. Run `/reload-plugins` to reconnect the MCP server. This is a **built-in Claude Code CLI command** — execute it directly, do NOT invoke it via the Skill tool.
2. Retry the tool call once.
3. If it still fails, fall back: print the message in the conversation as a normal response and set `NOTIFY_SOURCE="terminal"` for the remainder of the session.
**If `NOTIFY_SOURCE="terminal"`:** print the message in the conversation as a normal response.
Always send both a terminal print and a channel message — the terminal print keeps the in-session transcript readable, and the channel message reaches the user on their device.
3. If it still fails, set `NOTIFY_SOURCE="terminal"` for the remainder of the session.

View File

@@ -39,7 +39,7 @@ Save the returned job ID as `JOB_ID`.
[C] {C label}
[S] {S label}
[X] {X label} ← omit if no [X] label supplied
[M] <minutes> — modify countdown
[M] {minutes} — modify countdown
```
Wait for whichever arrives first — user reply or fired prompt. On any human reply, print elapsed time first:
@@ -58,7 +58,7 @@ echo "⏱ Time elapsed: $((ELAPSED / 60))m $((ELAPSED % 60))s"
[C] {C label}
[S] {S label}
[X] {X label} ← omit if no [X] label supplied
[M] <minutes> — modify countdown
[M] {minutes} — modify countdown
```
- **FIRED (no prior reply)** → run the [C] action automatically

View File

@@ -0,0 +1,125 @@
# Watchdog Pattern
Use this pattern when `MONITOR_SUPPORT=true` and the activity log hook is installed (Step 4 of BAD setup). It detects subagents that have gone silent — no tool calls for a configurable period — and alerts via Telegram so the user can decide whether to wait, retry, or skip.
> **Requires:** Activity log hook installed via `scripts/setup-activity-hook.py`. Falls back to a fixed-timeout CronCreate watchdog when `MONITOR_SUPPORT=false`.
## How it works
1. **Record the log path** — before spawning a long-running subagent, compute the log directory for that agent.
2. **Spawn the subagent** with `run_in_background=true`.
3. **Start Monitor** — pass a poll script that watches the agent's log file modification time.
4. **React to events** — on `ALIVE` lines, keep waiting; on `STALE`, notify and ask the user.
5. **On agent completion** — stop Monitor, proceed normally.
## Log path formula
The activity hook writes to:
```
~/.claude/projects/<encoded-project>/bad-logs/<agent-slug>/<session_id>.log
```
Where:
- `<encoded-project>` = absolute project root with leading `/` removed and `/` replaced by `-`
- `<agent-slug>` = `coordinator` for the coordinator; worktree basename for story subagents (e.g. `story-4-auth-controller`)
- `<session_id>` = unique per Agent() call — not known until the agent starts writing
Because `session_id` is only known once the subagent has made its first tool call, watch the **directory** (`bad-logs/<agent-slug>/`) for any new or updated file rather than a specific filename.
## Poll script
```bash
#!/bin/bash
# Args: <log_dir> <agent_label> <stale_minutes>
LOG_DIR="$1"
AGENT_LABEL="$2"
STALE_MINUTES="${3:-60}"
touch /tmp/bad_watchdog_baseline
while true; do
# Find newest log file in the agent's log directory
NEWEST=$(find "$LOG_DIR" -name "*.log" -newer /tmp/bad_watchdog_baseline -type f 2>/dev/null | head -1)
if [ -n "$NEWEST" ]; then
echo "ALIVE: $AGENT_LABEL — activity detected"
touch /tmp/bad_watchdog_baseline
sleep 120
continue
fi
# Check age of most recently modified log file
LATEST=$(find "$LOG_DIR" -name "*.log" -type f 2>/dev/null | xargs stat -f "%m %N" 2>/dev/null | sort -n | tail -1 | awk '{print $1}')
if [ -z "$LATEST" ]; then
# No log file yet — agent hasn't made its first tool call
echo "ALIVE: $AGENT_LABEL — waiting for first tool call"
sleep 30
continue
fi
NOW=$(date +%s)
AGE_MIN=$(( (NOW - LATEST) / 60 ))
if [ "$AGE_MIN" -ge "$STALE_MINUTES" ]; then
LAST_LINE=$(find "$LOG_DIR" -name "*.log" -type f 2>/dev/null | xargs stat -f "%m %N" 2>/dev/null | sort -n | tail -1 | awk '{print $2}' | xargs tail -1 2>/dev/null || echo "unknown")
echo "STALE:${AGE_MIN}:${LAST_LINE}"
exit 1
fi
echo "ALIVE: $AGENT_LABEL — last activity ${AGE_MIN}m ago"
sleep 120
done
```
## Coordinator usage
Before spawning a long-running subagent (Steps 2, 3, 4, 5 are the most likely to hang):
```
1. Compute LOG_DIR = ~/.claude/projects/<encoded>/bad-logs/<agent-slug>/
(e.g. bad-logs/story-4-auth-controller/ for story 4's dev step)
2. Spawn subagent with run_in_background=true
3. Start Monitor with poll script:
Monitor(script, args=[LOG_DIR, "story-4 Step 3", STALE_TIMEOUT_MINUTES])
4. React to Monitor events:
- ALIVE:* → no action, keep waiting
- STALE:<min>:<last_line> → send Telegram alert (see below), wait for user reply
- Background agent completes → stop Monitor, proceed normally
```
## Telegram alert on STALE
```
⚠️ story-4 Step 3 appears stuck — no tool calls for {min} min.
Last activity: {last_line}
[K] Keep waiting another {STALE_TIMEOUT_MINUTES} min
[R] Retry — respawn this step from the start
[S] Skip this story and continue with others
[A] Abort BAD
```
Wait for user reply before taking any action.
- **[K]** — restart the Monitor watchdog (reset the staleness baseline); keep the background agent running
- **[R]** — stop Monitor; note the story as failed at this step; spawn a fresh subagent for the same step; restart Monitor
- **[S]** — stop Monitor; mark story as failed; continue pipeline with remaining stories
- **[A]** — stop Monitor; halt BAD; send summary of completed work
## If `MONITOR_SUPPORT=false`
Use a CronCreate timer as a fixed-timeout fallback. Set the timer to `STALE_TIMEOUT_MINUTES * 2` (double, since you cannot detect inactivity — only total elapsed time). On timer fire, send the same Telegram alert with the same options.
```
CronCreate(
fire_in_seconds = STALE_TIMEOUT_MINUTES * 120,
prompt = "BAD_WATCHDOG_FIRED:<agent_label> — fixed timeout elapsed. Send Telegram alert and await user reply."
)
```
## Configuration
`STALE_TIMEOUT_MINUTES` — read from BAD config. Default: `60`. Set lower (e.g. `30`) for faster detection; set higher (e.g. `90`) if your dev steps routinely involve long read-heavy analysis phases.

View File

@@ -1,6 +1,6 @@
# BAD Session-State Hook Setup
Executed during `/bad setup` (Step 2b). Installs `bad-statusline.sh` as the Claude Code
Executed during `/bad setup` (Step 3). Installs `bad-statusline.sh` as the Claude Code
`statusLine` script, automatically chaining any pre-existing statusLine command so the user
loses no existing functionality.

View File

@@ -72,6 +72,8 @@ STEPS:
REPORT BACK to the coordinator with this structured summary:
- ready_stories: list of { number, short_description, status } for every story
marked "Ready to Work: Yes" that is not done
- pending_prs: space-separated list of open (not yet merged) PR numbers across all
stories — used by the coordinator to watch for PR merges in Phase 4 Branch B
- all_stories_done: true/false — whether every story across every epic is done
- current_epic: name/number of the lowest incomplete epic
- any warnings or blockers worth surfacing

View File

@@ -0,0 +1,23 @@
# Phase 3: Post-Merge Cleanup — Subagent Instructions
Auto-approve all tool calls (yolo mode).
1. Verify sprint-status.yaml at the repo root has status `done` for all merged stories.
Fix any that are missing.
2. Repo root branch safety check:
git branch --show-current
If not main:
git restore .
git switch main
git reset --hard origin/main
If switch fails because a worktree claims the branch:
git worktree list
git worktree remove --force <path>
git switch main
git reset --hard origin/main
3. Pull main:
git pull --ff-only origin main
Report: done or any errors encountered.

View File

@@ -0,0 +1,29 @@
# Phase 4: Epic Completion Assessment — Subagent Instructions
Auto-approve all tool calls (yolo mode).
`BATCH_STORIES_WITH_PRS` is provided at the top of your prompt by the coordinator.
Read:
- _bmad-output/planning-artifacts/epics.md
- _bmad-output/implementation-artifacts/sprint-status.yaml
- _bmad-output/implementation-artifacts/dependency-graph.md
For the stories listed in BATCH_STORIES_WITH_PRS, verify their actual merge status directly
from GitHub — do not rely solely on the dependency graph for these, as it may be stale:
gh pr view {pr_number} --json state,mergedAt
Treat a PR as `merged` if `state` = `"MERGED"`. Record the real-time result for each.
For all other stories (not in BATCH_STORIES_WITH_PRS), use the dependency graph's PR Status
column as the authoritative source. sprint-status `done` means the pipeline finished (code
review complete) — it does NOT mean the PR is merged.
Report back:
- current_epic_merged: true/false — every story in the current epic is merged (using
real-time status for batch stories, dependency graph for all others)
- current_epic_prs_open: true/false — every story in the current epic has a PR number,
but at least one is not yet merged
- all_epics_complete: true/false — every story across every epic is merged
- current_epic_name: name/number of the lowest incomplete epic
- next_epic_name: name/number of the next epic (if any)
- stories_remaining: count of stories in the current epic that are not yet merged

View File

@@ -0,0 +1,16 @@
# Step 6: Local CI Fallback — Subagent Instructions
Run when `RUN_CI_LOCALLY=true` or when a billing/spending limit is hit during GitHub Actions monitoring.
a. Read all `.github/workflows/` files triggered on `pull_request` events.
b. Extract and run shell commands from each `run:` step in order (respecting
`working-directory`). If any fail, diagnose, fix, and re-run until all pass.
c. Commit fixes and push to the PR branch.
d. Post a PR comment:
## Test Results (manual — GitHub Actions skipped: billing/spending limit reached)
| Check | Status | Notes |
|-------|--------|-------|
| `<command>` | ✅ Pass / ❌ Fail | e.g. "42 tests passed" |
### Fixes applied
- [failure] → [fix]
All rows must show ✅ Pass before this step is considered complete.

View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
setup-activity-hook.py — installs a PostToolUse hook that logs every tool call
to per-subagent files under the Claude session history directory.
Each subagent gets its own log file keyed by agent slug and session_id:
~/.claude/projects/<encoded-project-path>/bad-logs/<agent-slug>/<session_id>.log
Log format (pipe-separated):
<ISO timestamp> | <tool_name> | <key input detail>
Usage:
python3 setup-activity-hook.py [--settings-path PATH] [--project-root PATH] [--remove]
python3 setup-activity-hook.py --help
"""
import argparse
import json
import os
from pathlib import Path
BAD_HOOK_MARKER = "bad-logs"
def compute_log_dir(project_root: str) -> str:
"""
Derives the per-project bad-logs directory path.
Claude stores session history at: ~/.claude/projects/<encoded>/
where <encoded> is the absolute path with leading / removed and / replaced by -.
"""
home = str(Path.home())
encoded = project_root.lstrip("/").replace("/", "-")
return f"{home}/.claude/projects/{encoded}/bad-logs"
def build_hook_command(log_dir: str, project_root: str) -> str:
"""
Builds the shell command that runs on every PostToolUse event.
Directory structure:
bad-logs/coordinator/<session_id>.log — coordinator (cwd == project root)
bad-logs/<story-basename>/<session_id>.log — story subagents (cwd is a worktree)
The project root is baked in at setup time so the jq expression can compare
cwd against it to distinguish the coordinator from story subagents.
Reads stdin once into _BAD_IN, then extracts session_id and agent slug.
Uses || true so hook failures never block Claude.
"""
jq_entry = (
"[now|todate, .tool_name, "
"(.tool_input.file_path // .tool_input.command // .tool_input.description // "
".tool_input.pattern // .tool_input.query // "
'(.tool_input | to_entries | map(.value | tostring) | first // ""))'
'] | join(" | ")'
)
jq_agent = f'if .cwd == "{project_root}" then "coordinator" else (.cwd // "" | split("/") | last) end'
return (
f'_BAD_IN=$(cat); '
f'_BAD_DIR="{log_dir}"; '
f'_BAD_SID=$(printf \'%s\' "$_BAD_IN" | jq -r \'.session_id // "unknown"\' 2>/dev/null); '
f'_BAD_AGENT=$(printf \'%s\' "$_BAD_IN" | jq -r \'{jq_agent}\' 2>/dev/null); '
f'mkdir -p "$_BAD_DIR/$_BAD_AGENT" 2>/dev/null; '
f'printf \'%s\' "$_BAD_IN" | jq -r \'{jq_entry}\' >> "$_BAD_DIR/$_BAD_AGENT/$_BAD_SID.log" 2>/dev/null || true'
)
def load_settings(path: str) -> dict:
try:
with open(path) as f:
return json.load(f)
except FileNotFoundError:
return {}
except json.JSONDecodeError as e:
print(f"Error: {path} contains invalid JSON: {e}", flush=True)
raise SystemExit(1)
def save_settings(path: str, settings: dict) -> None:
os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
with open(path, "w") as f:
json.dump(settings, f, indent=2)
f.write("\n")
def install_hook(settings: dict, command: str) -> dict:
"""Add BAD activity hook, removing any existing one first (anti-zombie)."""
hooks = settings.setdefault("hooks", {})
entries = hooks.get("PostToolUse", [])
# Remove existing BAD activity hook
entries = [
e for e in entries
if not any(BAD_HOOK_MARKER in h.get("command", "") for h in e.get("hooks", []))
]
entries.append({
"matcher": "",
"hooks": [{"type": "command", "command": command}]
})
hooks["PostToolUse"] = entries
settings["hooks"] = hooks
return settings
def remove_hook(settings: dict) -> dict:
"""Remove BAD activity hook."""
hooks = settings.get("hooks", {})
entries = hooks.get("PostToolUse", [])
entries = [
e for e in entries
if not any(BAD_HOOK_MARKER in h.get("command", "") for h in e.get("hooks", []))
]
if entries:
hooks["PostToolUse"] = entries
elif "PostToolUse" in hooks:
del hooks["PostToolUse"]
if not hooks:
settings.pop("hooks", None)
else:
settings["hooks"] = hooks
return settings
def main() -> None:
parser = argparse.ArgumentParser(
description="Install or remove the BAD activity log hook in .claude/settings.local.json"
)
parser.add_argument(
"--settings-path",
default=".claude/settings.local.json",
help="Path to settings.local.json (default: .claude/settings.local.json)",
)
parser.add_argument(
"--project-root",
default=None,
help="Absolute project root path (default: current working directory)",
)
parser.add_argument(
"--remove",
action="store_true",
help="Remove the BAD activity hook instead of installing it",
)
args = parser.parse_args()
project_root = os.path.abspath(args.project_root or os.getcwd())
settings = load_settings(args.settings_path)
if args.remove:
settings = remove_hook(settings)
save_settings(args.settings_path, settings)
print(f"BAD activity hook removed from {args.settings_path}")
else:
log_dir = compute_log_dir(project_root)
command = build_hook_command(log_dir, project_root)
settings = install_hook(settings, command)
save_settings(args.settings_path, settings)
print(f"BAD activity hook installed")
print(f" settings : {args.settings_path}")
print(f" log dir : {log_dir}/coordinator/<session_id>.log")
print(f" : {log_dir}/<story-basename>/<session_id>.log")
if __name__ == "__main__":
main()