mirror of
https://github.com/glittercowboy/get-shit-done
synced 2026-04-26 01:35:29 +02:00
* refactor(tests): standardize to node:assert/strict and t.after() per CONTRIBUTING.md
- Replace require('node:assert') with require('node:assert/strict') across
all 73 test files to enforce strict equality (no type coercion)
- Replace try/finally cleanup blocks with t.after() hooks in core.test.cjs
and hooks-opt-in.test.cjs per the test lifecycle standards
- Utility functions in codex-config and security-scan retain try/finally
as that is appropriate for per-function resource guards, not lifecycle hooks
Closes #1674
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* perf(tests): add --test-concurrency=4 to test runner for parallel file execution
Node.js --test-concurrency controls how many test files run as parallel child
processes. Set to 4 by default, configurable via TEST_CONCURRENCY env var.
Pins test concurrency at a known level rather than inheriting
os.availableParallelism(), which varies across CI environments.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix(security): allowlist verify.test.cjs in prompt-injection scanner
tests/verify.test.cjs uses <human>...</human> as GSD phase task-type
XML (meaning "a human should verify this step"), which matches the
scanner's fake-message-boundary pattern for LLM APIs. This is a
false positive — add it to the allowlist alongside the other test files
that legitimately contain injection-adjacent patterns.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
200 lines
6.5 KiB
Bash
Executable File
200 lines
6.5 KiB
Bash
Executable File
#!/usr/bin/env bash
# prompt-injection-scan.sh — Scan files for prompt injection patterns
#
# Usage:
#   scripts/prompt-injection-scan.sh --diff origin/main   # CI mode: scan changed .md/.cjs/.js/.json/.yml/.yaml/.sh files
#   scripts/prompt-injection-scan.sh --file path/to/file  # Scan a single file
#   scripts/prompt-injection-scan.sh --dir agents/        # Scan all scannable files in a directory
#   scripts/prompt-injection-scan.sh --stdin              # Scan the newline-separated paths read from stdin
#
# Exit codes:
#   0 = clean
#   1 = findings detected
#   2 = usage error

# Strict mode: abort on unhandled errors, unset variables, and failures in
# any stage of a pipeline.
set -euo pipefail

# ─── Patterns ────────────────────────────────────────────────────────────────
# Each pattern is a POSIX extended regex. Keep alphabetized by category.
# Patterns are handed to `grep -iE`, so matching is case-insensitive.

PATTERNS=(
  # Instruction override
  'ignore[[:space:]]+(all[[:space:]]+)?(previous|prior|above|earlier|preceding)[[:space:]]+(instructions|prompts|rules|directives|context)'
  'disregard[[:space:]]+(all[[:space:]]+)?(previous|prior|above)[[:space:]]+(instructions|prompts|rules)'
  'forget[[:space:]]+(all[[:space:]]+)?(previous|prior|above)[[:space:]]+(instructions|prompts|rules|context)'
  'override[[:space:]]+(all[[:space:]]+)?(system|previous|safety)[[:space:]]+(instructions|prompts|rules|checks|filters|guards)'
  'override[[:space:]]+(system|safety|security)[[:space:]]'

  # Role manipulation
  'you[[:space:]]+are[[:space:]]+now[[:space:]]+(a|an|my)[[:space:]]'
  'from[[:space:]]+now[[:space:]]+on[[:space:]]+(you|pretend|act|behave)'
  'pretend[[:space:]]+(you[[:space:]]+are|to[[:space:]]+be)[[:space:]]'
  'act[[:space:]]+as[[:space:]]+(a|an|if|my)[[:space:]]'
  'roleplay[[:space:]]+as[[:space:]]'
  'assume[[:space:]]+the[[:space:]]+role[[:space:]]+of[[:space:]]'

  # System prompt extraction
  'output[[:space:]]+(your|the)[[:space:]]+(system[[:space:]]+)?(prompt|instructions)'
  'reveal[[:space:]]+(your|the)[[:space:]]+(system[[:space:]]+)?(prompt|instructions)'
  'show[[:space:]]+me[[:space:]]+(your|the)[[:space:]]+(system[[:space:]]+)?(prompt|instructions)'
  'print[[:space:]]+(your|the)[[:space:]]+(system[[:space:]]+)?(prompt|instructions)'
  'what[[:space:]]+(is|are)[[:space:]]+(your|the)[[:space:]]+(system[[:space:]]+)?(prompt|instructions)'
  'repeat[[:space:]]+(your|the|all)[[:space:]]+(system[[:space:]]+)?(prompt|instructions|rules)'

  # Fake message boundaries
  '</?system>'
  '</?assistant>'
  '</?human>'
  '\[SYSTEM\]'
  '\[/SYSTEM\]'
  '\[INST\]'
  '\[/INST\]'
  '<<SYS>>'
  '<</SYS>>'

  # Tool call injection / code execution in markdown
  # These three use ANSI-C quoting ($'...') so that \x27 really becomes a
  # single quote. In a plain single-quoted string the bracket expression
  # ["\x27] is the literal character set {", \, x, 2, 7}: it never matches
  # a single quote and wrongly matches e.g. `eval(x`. Inside $'...' the
  # regex backslash before the parenthesis must be doubled.
  $'eval[[:space:]]*\\([[:space:]]*["\x27]'
  $'exec[[:space:]]*\\([[:space:]]*["\x27]'
  $'Function[[:space:]]*\\([[:space:]]*["\x27].*return'

  # Jailbreak / DAN patterns
  'do[[:space:]]+anything[[:space:]]+now'
  'DAN[[:space:]]+mode'
  'developer[[:space:]]+mode[[:space:]]+(enabled|output|activated)'
  'jailbreak'
  'bypass[[:space:]]+(safety|content|security)[[:space:]]+(filter|check|rule|guard)'
)

# ─── Allowlist ───────────────────────────────────────────────────────────────
# Files that legitimately discuss injection patterns (security docs, tests, this script)
# Entries are repo-relative paths; is_allowlisted compares them against the
# tail of each scanned path.
ALLOWLIST=(
  'scripts/prompt-injection-scan.sh'
  'scripts/base64-scan.sh'
  'scripts/secret-scan.sh'
  'tests/security-scan.test.cjs'
  'tests/security.test.cjs'
  'tests/prompt-injection-scan.test.cjs'
  'tests/verify.test.cjs'
  'get-shit-done/bin/lib/security.cjs'
  'hooks/gsd-prompt-guard.js'
  'SECURITY.md'
)

#######################################
# Decide whether a path is exempt from scanning.
# Globals:
#   ALLOWLIST (read)
# Arguments:
#   $1 - path of the file about to be scanned
# Returns:
#   0 if the path equals an ALLOWLIST entry or ends with "/<entry>",
#   1 otherwise.
#######################################
is_allowlisted() {
  local file="$1"
  local allowed

  for allowed in "${ALLOWLIST[@]}"; do
    # Match on a path-component boundary only. A bare suffix match
    # (*"$allowed") would also exempt names such as "evil-SECURITY.md",
    # letting a crafted file name bypass the scanner.
    if [[ "$file" == "$allowed" || "$file" == */"$allowed" ]]; then
      return 0
    fi
  done

  return 1
}

# ─── File Collection ─────────────────────────────────────────────────────────

#######################################
# Print the paths to scan, one per line.
# Arguments:
#   $1 - mode: --diff | --file | --dir | --stdin
#   $2 - mode argument: base ref for --diff (defaults to origin/main),
#        file path for --file, directory for --dir; unused for --stdin
# Outputs:
#   Newline-separated paths on stdout; error and usage text on stderr.
# Returns:
#   Exits 2 when the mode is unknown or its path argument is missing or
#   does not exist.
#######################################
collect_files() {
  local mode="${1:-}"
  if [[ $# -gt 0 ]]; then
    shift
  fi

  case "$mode" in
    --diff)
      local base="${1:-origin/main}"
      # Get changed files in the diff, filter to scannable extensions.
      # `|| true` absorbs grep's non-zero status when nothing matches.
      # NOTE(review): git's stderr and exit status are swallowed too, so an
      # unknown base ref produces an empty list (and a clean run) — confirm
      # this fail-open behavior is intended.
      git diff --name-only --diff-filter=ACMR "$base"...HEAD 2>/dev/null \
        | grep -E '\.(md|cjs|js|json|yml|yaml|sh)$' || true
      ;;
    --file)
      # ${1:-} keeps a missing argument from tripping `set -u`, which would
      # exit 1 ("findings detected") instead of the usage-error code 2.
      local path="${1:-}"
      if [[ -z "$path" ]]; then
        echo "Error: --file requires a path" >&2
        exit 2
      fi
      if [[ ! -f "$path" ]]; then
        echo "Error: file not found: $path" >&2
        exit 2
      fi
      # printf rather than echo: safe for names such as "-n".
      printf '%s\n' "$path"
      ;;
    --dir)
      local dir="${1:-}"
      if [[ -z "$dir" ]]; then
        echo "Error: --dir requires a path" >&2
        exit 2
      fi
      if [[ ! -d "$dir" ]]; then
        echo "Error: directory not found: $dir" >&2
        exit 2
      fi
      # Scannable extensions only; skip dependency, VCS and build trees.
      find "$dir" -type f \( -name '*.md' -o -name '*.cjs' -o -name '*.js' -o -name '*.json' -o -name '*.yml' -o -name '*.yaml' -o -name '*.sh' \) \
        ! -path '*/node_modules/*' ! -path '*/.git/*' ! -path '*/dist/*' 2>/dev/null || true
      ;;
    --stdin)
      # Pass the caller-supplied path list through unchanged.
      cat
      ;;
    *)
      echo "Usage: $0 --diff [base] | --file <path> | --dir <path> | --stdin" >&2
      exit 2
      ;;
  esac
}

# ─── Scanner ─────────────────────────────────────────────────────────────────

#######################################
# Scan one file against every entry in PATTERNS and report matching lines.
# Globals:
#   PATTERNS (read)
# Arguments:
#   $1 - path of the file to scan
# Outputs:
#   On the first hit prints "FAIL: <path>"; every matching line follows as
#   "  <lineno>:<text>". Nothing is printed for clean or allowlisted files.
# Returns:
#   0 if the file is allowlisted or has no matches, 1 otherwise.
#######################################
scan_file() {
  local file="$1"
  local found=0
  local pattern matches line

  if is_allowlisted "$file"; then
    return 0
  fi

  for pattern in "${PATTERNS[@]}"; do
    # -i: case-insensitive, -n: prefix line numbers, -E: extended regex.
    # `--` stops a path that starts with '-' from being parsed as options.
    # `|| true` normalizes grep's non-zero "no match" status. Read errors
    # are silenced as well, so an unreadable file is reported as clean.
    matches=$(grep -inE -e "$pattern" -- "$file" 2>/dev/null || true)
    if [[ -n "$matches" ]]; then
      # Print the per-file banner once, before the first finding.
      if [[ $found -eq 0 ]]; then
        echo "FAIL: $file"
        found=1
      fi
      while IFS= read -r line; do
        printf '  %s\n' "$line"
      done <<< "$matches"
    fi
  done

  return "$found"
}

# ─── Main ────────────────────────────────────────────────────────────────────

#######################################
# Entry point: collect the file list for the requested mode, scan each
# file, and print a summary line.
# Arguments:
#   $1 - mode flag, passed to collect_files
#   $@ - remaining arguments, passed to collect_files
# Outputs:
#   Findings and the summary on stdout; usage text on stderr.
# Returns:
#   Exits 0 when nothing was found (or there was nothing to scan), 1 when
#   at least one file has findings, 2 when called without arguments. A
#   non-zero status from collect_files is passed through as the exit code.
#######################################
main() {
  if [[ $# -eq 0 ]]; then
    echo "Usage: $0 --diff [base] | --file <path> | --dir <path> | --stdin" >&2
    exit 2
  fi

  local mode="$1"
  shift

  # Declared separately from the assignment so the status of collect_files
  # is not masked by `local`.
  local files
  files=$(collect_files "$mode" "$@") || exit "$?"

  if [[ -z "$files" ]]; then
    echo "prompt-injection-scan: no files to scan"
    exit 0
  fi

  local total=0
  local failed=0
  local file

  # Fed via a here-string rather than a pipe so the counters are updated
  # in the current shell.
  while IFS= read -r file; do
    [[ -z "$file" ]] && continue
    total=$((total + 1))
    if ! scan_file "$file"; then
      failed=$((failed + 1))
    fi
  done <<< "$files"

  echo ""
  echo "prompt-injection-scan: scanned $total files, $failed with findings"

  if [[ $failed -gt 0 ]]; then
    exit 1
  fi
  exit 0
}

# Run the scanner with the script's command-line arguments.
main "$@"