mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
fix: address coderabbit review on PR #2076
Actionable (4): - Dockerfile uv install: wrap `chmod ... || true` in braces so the trailing `|| true` no longer masks failures from `curl|sh` via bash operator precedence (&& binds tighter than ||). Applied to both docker/claude-mem/ and evals/swebench/Dockerfile.agent. Added `set -eux` to the RUN lines. - docker/claude-mem/Dockerfile: drop unused `sudo` apt package (~2 MB). - run-batch.py: name each agent container (`swebench-agent-<id>-<pid>-<tid>`) and force-remove via `docker rm -f <name>` in the TimeoutExpired handler so timed-out runs don't leave orphan containers. Nitpicks (2): - smoke-test.sh: collapse 3 python3 invocations into 1 — parse the instance JSON once, print `repo base_commit`, and write problem.txt in the same call. - run-instance.sh: shallow clone via `--depth 1 --no-single-branch` + `fetch --depth 1 origin $BASE_COMMIT`. Falls back to a full clone if the server rejects the by-commit fetch. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -30,7 +30,6 @@ RUN apt-get update \
|
||||
ca-certificates \
|
||||
unzip \
|
||||
jq \
|
||||
sudo \
|
||||
less \
|
||||
procps \
|
||||
uuid-runtime \
|
||||
@@ -49,8 +48,12 @@ ENV PATH="/usr/local/bun/bin:${PATH}"
|
||||
# Versioned installer URL per https://docs.astral.sh/uv/getting-started/installation/.
|
||||
ARG UV_VERSION=0.11.7
|
||||
ENV UV_INSTALL_DIR="/usr/local/bin"
|
||||
RUN curl -LsSf "https://astral.sh/uv/${UV_VERSION}/install.sh" | sh \
|
||||
&& chmod a+rX /usr/local/bin/uv /usr/local/bin/uvx 2>/dev/null || true
|
||||
# `&&` binds tighter than `||` in bash, so the previous form let `curl|sh` fail
|
||||
# silently via the trailing `|| true`. Group the chmod so tolerated failure is
|
||||
# scoped to perms-fixing only.
|
||||
RUN set -eux \
|
||||
&& curl -LsSf "https://astral.sh/uv/${UV_VERSION}/install.sh" | sh \
|
||||
&& { chmod a+rX /usr/local/bin/uv /usr/local/bin/uvx 2>/dev/null || true; }
|
||||
|
||||
# Match the upstream devcontainer's npm-global prefix so `npm install -g`
|
||||
# targets a dir the `node` user owns.
|
||||
|
||||
@@ -35,8 +35,12 @@ ENV PATH="/usr/local/bun/bin:${PATH}"
|
||||
# uv (provides Python for Chroma per CLAUDE.md). Installed to a system
|
||||
# location, same reason.
|
||||
ENV UV_INSTALL_DIR="/usr/local/bin"
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||
&& chmod a+rX /usr/local/bin/uv /usr/local/bin/uvx 2>/dev/null || true
|
||||
# Group the chmod so the trailing `|| true` only absorbs chmod failures; without
|
||||
# this grouping, bash precedence (`&&` binds tighter than `||`) would silently
|
||||
# mask a failed `curl|sh` install step.
|
||||
RUN set -eux \
|
||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
||||
&& { chmod a+rX /usr/local/bin/uv /usr/local/bin/uvx 2>/dev/null || true; }
|
||||
|
||||
# Claude Code CLI — PINNED to the version whose flag surface was verified in
|
||||
# the plan (Phase 0). Do NOT bump without re-verifying flags.
|
||||
|
||||
@@ -281,6 +281,10 @@ def run_one_instance(
|
||||
status: str = "failed"
|
||||
model_patch: str = ""
|
||||
|
||||
# Uniquely named so the TimeoutExpired handler can kill it without racing
|
||||
# other instances on the host.
|
||||
container_name = f"swebench-agent-{instance_id}-{os.getpid()}-{threading.get_ident()}"
|
||||
|
||||
try:
|
||||
# The orchestrator owns JSONL writes under `predictions_lock` to avoid
|
||||
# racy concurrent appends across containers — so we DO NOT mount the
|
||||
@@ -293,6 +297,8 @@ def run_one_instance(
|
||||
"docker",
|
||||
"run",
|
||||
"--rm",
|
||||
"--name",
|
||||
container_name,
|
||||
"-e",
|
||||
"CLAUDE_MEM_OUTPUT_DIR=/scratch",
|
||||
"-v",
|
||||
@@ -354,8 +360,17 @@ def run_one_instance(
|
||||
|
||||
except subprocess.TimeoutExpired as exc:
|
||||
status = "timed_out"
|
||||
# subprocess.run killed the docker CLI, but the container may
|
||||
# still be running. Force-remove it by name so we don't leak
|
||||
# containers across the batch.
|
||||
subprocess.run(
|
||||
["docker", "rm", "-f", container_name],
|
||||
capture_output=True,
|
||||
check=False,
|
||||
timeout=30,
|
||||
)
|
||||
stderr_log_path.write_text(
|
||||
f"TIMEOUT after {timeout}s\n"
|
||||
f"TIMEOUT after {timeout}s (forced docker rm -f {container_name})\n"
|
||||
f"=== STDOUT (partial) ===\n{exc.stdout or ''}\n"
|
||||
f"=== STDERR (partial) ===\n{exc.stderr or ''}\n",
|
||||
encoding="utf-8",
|
||||
|
||||
@@ -94,8 +94,17 @@ cleanup() {
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Clone the repo at BASE_COMMIT.
|
||||
git clone "https://github.com/${REPO_SLUG}.git" "$REPO_DIR"
|
||||
# Shallow clone + fetch the exact commit. Saves minutes on large repos
|
||||
# (sympy/django/scikit-learn) vs. a full-history clone. Fallback to a full
|
||||
# clone if the server rejects the by-commit fetch (GitHub supports
|
||||
# uploadpack.allowReachableSHA1InWant by default on public repos, but mirrors
|
||||
# may not).
|
||||
if ! { git clone --depth 1 --no-single-branch "https://github.com/${REPO_SLUG}.git" "$REPO_DIR" \
|
||||
&& git -C "$REPO_DIR" fetch --depth 1 origin "$BASE_COMMIT"; }; then
|
||||
echo "WARN: shallow fetch failed; falling back to full clone" >&2
|
||||
rm -rf "$REPO_DIR"
|
||||
git clone "https://github.com/${REPO_SLUG}.git" "$REPO_DIR"
|
||||
fi
|
||||
git -C "$REPO_DIR" reset --hard "$BASE_COMMIT"
|
||||
|
||||
# ---------- Turn 1: Ingest (populate memory via PostToolUse hook) ----------
|
||||
|
||||
@@ -64,15 +64,21 @@ else:
|
||||
sys.exit(1)
|
||||
PY
|
||||
|
||||
# Read JSON fields from stdin so $INSTANCE_JSON paths with spaces/special chars
|
||||
# don't break shell interpolation inside `python3 -c`.
|
||||
REPO=$(python3 -c 'import json,sys; print(json.load(sys.stdin)["repo"])' < "$INSTANCE_JSON")
|
||||
BASE_COMMIT=$(python3 -c 'import json,sys; print(json.load(sys.stdin)["base_commit"])' < "$INSTANCE_JSON")
|
||||
|
||||
SCRATCH="$(mktemp -d -t claude-mem-smoke.XXXXXX)"
|
||||
trap 'rm -f "$CREDS_FILE" "$INSTANCE_JSON"; rm -rf "$SCRATCH"' EXIT
|
||||
|
||||
python3 -c 'import json,sys,os; open(os.path.join(sys.argv[1],"problem.txt"),"w").write(json.load(sys.stdin)["problem_statement"])' "$SCRATCH" < "$INSTANCE_JSON"
|
||||
# Parse the instance JSON once: print repo + base_commit to stdout, write the
|
||||
# problem statement directly to $SCRATCH/problem.txt. Avoids three separate
|
||||
# python3 invocations and any shell-quote injection on $INSTANCE_JSON.
|
||||
read -r REPO BASE_COMMIT < <(
|
||||
python3 - "$SCRATCH" < "$INSTANCE_JSON" <<'PY'
|
||||
import json, os, sys
|
||||
d = json.load(sys.stdin)
|
||||
scratch = sys.argv[1]
|
||||
open(os.path.join(scratch, "problem.txt"), "w").write(d["problem_statement"])
|
||||
print(d["repo"], d["base_commit"])
|
||||
PY
|
||||
)
|
||||
|
||||
echo "=== Running $INSTANCE_ID ($REPO @ $BASE_COMMIT) ===" >&2
|
||||
echo "Scratch: $SCRATCH" >&2
|
||||
|
||||
Reference in New Issue
Block a user