# File: browser-use/.github/workflows/test.yaml
# (137 lines, 4.6 KiB, YAML)

# Workflow identity, token permissions, and the events that start a run.
name: test

# The workflow token is limited to read access on repository contents.
permissions:
  contents: read

on:
  # Pushes to the long-lived branches, any release branch, and every tag.
  push:
    branches:
      - main
      - stable
      - 'releases/**'
    tags:
      - '*'
  # Every pull request (no branch/path filters configured).
  pull_request:
  # Manual runs from the Actions UI or API.
  workflow_dispatch:
jobs:
find_tests:
  # Lists tests/ci/test_*.py and exposes the bare file names (no directory,
  # no .py suffix) as a compact JSON array in the TEST_FILENAMES job output.
  runs-on: ubuntu-latest
  outputs:
    # ["test_browser", "test_controller", "test_browser_session", "test_tab_management", ...]
    TEST_FILENAMES: ${{ steps.lsgrep.outputs.TEST_FILENAMES }}
  steps:
    - uses: actions/checkout@v4

    # https://code.dblock.org/2021/09/03/generating-task-matrix-by-looping-over-repo-files-with-github-actions.html
    - id: lsgrep
      run: |
        TEST_FILENAMES="$(ls tests/ci/test_*.py | sed 's|^tests/ci/||' | sed 's|\.py$||' | jq -R -s -c 'split("\n")[:-1]')"
        echo "TEST_FILENAMES=${TEST_FILENAMES}" >> "$GITHUB_OUTPUT"
        echo "$TEST_FILENAMES"

    - name: Check that at least one test file is found
      env:
        # Pass the output through the environment instead of interpolating
        # it into the script: the JSON contains double quotes and the file
        # names come from the checked-out (possibly PR-supplied) tree.
        TEST_FILENAMES: ${{ steps.lsgrep.outputs.TEST_FILENAMES }}
      run: |
        # When nothing matches, the jq pipeline above still emits "[]",
        # so an empty-string test alone would never fail.
        if [ -z "$TEST_FILENAMES" ] || [ "$TEST_FILENAMES" = "[]" ]; then
          echo "Failed to find any test_*.py files in tests/ci/ folder!" >&2
          exit 1
        fi
tests:
  # Runs one matrix job per test file discovered by the find_tests job.
  needs: find_tests
  runs-on: ubuntu-latest
  env:
    IN_DOCKER: 'True'
    # API keys are read from repository secrets.
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
    GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
  strategy:
    matrix:
      # autodiscovers all the files in tests/ci/test_*.py
      # - test_browser
      # - test_controller
      # - test_browser_session
      # - test_tab_management
      # ... and more
      # Falls back to a sentinel entry when the find_tests output is empty,
      # so the first step below can fail with a readable message.
      test_filename: ${{ fromJson(needs.find_tests.outputs.TEST_FILENAMES || '["FAILED_TO_DISCOVER_TESTS"]') }}
  name: ${{ matrix.test_filename }}
  steps:
    - name: Check that the previous step managed to find some test files for us to run
      env:
        # Matrix values derive from file names in the checked-out tree, so
        # hand them to the shell via the environment rather than inlining
        # them into the script text.
        TEST_FILENAME: ${{ matrix.test_filename }}
      run: |
        if [[ "$TEST_FILENAME" == "FAILED_TO_DISCOVER_TESTS" ]]; then
          echo "Failed get list of test files in tests/ci/test_*.py from find_tests job" >&2
          exit 1
        fi

    - uses: actions/checkout@v4

    - uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true
        activate-environment: true

    - run: uv sync --dev

    # The installed playwright version becomes part of the cache key below.
    - name: Detect installed Playwright version
      run: |
        echo "PLAYWRIGHT_VERSION=$(uv pip list --format json | jq -r '.[] | select(.name == "playwright") | .version')" >> "$GITHUB_ENV"

    - name: Cache playwright binaries
      uses: actions/cache@v4
      with:
        path: |
          ~/.cache/ms-playwright
        key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}

    # - run: playwright install chrome  # tests that need it can install it manually
    - run: playwright install chromium

    # -x stops this file's run at the first failing test.
    - env:
        TEST_FILENAME: ${{ matrix.test_filename }}
      run: pytest --numprocesses auto -x "tests/ci/${TEST_FILENAME}.py"
evaluate-tasks:
  # Runs tests/ci/evaluate_tasks.py and publishes its PASSED/TOTAL score to
  # the job log and the workflow run summary.
  runs-on: ubuntu-latest
  env:
    # API keys are read from repository secrets.
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
    GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
  steps:
    - uses: actions/checkout@v4

    - uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true
        activate-environment: true

    - run: uv sync --dev

    # The installed playwright version becomes part of the cache key below.
    - name: Detect installed Playwright version
      run: |
        echo "PLAYWRIGHT_VERSION=$(uv pip list --format json | jq -r '.[] | select(.name == "playwright") | .version')" >> "$GITHUB_ENV"

    - name: Cache playwright binaries
      uses: actions/cache@v4
      with:
        path: |
          ~/.cache/ms-playwright
        key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}

    - run: playwright install chromium

    - name: Run agent tasks evaluation and capture score
      id: eval
      run: |
        python tests/ci/evaluate_tasks.py > result.txt
        cat result.txt
        # Export the values of the PASSED=... and TOTAL=... lines printed by
        # the script so that later steps can read them as $PASSED / $TOTAL.
        echo "PASSED=$(grep '^PASSED=' result.txt | cut -d= -f2)" >> "$GITHUB_ENV"
        echo "TOTAL=$(grep '^TOTAL=' result.txt | cut -d= -f2)" >> "$GITHUB_ENV"

    - name: Print agent evaluation summary
      run: |
        echo "Agent tasks passed: $PASSED / $TOTAL"

    - name: Write agent evaluation summary to workflow overview
      run: |
        # Green only for a real, complete score. Without the -n guard, two
        # empty values (no PASSED=/TOTAL= lines in result.txt) would compare
        # equal and be reported as a perfect run.
        if [ -n "$TOTAL" ] && [ "$PASSED" = "$TOTAL" ]; then
          COLOR="green"
        else
          COLOR="yellow"
        fi
        echo "<h2>Agent Tasks Score: <span style='color:$COLOR;'>$PASSED/$TOTAL</span></h2>" >> "$GITHUB_STEP_SUMMARY"