mirror of
https://github.com/Aider-AI/aider
synced 2026-05-05 06:32:04 +02:00
Compare commits
321 Commits
v0.69.1.de
...
v0.70.1.de
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f4007210c1 | ||
|
|
77962dbd4c | ||
|
|
419cbae55f | ||
|
|
5233789c40 | ||
|
|
70731719f7 | ||
|
|
cf4aa68f98 | ||
|
|
6bfd89074c | ||
|
|
acc4893a7f | ||
|
|
ab61ae2b36 | ||
|
|
514b9c1c7d | ||
|
|
44f2cf2b24 | ||
|
|
9befec5cd1 | ||
|
|
7efdfe5639 | ||
|
|
be6811b29a | ||
|
|
f160b8df04 | ||
|
|
158471b218 | ||
|
|
a383cece53 | ||
|
|
0901e6ab77 | ||
|
|
e1485971d8 | ||
|
|
91c9b1bfe7 | ||
|
|
befe6be86c | ||
|
|
79428cf4ed | ||
|
|
7bde5fe893 | ||
|
|
2f7e024387 | ||
|
|
9235cfa739 | ||
|
|
f1e623ec5a | ||
|
|
ec2da0a399 | ||
|
|
dd9b2a872c | ||
|
|
9767759033 | ||
|
|
7537d79311 | ||
|
|
0a23c4abd6 | ||
|
|
b51768b08e | ||
|
|
4561f0c79e | ||
|
|
5c92491bc0 | ||
|
|
083605e2d7 | ||
|
|
d28e2f0f56 | ||
|
|
87a964355b | ||
|
|
fbc3f0cef5 | ||
|
|
2b1625e3a8 | ||
|
|
6230df749e | ||
|
|
e62003c0ce | ||
|
|
d0f092f7ae | ||
|
|
9e2e07e8a7 | ||
|
|
4154d47c07 | ||
|
|
62e0cba7bd | ||
|
|
39d5c785d5 | ||
|
|
e5cb81c11f | ||
|
|
3abb8d38ec | ||
|
|
7f0860d5d0 | ||
|
|
8c74c8ab6f | ||
|
|
37df899ada | ||
|
|
f55181e447 | ||
|
|
b1bc2f8c5c | ||
|
|
350df7ca55 | ||
|
|
8768a8aca6 | ||
|
|
0f8bb016f4 | ||
|
|
eb30791ff4 | ||
|
|
7b37bf0f48 | ||
|
|
fff67a5917 | ||
|
|
3dbe91577c | ||
|
|
0e05b64ebc | ||
|
|
c895e99306 | ||
|
|
6d7e8beaaa | ||
|
|
8b62d8a6c5 | ||
|
|
ec44850646 | ||
|
|
2ea5a98ee0 | ||
|
|
80928b90a5 | ||
|
|
b28174aba1 | ||
|
|
da170bab3a | ||
|
|
49a2f998dd | ||
|
|
4efdc8b4f7 | ||
|
|
a75507980a | ||
|
|
8d0decc17a | ||
|
|
e334cbb5d4 | ||
|
|
e3ac8ab19d | ||
|
|
bddf6e9017 | ||
|
|
bcdc0217b3 | ||
|
|
521841b447 | ||
|
|
c53cd336f9 | ||
|
|
a8226989c8 | ||
|
|
114b156d74 | ||
|
|
def2d4bac9 | ||
|
|
250e2ab6aa | ||
|
|
6185ddf76a | ||
|
|
dddf192e5a | ||
|
|
2d32f77ed0 | ||
|
|
7eb7533d42 | ||
|
|
bb711fe255 | ||
|
|
14a8759b82 | ||
|
|
5a0d4eff71 | ||
|
|
805d6bbc8c | ||
|
|
a0004ab892 | ||
|
|
46a444dc21 | ||
|
|
aefb5c37fe | ||
|
|
2216978726 | ||
|
|
3a2d8edb53 | ||
|
|
e0d57b7713 | ||
|
|
3a9912c01e | ||
|
|
6cadee31bf | ||
|
|
678845dfda | ||
|
|
7f7e218504 | ||
|
|
256a9951f5 | ||
|
|
370b45bb35 | ||
|
|
616c4a9a53 | ||
|
|
821f7d6694 | ||
|
|
bc89be6187 | ||
|
|
86b6a4cefd | ||
|
|
1f9a53a454 | ||
|
|
f22d112da2 | ||
|
|
c36c06ab99 | ||
|
|
a915c60999 | ||
|
|
6ddb8a7d88 | ||
|
|
50bb2cb1e6 | ||
|
|
e1571dda9b | ||
|
|
7410c6216c | ||
|
|
8f84df44ab | ||
|
|
82f21b6734 | ||
|
|
892fd5a6ef | ||
|
|
3069db0cfd | ||
|
|
b71c9d539e | ||
|
|
78e643970d | ||
|
|
34da3dd3d7 | ||
|
|
817cb0d363 | ||
|
|
01088e214c | ||
|
|
3e4500f9fd | ||
|
|
d4b62608a9 | ||
|
|
e6bfc1c2fc | ||
|
|
051cabed69 | ||
|
|
04916a6e97 | ||
|
|
f3be2fa66b | ||
|
|
c36087cc0c | ||
|
|
e35909ac7d | ||
|
|
e5a693ab94 | ||
|
|
9e9cfb4600 | ||
|
|
5dddaac006 | ||
|
|
14af6f1fba | ||
|
|
e88064fdc9 | ||
|
|
6badf5ea1d | ||
|
|
20f5f3da24 | ||
|
|
8c1b147705 | ||
|
|
366155b828 | ||
|
|
2c7d1897eb | ||
|
|
26ccb23402 | ||
|
|
d9e2471fcd | ||
|
|
8302b351dd | ||
|
|
b8647c0481 | ||
|
|
a168403d68 | ||
|
|
42d8650058 | ||
|
|
7ad0d46c11 | ||
|
|
58812f7f1f | ||
|
|
65133b2aef | ||
|
|
291d8cd335 | ||
|
|
7a27e2b94b | ||
|
|
57a8eab1c3 | ||
|
|
236a7f68e9 | ||
|
|
81d424f475 | ||
|
|
687ba8c9a2 | ||
|
|
6d74a564e6 | ||
|
|
0a3e0665ab | ||
|
|
a19f1fbc67 | ||
|
|
2aa4615c78 | ||
|
|
7dd1346878 | ||
|
|
31f8c7d9cb | ||
|
|
914ce0b94d | ||
|
|
664f09111e | ||
|
|
6141f414fd | ||
|
|
8911f0f217 | ||
|
|
5af108ccee | ||
|
|
94e4169445 | ||
|
|
479b5b7064 | ||
|
|
674e935cf5 | ||
|
|
6b4982d75b | ||
|
|
4167743a34 | ||
|
|
ba289f6db4 | ||
|
|
422fd11f4d | ||
|
|
614d9c9b0d | ||
|
|
f91be86662 | ||
|
|
72f05544e8 | ||
|
|
81f55820be | ||
|
|
b9c14e1d65 | ||
|
|
5c55453a0e | ||
|
|
12491c4983 | ||
|
|
77d379c021 | ||
|
|
1a12a59e91 | ||
|
|
0b970dd9c7 | ||
|
|
93ac2bd53e | ||
|
|
f9646ac47a | ||
|
|
e8ed3b9e23 | ||
|
|
6238a07c8f | ||
|
|
1fb33f0c47 | ||
|
|
a842f41627 | ||
|
|
c4c135e678 | ||
|
|
f36f2fdea2 | ||
|
|
e3f0a67584 | ||
|
|
f6f05fa0c6 | ||
|
|
54ca7ceac8 | ||
|
|
cf5b38d4f5 | ||
|
|
b23669400f | ||
|
|
aaacd00ecf | ||
|
|
03aa22ba84 | ||
|
|
1493b8703f | ||
|
|
59308c20c6 | ||
|
|
cac5d8e716 | ||
|
|
7f16757bbe | ||
|
|
674e3846e2 | ||
|
|
3a0be0cca9 | ||
|
|
00d7c3a05a | ||
|
|
91f5fca5e9 | ||
|
|
1d7cb0c119 | ||
|
|
24599aa64f | ||
|
|
54c1553892 | ||
|
|
0ae53ce1a1 | ||
|
|
c69ffe02f8 | ||
|
|
7bfc2e0e74 | ||
|
|
9cc674c283 | ||
|
|
66e597a05c | ||
|
|
074c636e53 | ||
|
|
4ec44936f6 | ||
|
|
eb9c41f2a0 | ||
|
|
04afb99c54 | ||
|
|
2124e7b221 | ||
|
|
2416a8bf96 | ||
|
|
408a40f78b | ||
|
|
195ae5ce4b | ||
|
|
9bebb1e9a9 | ||
|
|
e7bec5be1d | ||
|
|
c708e8ba8e | ||
|
|
60f26cc067 | ||
|
|
64fa058bc7 | ||
|
|
9a770eeae9 | ||
|
|
ffc2c5a26e | ||
|
|
b6c5bd552e | ||
|
|
05147a3199 | ||
|
|
7f0d08ad77 | ||
|
|
64f95af833 | ||
|
|
7cd2662355 | ||
|
|
154d485c9e | ||
|
|
5f6821c7e2 | ||
|
|
59cf823d56 | ||
|
|
70312c58be | ||
|
|
4942366271 | ||
|
|
f237d0f212 | ||
|
|
9b424e0fe7 | ||
|
|
81b75d178b | ||
|
|
410e732eb3 | ||
|
|
2ca93cd93d | ||
|
|
50c806286e | ||
|
|
59de835b42 | ||
|
|
ecbac76cba | ||
|
|
9b16f2139d | ||
|
|
ef14df5ba2 | ||
|
|
b4be9875b2 | ||
|
|
8d0c962f42 | ||
|
|
5e0cb8d658 | ||
|
|
1869ab94fe | ||
|
|
2627c5baaf | ||
|
|
e4e16b8f77 | ||
|
|
85218d74d4 | ||
|
|
47d5b66986 | ||
|
|
ecfb133de2 | ||
|
|
23c95d24f1 | ||
|
|
b37d89bd08 | ||
|
|
5c848d59b2 | ||
|
|
b1c04dece9 | ||
|
|
b87c7987bb | ||
|
|
0bef52ae7d | ||
|
|
9ae04cf1ec | ||
|
|
0f7c4a8d4f | ||
|
|
07353207c0 | ||
|
|
d89be83414 | ||
|
|
01382527f5 | ||
|
|
dece2193fc | ||
|
|
d298f864fa | ||
|
|
ff37d8c691 | ||
|
|
f7f305a564 | ||
|
|
86f38e11cd | ||
|
|
0b60c48253 | ||
|
|
024b9840f0 | ||
|
|
2f4f59d82f | ||
|
|
830d5ee763 | ||
|
|
5bc63f7a33 | ||
|
|
b54f970e12 | ||
|
|
0cc8c54152 | ||
|
|
241e1e27d0 | ||
|
|
0145e86202 | ||
|
|
4d50e1e373 | ||
|
|
4b6ae34800 | ||
|
|
2feb85e831 | ||
|
|
a67ac81265 | ||
|
|
dab536c9e8 | ||
|
|
64cf298521 | ||
|
|
10b5aaa6a5 | ||
|
|
8c79e5ccfb | ||
|
|
2ef536a342 | ||
|
|
85b1303460 | ||
|
|
60aca3a241 | ||
|
|
db98381a86 | ||
|
|
f62ef34715 | ||
|
|
f95a6c1a5a | ||
|
|
65555b5dd0 | ||
|
|
9f6331a35e | ||
|
|
8c10cb6230 | ||
|
|
c8894bcead | ||
|
|
edc602c33a | ||
|
|
abfb2ca810 | ||
|
|
9967efe45a | ||
|
|
e77d80bda5 | ||
|
|
a691d1750a | ||
|
|
834e2f9304 | ||
|
|
66e5e9c1ce | ||
|
|
2d5f613984 | ||
|
|
868e7a278f | ||
|
|
f953d17889 | ||
|
|
3473969aae | ||
|
|
ec11ae7c40 | ||
|
|
8217ee1bbb | ||
|
|
1ad3ee0aec | ||
|
|
b3e9a2fede | ||
|
|
2f5d6bf909 | ||
|
|
cd79f479e9 | ||
|
|
12c0f675ce |
23
HISTORY.md
23
HISTORY.md
@@ -1,5 +1,26 @@
|
||||
# Release history
|
||||
|
||||
### main branch
|
||||
|
||||
- Full support for o1 models.
|
||||
- Watch files now honors `--subtree-only`, and only watches that sub tree.
|
||||
- Improved prompting for watch files, to work more reliably with more models.
|
||||
- New install methods via uv, including one-liners.
|
||||
- Support for openrouter/deepseek/deepseek-chat model.
|
||||
- Better error handling when non-interactive commands are attempted via `/load` or `--load`.
|
||||
- Display read-only files with abs path if it's shorter than rel path.
|
||||
- Ask 10% of users to opt-in to analytics.
|
||||
- Bugfix for auto-suggest.
|
||||
- Gracefully handle unicode errors in git path names.
|
||||
- Aider wrote 74% of the code in this release.
|
||||
|
||||
### Aider v0.69.1
|
||||
|
||||
- Fix for gemini model names in model metadata.
|
||||
- Show hints about AI! and AI? when user makes AI comments.
|
||||
- Support for running without git installed.
|
||||
- Improved environment variable setup messages on Windows.
|
||||
|
||||
### Aider v0.69.0
|
||||
|
||||
- [Watch files](https://aider.chat/docs/usage/watch.html) improvements:
|
||||
@@ -14,7 +35,7 @@
|
||||
- Ask 5% of users if they want to opt-in to analytics.
|
||||
- `/voice` now lets you edit the transcribed text before sending.
|
||||
- Disabled auto-complete in Y/N prompts.
|
||||
- Aider wrote 60% of the code in this release.
|
||||
- Aider wrote 68% of the code in this release.
|
||||
|
||||
### Aider v0.68.0
|
||||
|
||||
|
||||
20
MANIFEST.in
Normal file
20
MANIFEST.in
Normal file
@@ -0,0 +1,20 @@
|
||||
# This needs to sync with aider/help_pats.py
|
||||
|
||||
global-exclude .DS_Store
|
||||
|
||||
recursive-exclude aider/website/examples *
|
||||
recursive-exclude aider/website/_posts *
|
||||
|
||||
exclude aider/website/HISTORY.md
|
||||
exclude aider/website/docs/benchmarks*.md
|
||||
exclude aider/website/docs/ctags.md
|
||||
exclude aider/website/docs/unified-diffs.md
|
||||
|
||||
exclude aider/website/install.ps1
|
||||
exclude aider/website/install.sh
|
||||
|
||||
recursive-exclude aider/website/docs/leaderboards *
|
||||
recursive-exclude aider/website/assets *
|
||||
recursive-exclude aider/website *.js
|
||||
recursive-exclude aider/website *.html
|
||||
recursive-exclude aider/website *.yml
|
||||
23
README.md
23
README.md
@@ -43,28 +43,27 @@ VIDEO END -->
|
||||
cog.out(open("aider/website/_includes/get-started.md").read())
|
||||
]]]-->
|
||||
|
||||
You can get started quickly like this:
|
||||
If you already have python 3.8-3.13 installed, you can get started quickly like this:
|
||||
|
||||
```bash
|
||||
python -m pip install -U aider-chat
|
||||
python -m pip install aider-install
|
||||
aider-install
|
||||
|
||||
# Change directory into a git repo
|
||||
cd /to/your/git/repo
|
||||
# Change directory into your code base
|
||||
cd /to/your/project
|
||||
|
||||
# Work with Claude 3.5 Sonnet on your repo
|
||||
export ANTHROPIC_API_KEY=your-key-goes-here
|
||||
aider
|
||||
# Work with Claude 3.5 Sonnet on your code
|
||||
aider --model sonnet --anthropic-api-key your-key-goes-here
|
||||
|
||||
# Work with GPT-4o on your repo
|
||||
export OPENAI_API_KEY=your-key-goes-here
|
||||
aider
|
||||
# Work with GPT-4o on your code
|
||||
aider --model gpt-4o --openai-api-key your-key-goes-here
|
||||
```
|
||||
<!--[[[end]]]-->
|
||||
|
||||
See the
|
||||
[installation instructions](https://aider.chat/docs/install.html)
|
||||
and other
|
||||
[documentation](https://aider.chat/docs/usage.html)
|
||||
and
|
||||
[usage documentation](https://aider.chat/docs/usage.html)
|
||||
for more details.
|
||||
|
||||
## Features
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from packaging import version
|
||||
|
||||
__version__ = "0.69.1.dev"
|
||||
__version__ = "0.70.1.dev"
|
||||
safe_version = __version__
|
||||
|
||||
try:
|
||||
|
||||
@@ -12,6 +12,46 @@ from aider import __version__
|
||||
from aider.dump import dump # noqa: F401
|
||||
from aider.models import model_info_manager
|
||||
|
||||
PERCENT = 10
|
||||
|
||||
|
||||
def compute_hex_threshold(percent):
|
||||
"""Convert percentage to 6-digit hex threshold.
|
||||
|
||||
Args:
|
||||
percent: Percentage threshold (0-100)
|
||||
|
||||
Returns:
|
||||
str: 6-digit hex threshold
|
||||
"""
|
||||
return format(int(0xFFFFFF * percent / 100), "06x")
|
||||
|
||||
|
||||
def is_uuid_in_percentage(uuid_str, percent):
|
||||
"""Check if a UUID string falls within the first X percent of the UUID space.
|
||||
|
||||
Args:
|
||||
uuid_str: UUID string to test
|
||||
percent: Percentage threshold (0-100)
|
||||
|
||||
Returns:
|
||||
bool: True if UUID falls within the first X percent
|
||||
"""
|
||||
if not (0 <= percent <= 100):
|
||||
raise ValueError("Percentage must be between 0 and 100")
|
||||
|
||||
if not uuid_str:
|
||||
return False
|
||||
|
||||
# Convert percentage to hex threshold (1% = "04...", 10% = "1a...", etc)
|
||||
# Using first 6 hex digits
|
||||
if percent == 0:
|
||||
return False
|
||||
|
||||
threshold = compute_hex_threshold(percent)
|
||||
return uuid_str[:6] <= threshold
|
||||
|
||||
|
||||
mixpanel_project_token = "6da9a43058a5d1b9f3353153921fb04d"
|
||||
posthog_project_api_key = "phc_99T7muzafUMMZX15H8XePbMSreEUzahHbtWjy3l5Qbv"
|
||||
posthog_host = "https://us.i.posthog.com"
|
||||
@@ -84,31 +124,7 @@ class Analytics:
|
||||
if not self.user_id:
|
||||
return False
|
||||
|
||||
PERCENT = 5
|
||||
return self.is_uuid_in_percentage(self.user_id, PERCENT)
|
||||
|
||||
def is_uuid_in_percentage(self, uuid_str, percent):
|
||||
"""Check if a UUID string falls within the first X percent of the UUID space.
|
||||
|
||||
Args:
|
||||
uuid_str: UUID string to test
|
||||
percent: Percentage threshold (0-100)
|
||||
|
||||
Returns:
|
||||
bool: True if UUID falls within the first X percent
|
||||
"""
|
||||
if not (0 <= percent <= 100):
|
||||
raise ValueError("Percentage must be between 0 and 100")
|
||||
|
||||
if not uuid_str:
|
||||
return False
|
||||
|
||||
# Convert percentage to hex threshold (1% = "04...", 10% = "1a...", etc)
|
||||
# Using first 6 hex digits
|
||||
if percent == 0:
|
||||
return False
|
||||
threshold = format(int(0xFFFFFF * percent / 100), "06x")
|
||||
return uuid_str[:6] <= threshold
|
||||
return is_uuid_in_percentage(self.user_id, PERCENT)
|
||||
|
||||
def get_data_file_path(self):
|
||||
try:
|
||||
@@ -228,3 +244,7 @@ class Analytics:
|
||||
f.write("\n")
|
||||
except OSError:
|
||||
pass # Ignore OS errors when writing to logfile
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
dump(compute_hex_threshold(PERCENT))
|
||||
|
||||
@@ -427,7 +427,8 @@ def get_parser(default_config_files, git_root):
|
||||
default="default",
|
||||
help=(
|
||||
"Set the markdown code theme (default: default, other options include monokai,"
|
||||
" solarized-dark, solarized-light)"
|
||||
" solarized-dark, solarized-light, or a Pygments builtin style,"
|
||||
" see https://pygments.org/styles for available themes)"
|
||||
),
|
||||
)
|
||||
group.add_argument(
|
||||
|
||||
@@ -3,7 +3,11 @@
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import git
|
||||
try:
|
||||
import git
|
||||
except ImportError:
|
||||
git = None
|
||||
|
||||
from diff_match_patch import diff_match_patch
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import tempfile
|
||||
from collections import OrderedDict
|
||||
from os.path import expanduser
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pyperclip
|
||||
from PIL import Image, ImageGrab
|
||||
@@ -17,11 +18,12 @@ from aider import models, prompts, voice
|
||||
from aider.editor import pipe_editor
|
||||
from aider.format_settings import format_settings
|
||||
from aider.help import Help, install_help_extra
|
||||
from aider.io import InputOutput
|
||||
from aider.llm import litellm
|
||||
from aider.repo import ANY_GIT_ERROR
|
||||
from aider.run_cmd import run_cmd
|
||||
from aider.scrape import Scraper, install_playwright
|
||||
from aider.utils import is_image_file
|
||||
from aider.utils import GitTemporaryDirectory, is_image_file
|
||||
|
||||
from .dump import dump # noqa: F401
|
||||
|
||||
@@ -1009,7 +1011,7 @@ class Commands:
|
||||
return
|
||||
|
||||
self.coder.event("interactive help")
|
||||
from aider.coders import Coder
|
||||
from aider.coders.base_coder import Coder
|
||||
|
||||
if not self.help:
|
||||
res = install_help_extra(self.io)
|
||||
@@ -1069,7 +1071,7 @@ class Commands:
|
||||
self.io.tool_error(f"Please provide a question or topic for the {edit_format} chat.")
|
||||
return
|
||||
|
||||
from aider.coders import Coder
|
||||
from aider.coders.base_coder import Coder
|
||||
|
||||
coder = Coder.create(
|
||||
io=self.io,
|
||||
@@ -1309,7 +1311,42 @@ class Commands:
|
||||
continue
|
||||
|
||||
self.io.tool_output(f"\nExecuting: {cmd}")
|
||||
self.run(cmd)
|
||||
try:
|
||||
self.run(cmd)
|
||||
except SwitchCoder:
|
||||
self.io.tool_error(
|
||||
f"Command '{cmd}' is only supported in interactive mode, skipping."
|
||||
)
|
||||
|
||||
def test_cmd_load_with_switch_coder(self):
|
||||
with GitTemporaryDirectory() as repo_dir:
|
||||
io = InputOutput(pretty=False, fancy_input=False, yes=True)
|
||||
coder = Coder.create(self.GPT35, None, io)
|
||||
commands = Commands(io, coder)
|
||||
|
||||
# Create a temporary file with commands
|
||||
commands_file = Path(repo_dir) / "test_commands.txt"
|
||||
commands_file.write_text("/ask Tell me about the code\n/model gpt-4\n")
|
||||
|
||||
# Mock run to raise SwitchCoder for /ask and /model
|
||||
def mock_run(cmd):
|
||||
if cmd.startswith(("/ask", "/model")):
|
||||
raise SwitchCoder()
|
||||
return None
|
||||
|
||||
with mock.patch.object(commands, "run", side_effect=mock_run):
|
||||
# Capture tool_error output
|
||||
with mock.patch.object(io, "tool_error") as mock_tool_error:
|
||||
commands.cmd_load(str(commands_file))
|
||||
|
||||
# Check that appropriate error messages were shown
|
||||
mock_tool_error.assert_any_call(
|
||||
"Command '/ask Tell me about the code' is only supported in interactive"
|
||||
" mode, skipping."
|
||||
)
|
||||
mock_tool_error.assert_any_call(
|
||||
"Command '/model gpt-4' is only supported in interactive mode, skipping."
|
||||
)
|
||||
|
||||
def completions_raw_save(self, document, complete_event):
|
||||
return self.completions_raw_read_only(document, complete_event)
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
# This needs to sync with MANIFEST.in
|
||||
|
||||
exclude_website_pats = [
|
||||
"**/.DS_Store",
|
||||
"examples/**",
|
||||
"_posts/**",
|
||||
"HISTORY.md",
|
||||
@@ -7,5 +10,4 @@ exclude_website_pats = [
|
||||
"docs/unified-diffs.md",
|
||||
"docs/leaderboards/index.md",
|
||||
"assets/**",
|
||||
"**/.DS_Store",
|
||||
]
|
||||
|
||||
18
aider/io.py
18
aider/io.py
@@ -203,6 +203,7 @@ class InputOutput:
|
||||
fancy_input=True,
|
||||
file_watcher=None,
|
||||
multiline_mode=False,
|
||||
root=".",
|
||||
):
|
||||
self.placeholder = None
|
||||
self.interrupted = False
|
||||
@@ -270,6 +271,7 @@ class InputOutput:
|
||||
self.console = Console(force_terminal=False, no_color=True) # non-pretty
|
||||
|
||||
self.file_watcher = file_watcher
|
||||
self.root = root
|
||||
|
||||
def _get_style(self):
|
||||
style_dict = {}
|
||||
@@ -505,6 +507,7 @@ class InputOutput:
|
||||
complete_style=CompleteStyle.MULTI_COLUMN,
|
||||
style=style,
|
||||
key_bindings=kb,
|
||||
complete_while_typing=True,
|
||||
)
|
||||
else:
|
||||
line = input(show)
|
||||
@@ -773,7 +776,12 @@ class InputOutput:
|
||||
res = "no"
|
||||
else:
|
||||
if self.prompt_session:
|
||||
res = self.prompt_session.prompt(question + " ", default=default, style=style)
|
||||
res = self.prompt_session.prompt(
|
||||
question + " ",
|
||||
default=default,
|
||||
style=style,
|
||||
complete_while_typing=True,
|
||||
)
|
||||
else:
|
||||
res = input(question + " ")
|
||||
|
||||
@@ -907,7 +915,13 @@ class InputOutput:
|
||||
editable_files = [f for f in sorted(rel_fnames) if f not in rel_read_only_fnames]
|
||||
|
||||
if read_only_files:
|
||||
files_with_label = ["Readonly:"] + read_only_files
|
||||
# Use shorter of abs/rel paths for readonly files
|
||||
ro_paths = []
|
||||
for rel_path in read_only_files:
|
||||
abs_path = os.path.abspath(os.path.join(self.root, rel_path))
|
||||
ro_paths.append(abs_path if len(abs_path) < len(rel_path) else rel_path)
|
||||
|
||||
files_with_label = ["Readonly:"] + ro_paths
|
||||
read_only_output = StringIO()
|
||||
Console(file=read_only_output, force_terminal=False).print(Columns(files_with_label))
|
||||
read_only_lines = read_only_output.getvalue().splitlines()
|
||||
|
||||
@@ -9,7 +9,11 @@ import webbrowser
|
||||
from dataclasses import fields
|
||||
from pathlib import Path
|
||||
|
||||
import git
|
||||
try:
|
||||
import git
|
||||
except ImportError:
|
||||
git = None
|
||||
|
||||
import importlib_resources
|
||||
from dotenv import load_dotenv
|
||||
from prompt_toolkit.enums import EditingMode
|
||||
@@ -93,6 +97,9 @@ def make_new_repo(git_root, io):
|
||||
|
||||
|
||||
def setup_git(git_root, io):
|
||||
if git is None:
|
||||
return
|
||||
|
||||
try:
|
||||
cwd = Path.cwd()
|
||||
except OSError:
|
||||
@@ -106,7 +113,7 @@ def setup_git(git_root, io):
|
||||
except ANY_GIT_ERROR:
|
||||
pass
|
||||
elif cwd == Path.home():
|
||||
io.tool_warning("You should probably run aider in a directory, not your home dir.")
|
||||
io.tool_warning("You should probably run aider in your project's directory, not your home dir.")
|
||||
return
|
||||
elif cwd and io.confirm_ask(
|
||||
"No git repo found, create one to track aider's changes (recommended)?"
|
||||
@@ -166,7 +173,8 @@ def check_gitignore(git_root, io, ask=True):
|
||||
existing_lines = content.splitlines()
|
||||
for pat in patterns:
|
||||
if pat not in existing_lines:
|
||||
patterns_to_add.append(pat)
|
||||
if '*' in pat or (Path(git_root) / pat).exists():
|
||||
patterns_to_add.append(pat)
|
||||
except OSError as e:
|
||||
io.tool_error(f"Error when trying to read {gitignore_file}: {e}")
|
||||
return
|
||||
@@ -385,6 +393,12 @@ def sanity_check_repo(repo, io):
|
||||
if not repo.git_repo_error:
|
||||
return True
|
||||
error_msg = str(repo.git_repo_error)
|
||||
except UnicodeDecodeError as exc:
|
||||
error_msg = (
|
||||
f"Failed to read the Git repository. This issue is likely caused by a path encoded "
|
||||
f"in a format different from the expected encoding \"{sys.getfilesystemencoding()}\".\n"
|
||||
f"Internal error: {str(exc)}"
|
||||
)
|
||||
except ANY_GIT_ERROR as exc:
|
||||
error_msg = str(exc)
|
||||
bad_ver = "version in (1, 2)" in error_msg
|
||||
@@ -410,7 +424,9 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
|
||||
if argv is None:
|
||||
argv = sys.argv[1:]
|
||||
|
||||
if force_git_root:
|
||||
if git is None:
|
||||
git_root = None
|
||||
elif force_git_root:
|
||||
git_root = force_git_root
|
||||
else:
|
||||
git_root = get_git_root()
|
||||
@@ -457,6 +473,9 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
|
||||
# Parse again to include any arguments that might have been defined in .env
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
if git is None:
|
||||
args.git = False
|
||||
|
||||
if args.analytics_disable:
|
||||
analytics = Analytics(permanently_disable=True)
|
||||
print("Analytics have been permanently disabled.")
|
||||
@@ -646,7 +665,7 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
|
||||
# We can't know the git repo for sure until after parsing the args.
|
||||
# If we guessed wrong, reparse because that changes things like
|
||||
# the location of the config.yml and history files.
|
||||
if args.git and not force_git_root:
|
||||
if args.git and not force_git_root and git is not None:
|
||||
right_repo_root = guessed_wrong_repo(io, git_root, fnames, git_dname)
|
||||
if right_repo_root:
|
||||
analytics.event("exit", reason="Recursing with correct repo")
|
||||
@@ -860,7 +879,11 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
|
||||
|
||||
if args.watch_files:
|
||||
file_watcher = FileWatcher(
|
||||
coder, gitignores=ignores, verbose=args.verbose, analytics=analytics
|
||||
coder,
|
||||
gitignores=ignores,
|
||||
verbose=args.verbose,
|
||||
analytics=analytics,
|
||||
root=str(Path.cwd()) if args.subtree_only else None,
|
||||
)
|
||||
coder.file_watcher = file_watcher
|
||||
|
||||
|
||||
@@ -665,6 +665,13 @@ MODEL_SETTINGS = [
|
||||
examples_as_sys_msg=True,
|
||||
reminder="sys",
|
||||
),
|
||||
ModelSettings(
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
"diff",
|
||||
use_repo_map=True,
|
||||
examples_as_sys_msg=True,
|
||||
reminder="sys",
|
||||
),
|
||||
ModelSettings(
|
||||
"openrouter/openai/gpt-4o",
|
||||
"diff",
|
||||
@@ -764,6 +771,39 @@ MODEL_SETTINGS = [
|
||||
use_temperature=False,
|
||||
streaming=False,
|
||||
),
|
||||
ModelSettings(
|
||||
"openrouter/openai/o1",
|
||||
"diff",
|
||||
weak_model_name="openrouter/openai/gpt-4o-mini",
|
||||
editor_model_name="openrouter/openai/gpt-4o",
|
||||
editor_edit_format="editor-diff",
|
||||
use_repo_map=True,
|
||||
streaming=False,
|
||||
use_temperature=False,
|
||||
# extra_params=dict(extra_body=dict(reasoning_effort="high")),
|
||||
),
|
||||
ModelSettings(
|
||||
"openai/o1",
|
||||
"diff",
|
||||
weak_model_name="openai/gpt-4o-mini",
|
||||
editor_model_name="openai/gpt-4o",
|
||||
editor_edit_format="editor-diff",
|
||||
use_repo_map=True,
|
||||
streaming=False,
|
||||
use_temperature=False,
|
||||
# extra_params=dict(extra_body=dict(reasoning_effort="high")),
|
||||
),
|
||||
ModelSettings(
|
||||
"o1",
|
||||
"diff",
|
||||
weak_model_name="gpt-4o-mini",
|
||||
editor_model_name="gpt-4o",
|
||||
editor_edit_format="editor-diff",
|
||||
use_repo_map=True,
|
||||
streaming=False,
|
||||
use_temperature=False,
|
||||
# extra_params=dict(extra_body=dict(reasoning_effort="high")),
|
||||
),
|
||||
ModelSettings(
|
||||
"openrouter/qwen/qwen-2.5-coder-32b-instruct",
|
||||
"diff",
|
||||
@@ -1232,10 +1272,10 @@ def sanity_check_model(io, model):
|
||||
status = "Set" if value else "Not set"
|
||||
io.tool_output(f"- {key}: {status}")
|
||||
|
||||
if platform.system() == "Windows" or True:
|
||||
if platform.system() == "Windows":
|
||||
io.tool_output(
|
||||
"If you just set these environment variables using `setx` you may need to restart"
|
||||
" your terminal or command prompt for the changes to take effect."
|
||||
"Note: You may need to restart your terminal or command prompt for `setx` to take"
|
||||
" effect."
|
||||
)
|
||||
|
||||
elif not model.keys_in_environment:
|
||||
|
||||
@@ -2,7 +2,17 @@ import os
|
||||
import time
|
||||
from pathlib import Path, PurePosixPath
|
||||
|
||||
import git
|
||||
try:
|
||||
import git
|
||||
|
||||
ANY_GIT_ERROR = [
|
||||
git.exc.ODBError,
|
||||
git.exc.GitError,
|
||||
]
|
||||
except ImportError:
|
||||
git = None
|
||||
ANY_GIT_ERROR = []
|
||||
|
||||
import pathspec
|
||||
|
||||
from aider import prompts, utils
|
||||
@@ -10,15 +20,14 @@ from aider.sendchat import simple_send_with_retries
|
||||
|
||||
from .dump import dump # noqa: F401
|
||||
|
||||
ANY_GIT_ERROR = (
|
||||
git.exc.ODBError,
|
||||
git.exc.GitError,
|
||||
ANY_GIT_ERROR += [
|
||||
OSError,
|
||||
IndexError,
|
||||
BufferError,
|
||||
TypeError,
|
||||
ValueError,
|
||||
)
|
||||
]
|
||||
ANY_GIT_ERROR = tuple(ANY_GIT_ERROR)
|
||||
|
||||
|
||||
class GitRepo:
|
||||
|
||||
@@ -1,38 +1,27 @@
|
||||
{
|
||||
"gemini-2.0-flash-exp": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"openrouter/openai/o1": {
|
||||
"max_tokens": 100000,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 100000,
|
||||
"input_cost_per_token": 0.000015,
|
||||
"output_cost_per_token": 0.00006,
|
||||
"cache_read_input_token_cost": 0.0000075,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2"
|
||||
"supports_prompt_caching": true,
|
||||
"supports_system_messages": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"gemini-2.0-flash-exp": {
|
||||
"openrouter/deepseek/deepseek-chat": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2"
|
||||
"max_input_tokens": 66000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000014,
|
||||
"output_cost_per_token": 0.00000028,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
}
|
||||
|
||||
@@ -8,8 +8,6 @@ import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import git
|
||||
|
||||
from aider.dump import dump # noqa: F401
|
||||
|
||||
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp", ".pdf"}
|
||||
@@ -73,6 +71,8 @@ class GitTemporaryDirectory(ChdirTemporaryDirectory):
|
||||
|
||||
|
||||
def make_repo(path=None):
|
||||
import git
|
||||
|
||||
if not path:
|
||||
path = "."
|
||||
repo = git.Repo.init(path)
|
||||
|
||||
@@ -66,10 +66,10 @@ class FileWatcher:
|
||||
# Compiled regex pattern for AI comments
|
||||
ai_comment_pattern = re.compile(r"(?:#|//|--) *(ai\b.*|ai\b.*|.*\bai[?!]?) *$", re.IGNORECASE)
|
||||
|
||||
def __init__(self, coder, gitignores=None, verbose=False, analytics=None):
|
||||
def __init__(self, coder, gitignores=None, verbose=False, analytics=None, root=None):
|
||||
self.coder = coder
|
||||
self.io = coder.io
|
||||
self.root = Path(coder.root)
|
||||
self.root = Path(root) if root else Path(coder.root)
|
||||
self.verbose = verbose
|
||||
self.analytics = analytics
|
||||
self.stop_event = None
|
||||
@@ -145,6 +145,7 @@ class FileWatcher:
|
||||
"""Get any detected file changes"""
|
||||
|
||||
has_action = None
|
||||
added = False
|
||||
for fname in self.changed_files:
|
||||
_, _, action = self.get_ai_comments(fname)
|
||||
if action in ("!", "?"):
|
||||
@@ -156,10 +157,16 @@ class FileWatcher:
|
||||
self.analytics.event("ai-comments file-add")
|
||||
self.coder.abs_fnames.add(fname)
|
||||
rel_fname = self.coder.get_rel_fname(fname)
|
||||
if not added:
|
||||
self.io.tool_output()
|
||||
added = True
|
||||
self.io.tool_output(f"Added {rel_fname} to the chat")
|
||||
self.io.tool_output()
|
||||
|
||||
if not has_action:
|
||||
if added:
|
||||
self.io.tool_output(
|
||||
"End your comment with AI! to request changes or AI? to ask questions"
|
||||
)
|
||||
return ""
|
||||
|
||||
if self.analytics:
|
||||
@@ -216,6 +223,9 @@ class FileWatcher:
|
||||
comments = []
|
||||
has_action = None # None, "!" or "?"
|
||||
content = self.io.read_text(filepath, silent=True)
|
||||
if not content:
|
||||
return None, None, None
|
||||
|
||||
for i, line in enumerate(content.splitlines(), 1):
|
||||
if match := self.ai_comment_pattern.search(line):
|
||||
comment = match.group(0).strip()
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
watch_code_prompt = """
|
||||
Find the "AI" comments below (marked with █) in the code files I've shared with you.
|
||||
They contain your instructions.
|
||||
Make the requested changes.
|
||||
Be sure to remove all these "AI" comments from the code!
|
||||
I've written your instructions in comments in the code and marked them with "ai"
|
||||
You can see the "AI" comments shown below (marked with █).
|
||||
Find them in the code files I've shared with you, and follow their instructions.
|
||||
|
||||
After completing those instructions, also be sure to remove all the "AI" comments from the code too.
|
||||
"""
|
||||
|
||||
watch_ask_prompt = """/ask
|
||||
Find the "AI" comments below (marked with █) in the code files I've shared with you.
|
||||
They contain your questions you need to answer and other instructions.
|
||||
They contain my questions that I need you to answer and other instructions for you.
|
||||
"""
|
||||
|
||||
@@ -23,6 +23,27 @@ cog.out(text)
|
||||
]]]-->
|
||||
|
||||
|
||||
### main branch
|
||||
|
||||
- Full support for o1 models.
|
||||
- Watch files now honors `--subtree-only`, and only watches that sub tree.
|
||||
- Improved prompting for watch files, to work more reliably with more models.
|
||||
- New install methods via uv, including one-liners.
|
||||
- Support for openrouter/deepseek/deepseek-chat model.
|
||||
- Better error handling when non-interactive commands are attempted via `/load` or `--load`.
|
||||
- Display read-only files with abs path if it's shorter than rel path.
|
||||
- Ask 10% of users to opt-in to analytics.
|
||||
- Bugfix for auto-suggest.
|
||||
- Gracefully handle unicode errors in git path names.
|
||||
- Aider wrote 74% of the code in this release.
|
||||
|
||||
### Aider v0.69.1
|
||||
|
||||
- Fix for gemini model names in model metadata.
|
||||
- Show hints about AI! and AI? when user makes AI comments.
|
||||
- Support for running without git installed.
|
||||
- Improved environment variable setup messages on Windows.
|
||||
|
||||
### Aider v0.69.0
|
||||
|
||||
- [Watch files](https://aider.chat/docs/usage/watch.html) improvements:
|
||||
@@ -37,7 +58,7 @@ cog.out(text)
|
||||
- Ask 5% of users if they want to opt-in to analytics.
|
||||
- `/voice` now lets you edit the transcribed text before sending.
|
||||
- Disabled auto-complete in Y/N prompts.
|
||||
- Aider wrote 60% of the code in this release.
|
||||
- Aider wrote 68% of the code in this release.
|
||||
|
||||
### Aider v0.68.0
|
||||
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
theme: just-the-docs
|
||||
url: "https://aider.chat"
|
||||
|
||||
# Analytics configuration
|
||||
analytics:
|
||||
enabled: false # Single switch to control analytics and cookie consent
|
||||
posthog_key: 'phc_99T7muzafUMMZX15H8XePbMSreEUzahHbtWjy3l5Qbv'
|
||||
posthog_host: 'https://us.i.posthog.com'
|
||||
|
||||
plugins:
|
||||
- jekyll-redirect-from
|
||||
- jekyll-sitemap
|
||||
@@ -45,4 +51,4 @@ callouts:
|
||||
note:
|
||||
title: Note
|
||||
color: yellow
|
||||
|
||||
|
||||
|
||||
@@ -3252,8 +3252,8 @@
|
||||
Philippe de Reynal: 30
|
||||
start_tag: v0.65.0
|
||||
total_lines: 703
|
||||
- aider_percentage: 65.29
|
||||
aider_total: 457
|
||||
- aider_percentage: 67.86
|
||||
aider_total: 437
|
||||
end_date: '2024-12-06'
|
||||
end_tag: v0.67.0
|
||||
file_counts:
|
||||
@@ -3314,18 +3314,12 @@
|
||||
tests/browser/test_browser.py:
|
||||
Paul Gauthier: 2
|
||||
Paul Gauthier (aider): 1
|
||||
tests/fixtures/watch.js:
|
||||
Paul Gauthier: 19
|
||||
Paul Gauthier (aider): 16
|
||||
tests/fixtures/watch.py:
|
||||
Paul Gauthier: 17
|
||||
Paul Gauthier (aider): 4
|
||||
grand_total:
|
||||
Paul Gauthier: 243
|
||||
Paul Gauthier (aider): 457
|
||||
Paul Gauthier: 207
|
||||
Paul Gauthier (aider): 437
|
||||
start_tag: v0.66.0
|
||||
total_lines: 700
|
||||
- aider_percentage: 71.21
|
||||
total_lines: 644
|
||||
- aider_percentage: 71.57
|
||||
aider_total: 428
|
||||
end_date: '2024-12-10'
|
||||
end_tag: v0.68.0
|
||||
@@ -3403,10 +3397,63 @@
|
||||
Paul Gauthier (aider): 1
|
||||
tests/basic/test_watch.py:
|
||||
Paul Gauthier: 1
|
||||
tests/fixtures/watch.js:
|
||||
Paul Gauthier: 3
|
||||
grand_total:
|
||||
Paul Gauthier: 173
|
||||
Paul Gauthier: 170
|
||||
Paul Gauthier (aider): 428
|
||||
start_tag: v0.67.0
|
||||
total_lines: 601
|
||||
total_lines: 598
|
||||
- aider_percentage: 67.87
|
||||
aider_total: 207
|
||||
end_date: '2024-12-13'
|
||||
end_tag: v0.69.0
|
||||
file_counts:
|
||||
.github/workflows/pages.yml:
|
||||
Paul Gauthier: 2
|
||||
aider/__init__.py:
|
||||
Paul Gauthier: 1
|
||||
aider/analytics.py:
|
||||
Paul Gauthier: 2
|
||||
aider/args.py:
|
||||
Mir Adnan ALI: 3
|
||||
Paul Gauthier: 1
|
||||
aider/coders/base_coder.py:
|
||||
JeongJuhyeon: 1
|
||||
Mir Adnan ALI: 3
|
||||
aider/commands.py:
|
||||
Mir Adnan ALI: 4
|
||||
Paul Gauthier: 5
|
||||
Paul Gauthier (aider): 3
|
||||
aider/io.py:
|
||||
Mir Adnan ALI: 37
|
||||
Paul Gauthier: 8
|
||||
Paul Gauthier (aider): 3
|
||||
aider/main.py:
|
||||
Mir Adnan ALI: 1
|
||||
aider/models.py:
|
||||
Paul Gauthier: 7
|
||||
aider/watch.py:
|
||||
Paul Gauthier: 7
|
||||
Paul Gauthier (aider): 47
|
||||
aider/website/docs/leaderboards/index.md:
|
||||
Paul Gauthier: 1
|
||||
benchmark/benchmark.py:
|
||||
Paul Gauthier: 7
|
||||
Paul Gauthier (aider): 7
|
||||
scripts/blame.py:
|
||||
Paul Gauthier (aider): 1
|
||||
scripts/issues.py:
|
||||
Paul Gauthier (aider): 58
|
||||
scripts/update-history.py:
|
||||
Paul Gauthier: 3
|
||||
tests/basic/test_io.py:
|
||||
Paul Gauthier (aider): 20
|
||||
tests/basic/test_watch.py:
|
||||
Paul Gauthier: 5
|
||||
Paul Gauthier (aider): 68
|
||||
grand_total:
|
||||
JeongJuhyeon: 1
|
||||
Mir Adnan ALI: 48
|
||||
Paul Gauthier: 49
|
||||
Paul Gauthier (aider): 207
|
||||
start_tag: v0.68.0
|
||||
total_lines: 305
|
||||
|
||||
@@ -2180,4 +2180,53 @@
|
||||
date: 2024-12-11
|
||||
versions: 0.68.1.dev
|
||||
seconds_per_case: 7.3
|
||||
total_cost: 0.0000
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-18-01-50-08--o1
|
||||
test_cases: 133
|
||||
model: o1
|
||||
edit_format: diff
|
||||
commit_hash: 074c636-dirty
|
||||
pass_rate_1: 65.4
|
||||
pass_rate_2: 84.2
|
||||
percent_cases_well_formed: 99.2
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 1
|
||||
num_with_malformed_responses: 1
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
command: aider --model openrouter/openai/o1
|
||||
date: 2024-12-18
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 29.9
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-21-22-06-01--polyglot-o1-mini-whole
|
||||
test_cases: 225
|
||||
model: o1-mini-2024-09-12
|
||||
edit_format: whole
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 8.9
|
||||
pass_rate_2: 27.1
|
||||
pass_num_1: 20
|
||||
pass_num_2: 61
|
||||
percent_cases_well_formed: 95.6
|
||||
error_outputs: 15
|
||||
num_malformed_responses: 14
|
||||
num_with_malformed_responses: 10
|
||||
user_asks: 37
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 5
|
||||
total_tests: 225
|
||||
command: aider --model o1-mini
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 34.3
|
||||
total_cost: 17.6270
|
||||
259
aider/website/_data/o1_polyglot_leaderboard.yml
Normal file
259
aider/website/_data/o1_polyglot_leaderboard.yml
Normal file
@@ -0,0 +1,259 @@
|
||||
- dirname: 2024-12-21-18-41-18--polyglot-gpt-4o-mini
|
||||
test_cases: 225
|
||||
model: gpt-4o-mini-2024-07-18
|
||||
edit_format: whole
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 0.9
|
||||
pass_rate_2: 3.6
|
||||
pass_num_1: 2
|
||||
pass_num_2: 8
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 0
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 36
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
total_tests: 225
|
||||
command: aider --model gpt-4o-mini-2024-07-18
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 17.3
|
||||
total_cost: 0.3236
|
||||
|
||||
- dirname: 2024-12-21-18-44-28--polyglot-sonnet
|
||||
test_cases: 225
|
||||
model: claude-3-5-sonnet-20241022
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 18.7
|
||||
pass_rate_2: 45.3
|
||||
pass_num_1: 42
|
||||
pass_num_2: 102
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 14
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 12
|
||||
total_tests: 225
|
||||
command: aider --model claude-3-5-sonnet-20241022
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 30.8
|
||||
total_cost: 13.4847
|
||||
|
||||
- dirname: 2024-12-21-18-52-34--polyglot-gpt-4o-diff
|
||||
test_cases: 225
|
||||
model: gpt-4o-2024-11-20
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 4.9
|
||||
pass_rate_2: 15.1
|
||||
pass_num_1: 11
|
||||
pass_num_2: 34
|
||||
percent_cases_well_formed: 96.0
|
||||
error_outputs: 12
|
||||
num_malformed_responses: 11
|
||||
num_with_malformed_responses: 9
|
||||
user_asks: 34
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 19
|
||||
total_tests: 225
|
||||
command: aider --model gpt-4o-2024-11-20
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 22.2
|
||||
total_cost: 7.1835
|
||||
|
||||
- dirname: 2024-12-21-19-23-03--polyglot-o1-hard-diff
|
||||
test_cases: 224
|
||||
model: o1-2024-12-17 (high)
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 23.7
|
||||
pass_rate_2: 61.7
|
||||
pass_num_1: 53
|
||||
pass_num_2: 139
|
||||
percent_cases_well_formed: 91.5
|
||||
error_outputs: 25
|
||||
num_malformed_responses: 24
|
||||
num_with_malformed_responses: 19
|
||||
user_asks: 16
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
total_tests: 225
|
||||
command: aider --model openrouter/openai/o1
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 133.2
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-21-20-56-21--polyglot-deepseek-diff
|
||||
test_cases: 225
|
||||
model: DeepSeek Chat V2.5
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 5.3
|
||||
pass_rate_2: 17.8
|
||||
pass_num_1: 12
|
||||
pass_num_2: 40
|
||||
percent_cases_well_formed: 92.9
|
||||
error_outputs: 42
|
||||
num_malformed_responses: 37
|
||||
num_with_malformed_responses: 16
|
||||
user_asks: 23
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 5
|
||||
test_timeouts: 5
|
||||
total_tests: 225
|
||||
command: aider --model deepseek/deepseek-chat
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 184.0
|
||||
total_cost: 0.5101
|
||||
|
||||
- dirname: 2024-12-21-21-46-27--polyglot-haiku-diff
|
||||
test_cases: 225
|
||||
model: claude-3-5-haiku-20241022
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 7.1
|
||||
pass_rate_2: 28.0
|
||||
pass_num_1: 16
|
||||
pass_num_2: 63
|
||||
percent_cases_well_formed: 91.1
|
||||
error_outputs: 31
|
||||
num_malformed_responses: 30
|
||||
num_with_malformed_responses: 20
|
||||
user_asks: 13
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 9
|
||||
total_tests: 225
|
||||
command: aider --model claude-3-5-haiku-20241022
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 31.8
|
||||
total_cost: 6.0583
|
||||
|
||||
- dirname: 2024-12-22-13-22-32--polyglot-qwen-diff
|
||||
test_cases: 225
|
||||
model: Qwen2.5-Coder-32B-Instruct
|
||||
edit_format: diff
|
||||
commit_hash: 6d7e8be-dirty
|
||||
pass_rate_1: 4.4
|
||||
pass_rate_2: 8.0
|
||||
pass_num_1: 10
|
||||
pass_num_2: 18
|
||||
percent_cases_well_formed: 71.6
|
||||
error_outputs: 158
|
||||
num_malformed_responses: 148
|
||||
num_with_malformed_responses: 64
|
||||
user_asks: 132
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 2
|
||||
total_tests: 225
|
||||
command: "aider --model openai/Qwen/Qwen2.5-Coder-32B-Instruct # via hyperbolic"
|
||||
date: 2024-12-22
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 84.4
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-22-21-26-35--polyglot-o1mini-whole
|
||||
test_cases: 225
|
||||
model: o1-mini-2024-09-12
|
||||
edit_format: whole
|
||||
commit_hash: 37df899
|
||||
pass_rate_1: 5.8
|
||||
pass_rate_2: 32.9
|
||||
pass_num_1: 13
|
||||
pass_num_2: 74
|
||||
percent_cases_well_formed: 96.9
|
||||
error_outputs: 8
|
||||
num_malformed_responses: 8
|
||||
num_with_malformed_responses: 7
|
||||
user_asks: 27
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
total_tests: 225
|
||||
command: aider --model o1-mini
|
||||
date: 2024-12-22
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 34.7
|
||||
total_cost: 18.5770
|
||||
|
||||
- dirname: 2024-12-22-18-43-25--gemini-exp-1206-polyglot-whole-2
|
||||
test_cases: 225
|
||||
model: gemini-exp-1206
|
||||
edit_format: whole
|
||||
commit_hash: b1bc2f8
|
||||
pass_rate_1: 19.6
|
||||
pass_rate_2: 38.2
|
||||
pass_num_1: 44
|
||||
pass_num_2: 86
|
||||
percent_cases_well_formed: 98.2
|
||||
error_outputs: 8
|
||||
num_malformed_responses: 8
|
||||
num_with_malformed_responses: 4
|
||||
user_asks: 32
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 9
|
||||
total_tests: 225
|
||||
command: aider --model gemini/gemini-exp-1206
|
||||
date: 2024-12-22
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 45.5
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-22-20-08-13--gemini-2.0-flash-exp-polyglot-whole
|
||||
test_cases: 225
|
||||
model: gemini-2.0-flash-exp
|
||||
edit_format: whole
|
||||
commit_hash: b1bc2f8
|
||||
pass_rate_1: 11.6
|
||||
pass_rate_2: 22.2
|
||||
pass_num_1: 26
|
||||
pass_num_2: 50
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 9
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 8
|
||||
total_tests: 225
|
||||
command: aider --model gemini/gemini-2.0-flash-exp
|
||||
date: 2024-12-22
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 12.2
|
||||
total_cost: 0.0000
|
||||
312
aider/website/_data/polyglot_leaderboard.yml
Normal file
312
aider/website/_data/polyglot_leaderboard.yml
Normal file
@@ -0,0 +1,312 @@
|
||||
- dirname: 2024-12-21-18-41-18--polyglot-gpt-4o-mini
|
||||
test_cases: 225
|
||||
model: gpt-4o-mini-2024-07-18
|
||||
edit_format: whole
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 0.9
|
||||
pass_rate_2: 3.6
|
||||
pass_num_1: 2
|
||||
pass_num_2: 8
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 0
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 36
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
total_tests: 225
|
||||
command: aider --model gpt-4o-mini-2024-07-18
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 17.3
|
||||
total_cost: 0.3236
|
||||
|
||||
- dirname: 2024-12-21-18-44-28--polyglot-sonnet
|
||||
test_cases: 225
|
||||
model: claude-3-5-sonnet-20241022
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 18.7
|
||||
pass_rate_2: 45.3
|
||||
pass_num_1: 42
|
||||
pass_num_2: 102
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 14
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 12
|
||||
total_tests: 225
|
||||
command: aider --model claude-3-5-sonnet-20241022
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 30.8
|
||||
total_cost: 13.4847
|
||||
|
||||
- dirname: 2024-12-21-18-52-34--polyglot-gpt-4o-diff
|
||||
test_cases: 225
|
||||
model: gpt-4o-2024-11-20
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 4.9
|
||||
pass_rate_2: 15.1
|
||||
pass_num_1: 11
|
||||
pass_num_2: 34
|
||||
percent_cases_well_formed: 96.0
|
||||
error_outputs: 12
|
||||
num_malformed_responses: 11
|
||||
num_with_malformed_responses: 9
|
||||
user_asks: 34
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 19
|
||||
total_tests: 225
|
||||
command: aider --model gpt-4o-2024-11-20
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 22.2
|
||||
total_cost: 7.1835
|
||||
|
||||
- dirname: 2024-12-21-19-23-03--polyglot-o1-hard-diff
|
||||
test_cases: 224
|
||||
model: o1-2024-12-17 (high)
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 23.7
|
||||
pass_rate_2: 61.7
|
||||
pass_num_1: 53
|
||||
pass_num_2: 139
|
||||
percent_cases_well_formed: 91.5
|
||||
error_outputs: 25
|
||||
num_malformed_responses: 24
|
||||
num_with_malformed_responses: 19
|
||||
user_asks: 16
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
total_tests: 225
|
||||
command: aider --model openrouter/openai/o1
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 133.2
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-21-20-56-21--polyglot-deepseek-diff
|
||||
test_cases: 225
|
||||
model: DeepSeek Chat V2.5
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 5.3
|
||||
pass_rate_2: 17.8
|
||||
pass_num_1: 12
|
||||
pass_num_2: 40
|
||||
percent_cases_well_formed: 92.9
|
||||
error_outputs: 42
|
||||
num_malformed_responses: 37
|
||||
num_with_malformed_responses: 16
|
||||
user_asks: 23
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 5
|
||||
test_timeouts: 5
|
||||
total_tests: 225
|
||||
command: aider --model deepseek/deepseek-chat
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 184.0
|
||||
total_cost: 0.5101
|
||||
|
||||
- dirname: 2024-12-21-21-46-27--polyglot-haiku-diff
|
||||
test_cases: 225
|
||||
model: claude-3-5-haiku-20241022
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 7.1
|
||||
pass_rate_2: 28.0
|
||||
pass_num_1: 16
|
||||
pass_num_2: 63
|
||||
percent_cases_well_formed: 91.1
|
||||
error_outputs: 31
|
||||
num_malformed_responses: 30
|
||||
num_with_malformed_responses: 20
|
||||
user_asks: 13
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 9
|
||||
total_tests: 225
|
||||
command: aider --model claude-3-5-haiku-20241022
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 31.8
|
||||
total_cost: 6.0583
|
||||
|
||||
- dirname: 2024-12-22-13-22-32--polyglot-qwen-diff
|
||||
test_cases: 225
|
||||
model: Qwen2.5-Coder-32B-Instruct
|
||||
edit_format: diff
|
||||
commit_hash: 6d7e8be-dirty
|
||||
pass_rate_1: 4.4
|
||||
pass_rate_2: 8.0
|
||||
pass_num_1: 10
|
||||
pass_num_2: 18
|
||||
percent_cases_well_formed: 71.6
|
||||
error_outputs: 158
|
||||
num_malformed_responses: 148
|
||||
num_with_malformed_responses: 64
|
||||
user_asks: 132
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 2
|
||||
total_tests: 225
|
||||
command: "aider --model openai/Qwen/Qwen2.5-Coder-32B-Instruct # via hyperbolic"
|
||||
date: 2024-12-22
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 84.4
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-22-21-26-35--polyglot-o1mini-whole
|
||||
test_cases: 225
|
||||
model: o1-mini-2024-09-12
|
||||
edit_format: whole
|
||||
commit_hash: 37df899
|
||||
pass_rate_1: 5.8
|
||||
pass_rate_2: 32.9
|
||||
pass_num_1: 13
|
||||
pass_num_2: 74
|
||||
percent_cases_well_formed: 96.9
|
||||
error_outputs: 8
|
||||
num_malformed_responses: 8
|
||||
num_with_malformed_responses: 7
|
||||
user_asks: 27
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
total_tests: 225
|
||||
command: aider --model o1-mini
|
||||
date: 2024-12-22
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 34.7
|
||||
total_cost: 18.5770
|
||||
|
||||
- dirname: 2024-12-22-18-43-25--gemini-exp-1206-polyglot-whole-2
|
||||
test_cases: 225
|
||||
model: gemini-exp-1206
|
||||
edit_format: whole
|
||||
commit_hash: b1bc2f8
|
||||
pass_rate_1: 19.6
|
||||
pass_rate_2: 38.2
|
||||
pass_num_1: 44
|
||||
pass_num_2: 86
|
||||
percent_cases_well_formed: 98.2
|
||||
error_outputs: 8
|
||||
num_malformed_responses: 8
|
||||
num_with_malformed_responses: 4
|
||||
user_asks: 32
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 9
|
||||
total_tests: 225
|
||||
command: aider --model gemini/gemini-exp-1206
|
||||
date: 2024-12-22
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 45.5
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-22-20-08-13--gemini-2.0-flash-exp-polyglot-whole
|
||||
test_cases: 225
|
||||
model: gemini-2.0-flash-exp
|
||||
edit_format: whole
|
||||
commit_hash: b1bc2f8
|
||||
pass_rate_1: 11.6
|
||||
pass_rate_2: 22.2
|
||||
pass_num_1: 26
|
||||
pass_num_2: 50
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 9
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 8
|
||||
total_tests: 225
|
||||
command: aider --model gemini/gemini-2.0-flash-exp
|
||||
date: 2024-12-22
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 12.2
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-23-01-11-56--yi-test
|
||||
test_cases: 225
|
||||
model: yi-lightning
|
||||
edit_format: whole
|
||||
commit_hash: 2b1625e
|
||||
pass_rate_1: 5.8
|
||||
pass_rate_2: 12.9
|
||||
pass_num_1: 13
|
||||
pass_num_2: 29
|
||||
percent_cases_well_formed: 92.9
|
||||
error_outputs: 87
|
||||
num_malformed_responses: 72
|
||||
num_with_malformed_responses: 16
|
||||
user_asks: 107
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 6
|
||||
total_tests: 225
|
||||
command: aider --model openai/yi-lightning
|
||||
date: 2024-12-23
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 146.7
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-25-13-31-51--deepseekv3preview-diff2
|
||||
test_cases: 225
|
||||
model: DeepSeek Chat V3 Preview
|
||||
edit_format: diff
|
||||
commit_hash: 0a23c4a-dirty
|
||||
pass_rate_1: 22.7
|
||||
pass_rate_2: 48.4
|
||||
pass_num_1: 51
|
||||
pass_num_2: 109
|
||||
percent_cases_well_formed: 98.7
|
||||
error_outputs: 7
|
||||
num_malformed_responses: 7
|
||||
num_with_malformed_responses: 3
|
||||
user_asks: 19
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 8
|
||||
total_tests: 225
|
||||
command: aider --model deepseek/deepseek-chat
|
||||
date: 2024-12-25
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 34.8
|
||||
total_cost: 0.3369
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
|
||||
## Avoid package conflicts
|
||||
|
||||
You can avoid python package conflicts by installing aider using
|
||||
[pipx](/docs/install/pipx.html)
|
||||
or
|
||||
[uv](/docs/install/uv.html).
|
||||
|
||||
If you are using aider to work on a python project, sometimes your project will require
|
||||
specific versions of python packages which conflict with the versions that aider
|
||||
requires.
|
||||
If this happens, the `python -m pip install aide-chat` command may return errors like these:
|
||||
|
||||
```
|
||||
aider-chat 0.23.0 requires somepackage==X.Y.Z, but you have somepackage U.W.V which is incompatible.
|
||||
```
|
||||
|
||||
which will install it globally on your system
|
||||
within its own python environment.
|
||||
This way you can use aider to work on any python project,
|
||||
even if that project has conflicting dependencies.
|
||||
@@ -1,185 +0,0 @@
|
||||
document.addEventListener('DOMContentLoaded', function () {
|
||||
var ctx = document.getElementById('editChart').getContext('2d');
|
||||
const blueDiagonalPattern = pattern.draw('diagonal', 'rgba(54, 162, 235, 0.2)');
|
||||
const redDiagonalPattern = pattern.draw('diagonal', 'rgba(255, 99, 132, 0.2)');
|
||||
let displayedData = [];
|
||||
|
||||
const HIGHTLIGHT_MODEL = 'no no no';
|
||||
var leaderboardData = {
|
||||
labels: [],
|
||||
datasets: [{
|
||||
label: 'Percent completed correctly',
|
||||
data: [],
|
||||
backgroundColor: function(context) {
|
||||
const row = allData[context.dataIndex];
|
||||
if (row && row.edit_format === 'whole') {
|
||||
return diagonalPattern;
|
||||
}
|
||||
const label = leaderboardData.labels[context.dataIndex] || '';
|
||||
return (label && label.includes(HIGHTLIGHT_MODEL)) ? 'rgba(255, 99, 132, 0.2)' : 'rgba(54, 162, 235, 0.2)';
|
||||
},
|
||||
borderColor: function(context) {
|
||||
const label = context.chart.data.labels[context.dataIndex] || '';
|
||||
return (label && label.includes(HIGHTLIGHT_MODEL)) ? 'rgba(255, 99, 132, 1)' : 'rgba(54, 162, 235, 1)';
|
||||
},
|
||||
borderWidth: 1
|
||||
}]
|
||||
};
|
||||
|
||||
var allData = [];
|
||||
{% for row in edit_sorted %}
|
||||
allData.push({
|
||||
model: '{{ row.model }}',
|
||||
pass_rate_2: {{ row.pass_rate_2 }},
|
||||
percent_cases_well_formed: {{ row.percent_cases_well_formed }},
|
||||
edit_format: '{{ row.edit_format }}'
|
||||
});
|
||||
{% endfor %}
|
||||
|
||||
function updateChart() {
|
||||
var selectedRows = document.querySelectorAll('tr.selected');
|
||||
var showAll = selectedRows.length === 0;
|
||||
|
||||
displayedData = [];
|
||||
leaderboardData.labels = [];
|
||||
leaderboardData.datasets[0].data = [];
|
||||
|
||||
allData.forEach(function(row, index) {
|
||||
var rowElement = document.getElementById('edit-row-' + index);
|
||||
if (showAll) {
|
||||
rowElement.classList.remove('selected');
|
||||
}
|
||||
if (showAll || rowElement.classList.contains('selected')) {
|
||||
displayedData.push(row);
|
||||
leaderboardData.labels.push(row.model);
|
||||
leaderboardData.datasets[0].data.push(row.pass_rate_2);
|
||||
}
|
||||
});
|
||||
|
||||
leaderboardChart.update();
|
||||
leaderboardChart.render();
|
||||
}
|
||||
|
||||
// Use displayedData in the backgroundColor callback instead of allData
|
||||
leaderboardData.datasets[0].backgroundColor = function(context) {
|
||||
const row = displayedData[context.dataIndex];
|
||||
const label = leaderboardData.labels[context.dataIndex] || '';
|
||||
if (label && label.includes(HIGHTLIGHT_MODEL)) {
|
||||
if (row && row.edit_format === 'whole') return redDiagonalPattern;
|
||||
else return 'rgba(255, 99, 132, 0.2)';
|
||||
} else if (row && row.edit_format === 'whole') {
|
||||
return blueDiagonalPattern;
|
||||
} else {
|
||||
return 'rgba(54, 162, 235, 0.2)';
|
||||
}
|
||||
};
|
||||
|
||||
var tableBody = document.querySelector('table tbody');
|
||||
allData.forEach(function(row, index) {
|
||||
var tr = tableBody.children[index];
|
||||
tr.id = 'edit-row-' + index;
|
||||
tr.style.cursor = 'pointer';
|
||||
tr.onclick = function() {
|
||||
this.classList.toggle('selected');
|
||||
updateChart();
|
||||
};
|
||||
});
|
||||
|
||||
var leaderboardChart = new Chart(ctx, {
|
||||
type: 'bar',
|
||||
data: leaderboardData,
|
||||
options: {
|
||||
plugins: {
|
||||
legend: {
|
||||
display: true,
|
||||
labels: {
|
||||
generateLabels: function(chart) {
|
||||
return [
|
||||
{
|
||||
text: 'Diff-like format',
|
||||
fillStyle: 'rgba(54, 162, 235, 0.2)',
|
||||
strokeStyle: 'rgba(54, 162, 235, 1)',
|
||||
lineWidth: 1
|
||||
},
|
||||
{
|
||||
text: 'Whole format',
|
||||
fillStyle: blueDiagonalPattern,
|
||||
strokeStyle: 'rgba(54, 162, 235, 1)',
|
||||
lineWidth: 1
|
||||
}
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
scales: {
|
||||
y: {
|
||||
beginAtZero: true,
|
||||
title: {
|
||||
display: true,
|
||||
text: 'Percent completed correctly'
|
||||
}
|
||||
},
|
||||
x: {
|
||||
ticks: {
|
||||
callback: function(value, index) {
|
||||
const label = this.getLabelForValue(value);
|
||||
if (label.length <= "claude-3-5-sonnet".length) {
|
||||
return label;
|
||||
}
|
||||
|
||||
// Find all possible split positions
|
||||
const splitPositions = [];
|
||||
for (let i = 0; i < label.length; i++) {
|
||||
if (label[i] === '-' || label[i] === ' ') {
|
||||
splitPositions.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (splitPositions.length === 0) {
|
||||
return label;
|
||||
}
|
||||
|
||||
// Find split position closest to middle
|
||||
const middle = label.length / 2;
|
||||
const splitIndex = splitPositions.reduce((closest, current) => {
|
||||
return Math.abs(current - middle) < Math.abs(closest - middle) ? current : closest;
|
||||
});
|
||||
|
||||
return [
|
||||
label.slice(0, splitIndex),
|
||||
label.slice(splitIndex + 1)
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
updateChart();
|
||||
|
||||
// Add search functionality for edit table
|
||||
document.getElementById('editSearchInput').addEventListener('keyup', function() {
|
||||
var searchWords = this.value.toLowerCase().split(' ').filter(word => word.length > 0);
|
||||
var tableBody = document.querySelector('table:first-of-type tbody');
|
||||
var rows = tableBody.getElementsByTagName('tr');
|
||||
|
||||
displayedData = [];
|
||||
leaderboardData.labels = [];
|
||||
leaderboardData.datasets[0].data = [];
|
||||
|
||||
for (var i = 0; i < rows.length; i++) {
|
||||
var rowText = rows[i].textContent;
|
||||
if (searchWords.every(word => rowText.toLowerCase().includes(word))) {
|
||||
rows[i].style.display = '';
|
||||
displayedData.push(allData[i]);
|
||||
leaderboardData.labels.push(allData[i].model);
|
||||
leaderboardData.datasets[0].data.push(allData[i].pass_rate_2);
|
||||
} else {
|
||||
rows[i].style.display = 'none';
|
||||
}
|
||||
}
|
||||
leaderboardChart.update();
|
||||
});
|
||||
});
|
||||
@@ -1,5 +0,0 @@
|
||||
{: .tip }
|
||||
All API keys can be stored in a
|
||||
[.env file](/docs/config/dotenv.html#storing-llm-keys)
|
||||
or in a [YAML config file](/docs/config/aider_conf.html#storing-llm-keys).
|
||||
|
||||
@@ -1,17 +1,16 @@
|
||||
|
||||
You can get started quickly like this:
|
||||
If you already have python 3.8-3.13 installed, you can get started quickly like this:
|
||||
|
||||
```bash
|
||||
python -m pip install -U aider-chat
|
||||
python -m pip install aider-install
|
||||
aider-install
|
||||
|
||||
# Change directory into a git repo
|
||||
cd /to/your/git/repo
|
||||
# Change directory into your code base
|
||||
cd /to/your/project
|
||||
|
||||
# Work with Claude 3.5 Sonnet on your repo
|
||||
export ANTHROPIC_API_KEY=your-key-goes-here
|
||||
aider
|
||||
# Work with Claude 3.5 Sonnet on your code
|
||||
aider --model sonnet --anthropic-api-key your-key-goes-here
|
||||
|
||||
# Work with GPT-4o on your repo
|
||||
export OPENAI_API_KEY=your-key-goes-here
|
||||
aider
|
||||
# Work with GPT-4o on your code
|
||||
aider --model gpt-4o --openai-api-key your-key-goes-here
|
||||
```
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
<meta name="msapplication-TileColor" content="#da532c">
|
||||
<meta name="theme-color" content="#ffffff">
|
||||
|
||||
{% if site.analytics.enabled %}
|
||||
<!-- Cookie Consent -->
|
||||
<link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/npm/cookieconsent@3/build/cookieconsent.min.css" />
|
||||
<script src="https://cdn.jsdelivr.net/npm/cookieconsent@3/build/cookieconsent.min.js" data-cfasync="false"></script>
|
||||
@@ -72,9 +73,10 @@ window.addEventListener('load', function(){
|
||||
// PostHog initialization function
|
||||
function initPostHog() {
|
||||
!function(t,e){var o,n,p,r;e.__SV||(window.posthog=e,e._i=[],e.init=function(i,s,a){function g(t,e){var o=e.split(".");2==o.length&&(t=t[o[0]],e=o[1]),t[e]=function(){t.push([e].concat(Array.prototype.slice.call(arguments,0)))}}(p=t.createElement("script")).type="text/javascript",p.crossOrigin="anonymous",p.async=!0,p.src=s.api_host.replace(".i.posthog.com","-assets.i.posthog.com")+"/static/array.js",(r=t.getElementsByTagName("script")[0]).parentNode.insertBefore(p,r);var u=e;for(void 0!==a?u=e[a]=[]:a="posthog",u.people=u.people||[],u.toString=function(t){var e="posthog";return"posthog"!==a&&(e+="."+a),t||(e+=" (stub)"),e},u.people.toString=function(){return u.toString(1)+".people (stub)"},o="init capture register register_once register_for_session unregister unregister_for_session getFeatureFlag getFeatureFlagPayload isFeatureEnabled reloadFeatureFlags updateEarlyAccessFeatureEnrollment getEarlyAccessFeatures on onFeatureFlags onSessionId getSurveys getActiveMatchingSurveys renderSurvey canRenderSurvey getNextSurveyStep identify setPersonProperties group resetGroups setPersonPropertiesForFlags resetPersonPropertiesForFlags setGroupPropertiesForFlags resetGroupPropertiesForFlags reset get_distinct_id getGroups get_session_id get_session_replay_url alias set_config startSessionRecording stopSessionRecording sessionRecordingStarted captureException loadToolbar get_property getSessionProperty createPersonProfile opt_in_capturing opt_out_capturing has_opted_in_capturing has_opted_out_capturing clear_opt_in_out_capturing debug".split(" "),n=0;n<o.length;n++)g(u,o[n]);e._i.push([i,s,a])},e.__SV=1)}(document,window.posthog||[]);
|
||||
posthog.init('phc_99T7muzafUMMZX15H8XePbMSreEUzahHbtWjy3l5Qbv', {
|
||||
api_host:'https://us.i.posthog.com',
|
||||
posthog.init('{{ site.analytics.posthog_key }}', {
|
||||
api_host: '{{ site.analytics.posthog_host }}',
|
||||
person_profiles: 'identified_only'
|
||||
})
|
||||
}
|
||||
</script>
|
||||
{% endif %}
|
||||
|
||||
4
aider/website/_includes/keys.md
Normal file
4
aider/website/_includes/keys.md
Normal file
@@ -0,0 +1,4 @@
|
||||
{: .tip :}
|
||||
See the
|
||||
[API key configuration docs](/docs/config/api-keys.html)
|
||||
for information on how to configure and store your API keys.
|
||||
@@ -0,0 +1,190 @@
|
||||
document.addEventListener('DOMContentLoaded', function () {
|
||||
var ctx = document.getElementById('editChart').getContext('2d');
|
||||
const blueDiagonalPattern = pattern.draw('diagonal', 'rgba(54, 162, 235, 0.2)');
|
||||
const redDiagonalPattern = pattern.draw('diagonal', 'rgba(255, 99, 132, 0.2)');
|
||||
let displayedData = [];
|
||||
|
||||
const HIGHLIGHT_MODEL = '{{ highlight_model | default: "no no no" }}';
|
||||
var leaderboardData = {
|
||||
labels: [],
|
||||
datasets: [{
|
||||
label: 'Percent completed correctly',
|
||||
data: [],
|
||||
backgroundColor: function(context) {
|
||||
const row = allData[context.dataIndex];
|
||||
if (row && row.edit_format === 'whole') {
|
||||
return diagonalPattern;
|
||||
}
|
||||
const label = leaderboardData.labels[context.dataIndex] || '';
|
||||
return (label && label.includes(HIGHLIGHT_MODEL)) ? 'rgba(255, 99, 132, 0.2)' : 'rgba(54, 162, 235, 0.2)';
|
||||
},
|
||||
borderColor: function(context) {
|
||||
const label = context.chart.data.labels[context.dataIndex] || '';
|
||||
return (label && label.includes(HIGHLIGHT_MODEL)) ? 'rgba(255, 99, 132, 1)' : 'rgba(54, 162, 235, 1)';
|
||||
},
|
||||
borderWidth: 1
|
||||
}]
|
||||
};
|
||||
|
||||
var allData = [];
|
||||
{% for row in data_source %}
|
||||
allData.push({
|
||||
model: '{{ row.model }}',
|
||||
pass_rate: {{ row[pass_rate_field] }},
|
||||
percent_cases_well_formed: {{ row.percent_cases_well_formed }},
|
||||
edit_format: '{{ row.edit_format | default: "diff" }}'
|
||||
});
|
||||
{% endfor %}
|
||||
|
||||
function updateChart() {
|
||||
var selectedRows = document.querySelectorAll('tr.selected');
|
||||
var showAll = selectedRows.length === 0;
|
||||
|
||||
displayedData = [];
|
||||
leaderboardData.labels = [];
|
||||
leaderboardData.datasets[0].data = [];
|
||||
|
||||
allData.forEach(function(row, index) {
|
||||
var rowElement = document.getElementById('edit-row-' + index);
|
||||
if (showAll) {
|
||||
rowElement.classList.remove('selected');
|
||||
}
|
||||
if (showAll || rowElement.classList.contains('selected')) {
|
||||
displayedData.push(row);
|
||||
leaderboardData.labels.push(row.model);
|
||||
leaderboardData.datasets[0].data.push(row.pass_rate);
|
||||
}
|
||||
});
|
||||
|
||||
leaderboardChart.update();
|
||||
leaderboardChart.render();
|
||||
}
|
||||
|
||||
// Use displayedData in the backgroundColor callback instead of allData
|
||||
leaderboardData.datasets[0].backgroundColor = function(context) {
|
||||
const row = displayedData[context.dataIndex];
|
||||
const label = leaderboardData.labels[context.dataIndex] || '';
|
||||
if (label && label.includes(HIGHLIGHT_MODEL)) {
|
||||
if (row && row.edit_format === 'whole') return redDiagonalPattern;
|
||||
else return 'rgba(255, 99, 132, 0.2)';
|
||||
} else if (row && row.edit_format === 'whole') {
|
||||
return blueDiagonalPattern;
|
||||
} else {
|
||||
return 'rgba(54, 162, 235, 0.2)';
|
||||
}
|
||||
};
|
||||
|
||||
var tableBody = document.querySelector('table tbody');
|
||||
allData.forEach(function(row, index) {
|
||||
var tr = tableBody.children[index];
|
||||
if (!tr) {
|
||||
// If the row doesn't exist, create it
|
||||
tr = document.createElement('tr');
|
||||
tableBody.appendChild(tr);
|
||||
}
|
||||
tr.id = 'edit-row-' + index;
|
||||
tr.style.cursor = 'pointer';
|
||||
tr.onclick = function() {
|
||||
this.classList.toggle('selected');
|
||||
updateChart();
|
||||
};
|
||||
});
|
||||
|
||||
var leaderboardChart = new Chart(ctx, {
|
||||
type: 'bar',
|
||||
data: leaderboardData,
|
||||
options: {
|
||||
plugins: {
|
||||
legend: {
|
||||
display: true,
|
||||
labels: {
|
||||
generateLabels: function(chart) {
|
||||
return [
|
||||
{
|
||||
text: 'Diff-like format',
|
||||
fillStyle: 'rgba(54, 162, 235, 0.2)',
|
||||
strokeStyle: 'rgba(54, 162, 235, 1)',
|
||||
lineWidth: 1
|
||||
},
|
||||
{
|
||||
text: 'Whole format',
|
||||
fillStyle: blueDiagonalPattern,
|
||||
strokeStyle: 'rgba(54, 162, 235, 1)',
|
||||
lineWidth: 1
|
||||
}
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
scales: {
|
||||
y: {
|
||||
beginAtZero: true,
|
||||
title: {
|
||||
display: true,
|
||||
text: 'Percent completed correctly'
|
||||
}
|
||||
},
|
||||
x: {
|
||||
ticks: {
|
||||
callback: function(value, index) {
|
||||
const label = this.getLabelForValue(value);
|
||||
if (label.length <= "claude-3-5-sonnet".length) {
|
||||
return label;
|
||||
}
|
||||
|
||||
// Find all possible split positions
|
||||
const splitPositions = [];
|
||||
for (let i = 0; i < label.length; i++) {
|
||||
if (label[i] === '-' || label[i] === ' ') {
|
||||
splitPositions.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (splitPositions.length === 0) {
|
||||
return label;
|
||||
}
|
||||
|
||||
// Find split position closest to middle
|
||||
const middle = label.length / 2;
|
||||
const splitIndex = splitPositions.reduce((closest, current) => {
|
||||
return Math.abs(current - middle) < Math.abs(closest - middle) ? current : closest;
|
||||
});
|
||||
|
||||
return [
|
||||
label.slice(0, splitIndex),
|
||||
label.slice(splitIndex + 1)
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
updateChart();
|
||||
|
||||
// Add search functionality for edit table
|
||||
document.getElementById('editSearchInput').addEventListener('keyup', function() {
|
||||
var searchWords = this.value.toLowerCase().split(' ').filter(word => word.length > 0);
|
||||
var tableBody = document.querySelector('table:first-of-type tbody');
|
||||
var rows = tableBody.getElementsByTagName('tr');
|
||||
|
||||
displayedData = [];
|
||||
leaderboardData.labels = [];
|
||||
leaderboardData.datasets[0].data = [];
|
||||
|
||||
for (var i = 0; i < rows.length; i++) {
|
||||
var rowText = rows[i].textContent;
|
||||
if (searchWords.every(word => rowText.toLowerCase().includes(word))) {
|
||||
rows[i].style.display = '';
|
||||
displayedData.push(allData[i]);
|
||||
leaderboardData.labels.push(allData[i].model);
|
||||
leaderboardData.datasets[0].data.push(allData[i].pass_rate);
|
||||
} else {
|
||||
rows[i].style.display = 'none';
|
||||
}
|
||||
}
|
||||
leaderboardChart.update();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
document.addEventListener('DOMContentLoaded', function () {
|
||||
var ctx = document.getElementById('refacChart').getContext('2d');
|
||||
var leaderboardData = {
|
||||
labels: [],
|
||||
datasets: [{
|
||||
label: 'Percent completed correctly',
|
||||
data: [],
|
||||
backgroundColor: 'rgba(54, 162, 235, 0.2)',
|
||||
borderColor: 'rgba(54, 162, 235, 1)',
|
||||
borderWidth: 1
|
||||
}]
|
||||
};
|
||||
|
||||
var allData = [];
|
||||
{% for row in refac_sorted %}
|
||||
allData.push({
|
||||
model: '{{ row.model }}',
|
||||
pass_rate_1: {{ row.pass_rate_1 }},
|
||||
percent_cases_well_formed: {{ row.percent_cases_well_formed }}
|
||||
});
|
||||
{% endfor %}
|
||||
|
||||
function updateChart() {
|
||||
var selectedRows = document.querySelectorAll('tr.selected');
|
||||
var showAll = selectedRows.length === 0;
|
||||
|
||||
leaderboardData.labels = [];
|
||||
leaderboardData.datasets[0].data = [];
|
||||
|
||||
allData.forEach(function(row, index) {
|
||||
var rowElement = document.getElementById('refac-row-' + index);
|
||||
if (showAll) {
|
||||
rowElement.classList.remove('selected');
|
||||
}
|
||||
if (showAll || rowElement.classList.contains('selected')) {
|
||||
leaderboardData.labels.push(row.model);
|
||||
leaderboardData.datasets[0].data.push(row.pass_rate_1);
|
||||
}
|
||||
});
|
||||
|
||||
leaderboardChart.update();
|
||||
}
|
||||
|
||||
var tableBody = document.querySelectorAll('table tbody')[1];
|
||||
allData.forEach(function(row, index) {
|
||||
var tr = tableBody.children[index];
|
||||
tr.id = 'refac-row-' + index;
|
||||
tr.style.cursor = 'pointer';
|
||||
tr.onclick = function() {
|
||||
this.classList.toggle('selected');
|
||||
updateChart();
|
||||
};
|
||||
});
|
||||
|
||||
var leaderboardChart = new Chart(ctx, {
|
||||
type: 'bar',
|
||||
data: leaderboardData,
|
||||
options: {
|
||||
scales: {
|
||||
y: {
|
||||
beginAtZero: true
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
updateChart();
|
||||
|
||||
// Add search functionality for refactoring table
|
||||
document.getElementById('refacSearchInput').addEventListener('keyup', function() {
|
||||
var searchWords = this.value.toLowerCase().split(' ').filter(word => word.length > 0);
|
||||
var tableBody = document.querySelectorAll('table tbody')[1];
|
||||
var rows = tableBody.getElementsByTagName('tr');
|
||||
|
||||
leaderboardData.labels = [];
|
||||
leaderboardData.datasets[0].data = [];
|
||||
|
||||
for (var i = 0; i < rows.length; i++) {
|
||||
var rowText = rows[i].textContent;
|
||||
if (searchWords.every(word => rowText.toLowerCase().includes(word))) {
|
||||
rows[i].style.display = '';
|
||||
leaderboardData.labels.push(allData[i].model);
|
||||
leaderboardData.datasets[0].data.push(allData[i].pass_rate_1);
|
||||
} else {
|
||||
rows[i].style.display = 'none';
|
||||
}
|
||||
}
|
||||
leaderboardChart.update();
|
||||
});
|
||||
});
|
||||
@@ -1,49 +0,0 @@
|
||||
|
||||
Aider has special support for providing
|
||||
OpenAI and Anthropic API keys
|
||||
via dedicated
|
||||
[command line switches](/docs/config/options.html#api-keys-and-settings)
|
||||
`--openai-api-key` and `--anthropic-api-key`.
|
||||
|
||||
You can also set those API keys via special entries in the
|
||||
[yaml config file](/docs/config/aider_conf.html), like this:
|
||||
|
||||
```yaml
|
||||
openai-api-key: <key>
|
||||
anthropic-api-key: <key>
|
||||
```
|
||||
|
||||
All other LLM providers can use one of the following methods to set their
|
||||
keys:
|
||||
|
||||
### API keys on the command line
|
||||
{: .no_toc }
|
||||
|
||||
Use `--api-key provider=<key>` which has the effect of setting the environment variable `PROVIDER_API_KEY=<key>`. So `--api-key gemini=xxx` would set `GEMINI_API_KEY=xxx`.
|
||||
|
||||
### API keys in a .env file
|
||||
{: .no_toc }
|
||||
|
||||
The [.env file](/docs/config/dotenv.html)
|
||||
is a great place to set API keys and other provider API environment variables:
|
||||
|
||||
```bash
|
||||
GEMINI_API_KEY=foo
|
||||
OPENROUTER_API_KEY=bar
|
||||
DEEPSEEK_API_KEY=baz
|
||||
```
|
||||
|
||||
### API keys in .aider.conf.yml
|
||||
{: .no_toc }
|
||||
|
||||
Or you can set API keys in the
|
||||
[`.aider.conf.yml` file](/docs/config/aider_conf.html)
|
||||
via the `api-key` entry:
|
||||
|
||||
```
|
||||
api-key:
|
||||
- gemini=foo # Sets env var GEMINI_API_KEY=foo
|
||||
- openrouter=bar # Sets env var OPENROUTER_API_KEY=bar
|
||||
- deepseek=baz # Sets env var DEEPSEEK_API_KEY=baz
|
||||
```
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
{: .tip }
|
||||
The best way to install aider is with
|
||||
[pipx](/docs/install/pipx.html)
|
||||
or
|
||||
[uv](/docs/install/uv.html)
|
||||
once for your whole system.
|
||||
Or, using a python
|
||||
[virtual environment](https://docs.python.org/3/library/venv.html){:target="_blank"}
|
||||
is recommended.
|
||||
216
aider/website/_posts/2024-12-21-polyglot.md
Normal file
216
aider/website/_posts/2024-12-21-polyglot.md
Normal file
@@ -0,0 +1,216 @@
|
||||
---
|
||||
title: o1 tops aider's new polyglot leaderboard
|
||||
excerpt: o1 scores the top result on aider's new multi-language, more challenging coding benchmark.
|
||||
highlight_image: /assets/o1-polyglot.jpg
|
||||
draft: false
|
||||
nav_exclude: true
|
||||
---
|
||||
{% if page.date %}
|
||||
<p class="post-date">{{ page.date | date: "%B %d, %Y" }}</p>
|
||||
{% endif %}
|
||||
|
||||
# o1 tops aider's new polyglot leaderboard
|
||||
{: .no_toc }
|
||||
|
||||
<canvas id="editChart" width="800" height="450" style="margin-top: 20px"></canvas>
|
||||
|
||||
OpenAI's new o1 model with "high" reasoning effort
|
||||
gets the top score on the
|
||||
new
|
||||
[aider polyglot leaderboard](/docs/leaderboards/), significantly ahead of
|
||||
other top LLMs.
|
||||
The new polyglot benchmark uses many popular coding languages
|
||||
and was designed to be
|
||||
*much more challenging* than aider's original
|
||||
[code editing benchmark](/docs/leaderboards/edit.html).
|
||||
This more clearly distinguishes
|
||||
the performance of
|
||||
today's strongest coding models and
|
||||
leaves headroom for future LLMs.
|
||||
|
||||
{: .note :}
|
||||
See the main
|
||||
[aider leaderboard](https://aider.chat/docs/leaderboards/)
|
||||
for benchmark results from more models.
|
||||
This article only contains a snapshot
|
||||
of results at the time of publication.
|
||||
|
||||
## The polyglot benchmark
|
||||
|
||||
Like aider's original code editing benchmark,
|
||||
the new polyglot benchmark is based on Exercism
|
||||
coding exercises.
|
||||
|
||||
The new polyglot benchmark:
|
||||
|
||||
- Contains coding problems in C++, Go, Java, JavaScript, Python and Rust.
|
||||
The old benchmark was solely based on Python exercises.
|
||||
- Focuses on the *most difficult* 225 exercises out of the 697 that
|
||||
Exercism provides for those languages.
|
||||
The old benchmark simply included all 133 Python exercises,
|
||||
regardless of difficulty.
|
||||
|
||||
## Motivation and goals
|
||||
|
||||
Aider's original code editing benchmark was
|
||||
saturating as the top scores approached and then surpassed 80%.
|
||||
Sonnet's score of 84.2% was based on solving 112 of the 133
|
||||
exercises, leaving only 21 unsolved exercises.
|
||||
New champions were advancing the top score by
|
||||
solving just 1-2 more problems than the previous record.
|
||||
This made it hard to clearly
|
||||
measure the
|
||||
difference in code editing skill between these top models.
|
||||
|
||||
Part of the problem is that many of the original
|
||||
133 Python problems are very easy
|
||||
and provide
|
||||
little challenge to today's frontier LLMs.
|
||||
Models as old as GPT 3.5 Turbo were able to solve half of the
|
||||
133 problems.
|
||||
Such easy problems simply inflate the benchmark scores
|
||||
of modern LLMs without
|
||||
providing any data about which models are better or worse.
|
||||
|
||||
The main goal for a new benchmark
|
||||
was to re-calibrate the scale so that
|
||||
today's top coding LLMs
|
||||
would occupy a wide range of scores between about 5% and 50%.
|
||||
This should leave headroom for future LLMs and
|
||||
make it possible to
|
||||
more clearly compare the relative performance of top models.
|
||||
|
||||
## Designing the polyglot benchmark
|
||||
|
||||
The new benchmark:
|
||||
|
||||
- Tests LLMs with more coding languages, to increase diversity and source a larger pool of problems.
|
||||
- Includes just the most challenging coding problems and excludes easy problems that are solvable by most of today's top coding LLMs.
|
||||
- Includes more total coding problems, to enable more granularity of comparison.
|
||||
|
||||
The new benchmark is based on Exercism coding problems
|
||||
from 6 of the most popular programming languages:
|
||||
|
||||
- C++
|
||||
- Go
|
||||
- Java
|
||||
- JavaScript
|
||||
- Python
|
||||
- Rust
|
||||
|
||||
Exercism provides a total of 697 coding problems in those 6 languages.
|
||||
A set of 7 of today's top coding models each attempted all 697 of
|
||||
the Exercism problems:
|
||||
|
||||
- Sonnet
|
||||
- Haiku
|
||||
- o1 Mini
|
||||
- DeepSeek
|
||||
- GPT-4o
|
||||
- Qwen 32B Coder Instruct
|
||||
- GPT-4o Mini
|
||||
|
||||
Depending on the difficulty of the problems,
|
||||
a different number of solutions were found by the collection of
|
||||
7 models:
|
||||
|
||||
| Solutions<br>found | Number of<br>problems | Cumulative number<br>of problems |
|
||||
|--------|-----------|------------|
|
||||
| 0 | 66 | 66 |
|
||||
| 1 | 61 | 127 |
|
||||
| 2 | 50 | 177 |
|
||||
| 3 | 48 | 225 |
|
||||
| 4 | 53 | 278 |
|
||||
| 5 | 71 | 349 |
|
||||
| 6 | 90 | 439 |
|
||||
| 7 | 258 | 697 |
|
||||
|
||||
In the table above, you can see that 258 of the problems were solved
|
||||
by all 7 LLMs.
|
||||
These problems are far too easy, and wouldn't be good choices for the new benchmark.
|
||||
Instead, we need hard problems like the
|
||||
66 that none of the 7 models were able to solve.
|
||||
|
||||
The new benchmark uses
|
||||
the 225 problems that were solved by 3 or fewer models.
|
||||
This achieves a balance between hard and moderate problems,
|
||||
and provides a large but not excessive total pool of problems.
|
||||
It also represents a good diversity of coding languages:
|
||||
|
||||
| Language | Problems |
|
||||
|-------------|----------|
|
||||
| C++ | 26 |
|
||||
| Go | 39 |
|
||||
| Java | 47 |
|
||||
| JavaScript | 49 |
|
||||
| Python | 34 |
|
||||
| Rust | 30 |
|
||||
| **Total** | **225** |
|
||||
|
||||
## o1
|
||||
|
||||
OpenAI's new o1 model established a very strong
|
||||
top score of 62% on the new benchmark.
|
||||
This still leaves 86 problems of headroom for future models
|
||||
to solve.
|
||||
Given the incredible pace of recent advancements, it
|
||||
will be interesting to see
|
||||
how long it will take for this new benchmark to saturate.
|
||||
|
||||
## Benchmark problems
|
||||
|
||||
The 225 coding problems are available in the
|
||||
[aider polyglot benchmark repo](https://github.com/Aider-AI/polyglot-benchmark)
|
||||
on GitHub.
|
||||
|
||||
|
||||
|
||||
## Results
|
||||
|
||||
<table style="width: 100%; max-width: 800px; margin: auto; border-collapse: collapse; box-shadow: 0 2px 4px rgba(0,0,0,0.1); font-size: 14px;">
|
||||
<thead style="background-color: #f2f2f2;">
|
||||
<tr>
|
||||
<th style="padding: 8px; text-align: left;">Model</th>
|
||||
<th style="padding: 8px; text-align: center;">Percent completed correctly</th>
|
||||
<th style="padding: 8px; text-align: center;">Percent using correct edit format</th>
|
||||
<th style="padding: 8px; text-align: left;">Command</th>
|
||||
<th style="padding: 8px; text-align: center;">Edit format</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% assign edit_sorted = site.data.o1_polyglot_leaderboard | sort: 'pass_rate_2' | reverse %}
|
||||
{% for row in edit_sorted %}
|
||||
<tr style="border-bottom: 1px solid #ddd;">
|
||||
<td style="padding: 8px;">{{ row.model }}</td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.pass_rate_2 }}%</td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.percent_cases_well_formed }}%</td>
|
||||
<td style="padding: 8px;"><code>{{ row.command }}</code></td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.edit_format }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<script src="https://unpkg.com/patternomaly/dist/patternomaly.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script>
|
||||
{% assign data_source = edit_sorted %}
|
||||
{% assign pass_rate_field = "pass_rate_2" %}
|
||||
{% assign highlight_model = "o1-2024" %}
|
||||
{% include leaderboard.js %}
|
||||
</script>
|
||||
<style>
|
||||
tr.selected {
|
||||
color: #0056b3;
|
||||
}
|
||||
table {
|
||||
table-layout: fixed;
|
||||
}
|
||||
td, th {
|
||||
word-wrap: break-word;
|
||||
overflow-wrap: break-word;
|
||||
}
|
||||
td:nth-child(3), td:nth-child(4) {
|
||||
font-size: 12px;
|
||||
}
|
||||
</style>
|
||||
BIN
aider/website/assets/o1-polyglot.jpg
Normal file
BIN
aider/website/assets/o1-polyglot.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 157 KiB |
File diff suppressed because it is too large
Load Diff
@@ -218,7 +218,7 @@
|
||||
## Set the background color for the current item in the completion menu (default: terminal's default text color)
|
||||
#completion-menu-current-bg-color: xxx
|
||||
|
||||
## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light)
|
||||
## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light, or a Pygments builtin style, see https://pygments.org/styles for available themes)
|
||||
#code-theme: default
|
||||
|
||||
## Show diffs when committing changes (default: False)
|
||||
|
||||
@@ -207,7 +207,7 @@
|
||||
## Set the background color for the current item in the completion menu (default: terminal's default text color)
|
||||
#AIDER_COMPLETION_MENU_CURRENT_BG_COLOR=
|
||||
|
||||
## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light)
|
||||
## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light, or a Pygments builtin style, see https://pygments.org/styles for available themes)
|
||||
#AIDER_CODE_THEME=default
|
||||
|
||||
## Show diffs when committing changes (default: False)
|
||||
|
||||
@@ -40,5 +40,5 @@ Using an `.env` file:
|
||||
AIDER_DARK_MODE=true
|
||||
```
|
||||
|
||||
{% include env-keys-tip.md %}
|
||||
{% include keys.md %}
|
||||
|
||||
|
||||
@@ -1163,6 +1163,22 @@ cog.out("```\n")
|
||||
use_system_prompt: true
|
||||
use_temperature: true
|
||||
weak_model_name: null
|
||||
- cache_control: false
|
||||
caches_by_default: false
|
||||
edit_format: diff
|
||||
editor_edit_format: null
|
||||
editor_model_name: null
|
||||
examples_as_sys_msg: true
|
||||
extra_params: null
|
||||
lazy: false
|
||||
name: openrouter/deepseek/deepseek-chat
|
||||
reminder: sys
|
||||
send_undo_reply: false
|
||||
streaming: true
|
||||
use_repo_map: true
|
||||
use_system_prompt: true
|
||||
use_temperature: true
|
||||
weak_model_name: null
|
||||
- cache_control: false
|
||||
caches_by_default: false
|
||||
edit_format: diff
|
||||
@@ -1307,6 +1323,54 @@ cog.out("```\n")
|
||||
use_system_prompt: false
|
||||
use_temperature: false
|
||||
weak_model_name: openrouter/openai/gpt-4o-mini
|
||||
- cache_control: false
|
||||
caches_by_default: false
|
||||
edit_format: diff
|
||||
editor_edit_format: editor-diff
|
||||
editor_model_name: openrouter/openai/gpt-4o
|
||||
examples_as_sys_msg: false
|
||||
extra_params: null
|
||||
lazy: false
|
||||
name: openrouter/openai/o1
|
||||
reminder: user
|
||||
send_undo_reply: false
|
||||
streaming: false
|
||||
use_repo_map: true
|
||||
use_system_prompt: true
|
||||
use_temperature: false
|
||||
weak_model_name: openrouter/openai/gpt-4o-mini
|
||||
- cache_control: false
|
||||
caches_by_default: false
|
||||
edit_format: diff
|
||||
editor_edit_format: editor-diff
|
||||
editor_model_name: openai/gpt-4o
|
||||
examples_as_sys_msg: false
|
||||
extra_params: null
|
||||
lazy: false
|
||||
name: openai/o1
|
||||
reminder: user
|
||||
send_undo_reply: false
|
||||
streaming: false
|
||||
use_repo_map: true
|
||||
use_system_prompt: true
|
||||
use_temperature: false
|
||||
weak_model_name: openai/gpt-4o-mini
|
||||
- cache_control: false
|
||||
caches_by_default: false
|
||||
edit_format: diff
|
||||
editor_edit_format: editor-diff
|
||||
editor_model_name: gpt-4o
|
||||
examples_as_sys_msg: false
|
||||
extra_params: null
|
||||
lazy: false
|
||||
name: o1
|
||||
reminder: user
|
||||
send_undo_reply: false
|
||||
streaming: false
|
||||
use_repo_map: true
|
||||
use_system_prompt: true
|
||||
use_temperature: false
|
||||
weak_model_name: gpt-4o-mini
|
||||
- cache_control: false
|
||||
caches_by_default: false
|
||||
edit_format: diff
|
||||
|
||||
@@ -15,11 +15,7 @@ load whichever is found first.
|
||||
- The root of your git repo.
|
||||
- Your home directory.
|
||||
|
||||
## Storing LLM keys
|
||||
|
||||
{% include special-keys.md %}
|
||||
|
||||
{% include env-keys-tip.md %}
|
||||
{% include keys.md %}
|
||||
|
||||
## A note on lists
|
||||
|
||||
@@ -274,7 +270,7 @@ cog.outl("```")
|
||||
## Set the background color for the current item in the completion menu (default: terminal's default text color)
|
||||
#completion-menu-current-bg-color: xxx
|
||||
|
||||
## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light)
|
||||
## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light, or a Pygments builtin style, see https://pygments.org/styles for available themes)
|
||||
#code-theme: default
|
||||
|
||||
## Show diffs when committing changes (default: False)
|
||||
|
||||
90
aider/website/docs/config/api-keys.md
Normal file
90
aider/website/docs/config/api-keys.md
Normal file
@@ -0,0 +1,90 @@
|
||||
---
|
||||
parent: Configuration
|
||||
nav_order: 5
|
||||
description: Setting API keys for API providers.
|
||||
---
|
||||
|
||||
# API Keys
|
||||
|
||||
Aider lets you specify API keys in a few ways:
|
||||
|
||||
- On the command line
|
||||
- As environment variables
|
||||
- In a `.env` file
|
||||
- In your `.aider.conf.yml` config file
|
||||
|
||||
---
|
||||
|
||||
## OpenAI and Anthropic
|
||||
|
||||
Aider has special support for providing
|
||||
OpenAI and Anthropic API keys
|
||||
via dedicated switches and configuration options.
|
||||
Settings keys for other providers works a bit differently, see below.
|
||||
|
||||
#### Command line
|
||||
|
||||
You can set OpenAI and Anthropic API keys via
|
||||
[command line switches](/docs/config/options.html#api-keys-and-settings)
|
||||
`--openai-api-key` and `--anthropic-api-key`.
|
||||
|
||||
|
||||
#### Environment variables or .env file
|
||||
|
||||
You can also store them in environment variables or a
|
||||
[.env file](/docs/config/dotenv.html), which also works
|
||||
for every API provider:
|
||||
|
||||
```
|
||||
OPENAI_API_KEY=<key>
|
||||
ANTHROPIC_API_KEY=<key>
|
||||
```
|
||||
|
||||
#### Yaml config file
|
||||
You can also set those API keys via special entries in the
|
||||
[yaml config file](/docs/config/aider_conf.html), like this:
|
||||
|
||||
```yaml
|
||||
openai-api-key: <key>
|
||||
anthropic-api-key: <key>
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Other API providers
|
||||
|
||||
All other LLM providers can use one of these other methods to set their API keys.
|
||||
|
||||
#### Command line
|
||||
{: .no_toc }
|
||||
|
||||
Use `--api-key provider=<key>` which has the effect of setting the environment variable `PROVIDER_API_KEY=<key>`. So `--api-key gemini=xxx` would set `GEMINI_API_KEY=xxx`.
|
||||
|
||||
#### Environment variables or .env file
|
||||
{: .no_toc }
|
||||
|
||||
You can set API keys in environment variables.
|
||||
The [.env file](/docs/config/dotenv.html)
|
||||
is a great place to store your API keys and other provider API environment variables:
|
||||
|
||||
```bash
|
||||
GEMINI_API_KEY=foo
|
||||
OPENROUTER_API_KEY=bar
|
||||
DEEPSEEK_API_KEY=baz
|
||||
```
|
||||
|
||||
#### Yaml config file
|
||||
|
||||
|
||||
You can also set API keys in the
|
||||
[`.aider.conf.yml` file](/docs/config/aider_conf.html)
|
||||
via the `api-key` entry:
|
||||
|
||||
```
|
||||
api-key:
|
||||
- gemini=foo # Sets env var GEMINI_API_KEY=foo
|
||||
- openrouter=bar # Sets env var OPENROUTER_API_KEY=bar
|
||||
- deepseek=baz # Sets env var DEEPSEEK_API_KEY=baz
|
||||
```
|
||||
|
||||
@@ -20,11 +20,7 @@ Aider will look for a `.env` file in these locations:
|
||||
|
||||
If the files above exist, they will be loaded in that order. Files loaded last will take priority.
|
||||
|
||||
## Storing LLM keys
|
||||
|
||||
{% include special-keys.md %}
|
||||
|
||||
{% include env-keys-tip.md %}
|
||||
{% include keys.md %}
|
||||
|
||||
## Sample .env file
|
||||
|
||||
@@ -251,7 +247,7 @@ cog.outl("```")
|
||||
## Set the background color for the current item in the completion menu (default: terminal's default text color)
|
||||
#AIDER_COMPLETION_MENU_CURRENT_BG_COLOR=
|
||||
|
||||
## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light)
|
||||
## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light, or a Pygments builtin style, see https://pygments.org/styles for available themes)
|
||||
#AIDER_CODE_THEME=default
|
||||
|
||||
## Show diffs when committing changes (default: False)
|
||||
|
||||
@@ -13,9 +13,7 @@ or review them below.
|
||||
- TOC
|
||||
{:toc}
|
||||
|
||||
## LLM keys
|
||||
|
||||
{% include special-keys.md %}
|
||||
{% include keys.md %}
|
||||
|
||||
## Usage summary
|
||||
|
||||
@@ -383,7 +381,7 @@ Set the background color for the current item in the completion menu (default: t
|
||||
Environment variable: `AIDER_COMPLETION_MENU_CURRENT_BG_COLOR`
|
||||
|
||||
### `--code-theme VALUE`
|
||||
Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light)
|
||||
Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light, or a Pygments builtin style, see https://pygments.org/styles for available themes)
|
||||
Default: default
|
||||
Environment variable: `AIDER_CODE_THEME`
|
||||
|
||||
|
||||
@@ -209,6 +209,45 @@ all the raw information being sent to/from the LLM in the conversation.
|
||||
You can also refer to the
|
||||
[instructions for installing a development version of aider](https://aider.chat/docs/install/optional.html#install-the-development-version-of-aider).
|
||||
|
||||
## What LLMs do you use to build aider?
|
||||
|
||||
Aider writes a lot of its own code, usually about 70% of the new code in each
|
||||
release.
|
||||
People often ask which LLMs I use with aider, when writing aider.
|
||||
Below is a table showing the models I have used recently,
|
||||
extracted from the
|
||||
[public log](https://github.com/aider-ai/aider/blob/main/aider/website/assets/sample-analytics.jsonl)
|
||||
of my
|
||||
[aider analytics](https://aider.chat/docs/more/analytics.html).
|
||||
|
||||
<!--[[[cog
|
||||
import sys
|
||||
sys.path.append(".")
|
||||
import scripts.my_models as my_models
|
||||
stats = my_models.collect_model_stats()
|
||||
html = my_models.format_html_table(stats)
|
||||
cog.out(html)
|
||||
]]]-->
|
||||
<style>
|
||||
table { border-collapse: collapse; width: 100%; }
|
||||
th, td { padding: 8px; text-align: left; border-bottom: 1px solid #ddd; }
|
||||
th { background-color: #f2f2f2; }
|
||||
tr:hover { background-color: #f5f5f5; }
|
||||
.right { text-align: right; }
|
||||
</style>
|
||||
<table>
|
||||
<tr><th>Model Name</th><th class='right'>Total Tokens</th><th class='right'>Percent</th></tr>
|
||||
<tr><td>claude-3-5-sonnet-20241022</td><td class='right'>1,951,105</td><td class='right'>99.0%</td></tr>
|
||||
<tr><td>claude-3-5-haiku-20241022</td><td class='right'>14,008</td><td class='right'>0.7%</td></tr>
|
||||
<tr><td>gpt-4o</td><td class='right'>4,273</td><td class='right'>0.2%</td></tr>
|
||||
<tr><td>openrouter/REDACTED</td><td class='right'>1,234</td><td class='right'>0.1%</td></tr>
|
||||
<tr><td>openai/gpt-4o-mini</td><td class='right'>141</td><td class='right'>0.0%</td></tr>
|
||||
</table>
|
||||
|
||||
{: .note :}
|
||||
Some models show as REDACTED, because they are new or unpopular models.
|
||||
Aider's analytics only records the names of "well known" LLMs.
|
||||
<!--[[[end]]]-->
|
||||
|
||||
## How are the "aider wrote xx% of code" stats computed?
|
||||
|
||||
@@ -220,6 +259,31 @@ by doing something like `git blame` on the repo,
|
||||
and counting up who wrote all the new lines of code in each release.
|
||||
Only lines in source code files are counted, not documentation or prompt files.
|
||||
|
||||
## Why does aider sometimes stop highlighting code in its replies?
|
||||
|
||||
Aider displays the markdown responses that are coming back from the LLM.
|
||||
Usually, the LLM will reply with code in a markdown "code block" with
|
||||
triple backtick fences, like this:
|
||||
|
||||
````
|
||||
Here's some code:
|
||||
|
||||
```
|
||||
print("hello")
|
||||
```
|
||||
````
|
||||
|
||||
But if you've added files to the chat that contain triple backticks,
|
||||
aider needs to tell the LLM to use a different set of fences.
|
||||
Otherwise, the LLM can't safely include your code's triple backticks
|
||||
inside the code blocks that it returns with edits.
|
||||
Aider will use fences like `<source>...</source>` in this case.
|
||||
|
||||
A side effect of this is that the code that aider outputs may no
|
||||
longer be properly highlighted.
|
||||
You will most often notice this if you add markdown files
|
||||
to your chats that contain code blocks.
|
||||
|
||||
## Why is the LLM speaking to me in an unexpected language?
|
||||
|
||||
Aider goes to some effort to prompt the model to use the language that is configured
|
||||
|
||||
@@ -5,41 +5,118 @@ nav_order: 20
|
||||
description: How to install and get started pair programming with aider.
|
||||
---
|
||||
|
||||
# Quick start
|
||||
# Installation
|
||||
{: .no_toc }
|
||||
|
||||
|
||||
## Get started quickly with aider-install
|
||||
|
||||
{% include get-started.md %}
|
||||
|
||||
Or see the
|
||||
[full installation instructions](/docs/install/install.html)
|
||||
for more details,
|
||||
or the
|
||||
[usage instructions](https://aider.chat/docs/usage.html) to start coding with aider.
|
||||
This will install aider in its own separate python environment.
|
||||
If needed,
|
||||
aider-install will also install a separate version of python 3.12 to use with aider.
|
||||
|
||||
{% include venv-pipx.md %}
|
||||
Once aider is installed,
|
||||
there are also some [optional install steps](/docs/install/optional.html).
|
||||
|
||||
<div class="video-container">
|
||||
<video controls poster="/assets/install.jpg">
|
||||
<source src="/assets/install.mp4" type="video/mp4">
|
||||
<a href="/assets/install.mp4">Installing aider</a>
|
||||
</video>
|
||||
</div>
|
||||
See the [usage instructions](https://aider.chat/docs/usage.html) to start coding with aider.
|
||||
|
||||
<style>
|
||||
.video-container {
|
||||
position: relative;
|
||||
padding-bottom: 76.2711864407%;
|
||||
height: 0;
|
||||
overflow: hidden;
|
||||
}
|
||||
## One-liners
|
||||
|
||||
.video-container video {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
</style>
|
||||
These one-liners will install aider, along with python 3.12 if needed.
|
||||
They are based on the
|
||||
[uv installers](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
#### Windows
|
||||
|
||||
```powershell
|
||||
powershell -ExecutionPolicy ByPass -c "irm https://aider.chat/install.ps1 | iex"
|
||||
```
|
||||
|
||||
#### Mac & Linux
|
||||
|
||||
Use curl to download the script and execute it with sh:
|
||||
|
||||
```bash
|
||||
curl -LsSf https://aider.chat/install.sh | sh
|
||||
```
|
||||
|
||||
If your system doesn't have curl, you can use wget:
|
||||
|
||||
```bash
|
||||
wget -qO- https://aider.chat/install.sh | sh
|
||||
```
|
||||
|
||||
|
||||
## Install with uv
|
||||
|
||||
You can install aider with uv:
|
||||
|
||||
```bash
|
||||
python -m pip install uv # If you need to install uv
|
||||
uv tool install --force --python python3.12 aider-chat@latest
|
||||
```
|
||||
|
||||
This will install uv using your existing python version 3.8-3.13,
|
||||
and use it to install aider.
|
||||
If needed,
|
||||
uv will automatically install a separate python 3.12 to use with aider.
|
||||
|
||||
Also see the
|
||||
[docs on other methods for installing uv itself](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
## Install with pipx
|
||||
|
||||
You can install aider with pipx:
|
||||
|
||||
```bash
|
||||
python -m pip install pipx # If you need to install pipx
|
||||
pipx install aider-chat
|
||||
```
|
||||
|
||||
You can use pipx to install aider with python versions 3.9-3.12.
|
||||
|
||||
Also see the
|
||||
[docs on other methods for installing pipx itself](https://pipx.pypa.io/stable/installation/).
|
||||
|
||||
## Other install methods
|
||||
|
||||
You can install aider with the methods described below, but one of the above
|
||||
methods is usually safer.
|
||||
|
||||
#### Install with pip
|
||||
|
||||
If you install with pip, you should consider
|
||||
using a
|
||||
[virtual environment](https://docs.python.org/3/library/venv.html)
|
||||
to keep aider's dependencies separated.
|
||||
|
||||
|
||||
You can use pip to install aider with python versions 3.9-3.12.
|
||||
|
||||
```bash
|
||||
# Install aider
|
||||
python -m pip install -U --upgrade-strategy only-if-needed aider-chat
|
||||
|
||||
# To work with GPT-4o:
|
||||
aider --4o --openai-api-key sk-xxx...
|
||||
|
||||
# To work with Claude 3.5 Sonnet:
|
||||
aider --sonnet --anthropic-api-key sk-xxx...
|
||||
```
|
||||
|
||||
{% include python-m-aider.md %}
|
||||
|
||||
#### Installing with package managers
|
||||
|
||||
It's best to install aider using one of the methods
|
||||
recommended above.
|
||||
While aider is available in a number of system package managers,
|
||||
they often install aider with incorrect dependencies.
|
||||
|
||||
## Next steps...
|
||||
|
||||
There are some [optional install steps](/docs/install/optional.html) you could consider.
|
||||
See the [usage instructions](https://aider.chat/docs/usage.html) to start coding with aider.
|
||||
|
||||
|
||||
@@ -8,9 +8,9 @@ nav_order: 900
|
||||
|
||||
You can use aider in GitHub Codespaces via the built-in Terminal pane.
|
||||
See below for an example,
|
||||
but you can see the
|
||||
but you can just follow the
|
||||
[main install instructions](/docs/install.html)
|
||||
for all the details.
|
||||
inside your codespace terminal.
|
||||
|
||||
|
||||
<div class="video-container">
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
---
|
||||
parent: Installation
|
||||
nav_order: 10
|
||||
---
|
||||
|
||||
# Installing aider
|
||||
{: .no_toc }
|
||||
|
||||
- TOC
|
||||
{:toc}
|
||||
|
||||
## Python version
|
||||
|
||||
Aider currently works with python 3.9-3.12.
|
||||
|
||||
## Install git
|
||||
|
||||
Make sure you have git installed.
|
||||
Here are
|
||||
[instructions for installing git in various environments](https://github.com/git-guides/install-git).
|
||||
|
||||
## Get your API key
|
||||
|
||||
To work with OpenAI's models like GPT-4o or GPT-3.5 you need a paid
|
||||
[OpenAI API key](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key).
|
||||
Note that this is different than being a "ChatGPT Plus" subscriber.
|
||||
|
||||
To work with Anthropic's models like Claude 3.5 Sonnet you need a paid
|
||||
[Anthropic API key](https://docs.anthropic.com/claude/reference/getting-started-with-the-api).
|
||||
|
||||
|
||||
{% include venv-pipx.md %}
|
||||
|
||||
## Mac/Linux install
|
||||
|
||||
```bash
|
||||
# Install aider
|
||||
python -m pip install -U --upgrade-strategy only-if-needed aider-chat
|
||||
|
||||
# To work with GPT-4o:
|
||||
aider --4o --openai-api-key sk-xxx...
|
||||
|
||||
# To work with Claude 3.5 Sonnet:
|
||||
aider --sonnet --anthropic-api-key sk-xxx...
|
||||
```
|
||||
|
||||
## Windows install
|
||||
|
||||
```bash
|
||||
# Install aider
|
||||
python -m pip install -U --upgrade-strategy only-if-needed aider-chat
|
||||
|
||||
# To work with GPT-4o:
|
||||
aider --4o --openai-api-key sk-xxx...
|
||||
|
||||
# To work with Claude 3.5 Sonnet:
|
||||
aider --sonnet --anthropic-api-key sk-xxx...
|
||||
```
|
||||
|
||||
{% include python-m-aider.md %}
|
||||
|
||||
## Working with other LLMs
|
||||
|
||||
{% include works-best.md %}
|
||||
|
||||
## You are done!
|
||||
|
||||
There are some [optional install steps](/docs/install/optional.html) you could consider.
|
||||
See the [usage instructions](https://aider.chat/docs/usage.html) to start coding with aider.
|
||||
|
||||
@@ -11,10 +11,29 @@ The steps below are completely optional.
|
||||
- TOC
|
||||
{:toc}
|
||||
|
||||
## Install git
|
||||
|
||||
## Store your api keys
|
||||
Aider works best if you have git installed.
|
||||
Here are
|
||||
[instructions for installing git in various environments](https://github.com/git-guides/install-git).
|
||||
|
||||
You can [store your api keys in a .env file](/docs/config/dotenv.html)
|
||||
## Get your API key
|
||||
|
||||
To work with OpenAI's models like GPT-4o or o1-preview you need a paid
|
||||
[OpenAI API key](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key).
|
||||
Note that this is different than being a "ChatGPT Plus" subscriber.
|
||||
|
||||
To work with Anthropic's models like Claude 3.5 Sonnet you need a paid
|
||||
[Anthropic API key](https://docs.anthropic.com/claude/reference/getting-started-with-the-api).
|
||||
|
||||
|
||||
### Working with other LLMs
|
||||
|
||||
{% include works-best.md %}
|
||||
|
||||
### Store your api keys
|
||||
|
||||
You can [store your api keys in configuration or env files](/docs/config/api-keys.html)
|
||||
and they will be loaded automatically whenever you run aider.
|
||||
|
||||
## Enable Playwright
|
||||
@@ -55,13 +74,17 @@ Installing PortAudio is completely optional, but can usually be accomplished lik
|
||||
- For Linux, do `sudo apt-get install libportaudio2`
|
||||
- Some linux environments may also need `sudo apt install libasound2-plugins`
|
||||
|
||||
## Add aider to your editor
|
||||
## Add aider to your IDE/editor
|
||||
|
||||
Other projects have integrated aider into some IDE/editors.
|
||||
It's not clear if they are tracking the latest
|
||||
You can use
|
||||
[aider's `--watch-files` mode](https://aider.chat/docs/usage/watch.html)
|
||||
to integrate with any IDE or editor.
|
||||
|
||||
There are a number of 3rd party aider plugins for various IDE/editors.
|
||||
It's not clear how well they are tracking the latest
|
||||
versions of aider,
|
||||
so it may be best to just run the latest
|
||||
aider in a terminal alongside your editor.
|
||||
aider in a terminal alongside your editor and use `--watch-files`.
|
||||
|
||||
### NeoVim
|
||||
|
||||
@@ -71,29 +94,22 @@ aider in a terminal alongside your editor.
|
||||
|
||||
### VS Code
|
||||
|
||||
joshuavial also confirmed that aider works inside a VS Code terminal window.
|
||||
Aider detects if it is running inside VSCode and turns off pretty/color output,
|
||||
since the VSCode terminal doesn't seem to support it well.
|
||||
You can run aider inside a VS Code terminal window.
|
||||
There are a number of 3rd party
|
||||
[aider plugins for VSCode](https://marketplace.visualstudio.com/search?term=aider%20-kodu&target=VSCode&category=All%20categories&sortBy=Relevance).
|
||||
|
||||
### Other editors
|
||||
|
||||
If you are interested in creating an aider plugin for your favorite editor,
|
||||
please let me know by opening a
|
||||
please let us know by opening a
|
||||
[GitHub issue](https://github.com/Aider-AI/aider/issues).
|
||||
|
||||
|
||||
## Install the development version of aider
|
||||
|
||||
If you want the very latest development version of aider
|
||||
you can install directly from GitHub:
|
||||
you can install it like this:
|
||||
|
||||
```
|
||||
python -m pip install --upgrade git+https://github.com/Aider-AI/aider.git
|
||||
aider --install-main-branch
|
||||
```
|
||||
|
||||
If you've git cloned the aider repository already, you can install "live" from your local copy. This is mostly useful if you are developing aider and want your current modifications to take effect immediately.
|
||||
|
||||
```
|
||||
python -m pip install -e .
|
||||
```
|
||||
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
---
|
||||
parent: Installation
|
||||
nav_order: 100
|
||||
---
|
||||
|
||||
# Install with pipx
|
||||
|
||||
A recommended way to install aider is with pipx:
|
||||
|
||||
```bash
|
||||
python -m pip install pipx # If you need to install pipx
|
||||
pipx install aider-chat
|
||||
```
|
||||
|
||||
See also the
|
||||
[docs on other methods for installing pipx itself](https://pipx.pypa.io/stable/installation/).
|
||||
|
||||
|
||||
## pipx on replit
|
||||
|
||||
{% include replit-pipx.md %}
|
||||
|
||||
|
||||
{% include conflicts.md %}
|
||||
8
aider/website/docs/install/replit.md
Normal file
8
aider/website/docs/install/replit.md
Normal file
@@ -0,0 +1,8 @@
|
||||
---
|
||||
parent: Installation
|
||||
nav_order: 900
|
||||
---
|
||||
|
||||
### Replit
|
||||
|
||||
{% include replit-pipx.md %}
|
||||
@@ -1,18 +0,0 @@
|
||||
---
|
||||
parent: Installation
|
||||
nav_order: 100
|
||||
---
|
||||
|
||||
# Install with uv
|
||||
|
||||
A recommended way to install aider is with uv:
|
||||
|
||||
```bash
|
||||
python -m pip install uv # If you need to install uv
|
||||
uv tool install --python python3.12 aider-chat
|
||||
```
|
||||
|
||||
See also the
|
||||
[docs on other methods for installing uv itself](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
{% include conflicts.md %}
|
||||
10
aider/website/docs/leaderboards/by-release-date.md
Normal file
10
aider/website/docs/leaderboards/by-release-date.md
Normal file
@@ -0,0 +1,10 @@
|
||||
---
|
||||
title: Scores by release date
|
||||
parent: Aider LLM Leaderboards
|
||||
nav_order: 200
|
||||
---
|
||||
|
||||
## LLM code editing skill by model release date
|
||||
|
||||
[](https://aider.chat/assets/models-over-time.svg)
|
||||
|
||||
14
aider/website/docs/leaderboards/contrib.md
Normal file
14
aider/website/docs/leaderboards/contrib.md
Normal file
@@ -0,0 +1,14 @@
|
||||
---
|
||||
parent: Aider LLM Leaderboards
|
||||
nav_order: 900
|
||||
---
|
||||
|
||||
# Contributing results
|
||||
|
||||
Contributions of benchmark results are welcome!
|
||||
See the
|
||||
[benchmark README](https://github.com/Aider-AI/aider/blob/main/benchmark/README.md)
|
||||
for information on running aider's code editing benchmarks.
|
||||
Submit results by opening a PR with edits to the
|
||||
[benchmark results data files](https://github.com/Aider-AI/aider/blob/main/aider/website/_data/).
|
||||
|
||||
134
aider/website/docs/leaderboards/edit.md
Normal file
134
aider/website/docs/leaderboards/edit.md
Normal file
@@ -0,0 +1,134 @@
|
||||
---
|
||||
parent: Aider LLM Leaderboards
|
||||
highlight_image: /assets/leaderboard.jpg
|
||||
nav_order: 50
|
||||
description: Quantitative benchmark of basic LLM code editing skill.
|
||||
---
|
||||
|
||||
# Code editing leaderboard
|
||||
|
||||
|
||||
{: .note :}
|
||||
This old
|
||||
[aider code editing leaderboard](edit.html)
|
||||
has been replaced by the
|
||||
new, much more challenging
|
||||
[polyglot leaderboard](/docs/leaderboards/).
|
||||
|
||||
[Aider's code editing benchmark](/docs/benchmarks.html#the-benchmark) asks the LLM to edit python source files to complete 133 small coding exercises
|
||||
from Exercism.
|
||||
This measures the LLM's coding ability, and whether it can
|
||||
write new code that integrates into existing code.
|
||||
The model also has to successfully apply all its changes to the source file without human intervention.
|
||||
|
||||
<input type="text" id="editSearchInput" placeholder="Search..." style="width: 100%; max-width: 800px; margin: 10px auto; padding: 8px; display: block; border: 1px solid #ddd; border-radius: 4px;">
|
||||
|
||||
<table style="width: 100%; max-width: 800px; margin: auto; border-collapse: collapse; box-shadow: 0 2px 4px rgba(0,0,0,0.1); font-size: 14px;">
|
||||
<thead style="background-color: #f2f2f2;">
|
||||
<tr>
|
||||
<th style="padding: 8px; text-align: left;">Model</th>
|
||||
<th style="padding: 8px; text-align: center;">Percent completed correctly</th>
|
||||
<th style="padding: 8px; text-align: center;">Percent using correct edit format</th>
|
||||
<th style="padding: 8px; text-align: left;">Command</th>
|
||||
<th style="padding: 8px; text-align: center;">Edit format</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% assign edit_sorted = site.data.edit_leaderboard | sort: 'pass_rate_2' | reverse %}
|
||||
{% for row in edit_sorted %}
|
||||
<tr style="border-bottom: 1px solid #ddd;">
|
||||
<td style="padding: 8px;">{{ row.model }}</td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.pass_rate_2 }}%</td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.percent_cases_well_formed }}%</td>
|
||||
<td style="padding: 8px;"><code>{{ row.command }}</code></td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.edit_format }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<canvas id="editChart" width="800" height="450" style="margin-top: 20px"></canvas>
|
||||
<script src="https://unpkg.com/patternomaly/dist/patternomaly.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script>
|
||||
{% assign data_source = edit_sorted %}
|
||||
{% assign pass_rate_field = "pass_rate_2" %}
|
||||
{% include leaderboard.js %}
|
||||
</script>
|
||||
<style>
|
||||
tr.selected {
|
||||
color: #0056b3;
|
||||
}
|
||||
table {
|
||||
table-layout: fixed;
|
||||
}
|
||||
td, th {
|
||||
word-wrap: break-word;
|
||||
overflow-wrap: break-word;
|
||||
}
|
||||
td:nth-child(3), td:nth-child(4) {
|
||||
font-size: 12px;
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
## Notes on benchmarking results
|
||||
|
||||
The key benchmarking results are:
|
||||
|
||||
- **Percent completed correctly** - Measures what percentage of the coding tasks that the LLM completed successfully. To complete a task, the LLM must solve the programming assignment *and* edit the code to implement that solution.
|
||||
- **Percent using correct edit format** - Measures the percent of coding tasks where the LLM complied with the edit format specified in the system prompt. If the LLM makes edit mistakes, aider will give it feedback and ask for a fixed copy of the edit. The best models can reliably conform to the edit format, without making errors.
|
||||
|
||||
|
||||
## Notes on the edit format
|
||||
|
||||
Aider uses different "edit formats" to collect code edits from different LLMs.
|
||||
The "whole" format is the easiest for an LLM to use, but it uses a lot of tokens
|
||||
and may limit how large a file can be edited.
|
||||
Models which can use one of the diff formats are much more efficient,
|
||||
using far fewer tokens.
|
||||
Models that use a diff-like format are able to
|
||||
edit larger files with less cost and without hitting token limits.
|
||||
|
||||
Aider is configured to use the best edit format for the popular OpenAI and Anthropic models
|
||||
and the [other models recommended on the LLM page](/docs/llms.html).
|
||||
For lesser known models aider will default to using the "whole" editing format
|
||||
since it is the easiest format for an LLM to use.
|
||||
|
||||
## Contributing benchmark results
|
||||
|
||||
Contributions of benchmark results are welcome!
|
||||
See the
|
||||
[benchmark README](https://github.com/Aider-AI/aider/blob/main/benchmark/README.md)
|
||||
for information on running aider's code editing benchmarks.
|
||||
Submit results by opening a PR with edits to the
|
||||
[benchmark results data files](https://github.com/Aider-AI/aider/blob/main/aider/website/_data/).
|
||||
|
||||
|
||||
<p class="post-date">
|
||||
By Paul Gauthier,
|
||||
last updated
|
||||
<!--[[[cog
|
||||
import subprocess
|
||||
import datetime
|
||||
|
||||
files = [
|
||||
'aider/website/docs/leaderboards/index.md',
|
||||
'aider/website/_data/edit_leaderboard.yml',
|
||||
'aider/website/_data/refactor_leaderboard.yml'
|
||||
]
|
||||
|
||||
def get_last_modified_date(file):
|
||||
result = subprocess.run(['git', 'log', '-1', '--format=%ct', file], capture_output=True, text=True)
|
||||
if result.returncode == 0:
|
||||
timestamp = int(result.stdout.strip())
|
||||
return datetime.datetime.fromtimestamp(timestamp)
|
||||
return datetime.datetime.min
|
||||
|
||||
mod_dates = [get_last_modified_date(file) for file in files]
|
||||
latest_mod_date = max(mod_dates)
|
||||
cog.out(f"{latest_mod_date.strftime('%B %d, %Y.')}")
|
||||
]]]-->
|
||||
December 16, 2024.
|
||||
<!--[[[end]]]-->
|
||||
</p>
|
||||
@@ -2,32 +2,43 @@
|
||||
highlight_image: /assets/leaderboard.jpg
|
||||
nav_order: 950
|
||||
description: Quantitative benchmarks of LLM code editing skill.
|
||||
has_children: true
|
||||
---
|
||||
|
||||
|
||||
# Aider LLM Leaderboards
|
||||
{: .no_toc }
|
||||
|
||||
Aider works best with LLMs which are good at *editing* code, not just good at writing
|
||||
code.
|
||||
To evaluate an LLM's editing skill, aider uses a pair of benchmarks that
|
||||
To evaluate an LLM's editing skill, aider uses benchmarks that
|
||||
assess a model's ability to consistently follow the system prompt
|
||||
to successfully edit code.
|
||||
|
||||
The leaderboards below report the results from a number of popular LLMs.
|
||||
The leaderboards report the results from a number of popular LLMs.
|
||||
While [aider can connect to almost any LLM](/docs/llms.html),
|
||||
it works best with models that score well on the benchmarks.
|
||||
|
||||
See the following sections for benchmark
|
||||
results and additional information:
|
||||
- TOC
|
||||
{:toc}
|
||||
|
||||
## Code editing leaderboard
|
||||
{: .note :}
|
||||
The
|
||||
[original aider code editing leaderboard](edit.html)
|
||||
has been replaced by this
|
||||
new, much more challenging
|
||||
[polyglot leaderboard](https://aider.chat/2024/12/21/polyglot.html).
|
||||
|
||||
[Aider's code editing benchmark](/docs/benchmarks.html#the-benchmark) asks the LLM to edit python source files to complete 133 small coding exercises
|
||||
## Polyglot leaderboard
|
||||
|
||||
[Aider's polyglot benchmark](/docs/benchmarks.html#the-benchmark)
|
||||
asks the LLM to edit source files to complete 225 coding exercises
|
||||
from Exercism.
|
||||
This measures the LLM's coding ability, and whether it can
|
||||
It contains exercises in many popular programming languages:
|
||||
C++, Go, Java, JavaScript, Python and Rust.
|
||||
The 225 exercises were purposely selected to be the *hardest*
|
||||
that Exercism offered in those languages, to provide
|
||||
a strong coding challenge to LLMs.
|
||||
|
||||
This benchmark measures the LLM's coding ability in popular languages,
|
||||
and whether it can
|
||||
write new code that integrates into existing code.
|
||||
The model also has to successfully apply all its changes to the source file without human intervention.
|
||||
|
||||
@@ -44,7 +55,7 @@ The model also has to successfully apply all its changes to the source file with
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% assign edit_sorted = site.data.edit_leaderboard | sort: 'pass_rate_2' | reverse %}
|
||||
{% assign edit_sorted = site.data.polyglot_leaderboard | sort: 'pass_rate_2' | reverse %}
|
||||
{% for row in edit_sorted %}
|
||||
<tr style="border-bottom: 1px solid #ddd;">
|
||||
<td style="padding: 8px;">{{ row.model }}</td>
|
||||
@@ -57,11 +68,16 @@ The model also has to successfully apply all its changes to the source file with
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
### Aider polyglot benchmark results
|
||||
|
||||
<canvas id="editChart" width="800" height="450" style="margin-top: 20px"></canvas>
|
||||
<script src="https://unpkg.com/patternomaly/dist/patternomaly.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script>
|
||||
{% include edit-leaderboard.js %}
|
||||
{% assign data_source = edit_sorted %}
|
||||
{% assign pass_rate_field = "pass_rate_2" %}
|
||||
{% assign highlight_model = "xxxxxxxxxxx" %}
|
||||
{% include leaderboard.js %}
|
||||
</script>
|
||||
<style>
|
||||
tr.selected {
|
||||
@@ -79,84 +95,7 @@ The model also has to successfully apply all its changes to the source file with
|
||||
}
|
||||
</style>
|
||||
|
||||
## Code refactoring leaderboard
|
||||
|
||||
[Aider's refactoring benchmark](https://github.com/Aider-AI/refactor-benchmark) asks the LLM to refactor 89 large methods from large python classes. This is a more challenging benchmark, which tests the model's ability to output long chunks of code without skipping sections or making mistakes. It was developed to provoke and measure [GPT-4 Turbo's "lazy coding" habit](/2023/12/21/unified-diffs.html).
|
||||
|
||||
The refactoring benchmark requires a large context window to
|
||||
work with large source files.
|
||||
Therefore, results are available for fewer models.
|
||||
|
||||
<input type="text" id="refacSearchInput" placeholder="Search..." style="width: 100%; max-width: 800px; margin: 10px auto; padding: 8px; display: block; border: 1px solid #ddd; border-radius: 4px;">
|
||||
|
||||
<table style="width: 100%; max-width: 800px; margin: auto; border-collapse: collapse; box-shadow: 0 2px 4px rgba(0,0,0,0.1); font-size: 14px;">
|
||||
<thead style="background-color: #f2f2f2;">
|
||||
<tr>
|
||||
<th style="padding: 8px; text-align: left;">Model</th>
|
||||
<th style="padding: 8px; text-align: center;">Percent completed correctly</th>
|
||||
<th style="padding: 8px; text-align: center;">Percent using correct edit format</th>
|
||||
<th style="padding: 8px; text-align: left;">Command</th>
|
||||
<th style="padding: 8px; text-align: center;">Edit format</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% assign refac_sorted = site.data.refactor_leaderboard | sort: 'pass_rate_1' | reverse %}
|
||||
{% for row in refac_sorted %}
|
||||
<tr style="border-bottom: 1px solid #ddd;">
|
||||
<td style="padding: 8px;">{{ row.model }}</td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.pass_rate_1 }}%</td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.percent_cases_well_formed }}%</td>
|
||||
<td style="padding: 8px;"><code>{{ row.command }}</code></td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.edit_format }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<canvas id="refacChart" width="800" height="450" style="margin-top: 20px"></canvas>
|
||||
<script src="https://unpkg.com/patternomaly/dist/patternomaly.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script>
|
||||
{% include refactor-leaderboard.js %}
|
||||
</script>
|
||||
|
||||
|
||||
## LLM code editing skill by model release date
|
||||
|
||||
[](https://aider.chat/assets/models-over-time.svg)
|
||||
|
||||
|
||||
## Notes on benchmarking results
|
||||
|
||||
The key benchmarking results are:
|
||||
|
||||
- **Percent completed correctly** - Measures what percentage of the coding tasks that the LLM completed successfully. To complete a task, the LLM must solve the programming assignment *and* edit the code to implement that solution.
|
||||
- **Percent using correct edit format** - Measures the percent of coding tasks where the LLM complied with the edit format specified in the system prompt. If the LLM makes edit mistakes, aider will give it feedback and ask for a fixed copy of the edit. The best models can reliably conform to the edit format, without making errors.
|
||||
|
||||
|
||||
## Notes on the edit format
|
||||
|
||||
Aider uses different "edit formats" to collect code edits from different LLMs.
|
||||
The "whole" format is the easiest for an LLM to use, but it uses a lot of tokens
|
||||
and may limit how large a file can be edited.
|
||||
Models which can use one of the diff formats are much more efficient,
|
||||
using far fewer tokens.
|
||||
Models that use a diff-like format are able to
|
||||
edit larger files with less cost and without hitting token limits.
|
||||
|
||||
Aider is configured to use the best edit format for the popular OpenAI and Anthropic models
|
||||
and the [other models recommended on the LLM page](/docs/llms.html).
|
||||
For lesser known models aider will default to using the "whole" editing format
|
||||
since it is the easiest format for an LLM to use.
|
||||
|
||||
## Contributing benchmark results
|
||||
|
||||
Contributions of benchmark results are welcome!
|
||||
See the
|
||||
[benchmark README](https://github.com/Aider-AI/aider/blob/main/benchmark/README.md)
|
||||
for information on running aider's code editing benchmarks.
|
||||
Submit results by opening a PR with edits to the
|
||||
[benchmark results data files](https://github.com/Aider-AI/aider/blob/main/aider/website/_data/).
|
||||
|
||||
|
||||
<p class="post-date">
|
||||
@@ -183,6 +122,6 @@ mod_dates = [get_last_modified_date(file) for file in files]
|
||||
latest_mod_date = max(mod_dates)
|
||||
cog.out(f"{latest_mod_date.strftime('%B %d, %Y.')}")
|
||||
]]]-->
|
||||
December 11, 2024.
|
||||
December 26, 2024.
|
||||
<!--[[[end]]]-->
|
||||
</p>
|
||||
|
||||
29
aider/website/docs/leaderboards/notes.md
Normal file
29
aider/website/docs/leaderboards/notes.md
Normal file
@@ -0,0 +1,29 @@
|
||||
---
|
||||
parent: Aider LLM Leaderboards
|
||||
nav_order: 800
|
||||
---
|
||||
|
||||
# Benchmark notes
|
||||
|
||||
## Notes on benchmarking results
|
||||
|
||||
The key benchmarking results are:
|
||||
|
||||
- **Percent completed correctly** - Measures what percentage of the coding tasks that the LLM completed successfully. To complete a task, the LLM must solve the programming assignment *and* edit the code to implement that solution.
|
||||
- **Percent using correct edit format** - Measures the percent of coding tasks where the LLM complied with the edit format specified in the system prompt. If the LLM makes edit mistakes, aider will give it feedback and ask for a fixed copy of the edit. The best models can reliably conform to the edit format, without making errors.
|
||||
|
||||
|
||||
## Notes on the edit format
|
||||
|
||||
Aider uses different "edit formats" to collect code edits from different LLMs.
|
||||
The "whole" format is the easiest for an LLM to use, but it uses a lot of tokens
|
||||
and may limit how large a file can be edited.
|
||||
Models which can use one of the diff formats are much more efficient,
|
||||
using far fewer tokens.
|
||||
Models that use a diff-like format are able to
|
||||
edit larger files with less cost and without hitting token limits.
|
||||
|
||||
Aider is configured to use the best edit format for the popular OpenAI and Anthropic models
|
||||
and the [other models recommended on the LLM page](/docs/llms.html).
|
||||
For lesser known models aider will default to using the "whole" editing format
|
||||
since it is the easiest format for an LLM to use.
|
||||
52
aider/website/docs/leaderboards/refactor.md
Normal file
52
aider/website/docs/leaderboards/refactor.md
Normal file
@@ -0,0 +1,52 @@
|
||||
---
|
||||
parent: Aider LLM Leaderboards
|
||||
highlight_image: /assets/leaderboard.jpg
|
||||
nav_order: 100
|
||||
description: Quantitative benchmark of LLM code refactoring skill.
|
||||
---
|
||||
|
||||
|
||||
## Refactoring leaderboard
|
||||
|
||||
[Aider's refactoring benchmark](https://github.com/Aider-AI/refactor-benchmark) asks the LLM to refactor 89 large methods from large python classes. This is a more challenging benchmark, which tests the model's ability to output long chunks of code without skipping sections or making mistakes. It was developed to provoke and measure [GPT-4 Turbo's "lazy coding" habit](/2023/12/21/unified-diffs.html).
|
||||
|
||||
The refactoring benchmark requires a large context window to
|
||||
work with large source files.
|
||||
Therefore, results are available for fewer models.
|
||||
|
||||
<input type="text" id="editSearchInput" placeholder="Search..." style="width: 100%; max-width: 800px; margin: 10px auto; padding: 8px; display: block; border: 1px solid #ddd; border-radius: 4px;">
|
||||
|
||||
<table style="width: 100%; max-width: 800px; margin: auto; border-collapse: collapse; box-shadow: 0 2px 4px rgba(0,0,0,0.1); font-size: 14px;">
|
||||
<thead style="background-color: #f2f2f2;">
|
||||
<tr>
|
||||
<th style="padding: 8px; text-align: left;">Model</th>
|
||||
<th style="padding: 8px; text-align: center;">Percent completed correctly</th>
|
||||
<th style="padding: 8px; text-align: center;">Percent using correct edit format</th>
|
||||
<th style="padding: 8px; text-align: left;">Command</th>
|
||||
<th style="padding: 8px; text-align: center;">Edit format</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% assign refac_sorted = site.data.refactor_leaderboard | sort: 'pass_rate_1' | reverse %}
|
||||
{% for row in refac_sorted %}
|
||||
<tr style="border-bottom: 1px solid #ddd;">
|
||||
<td style="padding: 8px;">{{ row.model }}</td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.pass_rate_1 }}%</td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.percent_cases_well_formed }}%</td>
|
||||
<td style="padding: 8px;"><code>{{ row.command }}</code></td>
|
||||
<td style="padding: 8px; text-align: center;">{{ row.edit_format }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<canvas id="editChart" width="800" height="450" style="margin-top: 20px"></canvas>
|
||||
<script src="https://unpkg.com/patternomaly/dist/patternomaly.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script>
|
||||
{% assign data_source = refac_sorted %}
|
||||
{% assign pass_rate_field = "pass_rate_1" %}
|
||||
{% include leaderboard.js %}
|
||||
</script>
|
||||
|
||||
|
||||
@@ -95,6 +95,8 @@ cog.out(''.join(lines))
|
||||
- TOGETHERAI_API_KEY
|
||||
- VOLCENGINE_API_KEY
|
||||
- VOYAGE_API_KEY
|
||||
- WATSONX_API_KEY
|
||||
- WX_API_KEY
|
||||
- XAI_API_KEY
|
||||
- XINFERENCE_API_KEY
|
||||
<!--[[[end]]]-->
|
||||
|
||||
@@ -20,7 +20,7 @@ copy of each source file that needs changes.
|
||||
While simple, it can be slow and costly because the LLM has to return
|
||||
the *entire file* even if just a few lines are edited.
|
||||
|
||||
The format expects the file path just before the fenced file content:
|
||||
The whole format expects the file path just before the fenced file content:
|
||||
|
||||
````
|
||||
show_greeting.py
|
||||
@@ -28,7 +28,7 @@ show_greeting.py
|
||||
import sys
|
||||
|
||||
def greeting(name):
|
||||
print(f"Hey {{name}}")
|
||||
print("Hey", name)
|
||||
|
||||
if __name__ == '__main__':
|
||||
greeting(sys.argv[1])
|
||||
@@ -42,7 +42,7 @@ The "diff" edit format asks the LLM to specify file edits as a series of search/
|
||||
This is an efficient format, because the model only needs to return parts of the file
|
||||
which have changes.
|
||||
|
||||
They are formatted using a syntax similar to the git merge conflict resolution markings,
|
||||
Edits are formatted using a syntax similar to the git merge conflict resolution markings,
|
||||
with the file path right before a fenced block:
|
||||
|
||||
````
|
||||
@@ -62,7 +62,7 @@ from flask import Flask
|
||||
The "diff-fenced" edit format is based on the diff format, but
|
||||
the file path is placed inside the fence.
|
||||
It is primarily used with the Gemini family of models,
|
||||
which often fail to conform to fencing approach specified in the diff format.
|
||||
which often fail to conform to the fencing approach specified in the diff format.
|
||||
|
||||
````
|
||||
```
|
||||
@@ -84,7 +84,10 @@ This is an efficient format, because the model only needs to return parts of the
|
||||
which have changes.
|
||||
|
||||
It was mainly used to the GPT-4 Turbo family of models,
|
||||
to reduce their "lazy coding" tendencies with other edit formats.
|
||||
because it reduced their "lazy coding" tendencies.
|
||||
With other edit formats the GPT-4 Turbo models tended to elide
|
||||
large sections of code and replace them with "# ... original code here ..."
|
||||
style comments.
|
||||
|
||||
|
||||
````
|
||||
@@ -104,3 +107,10 @@ to reduce their "lazy coding" tendencies with other edit formats.
|
||||
These are streamlined versions of the diff and whole formats, intended to be used
|
||||
with `--editor-edit-format` when using
|
||||
[architect mode](/docs/usage/modes.html).
|
||||
The actual edit format is the same, but aider uses a simpler prompt that
|
||||
is more narrowly focused on just editing the file as opposed to
|
||||
solving the coding task.
|
||||
The architect model resolves the coding task and
|
||||
provides plain text instructions about which file changes need to be made.
|
||||
The editor interprets those instructions to produce the
|
||||
syntactically correct diff or whole edits.
|
||||
|
||||
@@ -14,12 +14,11 @@ You may see an error message like this:
|
||||
|
||||
> aider: The term 'aider' is not recognized as a name of a cmdlet, function, script file, or executable program. Check the spelling of the name, or if a path was included, verify that the path is correct and try again.
|
||||
|
||||
Below is the most fail safe way to install and run aider in these situations:
|
||||
Below is the most fail safe way to run aider in these situations:
|
||||
|
||||
```
|
||||
python -m pip install -U aider-chat
|
||||
python -m aider
|
||||
```
|
||||
|
||||
|
||||
{% include venv-pipx.md %}
|
||||
You should also consider
|
||||
[installing aider using aider-install, uv or pipx](/docs/install.html).
|
||||
|
||||
@@ -5,7 +5,7 @@ nav_order: 28
|
||||
|
||||
# Dependency versions
|
||||
|
||||
Aider expects to be installed via `pip` or `pipx`, which will install
|
||||
Aider expects to be installed with the
|
||||
correct versions of all of its required dependencies.
|
||||
|
||||
If you've been linked to this doc from a GitHub issue,
|
||||
@@ -13,43 +13,38 @@ or if aider is reporting `ImportErrors`
|
||||
it is likely that your
|
||||
aider install is using incorrect dependencies.
|
||||
|
||||
## Install with pipx
|
||||
|
||||
## Avoid package conflicts
|
||||
|
||||
If you are using aider to work on a python project, sometimes your project will require
|
||||
specific versions of python packages which conflict with the versions that aider
|
||||
requires.
|
||||
If this happens, you may see errors like these when running pip installs:
|
||||
|
||||
```
|
||||
aider-chat 0.23.0 requires somepackage==X.Y.Z, but you have somepackage U.W.V which is incompatible.
|
||||
```
|
||||
|
||||
## Install with aider-install, uv or pipx
|
||||
|
||||
If you are having dependency problems you should consider
|
||||
[installing aider using pipx](/docs/install/pipx.html).
|
||||
[installing aider using aider-install, uv or pipx](/docs/install.html).
|
||||
This will ensure that aider is installed in its own python environment,
|
||||
with the correct set of dependencies.
|
||||
|
||||
Try re-installing cleanly:
|
||||
|
||||
```
|
||||
pipx uninstall aider-chat
|
||||
pipx install aider-chat
|
||||
```
|
||||
|
||||
## Package managers like Homebrew, AUR, ports
|
||||
|
||||
Package managers often install aider with the wrong dependencies, leading
|
||||
to import errors and other problems.
|
||||
|
||||
The recommended way to
|
||||
install aider is with
|
||||
[pip](/docs/install/install.html).
|
||||
Be sure to use the `--upgrade-strategy only-if-needed` switch so that the correct
|
||||
versions of dependencies will be installed.
|
||||
It is recommended to
|
||||
[install aider using aider-install, uv or pipx](/docs/install.html).
|
||||
|
||||
```
|
||||
python -m pip install -U --upgrade-strategy only-if-needed aider-chat
|
||||
```
|
||||
|
||||
A very safe way is to
|
||||
[install aider using pipx](/docs/install/pipx.html),
|
||||
which will ensure it is installed in a stand alone virtual environment.
|
||||
|
||||
## Dependency versions matter
|
||||
|
||||
Aider pins its dependencies and is tested to work with those specific versions.
|
||||
If you are installing aider with pip (rather than pipx),
|
||||
If you are installing aider directly with pip
|
||||
you should be careful about upgrading or downgrading the python packages that
|
||||
aider uses.
|
||||
|
||||
@@ -64,9 +59,4 @@ and sometimes introduces bugs or backwards incompatible changes.
|
||||
|
||||
## Replit
|
||||
|
||||
You can `pip install -U aider-chat` on replit.
|
||||
|
||||
Or you can install aider with
|
||||
pipx as follows:
|
||||
|
||||
{% include replit-pipx.md %}
|
||||
|
||||
@@ -37,7 +37,7 @@ description: Aider can run in your browser, not just on the command line.
|
||||
If you run aider with `--watch-files`, it will watch all files in your repo
|
||||
and look for any AI coding instructions you add using your favorite IDE or text editor.
|
||||
|
||||
Specifically, aider looks for one-liner comments (# ... or // ...) that either start or end with `AI`, `AI!` or `AI?1 like these:
|
||||
Specifically, aider looks for one-liner comments (# ... or // ...) that either start or end with `AI`, `AI!` or `AI?` like these:
|
||||
|
||||
```python
|
||||
# Make a snake game. AI!
|
||||
|
||||
@@ -70,28 +70,27 @@ Aider works best with GPT-4o & Claude 3.5 Sonnet and can
|
||||
cog.out(open("aider/website/_includes/get-started.md").read())
|
||||
-->
|
||||
|
||||
You can get started quickly like this:
|
||||
If you already have python 3.8-3.13 installed, you can get started quickly like this:
|
||||
|
||||
```bash
|
||||
python -m pip install -U aider-chat
|
||||
python -m pip install aider-install
|
||||
aider-install
|
||||
|
||||
# Change directory into a git repo
|
||||
cd /to/your/git/repo
|
||||
# Change directory into your code base
|
||||
cd /to/your/project
|
||||
|
||||
# Work with Claude 3.5 Sonnet on your repo
|
||||
export ANTHROPIC_API_KEY=your-key-goes-here
|
||||
aider
|
||||
# Work with Claude 3.5 Sonnet on your code
|
||||
aider --model sonnet --anthropic-api-key your-key-goes-here
|
||||
|
||||
# Work with GPT-4o on your repo
|
||||
export OPENAI_API_KEY=your-key-goes-here
|
||||
aider
|
||||
# Work with GPT-4o on your code
|
||||
aider --model gpt-4o --openai-api-key your-key-goes-here
|
||||
```
|
||||
<!-- NOOP -->
|
||||
|
||||
See the
|
||||
[installation instructions](https://aider.chat/docs/install.html)
|
||||
and other
|
||||
[documentation](https://aider.chat/docs/usage.html)
|
||||
and
|
||||
[usage documentation](https://aider.chat/docs/usage.html)
|
||||
for more details.
|
||||
|
||||
## Features
|
||||
|
||||
559
aider/website/install.ps1
Normal file
559
aider/website/install.ps1
Normal file
@@ -0,0 +1,559 @@
|
||||
# Licensed under the MIT license
|
||||
# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
<#
|
||||
.SYNOPSIS
|
||||
|
||||
The installer for uv 0.5.9
|
||||
|
||||
.DESCRIPTION
|
||||
|
||||
This script detects what platform you're on and fetches an appropriate archive from
|
||||
https://github.com/astral-sh/uv/releases/download/0.5.9
|
||||
then unpacks the binaries and installs them to the first of the following locations
|
||||
|
||||
$env:XDG_BIN_HOME
|
||||
$env:XDG_DATA_HOME/../bin
|
||||
$HOME/.local/bin
|
||||
|
||||
It will then add that dir to PATH by editing your Environment.Path registry key
|
||||
|
||||
.PARAMETER ArtifactDownloadUrl
|
||||
The URL of the directory where artifacts can be fetched from
|
||||
|
||||
.PARAMETER NoModifyPath
|
||||
Don't add the install directory to PATH
|
||||
|
||||
.PARAMETER Help
|
||||
Print help
|
||||
|
||||
#>
|
||||
|
||||
param (
|
||||
[Parameter(HelpMessage = "The URL of the directory where artifacts can be fetched from")]
|
||||
[string]$ArtifactDownloadUrl = 'https://github.com/astral-sh/uv/releases/download/0.5.9',
|
||||
[Parameter(HelpMessage = "Don't add the install directory to PATH")]
|
||||
[switch]$NoModifyPath,
|
||||
[Parameter(HelpMessage = "Print Help")]
|
||||
[switch]$Help
|
||||
)
|
||||
|
||||
$app_name = 'uv'
|
||||
$app_version = '0.5.9'
|
||||
if ($env:UV_INSTALLER_GHE_BASE_URL) {
|
||||
$installer_base_url = $env:UV_INSTALLER_GHE_BASE_URL
|
||||
} elseif ($env:UV_INSTALLER_GITHUB_BASE_URL) {
|
||||
$installer_base_url = $env:UV_INSTALLER_GITHUB_BASE_URL
|
||||
} else {
|
||||
$installer_base_url = "https://github.com"
|
||||
}
|
||||
if ($env:INSTALLER_DOWNLOAD_URL) {
|
||||
$ArtifactDownloadUrl = $env:INSTALLER_DOWNLOAD_URL
|
||||
} else {
|
||||
$ArtifactDownloadUrl = "$installer_base_url/astral-sh/uv/releases/download/0.5.9"
|
||||
}
|
||||
|
||||
$receipt = @"
|
||||
{"binaries":["CARGO_DIST_BINS"],"binary_aliases":{},"cdylibs":["CARGO_DIST_DYLIBS"],"cstaticlibs":["CARGO_DIST_STATICLIBS"],"install_layout":"unspecified","install_prefix":"AXO_INSTALL_PREFIX","modify_path":true,"provider":{"source":"cargo-dist","version":"0.25.2-prerelease.3"},"source":{"app_name":"uv","name":"uv","owner":"astral-sh","release_type":"github"},"version":"0.5.9"}
|
||||
"@
|
||||
$receipt_home = "${env:LOCALAPPDATA}\uv"
|
||||
|
||||
if ($env:UV_DISABLE_UPDATE) {
|
||||
$install_updater = $false
|
||||
} else {
|
||||
$install_updater = $true
|
||||
}
|
||||
|
||||
if ($NoModifyPath) {
|
||||
Write-Information "-NoModifyPath has been deprecated; please set UV_NO_MODIFY_PATH=1 in the environment"
|
||||
}
|
||||
|
||||
if ($env:UV_NO_MODIFY_PATH) {
|
||||
$NoModifyPath = $true
|
||||
}
|
||||
|
||||
$unmanaged_install = $env:UV_UNMANAGED_INSTALL
|
||||
|
||||
if ($unmanaged_install) {
|
||||
$NoModifyPath = $true
|
||||
$install_updater = $false
|
||||
}
|
||||
|
||||
function Install-Binary($install_args) {
|
||||
if ($Help) {
|
||||
Get-Help $PSCommandPath -Detailed
|
||||
Exit
|
||||
}
|
||||
|
||||
Initialize-Environment
|
||||
|
||||
# Platform info injected by dist
|
||||
$platforms = @{
|
||||
"aarch64-pc-windows-msvc" = @{
|
||||
"artifact_name" = "uv-x86_64-pc-windows-msvc.zip"
|
||||
"bins" = @("uv.exe", "uvx.exe")
|
||||
"libs" = @()
|
||||
"staticlibs" = @()
|
||||
"zip_ext" = ".zip"
|
||||
"aliases" = @{
|
||||
}
|
||||
"aliases_json" = '{}'
|
||||
}
|
||||
"i686-pc-windows-msvc" = @{
|
||||
"artifact_name" = "uv-i686-pc-windows-msvc.zip"
|
||||
"bins" = @("uv.exe", "uvx.exe")
|
||||
"libs" = @()
|
||||
"staticlibs" = @()
|
||||
"zip_ext" = ".zip"
|
||||
"aliases" = @{
|
||||
}
|
||||
"aliases_json" = '{}'
|
||||
}
|
||||
"x86_64-pc-windows-msvc" = @{
|
||||
"artifact_name" = "uv-x86_64-pc-windows-msvc.zip"
|
||||
"bins" = @("uv.exe", "uvx.exe")
|
||||
"libs" = @()
|
||||
"staticlibs" = @()
|
||||
"zip_ext" = ".zip"
|
||||
"aliases" = @{
|
||||
}
|
||||
"aliases_json" = '{}'
|
||||
}
|
||||
}
|
||||
|
||||
$fetched = Download "$ArtifactDownloadUrl" $platforms
|
||||
# FIXME: add a flag that lets the user not do this step
|
||||
try {
|
||||
Invoke-Installer -artifacts $fetched -platforms $platforms "$install_args"
|
||||
} catch {
|
||||
throw @"
|
||||
We encountered an error trying to perform the installation;
|
||||
please review the error messages below.
|
||||
|
||||
$_
|
||||
"@
|
||||
}
|
||||
}
|
||||
|
||||
function Get-TargetTriple() {
|
||||
try {
|
||||
# NOTE: this might return X64 on ARM64 Windows, which is OK since emulation is available.
|
||||
# It works correctly starting in PowerShell Core 7.3 and Windows PowerShell in Win 11 22H2.
|
||||
# Ideally this would just be
|
||||
# [System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture
|
||||
# but that gets a type from the wrong assembly on Windows PowerShell (i.e. not Core)
|
||||
$a = [System.Reflection.Assembly]::LoadWithPartialName("System.Runtime.InteropServices.RuntimeInformation")
|
||||
$t = $a.GetType("System.Runtime.InteropServices.RuntimeInformation")
|
||||
$p = $t.GetProperty("OSArchitecture")
|
||||
# Possible OSArchitecture Values: https://learn.microsoft.com/dotnet/api/system.runtime.interopservices.architecture
|
||||
# Rust supported platforms: https://doc.rust-lang.org/stable/rustc/platform-support.html
|
||||
switch ($p.GetValue($null).ToString())
|
||||
{
|
||||
"X86" { return "i686-pc-windows-msvc" }
|
||||
"X64" { return "x86_64-pc-windows-msvc" }
|
||||
"Arm" { return "thumbv7a-pc-windows-msvc" }
|
||||
"Arm64" { return "aarch64-pc-windows-msvc" }
|
||||
}
|
||||
} catch {
|
||||
# The above was added in .NET 4.7.1, so Windows PowerShell in versions of Windows
|
||||
# prior to Windows 10 v1709 may not have this API.
|
||||
Write-Verbose "Get-TargetTriple: Exception when trying to determine OS architecture."
|
||||
Write-Verbose $_
|
||||
}
|
||||
|
||||
# This is available in .NET 4.0. We already checked for PS 5, which requires .NET 4.5.
|
||||
Write-Verbose("Get-TargetTriple: falling back to Is64BitOperatingSystem.")
|
||||
if ([System.Environment]::Is64BitOperatingSystem) {
|
||||
return "x86_64-pc-windows-msvc"
|
||||
} else {
|
||||
return "i686-pc-windows-msvc"
|
||||
}
|
||||
}
|
||||
|
||||
function Download($download_url, $platforms) {
|
||||
$arch = Get-TargetTriple
|
||||
|
||||
if (-not $platforms.ContainsKey($arch)) {
|
||||
$platforms_json = ConvertTo-Json $platforms
|
||||
throw "ERROR: could not find binaries for this platform. Last platform tried: $arch platform info: $platforms_json"
|
||||
}
|
||||
|
||||
# Lookup what we expect this platform to look like
|
||||
$info = $platforms[$arch]
|
||||
$zip_ext = $info["zip_ext"]
|
||||
$bin_names = $info["bins"]
|
||||
$lib_names = $info["libs"]
|
||||
$staticlib_names = $info["staticlibs"]
|
||||
$artifact_name = $info["artifact_name"]
|
||||
|
||||
# Make a new temp dir to unpack things to
|
||||
$tmp = New-Temp-Dir
|
||||
$dir_path = "$tmp\$app_name$zip_ext"
|
||||
|
||||
# Download and unpack!
|
||||
$url = "$download_url/$artifact_name"
|
||||
Write-Information "Downloading $app_name $app_version ($arch)"
|
||||
Write-Verbose " from $url"
|
||||
Write-Verbose " to $dir_path"
|
||||
$wc = New-Object Net.Webclient
|
||||
$wc.downloadFile($url, $dir_path)
|
||||
|
||||
Write-Verbose "Unpacking to $tmp"
|
||||
|
||||
# Select the tool to unpack the files with.
|
||||
#
|
||||
# As of windows 10(?), powershell comes with tar preinstalled, but in practice
|
||||
# it only seems to support .tar.gz, and not xz/zstd. Still, we should try to
|
||||
# forward all tars to it in case the user has a machine that can handle it!
|
||||
switch -Wildcard ($zip_ext) {
|
||||
".zip" {
|
||||
Expand-Archive -Path $dir_path -DestinationPath "$tmp";
|
||||
Break
|
||||
}
|
||||
".tar.*" {
|
||||
tar xf $dir_path --strip-components 1 -C "$tmp";
|
||||
Break
|
||||
}
|
||||
Default {
|
||||
throw "ERROR: unknown archive format $zip_ext"
|
||||
}
|
||||
}
|
||||
|
||||
# Let the next step know what to copy
|
||||
$bin_paths = @()
|
||||
foreach ($bin_name in $bin_names) {
|
||||
Write-Verbose " Unpacked $bin_name"
|
||||
$bin_paths += "$tmp\$bin_name"
|
||||
}
|
||||
$lib_paths = @()
|
||||
foreach ($lib_name in $lib_names) {
|
||||
Write-Verbose " Unpacked $lib_name"
|
||||
$lib_paths += "$tmp\$lib_name"
|
||||
}
|
||||
$staticlib_paths = @()
|
||||
foreach ($lib_name in $staticlib_names) {
|
||||
Write-Verbose " Unpacked $lib_name"
|
||||
$staticlib_paths += "$tmp\$lib_name"
|
||||
}
|
||||
|
||||
if (($null -ne $info["updater"]) -and $install_updater) {
|
||||
$updater_id = $info["updater"]["artifact_name"]
|
||||
$updater_url = "$download_url/$updater_id"
|
||||
$out_name = "$tmp\uv-update.exe"
|
||||
|
||||
$wc.downloadFile($updater_url, $out_name)
|
||||
$bin_paths += $out_name
|
||||
}
|
||||
|
||||
return @{
|
||||
"bin_paths" = $bin_paths
|
||||
"lib_paths" = $lib_paths
|
||||
"staticlib_paths" = $staticlib_paths
|
||||
}
|
||||
}
|
||||
|
||||
function Invoke-Installer($artifacts, $platforms) {
|
||||
# Replaces the placeholder binary entry with the actual list of binaries
|
||||
$arch = Get-TargetTriple
|
||||
|
||||
if (-not $platforms.ContainsKey($arch)) {
|
||||
$platforms_json = ConvertTo-Json $platforms
|
||||
throw "ERROR: could not find binaries for this platform. Last platform tried: $arch platform info: $platforms_json"
|
||||
}
|
||||
|
||||
$info = $platforms[$arch]
|
||||
|
||||
# Forces the install to occur at this path, not the default
|
||||
$force_install_dir = $null
|
||||
$install_layout = "unspecified"
|
||||
# Check the newer app-specific variable before falling back
|
||||
# to the older generic one
|
||||
if (($env:UV_INSTALL_DIR)) {
|
||||
$force_install_dir = $env:UV_INSTALL_DIR
|
||||
$install_layout = "flat"
|
||||
} elseif (($env:CARGO_DIST_FORCE_INSTALL_DIR)) {
|
||||
$force_install_dir = $env:CARGO_DIST_FORCE_INSTALL_DIR
|
||||
$install_layout = "flat"
|
||||
} elseif ($unmanaged_install) {
|
||||
$force_install_dir = $unmanaged_install
|
||||
$install_layout = "flat"
|
||||
}
|
||||
|
||||
# Check if the install layout should be changed from `flat` to `cargo-home`
|
||||
# for backwards compatible updates of applications that switched layouts.
|
||||
if (($force_install_dir) -and ($install_layout -eq "flat")) {
|
||||
# If the install directory is targeting the Cargo home directory, then
|
||||
# we assume this application was previously installed that layout
|
||||
# Note the installer passes the path with `\\` separators, but here they are
|
||||
# `\` so we normalize for comparison. We don't use `Resolve-Path` because they
|
||||
# may not exist.
|
||||
$cargo_home = if ($env:CARGO_HOME) { $env:CARGO_HOME } else {
|
||||
Join-Path $(if ($HOME) { $HOME } else { "." }) ".cargo"
|
||||
}
|
||||
if ($force_install_dir.Replace('\\', '\') -eq $cargo_home) {
|
||||
$install_layout = "cargo-home"
|
||||
}
|
||||
}
|
||||
|
||||
# The actual path we're going to install to
|
||||
$dest_dir = $null
|
||||
$dest_dir_lib = $null
|
||||
# The install prefix we write to the receipt.
|
||||
# For organized install methods like CargoHome, which have
|
||||
# subdirectories, this is the root without `/bin`. For other
|
||||
# methods, this is the same as `_install_dir`.
|
||||
$receipt_dest_dir = $null
|
||||
# Before actually consulting the configured install strategy, see
|
||||
# if we're overriding it.
|
||||
if (($force_install_dir)) {
|
||||
switch ($install_layout) {
|
||||
"hierarchical" {
|
||||
$dest_dir = Join-Path $force_install_dir "bin"
|
||||
$dest_dir_lib = Join-Path $force_install_dir "lib"
|
||||
}
|
||||
"cargo-home" {
|
||||
$dest_dir = Join-Path $force_install_dir "bin"
|
||||
$dest_dir_lib = $dest_dir
|
||||
}
|
||||
"flat" {
|
||||
$dest_dir = $force_install_dir
|
||||
$dest_dir_lib = $dest_dir
|
||||
}
|
||||
Default {
|
||||
throw "Error: unrecognized installation layout: $install_layout"
|
||||
}
|
||||
}
|
||||
$receipt_dest_dir = $force_install_dir
|
||||
}
|
||||
if (-Not $dest_dir) {
|
||||
# Install to $env:XDG_BIN_HOME
|
||||
$dest_dir = if (($base_dir = $env:XDG_BIN_HOME)) {
|
||||
Join-Path $base_dir ""
|
||||
}
|
||||
$dest_dir_lib = $dest_dir
|
||||
$receipt_dest_dir = $dest_dir
|
||||
$install_layout = "flat"
|
||||
}
|
||||
if (-Not $dest_dir) {
|
||||
# Install to $env:XDG_DATA_HOME/../bin
|
||||
$dest_dir = if (($base_dir = $env:XDG_DATA_HOME)) {
|
||||
Join-Path $base_dir "../bin"
|
||||
}
|
||||
$dest_dir_lib = $dest_dir
|
||||
$receipt_dest_dir = $dest_dir
|
||||
$install_layout = "flat"
|
||||
}
|
||||
if (-Not $dest_dir) {
|
||||
# Install to $HOME/.local/bin
|
||||
$dest_dir = if (($base_dir = $HOME)) {
|
||||
Join-Path $base_dir ".local/bin"
|
||||
}
|
||||
$dest_dir_lib = $dest_dir
|
||||
$receipt_dest_dir = $dest_dir
|
||||
$install_layout = "flat"
|
||||
}
|
||||
|
||||
# Looks like all of the above assignments failed
|
||||
if (-Not $dest_dir) {
|
||||
throw "ERROR: could not find a valid path to install to; please check the installation instructions"
|
||||
}
|
||||
|
||||
# The replace call here ensures proper escaping is inlined into the receipt
|
||||
$receipt = $receipt.Replace('AXO_INSTALL_PREFIX', $receipt_dest_dir.replace("\", "\\"))
|
||||
$receipt = $receipt.Replace('"install_layout":"unspecified"', -join('"install_layout":"', $install_layout, '"'))
|
||||
|
||||
$dest_dir = New-Item -Force -ItemType Directory -Path $dest_dir
|
||||
$dest_dir_lib = New-Item -Force -ItemType Directory -Path $dest_dir_lib
|
||||
Write-Information "Installing to $dest_dir"
|
||||
# Just copy the binaries from the temp location to the install dir
|
||||
foreach ($bin_path in $artifacts["bin_paths"]) {
|
||||
$installed_file = Split-Path -Path "$bin_path" -Leaf
|
||||
Copy-Item "$bin_path" -Destination "$dest_dir" -ErrorAction Stop
|
||||
Remove-Item "$bin_path" -Recurse -Force -ErrorAction Stop
|
||||
Write-Information " $installed_file"
|
||||
|
||||
if (($dests = $info["aliases"][$installed_file])) {
|
||||
$source = Join-Path "$dest_dir" "$installed_file"
|
||||
foreach ($dest_name in $dests) {
|
||||
$dest = Join-Path $dest_dir $dest_name
|
||||
$null = New-Item -ItemType HardLink -Target "$source" -Path "$dest" -Force -ErrorAction Stop
|
||||
}
|
||||
}
|
||||
}
|
||||
foreach ($lib_path in $artifacts["lib_paths"]) {
|
||||
$installed_file = Split-Path -Path "$lib_path" -Leaf
|
||||
Copy-Item "$lib_path" -Destination "$dest_dir_lib" -ErrorAction Stop
|
||||
Remove-Item "$lib_path" -Recurse -Force -ErrorAction Stop
|
||||
Write-Information " $installed_file"
|
||||
}
|
||||
foreach ($lib_path in $artifacts["staticlib_paths"]) {
|
||||
$installed_file = Split-Path -Path "$lib_path" -Leaf
|
||||
Copy-Item "$lib_path" -Destination "$dest_dir_lib" -ErrorAction Stop
|
||||
Remove-Item "$lib_path" -Recurse -Force -ErrorAction Stop
|
||||
Write-Information " $installed_file"
|
||||
}
|
||||
|
||||
$formatted_bins = ($info["bins"] | ForEach-Object { '"' + $_ + '"' }) -join ","
|
||||
$receipt = $receipt.Replace('"CARGO_DIST_BINS"', $formatted_bins)
|
||||
$formatted_libs = ($info["libs"] | ForEach-Object { '"' + $_ + '"' }) -join ","
|
||||
$receipt = $receipt.Replace('"CARGO_DIST_DYLIBS"', $formatted_libs)
|
||||
$formatted_staticlibs = ($info["staticlibs"] | ForEach-Object { '"' + $_ + '"' }) -join ","
|
||||
$receipt = $receipt.Replace('"CARGO_DIST_STATICLIBS"', $formatted_staticlibs)
|
||||
# Also replace the aliases with the arch-specific one
|
||||
$receipt = $receipt.Replace('"binary_aliases":{}', -join('"binary_aliases":', $info['aliases_json']))
|
||||
if ($NoModifyPath) {
|
||||
$receipt = $receipt.Replace('"modify_path":true', '"modify_path":false')
|
||||
}
|
||||
|
||||
# Write the install receipt
|
||||
if ($install_updater) {
|
||||
$null = New-Item -Path $receipt_home -ItemType "directory" -ErrorAction SilentlyContinue
|
||||
# Trying to get Powershell 5.1 (not 6+, which is fake and lies) to write utf8 is a crime
|
||||
# because "Out-File -Encoding utf8" actually still means utf8BOM, so we need to pull out
|
||||
# .NET's APIs which actually do what you tell them (also apparently utf8NoBOM is the
|
||||
# default in newer .NETs but I'd rather not rely on that at this point).
|
||||
$Utf8NoBomEncoding = New-Object System.Text.UTF8Encoding $False
|
||||
[IO.File]::WriteAllLines("$receipt_home/uv-receipt.json", "$receipt", $Utf8NoBomEncoding)
|
||||
}
|
||||
|
||||
# Respect the environment, but CLI takes precedence
|
||||
if ($null -eq $NoModifyPath) {
|
||||
$NoModifyPath = $env:INSTALLER_NO_MODIFY_PATH
|
||||
}
|
||||
|
||||
Write-Information ""
|
||||
Write-Information "Installing aider-chat..."
|
||||
& "$dest_dir\uv.exe" tool install --force --python python3.12 aider-chat@latest
|
||||
|
||||
if (-not $NoModifyPath) {
|
||||
Add-Ci-Path $dest_dir
|
||||
if (Add-Path $dest_dir) {
|
||||
Write-Information ""
|
||||
Write-Information "You need to add $dest_dir to your PATH. Either restart your system or run:"
|
||||
Write-Information ""
|
||||
Write-Information " set Path=$dest_dir;%Path% (cmd)"
|
||||
Write-Information " `$env:Path = `"$dest_dir;`$env:Path`" (powershell)"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Attempt to do CI-specific rituals to get the install-dir on PATH faster
|
||||
function Add-Ci-Path($OrigPathToAdd) {
|
||||
# If GITHUB_PATH is present, then write install_dir to the file it refs.
|
||||
# After each GitHub Action, the contents will be added to PATH.
|
||||
# So if you put a curl | sh for this script in its own "run" step,
|
||||
# the next step will have this dir on PATH.
|
||||
#
|
||||
# Note that GITHUB_PATH will not resolve any variables, so we in fact
|
||||
# want to write the install dir and not an expression that evals to it
|
||||
if (($gh_path = $env:GITHUB_PATH)) {
|
||||
Write-Output "$OrigPathToAdd" | Out-File -FilePath "$gh_path" -Encoding utf8 -Append
|
||||
}
|
||||
}
|
||||
|
||||
# Try to add the given path to PATH via the registry
|
||||
#
|
||||
# Returns true if the registry was modified, otherwise returns false
|
||||
# (indicating it was already on PATH)
|
||||
function Add-Path($OrigPathToAdd) {
|
||||
Write-Verbose "Adding $OrigPathToAdd to your PATH"
|
||||
$RegistryPath = "HKCU:\Environment"
|
||||
$PropertyName = "Path"
|
||||
$PathToAdd = $OrigPathToAdd
|
||||
|
||||
$Item = if (Test-Path $RegistryPath) {
|
||||
# If the registry key exists, get it
|
||||
Get-Item -Path $RegistryPath
|
||||
} else {
|
||||
# If the registry key doesn't exist, create it
|
||||
Write-Verbose "Creating $RegistryPath"
|
||||
New-Item -Path $RegistryPath -Force
|
||||
}
|
||||
|
||||
$OldPath = ""
|
||||
try {
|
||||
# Try to get the old PATH value. If that fails, assume we're making it from scratch.
|
||||
# Otherwise assume there's already paths in here and use a ; separator
|
||||
$OldPath = $Item | Get-ItemPropertyValue -Name $PropertyName
|
||||
$PathToAdd = "$PathToAdd;"
|
||||
} catch {
|
||||
# We'll be creating the PATH from scratch
|
||||
Write-Verbose "No $PropertyName Property exists on $RegistryPath (we'll make one)"
|
||||
}
|
||||
|
||||
# Check if the path is already there
|
||||
#
|
||||
# We don't want to incorrectly match "C:\blah\" to "C:\blah\blah\", so we include the semicolon
|
||||
# delimiters when searching, ensuring exact matches. To avoid corner cases we add semicolons to
|
||||
# both sides of the input, allowing us to pretend we're always in the middle of a list.
|
||||
Write-Verbose "Old $PropertyName Property is $OldPath"
|
||||
if (";$OldPath;" -like "*;$OrigPathToAdd;*") {
|
||||
# Already on path, nothing to do
|
||||
Write-Verbose "install dir already on PATH, all done!"
|
||||
return $false
|
||||
} else {
|
||||
# Actually update PATH
|
||||
Write-Verbose "Actually mutating $PropertyName Property"
|
||||
$NewPath = $PathToAdd + $OldPath
|
||||
# We use -Force here to make the value already existing not be an error
|
||||
$Item | New-ItemProperty -Name $PropertyName -Value $NewPath -PropertyType String -Force | Out-Null
|
||||
return $true
|
||||
}
|
||||
}
|
||||
|
||||
function Initialize-Environment() {
|
||||
If (($PSVersionTable.PSVersion.Major) -lt 5) {
|
||||
throw @"
|
||||
Error: PowerShell 5 or later is required to install $app_name.
|
||||
Upgrade PowerShell:
|
||||
|
||||
https://docs.microsoft.com/en-us/powershell/scripting/setup/installing-windows-powershell
|
||||
|
||||
"@
|
||||
}
|
||||
|
||||
# show notification to change execution policy:
|
||||
$allowedExecutionPolicy = @('Unrestricted', 'RemoteSigned', 'ByPass')
|
||||
If ((Get-ExecutionPolicy).ToString() -notin $allowedExecutionPolicy) {
|
||||
throw @"
|
||||
Error: PowerShell requires an execution policy in [$($allowedExecutionPolicy -join ", ")] to run $app_name. For example, to set the execution policy to 'RemoteSigned' please run:
|
||||
|
||||
Set-ExecutionPolicy RemoteSigned -scope CurrentUser
|
||||
|
||||
"@
|
||||
}
|
||||
|
||||
# GitHub requires TLS 1.2
|
||||
If ([System.Enum]::GetNames([System.Net.SecurityProtocolType]) -notcontains 'Tls12') {
|
||||
throw @"
|
||||
Error: Installing $app_name requires at least .NET Framework 4.5
|
||||
Please download and install it first:
|
||||
|
||||
https://www.microsoft.com/net/download
|
||||
|
||||
"@
|
||||
}
|
||||
}
|
||||
|
||||
function New-Temp-Dir() {
|
||||
[CmdletBinding(SupportsShouldProcess)]
|
||||
param()
|
||||
$parent = [System.IO.Path]::GetTempPath()
|
||||
[string] $name = [System.Guid]::NewGuid()
|
||||
New-Item -ItemType Directory -Path (Join-Path $parent $name)
|
||||
}
|
||||
|
||||
# PSScriptAnalyzer doesn't like how we use our params as globals, this calms it
|
||||
$Null = $ArtifactDownloadUrl, $NoModifyPath, $Help
|
||||
# Make Write-Information statements be visible
|
||||
$InformationPreference = "Continue"
|
||||
|
||||
# The default interactive handler
|
||||
try {
|
||||
Install-Binary "$Args"
|
||||
} catch {
|
||||
Write-Information $_
|
||||
exit 1
|
||||
}
|
||||
1832
aider/website/install.sh
Normal file
1832
aider/website/install.sh
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,54 @@
|
||||
FROM python:3.10-slim
|
||||
RUN apt-get update
|
||||
RUN apt-get install -y less git build-essential
|
||||
FROM buildpack-deps:jammy
|
||||
|
||||
# Install Python 3.11
|
||||
RUN apt-get update && apt-get install -y \
|
||||
software-properties-common \
|
||||
cmake \
|
||||
&& add-apt-repository ppa:deadsnakes/ppa \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y \
|
||||
python3.11 \
|
||||
python3.11-venv \
|
||||
python3.11-dev \
|
||||
python3-pip \
|
||||
openjdk-21-jdk \
|
||||
libtbb-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Make python3.11 the default python3
|
||||
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
|
||||
|
||||
# Install Go
|
||||
RUN curl -OL https://golang.org/dl/go1.21.5.linux-amd64.tar.gz && \
|
||||
tar -C /usr/local -xzf go1.21.5.linux-amd64.tar.gz && \
|
||||
rm go1.21.5.linux-amd64.tar.gz
|
||||
ENV PATH="/usr/local/go/bin:${PATH}"
|
||||
|
||||
# Install Rust
|
||||
ADD https://sh.rustup.rs /tmp/rustup.sh
|
||||
RUN chmod +x /tmp/rustup.sh && /tmp/rustup.sh -y && rm /tmp/rustup.sh
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
# Install Node.js and dependencies
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
|
||||
apt-get install -y nodejs && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
mkdir -p /npm-install && \
|
||||
cd /npm-install && \
|
||||
npm init -y && \
|
||||
npm install \
|
||||
jest \
|
||||
@babel/core@7.25.2 \
|
||||
@exercism/babel-preset-javascript@0.2.1 \
|
||||
@exercism/eslint-config-javascript@0.6.0 \
|
||||
@types/jest@29.5.12 \
|
||||
@types/node@20.12.12 \
|
||||
babel-jest@29.6.4 \
|
||||
core-js@3.37.1 \
|
||||
eslint@8.49.0
|
||||
|
||||
COPY . /aider
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
RUN pip install --no-cache-dir -e /aider[dev]
|
||||
RUN pip3 install --no-cache-dir --upgrade pip uv
|
||||
RUN uv pip install --system --no-cache-dir -e /aider[dev]
|
||||
RUN git config --global --add safe.directory /aider
|
||||
WORKDIR /aider
|
||||
|
||||
@@ -1,24 +1,23 @@
|
||||
|
||||
# Aider code editing benchmark harness
|
||||
# Aider benchmark harness
|
||||
|
||||
Aider uses a "code editing" benchmark to quantitatively measure how well it works
|
||||
with the GPT-3.5 and GPT-4 models.
|
||||
Aider uses benchmarks to quantitatively measure how well it works
|
||||
with various LLMs.
|
||||
This directory holds the harness and tools needed to run the benchmarking suite.
|
||||
|
||||
## Background
|
||||
|
||||
The benchmark is based on the [Exercism
|
||||
python](https://github.com/exercism/python) coding exercises.
|
||||
The benchmark is based on the [Exercism](https://github.com/exercism/python) coding exercises.
|
||||
This
|
||||
benchmark evaluates how effectively aider and GPT can translate a
|
||||
benchmark evaluates how effectively aider and LLMs can translate a
|
||||
natural language coding request into executable code saved into
|
||||
files that pass unit tests.
|
||||
It provides an end-to-end evaluation of not just
|
||||
GPT's coding ability, but also its capacity to *edit existing code*
|
||||
the LLM's coding ability, but also its capacity to *edit existing code*
|
||||
and *format those code edits* so that aider can save the
|
||||
edits to the local source files.
|
||||
|
||||
See [this writeup for a longer discussion about the benchmark and how to interpret the results](https://aider.chat/docs/benchmarks.html).
|
||||
See [this writeup for a longer discussion about the benchmark](https://aider.chat/2024/12/21/polyglot.html).
|
||||
|
||||
The benchmark is intended to be run *inside a docker container*.
|
||||
This is because the benchmarking harness will be
|
||||
@@ -33,7 +32,7 @@ There are 3 main tasks involved in benchmarking aider:
|
||||
|
||||
1. Install and setup for benchmarking.
|
||||
|
||||
2. Run the benchmark to measure performance across the 133 exercises.
|
||||
2. Run the benchmark to measure performance across all the exercises.
|
||||
|
||||
3. Generate a summary report of how many of the exercises succeeded or failed.
|
||||
|
||||
@@ -50,11 +49,8 @@ git clone git@github.com:Aider-AI/aider.git
|
||||
cd aider
|
||||
mkdir tmp.benchmarks
|
||||
|
||||
# Clone the exercism repo
|
||||
git clone git@github.com:exercism/python.git
|
||||
|
||||
# Copy the practice exercises into the benchmark scratch dir
|
||||
cp -rp python/exercises/practice tmp.benchmarks/exercism-python
|
||||
# Clone the repo with the exercises
|
||||
git clone https://github.com/Aider-AI/polyglot-benchmark tmp.benchmarks/polyglot-benchmark
|
||||
|
||||
# Build the docker container
|
||||
./benchmark/docker_build.sh
|
||||
@@ -70,21 +66,21 @@ Launch the docker container and run the benchmark inside it:
|
||||
|
||||
# Inside the container, install aider as a development build.
|
||||
# This way you're running the code that you cloned above, including any local changes.
|
||||
pip install -e .
|
||||
pip install -e .[dev]
|
||||
|
||||
# Run the benchmark:
|
||||
./benchmark/benchmark.py a-helpful-name-for-this-run --model gpt-3.5-turbo --edit-format whole --threads 10
|
||||
./benchmark/benchmark.py a-helpful-name-for-this-run --model gpt-3.5-turbo --edit-format whole --threads 10 --exercises-dir polyglot-benchmark
|
||||
```
|
||||
|
||||
The above will create a folder `tmp.benchmarks/YYYY-MM-DD-HH-MM-SS--a-helpful-name-for-this-run` with benchmarking results.
|
||||
Run like this, the script will run all 133 exercises in a random order.
|
||||
Run like this, the script will run all the exercises in a random order.
|
||||
|
||||
You can run `./benchmark/benchmark.py --help` for a list of all the arguments, but here are the most useful to keep in mind:
|
||||
|
||||
- `--model` is the name of the model, same as you would pass directly to `aider`.
|
||||
- `--edit-format` is the name of the edit format, same as you would pass directly to `aider`. When working with an experimental LLM, I recommend starting with `whole`
|
||||
- `--threads` specifies how many exercises to benchmark in parallel. Start with a single thread if you are working out the kinks on your benchmarking setup or working with a new model, etc. Once you are getting reliable results, you can speed up the process by running with more threads. 10 works well against the OpenAI APIs.
|
||||
- `--num-tests` specifies how many of the 133 tests to run before stopping. This is another way to start gently as you debug your benchmarking setup.
|
||||
- `--num-tests` specifies how many of the tests to run before stopping. This is another way to start gently as you debug your benchmarking setup.
|
||||
- `--keywords` filters the tests to run to only the ones whose name match the supplied argument (similar to `pytest -k xxxx`).
|
||||
|
||||
### Benchmark report
|
||||
@@ -102,7 +98,7 @@ The benchmark report is a yaml record with statistics about the run:
|
||||
|
||||
```yaml
|
||||
- dirname: 2024-07-04-14-32-08--claude-3.5-sonnet-diff-continue
|
||||
test_cases: 133
|
||||
test_cases: 225
|
||||
model: claude-3.5-sonnet
|
||||
edit_format: diff
|
||||
commit_hash: 35f21b5
|
||||
@@ -143,7 +139,6 @@ You can see examples of the benchmark report yaml in the
|
||||
|
||||
## Limitations, notes
|
||||
|
||||
- Benchmarking all 133 exercises against Claude 3.5 Sonnet will cost about $4.
|
||||
- Contributions of benchmark results are welcome! Submit results by opening a PR with edits to the
|
||||
[aider leaderboard data files](https://github.com/Aider-AI/aider/blob/main/aider/website/_data/).
|
||||
- These scripts are not intended for use by typical aider end users.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
@@ -36,8 +36,6 @@ EXERCISES_DIR_DEFAULT = "exercism-python"
|
||||
app = typer.Typer(add_completion=False, pretty_exceptions_enable=False)
|
||||
|
||||
|
||||
NUM_TESTS = (89, 133)
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -74,7 +72,7 @@ def find_latest_benchmark_dir():
|
||||
|
||||
for d in recent_dirs:
|
||||
# Look for .md files in subdirectories
|
||||
for md_file in d.glob("*/.*.md"):
|
||||
for md_file in d.glob("*/exercises/practice/*/.*.md"):
|
||||
if md_file.is_file():
|
||||
mtime = md_file.stat().st_mtime
|
||||
if mtime > latest_time:
|
||||
@@ -89,10 +87,10 @@ def find_latest_benchmark_dir():
|
||||
return latest_dir
|
||||
|
||||
|
||||
def show_stats(dirnames, graphs):
|
||||
def show_stats(dirnames, graphs, stats_languages=None):
|
||||
raw_rows = []
|
||||
for dirname in dirnames:
|
||||
row = summarize_results(dirname)
|
||||
row = summarize_results(dirname, stats_languages)
|
||||
raw_rows.append(row)
|
||||
|
||||
# return
|
||||
@@ -103,10 +101,16 @@ def show_stats(dirnames, graphs):
|
||||
if not row:
|
||||
continue
|
||||
|
||||
if row.completed_tests not in NUM_TESTS:
|
||||
print(f"Warning: {row.dir_name} is incomplete: {row.completed_tests}")
|
||||
if row.completed_tests != row.total_tests:
|
||||
print(
|
||||
f"Warning: {row.dir_name} is incomplete: {row.completed_tests} of {row.total_tests}"
|
||||
)
|
||||
|
||||
try:
|
||||
kind = (row.model, row.edit_format)
|
||||
except AttributeError:
|
||||
return
|
||||
|
||||
kind = (row.model, row.edit_format)
|
||||
if kind in seen:
|
||||
dump(row.dir_name)
|
||||
dump(seen[kind])
|
||||
@@ -161,6 +165,9 @@ def main(
|
||||
sleep: float = typer.Option(
|
||||
0, "--sleep", help="Sleep seconds between tests when single threaded"
|
||||
),
|
||||
languages: str = typer.Option(
|
||||
None, "--languages", "-l", help="Only run tests for specific languages (comma separated)"
|
||||
),
|
||||
edit_format: str = typer.Option(None, "--edit-format", "-e", help="Edit format"),
|
||||
editor_model: str = typer.Option(None, "--editor-model", help="Editor model name"),
|
||||
editor_edit_format: str = typer.Option(None, "--editor-edit-format", help="Editor edit format"),
|
||||
@@ -188,6 +195,11 @@ def main(
|
||||
stats_only: bool = typer.Option(
|
||||
False, "--stats", "-s", help="Do not run tests, just collect stats on completed tests"
|
||||
),
|
||||
stats_languages: str = typer.Option(
|
||||
None,
|
||||
"--stats-languages",
|
||||
help="Only include stats for specific languages (comma separated)",
|
||||
),
|
||||
diffs_only: bool = typer.Option(False, "--diffs", help="Just diff the provided stats dirs"),
|
||||
tries: int = typer.Option(2, "--tries", "-r", help="Number of tries for running tests"),
|
||||
threads: int = typer.Option(1, "--threads", "-t", help="Number of threads to run in parallel"),
|
||||
@@ -224,7 +236,7 @@ def main(
|
||||
updated_dirnames.append(dirname)
|
||||
|
||||
if stats_only:
|
||||
return show_stats(updated_dirnames, graphs)
|
||||
return show_stats(updated_dirnames, graphs, stats_languages)
|
||||
|
||||
if diffs_only:
|
||||
return show_diffs(updated_dirnames)
|
||||
@@ -237,9 +249,41 @@ def main(
|
||||
return
|
||||
|
||||
assert BENCHMARK_DNAME.exists() and BENCHMARK_DNAME.is_dir(), BENCHMARK_DNAME
|
||||
|
||||
def get_exercise_dirs(base_dir, languages=None):
|
||||
"""Get all exercise directories for specified languages (or all if none specified)"""
|
||||
base_dir = Path(base_dir)
|
||||
|
||||
# Get available language dirs
|
||||
lang_dirs = [d for d in base_dir.iterdir() if d.is_dir()]
|
||||
|
||||
# Filter to requested languages if specified
|
||||
if languages:
|
||||
requested = set(lang.strip().lower() for lang in languages.split(","))
|
||||
lang_dirs = [d for d in lang_dirs if d.name.lower() in requested]
|
||||
dump(lang_dirs)
|
||||
if not lang_dirs:
|
||||
print(f"No matching language directories found for: {languages}")
|
||||
return []
|
||||
|
||||
# Get all exercise dirs under exercises/practice for each language
|
||||
exercise_dirs = []
|
||||
for lang_dir in lang_dirs:
|
||||
practice_dir = lang_dir / "exercises" / "practice"
|
||||
if practice_dir.exists():
|
||||
exercise_dirs.extend(d for d in practice_dir.iterdir() if d.is_dir())
|
||||
|
||||
return exercise_dirs
|
||||
|
||||
original_dname = BENCHMARK_DNAME / exercises_dir
|
||||
assert original_dname.exists() and original_dname.is_dir(), original_dname
|
||||
|
||||
exercise_dirs = get_exercise_dirs(original_dname, languages)
|
||||
|
||||
if not exercise_dirs:
|
||||
print("No exercise directories found")
|
||||
return 1
|
||||
|
||||
if clean and dirname.exists():
|
||||
print("Cleaning up and replacing", dirname)
|
||||
dir_files = set(fn.name for fn in dirname.glob("*"))
|
||||
@@ -257,10 +301,19 @@ def main(
|
||||
|
||||
if not dirname.exists():
|
||||
print(f"Copying {original_dname} -> {dirname} ...")
|
||||
shutil.copytree(original_dname, dirname)
|
||||
# Only copy the practice subdirs with exercises
|
||||
os.makedirs(dirname, exist_ok=True)
|
||||
for lang_dir in original_dname.iterdir():
|
||||
if not lang_dir.is_dir():
|
||||
continue
|
||||
practice_dir = lang_dir / "exercises" / "practice"
|
||||
if practice_dir.exists():
|
||||
dest_lang_dir = dirname / lang_dir.name / "exercises" / "practice"
|
||||
os.makedirs(dest_lang_dir.parent, exist_ok=True)
|
||||
shutil.copytree(practice_dir, dest_lang_dir)
|
||||
print("...done")
|
||||
|
||||
test_dnames = sorted(os.listdir(dirname))
|
||||
test_dnames = sorted(str(d.relative_to(original_dname)) for d in exercise_dirs)
|
||||
|
||||
if keywords:
|
||||
keywords = keywords.split(",")
|
||||
@@ -277,10 +330,10 @@ def main(
|
||||
|
||||
if threads == 1:
|
||||
all_results = []
|
||||
for testname in test_dnames:
|
||||
for test_path in test_dnames:
|
||||
results = run_test(
|
||||
original_dname,
|
||||
dirname / testname,
|
||||
dirname / test_path,
|
||||
model,
|
||||
edit_format,
|
||||
tries,
|
||||
@@ -302,10 +355,10 @@ def main(
|
||||
time.sleep(sleep)
|
||||
else:
|
||||
run_test_threaded = lox.thread(threads)(run_test)
|
||||
for testname in test_dnames:
|
||||
for test_path in test_dnames:
|
||||
run_test_threaded.scatter(
|
||||
original_dname,
|
||||
dirname / testname,
|
||||
dirname / test_path,
|
||||
model,
|
||||
edit_format,
|
||||
tries,
|
||||
@@ -365,17 +418,32 @@ def show_diffs(dirnames):
|
||||
print("unchanged:", len(unchanged), ",".join(sorted(unchanged)))
|
||||
|
||||
|
||||
def load_results(dirname):
|
||||
def load_results(dirname, stats_languages=None):
|
||||
dirname = Path(dirname)
|
||||
all_results = [json.loads(fname.read_text()) for fname in dirname.glob("*/.aider.results.json")]
|
||||
all_results = []
|
||||
|
||||
if stats_languages:
|
||||
languages = [lang.strip().lower() for lang in stats_languages.split(",")]
|
||||
glob_patterns = [f"{lang}/exercises/practice/*/.aider.results.json" for lang in languages]
|
||||
else:
|
||||
glob_patterns = ["*/exercises/practice/*/.aider.results.json"]
|
||||
|
||||
for pattern in glob_patterns:
|
||||
for fname in dirname.glob(pattern):
|
||||
try:
|
||||
results = json.loads(fname.read_text())
|
||||
all_results.append(results)
|
||||
except json.JSONDecodeError:
|
||||
print("json.JSONDecodeError", fname)
|
||||
continue
|
||||
return all_results
|
||||
|
||||
|
||||
def summarize_results(dirname):
|
||||
all_results = load_results(dirname)
|
||||
def summarize_results(dirname, stats_languages=None):
|
||||
all_results = load_results(dirname, stats_languages)
|
||||
|
||||
res = SimpleNamespace()
|
||||
res.total_tests = len(list(Path(dirname).glob("*")))
|
||||
res.total_tests = len(list(Path(dirname).glob("*/exercises/practice/*")))
|
||||
|
||||
try:
|
||||
tries = max(len(results.get("tests_outcomes", [])) for results in all_results if results)
|
||||
@@ -456,9 +524,10 @@ def summarize_results(dirname):
|
||||
percents[i] = pass_rate
|
||||
# console.print(f"{pass_rate:.1f}% correct after try {i+1}")
|
||||
setattr(res, f"pass_rate_{i + 1}", f"{pass_rate:.1f}")
|
||||
setattr(res, f"pass_num_{i + 1}", passed_tests[i])
|
||||
|
||||
print(f"- dirname: {dirname.name}")
|
||||
style = None if res.completed_tests in NUM_TESTS else "red"
|
||||
style = None if res.completed_tests == res.total_tests else "red"
|
||||
console.print(f" test_cases: {res.completed_tests}", style=style)
|
||||
for key, val in variants.items():
|
||||
if len(val) > 1:
|
||||
@@ -471,6 +540,8 @@ def summarize_results(dirname):
|
||||
|
||||
for i in range(tries):
|
||||
print(f" pass_rate_{i + 1}: {percents[i]:.1f}")
|
||||
for i in range(tries):
|
||||
print(f" pass_num_{i + 1}: {passed_tests[i]}")
|
||||
|
||||
pct_well_formed = 1.0 - res.num_with_malformed_responses / res.completed_tests
|
||||
print(f" percent_cases_well_formed: {pct_well_formed * 100:.1f}")
|
||||
@@ -484,10 +555,12 @@ def summarize_results(dirname):
|
||||
show("indentation_errors")
|
||||
show("exhausted_context_windows")
|
||||
show("test_timeouts")
|
||||
print(f" total_tests: {res.total_tests}")
|
||||
|
||||
a_model = set(variants["model"]).pop()
|
||||
command = f"aider --model {a_model}"
|
||||
print(f" command: {command}")
|
||||
if variants["model"]:
|
||||
a_model = set(variants["model"]).pop()
|
||||
command = f"aider --model {a_model}"
|
||||
print(f" command: {command}")
|
||||
|
||||
print(f" date: {date}")
|
||||
print(" versions:", ",".join(versions))
|
||||
@@ -590,25 +663,78 @@ def run_test_real(
|
||||
if results_fname.exists():
|
||||
try:
|
||||
res = json.loads(results_fname.read_text())
|
||||
# if res.get("test_timeouts", 0) > 0:
|
||||
# print(f"{results_fname} test timeouts, redoing...")
|
||||
# else:
|
||||
return res
|
||||
except JSONDecodeError:
|
||||
print(f"{results_fname} failed to parse, skipping")
|
||||
return
|
||||
print(f"{results_fname} failed to parse, redoing...")
|
||||
|
||||
# Read solution and test files from config
|
||||
fnames = []
|
||||
for fname in testdir.glob("*"):
|
||||
if (
|
||||
"test" not in fname.name
|
||||
and fname.is_file()
|
||||
and fname.name[0] != "."
|
||||
and fname.suffix == ".py"
|
||||
):
|
||||
fnames.append(fname)
|
||||
config_file = testdir / ".meta/config.json"
|
||||
if not config_file.exists():
|
||||
raise ValueError(f"No config file found: {config_file}")
|
||||
|
||||
with open(config_file) as f:
|
||||
config = json.loads(f.read())
|
||||
|
||||
# Get file sets from config
|
||||
test_files = config.get("files", {}).get("test", [])
|
||||
example_files = config.get("files", {}).get("example", [])
|
||||
solution_files = set(config.get("files", {}).get("solution", []))
|
||||
|
||||
# Forcibly ignore certain files not covered by test_files and example_files
|
||||
ignore_files = set(
|
||||
[
|
||||
"CMakeLists.txt",
|
||||
"Cargo.toml",
|
||||
]
|
||||
)
|
||||
|
||||
# Add all files under .meta and .docs directories
|
||||
ignore_files.update(str(p.relative_to(testdir)) for p in testdir.glob(".meta/**/*"))
|
||||
ignore_files.update(str(p.relative_to(testdir)) for p in testdir.glob(".docs/**/*"))
|
||||
|
||||
# Also ignore test & example files
|
||||
ignore_files.update(test_files)
|
||||
ignore_files.update(example_files)
|
||||
|
||||
# Remove any ignore files from the solution set that LLM will edit
|
||||
solution_files.discard(ignore_files)
|
||||
|
||||
# Copy all solution files
|
||||
for file_path in solution_files:
|
||||
src = testdir / Path(file_path)
|
||||
if src.exists():
|
||||
fnames.append(src)
|
||||
# restore the original file, in case we interrupted a prev run
|
||||
# after it had saved changes
|
||||
original_fname = original_dname / testdir.name / fname.name
|
||||
shutil.copy(original_fname, fname)
|
||||
# Find the original file in the language-specific practice dir
|
||||
lang_part = str(testdir).split("/exercises/practice/")[0]
|
||||
original_fname = (
|
||||
original_dname
|
||||
/ Path(lang_part).name
|
||||
/ "exercises"
|
||||
/ "practice"
|
||||
/ testdir.name
|
||||
/ file_path
|
||||
)
|
||||
if original_fname.exists():
|
||||
os.makedirs(src.parent, exist_ok=True)
|
||||
shutil.copy(original_fname, src)
|
||||
else:
|
||||
print(f"Warning: Solution file not found: {src}")
|
||||
|
||||
# Copy all test files
|
||||
for file_path in test_files:
|
||||
src = testdir / Path(file_path)
|
||||
if src.exists():
|
||||
original_fname = original_dname / testdir.name / file_path
|
||||
if original_fname.exists():
|
||||
os.makedirs(src.parent, exist_ok=True)
|
||||
shutil.copy(original_fname, src)
|
||||
else:
|
||||
print(f"Warning: Test file not found: {src}")
|
||||
|
||||
file_list = " ".join(fname.name for fname in fnames)
|
||||
|
||||
@@ -662,7 +788,10 @@ def run_test_real(
|
||||
# auto_lint=False, # disabled for code-in-json experiments
|
||||
cache_prompts=True,
|
||||
suggest_shell_commands=False,
|
||||
ignore_mentions=ignore_files,
|
||||
)
|
||||
dump(coder.ignore_mentions)
|
||||
|
||||
coder.max_apply_update_errors = max_apply_update_errors
|
||||
coder.show_announcements()
|
||||
|
||||
@@ -705,8 +834,11 @@ def run_test_real(
|
||||
break
|
||||
|
||||
try:
|
||||
errors = run_unit_tests(testdir, history_fname)
|
||||
errors = run_unit_tests(original_dname, testdir, history_fname, test_files)
|
||||
except subprocess.TimeoutExpired:
|
||||
# try:
|
||||
# errors = run_unit_tests(original_dname, testdir, history_fname, test_files)
|
||||
# except subprocess.TimeoutExpired:
|
||||
errors = "Tests timed out!"
|
||||
timeouts += 1
|
||||
|
||||
@@ -725,7 +857,6 @@ def run_test_real(
|
||||
indentation_errors += sum(1 for line in errors if line.startswith("IndentationError"))
|
||||
|
||||
print(errors[-1])
|
||||
errors = errors[:50]
|
||||
errors = "\n".join(errors)
|
||||
instructions = errors
|
||||
instructions += prompts.test_failures.format(file_list=file_list)
|
||||
@@ -765,22 +896,50 @@ def run_test_real(
|
||||
return results
|
||||
|
||||
|
||||
def run_unit_tests(testdir, history_fname):
|
||||
command = [
|
||||
"python",
|
||||
"-m",
|
||||
"unittest",
|
||||
"discover",
|
||||
"-s",
|
||||
str(testdir),
|
||||
"-t",
|
||||
str(testdir),
|
||||
"-p",
|
||||
"*_test.py",
|
||||
]
|
||||
print(" ".join(command))
|
||||
def run_unit_tests(original_dname, testdir, history_fname, test_files):
|
||||
timeout = 60 * 3
|
||||
|
||||
timeout = 60
|
||||
# Remove @Disabled annotations from Java test files
|
||||
for file_path in test_files:
|
||||
if file_path.endswith(".java"):
|
||||
test_file = testdir / file_path
|
||||
if test_file.exists():
|
||||
content = test_file.read_text()
|
||||
content = re.sub(r"@Disabled\([^)]*\)\s*\n", "", content)
|
||||
test_file.write_text(content)
|
||||
|
||||
# Map of file extensions to test commands
|
||||
TEST_COMMANDS = {
|
||||
".py": ["pytest"],
|
||||
".rs": ["cargo", "test", "--", "--include-ignored"],
|
||||
".go": ["go", "test", "./..."],
|
||||
".js": ["/aider/benchmark/npm-test.sh"],
|
||||
".cpp": ["/aider/benchmark/cpp-test.sh"],
|
||||
".java": ["./gradlew", "test"],
|
||||
}
|
||||
|
||||
# Get unique file extensions from test files
|
||||
extensions = {Path(f).suffix for f in test_files}
|
||||
|
||||
# Find matching test command
|
||||
command = None
|
||||
for ext in extensions:
|
||||
if ext in TEST_COMMANDS:
|
||||
command = TEST_COMMANDS[ext]
|
||||
break
|
||||
|
||||
if not command:
|
||||
raise ValueError(f"No test command found for files with extensions: {extensions}")
|
||||
|
||||
# Copy test files from original directory
|
||||
for file_path in test_files:
|
||||
src = original_dname / testdir.name / file_path
|
||||
dst = testdir / file_path
|
||||
if src.exists():
|
||||
os.makedirs(dst.parent, exist_ok=True)
|
||||
shutil.copy(src, dst)
|
||||
|
||||
print(" ".join(command))
|
||||
|
||||
result = subprocess.run(
|
||||
command,
|
||||
@@ -788,11 +947,13 @@ def run_unit_tests(testdir, history_fname):
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
timeout=timeout,
|
||||
cwd=testdir,
|
||||
)
|
||||
|
||||
success = result.returncode == 0
|
||||
res = result.stdout
|
||||
res = cleanup_test_output(res, testdir)
|
||||
dump(res)
|
||||
|
||||
with history_fname.open("a") as fh:
|
||||
fh.write(f"```\n{res}\n```")
|
||||
@@ -804,25 +965,7 @@ def run_unit_tests(testdir, history_fname):
|
||||
|
||||
def cleanup_test_output(output, testdir):
|
||||
# remove timing info, to avoid randomizing the response to GPT
|
||||
res = re.sub(
|
||||
r"^Ran \d+ tests in \d+\.\d+s$",
|
||||
"",
|
||||
output,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
res = re.sub(
|
||||
r"^====*$",
|
||||
"====",
|
||||
res,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
res = re.sub(
|
||||
r"^----*$",
|
||||
"----",
|
||||
res,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
|
||||
res = re.sub(r"\bin \d+\.\d+s\b", "", output)
|
||||
res = res.replace(str(testdir), str(testdir.name))
|
||||
return res
|
||||
|
||||
|
||||
20
benchmark/clone-exercism.sh
Executable file
20
benchmark/clone-exercism.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Create directories if they don't exist
|
||||
mkdir -p tmp.benchmarks/exercism
|
||||
|
||||
# Change to the exercism directory
|
||||
cd tmp.benchmarks/exercism
|
||||
|
||||
# List of languages to clone
|
||||
languages=("cpp" "go" "java" "javascript" "python" "rust")
|
||||
|
||||
# Clone each repository
|
||||
for lang in "${languages[@]}"; do
|
||||
if [ ! -d "$lang" ]; then
|
||||
echo "Cloning $lang repository..."
|
||||
git clone "https://github.com/exercism/$lang"
|
||||
else
|
||||
echo "$lang repository already exists"
|
||||
fi
|
||||
done
|
||||
11
benchmark/cpp-test.sh
Executable file
11
benchmark/cpp-test.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
[ ! -d "build" ] && mkdir build
|
||||
cd build
|
||||
cmake -G "Unix Makefiles" ..
|
||||
make
|
||||
|
||||
|
||||
@@ -7,6 +7,10 @@ docker run \
|
||||
-v `pwd`/tmp.benchmarks/.:/benchmarks \
|
||||
-e OPENAI_API_KEY=$OPENAI_API_KEY \
|
||||
-e HISTFILE=/aider/.bash_history \
|
||||
-e PROMPT_COMMAND='history -a' \
|
||||
-e HISTCONTROL=ignoredups \
|
||||
-e HISTSIZE=10000 \
|
||||
-e HISTFILESIZE=20000 \
|
||||
-e AIDER_DOCKER=1 \
|
||||
-e AIDER_BENCHMARK_DIR=/benchmarks \
|
||||
aider-benchmark \
|
||||
|
||||
63
benchmark/install-docker-ubuntu.sh
Executable file
63
benchmark/install-docker-ubuntu.sh
Executable file
@@ -0,0 +1,63 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit on error
|
||||
set -e
|
||||
|
||||
# Update package index
|
||||
echo "Updating package index..."
|
||||
sudo apt-get update
|
||||
|
||||
# Install prerequisites
|
||||
echo "Installing prerequisites..."
|
||||
sudo apt-get install -y \
|
||||
apt-transport-https \
|
||||
ca-certificates \
|
||||
curl \
|
||||
gnupg \
|
||||
lsb-release
|
||||
|
||||
# Add Docker's official GPG key
|
||||
echo "Adding Docker's GPG key..."
|
||||
sudo mkdir -p /etc/apt/keyrings
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
|
||||
# Set up the repository
|
||||
echo "Setting up Docker repository..."
|
||||
echo \
|
||||
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
|
||||
$(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
|
||||
# Update package index again
|
||||
sudo apt-get update
|
||||
|
||||
# Install Docker Engine
|
||||
echo "Installing Docker Engine..."
|
||||
sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
|
||||
|
||||
# Add current user to docker group and verify
|
||||
echo "Adding current user to docker group..."
|
||||
sudo usermod -aG docker $USER
|
||||
|
||||
# Verify group addition
|
||||
if getent group docker | grep -q "\b${USER}\b"; then
|
||||
echo "Successfully added $USER to docker group"
|
||||
else
|
||||
echo "Failed to add $USER to docker group. Retrying..."
|
||||
# Force group addition
|
||||
sudo gpasswd -a $USER docker
|
||||
fi
|
||||
|
||||
# Print success message and instructions
|
||||
echo "Docker installation completed successfully!"
|
||||
|
||||
# Start Docker service
|
||||
echo "Starting Docker service..."
|
||||
sudo systemctl start docker
|
||||
sudo systemctl enable docker
|
||||
|
||||
# Verify Docker installation and service status
|
||||
echo "Docker version:"
|
||||
docker --version
|
||||
|
||||
echo "Docker Compose version:"
|
||||
docker compose version
|
||||
13
benchmark/npm-test.sh
Executable file
13
benchmark/npm-test.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# Create symlinks if they don't exist
|
||||
[ ! -e node_modules ] && ln -s /npm-install/node_modules .
|
||||
[ ! -e package-lock.json ] && ln -s /npm-install/package-lock.json .
|
||||
|
||||
|
||||
sed -i 's/\bxtest(/test(/g' *.spec.js
|
||||
npm run test
|
||||
|
||||
353
benchmark/problem_stats.py
Executable file
353
benchmark/problem_stats.py
Executable file
@@ -0,0 +1,353 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shutil
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from aider.dump import dump # noqa
|
||||
|
||||
HARD_SET_NUM = 3 # Number of models that defines the hard set threshold
|
||||
|
||||
|
||||
def get_dirs_from_leaderboard():
|
||||
# Load the leaderboard data
|
||||
with open("aider/website/_data/edit_leaderboard.yml") as f:
|
||||
leaderboard = yaml.safe_load(f)
|
||||
return [(entry["dirname"], entry["model"]) for entry in leaderboard]
|
||||
|
||||
|
||||
def load_results(dirname):
|
||||
"""Load all result files from a benchmark directory"""
|
||||
dirname = Path(dirname)
|
||||
|
||||
benchmark_dir = dirname
|
||||
if not benchmark_dir.exists():
|
||||
benchmark_dir = Path("tmp.benchmarks") / dirname
|
||||
if not benchmark_dir.exists():
|
||||
return None
|
||||
|
||||
all_results = []
|
||||
parse_errors = [] # Track which exercises had parse errors for this model
|
||||
|
||||
# Look in language subdirectories under exercises/practice
|
||||
for fname in benchmark_dir.glob("*/exercises/practice/*/.aider.results.json"):
|
||||
error = False
|
||||
try:
|
||||
results = json.loads(fname.read_text())
|
||||
error = "testcase" not in results
|
||||
if not error:
|
||||
# Add language info to results
|
||||
lang = fname.parts[-5] # Get language from path
|
||||
results["language"] = lang
|
||||
all_results.append(results)
|
||||
|
||||
except json.JSONDecodeError:
|
||||
error = True
|
||||
|
||||
if error:
|
||||
# Track the parse error for this exercise/model combination
|
||||
lang = fname.parts[-5]
|
||||
exercise = f"{fname.parts[-2]}/{lang}" # Use directory name as testcase
|
||||
parse_errors.append(exercise)
|
||||
print(f"Bad results file {fname}")
|
||||
continue
|
||||
|
||||
return all_results, parse_errors
|
||||
|
||||
|
||||
def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False):
|
||||
PARSE_ERROR_M = 4 # Threshold for number of parse errors to DQ an exercise
|
||||
|
||||
if dirs is None:
|
||||
# Use leaderboard data if no directories specified
|
||||
dir_entries = get_dirs_from_leaderboard()
|
||||
else:
|
||||
# Use provided directories, with dirname as model name
|
||||
dir_entries = [(d, d) for d in dirs]
|
||||
|
||||
# Filter out entries that don't load and sort by pass rate
|
||||
valid_entries = []
|
||||
parse_errors_by_model = {} # Track which exercises had parse errors for each model
|
||||
|
||||
dump(dir_entries)
|
||||
|
||||
for dirname, model in dir_entries:
|
||||
results_data = load_results(dirname)
|
||||
|
||||
if results_data:
|
||||
results, model_parse_errors = results_data
|
||||
parse_errors_by_model[model] = set(model_parse_errors)
|
||||
# Calculate pass rate for sorting when using custom dirs
|
||||
if dirs is not None:
|
||||
pass_rate = sum(
|
||||
1 for r in results if r.get("tests_outcomes", []) and r["tests_outcomes"][-1]
|
||||
) / len(results)
|
||||
else:
|
||||
# Use existing pass rate from leaderboard
|
||||
pass_rate = next(
|
||||
(
|
||||
entry["pass_rate_2"]
|
||||
for entry in yaml.safe_load(
|
||||
open("aider/website/_data/edit_leaderboard.yml")
|
||||
)
|
||||
if entry["dirname"] == dirname
|
||||
),
|
||||
0,
|
||||
)
|
||||
valid_entries.append(((dirname, model), results, float(pass_rate)))
|
||||
|
||||
# Sort by pass rate and take top N if specified
|
||||
valid_entries.sort(key=lambda x: x[2], reverse=True)
|
||||
if topn:
|
||||
valid_entries = valid_entries[:topn]
|
||||
|
||||
# Get all exercise names from a complete run
|
||||
all_exercises = set()
|
||||
exercise_solutions = defaultdict(list)
|
||||
|
||||
# Get all unique exercise names from all results
|
||||
all_exercises = set()
|
||||
for (dirname, model), results, _ in valid_entries:
|
||||
if results:
|
||||
for result in results:
|
||||
try:
|
||||
all_exercises.add(result["testcase"] + "/" + result["language"])
|
||||
except KeyError:
|
||||
print(f"Warning: Missing testcase in {dirname}", json.dumps(result, indent=4))
|
||||
|
||||
for (dirname, model), results, _ in valid_entries:
|
||||
if not results:
|
||||
print(f"Could not load results for {dirname}")
|
||||
continue
|
||||
|
||||
for result in results:
|
||||
testcase = result.get("testcase")
|
||||
if not testcase:
|
||||
continue
|
||||
lang = result.get("language")
|
||||
if not lang:
|
||||
continue
|
||||
|
||||
testcase = f"{testcase}/{lang}"
|
||||
# Consider it solved if the last test attempt passed
|
||||
tests_outcomes = result.get("tests_outcomes", [])
|
||||
if tests_outcomes and tests_outcomes[-1]:
|
||||
exercise_solutions[testcase].append(model)
|
||||
|
||||
# Calculate never solved exercises
|
||||
never_solved = len(all_exercises - set(exercise_solutions.keys()))
|
||||
|
||||
# Print per-exercise statistics
|
||||
print("\nExercise Solution Statistics:")
|
||||
print("-" * 40)
|
||||
|
||||
# Add exercises that were never solved
|
||||
for exercise in all_exercises:
|
||||
if exercise not in exercise_solutions:
|
||||
exercise_solutions[exercise] = []
|
||||
|
||||
# Create list of (language, exercise) pairs with solution stats
|
||||
exercise_stats = []
|
||||
total_models = len(valid_entries)
|
||||
|
||||
for testcase in all_exercises:
|
||||
# Language is already in the testcase string
|
||||
lang = testcase.split("/")[0] # First part is the language
|
||||
models = exercise_solutions[testcase]
|
||||
num_solved = len(models)
|
||||
percent = (num_solved / total_models) * 100
|
||||
testcase = testcase.replace("exercises/", "") # Remove the exercises/ prefix
|
||||
# Remove duplicate language prefix (e.g. javascript/javascript/ -> javascript/)
|
||||
if testcase.startswith(f"{lang}/{lang}/"):
|
||||
testcase = testcase[len(lang) + 1 :]
|
||||
exercise_stats.append((lang, testcase, num_solved, percent))
|
||||
|
||||
# Sort all exercises by solve rate, then by exercise name
|
||||
exercise_stats.sort(
|
||||
key=lambda x: (-x[2], x[1])
|
||||
) # -x[2] for descending solve rate, x[1] for ascending exercise name
|
||||
|
||||
# Calculate max lengths for alignment after cleaning up paths
|
||||
max_name_len = max(len(f"{lang}/{testcase}") for lang, testcase, _, _ in exercise_stats)
|
||||
|
||||
# Print all exercises sorted by solve rate
|
||||
print("\nAll Exercises (sorted by solve rate):")
|
||||
for i, (lang, testcase, num_solved, percent) in enumerate(exercise_stats, 1):
|
||||
print(f"{i:>3}. {testcase:<{max_name_len}} : {num_solved:>3} solved ({percent:>5.1f}%)")
|
||||
|
||||
print("\nSummary:")
|
||||
solved_at_least_once = len([ex for ex, models in exercise_solutions.items() if models])
|
||||
solved_by_none = never_solved
|
||||
solved_by_all = len(
|
||||
[ex for ex, models in exercise_solutions.items() if len(models) == total_models]
|
||||
)
|
||||
|
||||
print(f"Total exercises solved at least once: {solved_at_least_once}")
|
||||
print(f"Never solved by any model: {solved_by_none}")
|
||||
if solved_by_none > 0:
|
||||
print("\nExercises never solved by any model:")
|
||||
unsolved = [ex for ex, models in exercise_solutions.items() if not models]
|
||||
for ex in sorted(unsolved):
|
||||
# Split into language and exercise parts
|
||||
lang, exercise = ex.split("/")
|
||||
# Reconstruct path in desired format
|
||||
formatted_path = f"{lang}/exercises/practice/{exercise}"
|
||||
print(f" {formatted_path}")
|
||||
print(f"\nSolved by all models: {solved_by_all}")
|
||||
print(
|
||||
f"Total exercises: {len(all_exercises)} = {solved_by_none} (none) + {solved_by_all} (all) +"
|
||||
f" {len(all_exercises) - solved_by_none - solved_by_all} (some)"
|
||||
)
|
||||
|
||||
# Distribution table of how many models solved each exercise
|
||||
print("\nDistribution of solutions:")
|
||||
print("Models Exercises Cumulative")
|
||||
print("-" * 35)
|
||||
counts = [0] * (total_models + 1)
|
||||
for ex, models in exercise_solutions.items():
|
||||
counts[len(models)] += 1
|
||||
|
||||
cumsum = 0
|
||||
for i, count in enumerate(counts):
|
||||
cumsum += count
|
||||
print(f"{i:>6d} {count:>9d} {cumsum:>10d}")
|
||||
|
||||
# Count parse errors per exercise
|
||||
parse_error_counts = defaultdict(int)
|
||||
for model_errors in parse_errors_by_model.values():
|
||||
for exercise in model_errors:
|
||||
parse_error_counts[exercise] += 1
|
||||
|
||||
# Find exercises to disqualify based on parse error threshold
|
||||
disqualified_exercises = {
|
||||
exercise for exercise, count in parse_error_counts.items() if count >= PARSE_ERROR_M
|
||||
}
|
||||
|
||||
if disqualified_exercises:
|
||||
print(
|
||||
f"\nDisqualified {len(disqualified_exercises)} exercises with {PARSE_ERROR_M}+ parse"
|
||||
" errors:"
|
||||
)
|
||||
for ex in sorted(disqualified_exercises):
|
||||
print(f" {ex} ({parse_error_counts[ex]} parse errors)")
|
||||
|
||||
# Collect the hard set (exercises solved by HARD_SET_NUM or fewer models)
|
||||
print(f"\nHard Set Analysis (exercises solved by ≤{HARD_SET_NUM} models):")
|
||||
print("-" * 60)
|
||||
hard_set = {
|
||||
ex
|
||||
for ex, models in exercise_solutions.items()
|
||||
if len(models) <= HARD_SET_NUM and ex not in disqualified_exercises
|
||||
}
|
||||
print(f"Total hard set exercises: {len(hard_set)}")
|
||||
|
||||
# Count total problems, unsolved problems, and hard set problems by language
|
||||
lang_totals = defaultdict(int)
|
||||
lang_unsolved = defaultdict(int)
|
||||
lang_hard_set = defaultdict(int)
|
||||
|
||||
for exercise in all_exercises:
|
||||
lang = exercise.split("/")[1] # Get language from path
|
||||
lang_totals[lang] += 1
|
||||
if not exercise_solutions[exercise]: # No models solved this exercise
|
||||
lang_unsolved[lang] += 1
|
||||
if exercise in hard_set: # Exercise is in the hard set
|
||||
lang_hard_set[lang] += 1
|
||||
|
||||
print("\nUnsolved and hard set problems by language:")
|
||||
print(f"{'Language':<12} {'Unsolved':>8} {'Hard Set':>9} {'Total':>7} {'%hardUnsolved':>8}")
|
||||
print("-" * 47)
|
||||
for lang in sorted(lang_totals.keys()):
|
||||
count = lang_unsolved[lang]
|
||||
hard = lang_hard_set[lang]
|
||||
total = lang_totals[lang]
|
||||
pct = (count / hard) * 100 if hard else -1
|
||||
print(f"{lang:<12} {count:>8} {hard:>9} {total:>7} {pct:>7.1f}%")
|
||||
print()
|
||||
|
||||
# For each model, compute performance on hard set
|
||||
model_hard_stats = []
|
||||
for (dirname, model), results, _ in valid_entries:
|
||||
if not results:
|
||||
continue
|
||||
|
||||
solved_hard = 0
|
||||
for result in results:
|
||||
testcase = result.get("testcase")
|
||||
if not testcase:
|
||||
continue
|
||||
lang = result.get("language")
|
||||
if not lang:
|
||||
continue
|
||||
|
||||
testcase = f"{testcase}/{lang}"
|
||||
if testcase in hard_set:
|
||||
tests_outcomes = result.get("tests_outcomes", [])
|
||||
if tests_outcomes and tests_outcomes[-1]:
|
||||
solved_hard += 1
|
||||
|
||||
pct = (solved_hard / len(hard_set)) * 100
|
||||
model_hard_stats.append((model, solved_hard, pct))
|
||||
|
||||
# Sort by number solved
|
||||
model_hard_stats.sort(key=lambda x: x[1], reverse=True)
|
||||
|
||||
print("\nModel performance on hard set:")
|
||||
print(f"{'Model':<55} {'Solved':<8} {'Percent':>7}")
|
||||
print("-" * 50)
|
||||
for model, solved, pct in model_hard_stats:
|
||||
print(f"{model:<55} {solved:>6d} {pct:>6.1f}%")
|
||||
|
||||
if copy_hard_set:
|
||||
# Create hard set directory
|
||||
src_dir = Path("tmp.benchmarks/exercism")
|
||||
dst_dir = Path("tmp.benchmarks/exercism-polyglot")
|
||||
|
||||
if dst_dir.exists():
|
||||
print(f"\nError: Destination directory {dst_dir} already exists")
|
||||
return
|
||||
|
||||
print(f"\nCopying hard set problems to {dst_dir}...")
|
||||
|
||||
# Create a set of (exercise, language) pairs from hard_set
|
||||
hard_set_pairs = {tuple(exercise.split("/")) for exercise in hard_set}
|
||||
|
||||
# Copy each hard set problem's directory
|
||||
copied_by_lang = defaultdict(int)
|
||||
for lang_dir in src_dir.glob("*/exercises/practice"):
|
||||
if not lang_dir.is_dir():
|
||||
continue
|
||||
|
||||
lang = lang_dir.parts[-3] # Get language from path
|
||||
for problem_dir in lang_dir.glob("*"):
|
||||
if (problem_dir.name, lang) in hard_set_pairs:
|
||||
rel_path = problem_dir.relative_to(src_dir)
|
||||
dst_path = dst_dir / rel_path
|
||||
dst_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copytree(problem_dir, dst_path)
|
||||
copied_by_lang[lang] += 1
|
||||
|
||||
total_copied = sum(copied_by_lang.values())
|
||||
print(f"\nCopied {total_copied} hard set problems:")
|
||||
for lang in sorted(copied_by_lang):
|
||||
print(f" {lang}: {copied_by_lang[lang]}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--topn", type=int, help="Only consider top N models by pass rate")
|
||||
parser.add_argument(
|
||||
"dirs", nargs="*", help="Directories to analyze (optional, defaults to leaderboard entries)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--copy-hard-set",
|
||||
action="store_true",
|
||||
help="Copy hard set problems to tmp.benchmarks/exercism-polygot",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
analyze_exercise_solutions(args.dirs if args.dirs else None, args.topn, args.copy_hard_set)
|
||||
@@ -3,7 +3,7 @@ instructions_addendum = """
|
||||
|
||||
Use the above instructions to modify the supplied files: {file_list}
|
||||
Don't change the names of existing functions or classes, as they may be referenced from other code like unit tests, etc.
|
||||
Only use standard python libraries, don't suggest installing any packages.
|
||||
Only use standard libraries, don't suggest installing any packages.
|
||||
""" # noqa: E501
|
||||
|
||||
|
||||
@@ -11,6 +11,6 @@ test_failures = """
|
||||
####
|
||||
|
||||
See the testing errors above.
|
||||
The tests are correct.
|
||||
The tests are correct, don't try and change them.
|
||||
Fix the code in {file_list} to resolve the errors.
|
||||
"""
|
||||
|
||||
33
benchmark/rsync.sh
Executable file
33
benchmark/rsync.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
if [ $# -ne 1 ]; then
|
||||
echo "Usage: $0 user@host"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DEST="$1"
|
||||
REPO_ROOT="$(git rev-parse --show-toplevel)"
|
||||
|
||||
# Create a temporary file for rsync exclude patterns
|
||||
EXCLUDE_FILE=$(mktemp)
|
||||
|
||||
# Convert .gitignore patterns to rsync exclude patterns
|
||||
git -C "$REPO_ROOT" ls-files --exclude-standard --others --ignored --directory > "$EXCLUDE_FILE"
|
||||
|
||||
# Create remote directory if needed
|
||||
ssh "$DEST" "mkdir -p ~/aider"
|
||||
|
||||
# Sync the repository
|
||||
rsync -avz --delete \
|
||||
--exclude-from="$EXCLUDE_FILE" \
|
||||
"$REPO_ROOT/" \
|
||||
"$DEST:~/aider/"
|
||||
|
||||
rsync -a .env .gitignore "$DEST:~/aider/."
|
||||
|
||||
rsync -a ~/dotfiles/screenrc "$DEST:.screenrc"
|
||||
|
||||
# Clean up
|
||||
rm "$EXCLUDE_FILE"
|
||||
@@ -1,7 +1,3 @@
|
||||
# [[[cog
|
||||
# from aider.help_pats import exclude_website_pats
|
||||
# ]]]
|
||||
# [[[end]]]
|
||||
|
||||
[project]
|
||||
name = "aider-chat"
|
||||
@@ -38,29 +34,11 @@ help = { file = "requirements/requirements-help.txt" }
|
||||
browser = { file = "requirements/requirements-browser.txt" }
|
||||
playwright = { file = "requirements/requirements-playwright.txt" }
|
||||
|
||||
[tool.setuptools]
|
||||
include-package-data = true
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["aider*", "aider.website"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"aider" = ["queries/*.scm"]
|
||||
"aider.website" = ["**/*.md"]
|
||||
|
||||
[tool.setuptools.exclude-package-data]
|
||||
"aider.website" = [
|
||||
# [[[cog
|
||||
# cog.out("\n".join(f' "{pat}",' for pat in exclude_website_pats))
|
||||
# ]]]
|
||||
"examples/**",
|
||||
"_posts/**",
|
||||
"HISTORY.md",
|
||||
"docs/benchmarks*md",
|
||||
"docs/ctags.md",
|
||||
"docs/unified-diffs.md",
|
||||
"docs/leaderboards/index.md",
|
||||
"assets/**",
|
||||
"**/.DS_Store",
|
||||
# [[[end]]]
|
||||
]
|
||||
include = ["aider"]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=68", "setuptools_scm[toml]>=8"]
|
||||
|
||||
0
scripts/__init__.py
Normal file
0
scripts/__init__.py
Normal file
@@ -18,6 +18,11 @@ website_files = [
|
||||
"aider/website/docs/leaderboards/index.md",
|
||||
]
|
||||
|
||||
exclude_files = [
|
||||
"aider/website/install.ps1",
|
||||
"aider/website/install.sh",
|
||||
]
|
||||
|
||||
|
||||
def blame(start_tag, end_tag=None):
|
||||
commits = get_all_commit_hashes_between_tags(start_tag, end_tag)
|
||||
@@ -36,6 +41,7 @@ def blame(start_tag, end_tag=None):
|
||||
]
|
||||
files = [f for f in files if not f.endswith("prompts.py")]
|
||||
files = [f for f in files if not f.startswith("tests/fixtures/watch")]
|
||||
files = [f for f in files if f not in exclude_files]
|
||||
|
||||
all_file_counts = {}
|
||||
grand_total = defaultdict(int)
|
||||
|
||||
95
scripts/my_models.py
Executable file
95
scripts/my_models.py
Executable file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
from collections import defaultdict, deque
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def collect_model_stats(n_lines=1000):
|
||||
"""Collect model usage statistics from the analytics file."""
|
||||
analytics_path = Path.home() / ".aider" / "analytics.jsonl"
|
||||
model_stats = defaultdict(int)
|
||||
|
||||
with open(analytics_path) as f:
|
||||
lines = deque(f, n_lines)
|
||||
for line in lines:
|
||||
try:
|
||||
event = json.loads(line)
|
||||
if event["event"] == "message_send":
|
||||
properties = event["properties"]
|
||||
main_model = properties.get("main_model")
|
||||
total_tokens = properties.get("total_tokens", 0)
|
||||
if main_model:
|
||||
model_stats[main_model] += total_tokens
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
return model_stats
|
||||
|
||||
|
||||
def format_text_table(model_stats):
|
||||
"""Format model statistics as a text table."""
|
||||
total_tokens = sum(model_stats.values())
|
||||
lines = []
|
||||
|
||||
lines.append("\nModel Token Usage Summary:")
|
||||
lines.append("-" * 80)
|
||||
lines.append(f"{'Model Name':<40} {'Total Tokens':>15} {'Percent':>10}")
|
||||
lines.append("-" * 80)
|
||||
|
||||
for model, tokens in sorted(model_stats.items(), key=lambda x: x[1], reverse=True):
|
||||
percentage = (tokens / total_tokens) * 100 if total_tokens > 0 else 0
|
||||
lines.append(f"{model:<40} {tokens:>15,} {percentage:>9.1f}%")
|
||||
|
||||
lines.append("-" * 80)
|
||||
lines.append(f"{'TOTAL':<40} {total_tokens:>15,} {100:>9.1f}%")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def format_html_table(model_stats):
|
||||
"""Format model statistics as an HTML table."""
|
||||
total_tokens = sum(model_stats.values())
|
||||
|
||||
html = [
|
||||
"<style>",
|
||||
"table { border-collapse: collapse; width: 100%; }",
|
||||
"th, td { padding: 8px; text-align: left; border-bottom: 1px solid #ddd; }",
|
||||
"th { background-color: #f2f2f2; }",
|
||||
"tr:hover { background-color: #f5f5f5; }",
|
||||
".right { text-align: right; }",
|
||||
"</style>",
|
||||
"<table>",
|
||||
(
|
||||
"<tr><th>Model Name</th><th class='right'>Total Tokens</th><th"
|
||||
" class='right'>Percent</th></tr>"
|
||||
),
|
||||
]
|
||||
|
||||
for model, tokens in sorted(model_stats.items(), key=lambda x: x[1], reverse=True):
|
||||
percentage = (tokens / total_tokens) * 100 if total_tokens > 0 else 0
|
||||
html.append(
|
||||
f"<tr><td>{model}</td>"
|
||||
f"<td class='right'>{tokens:,}</td>"
|
||||
f"<td class='right'>{percentage:.1f}%</td></tr>"
|
||||
)
|
||||
|
||||
html.append("</table>")
|
||||
|
||||
# Add note about redacted models if any are present
|
||||
if any("REDACTED" in model for model in model_stats.keys()):
|
||||
html.extend(
|
||||
[
|
||||
"",
|
||||
"{: .note :}",
|
||||
"Some models show as REDACTED, because they are new or unpopular models.",
|
||||
'Aider\'s analytics only records the names of "well known" LLMs.',
|
||||
]
|
||||
)
|
||||
|
||||
return "\n".join(html)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
stats = collect_model_stats()
|
||||
print(format_text_table(stats))
|
||||
@@ -3,4 +3,6 @@
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
./scripts/blame.py v0.1.0 --all --output aider/website/_data/blame.yml
|
||||
# Use first argument as version if provided, otherwise default to v0.1.0
|
||||
VERSION=${1:-v0.1.0}
|
||||
./scripts/blame.py "$VERSION" --all --output aider/website/_data/blame.yml
|
||||
|
||||
@@ -11,6 +11,7 @@ fi
|
||||
|
||||
if [ "$ARG" != "--check" ]; then
|
||||
tail -1000 ~/.aider/analytics.jsonl > aider/website/assets/sample-analytics.jsonl
|
||||
cog -r aider/website/docs/faq.md
|
||||
fi
|
||||
|
||||
# README.md before index.md, because index.md uses cog to include README.md
|
||||
|
||||
@@ -10,15 +10,6 @@ import sys
|
||||
from packaging import version
|
||||
|
||||
|
||||
def check_cog_pyproject():
|
||||
result = subprocess.run(["cog", "--check", "pyproject.toml"], capture_output=True, text=True)
|
||||
|
||||
if result.returncode != 0:
|
||||
print("Error: cog --check pyproject.toml failed, updating.")
|
||||
subprocess.run(["cog", "-r", "pyproject.toml"])
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Bump version")
|
||||
parser.add_argument("new_version", help="New version in x.y.z format")
|
||||
@@ -82,7 +73,6 @@ def main():
|
||||
dry_run = args.dry_run
|
||||
|
||||
# Perform checks before proceeding
|
||||
check_cog_pyproject()
|
||||
check_branch()
|
||||
check_working_directory_clean()
|
||||
check_main_branch_up_to_date()
|
||||
|
||||
@@ -94,9 +94,6 @@ def test_need_to_ask(temp_data_dir):
|
||||
assert analytics.need_to_ask(True) is True
|
||||
assert analytics.need_to_ask(False) is False
|
||||
|
||||
analytics.user_id = "111"
|
||||
assert analytics.need_to_ask(None) is False
|
||||
|
||||
analytics.user_id = "000"
|
||||
assert analytics.need_to_ask(None) is True
|
||||
|
||||
@@ -108,32 +105,32 @@ def test_need_to_ask(temp_data_dir):
|
||||
|
||||
|
||||
def test_is_uuid_in_percentage():
|
||||
analytics = Analytics()
|
||||
from aider.analytics import is_uuid_in_percentage
|
||||
|
||||
# Test basic percentage thresholds
|
||||
assert analytics.is_uuid_in_percentage("00000000000000000000000000000000", 1) is True
|
||||
assert analytics.is_uuid_in_percentage("01999000000000000000000000000000", 1) is True
|
||||
assert analytics.is_uuid_in_percentage("02000000000000000000000000000000", 1) is True
|
||||
assert analytics.is_uuid_in_percentage("02910000000000000000000000000001", 1) is False
|
||||
assert analytics.is_uuid_in_percentage("03000000000000000000000000000000", 1) is False
|
||||
assert analytics.is_uuid_in_percentage("ff000000000000000000000000000000", 1) is False
|
||||
assert is_uuid_in_percentage("00000000000000000000000000000000", 1) is True
|
||||
assert is_uuid_in_percentage("01999000000000000000000000000000", 1) is True
|
||||
assert is_uuid_in_percentage("02000000000000000000000000000000", 1) is True
|
||||
assert is_uuid_in_percentage("02910000000000000000000000000001", 1) is False
|
||||
assert is_uuid_in_percentage("03000000000000000000000000000000", 1) is False
|
||||
assert is_uuid_in_percentage("ff000000000000000000000000000000", 1) is False
|
||||
|
||||
assert analytics.is_uuid_in_percentage("00000000000000000000000000000000", 10) is True
|
||||
assert analytics.is_uuid_in_percentage("19000000000000000000000000000000", 10) is True
|
||||
assert analytics.is_uuid_in_percentage("1a000000000000000000000000000000", 10) is False
|
||||
assert analytics.is_uuid_in_percentage("ff000000000000000000000000000000", 10) is False
|
||||
assert is_uuid_in_percentage("00000000000000000000000000000000", 10) is True
|
||||
assert is_uuid_in_percentage("19000000000000000000000000000000", 10) is True
|
||||
assert is_uuid_in_percentage("1a000000000000000000000000000000", 10) is False
|
||||
assert is_uuid_in_percentage("ff000000000000000000000000000000", 10) is False
|
||||
|
||||
# Test edge cases
|
||||
assert analytics.is_uuid_in_percentage("00000000000000000000000000000000", 0) is False
|
||||
assert analytics.is_uuid_in_percentage("00000000000000000000000000000000", 100) is True
|
||||
assert analytics.is_uuid_in_percentage("ffffffffffffffffffffffffffffffff", 100) is True
|
||||
assert is_uuid_in_percentage("00000000000000000000000000000000", 0) is False
|
||||
assert is_uuid_in_percentage("00000000000000000000000000000000", 100) is True
|
||||
assert is_uuid_in_percentage("ffffffffffffffffffffffffffffffff", 100) is True
|
||||
|
||||
# Test invalid inputs
|
||||
with pytest.raises(ValueError):
|
||||
analytics.is_uuid_in_percentage("00000000000000000000000000000000", -1)
|
||||
is_uuid_in_percentage("00000000000000000000000000000000", -1)
|
||||
with pytest.raises(ValueError):
|
||||
analytics.is_uuid_in_percentage("00000000000000000000000000000000", 101)
|
||||
is_uuid_in_percentage("00000000000000000000000000000000", 101)
|
||||
|
||||
# Test empty/None UUID
|
||||
assert analytics.is_uuid_in_percentage("", 50) is False
|
||||
assert analytics.is_uuid_in_percentage(None, 50) is False
|
||||
assert is_uuid_in_percentage("", 50) is False
|
||||
assert is_uuid_in_percentage(None, 50) is False
|
||||
|
||||
@@ -140,8 +140,15 @@ class TestMain(TestCase):
|
||||
|
||||
self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
|
||||
|
||||
# Test without .env file present
|
||||
gitignore.write_text("one\ntwo\n")
|
||||
check_gitignore(cwd, io)
|
||||
self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
|
||||
|
||||
# Test with .env file present
|
||||
env_file = cwd / ".env"
|
||||
env_file.touch()
|
||||
check_gitignore(cwd, io)
|
||||
self.assertEqual("one\ntwo\n.aider*\n.env\n", gitignore.read_text())
|
||||
del os.environ["GIT_CONFIG_GLOBAL"]
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@ from git import GitError, Repo
|
||||
|
||||
from aider import urls
|
||||
from aider.main import sanity_check_repo
|
||||
from aider.repo import GitRepo
|
||||
from aider.io import InputOutput
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -182,3 +184,41 @@ def test_sanity_check_repo_with_no_repo(mock_io):
|
||||
# Assert that no errors or outputs were logged
|
||||
mock_io.tool_error.assert_not_called()
|
||||
mock_io.tool_output.assert_not_called()
|
||||
|
||||
|
||||
def corrupt_git_index(repo_path):
|
||||
index_path = os.path.join(repo_path, ".git", "index")
|
||||
with open(index_path, "r+b") as f:
|
||||
# Verify the file has the correct signature
|
||||
signature = f.read(4)
|
||||
if signature != b"DIRC":
|
||||
raise ValueError("Invalid git index file signature.")
|
||||
|
||||
# Seek to the data section and inject invalid bytes to simulate encoding error
|
||||
f.seek(77)
|
||||
f.write(b"\xF5" * 5)
|
||||
|
||||
|
||||
def test_sanity_check_repo_with_corrupt_index(create_repo, mock_io):
|
||||
repo_path, repo = create_repo
|
||||
# Corrupt the Git index file
|
||||
corrupt_git_index(repo_path)
|
||||
|
||||
# Create GitRepo instance
|
||||
git_repo = GitRepo(InputOutput(), None, repo_path)
|
||||
|
||||
# Call the function
|
||||
result = sanity_check_repo(git_repo, mock_io)
|
||||
|
||||
# Assert that the function returns False
|
||||
assert result is False
|
||||
|
||||
# Assert that the appropriate error messages were logged
|
||||
mock_io.tool_error.assert_called_with("Unable to read git repository, it may be corrupt?")
|
||||
mock_io.tool_output.assert_called_with(
|
||||
(
|
||||
"Failed to read the Git repository. This issue is likely caused by a path encoded "
|
||||
"in a format different from the expected encoding \"utf-8\".\n"
|
||||
"Internal error: 'utf-8' codec can't decode byte 0xf5 in position 3: invalid start byte"
|
||||
)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user