mirror of
https://github.com/Aider-AI/aider
synced 2026-04-25 17:15:07 +02:00
Merge pull request #4475 from lreeves/patch-1
Add GPT-5 with all reasoning settings to polyglot leaderboard
This commit is contained in:
@@ -1711,3 +1711,90 @@
|
||||
versions: 0.85.3.dev
|
||||
seconds_per_case: 35.5
|
||||
total_cost: 0.7406
|
||||
|
||||
- dirname: 2025-08-23-15-47-21--gpt-5-high
|
||||
test_cases: 225
|
||||
model: openai/gpt-5
|
||||
edit_format: diff
|
||||
commit_hash: 32faf82
|
||||
reasoning_effort: high
|
||||
pass_rate_1: 52.0
|
||||
pass_rate_2: 88.0
|
||||
pass_num_1: 117
|
||||
pass_num_2: 198
|
||||
percent_cases_well_formed: 91.6
|
||||
error_outputs: 23
|
||||
num_malformed_responses: 22
|
||||
num_with_malformed_responses: 19
|
||||
user_asks: 96
|
||||
lazy_comments: 3
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
prompt_tokens: 2675561
|
||||
completion_tokens: 2623429
|
||||
test_timeouts: 3
|
||||
total_tests: 225
|
||||
command: aider --model openai/gpt-5
|
||||
date: 2025-08-23
|
||||
versions: 0.86.2.dev
|
||||
seconds_per_case: 194.0
|
||||
total_cost: 29.0829
|
||||
|
||||
- dirname: 2025-08-25-13-23-27--gpt-5-medium
|
||||
test_cases: 225
|
||||
model: openai/gpt-5
|
||||
edit_format: diff
|
||||
commit_hash: 32faf82
|
||||
reasoning_effort: medium
|
||||
pass_rate_1: 49.8
|
||||
pass_rate_2: 86.7
|
||||
pass_num_1: 112
|
||||
pass_num_2: 195
|
||||
percent_cases_well_formed: 88.4
|
||||
error_outputs: 40
|
||||
num_malformed_responses: 40
|
||||
num_with_malformed_responses: 26
|
||||
user_asks: 102
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
prompt_tokens: 2827261
|
||||
completion_tokens: 1468799
|
||||
test_timeouts: 0
|
||||
total_tests: 225
|
||||
command: aider --model openai/gpt-5
|
||||
date: 2025-08-25
|
||||
versions: 0.86.2.dev
|
||||
seconds_per_case: 118.7
|
||||
total_cost: 17.6930
|
||||
|
||||
- dirname: 2025-08-25-14-16-37--gpt-5-low
|
||||
test_cases: 225
|
||||
model: openai/gpt-5
|
||||
edit_format: diff
|
||||
commit_hash: 32faf82
|
||||
reasoning_effort: low
|
||||
pass_rate_1: 43.1
|
||||
pass_rate_2: 81.3
|
||||
pass_num_1: 97
|
||||
pass_num_2: 183
|
||||
percent_cases_well_formed: 86.7
|
||||
error_outputs: 46
|
||||
num_malformed_responses: 46
|
||||
num_with_malformed_responses: 30
|
||||
user_asks: 113
|
||||
lazy_comments: 1
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
prompt_tokens: 2534059
|
||||
completion_tokens: 779568
|
||||
test_timeouts: 1
|
||||
total_tests: 225
|
||||
command: aider --model openai/gpt-5
|
||||
date: 2025-08-25
|
||||
versions: 0.86.2.dev
|
||||
seconds_per_case: 62.4
|
||||
total_cost: 10.3713
|
||||
|
||||
Reference in New Issue
Block a user