Merge pull request #4475 from lreeves/patch-1

Add GPT-5 with all reasoning settings to polyglot leaderboard
This commit is contained in:
paul-gauthier
2025-09-02 08:52:13 -07:00
committed by GitHub

View File

@@ -1711,3 +1711,90 @@
versions: 0.85.3.dev
seconds_per_case: 35.5
total_cost: 0.7406
- dirname: 2025-08-23-15-47-21--gpt-5-high
test_cases: 225
model: openai/gpt-5
edit_format: diff
commit_hash: 32faf82
reasoning_effort: high
pass_rate_1: 52.0
pass_rate_2: 88.0
pass_num_1: 117
pass_num_2: 198
percent_cases_well_formed: 91.6
error_outputs: 23
num_malformed_responses: 22
num_with_malformed_responses: 19
user_asks: 96
lazy_comments: 3
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
prompt_tokens: 2675561
completion_tokens: 2623429
test_timeouts: 3
total_tests: 225
command: aider --model openai/gpt-5
date: 2025-08-23
versions: 0.86.2.dev
seconds_per_case: 194.0
total_cost: 29.0829
- dirname: 2025-08-25-13-23-27--gpt-5-medium
test_cases: 225
model: openai/gpt-5
edit_format: diff
commit_hash: 32faf82
reasoning_effort: medium
pass_rate_1: 49.8
pass_rate_2: 86.7
pass_num_1: 112
pass_num_2: 195
percent_cases_well_formed: 88.4
error_outputs: 40
num_malformed_responses: 40
num_with_malformed_responses: 26
user_asks: 102
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
prompt_tokens: 2827261
completion_tokens: 1468799
test_timeouts: 0
total_tests: 225
command: aider --model openai/gpt-5
date: 2025-08-25
versions: 0.86.2.dev
seconds_per_case: 118.7
total_cost: 17.6930
- dirname: 2025-08-25-14-16-37--gpt-5-low
test_cases: 225
model: openai/gpt-5
edit_format: diff
commit_hash: 32faf82
reasoning_effort: low
pass_rate_1: 43.1
pass_rate_2: 81.3
pass_num_1: 97
pass_num_2: 183
percent_cases_well_formed: 86.7
error_outputs: 46
num_malformed_responses: 46
num_with_malformed_responses: 30
user_asks: 113
lazy_comments: 1
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
prompt_tokens: 2534059
completion_tokens: 779568
test_timeouts: 1
total_tests: 225
command: aider --model openai/gpt-5
date: 2025-08-25
versions: 0.86.2.dev
seconds_per_case: 62.4
total_cost: 10.3713