examples update

This commit is contained in:
Dongsheng Zhu 2025-04-06 04:25:07 +00:00
parent f568550633
commit b2c84058f2
2 changed files with 94 additions and 3 deletions

View File

@@ -150,5 +150,4 @@ summarizer = dict(
summary_groups=summary_groups, summary_groups=summary_groups,
) )
# work_dir = 'outputs/code' work_dir = 'outputs/code'
work_dir = 'outputs/code_2'

View File

@@ -64,6 +64,98 @@ for dataset in datasets:
dataset['eval_cfg']['evaluator']['num_repeats'] = num_repeats dataset['eval_cfg']['evaluator']['num_repeats'] = num_repeats
dataset['eval_cfg']['evaluator']['k'] = k dataset['eval_cfg']['evaluator']['k'] = k
dataset['num_repeats'] = num_repeats dataset['num_repeats'] = num_repeats
dataset['abbr'] += f'_passk' # dataset['abbr'] += f'_passk'
# summary
# Summary layout: one section per k in (1, 3, 5). Each section is a title
# string 'pass@k' followed by two-element rows (presumably
# [dataset abbreviation, metric name]); consecutive sections are separated
# by an empty-string row.
# The list is produced by a single nested comprehension; comprehension
# variables stay local, so no extra module-level names are defined.
summarizer = dict(
    dataset_abbrs=[
        row
        for k in (1, 3, 5)
        for row in [
            # Blank separator row in front of every section but the first.
            *([] if k == 1 else ['']),
            # Section title row.
            f'pass@{k}',
            ['bigcodebench_full_instruct_passk', f'pass@{k}'],
            ['bigcodebench_hard_instruct_passk', f'pass@{k}'],
            ['lcb_code_generation_passk', f'pass@{k}'],
            # Only this row uses a 'humaneval_'-prefixed metric name.
            ['openai_humaneval_passk_passk', f'humaneval_pass@{k}'],
            ['humaneval_pro_passk', f'pass@{k}'],
            ['mbpp_passk_passk', f'pass@{k}'],
            ['mbpp_pro_passk', f'pass@{k}'],
            # Multi-language rows: all humaneval-multiple-* rows first,
            # then all mbpp-multiple-* rows, same language order in both.
            *[
                [f'{suite}-multiple-{lang}_passk', f'pass@{k}']
                for suite in ('humaneval', 'mbpp')
                for lang in (
                    'cpp', 'cs', 'go', 'java', 'rb',
                    'js', 'php', 'r', 'rs', 'sh',
                )
            ],
        ]
    ],
)
work_dir = 'outputs/code_passk' work_dir = 'outputs/code_passk'