..
__init__.py
[Add] Add CompassArenaSubjectiveBench ( #1645 )
2024-11-01 13:52:22 +08:00
alignbench.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00
alpacaeval.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00
arena_hard.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00
compass_arena_subjective_bench.py
[Feature] Added CompassArena-SubjectiveBench with Bradley-Terry Model ( #1751 )
2024-12-16 13:41:28 +08:00
compass_arena.py
[Change] Change Compassarena metric ( #1749 )
2024-12-10 14:45:32 +08:00
compassbench_checklist.py
add new dataset summerizer ( #1758 )
2024-12-13 09:50:43 +08:00
compassbench_control_length_bias.py
[Refactor] Reorganize subjective eval ( #1284 )
2024-07-05 22:11:37 +08:00
compassbench.py
[Refactor] Reorganize subjective eval ( #1284 )
2024-07-05 22:11:37 +08:00
corev2.py
reorganize subject files ( #801 )
2024-01-16 18:03:11 +08:00
creationbench.py
reorganize subject files ( #801 )
2024-01-16 18:03:11 +08:00
flames.py
[Fix] fix Flames ( #1599 )
2024-10-12 14:34:59 +08:00
fofo.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00
followbench.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00
hellobench.py
Upload HelloBench ( #1607 )
2024-10-15 17:11:37 +08:00
judgerbench.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00
mtbench101.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00
mtbench.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00
multiround.py
reorganize subject files ( #801 )
2024-01-16 18:03:11 +08:00
subjective_cmp.py
[Fix] Fix Slurm ENV ( #1392 )
2024-08-06 01:35:20 +08:00
utils.py
[Feature] Added CompassArena-SubjectiveBench with Bradley-Terry Model ( #1751 )
2024-12-16 13:41:28 +08:00
wildbench.py
[Feature] Add Judgerbench and reorg subeval ( #1593 )
2024-10-15 16:36:05 +08:00