.. |
__init__.py
|
[Feature] Add Bradley-Terry Subjective Evaluation method to Arena Hard dataset (#1802)
|
2025-01-03 16:33:43 +08:00 |
alignbench.py
|
[Feature] Add Judgerbench and reorg subeval (#1593)
|
2024-10-15 16:36:05 +08:00 |
alpacaeval.py
|
[Feature] Added Bradley-Terry subjective evaluation
|
2024-12-31 11:01:23 +08:00 |
arena_hard.py
|
[Feature] Add Bradley-Terry Subjective Evaluation method to Arena Hard dataset (#1802)
|
2025-01-03 16:33:43 +08:00 |
compass_arena_subjective_bench.py
|
[Feature] Added CompassArena-SubjectiveBench with Bradley-Terry Model (#1751)
|
2024-12-16 13:41:28 +08:00 |
compass_arena.py
|
[Feature] Added Bradley-Terry subjective evaluation
|
2024-12-31 11:01:23 +08:00 |
compassbench_checklist.py
|
add new dataset summerizer (#1758)
|
2024-12-13 09:50:43 +08:00 |
compassbench_control_length_bias.py
|
[Refactor] Reorganize subjective eval (#1284)
|
2024-07-05 22:11:37 +08:00 |
compassbench.py
|
[Refactor] Reorganize subjective eval (#1284)
|
2024-07-05 22:11:37 +08:00 |
corev2.py
|
reorganize subject files (#801)
|
2024-01-16 18:03:11 +08:00 |
creationbench.py
|
reorganize subject files (#801)
|
2024-01-16 18:03:11 +08:00 |
flames.py
|
[Fix] fix Flames (#1599)
|
2024-10-12 14:34:59 +08:00 |
fofo.py
|
[Feature] Add Judgerbench and reorg subeval (#1593)
|
2024-10-15 16:36:05 +08:00 |
followbench.py
|
[Feature] Add Judgerbench and reorg subeval (#1593)
|
2024-10-15 16:36:05 +08:00 |
hellobench.py
|
Upload HelloBench (#1607)
|
2024-10-15 17:11:37 +08:00 |
judgerbench.py
|
[Feature] Add Judgerbench and reorg subeval (#1593)
|
2024-10-15 16:36:05 +08:00 |
mtbench101.py
|
[Feature] Add Judgerbench and reorg subeval (#1593)
|
2024-10-15 16:36:05 +08:00 |
mtbench.py
|
[Feature] Add Judgerbench and reorg subeval (#1593)
|
2024-10-15 16:36:05 +08:00 |
multiround.py
|
reorganize subject files (#801)
|
2024-01-16 18:03:11 +08:00 |
subjective_cmp.py
|
[Fix] Fix Slurm ENV (#1392)
|
2024-08-06 01:35:20 +08:00 |
utils.py
|
[Feature] Added CompassArena-SubjectiveBench with Bradley-Terry Model (#1751)
|
2024-12-16 13:41:28 +08:00 |
wildbench.py
|
[Feature] Added Bradley-Terry subjective evaluation
|
2024-12-31 11:01:23 +08:00 |