mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Fix] Fix acc of IFEval (#849)
* [Feature] Add IFEval * [Fix] Changing the Score Rule.
This commit is contained in:
parent
35aace776a
commit
2801883351
@ -36,10 +36,10 @@ Hark! Hearken to the tale of thy journey to the land of the rising sun, Japan. \
|
|||||||
## Evaluation results
|
## Evaluation results
|
||||||
|
|
||||||
```
|
```
|
||||||
dataset version metric mode internlm2-chat-20b-hf
|
dataset version metric mode qwen-72b-chat-hf mistral-7b-instruct-v0.2-hf mixtral-8x7b-instruct-v0.1 chatglm3-6b-hf
|
||||||
--------- --------- ---------- ------ -----------------------
|
--------- --------- ---------- ------ ------------------ ----------------------------- ---------------------------- ----------------
|
||||||
IFEval 3321a3 strict_acc gen 0.33
|
IFEval 27a9cc strict_acc gen 43.62 49.17 48.98 29.76
|
||||||
IFEval 3321a3 loose_acc gen 0.4
|
IFEval 27a9cc loose_acc gen 45.47 53.97 54.71 32.16
|
||||||
```
|
```
|
||||||
|
|
||||||
## Reference
|
## Reference
|
||||||
|
@ -50,4 +50,4 @@ class IFEvaluator(BaseEvaluator):
|
|||||||
loose = sum(
|
loose = sum(
|
||||||
[result['loose'].follow_all_instructions
|
[result['loose'].follow_all_instructions
|
||||||
for result in results]) / len(results)
|
for result in results]) / len(results)
|
||||||
return dict(strict_acc=strict, loose_acc=loose)
|
return dict(strict_acc=strict * 100, loose_acc=loose * 100)
|
||||||
|
Loading…
Reference in New Issue
Block a user