mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Fix] Fix IFEval (#906)
* fix ifeval * fix ifeval * fix ifeval * fix ifeval
This commit is contained in:
parent
32ba0b074e
commit
45c606bcd0
@ -1,4 +1,4 @@
|
|||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .IFEval_gen_3321a3 import humaneval_datasets # noqa: F401, F403
|
from .IFEval_gen_3321a3 import ifeval_datasets # noqa: F401, F403
|
@ -26,7 +26,7 @@ ifeval_datasets = [
|
|||||||
dict(
|
dict(
|
||||||
abbr='IFEval',
|
abbr='IFEval',
|
||||||
type=IFEvalDataset,
|
type=IFEvalDataset,
|
||||||
path='./data_1/ifeval/input_data.jsonl',
|
path='data/ifeval/input_data.jsonl',
|
||||||
reader_cfg=ifeval_reader_cfg,
|
reader_cfg=ifeval_reader_cfg,
|
||||||
infer_cfg=ifeval_infer_cfg,
|
infer_cfg=ifeval_infer_cfg,
|
||||||
eval_cfg=ifeval_eval_cfg)
|
eval_cfg=ifeval_eval_cfg)
|
||||||
|
@ -20,7 +20,7 @@ from typing import Dict, Optional, Union
|
|||||||
|
|
||||||
from absl import flags
|
from absl import flags
|
||||||
|
|
||||||
import opencompass.datasets.ifeval.instructions_registry as instructions_registry
|
import opencompass.datasets.IFEval.instructions_registry as instructions_registry
|
||||||
|
|
||||||
_INPUT_DATA = flags.DEFINE_string('input_data',
|
_INPUT_DATA = flags.DEFINE_string('input_data',
|
||||||
None,
|
None,
|
||||||
@ -69,8 +69,6 @@ def test_instruction_following_strict(
|
|||||||
instruction_cls = instructions_registry.INSTRUCTION_DICT[
|
instruction_cls = instructions_registry.INSTRUCTION_DICT[
|
||||||
instruction_id]
|
instruction_id]
|
||||||
instruction = instruction_cls(instruction_id)
|
instruction = instruction_cls(instruction_id)
|
||||||
print(index)
|
|
||||||
print(inp)
|
|
||||||
instruction.build_description(**inp.kwargs[index])
|
instruction.build_description(**inp.kwargs[index])
|
||||||
args = instruction.get_instruction_args()
|
args = instruction.get_instruction_args()
|
||||||
if args and 'prompt' in args:
|
if args and 'prompt' in args:
|
||||||
|
@ -29,7 +29,6 @@ class IFEvaluator(BaseEvaluator):
|
|||||||
def score(self, predictions, references):
|
def score(self, predictions, references):
|
||||||
results = []
|
results = []
|
||||||
for pred, refer in zip(predictions, references):
|
for pred, refer in zip(predictions, references):
|
||||||
print(refer)
|
|
||||||
input = InputExample(
|
input = InputExample(
|
||||||
key=refer['key'],
|
key=refer['key'],
|
||||||
instruction_id_list=refer['instruction_id_list'],
|
instruction_id_list=refer['instruction_id_list'],
|
||||||
|
@ -22,10 +22,14 @@ import re
|
|||||||
import string
|
import string
|
||||||
from typing import Dict, Optional, Sequence, Union
|
from typing import Dict, Optional, Sequence, Union
|
||||||
|
|
||||||
import langdetect
|
try:
|
||||||
|
import langdetect
|
||||||
|
except ImportError:
|
||||||
|
langdetect = None
|
||||||
|
|
||||||
from absl import logging
|
from absl import logging
|
||||||
|
|
||||||
import opencompass.datasets.ifeval.instructions_util as instructions_util
|
import opencompass.datasets.IFEval.instructions_util as instructions_util
|
||||||
|
|
||||||
_InstructionArgsDtype = Optional[Dict[str, Union[int, str, Sequence[str]]]]
|
_InstructionArgsDtype = Optional[Dict[str, Union[int, str, Sequence[str]]]]
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""Registry of all instructions."""
|
"""Registry of all instructions."""
|
||||||
import opencompass.datasets.ifeval.instructions as instructions
|
import opencompass.datasets.IFEval.instructions as instructions
|
||||||
|
|
||||||
_KEYWORD = 'keywords:'
|
_KEYWORD = 'keywords:'
|
||||||
|
|
||||||
|
File diff suppressed because one or more lines are too long
@ -47,7 +47,7 @@ from .huggingface import * # noqa: F401, F403
|
|||||||
from .humaneval import * # noqa: F401, F403
|
from .humaneval import * # noqa: F401, F403
|
||||||
from .humanevalx import * # noqa: F401, F403
|
from .humanevalx import * # noqa: F401, F403
|
||||||
from .hungarian_math import * # noqa: F401, F403
|
from .hungarian_math import * # noqa: F401, F403
|
||||||
from .IFEval import * # noqa: F401, F403
|
from .IFEval.ifeval import IFEvalDataset, IFEvaluator # noqa: F401, F403
|
||||||
from .infinitebench import * # noqa: F401, F403
|
from .infinitebench import * # noqa: F401, F403
|
||||||
from .iwslt2017 import * # noqa: F401, F403
|
from .iwslt2017 import * # noqa: F401, F403
|
||||||
from .jigsawmultilingual import * # noqa: F401, F403
|
from .jigsawmultilingual import * # noqa: F401, F403
|
||||||
|
@ -9,7 +9,9 @@ evaluate>=0.3.0
|
|||||||
fairscale
|
fairscale
|
||||||
func_timeout
|
func_timeout
|
||||||
fuzzywuzzy
|
fuzzywuzzy
|
||||||
|
immutabledict
|
||||||
jieba
|
jieba
|
||||||
|
langdetect
|
||||||
ltp
|
ltp
|
||||||
mmengine-lite
|
mmengine-lite
|
||||||
nltk==3.8
|
nltk==3.8
|
||||||
|
Loading…
Reference in New Issue
Block a user