[Fix] Fix IFEval (#906)

* fix ifeval

* fix ifeval

* fix ifeval

* fix ifeval
This commit is contained in:
bittersweet1999 2024-02-22 16:51:34 +08:00 committed by GitHub
parent 32ba0b074e
commit 45c606bcd0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 17 additions and 11 deletions

View File

@@ -1,4 +1,4 @@
from mmengine.config import read_base from mmengine.config import read_base
with read_base(): with read_base():
from .IFEval_gen_3321a3 import humaneval_datasets # noqa: F401, F403 from .IFEval_gen_3321a3 import ifeval_datasets # noqa: F401, F403

View File

@@ -26,7 +26,7 @@ ifeval_datasets = [
dict( dict(
abbr='IFEval', abbr='IFEval',
type=IFEvalDataset, type=IFEvalDataset,
path='./data_1/ifeval/input_data.jsonl', path='data/ifeval/input_data.jsonl',
reader_cfg=ifeval_reader_cfg, reader_cfg=ifeval_reader_cfg,
infer_cfg=ifeval_infer_cfg, infer_cfg=ifeval_infer_cfg,
eval_cfg=ifeval_eval_cfg) eval_cfg=ifeval_eval_cfg)

View File

@@ -20,7 +20,7 @@ from typing import Dict, Optional, Union
from absl import flags from absl import flags
import opencompass.datasets.ifeval.instructions_registry as instructions_registry import opencompass.datasets.IFEval.instructions_registry as instructions_registry
_INPUT_DATA = flags.DEFINE_string('input_data', _INPUT_DATA = flags.DEFINE_string('input_data',
None, None,
@@ -69,8 +69,6 @@ def test_instruction_following_strict(
instruction_cls = instructions_registry.INSTRUCTION_DICT[ instruction_cls = instructions_registry.INSTRUCTION_DICT[
instruction_id] instruction_id]
instruction = instruction_cls(instruction_id) instruction = instruction_cls(instruction_id)
print(index)
print(inp)
instruction.build_description(**inp.kwargs[index]) instruction.build_description(**inp.kwargs[index])
args = instruction.get_instruction_args() args = instruction.get_instruction_args()
if args and 'prompt' in args: if args and 'prompt' in args:

View File

@@ -29,7 +29,6 @@ class IFEvaluator(BaseEvaluator):
def score(self, predictions, references): def score(self, predictions, references):
results = [] results = []
for pred, refer in zip(predictions, references): for pred, refer in zip(predictions, references):
print(refer)
input = InputExample( input = InputExample(
key=refer['key'], key=refer['key'],
instruction_id_list=refer['instruction_id_list'], instruction_id_list=refer['instruction_id_list'],

View File

@@ -22,10 +22,14 @@ import re
import string import string
from typing import Dict, Optional, Sequence, Union from typing import Dict, Optional, Sequence, Union
import langdetect try:
import langdetect
except ImportError:
langdetect = None
from absl import logging from absl import logging
import opencompass.datasets.ifeval.instructions_util as instructions_util import opencompass.datasets.IFEval.instructions_util as instructions_util
_InstructionArgsDtype = Optional[Dict[str, Union[int, str, Sequence[str]]]] _InstructionArgsDtype = Optional[Dict[str, Union[int, str, Sequence[str]]]]

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Registry of all instructions.""" """Registry of all instructions."""
import opencompass.datasets.ifeval.instructions as instructions import opencompass.datasets.IFEval.instructions as instructions
_KEYWORD = 'keywords:' _KEYWORD = 'keywords:'

File diff suppressed because one or more lines are too long

View File

@@ -47,7 +47,7 @@ from .huggingface import * # noqa: F401, F403
from .humaneval import * # noqa: F401, F403 from .humaneval import * # noqa: F401, F403
from .humanevalx import * # noqa: F401, F403 from .humanevalx import * # noqa: F401, F403
from .hungarian_math import * # noqa: F401, F403 from .hungarian_math import * # noqa: F401, F403
from .IFEval import * # noqa: F401, F403 from .IFEval.ifeval import IFEvalDataset, IFEvaluator # noqa: F401, F403
from .infinitebench import * # noqa: F401, F403 from .infinitebench import * # noqa: F401, F403
from .iwslt2017 import * # noqa: F401, F403 from .iwslt2017 import * # noqa: F401, F403
from .jigsawmultilingual import * # noqa: F401, F403 from .jigsawmultilingual import * # noqa: F401, F403

View File

@@ -9,7 +9,9 @@ evaluate>=0.3.0
fairscale fairscale
func_timeout func_timeout
fuzzywuzzy fuzzywuzzy
immutabledict
jieba jieba
langdetect
ltp ltp
mmengine-lite mmengine-lite
nltk==3.8 nltk==3.8