mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Fix] Fix IFEval (#906)
* fix ifeval * fix ifeval * fix ifeval * fix ifeval
This commit is contained in:
parent
32ba0b074e
commit
45c606bcd0
@ -1,4 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .IFEval_gen_3321a3 import humaneval_datasets # noqa: F401, F403
|
||||
from .IFEval_gen_3321a3 import ifeval_datasets # noqa: F401, F403
|
@ -26,7 +26,7 @@ ifeval_datasets = [
|
||||
dict(
|
||||
abbr='IFEval',
|
||||
type=IFEvalDataset,
|
||||
path='./data_1/ifeval/input_data.jsonl',
|
||||
path='data/ifeval/input_data.jsonl',
|
||||
reader_cfg=ifeval_reader_cfg,
|
||||
infer_cfg=ifeval_infer_cfg,
|
||||
eval_cfg=ifeval_eval_cfg)
|
||||
|
@ -20,7 +20,7 @@ from typing import Dict, Optional, Union
|
||||
|
||||
from absl import flags
|
||||
|
||||
import opencompass.datasets.ifeval.instructions_registry as instructions_registry
|
||||
import opencompass.datasets.IFEval.instructions_registry as instructions_registry
|
||||
|
||||
_INPUT_DATA = flags.DEFINE_string('input_data',
|
||||
None,
|
||||
@ -69,8 +69,6 @@ def test_instruction_following_strict(
|
||||
instruction_cls = instructions_registry.INSTRUCTION_DICT[
|
||||
instruction_id]
|
||||
instruction = instruction_cls(instruction_id)
|
||||
print(index)
|
||||
print(inp)
|
||||
instruction.build_description(**inp.kwargs[index])
|
||||
args = instruction.get_instruction_args()
|
||||
if args and 'prompt' in args:
|
||||
|
@ -29,7 +29,6 @@ class IFEvaluator(BaseEvaluator):
|
||||
def score(self, predictions, references):
|
||||
results = []
|
||||
for pred, refer in zip(predictions, references):
|
||||
print(refer)
|
||||
input = InputExample(
|
||||
key=refer['key'],
|
||||
instruction_id_list=refer['instruction_id_list'],
|
||||
|
@ -22,10 +22,14 @@ import re
|
||||
import string
|
||||
from typing import Dict, Optional, Sequence, Union
|
||||
|
||||
import langdetect
|
||||
try:
|
||||
import langdetect
|
||||
except ImportError:
|
||||
langdetect = None
|
||||
|
||||
from absl import logging
|
||||
|
||||
import opencompass.datasets.ifeval.instructions_util as instructions_util
|
||||
import opencompass.datasets.IFEval.instructions_util as instructions_util
|
||||
|
||||
_InstructionArgsDtype = Optional[Dict[str, Union[int, str, Sequence[str]]]]
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Registry of all instructions."""
|
||||
import opencompass.datasets.ifeval.instructions as instructions
|
||||
import opencompass.datasets.IFEval.instructions as instructions
|
||||
|
||||
_KEYWORD = 'keywords:'
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
@ -47,7 +47,7 @@ from .huggingface import * # noqa: F401, F403
|
||||
from .humaneval import * # noqa: F401, F403
|
||||
from .humanevalx import * # noqa: F401, F403
|
||||
from .hungarian_math import * # noqa: F401, F403
|
||||
from .IFEval import * # noqa: F401, F403
|
||||
from .IFEval.ifeval import IFEvalDataset, IFEvaluator # noqa: F401, F403
|
||||
from .infinitebench import * # noqa: F401, F403
|
||||
from .iwslt2017 import * # noqa: F401, F403
|
||||
from .jigsawmultilingual import * # noqa: F401, F403
|
||||
|
@ -9,7 +9,9 @@ evaluate>=0.3.0
|
||||
fairscale
|
||||
func_timeout
|
||||
fuzzywuzzy
|
||||
immutabledict
|
||||
jieba
|
||||
langdetect
|
||||
ltp
|
||||
mmengine-lite
|
||||
nltk==3.8
|
||||
|
Loading…
Reference in New Issue
Block a user