From f376bb159888210d77dc9621b8211883d27f27e7 Mon Sep 17 00:00:00 2001 From: yusun-nlp Date: Wed, 28 May 2025 13:39:22 +0000 Subject: [PATCH] rename 0-shot --- ....py => smolinstruct_0shot_instruct_gen.py} | 0 .../smolinstruct_pp_acc_0_shot_instruct.py | 8 +-- opencompass/datasets/smolinstruct.py | 50 +++++++++---------- 3 files changed, 27 insertions(+), 31 deletions(-) rename opencompass/configs/datasets/SmolInstruct/{smolinstruct_gen_0shot_instruct.py => smolinstruct_0shot_instruct_gen.py} (100%) diff --git a/opencompass/configs/datasets/SmolInstruct/smolinstruct_gen_0shot_instruct.py b/opencompass/configs/datasets/SmolInstruct/smolinstruct_0shot_instruct_gen.py similarity index 100% rename from opencompass/configs/datasets/SmolInstruct/smolinstruct_gen_0shot_instruct.py rename to opencompass/configs/datasets/SmolInstruct/smolinstruct_0shot_instruct_gen.py diff --git a/opencompass/configs/datasets/SmolInstruct/smolinstruct_pp_acc_0_shot_instruct.py b/opencompass/configs/datasets/SmolInstruct/smolinstruct_pp_acc_0_shot_instruct.py index 346de7e1..4747117a 100644 --- a/opencompass/configs/datasets/SmolInstruct/smolinstruct_pp_acc_0_shot_instruct.py +++ b/opencompass/configs/datasets/SmolInstruct/smolinstruct_pp_acc_0_shot_instruct.py @@ -22,10 +22,10 @@ pp_acc_hint_dict = { } name_dict = { - 'BBBP': "property_prediction-bbbp", - 'ClinTox': "property_prediction-clintox", - 'HIV': "property_prediction-hiv", - 'SIDER': "property_prediction-sider", + 'BBBP': 'property_prediction-bbbp', + 'ClinTox': 'property_prediction-clintox', + 'HIV': 'property_prediction-hiv', + 'SIDER': 'property_prediction-sider', } pp_acc_datasets_0shot_instruct = [] diff --git a/opencompass/datasets/smolinstruct.py b/opencompass/datasets/smolinstruct.py index cb8bb994..6cd850cb 100644 --- a/opencompass/datasets/smolinstruct.py +++ b/opencompass/datasets/smolinstruct.py @@ -32,11 +32,10 @@ class SmolInstructDataset(BaseDataset): def extract_chemical_data(text): other_patterns = [ - "reactants and reagents are:\n```\n", - "reactants and reagents:\n```\n", - "Reactants and Reagents:**\n```\n", - "Reactants and Reagents SMILES:**\n```\n", - "Reactants and Reagents:** \n`" + 'reactants and reagents are:\n```\n', 'reactants and reagents:\n```\n', + 'Reactants and Reagents:**\n```\n', + 'Reactants and Reagents SMILES:**\n```\n', + 'Reactants and Reagents:** \n`' ] pattern = re.compile(r'<(MOLFORMULA|SMILES|IUPAC)>(.*?)', re.DOTALL) @@ -44,7 +43,7 @@ def extract_chemical_data(text): if not matches: for other_pattern in other_patterns: if other_pattern in text: - text = text.split(other_pattern)[-1].split("\n")[0] + text = text.split(other_pattern)[-1].split('\n')[0] break return [text] return [match[1].strip() for match in matches] @@ -220,7 +219,7 @@ class NCElementMatchEvaluator(BaseEvaluator): if len(predictions) != len(references): return { 'error': 'predictions and references have different ' - 'length' + 'length' } # topk的prediction,要拆开 @@ -271,7 +270,7 @@ class NCExactMatchEvaluator(BaseEvaluator): if len(predictions) != len(references): return { 'error': 'predictions and references have different ' - 'length' + 'length' } predictions = [ extract_chemical_data(prediction) for prediction in predictions @@ -304,8 +303,7 @@ class NCExactMatchEvaluator(BaseEvaluator): def extract_number(text): pattern = re.compile( - r'(?:\s*|\\boxed\{)\s*(-?\d*\.?\d+)\s*(?:|\})' - ) + r'(?:\s*|\\boxed\{)\s*(-?\d*\.?\d+)\s*(?:|\})') matches = pattern.findall(text) return [float(match) for match in matches] @@ -321,7 +319,7 @@ class RMSEEvaluator(BaseEvaluator): if len(predictions) != len(references): return { 'error': 'predictions and references have different ' - 'length' + 'length' } avg_score = 0 @@ -338,7 +336,7 @@ class RMSEEvaluator(BaseEvaluator): except: raise ValueError(f'ans: {reference}') detail = {'pred': pred, 'answer': ans} - rmse_score = np.sqrt(np.mean((np.array(pred) - np.array(ans)) ** 2)) + rmse_score = np.sqrt(np.mean((np.array(pred) - np.array(ans))**2)) detail['score'] = rmse_score avg_score += rmse_score details.append(detail) @@ -359,7 +357,7 @@ class FTSEvaluator(BaseEvaluator): if len(predictions) != len(references): return { 'error': 'predictions and references have different ' - 'length' + 'length' } predictions = [ @@ -419,7 +417,7 @@ class MeteorEvaluator(BaseEvaluator): if len(predictions) != len(references): return { 'error': 'predictions and references have different ' - 'length' + 'length' } avg_score = 0 details = [] @@ -452,24 +450,22 @@ def smolinstruct_acc_postprocess(text: str) -> str: @TEXT_POSTPROCESSORS.register_module('smolinstruct-acc-0shot') def smolinstruct_acc_0shot_postprocess(text: str) -> str: # Remove tags if they exist - if "" in text: - text = text.split("")[-1].strip() + if '' in text: + text = text.split('')[-1].strip() # Check for exact "yes" or "no" responses if text.strip().lower() == 'yes': - return " Yes " + return ' Yes ' elif text.strip().lower() == 'no': - return " No " + return ' No ' # Define regex patterns to match various formats of "yes" or "no" patterns = [ r'\\boxed\{\s*(yes|no)\s*\}', r'[Th]he\s+answer\s+is\s*[\.:\'"“‘’\-]*\s*(yes|no)[\s\.,!?:;\'"”’\-]*', - r'[Aa]nswer:\s*(yes|no)\b', - r'\*\*[Aa]nswer:\*\*\s*(yes|no)\b', + r'[Aa]nswer:\s*(yes|no)\b', r'\*\*[Aa]nswer:\*\*\s*(yes|no)\b', r'\*\*[Aa]nswer\*\*:\s*(yes|no)\b', - r'\s*(yes|no)\s*', - r'^\s*(yes|no)[\.\?!]?\s*$' + r'\s*(yes|no)\s*', r'^\s*(yes|no)[\.\?!]?\s*$' ] for pattern in patterns: text = text.strip() @@ -477,16 +473,16 @@ def smolinstruct_acc_0shot_postprocess(text: str) -> str: if match: answer = match.group(1) # modified if answer.lower() == 'yes': - return " Yes " + return ' Yes ' elif answer.lower() == 'no': - return " No " + return ' No ' # If no patterns matched, check for simple "yes" or "no" text = text.strip().lower() if text.startswith('yes') or text.endswith('yes'): - return " Yes " + return ' Yes ' elif text.startswith('no') or text.endswith('no'): - return " No " + return ' No ' # If no patterns matched, return an empty string - return "" + return ''