import re

from opencompass.registry import TEXT_POSTPROCESSORS


def _clean_answer(text: str) -> str:
    """Reduce a raw answer to its first clause, normalized for comparison.

    The text is cut at the first newline, period, or comma. Every character
    that is neither a word character nor whitespace is then removed, the
    English articles "a", "an" and "the" are dropped (case-insensitively),
    and runs of whitespace are collapsed to a single space.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    # Cut off at the first newline, period, or comma. ``maxsplit`` is passed
    # by keyword because the positional form is deprecated in newer Pythons.
    truncated_text = re.split(r'[\n.,]', text, maxsplit=1)[0]
    # Remove punctuation (anything that is not a word char or whitespace).
    no_punctuation = re.sub(r'[^\w\s]', '', truncated_text)
    # Remove articles.
    no_articles = re.sub(r'\b(a|an|the)\b',
                         '',
                         no_punctuation,
                         flags=re.IGNORECASE)
    # Remove duplicated blank spaces.
    return re.sub(r'\s+', ' ', no_articles).strip()


@TEXT_POSTPROCESSORS.register_module('general')
def general_postprocess(text: str) -> str:
    """Return the first clause of ``text`` without punctuation or articles.

    Args:
        text: Raw text to clean.

    Returns:
        The text up to the first newline, period, or comma, with punctuation
        and the articles "a"/"an"/"the" removed and whitespace normalized.
    """
    return _clean_answer(text)


@TEXT_POSTPROCESSORS.register_module('general_cn')
def general_cn_postprocess(text: str) -> str:
    """Clean ``text`` like ``general_postprocess`` and segment it with jieba.

    Args:
        text: Raw text to clean and segment.

    Returns:
        The tokens yielded by ``jieba.cut`` on the cleaned text, joined by
        single spaces.
    """
    # Imported lazily so jieba is only required when this function runs.
    import jieba

    cleaned_text = _clean_answer(text)
    # Segment the cleaned text, not the raw input, so the normalization
    # above is actually reflected in the result.
    return ' '.join(jieba.cut(cleaned_text))


@TEXT_POSTPROCESSORS.register_module('first-capital')
def first_capital_postprocess(text: str) -> str:
    """Return the first uppercase character of ``text``.

    Args:
        text: Text to scan.

    Returns:
        The first character for which ``str.isupper()`` is true, or an empty
        string when there is none.
    """
    return next((char for char in text if char.isupper()), '')


def first_option_postprocess(text: str, options: str) -> str:
    """Find first valid option for text.

    Patterns are tried in order, from explicit answer phrases ("The answer
    is X", "答案是 X", ...) through a standalone option delimited by
    whitespace or punctuation, down to the first occurrence of any option
    character anywhere in ``text``.

    Args:
        text: Text to search.
        options: Candidate option characters, e.g. ``'ABCD'``. The string is
            interpolated verbatim into a regex character class.

    Returns:
        The option matched by the first pattern that hits, or an empty
        string when nothing matches or ``options`` is empty.
    """
    if not options:
        # An empty character class ("[]") is not a valid regex.
        return ''

    # The option is captured explicitly so that letters in the surrounding
    # phrase (e.g. the "a" in "answer") can never be mistaken for it.
    option = f'([{options}])'
    patterns = [
        rf'[Tt]he answer is {option}',
        rf'[Tt]he correct answer is {option}',
        rf'答案是.*?{option}',
        rf'答案为.*?{option}',
        rf'固选.*?{option}',
        rf'答案应该是.*?{option}',
        # Standalone option: preceded by whitespace or start of text and
        # followed by whitespace, punctuation, or end of text. The end
        # anchor must sit outside the character class, where "$" would
        # otherwise be a literal dollar sign.
        rf'(?:\s|^){option}(?:[\s。,,\.]|$)',
        option,
    ]

    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return ''


@TEXT_POSTPROCESSORS.register_module('first-capital-multi')
def first_capital_postprocess_multi(text: str) -> str:
    """Return the first run of consecutive letters A-D found in ``text``.

    Args:
        text: Text to scan.

    Returns:
        The first maximal run of characters in ``A``-``D`` (e.g. ``'ACD'``),
        or an empty string when there is none.
    """
    match = re.search(r'([A-D]+)', text)
    if match:
        return match.group(1)
    return ''


def last_option_postprocess(text: str, options: str) -> str:
    """Return the last option character that occurs in ``text``.

    Args:
        text: Text to search.
        options: Candidate option characters, e.g. ``'ABCD'``. The string is
            interpolated verbatim into a regex character class.

    Returns:
        The last character of ``text`` matching the option class, or an
        empty string when nothing matches or ``options`` is empty.
    """
    if not options:
        # An empty character class ("[]") is not a valid regex.
        return ''
    found = re.findall(rf'([{options}])', text)
    if found:
        return found[-1]
    return ''