import re

from opencompass.registry import TEXT_POSTPROCESSORS


def _clean_answer(text: str) -> str:
    """Reduce a raw prediction to a short, normalized answer string.

    The steps are applied in order:

    1. Keep only the part before the first newline, period, or comma.
    2. Remove every character that is neither a word character nor
       whitespace.
    3. Remove the English articles "a", "an" and "the" (case-insensitive).
    4. Collapse runs of whitespace into one space and strip both ends.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text; an empty string if nothing is left.
    """
    # ``maxsplit`` is passed by keyword: passing it positionally is
    # deprecated since Python 3.13.
    truncated_text = re.split(r'[\n.,]', text, maxsplit=1)[0]
    no_punctuation = re.sub(r'[^\w\s]', '', truncated_text)
    no_articles = re.sub(r'\b(a|an|the)\b',
                         '',
                         no_punctuation,
                         flags=re.IGNORECASE)
    return re.sub(r'\s+', ' ', no_articles).strip()


@TEXT_POSTPROCESSORS.register_module('general')
def general_postprocess(text: str) -> str:
    """Truncate and normalize a prediction.

    Args:
        text: The raw text to post-process.

    Returns:
        The text before the first newline, period, or comma, with
        punctuation and English articles removed and whitespace collapsed.
    """
    return _clean_answer(text)


@TEXT_POSTPROCESSORS.register_module('general_cn')
def general_cn_postprocess(text: str) -> str:
    """Truncate and normalize a prediction, then segment it with jieba.

    The text first goes through the same cleaning as
    ``general_postprocess``; the cleaned result is then cut into tokens by
    ``jieba.cut`` and the tokens are joined with single spaces.

    Args:
        text: The raw text to post-process.

    Returns:
        The space-separated tokens of the cleaned text.
    """
    # Function-scope import: this module can be imported without jieba
    # installed as long as this postprocessor is never called.
    import jieba

    cleaned_text = _clean_answer(text)
    # Segment the *cleaned* text. The previous code segmented the raw
    # ``text`` and silently discarded all of the cleaning above.
    # Whitespace-only tokens are dropped so the joined output never
    # contains runs of blanks.
    return ' '.join(
        token for token in jieba.cut(cleaned_text) if token.strip())


@TEXT_POSTPROCESSORS.register_module('first-capital')
def first_capital_postprocess(text: str) -> str:
    """Return the first uppercase character found in ``text``.

    Args:
        text: The text to scan.

    Returns:
        The first character for which ``str.isupper()`` is true, or an
        empty string if there is none.
    """
    return next((char for char in text if char.isupper()), '')


@TEXT_POSTPROCESSORS.register_module('first-capital-multi')
def first_capital_postprocess_multi(text: str) -> str:
    """Return the first run of consecutive letters A-D found in ``text``.

    Args:
        text: The text to scan.

    Returns:
        The first maximal substring made only of the uppercase letters
        "A" to "D" (for example ``"ACD"``), or an empty string if ``text``
        contains none of them.
    """
    match = re.search(r'([A-D]+)', text)
    return match.group(1) if match else ''