mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
82 lines
2.3 KiB
Python
82 lines
2.3 KiB
Python
import re
|
||
|
||
from opencompass.registry import TEXT_POSTPROCESSORS
|
||
|
||
|
||
@TEXT_POSTPROCESSORS.register_module('general')
|
||
def general_postprocess(text: str) -> str:
    """Normalize a free-form answer down to its cleaned first clause.

    The text is cut at the first newline, period or comma. Punctuation and
    the English articles "a", "an" and "the" (case-insensitive) are then
    removed, runs of whitespace are collapsed to one space and the ends are
    trimmed.

    Args:
        text: Raw text to normalize.

    Returns:
        The cleaned first clause; an empty string if nothing is left.
    """
    # Cut off at the first newline, period, or comma. ``maxsplit`` is passed
    # by keyword because passing it positionally is deprecated since
    # Python 3.13.
    truncated_text = re.split(r'[\n.,]', text, maxsplit=1)[0]

    # Remove punctuation: anything that is neither a word character nor
    # whitespace (underscores count as word characters and are kept).
    no_punctuation = re.sub(r'[^\w\s]', '', truncated_text)

    # Remove articles
    no_articles = re.sub(r'\b(a|an|the)\b',
                         '',
                         no_punctuation,
                         flags=re.IGNORECASE)

    # Remove duplicated blank spaces
    cleaned_text = re.sub(r'\s+', ' ', no_articles).strip()

    return cleaned_text
|
||
|
||
|
||
@TEXT_POSTPROCESSORS.register_module('general_cn')
|
||
def general_cn_postprocess(text: str) -> str:
    """Segment text into space-separated words with jieba.

    Args:
        text: Raw text to segment.

    Returns:
        The tokens yielded by ``jieba.cut(text)`` joined by single spaces.
    """
    # NOTE(review): this function previously ran the same truncate / strip
    # punctuation / strip articles / collapse whitespace steps as
    # ``general_postprocess`` and then discarded that result, segmenting the
    # raw ``text`` instead. The dead computation is removed here, so the
    # returned value is unchanged. If segmenting the *cleaned* text was the
    # intent, confirm against the datasets that use this postprocessor
    # before changing it, since that would alter evaluation outputs.

    # Imported inside the function so jieba is only needed when this
    # postprocessor is actually called.
    import jieba

    return ' '.join(jieba.cut(text))
|
||
|
||
|
||
@TEXT_POSTPROCESSORS.register_module('first-capital')
|
||
def first_capital_postprocess(text: str) -> str:
    """Return the first uppercase character found in ``text``.

    Args:
        text: Text to scan from left to right.

    Returns:
        The first character for which ``str.isupper()`` is true, or an
        empty string when ``text`` contains no such character.
    """
    uppercase_chars = (char for char in text if char.isupper())
    return next(uppercase_chars, '')
|
||
|
||
|
||
def first_option_postprocess(text: str, options: str) -> str:
    """Find first valid option for text.

    Patterns are tried from most to least specific; the first pattern that
    matches anywhere in ``text`` decides the result.

    Args:
        text: Model output to search.
        options: The valid option characters, e.g. ``'ABCD'``. Each
            character is treated literally as one option.

    Returns:
        The option character picked out by the first matching pattern, or
        an empty string if ``options`` is empty or nothing matches.
    """
    if not options:
        # An empty character class is not a valid regex; nothing can match.
        return ''

    # Capture the option itself so the result never comes from letters that
    # merely appear in the surrounding phrase (e.g. the "T" of "The answer
    # is F" when options are "TF"). Escaping keeps characters such as "-",
    # "^" or "]" literal inside the character class.
    option = f'([{re.escape(options)}])'

    patterns = [
        rf'[Tt]he answer is {option}',
        rf'[Tt]he correct answer is {option}',
        rf'答案是.*?{option}',
        rf'答案为.*?{option}',
        # "故选" is the correct spelling; "固选" is still accepted so text
        # matched by the previous pattern keeps matching.
        rf'[故固]选.*?{option}',
        rf'答案应该是.*?{option}',
        # A standalone option: preceded by whitespace or start of text and
        # followed by whitespace, punctuation, a literal "$", or end of text.
        rf'(?:\s|^){option}(?:[\s。,,.$]|$)',
        # Last resort: the first option character anywhere in the text.
        option,
    ]

    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return ''
|
||
|
||
|
||
@TEXT_POSTPROCESSORS.register_module('first-capital-multi')
|
||
def first_capital_postprocess_multi(text: str) -> str:
    """Return the first run of consecutive letters A-D found in ``text``.

    Args:
        text: Text to search.

    Returns:
        The leftmost maximal run of the uppercase letters A, B, C and D
        (e.g. ``'ABD'``), or an empty string when none of them occurs.
    """
    found = re.search(r'([A-D]+)', text)
    if found is None:
        return ''
    return found.group(1)
|