mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] enhance the ability of humaneval_postprocess (#676)
* [Feature] enhance the ability of humaneval_postprocess * refactor * [Feature] Keep the old version of the function and realize the new function in humaneval_postprocess_v2. * Update opencompass/datasets/humaneval.py --------- Co-authored-by: Leymore <zfz-960727@163.com> Co-authored-by: Hubert <42952108+yingfhu@users.noreply.github.com>
This commit is contained in:
parent
1029119e39
commit
dd4318f6ab
@ -104,6 +104,53 @@ def humaneval_postprocess(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def humaneval_postprocess_v2(text: str) -> str:
    """Extract an indented function body from a raw model completion.

    This is an advanced version of the previous postprocess that handles
    more situations; prefer this one.

    The steps are, in order:

    1. If the text contains a Markdown code fence, keep the content of the
       first fenced block (or everything after the first fence when the
       fence is never closed) and drop a leading language tag such as
       ``python``.
    2. If the text starts with ``from``/``import``, drop everything up to
       and including the line holding the first ``def``.
    3. Remove empty lines, and drop a leading ``def ...`` signature line.
    4. Make sure the body is indented by four spaces.
    5. Cut the text at the first line whose indentation is smaller than
       that of the first code line, assuming the function body ends there.

    Args:
        text: Raw completion text produced by the model.

    Returns:
        The post-processed text, indented by four spaces.
    """
    indent = '    '

    text = text.lstrip('\n')
    if '```' in text:
        blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
        if blocks:
            text = blocks[0]  # fetch the first code block
        else:
            # Unclosed fence: fall back to whatever follows the first fence.
            text = text.split('```')[1]
        # In case the fence starts with a language tag (```python): drop the
        # rest of that first line. When there is no newline, find() returns
        # -1 and the slice keeps the text untouched.
        if not text.startswith('\n'):
            text = text[max(text.find('\n') + 1, 0):]

    if text.strip().startswith(('from', 'import')):
        def_idx = text.find('def')
        if def_idx != -1:
            # Skip the imports and the signature line itself.
            text = text[max(text.find('\n', def_idx) + 1, 0):]

    # remove empty lines
    text = '\n'.join(line for line in text.split('\n') if line != '')
    text = text.lstrip('\n')

    # Drop the signature line when the completion repeats it.
    if text.strip().startswith('def'):
        text = '\n'.join(text.split('\n')[1:])

    if not text.startswith(indent):
        if text.startswith(' '):
            # Under-indented first line: normalise it to four spaces.
            text = indent + text.lstrip()
        else:
            # No indentation at all: indent every line.
            text = '\n'.join(indent + line for line in text.split('\n'))

    lines = text.split('\n')

    # If number of leading space reduces, we assume that the code block ends.
    min_leading_space = None
    end_index = None
    for index, line in enumerate(lines):
        stripped = line.strip()
        # Blank lines and lines starting with a quote or '#' are skipped
        # when measuring indentation.
        if stripped == '' or stripped[0] in ("'", '"', '#'):
            continue
        current_leading_space = len(line) - len(line.lstrip())
        if min_leading_space is None:
            min_leading_space = current_leading_space
        elif current_leading_space < min_leading_space:
            end_index = index
            break

    if end_index is not None:
        lines = lines[:end_index]
    return '\n'.join(lines)
|
||||
|
||||
|
||||
def humaneval_gpt_postprocess(text: str) -> str:
|
||||
"""Better answer postprocessor for better instruction-aligned models like
|
||||
GPT."""
|
||||
|
Loading…
Reference in New Issue
Block a user