[Feature] enhance the ability of humaneval_postprocess (#676)

* [Feature] enhance the ability of humaneval_postprocess

* refactor

* [Feature] Keep the old version of the function and realize the new function in humaneval_postprocess_v2.

* Update opencompass/datasets/humaneval.py

---------

Co-authored-by: Leymore <zfz-960727@163.com>
Co-authored-by: Hubert <42952108+yingfhu@users.noreply.github.com>
This commit is contained in:
Jingming 2023-12-11 14:39:56 +08:00 committed by GitHub
parent 1029119e39
commit dd4318f6ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -104,6 +104,53 @@ def humaneval_postprocess(text: str) -> str:
return text
def humaneval_postprocess_v2(text: str) -> str:
    """Extract the indented function body from a raw model completion.

    The completion is reduced to the statements that belong inside the
    function being completed, using these steps in order:

    1. If the text contains a Markdown code fence, keep only the first fenced
       block (or everything after the opening fence when it is never closed)
       and drop a language tag such as ``python`` that follows the fence.
    2. If the text starts with ``from``/``import``, drop everything up to and
       including the line that contains the first ``def``.
    3. Remove empty lines, and drop the first line when the text starts with
       ``def`` (the signature line).
    4. Make sure the body is indented with four spaces.
    5. Cut the text at the first line that is indented less than the first
       code line, assuming the function body ends there.

    Args:
        text: Raw completion text produced by the model.

    Returns:
        The post-processed body as a single newline-joined string.
    """
    indent = ' ' * 4

    text = text.lstrip('\n')
    if '```' in text:
        blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
        if blocks:
            text = blocks[0]  # fetch the first code block
        else:
            # The fence is never closed: keep what follows the opening fence.
            text = text.split('```')[1]
        # Drop a language tag that directly follows the fence (```python).
        # This applies to closed and unclosed fences alike, otherwise the tag
        # would survive as a line of "code". ``find`` returns -1 when there is
        # no newline, which leaves the text untouched.
        if not text.startswith('\n'):
            text = text[text.find('\n') + 1:]

    # Skip leading imports together with the signature line of the function.
    if text.strip().startswith(('from', 'import')):
        def_idx = text.find('def')
        if def_idx != -1:
            text = text[text.find('\n', def_idx) + 1:]

    # remove empty lines
    text = '\n'.join(line for line in text.split('\n') if line != '')

    # The completion repeats the signature: keep only what comes after it.
    if text.strip().startswith('def'):
        text = '\n'.join(text.split('\n')[1:])

    # Normalise the indentation to four spaces.
    if not text.startswith(indent):
        if text.startswith(' '):
            # Under-indented first line: re-indent the start of the text.
            text = indent + text.lstrip()
        else:
            # No indentation at all: shift every line to the right.
            text = '\n'.join(indent + line for line in text.split('\n'))

    lines = text.split('\n')

    # If number of leading space reduces, we assume that the code block ends.
    min_leading_space = None
    end_index = None
    for index, line in enumerate(lines):
        stripped = line.strip()
        # Blank lines and lines starting with a quote or a comment marker do
        # not take part in the indentation check.
        if not stripped or stripped[0] in ("'", '"', '#'):
            continue
        current_leading_space = len(line) - len(line.lstrip())
        if min_leading_space is None:
            min_leading_space = current_leading_space
        elif current_leading_space < min_leading_space:
            end_index = index
            break

    # ``end_index`` is None when no dedent was found, which keeps every line.
    return '\n'.join(lines[:end_index])
def humaneval_gpt_postprocess(text: str) -> str:
"""Better answer postprocessor for better instruction-aligned models like
GPT."""