mirror of https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

commit a176630aaa
Merge branch 'open-compass:main' into main
@@ -1 +1 @@
-__version__ = '0.3.3'
+__version__ = '0.3.4'
@@ -0,0 +1,33 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvalEvaluator, humaneval_internal_v2_postprocess
+
+humaneval_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template='# Complete the following python code:\n{prompt}',
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(
+    evaluator=dict(type=HumanEvalEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_internal_v2_postprocess),
+)
+
+humaneval_datasets = [
+    dict(
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='opencompass/humaneval',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+]
@@ -0,0 +1,33 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvalEvaluator, humaneval_internal_v1_postprocess
+
+humaneval_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template='Complete the following python code:\n{prompt}',
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(
+    evaluator=dict(type=HumanEvalEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_internal_v1_postprocess),
+)
+
+humaneval_datasets = [
+    dict(
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='opencompass/humaneval',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+]
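
Note: the two configs above differ only in the prompt prefix ('# Complete the following python code:' vs 'Complete the following python code:') and in which internal postprocessor they register. A minimal sketch of the filled-in zero-shot prompt, assuming an invented HumanEval-style record and plain string formatting rather than OpenCompass's own prompt rendering:

# Hedged sketch: the 'prompt' value is made up for illustration.
item = {'prompt': 'def add(a, b):\n    """Return the sum of a and b."""\n'}
template = '# Complete the following python code:\n{prompt}'
print(template.format(**item))
# # Complete the following python code:
# def add(a, b):
#     """Return the sum of a and b."""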
@@ -184,3 +184,71 @@ def humaneval_postprocess_v2(text: str) -> str:
     if len(blocks) >= 1:
         text = blocks[0]
     return text
+
+
+def humaneval_internal_v2_postprocess(text: str):
+    if text.startswith('   ') and not text.startswith('    '):
+        text = ' ' + text
+    prediction = text.split('\n\n\n')[0]
+    prediction = prediction.split('\n```')[0]
+    prediction_list = prediction.split('\n')
+    return_list = []
+    for line in prediction_list:
+        if line and line[0] != ' ':
+            break
+        return_list.append(line)
+    return '\n'.join(return_list)
+
+
+def humaneval_internal_v1_postprocess(text: str) -> str:
+    """This is an advanced version of previous postprocess to handle more
+    situations, better to use this one."""
+    try:
+        # for chatGLM related text
+        eval_text = eval(text)
+    except Exception:
+        pass
+    else:
+        if isinstance(eval_text, str):
+            text = eval_text
+    text = text.lstrip('\n')
+    if '```' in text:
+        blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
+        if len(blocks) == 0:
+            text = text.split('```')[1]  # fall back to default strategy
+        else:
+            text = blocks[0]  # fetch the first code block
+            if not text.startswith('\n'):  # in case starting with ```python
+                text = text[max(text.find('\n') + 1, 0):]
+    if text.strip().startswith('from') or text.strip().startswith('import'):
+        def_idx = text.find('def')
+        if def_idx != -1:
+            text = text[max(text.find('\n', def_idx) + 1, 0):]
+    # remove empty lines
+    text = '\n'.join([line for line in text.split('\n') if line != ''])
+    text = text.lstrip('\n')
+    if text.strip().startswith('def'):
+        text = '\n'.join(text.split('\n')[1:])
+    # deal with the indentation error
+    if text.startswith(' '):
+        text = '    ' + text.lstrip()
+    else:
+        text = '\n'.join(['    ' + line for line in text.split('\n')])
+    text = text.split('\n')
+
+    # If number of leading space reduces, we assume that the code block ends.
+    min_leading_space = None
+    end_index = None
+    for index, line in enumerate(text):
+        if line.strip() == '' or line.strip()[0] in ["'", '"', '#']:
+            continue
+        current_leading_space = len(line.rstrip()) - len(line.strip())
+        if min_leading_space is None:
+            min_leading_space = current_leading_space
+        elif current_leading_space < min_leading_space:
+            end_index = index
+            break
+    if end_index is not None:
+        text = '\n'.join(text[:end_index])
+    else:
+        text = '\n'.join(text)
+    return text
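
Note: a hedged usage sketch of the new humaneval_internal_v2_postprocess above. It keeps only the leading indented block of a completion and drops any non-indented tail; the sample completion is invented, and the import path follows the `from opencompass.datasets import ...` line used in the configs.

from opencompass.datasets import humaneval_internal_v2_postprocess

# Invented completion: an indented function body followed by stray top-level code.
completion = '    return a + b\n\nprint(add(1, 2))\n'
print(repr(humaneval_internal_v2_postprocess(completion)))
# -> '    return a + b\n'  (everything from the first non-indented line on is dropped)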
@@ -73,7 +73,11 @@ class TurboMindModelwithChatTemplate(BaseModel):
         else:
             assert isinstance(generation_config.eos_token_id, list)
             for token_id in generation_config.eos_token_id:
-                potential_stop_words.append(self.tokenizer.decode(token_id))
+                stop_word = self.tokenizer.decode(token_id)
+                if stop_word.startswith(' '):
+                    self.logger.warning(f'stop_word "{stop_word}" contains blanks, which will be stripped')
+                    stop_word = stop_word.strip()
+                potential_stop_words.append(stop_word)
         if self.tokenizer.eos_token is not None:
             potential_stop_words.append(self.tokenizer.eos_token)
         potential_stop_words = list(set(potential_stop_words))
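
Note: the change above normalizes each decoded eos token before registering it as a stop word, because decode() can emit a leading blank that would never match the generated text exactly. A tokenizer-free sketch of the same normalization, with a made-up decoded string:

decoded = ' <|im_end|>'              # hypothetical decode() output with a leading blank
stop_word = decoded
if stop_word.startswith(' '):
    stop_word = stop_word.strip()    # same strip-and-warn path as in the diff
assert stop_word == '<|im_end|>'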
@@ -42,14 +42,15 @@ class DLCRunner(BaseRunner):
                  eval_with_gpu: list = ['plugin_eval'],
                  retry: int = 2,
                  debug: bool = False,
-                 lark_bot_url: str = None):
+                 lark_bot_url: str = None,
+                 keep_tmp_file: bool = False):
         super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
         self.aliyun_cfg = aliyun_cfg
         self.max_num_workers = max_num_workers
         self.retry = retry
 
         self.eval_with_gpu = eval_with_gpu
 
+        self.keep_tmp_file = keep_tmp_file
         logger = get_logger()
         logger.warning(
             'To ensure the integrity of the log results, the log displayed '
@@ -106,7 +107,10 @@ class DLCRunner(BaseRunner):
 
         # Dump task config to file
         mmengine.mkdir_or_exist('tmp/')
-        param_file = f'tmp/{os.getpid()}_params.py'
+        # Using uuid to avoid filename conflict
+        import uuid
+        uuid_str = str(uuid.uuid4())
+        param_file = f'tmp/{uuid_str}_params.py'
         pwd = os.getcwd()
         try:
             cfg.dump(param_file)
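
Note: the getpid-based name above is reused for every task launched from the same process, so concurrent launches can overwrite each other's dumped config; a per-launch uuid4 name cannot collide. A small standalone sketch of the two naming schemes:

import os
import uuid

pid_name = f'tmp/{os.getpid()}_params.py'    # old: identical for all tasks in one process
uuid_name = f'tmp/{uuid.uuid4()}_params.py'  # new: unique per call
print(pid_name, uuid_name)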
@@ -164,20 +168,27 @@ class DLCRunner(BaseRunner):
         # set priority to 1 as default
         task_priority = self.aliyun_cfg.get('priority', 1)
 
+        # Different dlc versions have different commands
+        if self.aliyun_cfg.get('dlc_job_cmd') == 'create':
+            dlc_job_cmd = 'create job --kind PyTorchJob'
+            worker_cmd = ' --worker_count 1'
+        else:
+            dlc_job_cmd = 'submit pytorchjob'
+            worker_cmd = ' --workers 1'
         tmpl = (
-            'dlc submit pytorchjob'
+            f'dlc {dlc_job_cmd}'
             f" --command '{shell_cmd}'"
             f' --name {task_name[:512]}'
             f" --config {self.aliyun_cfg['dlc_config_path']}"
             f" --workspace_id {self.aliyun_cfg['workspace_id']}"
-            f" --resource_id {self.aliyun_cfg['resource_id']}"
+            f" --resource_id={self.aliyun_cfg['resource_id']}"
             f' --priority {task_priority}'
-            ' --workers 1'
+            f'{worker_cmd}'
             f' --worker_cpu {max(num_gpus * 8, 12)}'
             f' --worker_gpu {num_gpus}'
             f' --worker_memory {max(num_gpus * 128, 192)}Gi'
             f" --worker_image {self.aliyun_cfg['worker_image']}"
-            f" --data_sources {','.join(self.aliyun_cfg['data_sources'])}")
+            f" --data_sources={','.join(self.aliyun_cfg['data_sources'])}")
         get_cmd = partial(task.get_command,
                           cfg_path=param_file,
                           template=tmpl)
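
Note: the branch above covers two generations of the dlc CLI: newer clients use `create job --kind PyTorchJob` with `--worker_count`, older ones use `submit pytorchjob` with `--workers`. A rough sketch of how the branch changes the final command prefix (the dict here is a made-up stand-in for self.aliyun_cfg):

aliyun_cfg = {'dlc_job_cmd': 'create'}
if aliyun_cfg.get('dlc_job_cmd') == 'create':
    dlc_job_cmd, worker_cmd = 'create job --kind PyTorchJob', ' --worker_count 1'
else:
    dlc_job_cmd, worker_cmd = 'submit pytorchjob', ' --workers 1'
print(f'dlc {dlc_job_cmd} ...{worker_cmd}')
# -> dlc create job --kind PyTorchJob ... --worker_count 1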
@@ -298,7 +309,10 @@ class DLCRunner(BaseRunner):
             return_code = _run_within_retry()
         finally:
             # Clean up
-            os.remove(param_file)
+            if not self.keep_tmp_file:
+                os.remove(param_file)
+            else:
+                pass
 
         return task_name, return_code
 
@@ -56,10 +56,12 @@ class LocalRunner(BaseRunner):
                  debug: bool = False,
                  max_workers_per_gpu: int = 1,
                  lark_bot_url: str = None,
+                 keep_tmp_file: bool = False,
                  **kwargs):
         super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
         self.max_num_workers = max_num_workers
         self.max_workers_per_gpu = max_workers_per_gpu
+        self.keep_tmp_file = keep_tmp_file
         logger = get_logger()
         for k, v in kwargs.items():
             logger.warning(f'Ignored argument in {self.__module__}: {k}={v}')
@@ -100,7 +102,10 @@ class LocalRunner(BaseRunner):
                 assert len(all_gpu_ids) >= num_gpus
                 # get cmd
                 mmengine.mkdir_or_exist('tmp/')
-                param_file = f'tmp/{os.getpid()}_params.py'
+                import uuid
+                uuid_str = str(uuid.uuid4())
+
+                param_file = f'tmp/{uuid_str}_params.py'
                 try:
                     task.cfg.dump(param_file)
                     # if use torchrun, restrict it behaves the same as non
@@ -140,7 +145,10 @@ class LocalRunner(BaseRunner):
                             stdout=log_file,
                             stderr=subprocess.STDOUT)
                 finally:
-                    os.remove(param_file)
+                    if not self.keep_tmp_file:
+                        os.remove(param_file)
+                    else:
+                        pass
                 status.append((task_name, 0))
             else:
                 if len(all_gpu_ids) > 0:
@@ -24,11 +24,11 @@ class SlurmSequentialRunner(BaseRunner):
     using `srun` command.
 
     This runner launches tasks one by one for execution. A new task will only
-    be launched when and only when max_num_workers is not met, and the previous
-    task has been successfully allocated to a machine. Therefore, unlike the
-    `SlurmRunner`, at most only one task will be in the PENDING status at the
-    same time during a run, making the random_sleep strategy no longer
-    necessary. In addition, this runner also includes a feature to
+    be launched when and only when max_num_workers is not met, and the
+    previous task has been successfully allocated to a machine. Therefore,
+    unlike the `SlurmRunner`, at most only one task will be in the PENDING
+    status at the same time during a run, making the random_sleep strategy
+    no longer necessary. In addition, this runner also includes a feature to
     automatically kill all jobs by the job_id on exit.
 
     The runner will obtain the job_id by reading the srun output similar to
@@ -59,7 +59,8 @@ class SlurmSequentialRunner(BaseRunner):
                  qos: str = None,
                  debug: bool = False,
                  lark_bot_url: str = None,
-                 extra_command: Optional[List[str]] = None):
+                 extra_command: Optional[List[str]] = None,
+                 keep_tmp_file: bool = False):
         super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
         self.max_num_workers = max_num_workers
         self.retry = retry
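
Note: as the docstring above describes, at most one job sits in PENDING at a time because the next task is only submitted once the previous one has been allocated. A heavily simplified sketch of that policy, not the runner's real launch implementation (submit, is_allocated and is_finished are hypothetical callables):

import time

def launch_sequentially(tasks, max_num_workers, submit, is_allocated, is_finished):
    jobs = []
    for task in tasks:
        while sum(not is_finished(j) for j in jobs) >= max_num_workers:
            time.sleep(1)                  # wait for a worker slot to free up
        job = submit(task)                 # only one submission is outstanding...
        while not is_allocated(job):
            time.sleep(1)                  # ...until it leaves the PENDING state
        jobs.append(job)
    return jobs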
@@ -67,6 +68,7 @@ class SlurmSequentialRunner(BaseRunner):
         self.quotatype = quotatype
         self.qos = qos
         self.task_prefix = task_prefix
+        self.keep_tmp_file = keep_tmp_file
         if not extra_command:
             extra_command = []
         assert isinstance(extra_command, list)
@@ -171,7 +173,10 @@ class SlurmSequentialRunner(BaseRunner):
 
         # Dump task config to file
         mmengine.mkdir_or_exist('tmp/')
-        param_file = f'tmp/{os.getpid()}_params.py'
+        # Using uuid to avoid filename conflict
+        import uuid
+        uuid_str = str(uuid.uuid4())
+        param_file = f'tmp/{uuid_str}_params.py'
         process = None
         try:
             cfg.dump(param_file)
@@ -256,7 +261,11 @@ class SlurmSequentialRunner(BaseRunner):
             child_conn.close()
             if process is not None:
                 process.kill()
-            os.remove(param_file)
+            if not self.keep_tmp_file:
+                os.remove(param_file)
+            else:
+                pass
 
         return task_name, process.returncode
 
     def _job_failed(self, return_code: int, output_paths: List[str]) -> bool:
@@ -47,7 +47,8 @@ class VOLCRunner(BaseRunner):
                  max_num_workers: int = 32,
                  retry: int = 2,
                  debug: bool = False,
-                 lark_bot_url: str = None):
+                 lark_bot_url: str = None,
+                 keep_tmp_file: bool = False):
         super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
         self.volcano_cfg = volcano_cfg
         self.max_num_workers = max_num_workers
@@ -55,6 +56,7 @@ class VOLCRunner(BaseRunner):
         self.queue_name = queue_name
         self.preemptible = preemptible
         self.priority = priority
+        self.keep_tmp_file = keep_tmp_file
 
     def launch(self, tasks: List[Dict[str, Any]]) -> List[Tuple[str, int]]:
         """Launch multiple tasks.
@@ -100,9 +102,12 @@ class VOLCRunner(BaseRunner):
         pwd = os.getcwd()
         # Dump task config to file
         mmengine.mkdir_or_exist('tmp/')
-        param_file = f'{pwd}/tmp/{os.getpid()}_params.py'
+        # Using uuid to avoid filename conflict
+        import uuid
+        uuid_str = str(uuid.uuid4())
+        param_file = f'{pwd}/tmp/{uuid_str}_params.py'
 
-        volc_cfg_file = f'{pwd}/tmp/{os.getpid()}_cfg.yaml'
+        volc_cfg_file = f'{pwd}/tmp/{uuid_str}_cfg.yaml'
         volc_cfg = self._choose_flavor(num_gpus)
         with open(volc_cfg_file, 'w') as fp:
             yaml.dump(volc_cfg, fp, sort_keys=False)
@@ -191,8 +196,12 @@ class VOLCRunner(BaseRunner):
 
             finally:
                 # Clean up
-                os.remove(param_file)
-                os.remove(volc_cfg_file)
+                if not self.keep_tmp_file:
+                    os.remove(param_file)
+                    os.remove(volc_cfg_file)
+                else:
+                    pass
 
         return task_name, returncode
 
     def _run_task(self, cmd, log_path, poll_interval):
@@ -299,16 +299,34 @@ class DefaultSummarizer:
         raw_txts = '\n'.join(raw_txts)
         return raw_txts
 
+    @staticmethod
+    def _format_md_table(table):
+        table_head_str = '| ' + ' | '.join(table[0]) + ' |\n'
+        table_mid_list = ['-----' for _ in range(len(table[0]))]
+        table_mid_str = '|' + ' | '.join(table_mid_list) + '|\n'
+
+        md_table_str = table_head_str + table_mid_str
+        for row in table[1:]:
+            curr_str = '| ' + ' | '.join(row) + ' |\n'
+            md_table_str += curr_str
+        return md_table_str
+
     def _output_to_file(self, output_path, time_str, table, raw_txts):
         # output to file
         if output_path is None:
             output_path = osp.join(self.work_dir, 'summary', f'summary_{time_str}.txt')
             output_csv_path = osp.join(self.work_dir, 'summary', f'summary_{time_str}.csv')
+            output_md_path = osp.join(self.work_dir, 'summary', f'summary_{time_str}.md')
         else:
             output_csv_path = output_path.replace('.txt', '.csv')
+            output_md_path = output_path.replace('.txt', '.md')
 
         output_dir = osp.split(output_path)[0]
         mmengine.mkdir_or_exist(output_dir)
 
+        # process md table
+        md_table = self._format_md_table(table)
+
         with open(output_path, 'w', encoding='utf-8') as f:
             text = f'{time_str}\n' + \
                    'tabulate format\n' + \
@@ -320,6 +338,10 @@ class DefaultSummarizer:
                    '^' * 128 + '\n' + \
                    '\n'.join([','.join(row) for row in table]) + '\n' + \
                    '$' * 128 + '\n\n' + \
+                   'markdown format\n' + \
+                   '^' * 128 + '\n' + \
+                   md_table + '\n' + \
+                   '$' * 128 + '\n' + \
                    '-' * 128 + ' THIS IS A DIVIDER ' + '-' * 128 + '\n\n' + \
                    'raw format\n' + \
                    '^' * 128 + '\n' + \
@@ -332,6 +354,11 @@ class DefaultSummarizer:
             f.write('\n'.join([','.join(row) for row in table]) + '\n')
         self.logger.info(f'write csv to {osp.abspath(output_csv_path)}')
 
+        with open(output_md_path, 'w', encoding='utf-8') as f:
+            f.write(md_table)
+        print(f'\n\nThe markdown format results are as below:\n\n{md_table}')
+        self.logger.info(f'write markdown summary to {osp.abspath(output_md_path)}')
+
     def summarize(
         self,
         output_path: str = None,
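
Note: a hedged usage sketch of the new _format_md_table helper. Given the summary table as a list of rows (header row first), it emits a plain Markdown table; the rows below are invented and the import path is assumed from the class shown in the diff.

from opencompass.summarizers import DefaultSummarizer

table = [
    ['dataset', 'metric', 'my_model'],        # header row
    ['openai_humaneval', 'pass@1', '65.20'],
]
print(DefaultSummarizer._format_md_table(table))
# | dataset | metric | my_model |
# |----- | ----- | -----|
# | openai_humaneval | pass@1 | 65.20 |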