mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
Update cdme config and evaluator (#812)
* update cdme config and evaluator
* fix cdme prompt
* move CDME trim post-processor as a separate evaluator

---------

Co-authored-by: 郭琦鹏 <guoqipeng@pjlab.org.cn>
This commit is contained in:
parent
f09a2ff418
commit
e975a96fa1
@ -46,13 +46,21 @@ cdme_eval_cfg = dict(
|
||||
dataset_postprocessor=dict(type=cdme_dataset_postprocess),
|
||||
pred_role='BOT')
|
||||
|
||||
context_lengths = list(range(1000, 201000, 1000))
|
||||
# Evaluation settings for the "trim" variant: the evaluator is constructed
# with use_trim=True, so each prediction is trimmed against its reference
# before it is scored.
cdme_trim_eval_cfg = dict(
    evaluator=dict(type=CDMEEvaluator, use_trim=True),
    pred_postprocessor=dict(type=cdme_postprocess),
    dataset_postprocessor=dict(type=cdme_dataset_postprocess),
    pred_role='BOT',
)
|
||||
|
||||
# Context lengths to sweep: every 16000 from 16000 up to 192000, plus 200000.
# (A finer sweep, list(range(1000, 201000, 1000)), was used previously.)
context_lengths = list(range(16000, 192001, 16000)) + [200000]

# Number of needle-depth steps and how they are spaced.
document_depth_percent_intervals = 20
document_depth_percent_interval_type = "linear"

# Location of the source documents and the files to draw from.
base_path = './data/CDME'
file_list = ['zh_finance.jsonl']

# Filled in by the loops below: one list per evaluation variant.
cdme_datasets = []
cdme_trim_datasets = []
|
||||
|
||||
for original_context_length in context_lengths:
|
||||
for depth_percent in generate_depth_percents(
|
||||
@ -73,9 +81,31 @@ for original_context_length in context_lengths:
|
||||
'language': 'Chinese',
|
||||
'needle': '\n小明最喜欢的实习的地点就是上海人工智能实验室。\n',
|
||||
'retrieval_question': '小明最喜欢的实习地点是哪里?请按照'
|
||||
'“小明最喜欢的实习地点就是________。”的格式回答。',
|
||||
'“小明最喜欢的实习地点就是________。”的格式回答。\n',
|
||||
'reader_cfg': cdme_reader_cfg,
|
||||
'infer_cfg': cdme_infer_cfg,
|
||||
'eval_cfg': cdme_eval_cfg
|
||||
}
|
||||
cdme_datasets.append(dataset_dict)
|
||||
|
||||
# Dataset entry for the current (length, depth) pair that is scored with
# cdme_trim_eval_cfg instead of cdme_eval_cfg.
trim_dataset_dict = {
    'abbr': f'CDME_Length{original_context_length}'
            f'Depth{int(depth_percent)}',
    'type': CDMEDataset,
    'path': base_path,
    'length': original_context_length,
    'depth': int(depth_percent),
    'tokenizer_model': 'gpt-4',
    'file_list': file_list,
    'num_repeats_per_file': 10,
    'length_buffer': 200,
    'guide': True,
    'language': 'Chinese',
    # The needle sentence and the question asking for it.
    'needle': '\n小明最喜欢的实习的地点就是上海人工智能实验室。\n',
    'retrieval_question': '小明最喜欢的实习地点是哪里?请按照'
                          '“小明最喜欢的实习地点就是________。”的格式回答。\n',
    'reader_cfg': cdme_reader_cfg,
    'infer_cfg': cdme_infer_cfg,
    'eval_cfg': cdme_trim_eval_cfg,
}
cdme_trim_datasets.append(trim_dataset_dict)
|
||||
|
@ -129,6 +129,32 @@ class CDMEDataset(BaseDataset):
|
||||
|
||||
class CDMEEvaluator(BaseEvaluator):
|
||||
|
||||
def __init__(self, use_trim=False):
|
||||
self.use_trim = use_trim
|
||||
|
||||
@staticmethod
|
||||
def _trim_prediction(prediction, reference):
|
||||
"""Trims the prediction string based on the length of the reference
|
||||
string.
|
||||
|
||||
Args:
|
||||
prediction (str): The prediction string.
|
||||
reference (str): The reference string.
|
||||
|
||||
Returns:
|
||||
str: The trimmed prediction string.
|
||||
"""
|
||||
l08 = int(0.8 * len(reference))
|
||||
l12 = int(1.2 * len(reference))
|
||||
trimmed_prediction = prediction[:l12]
|
||||
|
||||
if len(trimmed_prediction) > l08 and \
|
||||
reference[-1] in trimmed_prediction[l08:]:
|
||||
end_pos = l08 + trimmed_prediction[l08:].index(reference[-1]) + 1
|
||||
trimmed_prediction = trimmed_prediction[:end_pos]
|
||||
|
||||
return trimmed_prediction
|
||||
|
||||
def levenshtein_distance(self, s1, s2):
|
||||
if len(s1) < len(s2):
|
||||
return self.levenshtein_distance(s2, s1)
|
||||
@ -159,6 +185,11 @@ class CDMEEvaluator(BaseEvaluator):
|
||||
for prediction, reference in zip(predictions, references):
|
||||
prediction = re.sub(r'\s+', '', prediction)
|
||||
reference = re.sub(r'\s+', '', reference)
|
||||
|
||||
if self.use_trim:
|
||||
prediction = CDMEEvaluator._trim_prediction(
|
||||
prediction, reference)
|
||||
|
||||
edit_distance = self.levenshtein_distance(prediction, reference)
|
||||
max_len = max(len(prediction), len(reference))
|
||||
score = 100 * (1 -
|
||||
|
Loading…
Reference in New Issue
Block a user