Mirror of https://github.com/open-compass/opencompass.git, synced 2025-05-30 16:03:24 +08:00
fix lawbench 2-1 f0.5 score calculation bug (#795)
* fix lawbench 2-1 f0.5 score calculation bug
* use path in overall datasets folder

Co-authored-by: Leymore <zfz-960727@163.com>
parent 1c8e193de8
commit a6c49f15ce
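Note on the metric: the "f0.5" in the title is the standard F-beta score with beta = 0.5, which weights precision twice as heavily as recall when scoring correction edits. Below is a minimal sketch of that computation; the tp/fp/fn counts and the helper name are illustrative assumptions, not values taken from this patch.

# Illustrative only: standard F-beta with beta = 0.5 (precision-weighted),
# the form commonly used for edit-level correction scoring.
# The tp/fp/fn values below are made-up examples, not from this patch.
def f_beta(tp, fp, fn, beta=0.5):
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    if precision + recall == 0:
        return 0.0
    return (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall)

print(round(f_beta(tp=8, fp=2, fn=4), 4))  # 0.7692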
@@ -22,8 +22,7 @@ def read_cilin():
     Cilin 詞林 is a thesaurus with semantic information
     """
     # TODO -- fix this path
-    project_dir = os.path.dirname(os.path.dirname(__file__)) # ymliu@2023.5.30 fix the path
-    lines = open(os.path.join(project_dir, "data", "cilin.txt"), "r", encoding="gbk").read().strip().split("\n")
+    lines = open(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "cilin.txt"), "r", encoding="gbk").read().strip().split("\n")
     semantic_dict = {}
     semantic_classes = {}
     for line in lines:
@@ -40,8 +39,7 @@ def read_cilin():
 
 def read_confusion():
     confusion_dict = {}
-    project_dir = os.path.dirname(os.path.dirname(__file__)) # ymliu@2023.5.30 fix the path
-    with open(os.path.join(project_dir, "data", "confusion_dict.txt"), "r", encoding="utf-8") as f:
+    with open(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "confusion_dict.txt"), "r", encoding="utf-8") as f:
        for line in f:
            li = line.rstrip('\n').split(" ")
            confusion_dict[li[0]] = li[1:]
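Both hunks above replace a module-relative data/ path with one that climbs five directories up from the module and into the shared data/lawbench/eval_assets folder. A small sketch of how that join resolves, assuming (for illustration only) that the module sits at <repo>/opencompass/datasets/lawbench/utils/modules/; the module location and the normpath call are not part of the patch.

import os

# Illustrative: what the new relative join resolves to. The module location
# below is an assumed example path, not taken from the diff.
module_dir = "/repo/opencompass/datasets/lawbench/utils/modules"
asset = os.path.normpath(os.path.join(
    module_dir, "..", "..", "..", "..", "..",
    "data", "lawbench", "eval_assets", "cilin.txt"))
print(asset)  # /repo/data/lawbench/eval_assets/cilin.txt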
@@ -10,8 +10,7 @@ Correction = namedtuple(
         "inds",
     ],
 )
-file_path = os.path.dirname(os.path.abspath(__file__))
-char_smi = CharFuncs(os.path.join(file_path.replace("modules", ""), 'data/char_meta.txt'))
+char_smi = CharFuncs(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "char_meta.txt"))
 
 def check_spell_error(src_span: str,
                       tgt_span: str,
@@ -57,8 +57,7 @@ class Tokenizer:
         """
         if bpe:
             from . import tokenization
-            project_dir = os.path.dirname(os.path.dirname(__file__))
-            tokenizer = tokenization.FullTokenizer(vocab_file=os.path.join(project_dir,"data","chinese_vocab.txt"), do_lower_case=False)
+            tokenizer = tokenization.FullTokenizer(vocab_file=os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "chinese_vocab.txt"), do_lower_case=False)
         results = []
         for input_string in input_strings:
             if not self.segmented:  # if the input has not been pre-segmented, split on every character (no special handling of English punctuation, no BPE); otherwise keep the original segmentation
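The final context line above describes the non-BPE fallback: when the input has not been pre-segmented, every character becomes its own token. A minimal sketch of that behaviour, assuming nothing about the real Tokenizer class beyond what the comment states:

# Illustrative character-level fallback: with no pre-segmentation and no BPE,
# each character of the input becomes its own token.
def char_split(input_strings):
    return [list(s) for s in input_strings]

print(char_split(["合同无效", "ok"]))  # [['合', '同', '无', '效'], ['o', 'k']]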