[Fix] Update lawbench data path (#2037)

This commit is contained in:
谢昕辰 2025-05-07 16:18:43 +08:00 committed by GitHub
parent d62b69aaef
commit 43b2c4ed76
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 3 deletions

View File

@@ -8,6 +8,7 @@ REAL_PATH = os.path.split(os.path.realpath(__file__))[0]
chinese_punct = "!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘'‛“”„‟…‧﹏" chinese_punct = "!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘'‛“”„‟…‧﹏"
english_punct = punctuation english_punct = punctuation
punct = chinese_punct + english_punct punct = chinese_punct + english_punct
cache_dir = os.environ.get('COMPASS_DATA_CACHE', '')
def check_all_chinese(word): def check_all_chinese(word):
""" """
@@ -22,7 +23,7 @@ def read_cilin():
Cilin 詞林 is a thesaurus with semantic information Cilin 詞林 is a thesaurus with semantic information
""" """
# TODO -- fix this path # TODO -- fix this path
lines = open(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "cilin.txt"), "r", encoding="gbk").read().strip().split("\n") lines = open(os.path.join(cache_dir, "data", "lawbench", "eval_assets", "cilin.txt"), "r", encoding="gbk").read().strip().split("\n")
semantic_dict = {} semantic_dict = {}
semantic_classes = {} semantic_classes = {}
for line in lines: for line in lines:
@@ -39,7 +40,7 @@ def read_cilin():
def read_confusion(): def read_confusion():
confusion_dict = {} confusion_dict = {}
with open(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "confusion_dict.txt"), "r", encoding="utf-8") as f: with open(os.path.join(cache_dir, "data", "lawbench", "eval_assets", "confusion_dict.txt"), "r", encoding="utf-8") as f:
for line in f: for line in f:
li = line.rstrip('\n').split(" ") li = line.rstrip('\n').split(" ")
confusion_dict[li[0]] = li[1:] confusion_dict[li[0]] = li[1:]

View File

@@ -10,7 +10,8 @@ Correction = namedtuple(
"inds", "inds",
], ],
) )
char_smi = CharFuncs(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "char_meta.txt")) cache_dir = os.environ.get('COMPASS_DATA_CACHE', '')
char_smi = CharFuncs(os.path.join(cache_dir, "data", "lawbench", "eval_assets", "char_meta.txt"))
def check_spell_error(src_span: str, def check_spell_error(src_span: str,
tgt_span: str, tgt_span: str,