diff --git a/opencompass/datasets/lawbench/utils/modules/alignment.py b/opencompass/datasets/lawbench/utils/modules/alignment.py index d11feb74..5330b2fa 100644 --- a/opencompass/datasets/lawbench/utils/modules/alignment.py +++ b/opencompass/datasets/lawbench/utils/modules/alignment.py @@ -8,6 +8,7 @@ REAL_PATH = os.path.split(os.path.realpath(__file__))[0] chinese_punct = "!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘'‛“”„‟…‧﹏" english_punct = punctuation punct = chinese_punct + english_punct +cache_dir = os.environ.get('COMPASS_DATA_CACHE', '') def check_all_chinese(word): """ @@ -22,7 +23,7 @@ def read_cilin(): Cilin 詞林 is a thesaurus with semantic information """ # TODO -- fix this path - lines = open(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "cilin.txt"), "r", encoding="gbk").read().strip().split("\n") + lines = open(os.path.join(cache_dir, "data", "lawbench", "eval_assets", "cilin.txt"), "r", encoding="gbk").read().strip().split("\n") semantic_dict = {} semantic_classes = {} for line in lines: @@ -39,7 +40,7 @@ def read_cilin(): def read_confusion(): confusion_dict = {} - with open(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "confusion_dict.txt"), "r", encoding="utf-8") as f: + with open(os.path.join(cache_dir, "data", "lawbench", "eval_assets", "confusion_dict.txt"), "r", encoding="utf-8") as f: for line in f: li = line.rstrip('\n').split(" ") confusion_dict[li[0]] = li[1:] diff --git a/opencompass/datasets/lawbench/utils/modules/classifier.py b/opencompass/datasets/lawbench/utils/modules/classifier.py index a8e9b921..b8ee407b 100644 --- a/opencompass/datasets/lawbench/utils/modules/classifier.py +++ b/opencompass/datasets/lawbench/utils/modules/classifier.py @@ -10,7 +10,8 @@ Correction = namedtuple( "inds", ], ) -char_smi = CharFuncs(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "char_meta.txt")) +cache_dir = os.environ.get('COMPASS_DATA_CACHE', '') +char_smi = CharFuncs(os.path.join(cache_dir, "data", "lawbench", "eval_assets", "char_meta.txt")) def check_spell_error(src_span: str, tgt_span: str,