diff --git a/opencompass/datasets/lawbench/utils/modules/alignment.py b/opencompass/datasets/lawbench/utils/modules/alignment.py
index cee5124c..d11feb74 100644
--- a/opencompass/datasets/lawbench/utils/modules/alignment.py
+++ b/opencompass/datasets/lawbench/utils/modules/alignment.py
@@ -22,8 +22,7 @@ def read_cilin():
     Cilin 詞林 is a thesaurus with semantic information
     """
     # TODO -- fix this path
-    project_dir = os.path.dirname(os.path.dirname(__file__)) # ymliu@2023.5.30 fix the path
-    lines = open(os.path.join(project_dir, "data", "cilin.txt"), "r", encoding="gbk").read().strip().split("\n")
+    lines = open(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "cilin.txt"), "r", encoding="gbk").read().strip().split("\n")
     semantic_dict = {}
     semantic_classes = {}
     for line in lines:
@@ -40,8 +39,7 @@ def read_cilin():
 
 def read_confusion():
     confusion_dict = {}
-    project_dir = os.path.dirname(os.path.dirname(__file__)) # ymliu@2023.5.30 fix the path
-    with open(os.path.join(project_dir, "data", "confusion_dict.txt"), "r", encoding="utf-8") as f:
+    with open(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "confusion_dict.txt"), "r", encoding="utf-8") as f:
         for line in f:
             li = line.rstrip('\n').split(" ")
             confusion_dict[li[0]] = li[1:]
diff --git a/opencompass/datasets/lawbench/utils/modules/classifier.py b/opencompass/datasets/lawbench/utils/modules/classifier.py
index 66c225d4..a8e9b921 100644
--- a/opencompass/datasets/lawbench/utils/modules/classifier.py
+++ b/opencompass/datasets/lawbench/utils/modules/classifier.py
@@ -10,8 +10,7 @@ Correction = namedtuple(
         "inds",
     ],
 )
-file_path = os.path.dirname(os.path.abspath(__file__))
-char_smi = CharFuncs(os.path.join(file_path.replace("modules", ""), 'data/char_meta.txt'))
+char_smi = CharFuncs(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "char_meta.txt"))
 
 def check_spell_error(src_span: str,
                       tgt_span: str,
diff --git a/opencompass/datasets/lawbench/utils/modules/tokenizer.py b/opencompass/datasets/lawbench/utils/modules/tokenizer.py
index aa64cb97..bb7111af 100644
--- a/opencompass/datasets/lawbench/utils/modules/tokenizer.py
+++ b/opencompass/datasets/lawbench/utils/modules/tokenizer.py
@@ -57,8 +57,7 @@ class Tokenizer:
         """
         if bpe:
             from . import tokenization
-            project_dir = os.path.dirname(os.path.dirname(__file__))
-            tokenizer = tokenization.FullTokenizer(vocab_file=os.path.join(project_dir,"data","chinese_vocab.txt"), do_lower_case=False)
+            tokenizer = tokenization.FullTokenizer(vocab_file=os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..", "data", "lawbench", "eval_assets", "chinese_vocab.txt"), do_lower_case=False)
         results = []
         for input_string in input_strings:
             if not self.segmented: # 如果没有被分字,就按照每个字符隔开(不考虑英文标点的特殊处理,也不考虑BPE),否则遵循原分字结果
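
Note on the new paths: every hunk replaces a lookup relative to the lawbench utils package with one that climbs five directory levels from utils/modules/ (through utils, lawbench, datasets, and opencompass) to the repository root, then descends into data/lawbench/eval_assets/. A minimal sketch of the resolution, assuming a checkout rooted at /repo; the helper name eval_asset_path is hypothetical, not part of the patch:

import os

def eval_asset_path(filename, module_file):
    # Walk five levels up from utils/modules/ to the repository root,
    # then down into data/lawbench/eval_assets/, mirroring the joined
    # ".." components used in the patched open() calls above.
    module_dir = os.path.dirname(module_file)
    raw = os.path.join(module_dir, *[".."] * 5, "data", "lawbench", "eval_assets", filename)
    return os.path.normpath(raw)  # collapse the ".." components for readability

# eval_asset_path("cilin.txt",
#                 "/repo/opencompass/datasets/lawbench/utils/modules/alignment.py")
# -> "/repo/data/lawbench/eval_assets/cilin.txt"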