From 2b1afa7d1e21b626b9bacfc29668f51771334da3 Mon Sep 17 00:00:00 2001
From: x54-729 <45304952+x54-729@users.noreply.github.com>
Date: Tue, 15 Oct 2024 16:03:57 +0800
Subject: [PATCH] [Fix] fix interntrain's tokenizer truncate (#1605)

Co-authored-by: x54-729
---
 opencompass/models/interntrain.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/opencompass/models/interntrain.py b/opencompass/models/interntrain.py
index e846aae2..8b9f3e62 100644
--- a/opencompass/models/interntrain.py
+++ b/opencompass/models/interntrain.py
@@ -318,9 +318,14 @@ class InternTrain(BaseModel):
         # keep same with InternTrain's default value
         min_out_len = 1
 
-        tokens = self.batch_encode(inputs,
-                                   self.max_seq_len - max_out_len,
-                                   left_padding=True)
+        if self.mode == 'none':
+            tokens = self.batch_encode(inputs,
+                                       self.max_seq_len,
+                                       left_padding=True)
+        else:
+            tokens = self.batch_encode(inputs,
+                                       self.max_seq_len - max_out_len,
+                                       left_padding=True)
 
         # random seed for pass@k
         seed = torch.tensor(time.time(), dtype=torch.int64).cuda()