diff --git a/opencompass/models/huggingface.py b/opencompass/models/huggingface.py
index af6cee90..19268023 100644
--- a/opencompass/models/huggingface.py
+++ b/opencompass/models/huggingface.py
@@ -40,6 +40,8 @@ class HuggingFace(BaseModel):
             prediction tokens before decoding. Defaults to False.
         batch_padding (bool): If False, inference will be performed in for-loop
             without batch padding.
+        pad_token_id (int): The id of the padding token. Defaults to None. A
+            negative value is resolved as ``#vocab + pad_token_id``.
 
     Note:
         About ``extract_pred_after_decode``: Commonly, we should extract the
@@ -59,7 +61,8 @@ class HuggingFace(BaseModel):
                  model_kwargs: dict = dict(device_map='auto'),
                  meta_template: Optional[Dict] = None,
                  extract_pred_after_decode: bool = False,
-                 batch_padding: bool = False):
+                 batch_padding: bool = False,
+                 pad_token_id: Optional[int] = None):
         super().__init__(path=path,
                          max_seq_len=max_seq_len,
                          tokenizer_only=tokenizer_only,
@@ -69,6 +72,7 @@ class HuggingFace(BaseModel):
         hf_cache_dir = os.getenv('HF_MODEL_HUB', None)
         patch_hf_auto_model(hf_cache_dir)
         self.logger = get_logger()
+        self.pad_token_id = pad_token_id
         self._load_tokenizer(path=path,
                              tokenizer_path=tokenizer_path,
                              tokenizer_kwargs=tokenizer_kwargs)
@@ -84,10 +88,28 @@ class HuggingFace(BaseModel):
         from transformers import AutoTokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(
             tokenizer_path if tokenizer_path else path, **tokenizer_kwargs)
-        if self.tokenizer.pad_token_id is None:
-            self.logger.warning('pad_token_id is not set for the tokenizer. '
-                                'Using eos_token_id as pad_token_id.')
-            self.tokenizer.pad_token = self.tokenizer.eos_token
+
+        # A patch for some models without pad_token_id
+        if self.pad_token_id is not None:
+            if self.pad_token_id < 0:
+                self.pad_token_id += self.tokenizer.vocab_size
+            if self.tokenizer.pad_token_id is None:
+                self.logger.warning(
+                    f'Using {self.pad_token_id} as pad_token_id')
+            elif self.tokenizer.pad_token_id != self.pad_token_id:
+                self.logger.warning(
+                    f'pad_token_id is not consistent with the tokenizer. Using {self.pad_token_id} as pad_token_id'  # noqa
+                )
+            self.tokenizer.pad_token_id = self.pad_token_id
+        elif self.tokenizer.pad_token_id is None:
+            self.logger.warning('pad_token_id is not set for the tokenizer.')
+            if self.tokenizer.eos_token is not None:
+                self.logger.warning('Using eos_token_id as pad_token_id.')
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            else:
+                raise ValueError(
+                    'pad_token_id is not set for this tokenizer. Please set it by passing `pad_token_id={PAD_TOKEN_ID}` in model_cfg. You may find pad_token_id in `generation.json`'  # noqa
+                )
 
         # A patch for llama when batch_padding = True
         if 'decapoda-research/llama' in path or \
@@ -298,7 +320,7 @@ class HuggingFace(BaseModel):
         """
         outputs, inputs = self.get_logits(inputs)
 
-        shift_logits = outputs[..., :-1, :].contiguous()
+        shift_logits = outputs[..., :-1, :].contiguous().float()
 
         shift_labels = inputs['tokens']['input_ids'][..., 1:].contiguous()
 
diff --git a/opencompass/models/intern_model.py b/opencompass/models/intern_model.py
index 824db008..d6330016 100644
--- a/opencompass/models/intern_model.py
+++ b/opencompass/models/intern_model.py
@@ -104,7 +104,7 @@ class InternLM(BaseModel):
         """
         outputs, inputs = self.generator.get_logits(input_texts)
 
-        shift_logits = outputs[..., :-1, :].contiguous()
+        shift_logits = outputs[..., :-1, :].contiguous().float()
         shift_labels = inputs['tokens'][..., 1:].contiguous()
 
         loss_fct = torch.nn.CrossEntropyLoss(
diff --git a/opencompass/models/llama2.py b/opencompass/models/llama2.py
index a92e4dc0..b452e684 100644
--- a/opencompass/models/llama2.py
+++ b/opencompass/models/llama2.py
@@ -84,7 +84,7 @@ class Llama2(BaseModel):
         # forward
         outputs = self.model.forward(tokens, 0)
         # compute ppl
-        shift_logits = outputs[..., :-1, :].contiguous()
+        shift_logits = outputs[..., :-1, :].contiguous().float()
         shift_labels = tokens[..., 1:].contiguous()
         shift_logits = shift_logits.view(-1, shift_logits.size(-1))
         shift_labels = shift_labels.view(-1)
diff --git a/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py b/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py
index b369738b..c264fb56 100644
--- a/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py
+++ b/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py
@@ -215,7 +215,7 @@ class CLPInferencer(BaseInferencer):
             else:
                 outputs, _ = self.model.get_logits(input_texts)
 
-            shift_logits = outputs[..., :-1, :].contiguous()
+            shift_logits = outputs[..., :-1, :].contiguous().float()
 
             shift_logits = F.log_softmax(shift_logits, dim=-1)
             log_probs = []
diff --git a/opencompass/utils/run.py b/opencompass/utils/run.py
index 03b29692..c1a3f76e 100644
--- a/opencompass/utils/run.py
+++ b/opencompass/utils/run.py
@@ -90,6 +90,7 @@ def get_config_from_arg(args) -> Config:
                      max_out_len=args.max_out_len,
                      batch_padding=not args.no_batch_padding,
                      batch_size=args.batch_size,
+                     pad_token_id=args.pad_token_id,
                      run_cfg=dict(num_gpus=args.num_gpus))
         models.append(model)
     return Config(dict(models=models, datasets=datasets),
diff --git a/run.py b/run.py
index dad78f5b..565a6c9e 100644
--- a/run.py
+++ b/run.py
@@ -177,6 +177,7 @@ def parse_hf_args(hf_parser):
                            default=False)
     hf_parser.add_argument('--batch-size', type=int)
     hf_parser.add_argument('--num-gpus', type=int)
+    hf_parser.add_argument('--pad-token-id', type=int)
 
 
 def main():
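
Note on the pad_token_id handling above: a negative id is resolved against the tokenizer's vocabulary size, mirroring Python's negative indexing. A minimal standalone sketch of the same resolution, assuming an arbitrary example checkpoint ('gpt2' here is only illustrative):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('gpt2')  # example checkpoint only

pad_token_id = -1  # e.g. supplied via `--pad-token-id -1`
if pad_token_id < 0:
    # -1 resolves to vocab_size - 1, i.e. the last id in the vocabulary
    pad_token_id += tokenizer.vocab_size
tokenizer.pad_token_id = pad_token_id

With the run.py change, the same value can also be supplied from the command line through the new --pad-token-id flag, which get_config_from_arg forwards into the model config.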
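
On the repeated `.contiguous().float()` change: models loaded in fp16/bf16 produce half-precision logits, and feeding those directly into log_softmax or cross-entropy can lose precision or overflow when computing perplexity, so the logits are upcast to fp32 first. A minimal sketch of the shifted cross-entropy these call sites compute (shapes and vocab size are illustrative, not taken from the repo):

import torch
import torch.nn.functional as F

logits = torch.randn(1, 8, 32000, dtype=torch.float16)  # as from an fp16 forward pass
labels = torch.randint(0, 32000, (1, 8))

# Predict token t+1 from position t, upcasting to fp32 before the softmax.
shift_logits = logits[..., :-1, :].contiguous().float()
shift_labels = labels[..., 1:].contiguous()

loss = F.cross_entropy(shift_logits.view(-1, shift_logits.size(-1)),
                       shift_labels.view(-1))
ppl = torch.exp(loss)  # perplexity over the 7 predicted positions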