[Fix] Fix when missing both pad and eos token (#287)

* fix when missing both pad and eos token
* update pad_token_id impl

parent 166022f568
commit e810974068
@@ -40,6 +40,8 @@ class HuggingFace(BaseModel):
             prediction tokens before decoding. Defaults to False.
         batch_padding (bool): If False, inference will be performed in a for-loop
             without batch padding.
+        pad_token_id (int): The id of the padding token. Defaults to None. Use
+            (#vocab + pad_token_id) if given a negative value.
 
     Note:
         About ``extract_pred_after_decode``: Commonly, we should extract the
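The `(#vocab + pad_token_id)` convention means a negative id is resolved relative to the vocabulary size. A minimal sketch of that arithmetic, with an illustrative vocab size:

def resolve_pad_token_id(pad_token_id: int, vocab_size: int) -> int:
    # Interpret a negative pad_token_id as an offset from the end of the vocab.
    return pad_token_id + vocab_size if pad_token_id < 0 else pad_token_id

# e.g. with a hypothetical 32000-token vocabulary:
assert resolve_pad_token_id(-1, 32000) == 31999  # last id in the vocab
assert resolve_pad_token_id(2, 32000) == 2       # non-negative ids pass through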
@@ -59,7 +61,8 @@ class HuggingFace(BaseModel):
                  model_kwargs: dict = dict(device_map='auto'),
                  meta_template: Optional[Dict] = None,
                  extract_pred_after_decode: bool = False,
-                 batch_padding: bool = False):
+                 batch_padding: bool = False,
+                 pad_token_id: Optional[int] = None):
         super().__init__(path=path,
                          max_seq_len=max_seq_len,
                          tokenizer_only=tokenizer_only,
@@ -69,6 +72,7 @@ class HuggingFace(BaseModel):
         hf_cache_dir = os.getenv('HF_MODEL_HUB', None)
         patch_hf_auto_model(hf_cache_dir)
         self.logger = get_logger()
+        self.pad_token_id = pad_token_id
         self._load_tokenizer(path=path,
                              tokenizer_path=tokenizer_path,
                              tokenizer_kwargs=tokenizer_kwargs)
@@ -84,10 +88,31 @@ class HuggingFace(BaseModel):
         from transformers import AutoTokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(
             tokenizer_path if tokenizer_path else path, **tokenizer_kwargs)
-        if self.tokenizer.pad_token_id is None:
-            self.logger.warning('pad_token_id is not set for the tokenizer. '
-                                'Using eos_token_id as pad_token_id.')
-            self.tokenizer.pad_token = self.tokenizer.eos_token
+
+        # A patch for some models without pad_token_id
+        if self.pad_token_id is not None:
+            if self.pad_token_id < 0:
+                self.pad_token_id += self.tokenizer.vocab_size
+            if self.tokenizer.pad_token_id is None:
+                self.logger.warning(
+                    f'Using {self.pad_token_id} as pad_token_id')
+            elif self.tokenizer.pad_token_id != self.pad_token_id:
+                self.logger.warning(
+                    f'pad_token_id is not consistent with the tokenizer. Using {self.pad_token_id} as pad_token_id'  # noqa
+                )
+            self.tokenizer.pad_token_id = self.pad_token_id
+        elif self.tokenizer.pad_token_id is None:
+            self.logger.warning('pad_token_id is not set for the tokenizer.')
+            if self.tokenizer.eos_token is not None:
+                self.logger.warning('Using eos_token_id as pad_token_id.')
+                self.logger.warning(
+                    f'eos_token: {self.tokenizer.eos_token}, is None: '
+                    f'{self.tokenizer.eos_token is None}')
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            else:
+                raise ValueError(
+                    'pad_token_id is not set for this tokenizer. Try to set pad_token_id by passing `pad_token_id={PAD_TOKEN_ID}` in model_cfg. You may find pad_token_id in `generation.json`'  # noqa
+                )
 
         # A patch for llama when batch_padding = True
         if 'decapoda-research/llama' in path or \
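The patched `_load_tokenizer` resolves the pad token in a fixed order: an explicit `pad_token_id` from the model config wins (with negative values offset by the vocab size), otherwise the tokenizer's own pad token is kept, otherwise the eos token is reused, and only when both are missing does loading fail. A condensed, standalone sketch of that priority, written as a hypothetical free function rather than the actual method:

def resolve_pad_token(tokenizer, cfg_pad_token_id=None):
    # tokenizer: any HuggingFace tokenizer; cfg_pad_token_id: user-supplied override.
    if cfg_pad_token_id is not None:
        if cfg_pad_token_id < 0:                       # negative -> offset from vocab end
            cfg_pad_token_id += tokenizer.vocab_size
        tokenizer.pad_token_id = cfg_pad_token_id      # an explicit id always wins
    elif tokenizer.pad_token_id is None:
        if tokenizer.eos_token is not None:
            tokenizer.pad_token = tokenizer.eos_token  # reuse eos as pad
        else:
            raise ValueError(
                'pad_token_id is not set; pass pad_token_id in the model cfg.')
    return tokenizer.pad_token_id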
@@ -298,7 +323,7 @@ class HuggingFace(BaseModel):
         """
 
         outputs, inputs = self.get_logits(inputs)
-        shift_logits = outputs[..., :-1, :].contiguous()
+        shift_logits = outputs[..., :-1, :].contiguous().float()
 
         shift_labels = inputs['tokens']['input_ids'][..., 1:].contiguous()
 
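The added `.float()` matters because models loaded in fp16/bf16 return half-precision logits, and computing the token-level cross entropy (and hence PPL) in half precision is lossy. A minimal sketch of the surrounding PPL computation under that assumption; shapes and the padding id are illustrative, not the exact `get_ppl` implementation:

import torch

def ppl_from_logits(logits: torch.Tensor, input_ids: torch.Tensor,
                    pad_token_id: int) -> torch.Tensor:
    # logits: (batch, seq_len, vocab), possibly fp16/bf16; input_ids: (batch, seq_len).
    shift_logits = logits[..., :-1, :].contiguous().float()  # upcast before the loss
    shift_labels = input_ids[..., 1:].contiguous()
    loss_fct = torch.nn.CrossEntropyLoss(reduction='none',
                                         ignore_index=pad_token_id)
    loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                    shift_labels.view(-1)).view(shift_labels.size())
    # Average only over non-padding positions, then exponentiate.
    lens = (shift_labels != pad_token_id).sum(-1).clamp(min=1)
    return torch.exp(loss.sum(-1) / lens)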
@@ -104,7 +104,7 @@ class InternLM(BaseModel):
         """
         outputs, inputs = self.generator.get_logits(input_texts)
 
-        shift_logits = outputs[..., :-1, :].contiguous()
+        shift_logits = outputs[..., :-1, :].contiguous().float()
         shift_labels = inputs['tokens'][..., 1:].contiguous()
 
         loss_fct = torch.nn.CrossEntropyLoss(
@@ -84,7 +84,7 @@ class Llama2(BaseModel):
         # forward
         outputs = self.model.forward(tokens, 0)
         # compute ppl
-        shift_logits = outputs[..., :-1, :].contiguous()
+        shift_logits = outputs[..., :-1, :].contiguous().float()
         shift_labels = tokens[..., 1:].contiguous()
         shift_logits = shift_logits.view(-1, shift_logits.size(-1))
         shift_labels = shift_labels.view(-1)
@@ -215,7 +215,7 @@ class CLPInferencer(BaseInferencer):
             else:
                 outputs, _ = self.model.get_logits(input_texts)
 
-            shift_logits = outputs[..., :-1, :].contiguous()
+            shift_logits = outputs[..., :-1, :].contiguous().float()
 
             shift_logits = F.log_softmax(shift_logits, dim=-1)
             log_probs = []
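In the CLP (conditional log probability) path the upcast happens before `log_softmax`, where fp16 can underflow for low-probability tokens. A hedged sketch of gathering per-token log-probs for one sequence; the function and its arguments are illustrative, not the inferencer's actual interface:

import torch
import torch.nn.functional as F

def candidate_logprob(logits: torch.Tensor, input_ids: torch.Tensor,
                      cand_positions: torch.Tensor) -> torch.Tensor:
    # logits: (seq_len, vocab); input_ids: (seq_len,);
    # cand_positions: indices (into the shifted sequence) covered by the candidate answer.
    shift_logits = logits[:-1, :].float()            # upcast before log_softmax
    log_probs = F.log_softmax(shift_logits, dim=-1)
    targets = input_ids[1:]                          # next-token targets
    tok_lp = log_probs.gather(-1, targets.unsqueeze(-1)).squeeze(-1)
    return tok_lp[cand_positions].sum()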
@@ -90,6 +90,7 @@ def get_config_from_arg(args) -> Config:
                      max_out_len=args.max_out_len,
                      batch_padding=not args.no_batch_padding,
                      batch_size=args.batch_size,
+                     pad_token_id=args.pad_token_id,
                      run_cfg=dict(num_gpus=args.num_gpus))
         models.append(model)
     return Config(dict(models=models, datasets=datasets),
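With `pad_token_id` now forwarded from the CLI arguments into the generated HuggingFace model config, the padding id can be pinned per model. A sketch of such a model entry; the keys `batch_padding`, `batch_size`, `pad_token_id`, `max_out_len` and `run_cfg` come from this diff, while the remaining fields and values are illustrative:

from opencompass.models import HuggingFace

models = [
    dict(
        type=HuggingFace,
        path='decapoda-research/llama-7b-hf',  # illustrative model path
        max_out_len=100,
        batch_size=8,
        batch_padding=True,
        pad_token_id=0,  # explicit pad id; a negative value is read as vocab_size + pad_token_id
        run_cfg=dict(num_gpus=1),
    ),
]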