[Update] strip stop_words (#1635)

This commit is contained in:
Lyu Han 2024-10-24 20:39:20 +08:00 committed by GitHub
parent 662dddf41a
commit fb12c3f98a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -73,7 +73,11 @@ class TurboMindModelwithChatTemplate(BaseModel):
else:
assert isinstance(generation_config.eos_token_id, list)
for token_id in generation_config.eos_token_id:
potential_stop_words.append(self.tokenizer.decode(token_id))
stop_word = self.tokenizer.decode(token_id)
if stop_word.startswith(' '):
self.logger.warning(f'stop_word "{stop_word}" contains blanks, which will be stripped')
stop_word = stop_word.strip()
potential_stop_words.append(stop_word)
if self.tokenizer.eos_token is not None:
potential_stop_words.append(self.tokenizer.eos_token)
potential_stop_words = list(set(potential_stop_words))