From 82a533a6904539b2a22afc82636024255e7d31e0 Mon Sep 17 00:00:00 2001
From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
Date: Tue, 12 Dec 2023 18:15:19 +0800
Subject: [PATCH] add rwkv-5-3b model (#666)

* support rwkv5-3b learnboard
* update rwkv-5-3b config
* update config
* refine
* fix bug
* update config
* refine
* reduce batch size
* refine
* reduce batch size to avoid oom in special datasets
* Update huggingface.py
* Update huggingface.py
---
 configs/eval_rwkv5_3b.py        |  6 ++++++
 configs/models/rwkv/rwkv5_3b.py | 25 +++++++++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 configs/eval_rwkv5_3b.py
 create mode 100644 configs/models/rwkv/rwkv5_3b.py

diff --git a/configs/eval_rwkv5_3b.py b/configs/eval_rwkv5_3b.py
new file mode 100644
index 00000000..c711d34d
--- /dev/null
+++ b/configs/eval_rwkv5_3b.py
@@ -0,0 +1,6 @@
+from mmengine.config import read_base
+
+with read_base():  # pulls in the model, dataset and summarizer configs
+    from .models.rwkv.rwkv5_3b import models  # the rwkv-5-3b model list
+    from .datasets.collections.base_medium_llama import datasets
+    from .summarizers.leaderboard import summarizer
diff --git a/configs/models/rwkv/rwkv5_3b.py b/configs/models/rwkv/rwkv5_3b.py
new file mode 100644
index 00000000..30ad03f1
--- /dev/null
+++ b/configs/models/rwkv/rwkv5_3b.py
@@ -0,0 +1,25 @@
+from opencompass.models import HuggingFaceCausalLM
+
+models = [  # single entry; imported by configs/eval_rwkv5_3b.py
+    dict(
+        type=HuggingFaceCausalLM,
+        abbr='rwkv-5-3b',
+        path="RWKV/rwkv-5-world-3b",  # same identifier as tokenizer_path
+        tokenizer_path='RWKV/rwkv-5-world-3b',
+        model_kwargs=dict(
+            device_map='auto',
+            trust_remote_code=True,  # also set for the tokenizer below
+        ),
+        tokenizer_kwargs=dict(
+            padding_side='left',
+            truncation_side='left',
+            trust_remote_code=True,
+            use_fast=False,  # NOTE(review): presumably no fast tokenizer - confirm
+        ),
+        max_out_len=100,
+        max_seq_len=2048,
+        batch_padding=True,
+        batch_size=16,  # commit log: batch size reduced to avoid OOM
+        run_cfg=dict(num_gpus=1, num_procs=1),
+    )
+]