[Model] Add mPLUG-Owl and LLaMA-Adapter (#405)

* refine gitignore

* [Feature]: Add minigpt-4

* [Feature]: Add mm local runner

* [Feature]: Add instructblip

* add otter and llama-adapter

* add owl

* add llama2-adapter and owl

* lint

* [Feature]: Add minigpt-4

* [Feature]: Add instructblip

* add otter and llama-adapter

* add owl

* add llama2-adapter and owl

* lint

* lint

* update

* lint

* lint

* add __init__.py

* update

* update

* update

* update

* optimize mmbench dataset args

* update

* update

* run commit hook

---------

Co-authored-by: liuyuan <3463423099@qq.com>
Co-authored-by: kennymckormick <dhd@pku.edu.cn>
Co-authored-by: kennymckormick <dhd.efz@gmail.com>
Yuanhan Zhang 2023-09-19 14:21:26 +08:00 committed by GitHub
parent 267401bded
commit 7c2726c23b
12 changed files with 102 additions and 36 deletions

View File

@@ -28,18 +28,21 @@ llama_adapter_mmbench_dataloader = dict(batch_size=1,
sampler=dict(type='DefaultSampler', shuffle=False))
# model settings
llama_adapter_model = dict(
llama_adapter_mmbench_model = dict(
type='LLaMA-adapter-v2',
llama_dir= # noqa
'/llama_adapter_v2_multimodal',
prompt_constructor=dict(type=LlamaAadapterMMBenchPromptConstructor),
post_processor=dict(type=LlamaAadapterMMBenchPostProcessor))
post_processor=dict(type=LlamaAadapterMMBenchPostProcessor)
)
# evaluation settings
llama_adapter_evaluator = [
llama_adapter_mmbench_evaluator = [
dict(
type='opencompass.DumpResults',
save_path='work_dirs/llama-adapter-v2-multimodal-mmagibench-v0.1.0.xlsx'
)
]
llama_adapter_mmbench_load_from = None # noqa
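
The rename from `llama_adapter_model` to `llama_adapter_mmbench_model` (and from `llama_adapter_evaluator` to `llama_adapter_mmbench_evaluator`) gives every config variable a dataset-specific prefix, so several model/dataset configs can be merged into one top-level config without name collisions. A minimal sketch of how such prefixed configs might be aggregated; the relative file paths are assumptions, only the variable names come from this diff:

```python
# Hypothetical top-level config pulling in several prefixed sub-configs.
from mmengine.config import read_base

with read_base():
    # Each sub-config contributes uniquely prefixed names
    # (llama_adapter_mmbench_*, mplug_owl_mmbench_*, ...).
    from .llama_adapter_v2_mm_7b_mmbench import (
        llama_adapter_mmbench_dataloader, llama_adapter_mmbench_evaluator,
        llama_adapter_mmbench_model)
    from .mplug_owl_7b_mmbench import (mplug_owl_mmbench_dataloader,
                                       mplug_owl_mmbench_evaluator,
                                       mplug_owl_mmbench_model)

models = [llama_adapter_mmbench_model, mplug_owl_mmbench_model]
```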

View File

@@ -35,8 +35,8 @@ mplug_owl_mmbench_dataloader = dict(
# model settings
mplug_owl_mmbench_model = dict(
type='mplug_owl-7b',
model_path='/mplug-owl-llama-7b-ft',
type='mplug_owl_7b',
model_path='/mplug-owl-llama-7b-ft/',
prompt_constructor=dict(type=MplugOwlMMBenchPromptConstructor),
post_processor=dict(type=MplugOwlMMBenchPostProcessor)
) # noqa
@@ -46,3 +46,5 @@ mplug_owl_mmbench_evaluator = [
dict(type='opencompass.DumpResults',
save_path='work_dirs/mplug_owl-7b-mmagibench-v0.1.0.xlsx')
]
mplug_owl_mmbench_load_from = None

View File

@@ -41,12 +41,11 @@ class MMBenchDataset(Dataset):
return len(self.df)
def __getitem__(self, idx: int) -> dict:
# Mandatory Fields Begin
index = self.df.iloc[idx]['index']
image = self.df.iloc[idx]['image']
image = decode_base64_to_image(image)
question = self.df.iloc[idx]['question']
catetory = self.df.iloc[idx]['category']
l2_catetory = self.df.iloc[idx]['l2-category']
option_candidate = ['A', 'B', 'C', 'D', 'E']
options = {
@@ -57,13 +56,19 @@ class MMBenchDataset(Dataset):
options_prompt = f'{self.sys_prompt}\n'
for key, item in options.items():
options_prompt += f'{key}. {item}\n'
# Mandatory Fields End
# Optional Fields Begin
hint = self.load_from_df(idx, 'hint')
category = self.load_from_df(idx, 'category')
l2_catetory = self.load_from_df(idx, 'l2-category')
# Optional Fields End
data = {
'img': image,
'question': question,
'options': options_prompt,
'category': catetory,
'category': category,
'l2-category': l2_catetory,
'options_dict': options,
'index': index,
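
The `hint`, `category`, and `l2-category` columns are now read through `load_from_df`, which turns them into optional fields. That helper is not part of this diff; a plausible reconstruction, assuming it treats a missing column or an empty cell as absent, is:

```python
import pandas as pd

class MMBenchDataset:  # abbreviated: only the helper is sketched here
    def load_from_df(self, idx: int, key: str):
        # Hypothetical reconstruction: return None when the column is
        # missing or the cell is NaN, so optional fields degrade gracefully.
        if key in self.df.columns and not pd.isna(self.df.iloc[idx][key]):
            return self.df.iloc[idx][key]
        return None
```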

View File

@@ -1,20 +1,37 @@
import json
import os
import os.path as osp
import sys
from pathlib import Path
import clip
import mmengine
import torch
import torch.nn as nn
from llama_adapter_v2_multimodal7b.llama.llama import ModelArgs, Transformer
from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
from mmengine.device import get_device
from timm.models.vision_transformer import Block
from opencompass.registry import MM_MODELS
def load_package():
"""Load required packages from llama_adapter_v2_multimodal7b."""
current_file_path = os.path.abspath(__file__)
current_folder_path = os.path.dirname(current_file_path)
sys.path.append(os.path.join(current_folder_path, 'LLaMA-Adapter')) # noqa
from llama_adapter_v2_multimodal7b.llama.llama import (ModelArgs,
Transformer)
from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
sys.path.pop(-1)
return ModelArgs, Transformer, Tokenizer, sample_top_p
ModelArgs, Transformer, Tokenizer, sample_top_p = load_package()
class LLaMA_adapter(nn.Module):
def __init__(self,
@@ -182,7 +199,6 @@ class LLaMA_adapter(nn.Module):
data_sample = data_samples[0]
prompts = [prompts]
imgs = image
# import pdb;pdb.set_trace()
@@ -261,12 +277,14 @@ class LLaMA_adapter_v2(nn.Module):
llama_dir,
prompt_constructor: dict,
post_processor: dict,
model_path: str = 'llama_adapter_v2_multimodal7b',
name: str = 'LORA-BIAS-7B',
mode: str = 'generation',
device='cuda' if torch.cuda.is_available() else 'cpu',
download_root='ckpts'):
super().__init__()
name = 'BIAS-7B'
assert name in ['LORA-BIAS-7B', 'BIAS-7B', 'CAPTION-7B']
# BIAS-7B or https://xxx/sha256_BIAS-7B.pth -> 7B
llama_type = name.split('.')[0].split('-')[-1]
llama_ckpt_dir = os.path.join(llama_dir, llama_type)
@@ -274,9 +292,22 @@ class LLaMA_adapter_v2(nn.Module):
# load llama_adapter weights and model_cfg
print(f'Loading LLaMA-Adapter from {llama_dir}')
ckpt = torch.load(
f'{llama_dir}/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth', # noqa: E501
map_location='cpu')
current_file_path = os.path.abspath(__file__)
current_folder_path = os.path.dirname(current_file_path)
model_path = osp.join(current_folder_path, 'LLaMA-Adapter', model_path)
ckpt_root = osp.join(model_path, download_root)
ckpt_map = {
'LORA-BIAS-7B':
'1bcbffc43484332672092e0024a8699a6eb5f558161aebf98a7c6b1db67224d1_LORA-BIAS-7B.pth', # noqa: E501
'BIAS-7B':
'7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth', # noqa: E501
'CAPTION-7B':
'5088aeb63a89746b90bcfd5cb819e1c7411b2771b267c6d131ce73e250a8abf0_CAPTION-7B.pth' # noqa: E501
}
ckpt = torch.load(osp.join(ckpt_root, ckpt_map[name]),
map_location='cpu')
model_cfg = ckpt.get('config', {})
self.model = LLaMA_adapter(
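
Both this wrapper and the mPLUG-Owl wrapper below use the same vendoring trick: append the checked-out upstream repo to `sys.path`, import the needed symbols, then pop the path again so the vendored package does not shadow later imports. A reusable sketch of the pattern; the context-manager form is an illustration, the diff itself uses a plain `load_package()` function:

```python
import os
import sys
from contextlib import contextmanager

@contextmanager
def vendored_path(subdir: str):
    """Temporarily expose a vendored repo next to this file for import."""
    root = os.path.join(os.path.dirname(os.path.abspath(__file__)), subdir)
    sys.path.append(root)
    try:
        yield
    finally:
        sys.path.remove(root)

# Usage, mirroring load_package() above:
# with vendored_path('LLaMA-Adapter'):
#     from llama_adapter_v2_multimodal7b.llama.llama import ModelArgs
```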

View File

@@ -7,7 +7,7 @@ class LlamaAadapterMMBenchPostProcessor:
def __init__(self) -> None:
pass
def __call__(self, output_token: torch.tensor, tokenizer) -> str:
def __call__(self, output_token: torch.tensor) -> str:
if len(output_token) >= 2:
if output_token[1] == '.':

View File

@@ -49,8 +49,10 @@ class LlamaAadapterMMBenchPromptConstructor:
data_sample.get('context') for data_sample in data_samples
]
else:
context = ''
prompts = context + ' ' + question + ' ' + options # noqa
context = [''] * len(data_samples)
prompts = []
for cur_context, cur_question, cur_options in zip(
context, question, options):
prompts.append(cur_context + ' ' + cur_question + ' ' +
cur_options) # noqa
return prompts
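
The old code concatenated `context + ' ' + question + ' ' + options` directly, which only works when each is a single string; with batched samples they are per-sample lists, so the new code zips them. A quick illustration with made-up inputs:

```python
context = ['Look at the picture.', '']
question = ['Which animal is shown?', 'What color is the car?']
options = ['A. cat\nB. dog\n', 'A. red\nB. blue\n']

prompts = []
for cur_context, cur_question, cur_options in zip(context, question, options):
    prompts.append(cur_context + ' ' + cur_question + ' ' + cur_options)
# prompts[0] == 'Look at the picture. Which animal is shown? A. cat\nB. dog\n'
```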

View File

@@ -1,4 +1,4 @@
from .mplug_owl import MplugOwl
from .mplug_owl_7b import MplugOwl
from .post_processor import MplugOwlMMBenchPostProcessor
from .prompt_constructor import MplugOwlMMBenchPromptConstructor # noqa

View File

@@ -1,27 +1,45 @@
import os
import sys
import mmengine
import torch
import torch.nn as nn
from mmengine.device import get_device
# Load via Huggingface Style
from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
MplugOwlProcessor)
from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
from opencompass.registry import MM_MODELS
@MM_MODELS.register_module('mplug_owl')
def load_package():
"""Load required packages from llama_adapter_v2_multimodal7b."""
current_file_path = os.path.abspath(__file__)
current_folder_path = os.path.dirname(current_file_path)
sys.path.append(os.path.join(current_folder_path, 'mPLUG-Owl')) # noqa
from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
MplugOwlProcessor)
from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
sys.path.pop(-1)
return MplugOwlForConditionalGeneration, MplugOwlImageProcessor, MplugOwlProcessor, MplugOwlTokenizer # noqa
MplugOwlForConditionalGeneration, MplugOwlImageProcessor, MplugOwlProcessor, MplugOwlTokenizer = load_package( # noqa
) # noqa
@MM_MODELS.register_module('mplug_owl_7b')
class MplugOwl(nn.Module):
def __init__(self,
prompt_constructor: dict,
post_processor: dict,
model_path='MAGAer13/mplug-owl-llama-7b',
mode: str = 'generation') -> None:
mode: str = 'generation'):
super().__init__()
pretrained_ckpt = model_path
# import pdb;pdb.set_trace()
print(pretrained_ckpt)
self.model = MplugOwlForConditionalGeneration.from_pretrained(
pretrained_ckpt,
torch_dtype=torch.bfloat16,
@@ -57,7 +75,7 @@ class MplugOwl(nn.Module):
inputs = {'image': images, 'data_samples': data_samples}
inputs = self.prompt_constructor(inputs)
image = inputs['image']
prompt = inputs['prompt']
prompt = inputs['prompt'][0]
data_samples = inputs['data_samples']
data_sample = data_samples[0]

View File

@@ -9,7 +9,7 @@ class MplugOwlMMBenchPostProcessor:
def __init__(self) -> None:
pass
def __call__(self, output_token: torch.tensor, tokenizer) -> str:
def __call__(self, output_token: torch.tensor) -> str:
pattern = re.compile(r'([A-Z]\.)')
res = pattern.findall(output_token)
if len(res) > 0:
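
The compiled pattern `([A-Z]\.)` grabs letter-plus-dot option markers from the decoded text; since the rest of the method is truncated above, here is the regex in isolation on a made-up output string:

```python
import re

pattern = re.compile(r'([A-Z]\.)')
res = pattern.findall('The answer is B. because the car is blue.')
# res == ['B.']; taking res[0] (optionally stripping the trailing dot)
# yields the predicted option letter.
```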

View File

@@ -48,8 +48,11 @@ class MplugOwlMMBenchPromptConstructor:
data_sample.get('context') for data_sample in data_samples
]
else:
context = ''
prompts = context + ' ' + question + ' ' + options # noqa
context = [''] * len(data_samples)
prompts = []
for cur_context, cur_question, cur_options in zip(
context, question, options):
prompts.append(cur_context + ' ' + cur_question + ' ' +
cur_options) # noqa
return prompts

View File

@@ -122,6 +122,7 @@ class MultimodalInferTask:
dataloader = Runner.build_dataloader(self.dataloader)
# build model
model = build_model(self.cfg)
model.eval()
# build evaluator
evaluator = Evaluator(self.evaluator)
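
Adding `model.eval()` matters because modules such as dropout and batch norm behave differently at train and eval time; without it, inference results can vary between runs. In practice it is usually paired with `torch.no_grad()` inside the loop, roughly like this hypothetical helper (not the actual `MultimodalInferTask` code):

```python
import torch
from torch import nn

def run_inference(model: nn.Module, dataloader) -> list:
    """Sketch of an eval-mode inference loop."""
    model.eval()           # disable dropout, use running batch-norm stats
    results = []
    with torch.no_grad():  # no autograd bookkeeping needed at eval time
        for batch in dataloader:
            results.append(model(**batch))
    return results
```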

View File

@@ -95,7 +95,7 @@ def get_config_from_arg(args) -> Config:
pad_token_id=args.pad_token_id,
run_cfg=dict(num_gpus=args.num_gpus))
models.append(model)
summarizer = None
if args.summarizer:
s = match_cfg_file('configs/summarizers/', [args.summarizer])[0]
@@ -103,7 +103,8 @@ def get_config_from_arg(args) -> Config:
cfg = Config.fromfile(s[1])
summarizer = cfg['summarizer']
return Config(dict(models=models, datasets=datasets, summarizer=summarizer),
return Config(dict(models=models, datasets=datasets,
summarizer=summarizer),
format_python_code=False)