[Model] Yhzhang/add mlugowl llamaadapter (#405)

* refine gitignore

* [Feature]: Add minigpt-4

* [Feature]: Add mm local runner

* [Feature]: Add instructblip

* add otter and llama-adapter

* add owl

* add llama2-adapter and owl

* lint

* [Feature]: Add minigpt-4

* [Feature]: Add instructblip

* add otter and llama-adapter

* add owl

* add llama2-adapter and owl

* lint

* lint

* update

* lint

* lint

* add __init__.py

* update

* update

* update

* update

* optimize mmbench dataset args

* update

* update

* run commit hook

---------

Co-authored-by: liuyuan <3463423099@qq.com>
Co-authored-by: kennymckormick <dhd@pku.edu.cn>
Co-authored-by: kennymckormick <dhd.efz@gmail.com>
Authored by Yuanhan Zhang on 2023-09-19 14:21:26 +08:00, committed by GitHub
parent 267401bded
commit 7c2726c23b
12 changed files with 102 additions and 36 deletions

View File

@@ -28,18 +28,21 @@ llama_adapter_mmbench_dataloader = dict(batch_size=1,
     sampler=dict(type='DefaultSampler', shuffle=False))

 # model settings
-llama_adapter_model = dict(
+llama_adapter_mmbench_model = dict(
     type='LLaMA-adapter-v2',
     llama_dir=  # noqa
     '/llama_adapter_v2_multimodal',
     prompt_constructor=dict(type=LlamaAadapterMMBenchPromptConstructor),
-    post_processor=dict(type=LlamaAadapterMMBenchPostProcessor))
+    post_processor=dict(type=LlamaAadapterMMBenchPostProcessor)
+)

 # evaluation settings
-llama_adapter_evaluator = [
+llama_adapter_mmbench_evaluator = [
     dict(
         type='opencompass.DumpResults',
         save_path='work_dirs/llama-adapter-v2-multimodal-mmagibench-v0.1.0.xlsx'
     )
 ]
+
+llama_adapter_mmbench_load_from = None  # noqa
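
Every variable in this fragment now carries the `mmbench` infix, so fragments for several datasets can be mixed into one top-level config without name collisions. A minimal sketch of how such a fragment is typically consumed, following OpenCompass's `read_base` convention (the relative import path here is hypothetical):

```python
from mmengine.config import read_base

with read_base():
    # hypothetical path; adjust to wherever this fragment lives
    from .llama_adapter_v2_mmbench import (
        llama_adapter_mmbench_dataloader, llama_adapter_mmbench_evaluator,
        llama_adapter_mmbench_model)

# one (dataloader, model, evaluator) triple per multimodal task
models = [llama_adapter_mmbench_model]
```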

View File

@@ -35,8 +35,8 @@ mplug_owl_mmbench_dataloader = dict(
 # model settings
 mplug_owl_mmbench_model = dict(
-    type='mplug_owl-7b',
-    model_path='/mplug-owl-llama-7b-ft',
+    type='mplug_owl_7b',
+    model_path='/mplug-owl-llama-7b-ft/',
     prompt_constructor=dict(type=MplugOwlMMBenchPromptConstructor),
     post_processor=dict(type=MplugOwlMMBenchPostProcessor)
 )  # noqa
@@ -46,3 +46,5 @@ mplug_owl_mmbench_evaluator = [
     dict(type='opencompass.DumpResults',
          save_path='work_dirs/mplug_owl-7b-mmagibench-v0.1.0.xlsx')
 ]
+
+mplug_owl_mmbench_load_from = None

View File

@@ -41,12 +41,11 @@ class MMBenchDataset(Dataset):
         return len(self.df)

     def __getitem__(self, idx: int) -> dict:
+        # Mandatory Fields Begin
         index = self.df.iloc[idx]['index']
         image = self.df.iloc[idx]['image']
         image = decode_base64_to_image(image)
         question = self.df.iloc[idx]['question']
-        catetory = self.df.iloc[idx]['category']
-        l2_catetory = self.df.iloc[idx]['l2-category']

         option_candidate = ['A', 'B', 'C', 'D', 'E']
         options = {
@@ -57,13 +56,19 @@ class MMBenchDataset(Dataset):
         options_prompt = f'{self.sys_prompt}\n'
         for key, item in options.items():
             options_prompt += f'{key}. {item}\n'
+        # Mandatory Fields End
+
+        # Optional Fields Begin
         hint = self.load_from_df(idx, 'hint')
+        category = self.load_from_df(idx, 'category')
+        l2_catetory = self.load_from_df(idx, 'l2-category')
+        # Optional Fields End

         data = {
             'img': image,
             'question': question,
             'options': options_prompt,
-            'category': catetory,
+            'category': category,
             'l2-category': l2_catetory,
             'options_dict': options,
             'index': index,
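
`load_from_df` is not shown in this hunk; judging from the call sites, it presumably returns the cell when the column exists and holds a value, and `None` otherwise, which is what lets `category` and `l2-category` become optional here. A sketch under that assumption:

```python
import pandas as pd


def load_from_df(self, idx: int, key: str):
    # Return the cell if the column exists and is not NaN, else None,
    # so optional fields such as 'hint' degrade gracefully.
    if key in self.df.columns and not pd.isna(self.df.iloc[idx][key]):
        return self.df.iloc[idx][key]
    return None
```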

View File

@@ -1,20 +1,37 @@
 import json
 import os
+import os.path as osp
+import sys
 from pathlib import Path

 import clip
 import mmengine
 import torch
 import torch.nn as nn
-from llama_adapter_v2_multimodal7b.llama.llama import ModelArgs, Transformer
-from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
-from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
 from mmengine.device import get_device
 from timm.models.vision_transformer import Block

 from opencompass.registry import MM_MODELS


+def load_package():
+    """Load required packages from llama_adapter_v2_multimodal7b."""
+    current_file_path = os.path.abspath(__file__)
+    current_folder_path = os.path.dirname(current_file_path)
+    sys.path.append(os.path.join(current_folder_path, 'LLaMA-Adapter'))  # noqa
+    from llama_adapter_v2_multimodal7b.llama.llama import (ModelArgs,
+                                                           Transformer)
+    from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
+    from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
+    sys.path.pop(-1)
+    return ModelArgs, Transformer, Tokenizer, sample_top_p
+
+
+ModelArgs, Transformer, Tokenizer, sample_top_p = load_package()
+
+
 class LLaMA_adapter(nn.Module):

     def __init__(self,
@@ -182,7 +199,6 @@ class LLaMA_adapter(nn.Module):
         data_sample = data_samples[0]
-        prompts = [prompts]

         imgs = image
         # import pdb;pdb.set_trace()
@@ -261,12 +277,14 @@ class LLaMA_adapter_v2(nn.Module):
                  llama_dir,
                  prompt_constructor: dict,
                  post_processor: dict,
+                 model_path: str = 'llama_adapter_v2_multimodal7b',
+                 name: str = 'LORA-BIAS-7B',
                  mode: str = 'generation',
                  device='cuda' if torch.cuda.is_available() else 'cpu',
                  download_root='ckpts'):
         super().__init__()
-        name = 'BIAS-7B'
+        assert name in ['LORA-BIAS-7B', 'BIAS-7B', 'CAPTION-7B']

         # BIAS-7B or https://xxx/sha256_BIAS-7B.pth -> 7B
         llama_type = name.split('.')[0].split('-')[-1]
         llama_ckpt_dir = os.path.join(llama_dir, llama_type)
@@ -274,9 +292,22 @@ class LLaMA_adapter_v2(nn.Module):
         # load llama_adapter weights and model_cfg
         print(f'Loading LLaMA-Adapter from {llama_dir}')
-        ckpt = torch.load(
-            f'{llama_dir}/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth',  # noqa: E501
+
+        current_file_path = os.path.abspath(__file__)
+        current_folder_path = os.path.dirname(current_file_path)
+        model_path = osp.join(current_folder_path, 'LLaMA-Adapter', model_path)
+        ckpt_root = osp.join(model_path, download_root)
+        ckpt_map = {
+            'LORA-BIAS-7B':
+            '1bcbffc43484332672092e0024a8699a6eb5f558161aebf98a7c6b1db67224d1_LORA-BIAS-7B.pth',  # noqa: E501
+            'BIAS-7B':
+            '7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth',  # noqa: E501
+            'CAPTION-7B':
+            '5088aeb63a89746b90bcfd5cb819e1c7411b2771b267c6d131ce73e250a8abf0_CAPTION-7B.pth'  # noqa: E501
+        }
+        ckpt = torch.load(osp.join(ckpt_root, ckpt_map[name]),
                           map_location='cpu')

         model_cfg = ckpt.get('config', {})
         self.model = LLaMA_adapter(
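
`load_package` appends the vendored `LLaMA-Adapter` checkout to `sys.path`, imports what it needs, then pops the entry so the path change does not leak into the rest of the process. The same pattern can be packaged as a context manager; this variant is not part of the PR, just an illustration:

```python
import os
import sys
from contextlib import contextmanager


@contextmanager
def vendored_path(subdir: str):
    """Temporarily put a vendored checkout on sys.path."""
    root = os.path.join(os.path.dirname(os.path.abspath(__file__)), subdir)
    sys.path.append(root)
    try:
        yield
    finally:
        sys.path.remove(root)


# usage sketch:
# with vendored_path('LLaMA-Adapter'):
#     from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
```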

View File

@@ -7,7 +7,7 @@ class LlamaAadapterMMBenchPostProcessor:

     def __init__(self) -> None:
         pass

-    def __call__(self, output_token: torch.tensor, tokenizer) -> str:
+    def __call__(self, output_token: torch.tensor) -> str:
         if len(output_token) >= 2:
             if output_token[1] == '.':

View File

@@ -49,8 +49,10 @@ class LlamaAadapterMMBenchPromptConstructor:
                 data_sample.get('context') for data_sample in data_samples
             ]
         else:
-            context = ''
-        prompts = context + ' ' + question + ' ' + options  # noqa
+            context = [''] * len(data_samples)
+        prompts = []
+        for cur_context, cur_question, cur_options in zip(
+                context, question, options):
+            prompts.append(cur_context + ' ' + cur_question + ' ' +
+                           cur_options)  # noqa

         return prompts
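
The removed line concatenated plain strings with what are now per-sample lists, which cannot work once the batch is real (`str + list` raises `TypeError`); the new loop builds one prompt per sample. A quick repro with invented sample data:

```python
question = ['What is shown?', 'Which option is correct?']
options = ['A. cat\nB. dog\n', 'A. red\nB. blue\n']
context = [''] * len(question)  # no hints in this batch

prompts = []
for cur_context, cur_question, cur_options in zip(context, question, options):
    prompts.append(cur_context + ' ' + cur_question + ' ' + cur_options)

print(prompts[0])  # ' What is shown? A. cat\nB. dog\n'
```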

View File

@@ -1,4 +1,4 @@
-from .mplug_owl import MplugOwl
+from .mplug_owl_7b import MplugOwl
 from .post_processor import MplugOwlMMBenchPostProcessor
 from .prompt_constructor import MplugOwlMMBenchPromptConstructor  # noqa

View File

@@ -1,27 +1,45 @@
+import os
+import sys
+
 import mmengine
 import torch
 import torch.nn as nn
 from mmengine.device import get_device
-# Load via Huggingface Style
-from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
-from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
-                                            MplugOwlProcessor)
-from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer

 from opencompass.registry import MM_MODELS


-@MM_MODELS.register_module('mplug_owl')
+def load_package():
+    """Load required packages from mPLUG-Owl."""
+    current_file_path = os.path.abspath(__file__)
+    current_folder_path = os.path.dirname(current_file_path)
+    sys.path.append(os.path.join(current_folder_path, 'mPLUG-Owl'))  # noqa
+    from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
+    from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
+                                                MplugOwlProcessor)
+    from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
+    sys.path.pop(-1)
+    return MplugOwlForConditionalGeneration, MplugOwlImageProcessor, MplugOwlProcessor, MplugOwlTokenizer  # noqa
+
+
+MplugOwlForConditionalGeneration, MplugOwlImageProcessor, MplugOwlProcessor, MplugOwlTokenizer = load_package(  # noqa
+)  # noqa
+
+
+@MM_MODELS.register_module('mplug_owl_7b')
 class MplugOwl(nn.Module):

     def __init__(self,
                  prompt_constructor: dict,
                  post_processor: dict,
                  model_path='MAGAer13/mplug-owl-llama-7b',
-                 mode: str = 'generation') -> None:
+                 mode: str = 'generation'):
         super().__init__()
         pretrained_ckpt = model_path
         # import pdb;pdb.set_trace()
+        print(pretrained_ckpt)
         self.model = MplugOwlForConditionalGeneration.from_pretrained(
             pretrained_ckpt,
             torch_dtype=torch.bfloat16,
@@ -57,7 +75,7 @@ class MplugOwl(nn.Module):
         inputs = {'image': images, 'data_samples': data_samples}
         inputs = self.prompt_constructor(inputs)
         image = inputs['image']
-        prompt = inputs['prompt']
+        prompt = inputs['prompt'][0]
         data_samples = inputs['data_samples']
         data_sample = data_samples[0]
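
The registry key changes from `mplug_owl` to `mplug_owl_7b`, which must be mirrored by the `type` field in the updated config above. A toy sketch of the mechanics that `@MM_MODELS.register_module(...)` and `build_model` presumably rely on, using `mmengine.registry.Registry` directly:

```python
from mmengine.registry import Registry

MODELS = Registry('toy_models')  # stand-in for opencompass's MM_MODELS


@MODELS.register_module('toy_model')
class ToyModel:

    def __init__(self, scale: int = 1):
        self.scale = scale


# a config's `type` string is looked up in the registry at build time
model = MODELS.build(dict(type='toy_model', scale=2))
assert isinstance(model, ToyModel) and model.scale == 2
```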

View File

@@ -9,7 +9,7 @@ class MplugOwlMMBenchPostProcessor:

     def __init__(self) -> None:
         pass

-    def __call__(self, output_token: torch.tensor, tokenizer) -> str:
+    def __call__(self, output_token: torch.tensor) -> str:
         pattern = re.compile(r'([A-Z]\.)')
         res = pattern.findall(output_token)
         if len(res) > 0:
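
The pattern pulls capitalized option letters followed by a period out of the decoded string; the hunk is truncated here, but presumably the first match becomes the predicted option. For example:

```python
import re

pattern = re.compile(r'([A-Z]\.)')
res = pattern.findall('The answer is B. Because the image shows a dog.')
if len(res) > 0:
    print(res[0])  # 'B.'
```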

View File

@@ -48,8 +48,11 @@ class MplugOwlMMBenchPromptConstructor:
                 data_sample.get('context') for data_sample in data_samples
             ]
         else:
-            context = ''
-        prompts = context + ' ' + question + ' ' + options  # noqa
+            context = [''] * len(data_samples)
+        prompts = []
+        for cur_context, cur_question, cur_options in zip(
+                context, question, options):
+            prompts.append(cur_context + ' ' + cur_question + ' ' +
+                           cur_options)  # noqa

         return prompts

View File

@@ -122,6 +122,7 @@ class MultimodalInferTask:
         dataloader = Runner.build_dataloader(self.dataloader)
         # build model
         model = build_model(self.cfg)
+        model.eval()
         # build evaluator
         evaluator = Evaluator(self.evaluator)
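
`model.eval()` switches dropout and normalization layers to inference behavior, so repeated forward passes over the benchmark are deterministic. A toy illustration of what the call changes:

```python
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
model.eval()  # dropout becomes a no-op in eval mode
with torch.no_grad():
    x = torch.ones(1, 4)
    assert torch.equal(model(x), model(x))  # would likely fail in train mode
```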

View File

@@ -103,7 +103,8 @@ def get_config_from_arg(args) -> Config:
         cfg = Config.fromfile(s[1])
         summarizer = cfg['summarizer']

-    return Config(dict(models=models, datasets=datasets, summarizer=summarizer),
+    return Config(dict(models=models, datasets=datasets,
+                       summarizer=summarizer),
                   format_python_code=False)