[Model] Add mPLUG-Owl and LLaMA-Adapter (#405)

* refine gitignore

* [Feature]: Add minigpt-4

* [Feature]: Add mm local runner

* [Feature]: Add instructblip

* add otter and llama-adapter

* add owl

* add llama2-adapter and owl

* lint

* [Feature]: Add minigpt-4

* [Feature]: Add instructblip

* add otter and llama-adapter

* add owl

* add llama2-adapter and owl

* lint

* lint

* update

* lint

* lint

* add __init__.py

* update

* update

* update

* update

* optimize mmbench dataset args

* update

* update

* run commit hook

---------

Co-authored-by: liuyuan <3463423099@qq.com>
Co-authored-by: kennymckormick <dhd@pku.edu.cn>
Co-authored-by: kennymckormick <dhd.efz@gmail.com>
Yuanhan Zhang 2023-09-19 14:21:26 +08:00 committed by GitHub
parent 267401bded
commit 7c2726c23b
12 changed files with 102 additions and 36 deletions

View File

@@ -28,18 +28,21 @@ llama_adapter_mmbench_dataloader = dict(batch_size=1,
sampler=dict(type='DefaultSampler', shuffle=False))
# model settings
llama_adapter_model = dict(
llama_adapter_mmbench_model = dict(
type='LLaMA-adapter-v2',
llama_dir= # noqa
'/llama_adapter_v2_multimodal',
prompt_constructor=dict(type=LlamaAadapterMMBenchPromptConstructor),
post_processor=dict(type=LlamaAadapterMMBenchPostProcessor))
post_processor=dict(type=LlamaAadapterMMBenchPostProcessor)
)
# evaluation settings
llama_adapter_evaluator = [
llama_adapter_mmbench_evaluator = [
dict(
type='opencompass.DumpResults',
save_path='work_dirs/llama-adapter-v2-multimodal-mmagibench-v0.1.0.xlsx'
)
]
llama_adapter_mmbench_load_from = None # noqa
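
The rename from `llama_adapter_model` to `llama_adapter_mmbench_model` (and from `llama_adapter_evaluator` to `llama_adapter_mmbench_evaluator`) gives every config variable a dataset-specific prefix, so several model/dataset configs can be merged into one top-level config without name collisions. A minimal sketch of how such prefixed configs might be aggregated; the relative file paths are assumptions, only the variable names come from this diff:

```python
# Hypothetical top-level config pulling in several prefixed sub-configs.
from mmengine.config import read_base

with read_base():
    # Each sub-config contributes uniquely prefixed names
    # (llama_adapter_mmbench_*, mplug_owl_mmbench_*, ...).
    from .llama_adapter_v2_mm_7b_mmbench import (
        llama_adapter_mmbench_dataloader, llama_adapter_mmbench_evaluator,
        llama_adapter_mmbench_model)
    from .mplug_owl_7b_mmbench import (mplug_owl_mmbench_dataloader,
                                       mplug_owl_mmbench_evaluator,
                                       mplug_owl_mmbench_model)

models = [llama_adapter_mmbench_model, mplug_owl_mmbench_model]
```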

View File

@@ -35,8 +35,8 @@ mplug_owl_mmbench_dataloader = dict(
# model settings
mplug_owl_mmbench_model = dict(
type='mplug_owl-7b',
model_path='/mplug-owl-llama-7b-ft',
type='mplug_owl_7b',
model_path='/mplug-owl-llama-7b-ft/',
prompt_constructor=dict(type=MplugOwlMMBenchPromptConstructor),
post_processor=dict(type=MplugOwlMMBenchPostProcessor)
) # noqa
@@ -46,3 +46,5 @@ mplug_owl_mmbench_evaluator = [
dict(type='opencompass.DumpResults',
save_path='work_dirs/mplug_owl-7b-mmagibench-v0.1.0.xlsx')
]
mplug_owl_mmbench_load_from = None

View File

@@ -41,12 +41,11 @@ class MMBenchDataset(Dataset):
return len(self.df)
def __getitem__(self, idx: int) -> dict:
# Mandatory Fields Begin
index = self.df.iloc[idx]['index']
image = self.df.iloc[idx]['image']
image = decode_base64_to_image(image)
question = self.df.iloc[idx]['question']
catetory = self.df.iloc[idx]['category']
l2_catetory = self.df.iloc[idx]['l2-category']
option_candidate = ['A', 'B', 'C', 'D', 'E']
options = {
@@ -57,13 +56,19 @@ class MMBenchDataset(Dataset):
options_prompt = f'{self.sys_prompt}\n'
for key, item in options.items():
options_prompt += f'{key}. {item}\n'
# Mandatory Fields End
# Optional Fields Begin
hint = self.load_from_df(idx, 'hint')
category = self.load_from_df(idx, 'category')
l2_catetory = self.load_from_df(idx, 'l2-category')
# Optional Fields End
data = {
'img': image,
'question': question,
'options': options_prompt,
'category': catetory,
'category': category,
'l2-category': l2_catetory,
'options_dict': options,
'index': index,
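
The `hint`, `category`, and `l2-category` columns are now read through `load_from_df`, which turns them into optional fields. That helper is not part of this diff; a plausible reconstruction, assuming it treats a missing column or an empty cell as absent, is:

```python
import pandas as pd

class MMBenchDataset:  # abbreviated: only the helper is sketched here
    def load_from_df(self, idx: int, key: str):
        # Hypothetical reconstruction: return None when the column is
        # missing or the cell is NaN, so optional fields degrade gracefully.
        if key in self.df.columns and not pd.isna(self.df.iloc[idx][key]):
            return self.df.iloc[idx][key]
        return None
```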

View File

@@ -1,20 +1,37 @@
import json
import os
import os.path as osp
import sys
from pathlib import Path
import clip
import mmengine
import torch
import torch.nn as nn
from llama_adapter_v2_multimodal7b.llama.llama import ModelArgs, Transformer
from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
from mmengine.device import get_device
from timm.models.vision_transformer import Block
from opencompass.registry import MM_MODELS
def load_package():
"""Load required packages from llama_adapter_v2_multimodal7b."""
current_file_path = os.path.abspath(__file__)
current_folder_path = os.path.dirname(current_file_path)
sys.path.append(os.path.join(current_folder_path, 'LLaMA-Adapter')) # noqa
from llama_adapter_v2_multimodal7b.llama.llama import (ModelArgs,
Transformer)
from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
sys.path.pop(-1)
return ModelArgs, Transformer, Tokenizer, sample_top_p
ModelArgs, Transformer, Tokenizer, sample_top_p = load_package()
class LLaMA_adapter(nn.Module):
def __init__(self,
@@ -182,7 +199,6 @@ class LLaMA_adapter(nn.Module):
data_sample = data_samples[0]
prompts = [prompts]
imgs = image
# import pdb;pdb.set_trace()
@@ -261,12 +277,14 @@ class LLaMA_adapter_v2(nn.Module):
llama_dir,
prompt_constructor: dict,
post_processor: dict,
model_path: str = 'llama_adapter_v2_multimodal7b',
name: str = 'LORA-BIAS-7B',
mode: str = 'generation',
device='cuda' if torch.cuda.is_available() else 'cpu',
download_root='ckpts'):
super().__init__()
name = 'BIAS-7B'
assert name in ['LORA-BIAS-7B', 'BIAS-7B', 'CAPTION-7B']
# BIAS-7B or https://xxx/sha256_BIAS-7B.pth -> 7B
llama_type = name.split('.')[0].split('-')[-1]
llama_ckpt_dir = os.path.join(llama_dir, llama_type)
@@ -274,9 +292,22 @@ class LLaMA_adapter_v2(nn.Module):
# load llama_adapter weights and model_cfg
print(f'Loading LLaMA-Adapter from {llama_dir}')
ckpt = torch.load(
f'{llama_dir}/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth', # noqa: E501
map_location='cpu')
current_file_path = os.path.abspath(__file__)
current_folder_path = os.path.dirname(current_file_path)
model_path = osp.join(current_folder_path, 'LLaMA-Adapter', model_path)
ckpt_root = osp.join(model_path, download_root)
ckpt_map = {
'LORA-BIAS-7B':
'1bcbffc43484332672092e0024a8699a6eb5f558161aebf98a7c6b1db67224d1_LORA-BIAS-7B.pth', # noqa: E501
'BIAS-7B':
'7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth', # noqa: E501
'CAPTION-7B':
'5088aeb63a89746b90bcfd5cb819e1c7411b2771b267c6d131ce73e250a8abf0_CAPTION-7B.pth' # noqa: E501
}
ckpt = torch.load(osp.join(ckpt_root, ckpt_map[name]),
map_location='cpu')
model_cfg = ckpt.get('config', {})
self.model = LLaMA_adapter(
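
Both this wrapper and the mPLUG-Owl wrapper below use the same vendoring trick: append the checked-out upstream repo to `sys.path`, import the needed symbols, then pop the path again so the vendored package does not shadow later imports. A reusable sketch of the pattern; the context-manager form is an illustration, the diff itself uses a plain `load_package()` function:

```python
import os
import sys
from contextlib import contextmanager

@contextmanager
def vendored_path(subdir: str):
    """Temporarily expose a vendored repo next to this file for import."""
    root = os.path.join(os.path.dirname(os.path.abspath(__file__)), subdir)
    sys.path.append(root)
    try:
        yield
    finally:
        sys.path.remove(root)

# Usage, mirroring load_package() above:
# with vendored_path('LLaMA-Adapter'):
#     from llama_adapter_v2_multimodal7b.llama.llama import ModelArgs
```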

View File

@@ -7,7 +7,7 @@ class LlamaAadapterMMBenchPostProcessor:
def __init__(self) -> None:
pass
def __call__(self, output_token: torch.tensor, tokenizer) -> str:
def __call__(self, output_token: torch.tensor) -> str:
if len(output_token) >= 2:
if output_token[1] == '.':

View File

@@ -49,8 +49,10 @@ class LlamaAadapterMMBenchPromptConstructor:
data_sample.get('context') for data_sample in data_samples
]
else:
context = ''
prompts = context + ' ' + question + ' ' + options # noqa
context = [''] * len(data_samples)
prompts = []
for cur_context, cur_question, cur_options in zip(
context, question, options):
prompts.append(cur_context + ' ' + cur_question + ' ' +
cur_options) # noqa
return prompts
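
The old code concatenated `context + ' ' + question + ' ' + options` directly, which only works when each is a single string; with batched samples they are per-sample lists, so the new code zips them. A quick illustration with made-up inputs:

```python
context = ['Look at the picture.', '']
question = ['Which animal is shown?', 'What color is the car?']
options = ['A. cat\nB. dog\n', 'A. red\nB. blue\n']

prompts = []
for cur_context, cur_question, cur_options in zip(context, question, options):
    prompts.append(cur_context + ' ' + cur_question + ' ' + cur_options)
# prompts[0] == 'Look at the picture. Which animal is shown? A. cat\nB. dog\n'
```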

View File

@@ -1,4 +1,4 @@
from .mplug_owl import MplugOwl
from .mplug_owl_7b import MplugOwl
from .post_processor import MplugOwlMMBenchPostProcessor
from .prompt_constructor import MplugOwlMMBenchPromptConstructor # noqa

View File

@@ -1,27 +1,45 @@
import os
import sys
import mmengine
import torch
import torch.nn as nn
from mmengine.device import get_device
# Load via Huggingface Style
from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
MplugOwlProcessor)
from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
from opencompass.registry import MM_MODELS
@MM_MODELS.register_module('mplug_owl')
def load_package():
"""Load required packages from llama_adapter_v2_multimodal7b."""
current_file_path = os.path.abspath(__file__)
current_folder_path = os.path.dirname(current_file_path)
sys.path.append(os.path.join(current_folder_path, 'mPLUG-Owl')) # noqa
from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
MplugOwlProcessor)
from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
sys.path.pop(-1)
return MplugOwlForConditionalGeneration, MplugOwlImageProcessor, MplugOwlProcessor, MplugOwlTokenizer # noqa
MplugOwlForConditionalGeneration, MplugOwlImageProcessor, MplugOwlProcessor, MplugOwlTokenizer = load_package( # noqa
) # noqa
@MM_MODELS.register_module('mplug_owl_7b')
class MplugOwl(nn.Module):
def __init__(self,
prompt_constructor: dict,
post_processor: dict,
model_path='MAGAer13/mplug-owl-llama-7b',
mode: str = 'generation') -> None:
mode: str = 'generation'):
super().__init__()
pretrained_ckpt = model_path
# import pdb;pdb.set_trace()
print(pretrained_ckpt)
self.model = MplugOwlForConditionalGeneration.from_pretrained(
pretrained_ckpt,
torch_dtype=torch.bfloat16,
@@ -57,7 +75,7 @@ class MplugOwl(nn.Module):
inputs = {'image': images, 'data_samples': data_samples}
inputs = self.prompt_constructor(inputs)
image = inputs['image']
prompt = inputs['prompt']
prompt = inputs['prompt'][0]
data_samples = inputs['data_samples']
data_sample = data_samples[0]

View File

@@ -9,7 +9,7 @@ class MplugOwlMMBenchPostProcessor:
def __init__(self) -> None:
pass
def __call__(self, output_token: torch.tensor, tokenizer) -> str:
def __call__(self, output_token: torch.tensor) -> str:
pattern = re.compile(r'([A-Z]\.)')
res = pattern.findall(output_token)
if len(res) > 0:
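
The compiled pattern `([A-Z]\.)` grabs letter-plus-dot option markers from the decoded text; since the rest of the method is truncated above, here is the regex in isolation on a made-up output string:

```python
import re

pattern = re.compile(r'([A-Z]\.)')
res = pattern.findall('The answer is B. because the car is blue.')
# res == ['B.']; taking res[0] (optionally stripping the trailing dot)
# yields the predicted option letter.
```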

View File

@@ -48,8 +48,11 @@ class MplugOwlMMBenchPromptConstructor:
data_sample.get('context') for data_sample in data_samples
]
else:
context = ''
prompts = context + ' ' + question + ' ' + options # noqa
context = [''] * len(data_samples)
prompts = []
for cur_context, cur_question, cur_options in zip(
context, question, options):
prompts.append(cur_context + ' ' + cur_question + ' ' +
cur_options) # noqa
return prompts

View File

@@ -122,6 +122,7 @@ class MultimodalInferTask:
dataloader = Runner.build_dataloader(self.dataloader)
# build model
model = build_model(self.cfg)
model.eval()
# build evaluator
evaluator = Evaluator(self.evaluator)
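
Adding `model.eval()` matters because modules such as dropout and batch norm behave differently at train and eval time; without it, inference results can vary between runs. In practice it is usually paired with `torch.no_grad()` inside the loop, roughly like this hypothetical helper (not the actual `MultimodalInferTask` code):

```python
import torch
from torch import nn

def run_inference(model: nn.Module, dataloader) -> list:
    """Sketch of an eval-mode inference loop."""
    model.eval()           # disable dropout, use running batch-norm stats
    results = []
    with torch.no_grad():  # no autograd bookkeeping needed at eval time
        for batch in dataloader:
            results.append(model(**batch))
    return results
```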

View File

@@ -95,7 +95,7 @@ def get_config_from_arg(args) -> Config:
pad_token_id=args.pad_token_id,
run_cfg=dict(num_gpus=args.num_gpus))
models.append(model)
summarizer = None
if args.summarizer:
s = match_cfg_file('configs/summarizers/', [args.summarizer])[0]
@@ -103,7 +103,8 @@ def get_config_from_arg(args) -> Config:
cfg = Config.fromfile(s[1])
summarizer = cfg['summarizer']
return Config(dict(models=models, datasets=datasets, summarizer=summarizer),
return Config(dict(models=models, datasets=datasets,
summarizer=summarizer),
format_python_code=False)