Skip to content

Commit

Permalink
Fix device error in evaluation (OpenGVLab#478)
Browse files — browse the repository at this point in the history
  • Loading branch information
czczup committed Aug 9, 2024
1 parent c1d6c3f commit 5d8f485
Show file tree
Hide file tree
Showing 18 changed files with 86 additions and 188 deletions.
13 changes: 2 additions & 11 deletions internvl_chat/eval/caption/evaluate_caption.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@
from functools import partial

import torch
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from pycocoevalcap.eval import COCOEvalCap
from pycocotools.coco import COCO
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'flickr30k': {
Expand Down Expand Up @@ -254,15 +253,7 @@ def evaluate_chat_model():

torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
13 changes: 2 additions & 11 deletions internvl_chat/eval/cmmmu/evaluate_cmmmu.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
import random

import torch
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'art_and_design': {
Expand Down Expand Up @@ -166,15 +165,7 @@ def evaluate_chat_model():
print('datasets:', args.datasets)
assert args.batch_size == 1, 'Only batch size 1 is supported'

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
14 changes: 2 additions & 12 deletions internvl_chat/eval/llava_bench/evaluate_llava_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
import random

import torch
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'llava_bench': {
Expand Down Expand Up @@ -104,7 +103,6 @@ def evaluate_chat_model():


if __name__ == '__main__':

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', type=str, default='')
parser.add_argument('--datasets', type=str, default='llava_bench')
Expand All @@ -128,15 +126,7 @@ def evaluate_chat_model():
print('datasets:', args.datasets)
assert args.batch_size == 1, 'Only batch size 1 is supported'

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
13 changes: 2 additions & 11 deletions internvl_chat/eval/mathvista/evaluate_mathvista.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@

import torch
from datasets import concatenate_datasets, load_dataset
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from torch.utils.data import Dataset
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'MathVista_testmini': {
Expand Down Expand Up @@ -204,15 +203,7 @@ def evaluate_chat_model():

torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
13 changes: 2 additions & 11 deletions internvl_chat/eval/mmbench/evaluate_mmbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@

import pandas as pd
import torch
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from torch.utils.data import Dataset
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'mmbench_dev_20230712': {
Expand Down Expand Up @@ -291,15 +290,7 @@ def evaluate_chat_model():

torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
15 changes: 3 additions & 12 deletions internvl_chat/eval/mme/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import re

import torch
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from tqdm import tqdm
from transformers import AutoTokenizer


def load_image(image_file, input_size=224):
Expand Down Expand Up @@ -47,16 +46,7 @@ def post_processing(response):
parser.add_argument('--auto', action='store_true')
args = parser.parse_args()

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
prompt = 'Answer the question using a single word or phrase.'
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand All @@ -74,6 +64,7 @@ def post_processing(response):

output = os.path.basename(args.checkpoint)
os.makedirs(output, exist_ok=True)
prompt = 'Answer the question using a single word or phrase.'

for filename in os.listdir(args.root):
fin = open(os.path.join(args.root, filename), 'r', encoding='utf-8')
Expand Down
13 changes: 2 additions & 11 deletions internvl_chat/eval/mmmu/evaluate_mmmu.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
import torch
from data_utils import CAT_SHORT2LONG, process_single_sample
from datasets import concatenate_datasets, load_dataset
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from torch.utils.data import Dataset
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'MMMU_validation': {
Expand Down Expand Up @@ -286,15 +285,7 @@ def evaluate_chat_model():

torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
14 changes: 2 additions & 12 deletions internvl_chat/eval/mmvet/evaluate_mmvet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
import time

import torch
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'mmvet': {
Expand Down Expand Up @@ -110,7 +109,6 @@ def evaluate_chat_model():


if __name__ == '__main__':

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', type=str, default='')
parser.add_argument('--datasets', type=str, default='pope')
Expand All @@ -134,15 +132,7 @@ def evaluate_chat_model():
print('datasets:', args.datasets)
assert args.batch_size == 1, 'Only batch size 1 is supported'

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
13 changes: 2 additions & 11 deletions internvl_chat/eval/mmvp/evaluate_mmvp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
from functools import partial

import torch
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from torch.utils.data import Dataset
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'MMVP': {
Expand Down Expand Up @@ -268,15 +267,7 @@ def evaluate_chat_model():

torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
13 changes: 2 additions & 11 deletions internvl_chat/eval/mvbench/evaluate_mvbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@
import numpy as np
import torch
from decord import VideoReader, cpu
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from torch.utils.data import Dataset
from tqdm import tqdm
from transformers import AutoTokenizer

data_list = {
'Action Sequence': ('action_sequence.json', './data/MVBench/video/star/Charades_v1_480/', 'video', True),
Expand Down Expand Up @@ -387,15 +386,7 @@ def evaluate_chat_model():

torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
14 changes: 2 additions & 12 deletions internvl_chat/eval/pope/evaluate_pope.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
from functools import partial

import torch
from internvl.model.internvl_chat import InternVLChatModel
from internvl.model import load_model_and_tokenizer
from internvl.train.dataset import build_transform, dynamic_preprocess
from PIL import Image
from tqdm import tqdm
from transformers import AutoTokenizer

ds_collections = {
'pope': {
Expand Down Expand Up @@ -175,7 +174,6 @@ def evaluate_chat_model():


if __name__ == '__main__':

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', type=str, default='')
parser.add_argument('--datasets', type=str, default='pope')
Expand Down Expand Up @@ -207,15 +205,7 @@ def evaluate_chat_model():

torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

if args.auto:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
kwargs = {'device_map': 'auto'} if args.auto else {}
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
model = InternVLChatModel.from_pretrained(
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
model = model.cuda()
model, tokenizer = load_model_and_tokenizer(args)
image_size = model.config.force_image_size or model.config.vision_config.image_size
use_thumbnail = model.config.use_thumbnail

Expand Down
Loading

0 comments on commit 5d8f485

Please sign in to comment.