Skip to content

Commit

Permalink
[Minor] Smooth Evaluation
Browse files: browse the repository at this point in the history
  • Loading branch information
kennymckormick committed Dec 29, 2023
1 parent 613201a commit 5b055a4
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 5 deletions.
8 changes: 8 additions & 0 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from vlmeval.evaluate import COCO_eval, MME_eval, MMVet_eval, multiple_choice_eval, MME_rating, VQAEval
from vlmeval.inference import infer_data_job, prefetch_acc
from vlmeval.config import supported_VLM
from vlmeval.utils import dataset_URLs, abbr2full

def parse_args():
parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -33,6 +34,13 @@ def main():
pred_root = model_name

for i, dataset_name in enumerate(args.data):
if dataset_name not in dataset_URLs:
dataset_name = abbr2full(dataset_name)

if dataset_name not in dataset_URLs:
logger.error(f'Unknown dataset: {dataset_name}. ')
continue

tmpl = f'{pred_root}/' + '{}' + f'{world_size}_{dataset_name}.pkl'
out_file = tmpl.format(rank)
result_file = f'{pred_root}/{model_name}_{dataset_name}.xlsx'
Expand Down
2 changes: 1 addition & 1 deletion scripts/auto_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ def is_api(x):
if '80b' in m:
cmd = f'python run.py --data {dataset_str} --model {m}'
else:
cmd = f'torchrun --nproc-per-node=8 run.py --data {dataset_str} --model {m}'
cmd = f'bash run.sh --data {dataset_str} --model {m}'
print(cmd)
os.system(cmd)
6 changes: 4 additions & 2 deletions scripts/report_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

dataset = [
'MME', 'SEEDBench_IMG', 'MMBench', 'CCBench', 'MMBench_CN',
'MMVet', 'OCRVQA_TESTCORE', 'TextVQA_VAL', 'COCO_VAL', 'MMMU_DEV_VAL'
'MMVet', 'OCRVQA_TESTCORE', 'TextVQA_VAL', 'COCO_VAL', 'MMMU_DEV_VAL',
'ChartQA_VALTEST_HUMAN'
]
suffix = [
'score.csv', 'acc.csv', 'acc.csv', 'acc.csv', 'acc.csv',
'gpt-4-turbo_score.csv', 'acc.csv', 'acc.csv', 'score.json', 'acc.csv'
'gpt-4-turbo_score.csv', 'acc.csv', 'acc.csv', 'score.json', 'acc.csv',
'acc.csv'
]

N = len(dataset)
Expand Down
4 changes: 2 additions & 2 deletions vlmeval/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from .matching_util import can_infer, can_infer_option, can_infer_text
from .mp_util import track_progress_rich
from .custom_prompt import CustomPrompt
from .dataset_config import dataset_URLs, img_root_map, DATASET_TYPE
from .dataset_config import dataset_URLs, img_root_map, DATASET_TYPE, abbr2full
from .dataset import TSVDataset, split_MMMU


__all__ = [
'can_infer', 'can_infer_option', 'can_infer_text', 'track_progress_rich',
'TSVDataset', 'dataset_URLs', 'img_root_map', 'DATASET_TYPE', 'CustomPrompt',
'split_MMMU'
'split_MMMU', 'abbr2full'
]
10 changes: 10 additions & 0 deletions vlmeval/utils/dataset_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,13 @@ def DATASET_TYPE(dataset):
elif listinstr(['ocrvqa', 'textvqa', 'chartqa'], dataset.lower()):
return 'VQA'
return None

def abbr2full(s):
    """Expand a dataset abbreviation into the full dataset name.

    A candidate matches when ``s`` occurs as a substring of one of the
    names obtained by iterating ``img_root_map``.

    Args:
        s: Dataset abbreviation (or an already complete name) to resolve.

    Returns:
        The matching name from ``img_root_map``, or ``None`` when nothing
        matches or when the abbreviation is ambiguous (several names
        contain it and none of them equals it exactly).
    """
    names = list(img_root_map)
    # An exact name always wins. Without this, a full name that is also a
    # substring of another name (say 'X' alongside 'X_CN') would count as
    # ambiguous and resolve to None.
    if s in names:
        return s
    matches = [name for name in names if s in name]
    # Only a unique substring match is safe to expand.
    return matches[0] if len(matches) == 1 else None

0 comments on commit 5b055a4

Please sign in to comment.