Commit 01928f8: scripts for mt-bench eval
yuchenlin committed Jan 6, 2024 (1 parent: 586fc42)
Showing 15 changed files with 375 additions and 68 deletions.
29 changes: 0 additions & 29 deletions AE_run.md

This file was deleted.

39 changes: 34 additions & 5 deletions VLLM_Run.md
@@ -3,28 +3,57 @@

```bash
version="inst_help_v2"
# version="inst_1k_v3"
output_dir="result_dirs/alpaca_eval/vllm_urial-${version}/"
mkdir -p $output_dir

n_shards=8
shard_size=101
start_gpu=0
for ((start = 0, end = (($shard_size)), gpu = $start_gpu; gpu < $n_shards+$start_gpu; start += $shard_size, end += $shard_size, gpu++)); do
# gpu=0
CUDA_VISIBLE_DEVICES=$gpu python src/vllm_infer.py \
--start_index $start --end_index $end \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name meta-llama/Llama-2-7b-hf \
--tensor_parallel_size 1 \
--dtype bfloat16 \
--data_name alpaca_eval \
--top_p 1 --temperature 0 --repetition_penalty 1.0 --batch_size 1 --max_tokens 2048 \
--output_folder $output_dir/rp=1.0/ \
--overwrite &
done




# HF
version="inst_help_v2"
output_dir="result_dirs/alpaca_eval/hf_urial-${version}/"
mkdir -p $output_dir
start=0
end=101
python src/vllm_infer.py \
--engine hf \
--start_index $start --end_index $end \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name meta-llama/Llama-2-7b-hf --hf_bf16 \
--tensor_parallel_size 1 \
--dtype bfloat16 \
--tokenizer_mode slow \
--data_name alpaca_eval \
--batch_size 1 --max_tokens 1024 \
--output_folder $output_dir/rp=1.0/ \
--overwrite

# --top_p 1 --temperature 0 --repetition_penalty 1.0


python src/scripts/merge_results.py result_dirs/alpaca_eval/urial-inst_help/ Mistral-7B-v0.1
python src/scripts/reformat.py result_dirs/alpaca_eval/urial-inst_help/Mistral-7B-v0.1.json
```
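A note on the sharding loop above: GPU g takes the half-open index range [g*shard_size, (g+1)*shard_size), so 8 shards of 101 cover all 805 AlpacaEval prompts, with the last shard simply running short (this assumes vllm_infer.py clamps --end_index to the dataset size). A minimal sketch of the arithmetic:

```bash
# Print the index range each GPU receives under the 8 x 101 split above.
n_shards=8
shard_size=101
for ((g = 0; g < n_shards; g++)); do
    echo "GPU $g -> examples [$((g * shard_size)), $(((g + 1) * shard_size)))"
done
```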
15 changes: 15 additions & 0 deletions run_scritps/alpaca_eval/llama-70b-urial.sh
@@ -0,0 +1,15 @@
version="inst_help_v2"
output_dir="result_dirs/alpaca_eval/vllm_urial-${version}/"
mkdir -p $output_dir
gpu=2,3
CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name meta-llama/Llama-2-70b-hf \
--tensor_parallel_size 2 \
--dtype bfloat16 \
--data_name alpaca_eval \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 8 --max_tokens 2048 \
--output_folder $output_dir/rp=1.15/ \
--overwrite

22 changes: 22 additions & 0 deletions run_scritps/alpaca_eval/llama-7b-urial.sh
@@ -0,0 +1,22 @@
version="inst_help_v2"
output_dir="result_dirs/alpaca_eval/vllm_urial-${version}/"
mkdir -p $output_dir

n_shards=4
shard_size=202
start_gpu=0
for ((start = 0, end = (($shard_size)), gpu = $start_gpu; gpu < $n_shards+$start_gpu; start += $shard_size, end += $shard_size, gpu++)); do
# gpu=0
CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--start_index $start --end_index $end \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name meta-llama/Llama-2-7b-hf \
--tensor_parallel_size 1 \
--dtype bfloat16 \
--data_name alpaca_eval \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 8 --max_tokens 2048 \
--output_folder $output_dir/rp=1.15/ \
--overwrite &
done

16 changes: 16 additions & 0 deletions run_scritps/alpaca_eval/mixtral.sh
@@ -0,0 +1,16 @@
version="inst_help_v2"
output_dir="result_dirs/alpaca_eval/vllm_urial-${version}/"
mkdir -p $output_dir
gpu=0,1,2,3
CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name mistralai/Mixtral-8x7B-v0.1 \
--tensor_parallel_size 4 \
--dtype bfloat16 \
--data_name alpaca_eval \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 8 --max_tokens 2048 \
--output_folder $output_dir/rp=1.15/
# \
# --overwrite

39 changes: 39 additions & 0 deletions run_scritps/mt-bench/formatting_results.py
@@ -0,0 +1,39 @@
import json
import os
import random
import string

turn1_path = "result_dirs/mt-bench/vllm_urial-inst_help_v2/rp=1.15/Llama-2-70b-hf.turn1.json"
turn2_path = "result_dirs/mt-bench/vllm_urial-inst_help_v2/rp=1.15/Llama-2-70b-hf.turn2.json"
output_folder = "result_dirs/mt-bench/vllm_urial-inst_help_v2/rp=1.15/"
model_id = "Llama-2-70b-hf-URIAL"
turn1_results = json.load(open(turn1_path))
turn2_results = json.load(open(turn2_path))

results = []
for item1, item2 in zip(turn1_results, turn2_results):
    assert item1["question_id"] == item2["question_id"]
    res_item = {}
    res_item["question_id"] = item1["question_id"]
    # generate a random 22-character answer id
    res_item["answer_id"] = ''.join(random.choices(string.ascii_uppercase + string.digits, k=22))
    res_item["model_id"] = model_id
    res_item["choices"] = [
        {
            "index": 0,
            "turns": [
                item1["turn1_output"],
                item2["turn2_output"]
            ]
        }
    ]
    results.append(res_item)

with open(f"{output_folder}/{model_id}.jsonl", "w") as f:
    for item in results:
        f.write(json.dumps(item) + "\n")

# copy the file to `/home/yuchenl/FastChat/fastchat/llm_judge/data/mt_bench/model_answer/`
os.system(f"cp {output_folder}/{model_id}.jsonl /home/yuchenl/FastChat/fastchat/llm_judge/data/mt_bench/model_answer/")
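With the answers copied into FastChat's model_answer directory, judging follows FastChat's standard MT-bench workflow. A hedged sketch (these commands come from FastChat's llm_judge README, not this repo, and an OPENAI_API_KEY is required):

```bash
# Judge the URIAL answers with the default GPT-4 judge, then print scores.
cd /home/yuchenl/FastChat/fastchat/llm_judge
python gen_judgment.py --model-list Llama-2-70b-hf-URIAL --parallel 2
python show_result.py --model-list Llama-2-70b-hf-URIAL
```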


30 changes: 30 additions & 0 deletions run_scritps/mt-bench/llama-70b-urial.sh
@@ -0,0 +1,30 @@
version="inst_help_v2"
output_dir="result_dirs/mt-bench/vllm_urial-${version}/"
mkdir -p $output_dir
gpu=0,1,2,3
n=4
CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name meta-llama/Llama-2-70b-hf \
--tensor_parallel_size $n \
--dtype bfloat16 \
--data_name mt-bench \
--mt_turn 1 \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 4 --max_tokens 2048 \
--filepath $output_dir/rp=1.15/Llama-2-70b-hf.turn1.json \
--overwrite


CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name meta-llama/Llama-2-70b-hf \
--tensor_parallel_size $n \
--dtype bfloat16 \
--data_name mt-bench \
--mt_turn 2 \
--mt_turn1_result $output_dir/rp=1.15/Llama-2-70b-hf.turn1.json \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 8 --max_tokens 2048 \
--filepath $output_dir/rp=1.15/Llama-2-70b-hf.turn2.json \
--overwrite
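Note the two-pass structure here and in the scripts below: the second invocation passes --mt_turn1_result pointing at the turn-1 output JSON, so each turn-2 prompt is conditioned on the model's own turn-1 answer, and formatting_results.py later pairs the two files by question_id.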
29 changes: 29 additions & 0 deletions run_scritps/mt-bench/llama-7b-urial.sh
@@ -0,0 +1,29 @@
version="inst_help_v2"
output_dir="result_dirs/mt-bench/vllm_urial-${version}/"
mkdir -p $output_dir
gpu=0
CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name meta-llama/Llama-2-7b-hf \
--tensor_parallel_size 1 \
--dtype bfloat16 \
--data_name mt-bench \
--mt_turn 1 \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 4 --max_tokens 2048 \
--filepath $output_dir/rp=1.15/Llama-2-7b-hf.turn1.json \
--overwrite


CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name meta-llama/Llama-2-7b-hf \
--tensor_parallel_size 1 \
--dtype bfloat16 \
--data_name mt-bench \
--mt_turn 2 \
--mt_turn1_result $output_dir/rp=1.15/Llama-2-7b-hf.turn1.json \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 8 --max_tokens 2048 \
--filepath $output_dir/rp=1.15/Llama-2-7b-hf.turn2.json \
--overwrite
30 changes: 30 additions & 0 deletions run_scritps/mt-bench/mixtral.sh
@@ -0,0 +1,30 @@
version="inst_help_v2"
output_dir="result_dirs/mt-bench/vllm_urial-${version}/"
mkdir -p $output_dir
gpu=0,1,2,3
n=4
CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name mistralai/Mixtral-8x7B-v0.1 \
--tensor_parallel_size $n \
--dtype bfloat16 \
--data_name mt-bench \
--mt_turn 1 \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 4 --max_tokens 2048 \
--filepath $output_dir/rp=1.15/Mixtral-8x7B-v0.1.turn1.json \
--overwrite


CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name mistralai/Mixtral-8x7B-v0.1 \
--tensor_parallel_size $n \
--dtype bfloat16 \
--data_name mt-bench \
--mt_turn 2 \
--mt_turn1_result $output_dir/rp=1.15/Mixtral-8x7B-v0.1.turn1.json \
--top_p 1 --temperature 0 --repetition_penalty 1.15 --batch_size 8 --max_tokens 2048 \
--filepath $output_dir/rp=1.15/Mixtral-8x7B-v0.1.turn2.json \
--overwrite
3 changes: 0 additions & 3 deletions src/adapt_utils.py
@@ -81,7 +81,6 @@ def preprocess(in_text, ind, args):
        in_text = f"{urial_prefix}\n# Query:\n```{in_text}```\n\n# Answer:\n```\n"
        return in_text

    elif args.adapt_mode == "restyle":
        if retrieve_data is None and os.path.exists(args.retrieve_data_path):
            with open(args.retrieve_data_path) as f:
@@ -133,8 +132,6 @@ def preprocess(in_text, ind, args):

    if args.adapt_mode == "retrieve":
        return in_text
-    elif args.adapt_mode == "retrieve+prefix":
-        return in_text
    return in_text


25 changes: 13 additions & 12 deletions src/fastchat_conversation.py
@@ -28,7 +28,7 @@ class SeparatorStyle(IntEnum):
    CHATGLM3 = auto()
    DEEPSEEK_CHAT = auto()
    METAMATH = auto()
    URIAL = auto()


@dataclasses.dataclass
@@ -246,16 +246,14 @@ def get_prompt(self) -> str:
        elif self.sep_style == SeparatorStyle.URIAL:
            ret = system_prompt
            for role, message in self.messages:
                # if role == self.roles[0]:
                ret += "\n\n"
                # else:
                #     ret += "\n"
                if message:
                    ret += role + "\n" + self.sep + "\n" + message + "\n" + self.sep2 + "\n"
                else:
                    ret += role + "\n" + self.sep + "\n"
            return ret
        else:
            raise ValueError(f"Invalid style: {self.sep_style}")
@@ -1402,7 +1400,8 @@ def get_conv_template(name: str) -> Conversation:
        system_message="",
        roles=("# Query:", "# Answer:"),
        sep_style=SeparatorStyle.URIAL,
        # sep="```", sep2="```",
        sep="", sep2="",
        stop_str="# Query",
    )
)
@@ -1413,8 +1412,9 @@
    from datasets import load_dataset
    print("-- URIAL template --")
    conv = get_conv_template("urial")
    urial = "inst_1k_v3"
    # url = f"https://raw.githubusercontent.com/Re-Align/URIAL/main/urial_prompts/{urial}.txt"
    url = f"urial_prompts/{urial}.txt"
    print(f"Loading URIAL prompt from {url}")
    dataset = load_dataset("text", data_files=url, split="train", sample_by="document", download_mode="force_redownload")
    urial_prompt = dataset["text"][0]
Expand All @@ -1425,6 +1425,7 @@ def get_conv_template(name: str) -> Conversation:
    conv.append_message(conv.roles[1], "Hi!")
    conv.append_message(conv.roles[0], "How are you?")
    conv.append_message(conv.roles[1], None)
    print([conv.get_prompt()])
    print(conv.get_prompt())
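With sep and sep2 now empty strings, each turn renders as a bare "# Query:" or "# Answer:" header followed by the message; the commented-out alternative, which sets both separators to triple backticks, would instead wrap every message in a code fence, matching the fenced query/answer style visible in src/adapt_utils.py above.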

