## Run URIAL for AlpacaEval

```bash
version="inst_help_v2"
# version="inst_1k_v3"
output_dir="result_dirs/alpaca_eval/vllm_urial-${version}/"
mkdir -p $output_dir

# Launch 8 shards of 101 examples each (8 x 101 = 808 covers the 805
# AlpacaEval instructions), one background job per GPU.
n_shards=8
shard_size=101
start_gpu=0
for ((start = 0, end = shard_size, gpu = start_gpu; gpu < n_shards + start_gpu; start += shard_size, end += shard_size, gpu++)); do
    CUDA_VISIBLE_DEVICES=$gpu python src/vllm_infer.py \
        --start_index $start --end_index $end \
        --urial $version \
        --download_dir /net/nfs/s2-research/llama2/ \
        --model_name meta-llama/Llama-2-7b-hf \
        --tensor_parallel_size 1 \
        --dtype bfloat16 \
        --data_name alpaca_eval \
        --top_p 1 --temperature 0 --repetition_penalty 1.0 --batch_size 1 --max_tokens 2048 \
        --output_folder $output_dir/rp=1.0/ \
        --overwrite &
done
```
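Each shard runs in the background, so the loop returns before inference finishes. If this runs inside a larger script, block on the jobs first; a minimal addition (plain bash, not part of the original script):

```bash
# Wait for all eight background shards to exit, then inspect the outputs.
wait
ls -l "$output_dir/rp=1.0/"
```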




## HF engine

```bash
version="inst_help_v2"
output_dir="result_dirs/alpaca_eval/hf_urial-${version}/"
mkdir -p $output_dir
start=0
end=101   # first 101 examples only; see the note below for the full set
python src/vllm_infer.py \
        --engine hf \
        --start_index $start --end_index $end \
        --urial $version \
        --download_dir /net/nfs/s2-research/llama2/ \
        --model_name meta-llama/Llama-2-7b-hf --hf_bf16 \
        --tensor_parallel_size 1 \
        --dtype bfloat16 \
        --tokenizer_mode slow \
        --max_tokens 2048 \
        --data_name alpaca_eval \
        --batch_size 1 \
        --output_folder $output_dir/rp=1.0/ \
        --overwrite

# Optional sampling flags (greedy decoding, matching the vLLM run above):
# --top_p 1 --temperature 0 --repetition_penalty 1.0
```
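The run above covers only indices 0–101 as a quick slice. To score the full AlpacaEval set (805 instructions) with the HF engine, widen the range:

```bash
start=0
end=805   # all AlpacaEval instructions in a single (slower) HF run
```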


Finally, merge the per-shard results into one file and reformat it for AlpacaEval (adjust the directory and model name to match your run):

```bash
python src/scripts/merge_results.py result_dirs/alpaca_eval/urial-inst_help/ Mistral-7B-v0.1
python src/scripts/reformat.py result_dirs/alpaca_eval/urial-inst_help/Mistral-7B-v0.1.json
```
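Optionally, sanity-check the merged file before handing it to alpaca_eval. A minimal check, assuming the merged output is a JSON array with one entry per instruction (the path is the one produced above):

```bash
# Expect 805 entries, one per AlpacaEval instruction.
python -c "import json; print(len(json.load(open('result_dirs/alpaca_eval/urial-inst_help/Mistral-7B-v0.1.json'))))"
```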