Commit 9cbd773

update
1 parent 9d648aa commit 9cbd773

File tree: 7 files changed (+166, −12 lines)


tests/test_tipc/auto_tuner/llama/N1C8/CE_llama7b_autotuner_bs8_fp16_lora.sh renamed to tests/test_tipc/auto_tuner/llama_finetune/N1C8/CE_llama7b_autotuner_bs8_fp16_lora.sh

Lines changed: 2 additions & 2 deletions
@@ -20,6 +20,6 @@ param+="autoconfig_json_file=autoconfig/llama7b_lora.json "
 param+="modle_json_file=autoconfig/llama7b_lora_params.json "

 cd ./tests
-bash ./test_tipc/auto_tuner/llama/benchmark_common/prepare.sh
+bash ./test_tipc/auto_tuner/llama_finetune/benchmark_common/prepare.sh

-bash -c "${param} bash ./test_tipc/auto_tuner/llama/benchmark_common/run_benchmark.sh"
+bash -c "${param} bash ./test_tipc/auto_tuner/llama_finetune/benchmark_common/run_benchmark.sh"

tests/test_tipc/auto_tuner/llama/N1C8/CE_llama7b_autotuner_bs8_fp16_sft.sh renamed to tests/test_tipc/auto_tuner/llama_finetune/N1C8/CE_llama7b_autotuner_bs8_fp16_sft.sh

Lines changed: 2 additions & 2 deletions
@@ -20,6 +20,6 @@ param+="autoconfig_json_file=autoconfig/llama7b_sft.json "
 param+="modle_json_file=autoconfig/llama7b_sft_params.json "

 cd ./tests
-bash ./test_tipc/auto_tuner/llama/benchmark_common/prepare.sh
+bash ./test_tipc/auto_tuner/llama_finetune/benchmark_common/prepare.sh

-bash -c "${param} bash ./test_tipc/auto_tuner/llama/benchmark_common/run_benchmark.sh"
+bash -c "${param} bash ./test_tipc/auto_tuner/llama_finetune/benchmark_common/run_benchmark.sh"
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+python -m pip install -r ../requirements.txt
+pip install regex
+
+cd ../../../llm/
+wget https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_ids.npy
+wget https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k_idx.npz
+
+mkdir data
+mv llama_openwebtext_100k_ids.npy ./data
+mv llama_openwebtext_100k_idx.npz ./data
+
+# mv autoconfig
+rm -rf autoconfig
+cp -r ../../tests/test_tipc/auto_tuner/autoconfig ./
+unset PADDLE_ELASTIC_JOB_ID
+unset PADDLE_TRAINER_ENDPOINTS
+unset DISTRIBUTED_TRAINER_ENDPOINTS
+unset FLAGS_START_PORT
+unset PADDLE_ELASTIC_TIMEOUT
+unset PADDLE_TRAINERS_NUM
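This new prepare.sh installs dependencies, stages the OpenWebText token files under llm/data, copies the auto-tuner configs next to the launch scripts, and finally clears any distributed-launch state left over from a previous job so the auto-tuner starts from a clean environment. A more compact equivalent of the trailing unsets (a sketch, not part of the commit; note that when the script is executed rather than sourced, the unsets affect only prepare.sh's own process and the commands it launches afterwards):

# Clear leftover Paddle distributed-launch state
for v in PADDLE_ELASTIC_JOB_ID PADDLE_TRAINER_ENDPOINTS DISTRIBUTED_TRAINER_ENDPOINTS \
         FLAGS_START_PORT PADDLE_ELASTIC_TIMEOUT PADDLE_TRAINERS_NUM; do
    unset "$v"
done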

tests/test_tipc/auto_tuner/llama/benchmark_common/run_benchmark.sh renamed to tests/test_tipc/auto_tuner/llama_finetune/benchmark_common/run_benchmark.sh

Lines changed: 2 additions & 6 deletions
@@ -82,10 +82,6 @@ function _train(){
     fi
     # The commands below are generic; no changes are needed unless there are special requirements
     case ${run_mode} in
-    pretrain) echo "Run with: run_mode=${run_mode}"
-        train_cmd="python -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
-        --auto_tuner_json ${autoconfig_json_file} run_pretrain.py ${modle_json_file}"
-        ;;
     lora) echo "Run with: run_mode=${run_mode}"
         train_cmd="python -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
         --auto_tuner_json ${autoconfig_json_file} finetune_generation.py ${modle_json_file}"
@@ -96,10 +92,10 @@ function _train(){
         ;;
     *) echo "Run with: device_num=${device_num}, run_mode=${run_mode}"
         train_cmd="python -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
-        --auto_tuner_json ${autoconfig_json_file} run_pretrain.py ${modle_json_file}"
+        --auto_tuner_json ${autoconfig_json_file} finetune_generation.py ${modle_json_file}"
         ;;
     esac
-    cd ../llm/llama
+    cd ../llm/
     echo "train_cmd: ${train_cmd} log_file: ${log_file}"
     python -c "import paddlenlp"
     if [[ ${model_item} =~ "CE" ]];then  # CE accuracy run: execution time is not limited
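Taken together, the two hunks strip pretraining out of the finetune benchmark: the dedicated pretrain arm is deleted, the catch-all arm now launches finetune_generation.py instead of run_pretrain.py, and the working directory becomes the llm/ root rather than llm/llama. The resulting dispatch reads roughly as follows (a condensed sketch; an sft arm sits between the two hunks but is not visible in this diff):

case ${run_mode} in
lora|sft)  # each arm echoes its run_mode and builds the same launch command
    train_cmd="python -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
    --auto_tuner_json ${autoconfig_json_file} finetune_generation.py ${modle_json_file}"
    ;;
*)  # unknown run_mode values now also fall through to finetuning
    train_cmd="python -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
    --auto_tuner_json ${autoconfig_json_file} finetune_generation.py ${modle_json_file}"
    ;;
esac
cd ../llm/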

tests/test_tipc/auto_tuner/llama/N1C8/CE_llama7b_autotuner_bs8_fp16_pretrain.sh renamed to tests/test_tipc/auto_tuner/llama_pretrain/N1C8/CE_llama7b_autotuner_bs8_fp16_pretrain.sh

Lines changed: 2 additions & 2 deletions
@@ -20,6 +20,6 @@ param+="autoconfig_json_file=autoconfig/llama7b_pretrain.json "
 param+="modle_json_file=autoconfig/llama7b_pretrain_params.json "

 cd ./tests
-bash ./test_tipc/auto_tuner/llama/benchmark_common/prepare.sh
+bash ./test_tipc/auto_tuner/llama_pretrain/benchmark_common/prepare.sh

-bash -c "${param} bash ./test_tipc/auto_tuner/llama/benchmark_common/run_benchmark.sh"
+bash -c "${param} bash ./test_tipc/auto_tuner/llama_pretrain/benchmark_common/run_benchmark.sh"
Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test training benchmark for a model.
+# Usage: bash benchmark/run_benchmark.sh ${model_name_or_path} ${per_device_train_batch_size} ${tensor_parallel_degree} ${pipeline_parallel_degree} ${virtual_pp_degree} ${sequence_parallel} ${sharding_parallel_degree} ${sharding} ${recompute} ${run_mode} ${device_num}
+function _set_params(){
+    model_item=${model_item:-"CE_llama7b_autotuner"}
+    run_mode=${run_mode:-"pretrain"}
+    device_num=${device_num:-"N1C8"}
+    global_batch_size=${global_batch_size:-8}
+    autoconfig_json_file=${autoconfig_json_file:-"autoconfig/llama7b_pretrain.json"}
+    modle_json_file=${modle_json_file:-"autoconfig/llama7b_pretrain_params.json"}
+
+    base_batch_size=${global_batch_size}
+
+    profiling=${PROFILING:-"false"}  # (required) profiling switch, off by default, passed in as a global variable
+    model_repo="PaddleNLP"           # (required) name of the model suite
+    speed_unit="tokens/s"            # (required) unit of the speed metric
+    skip_steps=0                     # (required) number of initial, performance-unstable steps to skip when parsing logs
+    keyword="ips:"                   # (required) keyword that marks the lines holding performance data in the logs
+    convergence_key="loss:"          # (optional) keyword that marks the lines holding convergence data, e.g. convergence_key="loss:"
+
+    fp_item="fp16"
+    workerlog_id=0
+    # The commands below are generic; no changes are needed unless there are special requirements
+    model_name=${model_item}_bs${global_batch_size}_${fp_item}_${run_mode}  # (required) do not change this format; it is aligned with competitor model names
+    device=${CUDA_VISIBLE_DEVICES//,/ }
+    arr=(${device})
+    num_gpu_devices=${#arr[*]}
+    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}            # (required) TRAIN_LOG_DIR is set as a global variable by the benchmark framework
+    profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)}  # (required) PROFILING_LOG_DIR is set as a global variable by the benchmark framework
+    speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
+    train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log
+    mkdir -p $(dirname ${train_log_file})
+
+    profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling
+    mkdir -p $(dirname ${profiling_log_file})
+
+    speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed
+    mkdir -p $(dirname ${speed_log_file})
+
+    OUTPUT_PATH=${run_log_path}/output
+    is_large_model=True
+}
+
+function _train(){
+    batch_size=${per_device_train_batch_size}  # for multi-GPU single-process runs, compute the multi-GPU batch size here in _train
+
+    if [ -d $OUTPUT_PATH ]; then
+        rm -rf $OUTPUT_PATH
+    fi
+    mkdir $OUTPUT_PATH
+
+    echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}"
+
+    if [ ${profiling} = "true" ];then
+        add_options="--profiler_options=\"batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile\""
+        log_file=${profiling_log_file}
+    else
+        add_options=""
+        log_file=${train_log_file}
+    fi
+
+    if [ ${PADDLE_TRAINER_ID} ]
+    then
+        PADDLE_RANK_OPTION=" --rank ${PADDLE_TRAINER_ID}"
+    else
+        PADDLE_RANK_OPTION=""
+    fi
+    # The commands below are generic; no changes are needed unless there are special requirements
+    case ${run_mode} in
+    pretrain) echo "Run with: run_mode=${run_mode}"
+        train_cmd="python -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
+        --auto_tuner_json ${autoconfig_json_file} run_pretrain.py ${modle_json_file}"
+        ;;
+    *) echo "Run with: device_num=${device_num}, run_mode=${run_mode}"
+        train_cmd="python -m paddle.distributed.launch --gpus=0,1,2,3,4,5,6,7 ${PADDLE_RANK_OPTION}\
+        --auto_tuner_json ${autoconfig_json_file} run_pretrain.py ${modle_json_file}"
+        ;;
+    esac
+    cd ../llm/llama
+    echo "train_cmd: ${train_cmd} log_file: ${log_file}"
+    python -c "import paddlenlp"
+    if [[ ${model_item} =~ "CE" ]];then  # CE accuracy run: execution time is not limited
+        ${train_cmd} > ${log_file} 2>&1
+    else
+        timeout 30m ${train_cmd} > ${log_file} 2>&1
+    fi
+    if [ $? -ne 0 ];then
+        echo -e "${model_name}, FAIL" >> ${log_file}
+    else
+        echo -e "${model_name}, SUCCESS" >> ${log_file}
+    fi
+    bash autoconfig/check.sh ${autoconfig_json_file} >> ${log_file} 2>&1
+    if [ $? -ne 0 ];then
+        echo -e "auto_tuner, FAIL" >> ${log_file}
+    else
+        echo -e "auto_tuner, SUCCESS" >> ${log_file}
+    fi
+    #kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
+    if [ ${device_num} != "N1C1" -a -d ./autoconfig/best_cfg ]; then
+        case_path=$PWD && cd - && mkdir -p mylog  # PaddleNLP/tests/mylog
+        cp -r ${case_path}/autoconfig/best_cfg/workerlog.* ./mylog/
+    fi
+}
+
+export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
+source ${BENCHMARK_ROOT}/scripts/run_model.sh  # parses performance data from benchmark-compliant logs via analysis.py; may be commented out to produce only the training log locally, but must be re-enabled before submitting
+_set_params $@
+#_train  # uncomment to produce only the training log, without parsing
+_run  # defined in run_model.sh; it calls _train during execution. May be commented out to produce only the training log locally, but must be re-enabled before submitting
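For a local dry run of this new pretrain benchmark, the knobs read by _set_params can be passed as one-shot environment assignments, which is exactly what the N1C8 CE wrapper's param string expands to (values below mirror the script's defaults, and the modle_ spelling matches the scripts; BENCHMARK_ROOT must point at a checkout of the benchmark framework or the source line will fail):

cd ./tests
model_item=CE_llama7b_autotuner run_mode=pretrain device_num=N1C8 \
global_batch_size=8 \
autoconfig_json_file=autoconfig/llama7b_pretrain.json \
modle_json_file=autoconfig/llama7b_pretrain_params.json \
bash ./test_tipc/auto_tuner/llama_pretrain/benchmark_common/run_benchmark.sh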
