Commit d340899

Author: ssbuild (committed)
Initialize rlhf chatyuan

Signed-off-by: ssbuild <[email protected]>
0 parents  commit d340899

19 files changed: +1746 -0 lines changed

.gitignore

+1

@@ -0,0 +1 @@
/.idea
README.MD

+42

@@ -0,0 +1,42 @@

## Installation

pip install --pre -U "deep_training>=0.1.5"

python >= 3.10


## Update details
- [deep_training](https://github.com/ssbuild/deep_training)


## Common deep learning task examples

- [pytorch-task-example](https://github.com/ssbuild/pytorch-task-example)
- [tf-task-example](https://github.com/ssbuild/tf-task-example)
- [chatglm_finetuning](https://github.com/ssbuild/chatglm_finetuning)
- [chatyuan_finetuning](https://github.com/ssbuild/chatyuan_finetuning)
- [llm_finetuning](https://github.com/ssbuild/llm_finetuning)
- [rlhf_llm](https://github.com/ssbuild/rlhf_llm)
- [rlhf_chatglm](https://github.com/ssbuild/rlhf_chatglm)
- [rlhf_chatyuan](https://github.com/ssbuild/rlhf_chatyuan)


## Pretrained model downloads

- [ChatYuan-large-v1](https://huggingface.co/ClueAI/ChatYuan-large-v1)
- [ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2)


## Data example

```json
{
  "prompt": "\n\nHuman:你能描述如何清除笔记本计算机屏幕上的指纹和污垢的步骤吗?\n\nAssistant:是的,也许。为了清洁你的屏幕,你首先需要使用微纤维布或软溼布,轻轻地擦拭屏幕表面。接下来,你会想抓住一个软,无毛巾,微纤维清洁布,轻轻地擦回屏幕上去除指纹和污垢。\n\nHuman:我可以将伊索普罗素酒精喷射到布上,并以这种方式清洗它吗?\n\nAssistant:",
  "response": " 是的,你可以这样做,以帮助布从屏幕上收集更多的污垢。 请确保始终使用干净的软布,而不是一块粗糙的、粗糙的或结构化的材料,并确保它是无毛布的。",
  "chosen": " 是的,你可以这样做,以帮助布从屏幕上收集更多的污垢。 请确保始终使用干净的软布,而不是一块粗糙的、粗糙的或结构化的材料,并确保它是无毛布的。",
  "rejected": " 是的,你可以直接将它喷射到布上。"
}
```


## Pure and clean code
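
For orientation, the snippet below is a minimal sketch (not part of this commit) of how records in the prompt/response/chosen/rejected format shown above could be read and paired for reward-model training. The one-JSON-object-per-line layout and the helper name `iter_reward_pairs` are assumptions; the actual loader in deep_training may differ. The `./data/train.json` path matches the `train_file` default in the configs below.

```python
# Hypothetical helper, a sketch only: reads prompt/chosen/rejected records,
# assuming one JSON object per line (the real data pipeline may differ).
import json
from typing import Iterator, Tuple

def iter_reward_pairs(path: str = './data/train.json') -> Iterator[Tuple[str, str, str]]:
    """Yield (prompt, chosen, rejected) triples for reward-model training."""
    with open(path, mode='r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            record = json.loads(line)
            yield record['prompt'], record['chosen'], record['rejected']

if __name__ == '__main__':
    for prompt, chosen, rejected in iter_reward_pairs():
        # A reward model is trained to score (prompt + chosen) above (prompt + rejected).
        print(len(prompt), len(chosen), len(rejected))
        break
```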

config/__init__.py

+4

@@ -0,0 +1,4 @@
# coding=utf8
# @Time : 2023/5/7 17:28
# @Author : tk
# @FileName: __init__.py
config/reward_config.py

+125

@@ -0,0 +1,125 @@
# coding=utf8
# @Time : 2023/5/7 17:28
# @Author : tk
# @FileName: reward_config
import json

# LoRA-related modules are disabled by default; lora and adalora cannot both be enabled at the same time.


lora_info_args = {
    'with_lora': True,  # whether to enable the lora module
    'lora_type': 'lora',
    'r': 8,
    'target_modules': ['q', 'v'],
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'fan_in_fan_out': False,
    'bias': 'none',  # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'
    'modules_to_save': ['score'],
}

adalora_info_args = {
    'with_lora': False,  # whether to enable the adalora module
    'lora_type': 'adalora',
    'r': 8,
    'target_modules': ['q', 'v'],
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'fan_in_fan_out': False,
    'bias': 'none',  # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'
    'modules_to_save': ['score'],

    'target_r': 8,  # Target LoRA matrix dimension.
    'init_r': 12,  # Initial LoRA matrix dimension.
    'tinit': 0,  # The steps of initial warmup.
    'tfinal': 0,  # The steps of final warmup.
    'deltaT': 1,  # Step interval of rank allocation.
    'beta1': 0.85,  # Hyperparameter of EMA.
    'beta2': 0.85,  # Hyperparameter of EMA.
    'orth_reg_weight': 0.5,  # The orthogonal regularization coefficient.
    'total_step': None,  # The total training steps.
    'rank_pattern': None,  # The saved rank pattern.
}

train_info_args = {
    'devices': 1,
    'data_backend': 'record',
    'model_type': 'bloom',
    # Pretrained model path; leave empty to train from scratch.
    # 'model_name_or_path': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1',
    # 'tokenizer_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1',
    # 'config_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1/config.json',

    # 'model_name_or_path': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2',
    # 'tokenizer_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2',
    # 'config_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2/config.json',

    'model_name_or_path': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5',
    'tokenizer_name': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5',
    'config_name': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5/config.json',

    'convert_onnx': False,  # whether to export an ONNX model
    'do_train': True,
    'train_file': ['./data/train.json'],
    'max_epochs': 20,
    'max_steps': -1,
    'optimizer': 'lion',  # one of adamw, adam, lamb, lion

    # 'scheduler_type': 'CAWR',
    # 'scheduler': {'T_mult': 1, 'rewarm_epoch_num': 0.5, 'verbose': False},

    'scheduler_type': 'linear',  # one of [linear, WarmupCosine, CAWR, CAL, Step, ReduceLROnPlateau]
    'scheduler': None,

    # Alternative scheduler settings:
    # 'scheduler_type': 'WarmupCosine',
    # 'scheduler': None,

    # 'scheduler_type': 'ReduceLROnPlateau',
    # 'scheduler': None,

    # 'scheduler_type': 'Step',
    # 'scheduler': {'decay_rate': 0.999, 'decay_steps': 100, 'verbose': True},

    # 'scheduler_type': 'CAWR',
    # 'scheduler': {'T_mult': 1, 'rewarm_epoch_num': 2, 'verbose': True},

    # 'scheduler_type': 'CAL',
    # 'scheduler': {'rewarm_epoch_num': 2, 'verbose': True},

    'optimizer_betas': (0.9, 0.999),
    'train_batch_size': 4,
    'eval_batch_size': 2,
    'test_batch_size': 2,
    'learning_rate': 2e-5,
    'adam_epsilon': 1e-8,
    'gradient_accumulation_steps': 1,
    'max_grad_norm': 1.0,
    'weight_decay': 0,
    'warmup_steps': 0,
    'output_dir': './output',
    'max_seq_length': 512,
    'max_target_length': 100,  # maximum generation length (reserved field)
    'use_fast_tokenizer': False,

    ############## lora module
    'lora': {**lora_info_args},
    'adalora': {**adalora_info_args},
}


enable_deepspeed = False

def get_deepspeed_config():
    # whether deepspeed is enabled
    if not enable_deepspeed:
        return None
    with open('./deepspeed.json', mode='r', encoding='utf-8') as f:
        deepspeed_config = json.loads(f.read())
    return deepspeed_config
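
As the comment in this file notes, only one of `lora` / `adalora` may have `with_lora=True` at a time. The sketch below is an illustration, not part of this commit: the helper name is hypothetical, and how deep_training actually consumes these dicts may differ. It simply shows how a caller could pick the active adapter config out of `train_info_args`.

```python
# Hypothetical helper illustrating the "only one of lora/adalora" rule; not part of this commit.
from config.reward_config import train_info_args

def active_lora_config(args: dict):
    lora, adalora = args['lora'], args['adalora']
    if lora['with_lora'] and adalora['with_lora']:
        raise ValueError('lora and adalora cannot both be enabled')
    if lora['with_lora']:
        return lora
    if adalora['with_lora']:
        return adalora
    return None  # all adapter modules disabled

print(active_lora_config(train_info_args))
```

With the defaults above this returns the plain LoRA settings, since `adalora_info_args['with_lora']` is False.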

config/rlhf_config.py

+155

@@ -0,0 +1,155 @@
# coding=utf8
# @Time : 2023/5/7 17:28
# @Author : tk
# @FileName: rlhf_config
import json

# LoRA-related modules are disabled by default; lora and adalora cannot both be enabled at the same time.


lora_info_args = {
    'with_lora': True,  # whether to enable the lora module
    'lora_type': 'lora',
    'r': 8,
    'target_modules': ['q', 'v'],
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'fan_in_fan_out': False,
    'bias': 'none',  # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'
    'modules_to_save': ['score'],
}

adalora_info_args = {
    'with_lora': False,  # whether to enable the adalora module
    'lora_type': 'adalora',
    'r': 8,
    'target_modules': ['q', 'v'],
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'fan_in_fan_out': False,
    'bias': 'none',  # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'
    'modules_to_save': ['score'],

    'target_r': 8,  # Target LoRA matrix dimension.
    'init_r': 12,  # Initial LoRA matrix dimension.
    'tinit': 0,  # The steps of initial warmup.
    'tfinal': 0,  # The steps of final warmup.
    'deltaT': 1,  # Step interval of rank allocation.
    'beta1': 0.85,  # Hyperparameter of EMA.
    'beta2': 0.85,  # Hyperparameter of EMA.
    'orth_reg_weight': 0.5,  # The orthogonal regularization coefficient.
    'total_step': None,  # The total training steps.
    'rank_pattern': None,  # The saved rank pattern.
}


ppp_info_args = {
    "model_arch_type": "seq2seq",  # one of causal, prefixlm, seq2seq
    "ppo_epochs": 2,  # Number of updates per batch
    "num_rollouts": 128,  # Number of experiences to observe before learning
    "chunk_size": 1,  # Chunk size used during training
    "minibatch_size": None,
    "init_kl_coef": 0.001,  # Initial value for KL coefficient
    "target": None,  # Target value for KL coefficient
    "horizon": 10000,  # Number of steps for KL coefficient to reach target
    "gamma": 1.,  # Discount factor
    "lam": 0.95,  # GAE lambda
    "cliprange": 0.2,  # Clipping range for PPO policy loss (1 - cliprange, 1 + cliprange)
    "cliprange_value": 0.2,  # Clipping range for predicted values (observed values - cliprange_value, observed values + cliprange_value)
    "vf_coef": 1.,  # Value loss scale w.r.t. policy loss
    "scale_reward": "ignored",
    "ref_mean": None,
    "ref_std": None,
    "cliprange_reward": 10,
    # Additional kwargs for generation
    "gen_kwargs": dict(
        max_new_tokens=128,
        top_k=0,
        top_p=1.0,
        do_sample=True,
    ),
    "gen_experience_kwargs": None,  # Additional kwargs for experience generation
}


train_info_args = {
    'devices': 1,
    'data_backend': 'record',
    'model_type': 'bloom',
    # Pretrained model path; leave empty to train from scratch.
    # 'model_name_or_path': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1',
    # 'tokenizer_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1',
    # 'config_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1/config.json',
    # 'model_name_or_path': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2',
    # 'tokenizer_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2',
    # 'config_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2/config.json',
    'model_name_or_path': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5',
    'tokenizer_name': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5',
    'config_name': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5/config.json',

    'convert_onnx': False,  # whether to export an ONNX model
    'do_train': True,
    'train_file': ['./data/train.json'],
    'max_epochs': 20,
    'max_steps': -1,
    'optimizer': 'lion',  # one of adamw, adam, lamb, lion

    'scheduler_type': 'CAWR',
    'scheduler': {'T_mult': 1, 'rewarm_epoch_num': 0.5, 'verbose': False},

    # 'scheduler_type': 'linear',  # one of [linear, WarmupCosine, CAWR, CAL, Step, ReduceLROnPlateau]
    # 'scheduler': None,

    # Alternative scheduler settings:
    # 'scheduler_type': 'WarmupCosine',
    # 'scheduler': None,

    # 'scheduler_type': 'ReduceLROnPlateau',
    # 'scheduler': None,

    # 'scheduler_type': 'Step',
    # 'scheduler': {'decay_rate': 0.999, 'decay_steps': 100, 'verbose': True},

    # 'scheduler_type': 'CAWR',
    # 'scheduler': {'T_mult': 1, 'rewarm_epoch_num': 2, 'verbose': True},

    # 'scheduler_type': 'CAL',
    # 'scheduler': {'rewarm_epoch_num': 2, 'verbose': True},

    'optimizer_betas': (0.9, 0.999),
    'train_batch_size': 1,
    'eval_batch_size': 2,
    'test_batch_size': 2,
    'learning_rate': 2e-5,
    'adam_epsilon': 1e-8,
    'gradient_accumulation_steps': 1,
    'max_grad_norm': 1.0,
    'weight_decay': 0,
    'warmup_steps': 0,
    'output_dir': './output',
    'max_seq_length': 256,
    'max_target_length': 100,  # maximum generation length
    'use_fast_tokenizer': False,

    ############## lora module
    'lora': {**lora_info_args},
    'adalora': {**adalora_info_args},
    "ppo": {**ppp_info_args},
}


enable_deepspeed = False

def get_deepspeed_config():
    # whether deepspeed is enabled
    if not enable_deepspeed:
        return None
    with open('./deepspeed.json', mode='r', encoding='utf-8') as f:
        deepspeed_config = json.loads(f.read())
    return deepspeed_config
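
The `gen_kwargs` entries above are standard Hugging Face `generate()` sampling arguments. Purely as an illustration of how they could be used during rollout collection (an assumption about usage, not code from this commit; the local checkpoint path in `train_info_args` must exist for it to run), a seq2seq generation call might look like this:

```python
# Sketch only: sampling one rollout with the gen_kwargs defined in ppp_info_args.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from config.rlhf_config import ppp_info_args, train_info_args

tokenizer = AutoTokenizer.from_pretrained(
    train_info_args['tokenizer_name'],
    use_fast=train_info_args['use_fast_tokenizer'],
)
model = AutoModelForSeq2SeqLM.from_pretrained(train_info_args['model_name_or_path'])

prompt = "\n\nHuman:你能描述如何清除笔记本计算机屏幕上的指纹和污垢的步骤吗?\n\nAssistant:"
inputs = tokenizer(prompt, return_tensors='pt')
# gen_kwargs carries max_new_tokens, top_k, top_p and do_sample.
outputs = model.generate(**inputs, **ppp_info_args['gen_kwargs'])
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```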
