# coding=utf8
# @Time : 2023/5/7 17:28
# @Author : tk
# @FileName: rlhf_config
import json

# LoRA-related modules are disabled by default; only one of lora and adalora can be enabled at a time


lora_info_args = {
    'with_lora': True,  # whether to enable the LoRA module
    'lora_type': 'lora',
    'r': 8,
    'target_modules': ['q', 'v'],
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'fan_in_fan_out': False,
    'bias': 'none',  # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'
    'modules_to_save': ['score'],
}

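# Illustrative sketch only: keys like r / lora_alpha / target_modules mirror the argument
# names of Hugging Face `peft.LoraConfig`. The helper below shows one possible mapping;
# the training framework may consume `lora_info_args` through its own LoRA implementation,
# so treat this as an assumption rather than the repo's actual wiring.
def build_peft_lora_config(args: dict):
    from peft import LoraConfig  # imported lazily so this config module stays importable without peft
    return LoraConfig(
        r=args['r'],
        lora_alpha=args['lora_alpha'],
        target_modules=args['target_modules'],
        lora_dropout=args['lora_dropout'],
        fan_in_fan_out=args['fan_in_fan_out'],
        bias=args['bias'],
        modules_to_save=args['modules_to_save'],
    )
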
adalora_info_args = {
    'with_lora': False,  # whether to enable the AdaLoRA module
    'lora_type': 'adalora',
    'r': 8,
    'target_modules': ['q', 'v'],
    'lora_alpha': 32,
    'lora_dropout': 0.1,
    'fan_in_fan_out': False,
    'bias': 'none',  # Bias type for LoRA. Can be 'none', 'all' or 'lora_only'
    'modules_to_save': ['score'],

    'target_r': 8,  # Target LoRA matrix dimension.
    'init_r': 12,  # Initial LoRA matrix dimension.
    'tinit': 0,  # The steps of initial warmup.
    'tfinal': 0,  # The steps of final fine-tuning.
    'deltaT': 1,  # Step interval of rank allocation.
    'beta1': 0.85,  # Hyperparameter of EMA.
    'beta2': 0.85,  # Hyperparameter of EMA.
    'orth_reg_weight': 0.5,  # The orthogonal regularization coefficient.
    'total_step': None,  # The total training steps.
    'rank_pattern': None,  # The saved rank pattern.
}

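# Illustrative sketch only: the AdaLoRA-specific keys (target_r, init_r, tinit, tfinal, deltaT,
# beta1, beta2, orth_reg_weight, total_step, rank_pattern) line up with `peft.AdaLoraConfig`.
# As with build_peft_lora_config above, this mapping is an assumption about how the dict could
# be consumed, not the framework's actual code path; None values are dropped so peft defaults apply.
def build_peft_adalora_config(args: dict, total_step: int = 1000):
    from peft import AdaLoraConfig  # lazy import, see note on build_peft_lora_config
    keys = ('r', 'lora_alpha', 'target_modules', 'lora_dropout', 'fan_in_fan_out', 'bias',
            'modules_to_save', 'target_r', 'init_r', 'tinit', 'tfinal', 'deltaT',
            'beta1', 'beta2', 'orth_reg_weight', 'total_step', 'rank_pattern')
    kwargs = {k: args[k] for k in keys if args.get(k) is not None}
    kwargs.setdefault('total_step', total_step)  # some peft versions require a concrete total_step
    return AdaLoraConfig(**kwargs)
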
ppp_info_args = {
    "model_arch_type": "seq2seq",  # one of causal, prefixlm, seq2seq
    "ppo_epochs": 2,  # Number of updates per batch
    "num_rollouts": 128,  # Number of experiences to observe before learning
    "chunk_size": 1,  # Chunk size for training
    "minibatch_size": None,
    "init_kl_coef": 0.001,  # Initial value for KL coefficient
    "target": None,  # Target value for KL coefficient
    "horizon": 10000,  # Number of steps for KL coefficient to reach target
    "gamma": 1.,  # Discount factor
    "lam": 0.95,  # GAE lambda
    "cliprange": 0.2,  # Clipping range for PPO policy loss (1 - cliprange, 1 + cliprange)
    "cliprange_value": 0.2,  # Clipping range for predicted values (observed values - cliprange_value, observed values + cliprange_value)
    "vf_coef": 1.,  # Value loss scale w.r.t. policy loss
    "scale_reward": "ignored",
    "ref_mean": None,
    "ref_std": None,
    "cliprange_reward": 10,
    # Additional kwargs for generation
    "gen_kwargs": dict(
        max_new_tokens=128,
        top_k=0,
        top_p=1.0,
        do_sample=True,
    ),
    "gen_experience_kwargs": None,  # Additional kwargs for experience generation
}

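# Illustrative sketch only: "gen_kwargs" above is the kind of dict that is typically forwarded
# to a Hugging Face `generate()` call when sampling rollouts for PPO. The helper and its
# arguments are placeholders; the actual PPO trainer in this repo may drive generation differently.
def sample_rollout(model, tokenizer, prompt: str, ppo_args: dict = ppp_info_args):
    inputs = tokenizer(prompt, return_tensors='pt')
    # max_new_tokens / top_k / top_p / do_sample come straight from gen_kwargs
    outputs = model.generate(**inputs, **ppo_args['gen_kwargs'])
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
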
train_info_args = {
    'devices': 1,
    'data_backend': 'record',
    'model_type': 't5',  # T5 architecture, matching the checkpoints below
    # pretrained model path; leave it empty to train from scratch
    # 'model_name_or_path': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1',
    # 'tokenizer_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1',
    # 'config_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v1/config.json',
    # 'model_name_or_path': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2',
    # 'tokenizer_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2',
    # 'config_name': '/data/nlp/pre_models/torch/t5/ChatYuan-large-v2/config.json',
    'model_name_or_path': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5',
    'tokenizer_name': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5',
    'config_name': '/data/nlp/pre_models/torch/t5/PromptCLUE-base-v1-5/config.json',

    'convert_onnx': False,  # whether to export the model to ONNX
    'do_train': True,
    'train_file': ['./data/train.json'],
    'max_epochs': 20,
    'max_steps': -1,
    'optimizer': 'lion',  # one of adamw, adam, lamb, lion

    'scheduler_type': 'CAWR',
    'scheduler': {'T_mult': 1, 'rewarm_epoch_num': 0.5, 'verbose': False},

    # 'scheduler_type': 'linear',  # one of [linear, WarmupCosine, CAWR, CAL, Step, ReduceLROnPlateau]
    # 'scheduler': None,

    # alternative scheduler settings
    # 'scheduler_type': 'WarmupCosine',
    # 'scheduler': None,

    # 'scheduler_type': 'ReduceLROnPlateau',
    # 'scheduler': None,

    # 'scheduler_type': 'Step',
    # 'scheduler': {'decay_rate': 0.999, 'decay_steps': 100, 'verbose': True},

    # 'scheduler_type': 'CAWR',
    # 'scheduler': {'T_mult': 1, 'rewarm_epoch_num': 2, 'verbose': True},

    # 'scheduler_type': 'CAL',
    # 'scheduler': {'rewarm_epoch_num': 2, 'verbose': True},

    'optimizer_betas': (0.9, 0.999),
    'train_batch_size': 1,
    'eval_batch_size': 2,
    'test_batch_size': 2,
    'learning_rate': 2e-5,
    'adam_epsilon': 1e-8,
    'gradient_accumulation_steps': 1,
    'max_grad_norm': 1.0,
    'weight_decay': 0,
    'warmup_steps': 0,
    'output_dir': './output',
    'max_seq_length': 256,
    'max_target_length': 100,  # maximum generation length
    'use_fast_tokenizer': False,

    ############## LoRA / PPO modules
    'lora': {**lora_info_args},
    'adalora': {**adalora_info_args},
    "ppo": {**ppp_info_args},
}

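# The header comment says only one of lora / adalora may be enabled at a time; the optional
# sanity check below makes that constraint explicit (an illustrative guard, not framework code).
assert not (train_info_args['lora']['with_lora'] and train_info_args['adalora']['with_lora']), \
    'enable at most one of lora and adalora'
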
enable_deepspeed = False

def get_deepspeed_config():
    # whether DeepSpeed is enabled
    if not enable_deepspeed:
        return None
    with open('./deepspeed.json', mode='r', encoding='utf-8') as f:
        deepspeed_config = json.loads(f.read())
    return deepspeed_config
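
# Example usage (illustrative): the trainer presumably hands the returned dict to its DeepSpeed
# strategy. The ./deepspeed.json file is not shown in this repo snippet; a minimal ZeRO stage-2
# file could look like the sketch below, though the real file may differ.
#
#   {
#     "train_micro_batch_size_per_gpu": 1,
#     "gradient_accumulation_steps": 1,
#     "zero_optimization": {"stage": 2}
#   }
if __name__ == '__main__':
    cfg = get_deepspeed_config()
    print('deepspeed enabled:', cfg is not None)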