ekshat committed
Commit
dd6d985
1 Parent(s): 7b14a04

Update README.md

Files changed (1)
  1. README.md +28 -28
README.md CHANGED
@@ -41,66 +41,66 @@ print(result[0]['generated_text'])
 
 
  # Model Information
- - ** model_name = "NousResearch/Llama-2-7b-chat-hf"
+ - **model_name = "NousResearch/Llama-2-7b-chat-hf"
 
- - ** dataset_name = "b-mc2/sql-create-context"
+ - **dataset_name = "b-mc2/sql-create-context"
 
 
  # QLoRA parameters
- - ** lora_r = 64
+ - **lora_r = 64
 
- - ** lora_alpha = 16
+ - **lora_alpha = 16
 
- - ** lora_dropout = 0.1
+ - **lora_dropout = 0.1
 
 
  # bitsandbytes parameters
- - ** use_4bit = True
+ - **use_4bit = True
 
- - ** bnb_4bit_compute_dtype = "float16"
+ - **bnb_4bit_compute_dtype = "float16"
 
- - ** bnb_4bit_quant_type = "nf4"
+ - **bnb_4bit_quant_type = "nf4"
 
- - ** use_nested_quant = False
+ - **use_nested_quant = False
 
 
  # TrainingArguments parameters
- - ** num_train_epochs = 1
+ - **num_train_epochs = 1
 
- - ** fp16 = False
+ - **fp16 = False
 
- - ** bf16 = False
+ - **bf16 = False
 
- - ** per_device_train_batch_size = 8
+ - **per_device_train_batch_size = 8
 
- - ** per_device_eval_batch_size = 4
+ - **per_device_eval_batch_size = 4
 
- - ** gradient_accumulation_steps = 1
+ - **gradient_accumulation_steps = 1
 
- - ** gradient_checkpointing = True
+ - **gradient_checkpointing = True
 
- - ** max_grad_norm = 0.3
+ - **max_grad_norm = 0.3
 
- - ** learning_rate = 2e-4
+ - **learning_rate = 2e-4
 
- - ** weight_decay = 0.001
+ - **weight_decay = 0.001
 
- - ** optim = "paged_adamw_32bit"
+ - **optim = "paged_adamw_32bit"
 
- - ** lr_scheduler_type = "cosine"
+ - **lr_scheduler_type = "cosine"
 
- - ** max_steps = -1
+ - **max_steps = -1
 
- - ** warmup_ratio = 0.03
+ - **warmup_ratio = 0.03
 
- - ** group_by_length = True
+ - **group_by_length = True
 
- - ** save_steps = 0
+ - **save_steps = 0
 
- - ** logging_steps = 25
+ - **logging_steps = 25
 
 
  # SFT parameters
- - ** max_seq_length = None
+ - **max_seq_length = None
 
- - ** packing = False
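
For readers wiring these values into code: the model, QLoRA, and bitsandbytes parameters in the diff above correspond to the standard `transformers`/`peft`/`bitsandbytes` configuration objects. The sketch below is an illustration rather than the repository's actual training script; only the parameter values come from the lists above, while `bias="none"`, `task_type="CAUSAL_LM"`, and `device_map="auto"` are typical choices not listed in the README.

```python
# Illustrative sketch: load the base model in 4-bit and define the LoRA adapter
# using the values listed above (assumes transformers, peft, and bitsandbytes are installed).
import torch
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "NousResearch/Llama-2-7b-chat-hf"

# bitsandbytes parameters
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # use_4bit = True
    bnb_4bit_quant_type="nf4",             # bnb_4bit_quant_type = "nf4"
    bnb_4bit_compute_dtype=torch.float16,  # bnb_4bit_compute_dtype = "float16"
    bnb_4bit_use_double_quant=False,       # use_nested_quant = False
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
model.config.use_cache = False  # usually disabled when gradient checkpointing is on

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# QLoRA parameters
peft_config = LoraConfig(
    r=64,              # lora_r = 64
    lora_alpha=16,     # lora_alpha = 16
    lora_dropout=0.1,  # lora_dropout = 0.1
    bias="none",            # not listed above; common default
    task_type="CAUSAL_LM",  # not listed above; required for causal LMs
)
```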
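
The TrainingArguments and SFT parameters map onto `transformers.TrainingArguments` and `trl.SFTTrainer`. The sketch below continues from the previous one (it reuses `model`, `tokenizer`, and `peft_config`) and is likewise only illustrative: `output_dir`, the prompt template that builds a `text` column from `b-mc2/sql-create-context`, and the older-style `SFTTrainer` keyword arguments (`dataset_text_field`, `max_seq_length`, `packing`, which newer trl releases move into `SFTConfig`) are assumptions, not taken from the README.

```python
# Illustrative sketch, continuing from the previous block (model, tokenizer, peft_config).
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer

def to_text(example):
    # Hypothetical prompt template; the README does not specify the exact formatting.
    return {"text": f"[INST] {example['context']}\n{example['question']} [/INST] {example['answer']}"}

dataset = load_dataset("b-mc2/sql-create-context", split="train").map(to_text)

# TrainingArguments parameters
training_arguments = TrainingArguments(
    output_dir="./results",  # placeholder; not listed in the README
    num_train_epochs=1,
    fp16=False,
    bf16=False,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    learning_rate=2e-4,
    weight_decay=0.001,
    optim="paged_adamw_32bit",
    lr_scheduler_type="cosine",
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    save_steps=0,
    logging_steps=25,
)

# SFT parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=None,  # max_seq_length = None
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,        # packing = False
)
trainer.train()
```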