ekshat committed
Commit 7b14a04 · Parent: e111d99

Update README.md

Files changed (1): README.md (+28 -28)
README.md CHANGED
@@ -41,66 +41,66 @@ print(result[0]['generated_text'])
 
 # Model Information
-**model_name = "NousResearch/Llama-2-7b-chat-hf"
-**dataset_name = "b-mc2/sql-create-context"
+- **model_name** = "NousResearch/Llama-2-7b-chat-hf"
+- **dataset_name** = "b-mc2/sql-create-context"
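For orientation, both IDs resolve on the Hugging Face Hub. A minimal loading sketch; the `AutoModelForCausalLM` / `load_dataset` calls are illustrative assumptions, since the commit itself only touches the README:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "NousResearch/Llama-2-7b-chat-hf"
dataset_name = "b-mc2/sql-create-context"

# Base Llama-2 chat model and its tokenizer from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Text-to-SQL dataset: each row pairs a question and a CREATE TABLE
# context with the target SQL query.
dataset = load_dataset(dataset_name, split="train")
```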
 
 # QLoRA parameters
-**lora_r = 64
-**lora_alpha = 16
-**lora_dropout = 0.1
+- **lora_r** = 64
+- **lora_alpha** = 16
+- **lora_dropout** = 0.1
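These three values would feed `peft.LoraConfig` roughly as below; `bias` and `task_type` are assumptions taken from common Llama-2 QLoRA recipes, not stated in the README:

```python
from peft import LoraConfig

lora_r = 64          # rank of the LoRA update matrices
lora_alpha = 16      # scaling factor applied to the LoRA updates
lora_dropout = 0.1   # dropout on the LoRA layers during training

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",            # assumption: typical QLoRA setting
    task_type="CAUSAL_LM",  # assumption: causal-LM fine-tuning
)
```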
 
 # bitsandbytes parameters
-**use_4bit = True
-**bnb_4bit_compute_dtype = "float16"
-**bnb_4bit_quant_type = "nf4"
-**use_nested_quant = False
+- **use_4bit** = True
+- **bnb_4bit_compute_dtype** = "float16"
+- **bnb_4bit_quant_type** = "nf4"
+- **use_nested_quant** = False
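A sketch of the matching `transformers.BitsAndBytesConfig`, assuming the four flags map the way they do in the usual QLoRA setup:

```python
import torch
from transformers import BitsAndBytesConfig

use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = False

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,                 # NF4 4-bit weights
    bnb_4bit_compute_dtype=getattr(torch, bnb_4bit_compute_dtype),
    bnb_4bit_use_double_quant=use_nested_quant,              # nested quantization off
)
```

The resulting config would then be passed when loading the base model, e.g. `AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map="auto")`.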
 
 # TrainingArguments parameters
-**num_train_epochs = 1
-**fp16 = False
-**bf16 = False
-**per_device_train_batch_size = 8
-**per_device_eval_batch_size = 4
-**gradient_accumulation_steps = 1
-**gradient_checkpointing = True
-**max_grad_norm = 0.3
-**learning_rate = 2e-4
-**weight_decay = 0.001
-**optim = "paged_adamw_32bit"
-**lr_scheduler_type = "cosine"
-**max_steps = -1
-**warmup_ratio = 0.03
-**group_by_length = True
-**save_steps = 0
-**logging_steps = 25
+- **num_train_epochs** = 1
+- **fp16** = False
+- **bf16** = False
+- **per_device_train_batch_size** = 8
+- **per_device_eval_batch_size** = 4
+- **gradient_accumulation_steps** = 1
+- **gradient_checkpointing** = True
+- **max_grad_norm** = 0.3
+- **learning_rate** = 2e-4
+- **weight_decay** = 0.001
+- **optim** = "paged_adamw_32bit"
+- **lr_scheduler_type** = "cosine"
+- **max_steps** = -1
+- **warmup_ratio** = 0.03
+- **group_by_length** = True
+- **save_steps** = 0
+- **logging_steps** = 25
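These names map one-to-one onto `transformers.TrainingArguments`; only `output_dir` below is an assumption, since the README does not name one:

```python
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir="./results",          # assumption: the README names no output dir
    num_train_epochs=1,
    fp16=False,                      # no trainer-level mixed precision; the
    bf16=False,                      #   compute dtype comes from bnb_config
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,     # trade compute for memory
    max_grad_norm=0.3,               # gradient clipping
    learning_rate=2e-4,
    weight_decay=0.001,
    optim="paged_adamw_32bit",       # paged AdamW from bitsandbytes
    lr_scheduler_type="cosine",
    max_steps=-1,                    # -1 -> derive steps from num_train_epochs
    warmup_ratio=0.03,
    group_by_length=True,            # bucket similar-length samples
    save_steps=0,                    # no intermediate checkpoints
    logging_steps=25,
)
```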
 
 # SFT parameters
-**max_seq_length = None
-**packing = False
+- **max_seq_length** = None
+- **packing** = False
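The last two settings belong to `trl`'s `SFTTrainer`. A sketch in the older trl API these parameter names match, tying the pieces above together; `dataset_text_field="text"` is an assumption about the prompt column:

```python
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,                # 4-bit base model loaded with bnb_config
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",  # assumption: column holding formatted prompts
    max_seq_length=None,        # fall back to the model/tokenizer default
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,              # one sample per sequence, no packing
)
trainer.train()
```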
 