ekshat committed
Commit 044ee52 · 1 Parent(s): e9185ce

Update README.md

Files changed (1)
  1. README.md +24 -1
README.md CHANGED
@@ -42,42 +42,65 @@ print(result[0]['generated_text'])
 
 # Model Information
 model_name = "NousResearch/Llama-2-7b-chat-hf"
+
 dataset_name = "b-mc2/sql-create-context"
 
 
 # QLoRA parameters
 lora_r = 64
+
 lora_alpha = 16
+
 lora_dropout = 0.1
 
 
 # bitsandbytes parameters
 use_4bit = True
+
 bnb_4bit_compute_dtype = "float16"
+
 bnb_4bit_quant_type = "nf4"
+
 use_nested_quant = False
 
 
 # TrainingArguments parameters
 num_train_epochs = 1
+
 fp16 = False
+
 bf16 = False
+
 per_device_train_batch_size = 8
+
 per_device_eval_batch_size = 4
+
 gradient_accumulation_steps = 1
+
 gradient_checkpointing = True
+
 max_grad_norm = 0.3
+
 learning_rate = 2e-4
+
 weight_decay = 0.001
+
 optim = "paged_adamw_32bit"
+
 lr_scheduler_type = "cosine"
+
 max_steps = -1
+
 warmup_ratio = 0.03
+
 group_by_length = True
+
 save_steps = 0
+
 logging_steps = 25
 
 
 # SFT parameters
 max_seq_length = None
-packing = False
+
+packing = False
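The values above follow the standard QLoRA recipe. As an illustrative sketch (not code taken from this commit), the bitsandbytes and LoRA settings would typically be wired into `transformers.BitsAndBytesConfig` and `peft.LoraConfig` like this; `bias="none"` and `task_type="CAUSAL_LM"` are the usual choices for this setup rather than values listed above:

```python
import torch
from transformers import BitsAndBytesConfig
from peft import LoraConfig

# 4-bit NF4 quantization of the base weights, float16 compute for the matmuls
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)  # "float16" -> torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# LoRA adapter: rank 64, scaling alpha 16, 10% dropout on adapter inputs
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",            # assumption: no bias terms are trained (common default)
    task_type="CAUSAL_LM",  # assumption: causal-LM task for Llama-2
)
```

`use_nested_quant = False` leaves double quantization off; enabling it saves a little more memory at a small compute cost.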
 
 
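The TrainingArguments and SFT values map onto `transformers.TrainingArguments` and `trl.SFTTrainer` in the usual way. The sketch below is illustrative rather than this repository's exact training script: it assumes the older `trl` API in which `dataset_text_field`, `max_seq_length`, and `packing` are passed directly to `SFTTrainer` (newer releases move them into `SFTConfig`), assumes the `b-mc2/sql-create-context` rows have already been formatted into a single `text` prompt column, and uses a placeholder `output_dir`:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

# Base model loaded in 4-bit with the bnb_config sketched above
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0},
)
model.config.use_cache = False  # KV cache conflicts with gradient checkpointing during training

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # Llama-2 ships without a pad token
tokenizer.padding_side = "right"

# Assumption: each row already holds the full prompt + SQL answer in a "text" column
dataset = load_dataset(dataset_name, split="train")

training_arguments = TrainingArguments(
    output_dir="./results",  # placeholder path
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)
trainer.train()
```

With `max_steps = -1` the run length is set by `num_train_epochs` alone, and `group_by_length = True` batches similarly sized examples together to reduce padding.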