ekshat committed
Commit fed424d · Parent: dd6d985

Update README.md

Files changed (1)
  1. README.md +28 -28
README.md CHANGED
@@ -41,66 +41,66 @@ print(result[0]['generated_text'])
 
 # Model Information
- - **model_name = "NousResearch/Llama-2-7b-chat-hf"
+ - **model_name = "NousResearch/Llama-2-7b-chat-hf"**
 
- - **dataset_name = "b-mc2/sql-create-context"
+ - **dataset_name = "b-mc2/sql-create-context"**
 
 # QLoRA parameters
- - **lora_r = 64
+ - **lora_r = 64**
 
- - **lora_alpha = 16
+ - **lora_alpha = 16**
 
- - **lora_dropout = 0.1
+ - **lora_dropout = 0.1**
 
 # bitsandbytes parameters
- - **use_4bit = True
+ - **use_4bit = True**
 
- - **bnb_4bit_compute_dtype = "float16"
+ - **bnb_4bit_compute_dtype = "float16"**
 
- - **bnb_4bit_quant_type = "nf4"
+ - **bnb_4bit_quant_type = "nf4"**
 
- - **use_nested_quant = False
+ - **use_nested_quant = False**
 
 # TrainingArguments parameters
- - **num_train_epochs = 1
+ - **num_train_epochs = 1**
 
- - **fp16 = False
+ - **fp16 = False**
 
- - **bf16 = False
+ - **bf16 = False**
 
- - **per_device_train_batch_size = 8
+ - **per_device_train_batch_size = 8**
 
- - **per_device_eval_batch_size = 4
+ - **per_device_eval_batch_size = 4**
 
- - **gradient_accumulation_steps = 1
+ - **gradient_accumulation_steps = 1**
 
- - **gradient_checkpointing = True
+ - **gradient_checkpointing = True**
 
- - **max_grad_norm = 0.3
+ - **max_grad_norm = 0.3**
 
- - **learning_rate = 2e-4
+ - **learning_rate = 2e-4**
 
- - **weight_decay = 0.001
+ - **weight_decay = 0.001**
 
- - **optim = "paged_adamw_32bit"
+ - **optim = "paged_adamw_32bit"**
 
- - **lr_scheduler_type = "cosine"
+ - **lr_scheduler_type = "cosine"**
 
- - **max_steps = -1
+ - **max_steps = -1**
 
- - **warmup_ratio = 0.03
+ - **warmup_ratio = 0.03**
 
- - **group_by_length = True
+ - **group_by_length = True**
 
- - **save_steps = 0
+ - **save_steps = 0**
 
- - **logging_steps = 25
+ - **logging_steps = 25**
 
 # SFT parameters
- - **max_seq_length = None
+ - **max_seq_length = None**
 
- - **packing = False
+ - **packing = False**
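
For context, the parameter groups in the updated README correspond to the usual QLoRA fine-tuning stack (datasets, transformers, peft, trl). The sketches below show how they would typically be wired together; they are assumptions based on those libraries' standard APIs, not code taken from this repository. Starting with the model and dataset identifiers under "Model Information" (the `split="train"` choice and the tokenizer padding setup are assumptions):

```python
# Minimal sketch: resolve the model and dataset named in the README.
# Not the repository's training script; split and padding settings are assumptions.
from datasets import load_dataset
from transformers import AutoTokenizer

model_name = "NousResearch/Llama-2-7b-chat-hf"
dataset_name = "b-mc2/sql-create-context"

dataset = load_dataset(dataset_name, split="train")

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Llama-2 ships without a pad token
tokenizer.padding_side = "right"
```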
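The QLoRA parameters map onto a `peft.LoraConfig`. The `bias` and `task_type` values below are common defaults and are not listed in the README; `target_modules` is left to peft's defaults for the same reason:

```python
# Sketch: the README's LoRA settings expressed as a peft.LoraConfig.
from peft import LoraConfig

peft_config = LoraConfig(
    r=64,               # lora_r
    lora_alpha=16,      # lora_alpha
    lora_dropout=0.1,   # lora_dropout
    bias="none",        # assumption, not stated in the README
    task_type="CAUSAL_LM",
)
```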
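The bitsandbytes parameters describe 4-bit NF4 quantization and would typically be passed as a `transformers.BitsAndBytesConfig` when the base model is loaded (`device_map="auto"` and disabling the KV cache are assumptions, not README values):

```python
# Sketch: 4-bit quantization settings from the README, applied at model load.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # use_4bit
    bnb_4bit_compute_dtype=torch.float16,  # bnb_4bit_compute_dtype
    bnb_4bit_quant_type="nf4",             # bnb_4bit_quant_type
    bnb_4bit_use_double_quant=False,       # use_nested_quant
)

model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    quantization_config=bnb_config,
    device_map="auto",                     # assumption
)
model.config.use_cache = False             # commonly disabled with gradient checkpointing
```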
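The TrainingArguments parameters translate one-to-one into `transformers.TrainingArguments`; `output_dir` is not given in the README, so the value below is a placeholder:

```python
# Sketch: the README's training hyperparameters as TrainingArguments.
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir="./results",            # placeholder, not in the README
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    learning_rate=2e-4,
    weight_decay=0.001,
    optim="paged_adamw_32bit",
    lr_scheduler_type="cosine",
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    fp16=False,
    bf16=False,
    save_steps=0,
    logging_steps=25,
)
```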
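Finally, the SFT parameters belong to trl's `SFTTrainer`. The sketch below reuses the objects from the previous sketches and assumes an older trl release (pre-0.12) in which `SFTTrainer` accepts these arguments directly; newer releases move them into `SFTConfig`. The `dataset_text_field="text"` value is a guess at how the prompts are formatted, not something stated in the README:

```python
# Sketch: tying the pieces together with trl.SFTTrainer (older trl API assumed).
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,                   # 4-bit base model from the bitsandbytes sketch
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",     # assumption about the preprocessed column name
    max_seq_length=None,           # max_seq_length
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,                 # packing
)
trainer.train()
```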