Update README.md
README.md
# Model Information

- **model_name** = "NousResearch/Llama-2-7b-chat-hf"
- **dataset_name** = "b-mc2/sql-create-context"
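Both names refer to the Hugging Face Hub. As a reference point, a minimal sketch of loading them with `transformers` and `datasets` (an illustration, not an excerpt from the training script):

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "NousResearch/Llama-2-7b-chat-hf"
dataset_name = "b-mc2/sql-create-context"

# Base chat model and its tokenizer from the Hub
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Text-to-SQL dataset (question / context / answer style records)
dataset = load_dataset(dataset_name, split="train")
```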
# QLoRA parameters

- **lora_r** = 64
- **lora_alpha** = 16
- **lora_dropout** = 0.1
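These three values plug directly into a `peft.LoraConfig`. A sketch is below; `bias` and `task_type` are typical defaults for causal-LM fine-tuning and are assumptions, since only r, alpha, and dropout are listed above:

```python
from peft import LoraConfig

peft_config = LoraConfig(
    r=64,              # lora_r: rank of the LoRA update matrices
    lora_alpha=16,     # lora_alpha: scaling factor applied to the update
    lora_dropout=0.1,  # lora_dropout: dropout on the LoRA layers
    bias="none",
    task_type="CAUSAL_LM",
)
```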
# bitsandbytes parameters

- **use_4bit** = True
- **bnb_4bit_compute_dtype** = "float16"
- **bnb_4bit_quant_type** = "nf4"
- **use_nested_quant** = False
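A sketch of the corresponding `transformers.BitsAndBytesConfig`; the comments show how the names above map onto the config fields:

```python
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # use_4bit
    bnb_4bit_compute_dtype=torch.float16,  # bnb_4bit_compute_dtype = "float16"
    bnb_4bit_quant_type="nf4",             # bnb_4bit_quant_type
    bnb_4bit_use_double_quant=False,       # use_nested_quant
)

# The config is passed when loading the base model so its weights are
# quantized to 4-bit NF4 on load, e.g.:
# model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
```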
# TrainingArguments parameters

- **num_train_epochs** = 1
- **fp16** = False
- **bf16** = False
- **per_device_train_batch_size** = 8
- **per_device_eval_batch_size** = 4
- **gradient_accumulation_steps** = 1
- **gradient_checkpointing** = True
- **max_grad_norm** = 0.3
- **learning_rate** = 2e-4
- **weight_decay** = 0.001
- **optim** = "paged_adamw_32bit"
- **lr_scheduler_type** = "cosine"
- **max_steps** = -1
- **warmup_ratio** = 0.03
- **group_by_length** = True
- **save_steps** = 0
- **logging_steps** = 25
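Assembled into a `transformers.TrainingArguments` object, these values look roughly as follows (`output_dir` is not specified above, so the value here is an assumption):

```python
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir="./results",   # assumed; not listed in the parameters above
    num_train_epochs=1,
    fp16=False,
    bf16=False,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    learning_rate=2e-4,
    weight_decay=0.001,
    optim="paged_adamw_32bit",
    lr_scheduler_type="cosine",
    max_steps=-1,             # -1 means train for num_train_epochs instead
    warmup_ratio=0.03,
    group_by_length=True,     # batch sequences of similar length together
    save_steps=0,
    logging_steps=25,
)
```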
# SFT parameters

- **max_seq_length** = None
- **packing** = False
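These two settings belong to TRL's `SFTTrainer`. A sketch of wiring everything together, assuming an older `trl` release where `SFTTrainer` still accepts `max_seq_length`, `packing`, and `dataset_text_field` directly (newer releases move them into `SFTConfig`), and assuming the formatted prompts live in a `text` column:

```python
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",  # assumed name of the formatted prompt column
    max_seq_length=None,        # fall back to the tokenizer/model default
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,              # one example per sequence, no sample packing
)

trainer.train()
```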