ekshat committed
Commit 044ee52 · 1 Parent(s): e9185ce

Update README.md

Files changed (1)
  1. README.md +24 -1
README.md CHANGED
@@ -42,42 +42,65 @@ print(result[0]['generated_text'])
 
 # Model Information
 model_name = "NousResearch/Llama-2-7b-chat-hf"
+
 dataset_name = "b-mc2/sql-create-context"
 
 
 # QLoRA parameters
 lora_r = 64
+
 lora_alpha = 16
+
 lora_dropout = 0.1
 
 
 # bitsandbytes parameters
 use_4bit = True
+
 bnb_4bit_compute_dtype = "float16"
+
 bnb_4bit_quant_type = "nf4"
+
 use_nested_quant = False
 
 
 # TrainingArguments parameters
 num_train_epochs = 1
+
 fp16 = False
+
 bf16 = False
+
 per_device_train_batch_size = 8
+
 per_device_eval_batch_size = 4
+
 gradient_accumulation_steps = 1
+
 gradient_checkpointing = True
+
 max_grad_norm = 0.3
+
 learning_rate = 2e-4
+
 weight_decay = 0.001
+
 optim = "paged_adamw_32bit"
+
 lr_scheduler_type = "cosine"
+
 max_steps = -1
+
 warmup_ratio = 0.03
+
 group_by_length = True
+
 save_steps = 0
+
 logging_steps = 25
 
 
 # SFT parameters
 max_seq_length = None
-packing = False
+
+packing = False
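The values above follow the standard QLoRA recipe. As an illustrative sketch (not code taken from this commit), the bitsandbytes and LoRA settings would typically be wired into `transformers.BitsAndBytesConfig` and `peft.LoraConfig` like this; `bias="none"` and `task_type="CAUSAL_LM"` are the usual choices for this setup rather than values listed above:

```python
import torch
from transformers import BitsAndBytesConfig
from peft import LoraConfig

# 4-bit NF4 quantization of the base weights, float16 compute for the matmuls
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)  # "float16" -> torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# LoRA adapter: rank 64, scaling alpha 16, 10% dropout on adapter inputs
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",            # assumption: no bias terms are trained (common default)
    task_type="CAUSAL_LM",  # assumption: causal-LM task for Llama-2
)
```

`use_nested_quant = False` leaves double quantization off; enabling it saves a little more memory at a small compute cost.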
 
 
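The TrainingArguments and SFT values map onto `transformers.TrainingArguments` and `trl.SFTTrainer` in the usual way. The sketch below is illustrative rather than this repository's exact training script: it assumes the older `trl` API in which `dataset_text_field`, `max_seq_length`, and `packing` are passed directly to `SFTTrainer` (newer releases move them into `SFTConfig`), assumes the `b-mc2/sql-create-context` rows have already been formatted into a single `text` prompt column, and uses a placeholder `output_dir`:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

# Base model loaded in 4-bit with the bnb_config sketched above
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0},
)
model.config.use_cache = False  # KV cache conflicts with gradient checkpointing during training

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # Llama-2 ships without a pad token
tokenizer.padding_side = "right"

# Assumption: each row already holds the full prompt + SQL answer in a "text" column
dataset = load_dataset(dataset_name, split="train")

training_arguments = TrainingArguments(
    output_dir="./results",  # placeholder path
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)
trainer.train()
```

With `max_steps = -1` the run length is set by `num_train_epochs` alone, and `group_by_length = True` batches similarly sized examples together to reduce padding.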