lxaw committed
Commit 027f2fe · verified · 1 Parent(s): 5f19b4d

Upload DoRA adapter

README.md ADDED
@@ -0,0 +1,32 @@
+ ---
+ language:
+ - en
+ tags:
+ - llama
+ - peft
+ - dora
+ - lora
+ - adapter
+ license: apache-2.0
+ base_model: YongganFu/Llama-400M-12L
+ ---
+ # dora_model-adapter
+ Adapter-only repository for the DoRA-finetuned Llama-400M model.
+ ## Adapter Details
+ This is the DoRA adapter for [lxaw/dora_model](https://huggingface.co/lxaw/dora_model).
+ ## Usage
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+ # Load the base model first
+ base_model = AutoModelForCausalLM.from_pretrained("YongganFu/Llama-400M-12L")
+ # Load the DoRA adapter
+ model = PeftModel.from_pretrained(base_model, "lxaw/dora_model-adapter")
+ # Load the tokenizer from the base model
+ tokenizer = AutoTokenizer.from_pretrained("YongganFu/Llama-400M-12L")
+ # Example usage
+ input_text = "What is the capital of France?"
+ inputs = tokenizer(input_text, return_tensors="pt")
+ outputs = model.generate(inputs.input_ids, max_length=50)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```
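
If a standalone checkpoint is preferred, the adapter loaded in the README can optionally be folded into the base weights. This is a minimal sketch using `peft`'s standard `merge_and_unload()` call and assumes a `peft` version whose merge path supports DoRA; the output directory `./dora_model-merged` is a hypothetical example, not part of this upload.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the base model and the DoRA adapter exactly as in the README above
base_model = AutoModelForCausalLM.from_pretrained("YongganFu/Llama-400M-12L")
model = PeftModel.from_pretrained(base_model, "lxaw/dora_model-adapter")

# Fold the adapter into the base weights so inference no longer needs peft
merged_model = model.merge_and_unload()

# Hypothetical local path for the merged checkpoint and matching tokenizer
merged_model.save_pretrained("./dora_model-merged")
AutoTokenizer.from_pretrained("YongganFu/Llama-400M-12L").save_pretrained("./dora_model-merged")
```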
adapter_config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "data4elm/Llama-400M-12L",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_bias": false,
+   "lora_dropout": 0.1,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "model_type": "llama",
+   "peft_type": "LORA",
+   "r": 16,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "embed_tokens",
+     "up_proj",
+     "lm_head",
+     "v_proj",
+     "gate_proj",
+     "k_proj",
+     "down_proj",
+     "o_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": true,
+   "use_rslora": false
+ }
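
For reference, the fields above map onto a `peft` `LoraConfig` with DoRA enabled. The sketch below shows how an equivalent config could be reconstructed for further training; it assumes a `peft` release that supports `use_dora` (>= 0.9.0) and is not the author's original training script. The base model path recorded here (`data4elm/Llama-400M-12L`) differs from the one in the README (`YongganFu/Llama-400M-12L`); the sketch follows the README.

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# Rebuild a LoraConfig matching the key fields of adapter_config.json above
config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    use_dora=True,      # weight-decomposed LoRA (DoRA)
    use_rslora=False,
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "embed_tokens", "lm_head",
    ],
)

# Wrap the base model for training; checkpoint name taken from the README
base_model = AutoModelForCausalLM.from_pretrained("YongganFu/Llama-400M-12L")
peft_model = get_peft_model(base_model, config)
peft_model.print_trainable_parameters()
```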
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87884ebe4ac1055b11084c5f595f7da3be566cb06a40e7665d56a8a31534fb0b
+ size 177776400
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 1.0,
+   "total_flos": 1.584664034112281e+19,
+   "train_loss": 0.2990152974241447,
+   "train_runtime": 9477.5808,
+   "train_samples": 7114660,
+   "train_samples_per_second": 750.683,
+   "train_steps_per_second": 5.213
+ }
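
As a quick consistency check on these metrics: 750.683 samples/s × 9477.58 s ≈ 7,114,658, matching `train_samples`, and 750.683 / 5.213 ≈ 144, which suggests an effective batch of roughly 144 sequences per optimizer step.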
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "</s>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "legacy": false,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "</s>",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
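
The tokenizer shipped with this adapter pins `pad_token` to `</s>` and stores `padding_side: "right"`. Below is a minimal sketch of batched encoding with that tokenizer; switching to left padding for decoder-only generation is a common adjustment assumed here, not something this repo specifies.

```python
from transformers import AutoTokenizer

# Load the tokenizer files uploaded in this commit
tokenizer = AutoTokenizer.from_pretrained("lxaw/dora_model-adapter")

# Per the config above, the pad token is mapped to the EOS token "</s>"
assert tokenizer.pad_token == tokenizer.eos_token

# Left padding is the usual choice for batched decoder-only generation,
# even though the stored default is right padding
tokenizer.padding_side = "left"
batch = tokenizer(
    ["What is the capital of France?", "Name a prime number."],
    return_tensors="pt",
    padding=True,
)
print(batch.input_ids.shape, batch.attention_mask.shape)
```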
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 1.0,
+   "total_flos": 1.584664034112281e+19,
+   "train_loss": 0.2990152974241447,
+   "train_runtime": 9477.5808,
+   "train_samples": 7114660,
+   "train_samples_per_second": 750.683,
+   "train_steps_per_second": 5.213
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a073a419676f298d64ea7fce015bfe6468b7d1132b2eaf234427cb2c580fd04
+ size 8081