init
Browse files- .gitattributes +2 -0
- data/processed_datasets/PublayNet_ltrb_lex/src1_test.txt +0 -0
- data/processed_datasets/PublayNet_ltrb_lex/src1_train.txt +3 -0
- data/processed_datasets/PublayNet_ltrb_lex/src1_valid.txt +0 -0
- data/processed_datasets/PublayNet_ltrb_lex_refine/src1_test.txt +0 -0
- data/processed_datasets/PublayNet_ltrb_pos/src1_test.txt +0 -0
- data/processed_datasets/PublayNet_ltrb_pos/src1_train.txt +3 -0
- data/processed_datasets/PublayNet_ltrb_pos/src1_valid.txt +0 -0
- data/processed_datasets/RICO_ltrb_lex/src1_test.txt +0 -0
- data/processed_datasets/RICO_ltrb_lex/src1_train.txt +0 -0
- data/processed_datasets/RICO_ltrb_lex/src1_valid.txt +0 -0
- data/processed_datasets/RICO_ltrb_lex_refine/src1_test.txt +0 -0
- data/processed_datasets/RICO_ltrb_pos/src1_test.txt +0 -0
- data/processed_datasets/RICO_ltrb_pos/src1_train.txt +0 -0
- data/processed_datasets/RICO_ltrb_pos/src1_valid.txt +0 -0
- data/processed_datasets/RICO_ltwh_random/src1_test.txt +0 -0
- data/processed_datasets/RICO_ltwh_random/src1_train.txt +0 -0
- data/processed_datasets/RICO_ltwh_random/src1_valid.txt +0 -0
- data/raw_datasets/publaynet/pre_processed_20_5/test.pt +3 -0
- data/raw_datasets/publaynet/pre_processed_20_5/train.pt +3 -0
- data/raw_datasets/publaynet/pre_processed_20_5/val.pt +3 -0
- data/raw_datasets/rico/pre_processed_20_25/test.pt +3 -0
- data/raw_datasets/rico/pre_processed_20_25/val.pt +3 -0
- results/checkpoint/README.md +1 -0
- results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/ema_0.9999_175000.pt +3 -0
- results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/random_emb.torch +0 -0
- results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/training_args.json +72 -0
- results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/vocab.json +1 -0
- results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/ema_0.9999_400000.pt +3 -0
- results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/random_emb.torch +0 -0
- results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/training_args.json +72 -0
- results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/vocab.json +1 -0
- results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/ema_0.9999_350000.pt +3 -0
- results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/random_emb.torch +0 -0
- results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/training_args.json +72 -0
- results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/vocab.json +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/processed_datasets/PublayNet_ltrb_lex/src1_train.txt filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data/processed_datasets/PublayNet_ltrb_pos/src1_train.txt filter=lfs diff=lfs merge=lfs -text
|
data/processed_datasets/PublayNet_ltrb_lex/src1_test.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/PublayNet_ltrb_lex/src1_train.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eed9126d47ff4ea38fec9eae7b6a0c24cdae36f77d27137ce95dbc44fad7c24a
|
3 |
+
size 58280772
|
data/processed_datasets/PublayNet_ltrb_lex/src1_valid.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/PublayNet_ltrb_lex_refine/src1_test.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/PublayNet_ltrb_pos/src1_test.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/PublayNet_ltrb_pos/src1_train.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c7f430d713006b79229bcc047455e2f8944416a670e71df358e434a5727626c
|
3 |
+
size 58280772
|
data/processed_datasets/PublayNet_ltrb_pos/src1_valid.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltrb_lex/src1_test.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltrb_lex/src1_train.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltrb_lex/src1_valid.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltrb_lex_refine/src1_test.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltrb_pos/src1_test.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltrb_pos/src1_train.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltrb_pos/src1_valid.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltwh_random/src1_test.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltwh_random/src1_train.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/processed_datasets/RICO_ltwh_random/src1_valid.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/raw_datasets/publaynet/pre_processed_20_5/test.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c74852e1a931e8ee094eefde3a719e1a183db8de0079835d51d5616e63801303
|
3 |
+
size 9408251
|
data/raw_datasets/publaynet/pre_processed_20_5/train.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:812abbcbd7298901381fa0191a2ca503d82085f8f163b2185ca6056a86da9297
|
3 |
+
size 257991305
|
data/raw_datasets/publaynet/pre_processed_20_5/val.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe38fa40ebbb5405e3053a50c90c53a8a4ad2270de5346cea4187d98c52c8a66
|
3 |
+
size 13586523
|
data/raw_datasets/rico/pre_processed_20_25/test.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d02c13e26410641ccf53e1bc425dc7bcba90627d2a27a98496651515be32afad
|
3 |
+
size 3068769
|
data/raw_datasets/rico/pre_processed_20_25/val.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53b29544dff6887bb351480543c09dcef2cfbedeca159869224de2164749b319
|
3 |
+
size 1539153
|
results/checkpoint/README.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
- path to save the checkpoint
|
results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/ema_0.9999_175000.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7dfe36ca6d6f796004b22c3fbcf6f6e6339ae515509e21af5fb467f3dcc7ec5
|
3 |
+
size 351792223
|
results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/random_emb.torch
ADDED
Binary file (5.87 kB). View file
|
|
results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/training_args.json
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"local_rank": -1,
|
3 |
+
"checkpoint_path": "/mnt/output/projects/diffusion-lm_base/amlt-results/7333918554.57373-d9fb69e7-9ace-4ce4-8016-a424562d01d7",
|
4 |
+
"submit": true,
|
5 |
+
"e2e_train": "/mnt/default/Diffusion-LM/datasets/datasets/RICO_ltrb_lex",
|
6 |
+
"data_dir": "",
|
7 |
+
"schedule_sampler": "uniform",
|
8 |
+
"lr": 4e-05,
|
9 |
+
"weight_decay": 0.0,
|
10 |
+
"lr_anneal_steps": 300000,
|
11 |
+
"batch_size": 64,
|
12 |
+
"microbatch": -1,
|
13 |
+
"ema_rate": "0.9999",
|
14 |
+
"log_interval": 50,
|
15 |
+
"save_interval": 25000,
|
16 |
+
"resume_checkpoint": "",
|
17 |
+
"use_fp16": false,
|
18 |
+
"fp16_scale_growth": 0.001,
|
19 |
+
"seed": 102,
|
20 |
+
"gradient_clipping": -1.0,
|
21 |
+
"eval_interval": 2000,
|
22 |
+
"ungen": false,
|
23 |
+
"self_cond": false,
|
24 |
+
"image_size": 11,
|
25 |
+
"num_channels": 128,
|
26 |
+
"num_res_blocks": 2,
|
27 |
+
"num_heads": 4,
|
28 |
+
"num_heads_upsample": -1,
|
29 |
+
"attention_resolutions": "16,8",
|
30 |
+
"dropout": 0.1,
|
31 |
+
"learn_sigma": false,
|
32 |
+
"sigma_small": false,
|
33 |
+
"class_cond": false,
|
34 |
+
"diffusion_steps": 200,
|
35 |
+
"noise_schedule": "gaussian_refine_pow2.5",
|
36 |
+
"timestep_respacing": "",
|
37 |
+
"use_kl": false,
|
38 |
+
"predict_xstart": true,
|
39 |
+
"rescale_timesteps": false,
|
40 |
+
"rescale_learned_sigmas": true,
|
41 |
+
"use_checkpoint": false,
|
42 |
+
"use_scale_shift_norm": true,
|
43 |
+
"model_arch": "transformer",
|
44 |
+
"in_channel": 8,
|
45 |
+
"out_channel": 8,
|
46 |
+
"training_mode": "discrete1",
|
47 |
+
"vocab_size": 159,
|
48 |
+
"config_name": "bert-base-uncased",
|
49 |
+
"experiment_mode": "lm",
|
50 |
+
"logits_mode": 1,
|
51 |
+
"constrained": null,
|
52 |
+
"att_1": 0.99999,
|
53 |
+
"alignment_loss": false,
|
54 |
+
"alignment_weight": 100000.0,
|
55 |
+
"aux_loss": true,
|
56 |
+
"modality": "e2e-tgt",
|
57 |
+
"dataset_name": "wikitext",
|
58 |
+
"dataset_config_name": "wikitext-2-raw-v1",
|
59 |
+
"config": "diffusion_lm/synthetic_data/configs/emnlp2020/experiments/difflm_seed0_m3_k128_trainc20000.yaml",
|
60 |
+
"model_name_or_path": "predictability/diff_models/compress_e=5_b=60_m=gpt2_wikitext-103-raw-v1_None",
|
61 |
+
"experiment": "random",
|
62 |
+
"roc_train": "diffusion_lm/ROCstory",
|
63 |
+
"wiki_train": "diffusion_lm/simple_wiki/data.v1.split/simple.training.txt",
|
64 |
+
"yelp_train": "diffusion_lm/yelpnlg-resources/yelpnlg-corpus",
|
65 |
+
"commonGen_train": "diffusion_lm/common-gen/commongen_data",
|
66 |
+
"emb_scale_factor": 1.0,
|
67 |
+
"noise_level": 0.0,
|
68 |
+
"cache_mode": "no",
|
69 |
+
"use_bert_tokenizer": "no",
|
70 |
+
"padding_mode": "pad",
|
71 |
+
"preprocessing_num_workers": 1
|
72 |
+
}
|
results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"START": 0, "END": 1, "UNK": 2, "PAD": 3, "|": 4, "Image": 5, "Radio_Button": 6, "Text": 7, "Text_Button": 8, "Card": 9, "Icon": 10, "Toolbar": 11, "Web_View": 12, "List_Item": 13, "Advertisement": 14, "Background_Image": 15, "Pager_Indicator": 16, "Checkbox": 17, "Map_View": 18, "Drawer": 19, "Input": 20, "Video": 21, "Multi_Tab": 22, "On_Off_Switch": 23, "Slider": 24, "Bottom_Navigation": 25, "Date_Picker": 26, "Number_Stepper": 27, "Modal": 28, "Button_Bar": 29, "0": 30, "1": 31, "2": 32, "3": 33, "4": 34, "5": 35, "6": 36, "7": 37, "8": 38, "9": 39, "10": 40, "11": 41, "12": 42, "13": 43, "14": 44, "15": 45, "16": 46, "17": 47, "18": 48, "19": 49, "20": 50, "21": 51, "22": 52, "23": 53, "24": 54, "25": 55, "26": 56, "27": 57, "28": 58, "29": 59, "30": 60, "31": 61, "32": 62, "33": 63, "34": 64, "35": 65, "36": 66, "37": 67, "38": 68, "39": 69, "40": 70, "41": 71, "42": 72, "43": 73, "44": 74, "45": 75, "46": 76, "47": 77, "48": 78, "49": 79, "50": 80, "51": 81, "52": 82, "53": 83, "54": 84, "55": 85, "56": 86, "57": 87, "58": 88, "59": 89, "60": 90, "61": 91, "62": 92, "63": 93, "64": 94, "65": 95, "66": 96, "67": 97, "68": 98, "69": 99, "70": 100, "71": 101, "72": 102, "73": 103, "74": 104, "75": 105, "76": 106, "77": 107, "78": 108, "79": 109, "80": 110, "81": 111, "82": 112, "83": 113, "84": 114, "85": 115, "86": 116, "87": 117, "88": 118, "89": 119, "90": 120, "91": 121, "92": 122, "93": 123, "94": 124, "95": 125, "96": 126, "97": 127, "98": 128, "99": 129, "100": 130, "101": 131, "102": 132, "103": 133, "104": 134, "105": 135, "106": 136, "107": 137, "108": 138, "109": 139, "110": 140, "111": 141, "112": 142, "113": 143, "114": 144, "115": 145, "116": 146, "117": 147, "118": 148, "119": 149, "120": 150, "121": 151, "122": 152, "123": 153, "124": 154, "125": 155, "126": 156, "127": 157}
|
results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/ema_0.9999_400000.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bdf8ebecb84b4690a967d086fe6c6642f13b3321afc0ef54fbb044564a74da9
|
3 |
+
size 351669279
|
results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/random_emb.torch
ADDED
Binary file (5.23 kB). View file
|
|
results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/training_args.json
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"local_rank": -1,
|
3 |
+
"checkpoint_path": "//amlt6a45b6192fdf58ada032293c3756ac0c/projects/diffusion-lm_base/amlt-results/7333651979.04978-4d02c518-6060-4490-b393-525ec6ef061f",
|
4 |
+
"submit": true,
|
5 |
+
"e2e_train": "//amlt2d6da0a398a052038353b375ab0246b2/Diffusion-LM/datasets/datasets/PublayNet_ltrb_lex",
|
6 |
+
"data_dir": "",
|
7 |
+
"schedule_sampler": "uniform",
|
8 |
+
"lr": 5e-05,
|
9 |
+
"weight_decay": 0.0,
|
10 |
+
"lr_anneal_steps": 500000,
|
11 |
+
"batch_size": 64,
|
12 |
+
"microbatch": -1,
|
13 |
+
"ema_rate": "0.9999",
|
14 |
+
"log_interval": 50,
|
15 |
+
"save_interval": 50000,
|
16 |
+
"resume_checkpoint": "",
|
17 |
+
"use_fp16": false,
|
18 |
+
"fp16_scale_growth": 0.001,
|
19 |
+
"seed": 102,
|
20 |
+
"gradient_clipping": -1.0,
|
21 |
+
"eval_interval": 2000,
|
22 |
+
"ungen": false,
|
23 |
+
"self_cond": false,
|
24 |
+
"image_size": 11,
|
25 |
+
"num_channels": 128,
|
26 |
+
"num_res_blocks": 2,
|
27 |
+
"num_heads": 4,
|
28 |
+
"num_heads_upsample": -1,
|
29 |
+
"attention_resolutions": "16,8",
|
30 |
+
"dropout": 0.1,
|
31 |
+
"learn_sigma": false,
|
32 |
+
"sigma_small": false,
|
33 |
+
"class_cond": false,
|
34 |
+
"diffusion_steps": 200,
|
35 |
+
"noise_schedule": "gaussian_refine_pow2.5",
|
36 |
+
"timestep_respacing": "",
|
37 |
+
"use_kl": false,
|
38 |
+
"predict_xstart": true,
|
39 |
+
"rescale_timesteps": false,
|
40 |
+
"rescale_learned_sigmas": true,
|
41 |
+
"use_checkpoint": false,
|
42 |
+
"use_scale_shift_norm": true,
|
43 |
+
"model_arch": "transformer",
|
44 |
+
"in_channel": 8,
|
45 |
+
"out_channel": 8,
|
46 |
+
"training_mode": "discrete1",
|
47 |
+
"vocab_size": 139,
|
48 |
+
"config_name": "bert-base-uncased",
|
49 |
+
"experiment_mode": "lm",
|
50 |
+
"logits_mode": 1,
|
51 |
+
"constrained": null,
|
52 |
+
"att_1": 0.99999,
|
53 |
+
"alignment_loss": false,
|
54 |
+
"alignment_weight": 100.0,
|
55 |
+
"aux_loss": true,
|
56 |
+
"modality": "e2e-tgt",
|
57 |
+
"dataset_name": "wikitext",
|
58 |
+
"dataset_config_name": "wikitext-2-raw-v1",
|
59 |
+
"config": "diffusion_lm/synthetic_data/configs/emnlp2020/experiments/difflm_seed0_m3_k128_trainc20000.yaml",
|
60 |
+
"model_name_or_path": "predictability/diff_models/compress_e=5_b=60_m=gpt2_wikitext-103-raw-v1_None",
|
61 |
+
"experiment": "random",
|
62 |
+
"roc_train": "diffusion_lm/ROCstory",
|
63 |
+
"wiki_train": "diffusion_lm/simple_wiki/data.v1.split/simple.training.txt",
|
64 |
+
"yelp_train": "diffusion_lm/yelpnlg-resources/yelpnlg-corpus",
|
65 |
+
"commonGen_train": "diffusion_lm/common-gen/commongen_data",
|
66 |
+
"emb_scale_factor": 1.0,
|
67 |
+
"noise_level": 0.0,
|
68 |
+
"cache_mode": "no",
|
69 |
+
"use_bert_tokenizer": "no",
|
70 |
+
"padding_mode": "pad",
|
71 |
+
"preprocessing_num_workers": 1
|
72 |
+
}
|
results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"START": 0, "END": 1, "UNK": 2, "PAD": 3, "|": 4, "figure": 5, "text": 6, "title": 7, "table": 8, "list": 9, "0": 10, "1": 11, "2": 12, "3": 13, "4": 14, "5": 15, "6": 16, "7": 17, "8": 18, "9": 19, "10": 20, "11": 21, "12": 22, "13": 23, "14": 24, "15": 25, "16": 26, "17": 27, "18": 28, "19": 29, "20": 30, "21": 31, "22": 32, "23": 33, "24": 34, "25": 35, "26": 36, "27": 37, "28": 38, "29": 39, "30": 40, "31": 41, "32": 42, "33": 43, "34": 44, "35": 45, "36": 46, "37": 47, "38": 48, "39": 49, "40": 50, "41": 51, "42": 52, "43": 53, "44": 54, "45": 55, "46": 56, "47": 57, "48": 58, "49": 59, "50": 60, "51": 61, "52": 62, "53": 63, "54": 64, "55": 65, "56": 66, "57": 67, "58": 68, "59": 69, "60": 70, "61": 71, "62": 72, "63": 73, "64": 74, "65": 75, "66": 76, "67": 77, "68": 78, "69": 79, "70": 80, "71": 81, "72": 82, "73": 83, "74": 84, "75": 85, "76": 86, "77": 87, "78": 88, "79": 89, "80": 90, "81": 91, "82": 92, "83": 93, "84": 94, "85": 95, "86": 96, "87": 97, "88": 98, "89": 99, "90": 100, "91": 101, "92": 102, "93": 103, "94": 104, "95": 105, "96": 106, "97": 107, "98": 108, "99": 109, "100": 110, "101": 111, "102": 112, "103": 113, "104": 114, "105": 115, "106": 116, "107": 117, "108": 118, "109": 119, "110": 120, "111": 121, "112": 122, "113": 123, "114": 124, "115": 125, "116": 126, "117": 127, "118": 128, "119": 129, "120": 130, "121": 131, "122": 132, "123": 133, "124": 134, "125": 135, "126": 136, "127": 137}
|
results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/ema_0.9999_350000.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9d5845ef52ebb261ea0634315af3cdef365f603c43490b3229634da6ea8bb54
|
3 |
+
size 351669279
|
results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/random_emb.torch
ADDED
Binary file (5.23 kB). View file
|
|
results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/training_args.json
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"local_rank": -1,
|
3 |
+
"checkpoint_path": "/mnt/output/projects/diffusion-lm_base/amlt-results/7333740520.31320-61f3cca4-689d-4168-8466-ed1c3cdc784f",
|
4 |
+
"submit": true,
|
5 |
+
"e2e_train": "/mnt/default/Diffusion-LM/datasets/datasets/PublayNet_ltrb_pos",
|
6 |
+
"data_dir": "",
|
7 |
+
"schedule_sampler": "uniform",
|
8 |
+
"lr": 5e-05,
|
9 |
+
"weight_decay": 0.0,
|
10 |
+
"lr_anneal_steps": 500000,
|
11 |
+
"batch_size": 64,
|
12 |
+
"microbatch": -1,
|
13 |
+
"ema_rate": "0.9999",
|
14 |
+
"log_interval": 50,
|
15 |
+
"save_interval": 50000,
|
16 |
+
"resume_checkpoint": "",
|
17 |
+
"use_fp16": false,
|
18 |
+
"fp16_scale_growth": 0.001,
|
19 |
+
"seed": 102,
|
20 |
+
"gradient_clipping": -1.0,
|
21 |
+
"eval_interval": 2000,
|
22 |
+
"ungen": false,
|
23 |
+
"self_cond": false,
|
24 |
+
"image_size": 11,
|
25 |
+
"num_channels": 128,
|
26 |
+
"num_res_blocks": 2,
|
27 |
+
"num_heads": 4,
|
28 |
+
"num_heads_upsample": -1,
|
29 |
+
"attention_resolutions": "16,8",
|
30 |
+
"dropout": 0.1,
|
31 |
+
"learn_sigma": false,
|
32 |
+
"sigma_small": false,
|
33 |
+
"class_cond": false,
|
34 |
+
"diffusion_steps": 200,
|
35 |
+
"noise_schedule": "gaussian_refine_pow2.5",
|
36 |
+
"timestep_respacing": "",
|
37 |
+
"use_kl": false,
|
38 |
+
"predict_xstart": true,
|
39 |
+
"rescale_timesteps": false,
|
40 |
+
"rescale_learned_sigmas": true,
|
41 |
+
"use_checkpoint": false,
|
42 |
+
"use_scale_shift_norm": true,
|
43 |
+
"model_arch": "transformer",
|
44 |
+
"in_channel": 8,
|
45 |
+
"out_channel": 8,
|
46 |
+
"training_mode": "discrete1",
|
47 |
+
"vocab_size": 139,
|
48 |
+
"config_name": "bert-base-uncased",
|
49 |
+
"experiment_mode": "lm",
|
50 |
+
"logits_mode": 1,
|
51 |
+
"constrained": null,
|
52 |
+
"att_1": 0.99999,
|
53 |
+
"alignment_loss": false,
|
54 |
+
"alignment_weight": 100.0,
|
55 |
+
"aux_loss": true,
|
56 |
+
"modality": "e2e-tgt",
|
57 |
+
"dataset_name": "wikitext",
|
58 |
+
"dataset_config_name": "wikitext-2-raw-v1",
|
59 |
+
"config": "diffusion_lm/synthetic_data/configs/emnlp2020/experiments/difflm_seed0_m3_k128_trainc20000.yaml",
|
60 |
+
"model_name_or_path": "predictability/diff_models/compress_e=5_b=60_m=gpt2_wikitext-103-raw-v1_None",
|
61 |
+
"experiment": "random",
|
62 |
+
"roc_train": "diffusion_lm/ROCstory",
|
63 |
+
"wiki_train": "diffusion_lm/simple_wiki/data.v1.split/simple.training.txt",
|
64 |
+
"yelp_train": "diffusion_lm/yelpnlg-resources/yelpnlg-corpus",
|
65 |
+
"commonGen_train": "diffusion_lm/common-gen/commongen_data",
|
66 |
+
"emb_scale_factor": 1.0,
|
67 |
+
"noise_level": 0.0,
|
68 |
+
"cache_mode": "no",
|
69 |
+
"use_bert_tokenizer": "no",
|
70 |
+
"padding_mode": "pad",
|
71 |
+
"preprocessing_num_workers": 1
|
72 |
+
}
|
results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"START": 0, "END": 1, "UNK": 2, "PAD": 3, "|": 4, "figure": 5, "text": 6, "title": 7, "table": 8, "list": 9, "0": 10, "1": 11, "2": 12, "3": 13, "4": 14, "5": 15, "6": 16, "7": 17, "8": 18, "9": 19, "10": 20, "11": 21, "12": 22, "13": 23, "14": 24, "15": 25, "16": 26, "17": 27, "18": 28, "19": 29, "20": 30, "21": 31, "22": 32, "23": 33, "24": 34, "25": 35, "26": 36, "27": 37, "28": 38, "29": 39, "30": 40, "31": 41, "32": 42, "33": 43, "34": 44, "35": 45, "36": 46, "37": 47, "38": 48, "39": 49, "40": 50, "41": 51, "42": 52, "43": 53, "44": 54, "45": 55, "46": 56, "47": 57, "48": 58, "49": 59, "50": 60, "51": 61, "52": 62, "53": 63, "54": 64, "55": 65, "56": 66, "57": 67, "58": 68, "59": 69, "60": 70, "61": 71, "62": 72, "63": 73, "64": 74, "65": 75, "66": 76, "67": 77, "68": 78, "69": 79, "70": 80, "71": 81, "72": 82, "73": 83, "74": 84, "75": 85, "76": 86, "77": 87, "78": 88, "79": 89, "80": 90, "81": 91, "82": 92, "83": 93, "84": 94, "85": 95, "86": 96, "87": 97, "88": 98, "89": 99, "90": 100, "91": 101, "92": 102, "93": 103, "94": 104, "95": 105, "96": 106, "97": 107, "98": 108, "99": 109, "100": 110, "101": 111, "102": 112, "103": 113, "104": 114, "105": 115, "106": 116, "107": 117, "108": 118, "109": 119, "110": 120, "111": 121, "112": 122, "113": 123, "114": 124, "115": 125, "116": 126, "117": 127, "118": 128, "119": 129, "120": 130, "121": 131, "122": 132, "123": 133, "124": 134, "125": 135, "126": 136, "127": 137}
|