Junyi42 committed on
Commit
3b283cc
·
1 Parent(s): becee24
Files changed (36) hide show
  1. .gitattributes +2 -0
  2. data/processed_datasets/PublayNet_ltrb_lex/src1_test.txt +0 -0
  3. data/processed_datasets/PublayNet_ltrb_lex/src1_train.txt +3 -0
  4. data/processed_datasets/PublayNet_ltrb_lex/src1_valid.txt +0 -0
  5. data/processed_datasets/PublayNet_ltrb_lex_refine/src1_test.txt +0 -0
  6. data/processed_datasets/PublayNet_ltrb_pos/src1_test.txt +0 -0
  7. data/processed_datasets/PublayNet_ltrb_pos/src1_train.txt +3 -0
  8. data/processed_datasets/PublayNet_ltrb_pos/src1_valid.txt +0 -0
  9. data/processed_datasets/RICO_ltrb_lex/src1_test.txt +0 -0
  10. data/processed_datasets/RICO_ltrb_lex/src1_train.txt +0 -0
  11. data/processed_datasets/RICO_ltrb_lex/src1_valid.txt +0 -0
  12. data/processed_datasets/RICO_ltrb_lex_refine/src1_test.txt +0 -0
  13. data/processed_datasets/RICO_ltrb_pos/src1_test.txt +0 -0
  14. data/processed_datasets/RICO_ltrb_pos/src1_train.txt +0 -0
  15. data/processed_datasets/RICO_ltrb_pos/src1_valid.txt +0 -0
  16. data/processed_datasets/RICO_ltwh_random/src1_test.txt +0 -0
  17. data/processed_datasets/RICO_ltwh_random/src1_train.txt +0 -0
  18. data/processed_datasets/RICO_ltwh_random/src1_valid.txt +0 -0
  19. data/raw_datasets/publaynet/pre_processed_20_5/test.pt +3 -0
  20. data/raw_datasets/publaynet/pre_processed_20_5/train.pt +3 -0
  21. data/raw_datasets/publaynet/pre_processed_20_5/val.pt +3 -0
  22. data/raw_datasets/rico/pre_processed_20_25/test.pt +3 -0
  23. data/raw_datasets/rico/pre_processed_20_25/val.pt +3 -0
  24. results/checkpoint/README.md +1 -0
  25. results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/ema_0.9999_175000.pt +3 -0
  26. results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/random_emb.torch +0 -0
  27. results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/training_args.json +72 -0
  28. results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/vocab.json +1 -0
  29. results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/ema_0.9999_400000.pt +3 -0
  30. results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/random_emb.torch +0 -0
  31. results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/training_args.json +72 -0
  32. results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/vocab.json +1 -0
  33. results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/ema_0.9999_350000.pt +3 -0
  34. results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/random_emb.torch +0 -0
  35. results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/training_args.json +72 -0
  36. results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/vocab.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/processed_datasets/PublayNet_ltrb_lex/src1_train.txt filter=lfs diff=lfs merge=lfs -text
37
+ data/processed_datasets/PublayNet_ltrb_pos/src1_train.txt filter=lfs diff=lfs merge=lfs -text
data/processed_datasets/PublayNet_ltrb_lex/src1_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/PublayNet_ltrb_lex/src1_train.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eed9126d47ff4ea38fec9eae7b6a0c24cdae36f77d27137ce95dbc44fad7c24a
3
+ size 58280772
data/processed_datasets/PublayNet_ltrb_lex/src1_valid.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/PublayNet_ltrb_lex_refine/src1_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/PublayNet_ltrb_pos/src1_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/PublayNet_ltrb_pos/src1_train.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c7f430d713006b79229bcc047455e2f8944416a670e71df358e434a5727626c
3
+ size 58280772
data/processed_datasets/PublayNet_ltrb_pos/src1_valid.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltrb_lex/src1_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltrb_lex/src1_train.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltrb_lex/src1_valid.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltrb_lex_refine/src1_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltrb_pos/src1_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltrb_pos/src1_train.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltrb_pos/src1_valid.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltwh_random/src1_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltwh_random/src1_train.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed_datasets/RICO_ltwh_random/src1_valid.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/raw_datasets/publaynet/pre_processed_20_5/test.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c74852e1a931e8ee094eefde3a719e1a183db8de0079835d51d5616e63801303
3
+ size 9408251
data/raw_datasets/publaynet/pre_processed_20_5/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:812abbcbd7298901381fa0191a2ca503d82085f8f163b2185ca6056a86da9297
3
+ size 257991305
data/raw_datasets/publaynet/pre_processed_20_5/val.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe38fa40ebbb5405e3053a50c90c53a8a4ad2270de5346cea4187d98c52c8a66
3
+ size 13586523
data/raw_datasets/rico/pre_processed_20_25/test.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d02c13e26410641ccf53e1bc425dc7bcba90627d2a27a98496651515be32afad
3
+ size 3068769
data/raw_datasets/rico/pre_processed_20_25/val.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b29544dff6887bb351480543c09dcef2cfbedeca159869224de2164749b319
3
+ size 1539153
results/checkpoint/README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ - Directory where model checkpoints are saved.
results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/ema_0.9999_175000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7dfe36ca6d6f796004b22c3fbcf6f6e6339ae515509e21af5fb467f3dcc7ec5
3
+ size 351792223
results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/random_emb.torch ADDED
Binary file (5.87 kB). View file
 
results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/training_args.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "local_rank": -1,
3
+ "checkpoint_path": "/mnt/output/projects/diffusion-lm_base/amlt-results/7333918554.57373-d9fb69e7-9ace-4ce4-8016-a424562d01d7",
4
+ "submit": true,
5
+ "e2e_train": "/mnt/default/Diffusion-LM/datasets/datasets/RICO_ltrb_lex",
6
+ "data_dir": "",
7
+ "schedule_sampler": "uniform",
8
+ "lr": 4e-05,
9
+ "weight_decay": 0.0,
10
+ "lr_anneal_steps": 300000,
11
+ "batch_size": 64,
12
+ "microbatch": -1,
13
+ "ema_rate": "0.9999",
14
+ "log_interval": 50,
15
+ "save_interval": 25000,
16
+ "resume_checkpoint": "",
17
+ "use_fp16": false,
18
+ "fp16_scale_growth": 0.001,
19
+ "seed": 102,
20
+ "gradient_clipping": -1.0,
21
+ "eval_interval": 2000,
22
+ "ungen": false,
23
+ "self_cond": false,
24
+ "image_size": 11,
25
+ "num_channels": 128,
26
+ "num_res_blocks": 2,
27
+ "num_heads": 4,
28
+ "num_heads_upsample": -1,
29
+ "attention_resolutions": "16,8",
30
+ "dropout": 0.1,
31
+ "learn_sigma": false,
32
+ "sigma_small": false,
33
+ "class_cond": false,
34
+ "diffusion_steps": 200,
35
+ "noise_schedule": "gaussian_refine_pow2.5",
36
+ "timestep_respacing": "",
37
+ "use_kl": false,
38
+ "predict_xstart": true,
39
+ "rescale_timesteps": false,
40
+ "rescale_learned_sigmas": true,
41
+ "use_checkpoint": false,
42
+ "use_scale_shift_norm": true,
43
+ "model_arch": "transformer",
44
+ "in_channel": 8,
45
+ "out_channel": 8,
46
+ "training_mode": "discrete1",
47
+ "vocab_size": 159,
48
+ "config_name": "bert-base-uncased",
49
+ "experiment_mode": "lm",
50
+ "logits_mode": 1,
51
+ "constrained": null,
52
+ "att_1": 0.99999,
53
+ "alignment_loss": false,
54
+ "alignment_weight": 100000.0,
55
+ "aux_loss": true,
56
+ "modality": "e2e-tgt",
57
+ "dataset_name": "wikitext",
58
+ "dataset_config_name": "wikitext-2-raw-v1",
59
+ "config": "diffusion_lm/synthetic_data/configs/emnlp2020/experiments/difflm_seed0_m3_k128_trainc20000.yaml",
60
+ "model_name_or_path": "predictability/diff_models/compress_e=5_b=60_m=gpt2_wikitext-103-raw-v1_None",
61
+ "experiment": "random",
62
+ "roc_train": "diffusion_lm/ROCstory",
63
+ "wiki_train": "diffusion_lm/simple_wiki/data.v1.split/simple.training.txt",
64
+ "yelp_train": "diffusion_lm/yelpnlg-resources/yelpnlg-corpus",
65
+ "commonGen_train": "diffusion_lm/common-gen/commongen_data",
66
+ "emb_scale_factor": 1.0,
67
+ "noise_level": 0.0,
68
+ "cache_mode": "no",
69
+ "use_bert_tokenizer": "no",
70
+ "padding_mode": "pad",
71
+ "preprocessing_num_workers": 1
72
+ }
results/checkpoint/discrete_gaussian_pow2.5_aux_lex_ltrb_200_fine_4e5/vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"START": 0, "END": 1, "UNK": 2, "PAD": 3, "|": 4, "Image": 5, "Radio_Button": 6, "Text": 7, "Text_Button": 8, "Card": 9, "Icon": 10, "Toolbar": 11, "Web_View": 12, "List_Item": 13, "Advertisement": 14, "Background_Image": 15, "Pager_Indicator": 16, "Checkbox": 17, "Map_View": 18, "Drawer": 19, "Input": 20, "Video": 21, "Multi_Tab": 22, "On_Off_Switch": 23, "Slider": 24, "Bottom_Navigation": 25, "Date_Picker": 26, "Number_Stepper": 27, "Modal": 28, "Button_Bar": 29, "0": 30, "1": 31, "2": 32, "3": 33, "4": 34, "5": 35, "6": 36, "7": 37, "8": 38, "9": 39, "10": 40, "11": 41, "12": 42, "13": 43, "14": 44, "15": 45, "16": 46, "17": 47, "18": 48, "19": 49, "20": 50, "21": 51, "22": 52, "23": 53, "24": 54, "25": 55, "26": 56, "27": 57, "28": 58, "29": 59, "30": 60, "31": 61, "32": 62, "33": 63, "34": 64, "35": 65, "36": 66, "37": 67, "38": 68, "39": 69, "40": 70, "41": 71, "42": 72, "43": 73, "44": 74, "45": 75, "46": 76, "47": 77, "48": 78, "49": 79, "50": 80, "51": 81, "52": 82, "53": 83, "54": 84, "55": 85, "56": 86, "57": 87, "58": 88, "59": 89, "60": 90, "61": 91, "62": 92, "63": 93, "64": 94, "65": 95, "66": 96, "67": 97, "68": 98, "69": 99, "70": 100, "71": 101, "72": 102, "73": 103, "74": 104, "75": 105, "76": 106, "77": 107, "78": 108, "79": 109, "80": 110, "81": 111, "82": 112, "83": 113, "84": 114, "85": 115, "86": 116, "87": 117, "88": 118, "89": 119, "90": 120, "91": 121, "92": 122, "93": 123, "94": 124, "95": 125, "96": 126, "97": 127, "98": 128, "99": 129, "100": 130, "101": 131, "102": 132, "103": 133, "104": 134, "105": 135, "106": 136, "107": 137, "108": 138, "109": 139, "110": 140, "111": 141, "112": 142, "113": 143, "114": 144, "115": 145, "116": 146, "117": 147, "118": 148, "119": 149, "120": 150, "121": 151, "122": 152, "123": 153, "124": 154, "125": 155, "126": 156, "127": 157}
results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/ema_0.9999_400000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bdf8ebecb84b4690a967d086fe6c6642f13b3321afc0ef54fbb044564a74da9
3
+ size 351669279
results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/random_emb.torch ADDED
Binary file (5.23 kB). View file
 
results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/training_args.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "local_rank": -1,
3
+ "checkpoint_path": "//amlt6a45b6192fdf58ada032293c3756ac0c/projects/diffusion-lm_base/amlt-results/7333651979.04978-4d02c518-6060-4490-b393-525ec6ef061f",
4
+ "submit": true,
5
+ "e2e_train": "//amlt2d6da0a398a052038353b375ab0246b2/Diffusion-LM/datasets/datasets/PublayNet_ltrb_lex",
6
+ "data_dir": "",
7
+ "schedule_sampler": "uniform",
8
+ "lr": 5e-05,
9
+ "weight_decay": 0.0,
10
+ "lr_anneal_steps": 500000,
11
+ "batch_size": 64,
12
+ "microbatch": -1,
13
+ "ema_rate": "0.9999",
14
+ "log_interval": 50,
15
+ "save_interval": 50000,
16
+ "resume_checkpoint": "",
17
+ "use_fp16": false,
18
+ "fp16_scale_growth": 0.001,
19
+ "seed": 102,
20
+ "gradient_clipping": -1.0,
21
+ "eval_interval": 2000,
22
+ "ungen": false,
23
+ "self_cond": false,
24
+ "image_size": 11,
25
+ "num_channels": 128,
26
+ "num_res_blocks": 2,
27
+ "num_heads": 4,
28
+ "num_heads_upsample": -1,
29
+ "attention_resolutions": "16,8",
30
+ "dropout": 0.1,
31
+ "learn_sigma": false,
32
+ "sigma_small": false,
33
+ "class_cond": false,
34
+ "diffusion_steps": 200,
35
+ "noise_schedule": "gaussian_refine_pow2.5",
36
+ "timestep_respacing": "",
37
+ "use_kl": false,
38
+ "predict_xstart": true,
39
+ "rescale_timesteps": false,
40
+ "rescale_learned_sigmas": true,
41
+ "use_checkpoint": false,
42
+ "use_scale_shift_norm": true,
43
+ "model_arch": "transformer",
44
+ "in_channel": 8,
45
+ "out_channel": 8,
46
+ "training_mode": "discrete1",
47
+ "vocab_size": 139,
48
+ "config_name": "bert-base-uncased",
49
+ "experiment_mode": "lm",
50
+ "logits_mode": 1,
51
+ "constrained": null,
52
+ "att_1": 0.99999,
53
+ "alignment_loss": false,
54
+ "alignment_weight": 100.0,
55
+ "aux_loss": true,
56
+ "modality": "e2e-tgt",
57
+ "dataset_name": "wikitext",
58
+ "dataset_config_name": "wikitext-2-raw-v1",
59
+ "config": "diffusion_lm/synthetic_data/configs/emnlp2020/experiments/difflm_seed0_m3_k128_trainc20000.yaml",
60
+ "model_name_or_path": "predictability/diff_models/compress_e=5_b=60_m=gpt2_wikitext-103-raw-v1_None",
61
+ "experiment": "random",
62
+ "roc_train": "diffusion_lm/ROCstory",
63
+ "wiki_train": "diffusion_lm/simple_wiki/data.v1.split/simple.training.txt",
64
+ "yelp_train": "diffusion_lm/yelpnlg-resources/yelpnlg-corpus",
65
+ "commonGen_train": "diffusion_lm/common-gen/commongen_data",
66
+ "emb_scale_factor": 1.0,
67
+ "noise_level": 0.0,
68
+ "cache_mode": "no",
69
+ "use_bert_tokenizer": "no",
70
+ "padding_mode": "pad",
71
+ "preprocessing_num_workers": 1
72
+ }
results/checkpoint/gaussian_refine_pow2.5_aux_lex_ltrb_200_5e5_pub/vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"START": 0, "END": 1, "UNK": 2, "PAD": 3, "|": 4, "figure": 5, "text": 6, "title": 7, "table": 8, "list": 9, "0": 10, "1": 11, "2": 12, "3": 13, "4": 14, "5": 15, "6": 16, "7": 17, "8": 18, "9": 19, "10": 20, "11": 21, "12": 22, "13": 23, "14": 24, "15": 25, "16": 26, "17": 27, "18": 28, "19": 29, "20": 30, "21": 31, "22": 32, "23": 33, "24": 34, "25": 35, "26": 36, "27": 37, "28": 38, "29": 39, "30": 40, "31": 41, "32": 42, "33": 43, "34": 44, "35": 45, "36": 46, "37": 47, "38": 48, "39": 49, "40": 50, "41": 51, "42": 52, "43": 53, "44": 54, "45": 55, "46": 56, "47": 57, "48": 58, "49": 59, "50": 60, "51": 61, "52": 62, "53": 63, "54": 64, "55": 65, "56": 66, "57": 67, "58": 68, "59": 69, "60": 70, "61": 71, "62": 72, "63": 73, "64": 74, "65": 75, "66": 76, "67": 77, "68": 78, "69": 79, "70": 80, "71": 81, "72": 82, "73": 83, "74": 84, "75": 85, "76": 86, "77": 87, "78": 88, "79": 89, "80": 90, "81": 91, "82": 92, "83": 93, "84": 94, "85": 95, "86": 96, "87": 97, "88": 98, "89": 99, "90": 100, "91": 101, "92": 102, "93": 103, "94": 104, "95": 105, "96": 106, "97": 107, "98": 108, "99": 109, "100": 110, "101": 111, "102": 112, "103": 113, "104": 114, "105": 115, "106": 116, "107": 117, "108": 118, "109": 119, "110": 120, "111": 121, "112": 122, "113": 123, "114": 124, "115": 125, "116": 126, "117": 127, "118": 128, "119": 129, "120": 130, "121": 131, "122": 132, "123": 133, "124": 134, "125": 135, "126": 136, "127": 137}
results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/ema_0.9999_350000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d5845ef52ebb261ea0634315af3cdef365f603c43490b3229634da6ea8bb54
3
+ size 351669279
results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/random_emb.torch ADDED
Binary file (5.23 kB). View file
 
results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/training_args.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "local_rank": -1,
3
+ "checkpoint_path": "/mnt/output/projects/diffusion-lm_base/amlt-results/7333740520.31320-61f3cca4-689d-4168-8466-ed1c3cdc784f",
4
+ "submit": true,
5
+ "e2e_train": "/mnt/default/Diffusion-LM/datasets/datasets/PublayNet_ltrb_pos",
6
+ "data_dir": "",
7
+ "schedule_sampler": "uniform",
8
+ "lr": 5e-05,
9
+ "weight_decay": 0.0,
10
+ "lr_anneal_steps": 500000,
11
+ "batch_size": 64,
12
+ "microbatch": -1,
13
+ "ema_rate": "0.9999",
14
+ "log_interval": 50,
15
+ "save_interval": 50000,
16
+ "resume_checkpoint": "",
17
+ "use_fp16": false,
18
+ "fp16_scale_growth": 0.001,
19
+ "seed": 102,
20
+ "gradient_clipping": -1.0,
21
+ "eval_interval": 2000,
22
+ "ungen": false,
23
+ "self_cond": false,
24
+ "image_size": 11,
25
+ "num_channels": 128,
26
+ "num_res_blocks": 2,
27
+ "num_heads": 4,
28
+ "num_heads_upsample": -1,
29
+ "attention_resolutions": "16,8",
30
+ "dropout": 0.1,
31
+ "learn_sigma": false,
32
+ "sigma_small": false,
33
+ "class_cond": false,
34
+ "diffusion_steps": 200,
35
+ "noise_schedule": "gaussian_refine_pow2.5",
36
+ "timestep_respacing": "",
37
+ "use_kl": false,
38
+ "predict_xstart": true,
39
+ "rescale_timesteps": false,
40
+ "rescale_learned_sigmas": true,
41
+ "use_checkpoint": false,
42
+ "use_scale_shift_norm": true,
43
+ "model_arch": "transformer",
44
+ "in_channel": 8,
45
+ "out_channel": 8,
46
+ "training_mode": "discrete1",
47
+ "vocab_size": 139,
48
+ "config_name": "bert-base-uncased",
49
+ "experiment_mode": "lm",
50
+ "logits_mode": 1,
51
+ "constrained": null,
52
+ "att_1": 0.99999,
53
+ "alignment_loss": false,
54
+ "alignment_weight": 100.0,
55
+ "aux_loss": true,
56
+ "modality": "e2e-tgt",
57
+ "dataset_name": "wikitext",
58
+ "dataset_config_name": "wikitext-2-raw-v1",
59
+ "config": "diffusion_lm/synthetic_data/configs/emnlp2020/experiments/difflm_seed0_m3_k128_trainc20000.yaml",
60
+ "model_name_or_path": "predictability/diff_models/compress_e=5_b=60_m=gpt2_wikitext-103-raw-v1_None",
61
+ "experiment": "random",
62
+ "roc_train": "diffusion_lm/ROCstory",
63
+ "wiki_train": "diffusion_lm/simple_wiki/data.v1.split/simple.training.txt",
64
+ "yelp_train": "diffusion_lm/yelpnlg-resources/yelpnlg-corpus",
65
+ "commonGen_train": "diffusion_lm/common-gen/commongen_data",
66
+ "emb_scale_factor": 1.0,
67
+ "noise_level": 0.0,
68
+ "cache_mode": "no",
69
+ "use_bert_tokenizer": "no",
70
+ "padding_mode": "pad",
71
+ "preprocessing_num_workers": 1
72
+ }
results/checkpoint/gaussian_refine_pow2.5_aux_pos_ltrb_200_5e5_pub/vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"START": 0, "END": 1, "UNK": 2, "PAD": 3, "|": 4, "figure": 5, "text": 6, "title": 7, "table": 8, "list": 9, "0": 10, "1": 11, "2": 12, "3": 13, "4": 14, "5": 15, "6": 16, "7": 17, "8": 18, "9": 19, "10": 20, "11": 21, "12": 22, "13": 23, "14": 24, "15": 25, "16": 26, "17": 27, "18": 28, "19": 29, "20": 30, "21": 31, "22": 32, "23": 33, "24": 34, "25": 35, "26": 36, "27": 37, "28": 38, "29": 39, "30": 40, "31": 41, "32": 42, "33": 43, "34": 44, "35": 45, "36": 46, "37": 47, "38": 48, "39": 49, "40": 50, "41": 51, "42": 52, "43": 53, "44": 54, "45": 55, "46": 56, "47": 57, "48": 58, "49": 59, "50": 60, "51": 61, "52": 62, "53": 63, "54": 64, "55": 65, "56": 66, "57": 67, "58": 68, "59": 69, "60": 70, "61": 71, "62": 72, "63": 73, "64": 74, "65": 75, "66": 76, "67": 77, "68": 78, "69": 79, "70": 80, "71": 81, "72": 82, "73": 83, "74": 84, "75": 85, "76": 86, "77": 87, "78": 88, "79": 89, "80": 90, "81": 91, "82": 92, "83": 93, "84": 94, "85": 95, "86": 96, "87": 97, "88": 98, "89": 99, "90": 100, "91": 101, "92": 102, "93": 103, "94": 104, "95": 105, "96": 106, "97": 107, "98": 108, "99": 109, "100": 110, "101": 111, "102": 112, "103": 113, "104": 114, "105": 115, "106": 116, "107": 117, "108": 118, "109": 119, "110": 120, "111": 121, "112": 122, "113": 123, "114": 124, "115": 125, "116": 126, "117": 127, "118": 128, "119": 129, "120": 130, "121": 131, "122": 132, "123": 133, "124": 134, "125": 135, "126": 136, "127": 137}