Commit
·
4c3e56c
1
Parent(s):
3abed59
add small-rl-gen9beta
Browse files- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_0.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_10.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_12.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_14.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_16.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_18.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_2.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_20.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_22.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_24.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_4.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_6.pt +3 -0
- small-rl-gen9beta/ckpts/policy_weights/policy_epoch_8.pt +3 -0
- small-rl-gen9beta/config.txt +130 -0
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4c0e2750fb1d118c645a49f1c3dd3b6266f1d6e04e22fcbd56895a45d96b17e
|
3 |
+
size 56576177
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_10.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd9c1296b431039a8ccb5ad3ddd13bde8e55cccd3c84a93623f6920de53989fa
|
3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_12.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c466284dd15acc688efae73e7e2f66ca9fc77070daaa28b58786091519ce82f7
|
3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_14.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb33f5610cf213451343f04b73527591c7e2774e85402c88069168958e0859af
|
3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_16.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9d97e58a357d2c3401f13ec5a05a22eb803ed26c716813cc866c7928bb1f204
|
3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_18.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4e9b3a42f208c3e4538db8871b1a0d2c05f05266e2aaceda919f33181d4926a
|
3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_2.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e8891b6c6a134a1bf6ea7d64c5e31a0c6ccc4e6fc34c1f6ecc378df833ddb6c
|
3 |
+
size 56576177
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_20.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:beaa0fa4a4c400defc0801d3b31e383e045a497d792f4f6a9f6b18650385f8e6
|
3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_22.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d93a4598039280403913ec35c41dd87a489f9f381c26854481a7a3afc973f991
|
3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_24.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:921d0c1bfcbc9a4319bccad8003ee4fbb22bee93c9bc588e0118e6deb3c6b144
|
3 |
+
size 56576351
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_4.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02904f0048c05b7008460aac2bfa07ed43e078669be6b1f3641aa4108eb13dec
|
3 |
+
size 56576177
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_6.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4fad78a293aff52a9ec310e24a464c37b0969c783efa464c60e59df40a152f3
|
3 |
+
size 56576177
|
small-rl-gen9beta/ckpts/policy_weights/policy_epoch_8.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9124f620013bdaa82afe502454c0e89503507f31c8720b77cab2bbfe20e6d1f5
|
3 |
+
size 56576177
|
small-rl-gen9beta/config.txt
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import amago.agent
|
2 |
+
import amago.experiment as amago2
|
3 |
+
import amago.nets.actor_critic as amago3
|
4 |
+
import amago.nets.traj_encoders as amago4
|
5 |
+
|
6 |
+
# Parameters for Actor:
|
7 |
+
# ==============================================================================
|
8 |
+
# None.
|
9 |
+
|
10 |
+
# Parameters for Agent:
|
11 |
+
# ==============================================================================
|
12 |
+
# None.
|
13 |
+
|
14 |
+
# Parameters for Discrete:
|
15 |
+
# ==============================================================================
|
16 |
+
Discrete.clip_prob_high = 0.99
|
17 |
+
Discrete.clip_prob_low = 0.001
|
18 |
+
|
19 |
+
# Parameters for Experiment:
|
20 |
+
# ==============================================================================
|
21 |
+
Experiment.always_load_latest = False
|
22 |
+
Experiment.always_save_latest = True
|
23 |
+
Experiment.has_dset_edit_rights = True
|
24 |
+
Experiment.save_trajs_as = 'npz'
|
25 |
+
Experiment.stagger_traj_file_lengths = True
|
26 |
+
Experiment.traj_save_len = 10000000000.0
|
27 |
+
Experiment.wandb_group_name = None
|
28 |
+
|
29 |
+
# Parameters for FlashAttention:
|
30 |
+
# ==============================================================================
|
31 |
+
FlashAttention.window_size = (32, 0)
|
32 |
+
|
33 |
+
# Parameters for MetamonAMAGOExperiment:
|
34 |
+
# ==============================================================================
|
35 |
+
MetamonAMAGOExperiment.agent_type = @agent.MultiTaskAgent
|
36 |
+
MetamonAMAGOExperiment.critic_loss_weight = 10.0
|
37 |
+
MetamonAMAGOExperiment.grad_clip = 1.5
|
38 |
+
MetamonAMAGOExperiment.l2_coeff = 0.0001
|
39 |
+
MetamonAMAGOExperiment.learning_rate = 0.00015
|
40 |
+
MetamonAMAGOExperiment.lr_warmup_steps = 1000
|
41 |
+
MetamonAMAGOExperiment.max_seq_len = 200
|
42 |
+
MetamonAMAGOExperiment.traj_encoder_type = @traj_encoders.TformerTrajEncoder
|
43 |
+
MetamonAMAGOExperiment.tstep_encoder_type = @MetamonTstepEncoder
|
44 |
+
|
45 |
+
# Parameters for MetamonMaskedActor:
|
46 |
+
# ==============================================================================
|
47 |
+
MetamonMaskedActor.activation = 'leaky_relu'
|
48 |
+
MetamonMaskedActor.continuous_dist_type = None
|
49 |
+
MetamonMaskedActor.d_hidden = 300
|
50 |
+
MetamonMaskedActor.dropout_p = 0.0
|
51 |
+
MetamonMaskedActor.mask_illegal_actions = True
|
52 |
+
MetamonMaskedActor.n_layers = 2
|
53 |
+
|
54 |
+
# Parameters for MetamonTstepEncoder:
|
55 |
+
# ==============================================================================
|
56 |
+
MetamonTstepEncoder.d_model = 100
|
57 |
+
MetamonTstepEncoder.dropout = 0.05
|
58 |
+
MetamonTstepEncoder.extra_emb_dim = 18
|
59 |
+
MetamonTstepEncoder.n_heads = 5
|
60 |
+
MetamonTstepEncoder.n_layers = 3
|
61 |
+
MetamonTstepEncoder.numerical_tokens = 6
|
62 |
+
MetamonTstepEncoder.scratch_tokens = 4
|
63 |
+
MetamonTstepEncoder.token_mask_aug = False
|
64 |
+
|
65 |
+
# Parameters for Multigammas:
|
66 |
+
# ==============================================================================
|
67 |
+
Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
|
68 |
+
Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
|
69 |
+
|
70 |
+
# Parameters for MultiTaskAgent:
|
71 |
+
# ==============================================================================
|
72 |
+
MultiTaskAgent.actor_type = @MetamonMaskedActor
|
73 |
+
MultiTaskAgent.critic_type = @actor_critic.NCriticsTwoHot
|
74 |
+
MultiTaskAgent.fake_filter = False
|
75 |
+
MultiTaskAgent.fbc_filter_func = @agent.exp_filter
|
76 |
+
MultiTaskAgent.gamma = 0.999
|
77 |
+
MultiTaskAgent.num_actions_for_value_in_actor_loss = 3
|
78 |
+
MultiTaskAgent.num_actions_for_value_in_critic_loss = 5
|
79 |
+
MultiTaskAgent.num_critics = 4
|
80 |
+
MultiTaskAgent.num_critics_td = 2
|
81 |
+
MultiTaskAgent.offline_coeff = 1.0
|
82 |
+
MultiTaskAgent.online_coeff = 0.0
|
83 |
+
MultiTaskAgent.pass_obs_keys_to_actor = ['illegal_actions']
|
84 |
+
MultiTaskAgent.popart = True
|
85 |
+
MultiTaskAgent.reward_multiplier = 10.0
|
86 |
+
MultiTaskAgent.tau = 0.004
|
87 |
+
MultiTaskAgent.use_multigamma = True
|
88 |
+
MultiTaskAgent.use_target_actor = True
|
89 |
+
|
90 |
+
# Parameters for NCriticsTwoHot:
|
91 |
+
# ==============================================================================
|
92 |
+
NCriticsTwoHot.activation = 'leaky_relu'
|
93 |
+
NCriticsTwoHot.d_hidden = 300
|
94 |
+
NCriticsTwoHot.dropout_p = 0.0
|
95 |
+
NCriticsTwoHot.max_return = 1100
|
96 |
+
NCriticsTwoHot.min_return = -1100
|
97 |
+
NCriticsTwoHot.n_layers = 2
|
98 |
+
NCriticsTwoHot.output_bins = 64
|
99 |
+
NCriticsTwoHot.use_symlog = False
|
100 |
+
|
101 |
+
# Parameters for PopArtLayer:
|
102 |
+
# ==============================================================================
|
103 |
+
PopArtLayer.beta = 0.0005
|
104 |
+
PopArtLayer.init_nu = 100.0
|
105 |
+
|
106 |
+
# Parameters for SigmaReparam:
|
107 |
+
# ==============================================================================
|
108 |
+
SigmaReparam.bias = True
|
109 |
+
SigmaReparam.fast_init = False
|
110 |
+
|
111 |
+
# Parameters for TformerTrajEncoder:
|
112 |
+
# ==============================================================================
|
113 |
+
TformerTrajEncoder.activation = 'leaky_relu'
|
114 |
+
TformerTrajEncoder.d_ff = 2048
|
115 |
+
TformerTrajEncoder.d_model = 512
|
116 |
+
TformerTrajEncoder.dropout_attn = 0.0
|
117 |
+
TformerTrajEncoder.dropout_emb = 0.05
|
118 |
+
TformerTrajEncoder.dropout_ff = 0.05
|
119 |
+
TformerTrajEncoder.dropout_qkv = 0.0
|
120 |
+
TformerTrajEncoder.head_scaling = True
|
121 |
+
TformerTrajEncoder.n_heads = 8
|
122 |
+
TformerTrajEncoder.n_layers = 3
|
123 |
+
TformerTrajEncoder.norm = 'layer'
|
124 |
+
TformerTrajEncoder.normformer_norms = True
|
125 |
+
TformerTrajEncoder.pos_emb = 'fixed'
|
126 |
+
TformerTrajEncoder.sigma_reparam = True
|
127 |
+
|
128 |
+
# Parameters for TransformerTurnEmbedding:
|
129 |
+
# ==============================================================================
|
130 |
+
# None.
|