TinyNews-11M / config.json
GregSamek's picture
Upload config.json
bc9eec1 verified
raw
history blame contribute delete
373 Bytes
{
"model": {
"vocab_size": 8192,
"context_length": 128,
"d_embedding": 256,
"d_intermediate": 1024,
"n_heads": 8,
"n_layers": 8,
"qkv_bias": false
},
"train": {
"peak_lr": 0.001,
"warmup_ratio": 0.01,
"n_epochs": 2,
"batch_size": 8,
"weight_decay": 0.1
}
}