from transformers import PreTrainedModel, PretrainedConfig
import torch
import torch.nn as nn
import torch.nn.functional as F

class SASOKConfig(PretrainedConfig):
    """Configuration for the SASOK model: vocabulary size, hidden width, attention heads, and depth."""

    model_type = "sasok"

    def __init__(self, vocab_size=50000, hidden_size=512, num_heads=8, num_layers=4, **kwargs):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.num_layers = num_layers

class SASOKModel(PreTrainedModel):
    """SASOK language model: embedding -> perception/emotion normalization -> self-attention -> meta Transformer stack -> LM head."""

    config_class = SASOKConfig

    def __init__(self, config):
        super().__init__(config)
        self.embedding = nn.Embedding(config.vocab_size, config.hidden_size)
        # Normalization of the embedded "perception" and "emotion" streams
        self.percept_bn = nn.BatchNorm1d(config.hidden_size)
        self.emotion_ln = nn.LayerNorm(config.hidden_size)
        # Pre-norm self-attention block
        self.attn = nn.MultiheadAttention(config.hidden_size, config.num_heads, batch_first=True)
        self.attn_ln = nn.LayerNorm(config.hidden_size)
        # Stack of pre-norm Transformer encoder layers; batch_first=True keeps the
        # (batch, seq, hidden) layout used by the rest of the model.
        self.meta_stack = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=config.hidden_size, nhead=config.num_heads,
                norm_first=True, batch_first=True,
            )
            for _ in range(config.num_layers)
        ])
        self.final_ln = nn.LayerNorm(config.hidden_size)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
        self.init_weights()

    def forward(self, input_ids, attention_mask=None, labels=None):
        # attention_mask is accepted for Hugging Face API compatibility but not applied here.
        x = self.embedding(input_ids)
        # BatchNorm1d expects (batch, channels, seq), so transpose around it.
        x = self.percept_bn(x.transpose(1, 2)).transpose(1, 2)
        x = self.emotion_ln(x)
        # Pre-norm self-attention with a residual connection.
        x_ln = self.attn_ln(x)
        attn_out, _ = self.attn(x_ln, x_ln, x_ln)
        x = x + attn_out
        for layer in self.meta_stack:
            x = layer(x)
        x = self.final_ln(x)
        logits = self.lm_head(x)
        loss = None
        if labels is not None:
            # Token-level cross-entropy over the flattened sequence.
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits.view(-1, logits.size(-1)), labels.view(-1))
        return {"loss": loss, "logits": logits} if loss is not None else {"logits": logits}
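

# Minimal usage sketch (not part of the original file, added for illustration):
# instantiate a small config, run a forward pass on random token ids, and check
# that the logits have shape (batch, seq_len, vocab_size) and that a loss is returned.
if __name__ == "__main__":
    config = SASOKConfig(vocab_size=1000, hidden_size=64, num_heads=4, num_layers=2)
    model = SASOKModel(config)
    model.eval()

    batch_size, seq_len = 2, 16
    input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_len))
    labels = torch.randint(0, config.vocab_size, (batch_size, seq_len))

    with torch.no_grad():
        out = model(input_ids=input_ids, labels=labels)

    print(out["logits"].shape)  # expected: torch.Size([2, 16, 1000])
    print(out["loss"])          # scalar cross-entropy loss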