Add inference code
README.md
CHANGED
@@ -16,6 +16,31 @@ pipeline_tag: translation
Input = Polish toponym (say Stare Miasto, literally Old city)
Output = Equivalent toponym (say Altstadt, meaning Old city)

+## Inference Code
+
+```python
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+
+model_path = "DebasishDhal99/polish-to-german-toponym-model-opus-mt-pl-de"
+
+# Run on GPU if one is available, otherwise fall back to CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(device)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+polish_name = "Stare miasteczko"  # Change this to any Polish place name
+
+# Tokenize the input and move the tensors to the model's device
+inputs = tokenizer(polish_name, return_tensors="pt", padding=True, truncation=True)
+inputs = {k: v.to(device) for k, v in inputs.items()}
+
+with torch.no_grad():
+    outputs = model.generate(**inputs, max_length=50)
+
+german_name = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(german_name)
+```

## Model Details

@@ -34,3 +59,6 @@ Output = Equivalent toponym (say Altstadt, meaning Old city)

- Time = Approx. 30 minutes
- Device = 1 × P100 (Available on Kaggle)
+
+
+- Further training is needed for better performance; I'll make one more such model with more epochs.
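
Note on usage: the model name points to an `opus-mt-pl-de` (MarianMT) base and the card sets `pipeline_tag: translation`, so the checkpoint should also load through the high-level `pipeline` API. A minimal sketch under that assumption; the example inputs are illustrative and not part of the commit:

```python
from transformers import pipeline

# Assumption: the checkpoint behaves like a standard MarianMT translation
# model, so the generic "translation" task can drive it end to end.
translator = pipeline(
    "translation",
    model="DebasishDhal99/polish-to-german-toponym-model-opus-mt-pl-de",
)

# The pipeline accepts a single string or a list of Polish toponyms.
names = ["Stare Miasto", "Zielona Góra"]  # illustrative inputs
for result in translator(names, max_length=50):
    print(result["translation_text"])
```

This hides the device and tensor handling done manually in the snippet above; for explicit batching with `generate`, passing a list of names to the tokenizer and decoding with `tokenizer.batch_decode(outputs, skip_special_tokens=True)` gives the same result.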
|