NLP2425
/

deep_learning

Model card Files Files and versions Community

josipabebic committed on Jun 6

Commit

ed69a79

verified ·

1 Parent(s): 7e8117c

Upload 2 files

Browse files

Files changed (2) hide show

code (test1,2,3).ipynb +548 -0
results(test1,2,3).md +177 -0

code (test1,2,3).ipynb ADDED Viewed

	@@ -0,0 +1,548 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6c9745be",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "LSTM training...\n",
+      "LSTM Epoch 1: Train loss 0.9225 | Validation loss 0.9003\n",
+      "LSTM Epoch 5: Train loss 0.9064 | Validation loss 0.8971\n",
+      "LSTM Epoch 10: Train loss 0.9024 | Validation loss 0.8979\n",
+      "LSTM Epoch 15: Train loss 0.9013 | Validation loss 0.8975\n",
+      "LSTM Epoch 20: Train loss 0.9458 | Validation loss 0.9297\n",
+      "LSTM Epoch 25: Train loss 0.9019 | Validation loss 0.9019\n",
+      "LSTM Epoch 30: Train loss 0.9034 | Validation loss 0.9004\n",
+      "LSTM Epoch 35: Train loss 0.9002 | Validation loss 0.9023\n",
+      "LSTM Epoch 40: Train loss 0.8989 | Validation loss 0.9054\n",
+      "LSTM Epoch 45: Train loss 0.8987 | Validation loss 0.9030\n",
+      "LSTM Epoch 50: Train loss 0.8978 | Validation loss 0.9007\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "LSTM on test1 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.0000    0.0000    0.0000       165\n",
+      "     neutral     0.6585    1.0000    0.7941       430\n",
+      "    negative     0.0000    0.0000    0.0000        58\n",
+      "\n",
+      "    accuracy                         0.6585       653\n",
+      "   macro avg     0.2195    0.3333    0.2647       653\n",
+      "weighted avg     0.4336    0.6585    0.5229       653\n",
+      "\n",
+      "LSTM on test1 Confusion matrix:\n",
+      " [[  0 165   0]\n",
+      " [  0 430   0]\n",
+      " [  0  58   0]]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "LSTM on test2 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.0000    0.0000    0.0000       216\n",
+      "     neutral     0.5816    1.0000    0.7355       431\n",
+      "    negative     0.0000    0.0000    0.0000        94\n",
+      "\n",
+      "    accuracy                         0.5816       741\n",
+      "   macro avg     0.1939    0.3333    0.2452       741\n",
+      "weighted avg     0.3383    0.5816    0.4278       741\n",
+      "\n",
+      "LSTM on test2 Confusion matrix:\n",
+      " [[  0 216   0]\n",
+      " [  0 431   0]\n",
+      " [  0  94   0]]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
+      "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "LSTM on test3 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.0000    0.0000    0.0000       267\n",
+      "     neutral     0.3317    1.0000    0.4981       263\n",
+      "    negative     0.0000    0.0000    0.0000       263\n",
+      "\n",
+      "    accuracy                         0.3317       793\n",
+      "   macro avg     0.1106    0.3333    0.1660       793\n",
+      "weighted avg     0.1100    0.3317    0.1652       793\n",
+      "\n",
+      "LSTM on test3 Confusion matrix:\n",
+      " [[  0 267   0]\n",
+      " [  0 263   0]\n",
+      " [  0 263   0]]\n",
+      "\n",
+      "GRU training...\n",
+      "GRU Epoch 1: Train loss 0.9163 | Validation loss 0.8981\n",
+      "GRU Epoch 5: Train loss 0.9048 | Validation loss 0.8972\n",
+      "GRU Epoch 10: Train loss 0.8214 | Validation loss 0.8023\n",
+      "GRU Epoch 15: Train loss 0.7494 | Validation loss 0.7687\n",
+      "GRU Epoch 20: Train loss 0.6789 | Validation loss 0.7580\n",
+      "GRU Epoch 25: Train loss 0.5857 | Validation loss 0.8096\n",
+      "GRU Epoch 30: Train loss 0.4784 | Validation loss 0.9778\n",
+      "GRU Epoch 35: Train loss 0.3589 | Validation loss 1.1809\n",
+      "GRU Epoch 40: Train loss 0.2612 | Validation loss 1.3460\n",
+      "GRU Epoch 45: Train loss 0.1947 | Validation loss 1.4596\n",
+      "GRU Epoch 50: Train loss 0.1336 | Validation loss 1.7536\n",
+      "\n",
+      "GRU on test1 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.4322    0.5212    0.4725       165\n",
+      "     neutral     0.7457    0.7023    0.7234       430\n",
+      "    negative     0.1633    0.1379    0.1495        58\n",
+      "\n",
+      "    accuracy                         0.6064       653\n",
+      "   macro avg     0.4470    0.4538    0.4485       653\n",
+      "weighted avg     0.6147    0.6064    0.6090       653\n",
+      "\n",
+      "GRU on test1 Confusion matrix:\n",
+      " [[ 86  69  10]\n",
+      " [ 97 302  31]\n",
+      " [ 16  34   8]]\n",
+      "\n",
+      "GRU on test2 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.8682    0.8843    0.8761       216\n",
+      "     neutral     0.9211    0.9211    0.9211       431\n",
+      "    negative     0.7778    0.7447    0.7609        94\n",
+      "\n",
+      "    accuracy                         0.8880       741\n",
+      "   macro avg     0.8557    0.8500    0.8527       741\n",
+      "weighted avg     0.8875    0.8880    0.8877       741\n",
+      "\n",
+      "GRU on test2 Confusion matrix:\n",
+      " [[191  19   6]\n",
+      " [ 20 397  14]\n",
+      " [  9  15  70]]\n",
+      "\n",
+      "GRU on test3 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.7510    0.7004    0.7248       267\n",
+      "     neutral     0.5524    0.9011    0.6850       263\n",
+      "    negative     0.7652    0.3346    0.4656       263\n",
+      "\n",
+      "    accuracy                         0.6456       793\n",
+      "   macro avg     0.6896    0.6454    0.6251       793\n",
+      "weighted avg     0.6899    0.6456    0.6256       793\n",
+      "\n",
+      "GRU on test3 Confusion matrix:\n",
+      " [[187  58  22]\n",
+      " [ 21 237   5]\n",
+      " [ 41 134  88]]\n",
+      "\n",
+      "CNN training...\n",
+      "CNN Epoch 1: Train loss 0.9112 | Validation loss 0.8838\n",
+      "CNN Epoch 5: Train loss 0.8149 | Validation loss 0.8114\n",
+      "CNN Epoch 10: Train loss 0.7071 | Validation loss 0.7645\n",
+      "CNN Epoch 15: Train loss 0.6159 | Validation loss 0.7597\n",
+      "CNN Epoch 20: Train loss 0.5508 | Validation loss 0.7568\n",
+      "CNN Epoch 25: Train loss 0.4648 | Validation loss 0.7638\n",
+      "CNN Epoch 30: Train loss 0.4148 | Validation loss 0.7818\n",
+      "CNN Epoch 35: Train loss 0.3572 | Validation loss 0.8047\n",
+      "CNN Epoch 40: Train loss 0.3099 | Validation loss 0.8082\n",
+      "CNN Epoch 45: Train loss 0.2741 | Validation loss 0.8595\n",
+      "CNN Epoch 50: Train loss 0.2376 | Validation loss 0.9191\n",
+      "\n",
+      "CNN on test1 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.4656    0.3697    0.4122       165\n",
+      "     neutral     0.7224    0.8535    0.7825       430\n",
+      "    negative     0.6429    0.1552    0.2500        58\n",
+      "\n",
+      "    accuracy                         0.6692       653\n",
+      "   macro avg     0.6103    0.4595    0.4816       653\n",
+      "weighted avg     0.6505    0.6692    0.6416       653\n",
+      "\n",
+      "CNN on test1 Confusion matrix:\n",
+      " [[ 61 103   1]\n",
+      " [ 59 367   4]\n",
+      " [ 11  38   9]]\n",
+      "\n",
+      "CNN on test2 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.9000    0.8333    0.8654       216\n",
+      "     neutral     0.8936    0.9745    0.9323       431\n",
+      "    negative     0.9296    0.7021    0.8000        94\n",
+      "\n",
+      "    accuracy                         0.8988       741\n",
+      "   macro avg     0.9077    0.8366    0.8659       741\n",
+      "weighted avg     0.9000    0.8988    0.8960       741\n",
+      "\n",
+      "CNN on test2 Confusion matrix:\n",
+      " [[180  33   3]\n",
+      " [  9 420   2]\n",
+      " [ 11  17  66]]\n",
+      "\n",
+      "CNN on test3 Classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "    positive     0.8352    0.5693    0.6771       267\n",
+      "     neutral     0.4674    0.9810    0.6331       263\n",
+      "    negative     0.8983    0.2015    0.3292       263\n",
+      "\n",
+      "    accuracy                         0.5839       793\n",
+      "   macro avg     0.7336    0.5839    0.5465       793\n",
+      "weighted avg     0.7341    0.5839    0.5471       793\n",
+      "\n",
+      "CNN on test3 Confusion matrix:\n",
+      " [[152 109   6]\n",
+      " [  5 258   0]\n",
+      " [ 25 185  53]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# !pip install gensim scikit-learn pandas numpy torch tqdm\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "from torch.utils.data import Dataset, DataLoader\n",
+    "from sklearn.metrics import classification_report, confusion_matrix\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from collections import Counter\n",
+    "import gensim\n",
+    "\n",
+    "# --- DATA LOADING AND SPLITTING ---\n",
+    "# Training data; the validation subset is carved out of it by the split further down.\n",
+    "full_df = pd.read_csv('TRAIN.csv')\n",
+    "\n",
+    "# Load all three test sets.\n",
+    "test1_df, test2_df, test3_df = (\n",
+    "    pd.read_csv(path) for path in ('test-1.csv', 'test-2.csv', 'test-3.csv')\n",
+    ")\n",
+    "\n",
+    "def get_text_column(df):\n",
+    "    \"\"\"Return the first column of df named 'sentence' or 'text', ignoring case.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if df has no such column.\n",
+    "    \"\"\"\n",
+    "    accepted = ('sentence', 'text')\n",
+    "    found = next((name for name in df.columns if name.lower() in accepted), None)\n",
+    "    if found is None:\n",
+    "        raise ValueError(\"Nema stupca 'Sentence' ili 'Text'!\")\n",
+    "    return found\n",
+    "\n",
+    "text_col = get_text_column(full_df)\n",
+    "\n",
+    "# Hold out 5% of the training file for validation, stratified on the label column.\n",
+    "train_df, valid_df = train_test_split(\n",
+    "    full_df,\n",
+    "    test_size=0.05,\n",
+    "    stratify=full_df['Label'],\n",
+    "    random_state=42,\n",
+    ")\n",
+    "\n",
+    "# --- TOKENIZATION AND VOCABULARY ---\n",
+    "def tokenize(text):\n",
+    "    \"\"\"Lower-case text and split it on whitespace.\n",
+    "\n",
+    "    Non-string input (e.g. the float NaN that pandas yields for an empty CSV cell)\n",
+    "    is treated as an empty document instead of raising AttributeError on .lower().\n",
+    "\n",
+    "    Returns:\n",
+    "        list of lower-cased tokens; empty for empty or non-string input.\n",
+    "    \"\"\"\n",
+    "    if not isinstance(text, str):\n",
+    "        return []\n",
+    "    return text.lower().split()\n",
+    "\n",
+    "# Token frequencies are counted over the training split only.\n",
+    "counter = Counter(token for text in train_df[text_col] for token in tokenize(text))\n",
+    "# Ids 0 and 1 are reserved for the special tokens; real tokens start at 2, most frequent first.\n",
+    "vocab = {word: idx for idx, (word, _) in enumerate(counter.most_common(), start=2)}\n",
+    "vocab.update({'<unk>': 0, '<pad>': 1})\n",
+    "\n",
+    "# --- EMBEDDING ---\n",
+    "from gensim.models.fasttext import load_facebook_model\n",
+    "\n",
+    "embedding_path = 'cc.hr.300.bin'\n",
+    "ft_model = load_facebook_model(embedding_path)\n",
+    "embeddings = ft_model.wv\n",
+    "\n",
+    "embedding_dim = embeddings.vector_size\n",
+    "# One row per vocabulary id. A row comes from the fastText vectors when the word is\n",
+    "# reported as present, otherwise it is drawn from a normal distribution with scale 0.6.\n",
+    "embedding_matrix = np.zeros((len(vocab), embedding_dim))\n",
+    "for word, idx in vocab.items():\n",
+    "    embedding_matrix[idx] = (\n",
+    "        embeddings[word]\n",
+    "        if word in embeddings\n",
+    "        else np.random.normal(scale=0.6, size=(embedding_dim, ))\n",
+    "    )\n",
+    "\n",
+    "# --- DATASET ---\n",
+    "class TextDataset(Dataset):\n",
+    "    \"\"\"Dataset yielding (token-id tensor of length max_len, label tensor) pairs.\n",
+    "\n",
+    "    Texts are read from df[text_col] and labels from df['Label'].\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, df, text_col, vocab, max_len=50):\n",
+    "        self.texts = df[text_col].tolist()\n",
+    "        self.labels = df['Label'].tolist()\n",
+    "        self.vocab = vocab\n",
+    "        self.max_len = max_len\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return len(self.texts)\n",
+    "\n",
+    "    def __getitem__(self, idx):\n",
+    "        unk_id = self.vocab['<unk>']\n",
+    "        pad_id = self.vocab['<pad>']\n",
+    "        # Keep at most max_len tokens, map unknown tokens to <unk>, then right-pad with <pad>.\n",
+    "        kept = tokenize(self.texts[idx])[:self.max_len]\n",
+    "        ids = [self.vocab.get(tok, unk_id) for tok in kept]\n",
+    "        ids.extend([pad_id] * (self.max_len - len(ids)))\n",
+    "        return torch.tensor(ids), torch.tensor(self.labels[idx])\n",
+    "\n",
+    "max_len = 50\n",
+    "batch_size = 32\n",
+    "\n",
+    "# Training and validation datasets both use the training file's text column.\n",
+    "train_ds, valid_ds = (\n",
+    "    TextDataset(part, text_col, vocab, max_len) for part in (train_df, valid_df)\n",
+    ")\n",
+    "\n",
+    "# The text column is resolved separately for each test file.\n",
+    "test1_text_col, test2_text_col, test3_text_col = map(\n",
+    "    get_text_column, (test1_df, test2_df, test3_df)\n",
+    ")\n",
+    "\n",
+    "test1_ds = TextDataset(test1_df, test1_text_col, vocab, max_len)\n",
+    "test2_ds = TextDataset(test2_df, test2_text_col, vocab, max_len)\n",
+    "test3_ds = TextDataset(test3_df, test3_text_col, vocab, max_len)\n",
+    "\n",
+    "# Only the training loader shuffles; the evaluation loaders keep dataset order.\n",
+    "train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)\n",
+    "valid_dl, test1_dl, test2_dl, test3_dl = (\n",
+    "    DataLoader(ds, batch_size=batch_size) for ds in (valid_ds, test1_ds, test2_ds, test3_ds)\n",
+    ")\n",
+    "\n",
+    "# --- MODELS ---\n",
+    "class LSTMClassifier(nn.Module):\n",
+    "    \"\"\"LSTM text classifier on top of frozen pre-trained embeddings.\n",
+    "\n",
+    "    Args:\n",
+    "        embedding_matrix: 2-D numpy array whose rows initialise the embedding layer.\n",
+    "        hidden_dim: size of the LSTM hidden state.\n",
+    "        num_classes: number of output logits.\n",
+    "        dropout: dropout probability applied to the final hidden state.\n",
+    "        pad_idx: token id used for right-padding; defaults to 1, the id this\n",
+    "            notebook assigns to '<pad>'.\n",
+    "    \"\"\"\n",
+    "    def __init__(self, embedding_matrix, hidden_dim=256, num_classes=3, dropout=0.8, pad_idx=1):\n",
+    "        super().__init__()\n",
+    "        num_embeddings, embedding_dim = embedding_matrix.shape\n",
+    "        self.pad_idx = pad_idx\n",
+    "        self.embedding = nn.Embedding(num_embeddings, embedding_dim)\n",
+    "        self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))\n",
+    "        # Embeddings stay frozen; only the LSTM and the linear head are trained.\n",
+    "        self.embedding.weight.requires_grad = False\n",
+    "        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)\n",
+    "        self.dropout = nn.Dropout(dropout)\n",
+    "        self.fc = nn.Linear(hidden_dim, num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return class logits for a batch of right-padded token-id rows.\"\"\"\n",
+    "        # Count the real (non-pad) tokens per row. Clamp to 1 because\n",
+    "        # pack_padded_sequence rejects zero-length sequences; lengths must live on the CPU.\n",
+    "        lengths = (x != self.pad_idx).sum(dim=1).clamp(min=1).cpu()\n",
+    "        embedded = self.embedding(x)\n",
+    "        # Pack so the recurrence stops at each row's last real token instead of running\n",
+    "        # over the trailing <pad> positions and returning a padding-dominated state.\n",
+    "        packed = nn.utils.rnn.pack_padded_sequence(\n",
+    "            embedded, lengths, batch_first=True, enforce_sorted=False\n",
+    "        )\n",
+    "        _, (hidden, _) = self.lstm(packed)\n",
+    "        out = self.dropout(hidden[-1])\n",
+    "        return self.fc(out)\n",
+    "\n",
+    "class GRUClassifier(nn.Module):\n",
+    "    \"\"\"GRU text classifier on top of frozen pre-trained embeddings.\n",
+    "\n",
+    "    Args:\n",
+    "        embedding_matrix: 2-D numpy array whose rows initialise the embedding layer.\n",
+    "        hidden_dim: size of the GRU hidden state.\n",
+    "        num_classes: number of output logits.\n",
+    "        dropout: dropout probability applied to the final hidden state.\n",
+    "        pad_idx: token id used for right-padding; defaults to 1, the id this\n",
+    "            notebook assigns to '<pad>'.\n",
+    "    \"\"\"\n",
+    "    def __init__(self, embedding_matrix, hidden_dim=256, num_classes=3, dropout=0.8, pad_idx=1):\n",
+    "        super().__init__()\n",
+    "        num_embeddings, embedding_dim = embedding_matrix.shape\n",
+    "        self.pad_idx = pad_idx\n",
+    "        self.embedding = nn.Embedding(num_embeddings, embedding_dim)\n",
+    "        self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))\n",
+    "        # Embeddings stay frozen; only the GRU and the linear head are trained.\n",
+    "        self.embedding.weight.requires_grad = False\n",
+    "        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)\n",
+    "        self.dropout = nn.Dropout(dropout)\n",
+    "        self.fc = nn.Linear(hidden_dim, num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return class logits for a batch of right-padded token-id rows.\"\"\"\n",
+    "        # Count the real (non-pad) tokens per row. Clamp to 1 because\n",
+    "        # pack_padded_sequence rejects zero-length sequences; lengths must live on the CPU.\n",
+    "        lengths = (x != self.pad_idx).sum(dim=1).clamp(min=1).cpu()\n",
+    "        embedded = self.embedding(x)\n",
+    "        # Pack so the final hidden state is taken at the last real token, not after the padding.\n",
+    "        packed = nn.utils.rnn.pack_padded_sequence(\n",
+    "            embedded, lengths, batch_first=True, enforce_sorted=False\n",
+    "        )\n",
+    "        _, hidden = self.gru(packed)\n",
+    "        out = self.dropout(hidden[-1])\n",
+    "        return self.fc(out)\n",
+    "\n",
+    "class CNNClassifier(nn.Module):\n",
+    "    \"\"\"Convolutional text classifier with max-over-time pooling on frozen embeddings.\n",
+    "\n",
+    "    Args:\n",
+    "        embedding_matrix: 2-D numpy array whose rows initialise the embedding layer.\n",
+    "        num_filters: number of output channels per convolution.\n",
+    "        kernel_sizes: window heights (in tokens), one convolution per entry.\n",
+    "            An immutable tuple is used as the default so it is never shared mutable state.\n",
+    "        num_classes: number of output logits.\n",
+    "        dropout: dropout probability applied to the pooled features.\n",
+    "    \"\"\"\n",
+    "    def __init__(self, embedding_matrix, num_filters=128, kernel_sizes=(3, 4, 5), num_classes=3, dropout=0.8):\n",
+    "        super().__init__()\n",
+    "        num_embeddings, embedding_dim = embedding_matrix.shape\n",
+    "        self.embedding = nn.Embedding(num_embeddings, embedding_dim)\n",
+    "        self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))\n",
+    "        # Embeddings stay frozen; only the convolutions and the linear head are trained.\n",
+    "        self.embedding.weight.requires_grad = False\n",
+    "        # Each kernel spans the full embedding width, so it slides along the token axis only.\n",
+    "        self.convs = nn.ModuleList([\n",
+    "            nn.Conv2d(1, num_filters, (k, embedding_dim)) for k in kernel_sizes\n",
+    "        ])\n",
+    "        self.dropout = nn.Dropout(dropout)\n",
+    "        self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return class logits for a batch of token-id rows.\"\"\"\n",
+    "        x = self.embedding(x)\n",
+    "        x = x.unsqueeze(1)  # add the single input channel expected by Conv2d\n",
+    "        x = [torch.relu(conv(x)).squeeze(3) for conv in self.convs]\n",
+    "        # Max over the position axis: one value per filter.\n",
+    "        x = [torch.max(pool, dim=2)[0] for pool in x]\n",
+    "        x = torch.cat(x, dim=1)\n",
+    "        x = self.dropout(x)\n",
+    "        return self.fc(x)\n",
+    "\n",
+    "# --- TRAINING AND VALIDATION ---\n",
+    "def train_epoch(model, dataloader, optimizer, criterion, device):\n",
+    "    \"\"\"Run one optimisation pass over dataloader and return the mean per-batch loss.\"\"\"\n",
+    "    model.train()\n",
+    "    batch_losses = []\n",
+    "    for inputs, labels in dataloader:\n",
+    "        inputs = inputs.to(device)\n",
+    "        labels = labels.to(device)\n",
+    "        optimizer.zero_grad()\n",
+    "        loss = criterion(model(inputs), labels)\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        batch_losses.append(loss.item())\n",
+    "    return sum(batch_losses) / len(dataloader)\n",
+    "\n",
+    "def eval_model(model, dataloader, device, criterion=None, return_loss=False):\n",
+    "    \"\"\"Predict over dataloader in eval mode with gradients disabled.\n",
+    "\n",
+    "    Returns:\n",
+    "        (preds, targets) as numpy arrays; when both criterion and return_loss are\n",
+    "        given, a third element holds the mean per-batch loss.\n",
+    "    \"\"\"\n",
+    "    model.eval()\n",
+    "    want_loss = bool(return_loss and criterion)\n",
+    "    all_preds, all_targets = [], []\n",
+    "    loss_sum = 0\n",
+    "    with torch.no_grad():\n",
+    "        for inputs, labels in dataloader:\n",
+    "            inputs, labels = inputs.to(device), labels.to(device)\n",
+    "            logits = model(inputs)\n",
+    "            if want_loss:\n",
+    "                loss_sum += criterion(logits, labels).item()\n",
+    "            all_preds.extend(logits.argmax(1).cpu().numpy())\n",
+    "            all_targets.extend(labels.cpu().numpy())\n",
+    "    predictions, truth = np.array(all_preds), np.array(all_targets)\n",
+    "    if want_loss:\n",
+    "        return predictions, truth, loss_sum / len(dataloader)\n",
+    "    return predictions, truth\n",
+    "\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "\n",
+    "def run_training(model_class, name, epochs=50, dropout=0.8, lr=5e-4):\n",
+    "    \"\"\"Train one model on train_dl and evaluate it on the three test loaders.\n",
+    "\n",
+    "    Prints train/validation loss on the first and every fifth epoch, then a\n",
+    "    classification report and a confusion matrix for each test set.\n",
+    "\n",
+    "    Args:\n",
+    "        model_class: class called as model_class(embedding_matrix, dropout=dropout).\n",
+    "        name: label used in the printed log lines.\n",
+    "        epochs: number of passes over train_dl.\n",
+    "        dropout: dropout probability forwarded to the model.\n",
+    "        lr: Adam learning rate.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict mapping 'test1' / 'test2' / 'test3' to a dict with macro precision,\n",
+    "        recall and f1, accuracy, the confusion matrix as nested lists, and the\n",
+    "        text classification report.\n",
+    "    \"\"\"\n",
+    "    print(f\"\\n{name} training...\")\n",
+    "    model = model_class(embedding_matrix, dropout=dropout).to(device)\n",
+    "    optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n",
+    "    criterion = nn.CrossEntropyLoss()\n",
+    "    for epoch in range(epochs):\n",
+    "        train_loss = train_epoch(model, train_dl, optimizer, criterion, device)\n",
+    "        _, _, val_loss = eval_model(model, valid_dl, device, criterion, return_loss=True)\n",
+    "        if (epoch+1) % 5 == 0 or epoch == 0:\n",
+    "            print(f\"{name} Epoch {epoch+1}: Train loss {train_loss:.4f} | Validation loss {val_loss:.4f}\")\n",
+    "\n",
+    "    # NOTE(review): assumes label ids 0/1/2 mean positive/neutral/negative in this order - confirm against the dataset.\n",
+    "    class_names = [\"positive\", \"neutral\", \"negative\"]\n",
+    "    # zero_division=0 keeps the values the default produces (0.0 for classes that are never\n",
+    "    # predicted) but suppresses the UndefinedMetricWarning emitted for each such class.\n",
+    "    report_kwargs = dict(digits=4, target_names=class_names, zero_division=0)\n",
+    "    results = {}\n",
+    "    for test_name, test_dl in zip(\n",
+    "        ['test1', 'test2', 'test3'],\n",
+    "        [test1_dl, test2_dl, test3_dl]\n",
+    "    ):\n",
+    "        preds, targets = eval_model(model, test_dl, device)\n",
+    "        report = classification_report(targets, preds, output_dict=True, **report_kwargs)\n",
+    "        # Build the text report once and reuse it for both printing and the result dict.\n",
+    "        report_text = classification_report(targets, preds, **report_kwargs)\n",
+    "        matrix = confusion_matrix(targets, preds)\n",
+    "        print(f\"\\n{name} on {test_name} Classification report:\\n\", report_text)\n",
+    "        print(f\"{name} on {test_name} Confusion matrix:\\n\", matrix)\n",
+    "        results[test_name] = {\n",
+    "            'precision': report['macro avg']['precision'],\n",
+    "            'recall': report['macro avg']['recall'],\n",
+    "            'f1': report['macro avg']['f1-score'],\n",
+    "            'accuracy': report['accuracy'],\n",
+    "            'confusion_matrix': matrix.tolist(),\n",
+    "            'full_report': report_text\n",
+    "        }\n",
+    "    return results\n",
+    "\n",
+    "# --- RUN ---\n",
+    "lstm_results = run_training(LSTMClassifier, \"LSTM\", epochs=50, dropout=0.8, lr=5e-4)\n",
+    "gru_results = run_training(GRUClassifier, \"GRU\", epochs=50, dropout=0.8, lr=5e-4)\n",
+    "cnn_results = run_training(CNNClassifier, \"CNN\", epochs=50, dropout=0.8, lr=5e-4)\n",
+    "\n",
+    "# --- SAVE ---\n",
+    "# Write one Markdown section per model and one subsection per test set.\n",
+    "with open('results.md', 'w', encoding='utf-8') as f:\n",
+    "    for model_name, results in [('LSTM', lstm_results), ('GRU', gru_results), ('CNN', cnn_results)]:\n",
+    "        f.write(f\"## {model_name}\\n\\n\")\n",
+    "        for test_name, res in results.items():\n",
+    "            f.write(f\"### {test_name}\\n\")\n",
+    "            f.write(f\"- Precision: {res['precision']:.4f}\\n\")\n",
+    "            f.write(f\"- Recall: {res['recall']:.4f}\\n\")\n",
+    "            f.write(f\"- F1: {res['f1']:.4f}\\n\")\n",
+    "            f.write(f\"- Accuracy: {res['accuracy']:.4f}\\n\")\n",
+    "            f.write(f\"- Confusion matrix: {res['confusion_matrix']}\\n\\n\")\n",
+    "            # The report is a whitespace-aligned table; fence it so Markdown renderers\n",
+    "            # keep the columns instead of collapsing the spaces and line breaks.\n",
+    "            f.write(f\"Full classification report:\\n\\n```\\n{res['full_report']}\\n```\\n\\n\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

results(test1,2,3).md ADDED Viewed

	@@ -0,0 +1,177 @@

+## LSTM
+### test1
+- Precision: 0.2195
+- Recall: 0.3333
+- F1: 0.2647
+- Accuracy: 0.6585
+- Confusion matrix: [[0, 165, 0], [0, 430, 0], [0, 58, 0]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.0000    0.0000    0.0000       165
+     neutral     0.6585    1.0000    0.7941       430
+    negative     0.0000    0.0000    0.0000        58
+    accuracy                         0.6585       653
+   macro avg     0.2195    0.3333    0.2647       653
+weighted avg     0.4336    0.6585    0.5229       653
+### test2
+- Precision: 0.1939
+- Recall: 0.3333
+- F1: 0.2452
+- Accuracy: 0.5816
+- Confusion matrix: [[0, 216, 0], [0, 431, 0], [0, 94, 0]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.0000    0.0000    0.0000       216
+     neutral     0.5816    1.0000    0.7355       431
+    negative     0.0000    0.0000    0.0000        94
+    accuracy                         0.5816       741
+   macro avg     0.1939    0.3333    0.2452       741
+weighted avg     0.3383    0.5816    0.4278       741
+### test3
+- Precision: 0.1106
+- Recall: 0.3333
+- F1: 0.1660
+- Accuracy: 0.3317
+- Confusion matrix: [[0, 267, 0], [0, 263, 0], [0, 263, 0]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.0000    0.0000    0.0000       267
+     neutral     0.3317    1.0000    0.4981       263
+    negative     0.0000    0.0000    0.0000       263
+    accuracy                         0.3317       793
+   macro avg     0.1106    0.3333    0.1660       793
+weighted avg     0.1100    0.3317    0.1652       793
+## GRU
+### test1
+- Precision: 0.4470
+- Recall: 0.4538
+- F1: 0.4485
+- Accuracy: 0.6064
+- Confusion matrix: [[86, 69, 10], [97, 302, 31], [16, 34, 8]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.4322    0.5212    0.4725       165
+     neutral     0.7457    0.7023    0.7234       430
+    negative     0.1633    0.1379    0.1495        58
+    accuracy                         0.6064       653
+   macro avg     0.4470    0.4538    0.4485       653
+weighted avg     0.6147    0.6064    0.6090       653
+### test2
+- Precision: 0.8557
+- Recall: 0.8500
+- F1: 0.8527
+- Accuracy: 0.8880
+- Confusion matrix: [[191, 19, 6], [20, 397, 14], [9, 15, 70]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.8682    0.8843    0.8761       216
+     neutral     0.9211    0.9211    0.9211       431
+    negative     0.7778    0.7447    0.7609        94
+    accuracy                         0.8880       741
+   macro avg     0.8557    0.8500    0.8527       741
+weighted avg     0.8875    0.8880    0.8877       741
+### test3
+- Precision: 0.6896
+- Recall: 0.6454
+- F1: 0.6251
+- Accuracy: 0.6456
+- Confusion matrix: [[187, 58, 22], [21, 237, 5], [41, 134, 88]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.7510    0.7004    0.7248       267
+     neutral     0.5524    0.9011    0.6850       263
+    negative     0.7652    0.3346    0.4656       263
+    accuracy                         0.6456       793
+   macro avg     0.6896    0.6454    0.6251       793
+weighted avg     0.6899    0.6456    0.6256       793
+## CNN
+### test1
+- Precision: 0.6103
+- Recall: 0.4595
+- F1: 0.4816
+- Accuracy: 0.6692
+- Confusion matrix: [[61, 103, 1], [59, 367, 4], [11, 38, 9]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.4656    0.3697    0.4122       165
+     neutral     0.7224    0.8535    0.7825       430
+    negative     0.6429    0.1552    0.2500        58
+    accuracy                         0.6692       653
+   macro avg     0.6103    0.4595    0.4816       653
+weighted avg     0.6505    0.6692    0.6416       653
+### test2
+- Precision: 0.9077
+- Recall: 0.8366
+- F1: 0.8659
+- Accuracy: 0.8988
+- Confusion matrix: [[180, 33, 3], [9, 420, 2], [11, 17, 66]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.9000    0.8333    0.8654       216
+     neutral     0.8936    0.9745    0.9323       431
+    negative     0.9296    0.7021    0.8000        94
+    accuracy                         0.8988       741
+   macro avg     0.9077    0.8366    0.8659       741
+weighted avg     0.9000    0.8988    0.8960       741
+### test3
+- Precision: 0.7336
+- Recall: 0.5839
+- F1: 0.5465
+- Accuracy: 0.5839
+- Confusion matrix: [[152, 109, 6], [5, 258, 0], [25, 185, 53]]
+Full classification report:
+              precision    recall  f1-score   support
+    positive     0.8352    0.5693    0.6771       267
+     neutral     0.4674    0.9810    0.6331       263
+    negative     0.8983    0.2015    0.3292       263
+    accuracy                         0.5839       793
+   macro avg     0.7336    0.5839    0.5465       793
+weighted avg     0.7341    0.5839    0.5471       793