josipabebic committed on
Commit
ed69a79
·
verified ·
1 Parent(s): 7e8117c

Upload 2 files

Browse files
Files changed (2) hide show
  1. code (test1,2,3).ipynb +548 -0
  2. results(test1,2,3).md +177 -0
code (test1,2,3).ipynb ADDED
@@ -0,0 +1,548 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "6c9745be",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020\n",
14
+ " warnings.warn(\n"
15
+ ]
16
+ },
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "\n",
22
+ "LSTM training...\n",
23
+ "LSTM Epoch 1: Train loss 0.9225 | Validation loss 0.9003\n",
24
+ "LSTM Epoch 5: Train loss 0.9064 | Validation loss 0.8971\n",
25
+ "LSTM Epoch 10: Train loss 0.9024 | Validation loss 0.8979\n",
26
+ "LSTM Epoch 15: Train loss 0.9013 | Validation loss 0.8975\n",
27
+ "LSTM Epoch 20: Train loss 0.9458 | Validation loss 0.9297\n",
28
+ "LSTM Epoch 25: Train loss 0.9019 | Validation loss 0.9019\n",
29
+ "LSTM Epoch 30: Train loss 0.9034 | Validation loss 0.9004\n",
30
+ "LSTM Epoch 35: Train loss 0.9002 | Validation loss 0.9023\n",
31
+ "LSTM Epoch 40: Train loss 0.8989 | Validation loss 0.9054\n",
32
+ "LSTM Epoch 45: Train loss 0.8987 | Validation loss 0.9030\n",
33
+ "LSTM Epoch 50: Train loss 0.8978 | Validation loss 0.9007\n"
34
+ ]
35
+ },
36
+ {
37
+ "name": "stderr",
38
+ "output_type": "stream",
39
+ "text": [
40
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
41
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
42
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
43
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
44
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
45
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
46
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
47
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
48
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
49
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
50
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
51
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
52
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
53
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
54
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
55
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
56
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
57
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
58
+ ]
59
+ },
60
+ {
61
+ "name": "stdout",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "\n",
65
+ "LSTM on test1 Classification report:\n",
66
+ " precision recall f1-score support\n",
67
+ "\n",
68
+ " positive 0.0000 0.0000 0.0000 165\n",
69
+ " neutral 0.6585 1.0000 0.7941 430\n",
70
+ " negative 0.0000 0.0000 0.0000 58\n",
71
+ "\n",
72
+ " accuracy 0.6585 653\n",
73
+ " macro avg 0.2195 0.3333 0.2647 653\n",
74
+ "weighted avg 0.4336 0.6585 0.5229 653\n",
75
+ "\n",
76
+ "LSTM on test1 Confusion matrix:\n",
77
+ " [[ 0 165 0]\n",
78
+ " [ 0 430 0]\n",
79
+ " [ 0 58 0]]\n"
80
+ ]
81
+ },
82
+ {
83
+ "name": "stderr",
84
+ "output_type": "stream",
85
+ "text": [
86
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
87
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
88
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
89
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
90
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
91
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
92
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
93
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
94
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
95
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
96
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
97
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
98
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
99
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
100
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
101
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
102
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
103
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
104
+ ]
105
+ },
106
+ {
107
+ "name": "stdout",
108
+ "output_type": "stream",
109
+ "text": [
110
+ "\n",
111
+ "LSTM on test2 Classification report:\n",
112
+ " precision recall f1-score support\n",
113
+ "\n",
114
+ " positive 0.0000 0.0000 0.0000 216\n",
115
+ " neutral 0.5816 1.0000 0.7355 431\n",
116
+ " negative 0.0000 0.0000 0.0000 94\n",
117
+ "\n",
118
+ " accuracy 0.5816 741\n",
119
+ " macro avg 0.1939 0.3333 0.2452 741\n",
120
+ "weighted avg 0.3383 0.5816 0.4278 741\n",
121
+ "\n",
122
+ "LSTM on test2 Confusion matrix:\n",
123
+ " [[ 0 216 0]\n",
124
+ " [ 0 431 0]\n",
125
+ " [ 0 94 0]]\n"
126
+ ]
127
+ },
128
+ {
129
+ "name": "stderr",
130
+ "output_type": "stream",
131
+ "text": [
132
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
133
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
134
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
135
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
136
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
137
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
138
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
139
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
140
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
141
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
142
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
143
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
144
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
145
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
146
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
147
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
148
+ "/Users/ivancarevic/Library/Python/3.9/lib/python/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
149
+ " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
150
+ ]
151
+ },
152
+ {
153
+ "name": "stdout",
154
+ "output_type": "stream",
155
+ "text": [
156
+ "\n",
157
+ "LSTM on test3 Classification report:\n",
158
+ " precision recall f1-score support\n",
159
+ "\n",
160
+ " positive 0.0000 0.0000 0.0000 267\n",
161
+ " neutral 0.3317 1.0000 0.4981 263\n",
162
+ " negative 0.0000 0.0000 0.0000 263\n",
163
+ "\n",
164
+ " accuracy 0.3317 793\n",
165
+ " macro avg 0.1106 0.3333 0.1660 793\n",
166
+ "weighted avg 0.1100 0.3317 0.1652 793\n",
167
+ "\n",
168
+ "LSTM on test3 Confusion matrix:\n",
169
+ " [[ 0 267 0]\n",
170
+ " [ 0 263 0]\n",
171
+ " [ 0 263 0]]\n",
172
+ "\n",
173
+ "GRU training...\n",
174
+ "GRU Epoch 1: Train loss 0.9163 | Validation loss 0.8981\n",
175
+ "GRU Epoch 5: Train loss 0.9048 | Validation loss 0.8972\n",
176
+ "GRU Epoch 10: Train loss 0.8214 | Validation loss 0.8023\n",
177
+ "GRU Epoch 15: Train loss 0.7494 | Validation loss 0.7687\n",
178
+ "GRU Epoch 20: Train loss 0.6789 | Validation loss 0.7580\n",
179
+ "GRU Epoch 25: Train loss 0.5857 | Validation loss 0.8096\n",
180
+ "GRU Epoch 30: Train loss 0.4784 | Validation loss 0.9778\n",
181
+ "GRU Epoch 35: Train loss 0.3589 | Validation loss 1.1809\n",
182
+ "GRU Epoch 40: Train loss 0.2612 | Validation loss 1.3460\n",
183
+ "GRU Epoch 45: Train loss 0.1947 | Validation loss 1.4596\n",
184
+ "GRU Epoch 50: Train loss 0.1336 | Validation loss 1.7536\n",
185
+ "\n",
186
+ "GRU on test1 Classification report:\n",
187
+ " precision recall f1-score support\n",
188
+ "\n",
189
+ " positive 0.4322 0.5212 0.4725 165\n",
190
+ " neutral 0.7457 0.7023 0.7234 430\n",
191
+ " negative 0.1633 0.1379 0.1495 58\n",
192
+ "\n",
193
+ " accuracy 0.6064 653\n",
194
+ " macro avg 0.4470 0.4538 0.4485 653\n",
195
+ "weighted avg 0.6147 0.6064 0.6090 653\n",
196
+ "\n",
197
+ "GRU on test1 Confusion matrix:\n",
198
+ " [[ 86 69 10]\n",
199
+ " [ 97 302 31]\n",
200
+ " [ 16 34 8]]\n",
201
+ "\n",
202
+ "GRU on test2 Classification report:\n",
203
+ " precision recall f1-score support\n",
204
+ "\n",
205
+ " positive 0.8682 0.8843 0.8761 216\n",
206
+ " neutral 0.9211 0.9211 0.9211 431\n",
207
+ " negative 0.7778 0.7447 0.7609 94\n",
208
+ "\n",
209
+ " accuracy 0.8880 741\n",
210
+ " macro avg 0.8557 0.8500 0.8527 741\n",
211
+ "weighted avg 0.8875 0.8880 0.8877 741\n",
212
+ "\n",
213
+ "GRU on test2 Confusion matrix:\n",
214
+ " [[191 19 6]\n",
215
+ " [ 20 397 14]\n",
216
+ " [ 9 15 70]]\n",
217
+ "\n",
218
+ "GRU on test3 Classification report:\n",
219
+ " precision recall f1-score support\n",
220
+ "\n",
221
+ " positive 0.7510 0.7004 0.7248 267\n",
222
+ " neutral 0.5524 0.9011 0.6850 263\n",
223
+ " negative 0.7652 0.3346 0.4656 263\n",
224
+ "\n",
225
+ " accuracy 0.6456 793\n",
226
+ " macro avg 0.6896 0.6454 0.6251 793\n",
227
+ "weighted avg 0.6899 0.6456 0.6256 793\n",
228
+ "\n",
229
+ "GRU on test3 Confusion matrix:\n",
230
+ " [[187 58 22]\n",
231
+ " [ 21 237 5]\n",
232
+ " [ 41 134 88]]\n",
233
+ "\n",
234
+ "CNN training...\n",
235
+ "CNN Epoch 1: Train loss 0.9112 | Validation loss 0.8838\n",
236
+ "CNN Epoch 5: Train loss 0.8149 | Validation loss 0.8114\n",
237
+ "CNN Epoch 10: Train loss 0.7071 | Validation loss 0.7645\n",
238
+ "CNN Epoch 15: Train loss 0.6159 | Validation loss 0.7597\n",
239
+ "CNN Epoch 20: Train loss 0.5508 | Validation loss 0.7568\n",
240
+ "CNN Epoch 25: Train loss 0.4648 | Validation loss 0.7638\n",
241
+ "CNN Epoch 30: Train loss 0.4148 | Validation loss 0.7818\n",
242
+ "CNN Epoch 35: Train loss 0.3572 | Validation loss 0.8047\n",
243
+ "CNN Epoch 40: Train loss 0.3099 | Validation loss 0.8082\n",
244
+ "CNN Epoch 45: Train loss 0.2741 | Validation loss 0.8595\n",
245
+ "CNN Epoch 50: Train loss 0.2376 | Validation loss 0.9191\n",
246
+ "\n",
247
+ "CNN on test1 Classification report:\n",
248
+ " precision recall f1-score support\n",
249
+ "\n",
250
+ " positive 0.4656 0.3697 0.4122 165\n",
251
+ " neutral 0.7224 0.8535 0.7825 430\n",
252
+ " negative 0.6429 0.1552 0.2500 58\n",
253
+ "\n",
254
+ " accuracy 0.6692 653\n",
255
+ " macro avg 0.6103 0.4595 0.4816 653\n",
256
+ "weighted avg 0.6505 0.6692 0.6416 653\n",
257
+ "\n",
258
+ "CNN on test1 Confusion matrix:\n",
259
+ " [[ 61 103 1]\n",
260
+ " [ 59 367 4]\n",
261
+ " [ 11 38 9]]\n",
262
+ "\n",
263
+ "CNN on test2 Classification report:\n",
264
+ " precision recall f1-score support\n",
265
+ "\n",
266
+ " positive 0.9000 0.8333 0.8654 216\n",
267
+ " neutral 0.8936 0.9745 0.9323 431\n",
268
+ " negative 0.9296 0.7021 0.8000 94\n",
269
+ "\n",
270
+ " accuracy 0.8988 741\n",
271
+ " macro avg 0.9077 0.8366 0.8659 741\n",
272
+ "weighted avg 0.9000 0.8988 0.8960 741\n",
273
+ "\n",
274
+ "CNN on test2 Confusion matrix:\n",
275
+ " [[180 33 3]\n",
276
+ " [ 9 420 2]\n",
277
+ " [ 11 17 66]]\n",
278
+ "\n",
279
+ "CNN on test3 Classification report:\n",
280
+ " precision recall f1-score support\n",
281
+ "\n",
282
+ " positive 0.8352 0.5693 0.6771 267\n",
283
+ " neutral 0.4674 0.9810 0.6331 263\n",
284
+ " negative 0.8983 0.2015 0.3292 263\n",
285
+ "\n",
286
+ " accuracy 0.5839 793\n",
287
+ " macro avg 0.7336 0.5839 0.5465 793\n",
288
+ "weighted avg 0.7341 0.5839 0.5471 793\n",
289
+ "\n",
290
+ "CNN on test3 Confusion matrix:\n",
291
+ " [[152 109 6]\n",
292
+ " [ 5 258 0]\n",
293
+ " [ 25 185 53]]\n"
294
+ ]
295
+ }
296
+ ],
297
+ "source": [
298
+ "# !pip install gensim scikit-learn pandas numpy torch tqdm\n",
299
+ "\n",
300
+ "import pandas as pd\n",
301
+ "import numpy as np\n",
302
+ "import torch\n",
303
+ "import torch.nn as nn\n",
304
+ "from torch.utils.data import Dataset, DataLoader\n",
305
+ "from sklearn.metrics import classification_report, confusion_matrix\n",
306
+ "from sklearn.model_selection import train_test_split\n",
307
+ "from collections import Counter\n",
308
+ "import gensim\n",
309
+ "\n",
310
+ "# --- UČITAVANJE I PODJELA PODATAKA ---\n",
311
+ "full_df = pd.read_csv('TRAIN.csv')\n",
312
+ "\n",
313
+ "# Učitaj sve test skupove\n",
314
+ "test1_df = pd.read_csv('test-1.csv')\n",
315
+ "test2_df = pd.read_csv('test-2.csv')\n",
316
+ "test3_df = pd.read_csv('test-3.csv')\n",
317
+ "\n",
318
+ "def get_text_column(df):\n",
319
+ " for col in df.columns:\n",
320
+ " if col.lower() in ['sentence', 'text']:\n",
321
+ " return col\n",
322
+ " raise ValueError(\"Nema stupca 'Sentence' ili 'Text'!\")\n",
323
+ "\n",
324
+ "text_col = get_text_column(full_df)\n",
325
+ "\n",
326
+ "# Stratified split: 95% train, 5% valid\n",
327
+ "train_df, valid_df = train_test_split(full_df, test_size=0.05, stratify=full_df['Label'], random_state=42)\n",
328
+ "\n",
329
+ "# --- TOKENIZACIJA I VOKABULAR ---\n",
330
+ "def tokenize(text):\n",
331
+ " return text.lower().split()\n",
332
+ "\n",
333
+ "counter = Counter()\n",
334
+ "for text in train_df[text_col]:\n",
335
+ " counter.update(tokenize(text))\n",
336
+ "vocab = {word: idx+2 for idx, (word, _) in enumerate(counter.most_common())}\n",
337
+ "vocab['<unk>'] = 0\n",
338
+ "vocab['<pad>'] = 1\n",
339
+ "\n",
340
+ "# --- EMBEDDING ---\n",
341
+ "from gensim.models.fasttext import load_facebook_model\n",
342
+ "\n",
343
+ "embedding_path = 'cc.hr.300.bin'\n",
344
+ "ft_model = load_facebook_model(embedding_path)\n",
345
+ "embeddings = ft_model.wv \n",
346
+ "\n",
347
+ "embedding_dim = embeddings.vector_size\n",
348
+ "embedding_matrix = np.zeros((len(vocab), embedding_dim))\n",
349
+ "for word, idx in vocab.items():\n",
350
+ " if word in embeddings:\n",
351
+ " embedding_matrix[idx] = embeddings[word]\n",
352
+ " else:\n",
353
+ " embedding_matrix[idx] = np.random.normal(scale=0.6, size=(embedding_dim, ))\n",
354
+ "\n",
355
+ "# --- DATASET ---\n",
356
+ "class TextDataset(Dataset):\n",
357
+ " def __init__(self, df, text_col, vocab, max_len=50):\n",
358
+ " self.texts = df[text_col].tolist()\n",
359
+ " self.labels = df['Label'].tolist()\n",
360
+ " self.vocab = vocab\n",
361
+ " self.max_len = max_len\n",
362
+ " def __len__(self):\n",
363
+ " return len(self.texts)\n",
364
+ " def __getitem__(self, idx):\n",
365
+ " tokens = tokenize(self.texts[idx])\n",
366
+ " ids = [self.vocab.get(token, self.vocab['<unk>']) for token in tokens][:self.max_len]\n",
367
+ " ids += [self.vocab['<pad>']] * (self.max_len - len(ids))\n",
368
+ " return torch.tensor(ids), torch.tensor(self.labels[idx])\n",
369
+ "\n",
370
+ "max_len = 50\n",
371
+ "batch_size = 32\n",
372
+ "train_ds = TextDataset(train_df, text_col, vocab, max_len)\n",
373
+ "valid_ds = TextDataset(valid_df, text_col, vocab, max_len)\n",
374
+ "\n",
375
+ "test1_text_col = get_text_column(test1_df)\n",
376
+ "test2_text_col = get_text_column(test2_df)\n",
377
+ "test3_text_col = get_text_column(test3_df)\n",
378
+ "\n",
379
+ "test1_ds = TextDataset(test1_df, test1_text_col, vocab, max_len)\n",
380
+ "test2_ds = TextDataset(test2_df, test2_text_col, vocab, max_len)\n",
381
+ "test3_ds = TextDataset(test3_df, test3_text_col, vocab, max_len)\n",
382
+ "\n",
383
+ "train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)\n",
384
+ "valid_dl = DataLoader(valid_ds, batch_size=batch_size)\n",
385
+ "test1_dl = DataLoader(test1_ds, batch_size=batch_size)\n",
386
+ "test2_dl = DataLoader(test2_ds, batch_size=batch_size)\n",
387
+ "test3_dl = DataLoader(test3_ds, batch_size=batch_size)\n",
388
+ "\n",
389
+ "# --- MODELI ---\n",
390
+ "class LSTMClassifier(nn.Module):\n",
391
+ " def __init__(self, embedding_matrix, hidden_dim=256, num_classes=3, dropout=0.8):\n",
392
+ " super().__init__()\n",
393
+ " num_embeddings, embedding_dim = embedding_matrix.shape\n",
394
+ " self.embedding = nn.Embedding(num_embeddings, embedding_dim)\n",
395
+ " self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))\n",
396
+ " self.embedding.weight.requires_grad = False\n",
397
+ " self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)\n",
398
+ " self.dropout = nn.Dropout(dropout)\n",
399
+ " self.fc = nn.Linear(hidden_dim, num_classes)\n",
400
+ " def forward(self, x):\n",
401
+ " x = self.embedding(x)\n",
402
+ " _, (hidden, _) = self.lstm(x)\n",
403
+ " out = self.dropout(hidden[-1])\n",
404
+ " return self.fc(out)\n",
405
+ "\n",
406
+ "class GRUClassifier(nn.Module):\n",
407
+ " def __init__(self, embedding_matrix, hidden_dim=256, num_classes=3, dropout=0.8):\n",
408
+ " super().__init__()\n",
409
+ " num_embeddings, embedding_dim = embedding_matrix.shape\n",
410
+ " self.embedding = nn.Embedding(num_embeddings, embedding_dim)\n",
411
+ " self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))\n",
412
+ " self.embedding.weight.requires_grad = False\n",
413
+ " self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)\n",
414
+ " self.dropout = nn.Dropout(dropout)\n",
415
+ " self.fc = nn.Linear(hidden_dim, num_classes)\n",
416
+ " def forward(self, x):\n",
417
+ " x = self.embedding(x)\n",
418
+ " _, hidden = self.gru(x)\n",
419
+ " out = self.dropout(hidden[-1])\n",
420
+ " return self.fc(out)\n",
421
+ "\n",
422
+ "class CNNClassifier(nn.Module):\n",
423
+ " def __init__(self, embedding_matrix, num_filters=128, kernel_sizes=[3,4,5], num_classes=3, dropout=0.8):\n",
424
+ " super().__init__()\n",
425
+ " num_embeddings, embedding_dim = embedding_matrix.shape\n",
426
+ " self.embedding = nn.Embedding(num_embeddings, embedding_dim)\n",
427
+ " self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))\n",
428
+ " self.embedding.weight.requires_grad = False\n",
429
+ " self.convs = nn.ModuleList([\n",
430
+ " nn.Conv2d(1, num_filters, (k, embedding_dim)) for k in kernel_sizes\n",
431
+ " ])\n",
432
+ " self.dropout = nn.Dropout(dropout)\n",
433
+ " self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)\n",
434
+ " def forward(self, x):\n",
435
+ " x = self.embedding(x)\n",
436
+ " x = x.unsqueeze(1)\n",
437
+ " x = [torch.relu(conv(x)).squeeze(3) for conv in self.convs]\n",
438
+ " x = [torch.max(pool, dim=2)[0] for pool in x]\n",
439
+ " x = torch.cat(x, dim=1)\n",
440
+ " x = self.dropout(x)\n",
441
+ " return self.fc(x)\n",
442
+ "\n",
443
+ "# --- TRENING I VALIDACIJA ---\n",
444
+ "def train_epoch(model, dataloader, optimizer, criterion, device):\n",
445
+ " model.train()\n",
446
+ " total_loss = 0\n",
447
+ " for x, y in dataloader:\n",
448
+ " x, y = x.to(device), y.to(device)\n",
449
+ " optimizer.zero_grad()\n",
450
+ " logits = model(x)\n",
451
+ " loss = criterion(logits, y)\n",
452
+ " loss.backward()\n",
453
+ " optimizer.step()\n",
454
+ " total_loss += loss.item()\n",
455
+ " return total_loss / len(dataloader)\n",
456
+ "\n",
457
+ "def eval_model(model, dataloader, device, criterion=None, return_loss=False):\n",
458
+ " model.eval()\n",
459
+ " preds, targets = [], []\n",
460
+ " total_loss = 0\n",
461
+ " with torch.no_grad():\n",
462
+ " for x, y in dataloader:\n",
463
+ " x, y = x.to(device), y.to(device)\n",
464
+ " logits = model(x)\n",
465
+ " if criterion and return_loss:\n",
466
+ " loss = criterion(logits, y)\n",
467
+ " total_loss += loss.item()\n",
468
+ " pred = logits.argmax(1).cpu().numpy()\n",
469
+ " preds.extend(pred)\n",
470
+ " targets.extend(y.cpu().numpy())\n",
471
+ " if return_loss and criterion:\n",
472
+ " return np.array(preds), np.array(targets), total_loss / len(dataloader)\n",
473
+ " return np.array(preds), np.array(targets)\n",
474
+ "\n",
475
+ "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
476
+ "\n",
477
+ "def run_training(model_class, name, epochs=50, dropout=0.8, lr=5e-4):\n",
478
+ " print(f\"\\n{name} training...\")\n",
479
+ " model = model_class(embedding_matrix, dropout=dropout).to(device)\n",
480
+ " optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n",
481
+ " criterion = nn.CrossEntropyLoss()\n",
482
+ " for epoch in range(epochs):\n",
483
+ " train_loss = train_epoch(model, train_dl, optimizer, criterion, device)\n",
484
+ " _, _, val_loss = eval_model(model, valid_dl, device, criterion, return_loss=True)\n",
485
+ " if (epoch+1) % 5 == 0 or epoch == 0:\n",
486
+ " print(f\"{name} Epoch {epoch+1}: Train loss {train_loss:.4f} | Validation loss {val_loss:.4f}\")\n",
487
+ " results = {}\n",
488
+ " for test_name, test_dl in zip(\n",
489
+ " ['test1', 'test2', 'test3'],\n",
490
+ " [test1_dl, test2_dl, test3_dl]\n",
491
+ " ):\n",
492
+ " preds, targets = eval_model(model, test_dl, device)\n",
493
+ " report = classification_report(targets, preds, digits=4, output_dict=True, target_names=[\"positive\", \"neutral\", \"negative\"])\n",
494
+ " matrix = confusion_matrix(targets, preds)\n",
495
+ " print(f\"\\n{name} on {test_name} Classification report:\\n\", classification_report(targets, preds, digits=4, target_names=[\"positive\", \"neutral\", \"negative\"]))\n",
496
+ " print(f\"{name} on {test_name} Confusion matrix:\\n\", matrix)\n",
497
+ " results[test_name] = {\n",
498
+ " 'precision': report['macro avg']['precision'],\n",
499
+ " 'recall': report['macro avg']['recall'],\n",
500
+ " 'f1': report['macro avg']['f1-score'],\n",
501
+ " 'accuracy': report['accuracy'],\n",
502
+ " 'confusion_matrix': matrix.tolist(),\n",
503
+ " 'full_report': classification_report(targets, preds, digits=4, target_names=[\"positive\", \"neutral\", \"negative\"])\n",
504
+ " }\n",
505
+ " return results\n",
506
+ "\n",
507
+ "# --- POKRETANJE ---\n",
508
+ "lstm_results = run_training(LSTMClassifier, \"LSTM\", epochs=50, dropout=0.8, lr=5e-4)\n",
509
+ "gru_results = run_training(GRUClassifier, \"GRU\", epochs=50, dropout=0.8, lr=5e-4)\n",
510
+ "cnn_results = run_training(CNNClassifier, \"CNN\", epochs=50, dropout=0.8, lr=5e-4)\n",
511
+ "\n",
512
+ "# --- SPREMANJE ---\n",
513
+ "with open('results.md', 'w', encoding='utf-8') as f:\n",
514
+ " for model_name, results in [('LSTM', lstm_results), ('GRU', gru_results), ('CNN', cnn_results)]:\n",
515
+ " f.write(f\"## {model_name}\\n\\n\")\n",
516
+ " for test_name, res in results.items():\n",
517
+ " f.write(f\"### {test_name}\\n\")\n",
518
+ " f.write(f\"- Precision: {res['precision']:.4f}\\n\")\n",
519
+ " f.write(f\"- Recall: {res['recall']:.4f}\\n\")\n",
520
+ " f.write(f\"- F1: {res['f1']:.4f}\\n\")\n",
521
+ " f.write(f\"- Accuracy: {res['accuracy']:.4f}\\n\")\n",
522
+ " f.write(f\"- Confusion matrix: {res['confusion_matrix']}\\n\\n\")\n",
523
+ " f.write(f\"Full classification report:\\n{res['full_report']}\\n\\n\")\n"
524
+ ]
525
+ }
526
+ ],
527
+ "metadata": {
528
+ "kernelspec": {
529
+ "display_name": "Python 3",
530
+ "language": "python",
531
+ "name": "python3"
532
+ },
533
+ "language_info": {
534
+ "codemirror_mode": {
535
+ "name": "ipython",
536
+ "version": 3
537
+ },
538
+ "file_extension": ".py",
539
+ "mimetype": "text/x-python",
540
+ "name": "python",
541
+ "nbconvert_exporter": "python",
542
+ "pygments_lexer": "ipython3",
543
+ "version": "3.9.6"
544
+ }
545
+ },
546
+ "nbformat": 4,
547
+ "nbformat_minor": 5
548
+ }
results(test1,2,3).md ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## LSTM
2
+
3
+ ### test1
4
+ - Precision: 0.2195
5
+ - Recall: 0.3333
6
+ - F1: 0.2647
7
+ - Accuracy: 0.6585
8
+ - Confusion matrix: [[0, 165, 0], [0, 430, 0], [0, 58, 0]]
9
+
10
+ Full classification report:
11
+ precision recall f1-score support
12
+
13
+ positive 0.0000 0.0000 0.0000 165
14
+ neutral 0.6585 1.0000 0.7941 430
15
+ negative 0.0000 0.0000 0.0000 58
16
+
17
+ accuracy 0.6585 653
18
+ macro avg 0.2195 0.3333 0.2647 653
19
+ weighted avg 0.4336 0.6585 0.5229 653
20
+
21
+
22
+ ### test2
23
+ - Precision: 0.1939
24
+ - Recall: 0.3333
25
+ - F1: 0.2452
26
+ - Accuracy: 0.5816
27
+ - Confusion matrix: [[0, 216, 0], [0, 431, 0], [0, 94, 0]]
28
+
29
+ Full classification report:
30
+ precision recall f1-score support
31
+
32
+ positive 0.0000 0.0000 0.0000 216
33
+ neutral 0.5816 1.0000 0.7355 431
34
+ negative 0.0000 0.0000 0.0000 94
35
+
36
+ accuracy 0.5816 741
37
+ macro avg 0.1939 0.3333 0.2452 741
38
+ weighted avg 0.3383 0.5816 0.4278 741
39
+
40
+
41
+ ### test3
42
+ - Precision: 0.1106
43
+ - Recall: 0.3333
44
+ - F1: 0.1660
45
+ - Accuracy: 0.3317
46
+ - Confusion matrix: [[0, 267, 0], [0, 263, 0], [0, 263, 0]]
47
+
48
+ Full classification report:
49
+ precision recall f1-score support
50
+
51
+ positive 0.0000 0.0000 0.0000 267
52
+ neutral 0.3317 1.0000 0.4981 263
53
+ negative 0.0000 0.0000 0.0000 263
54
+
55
+ accuracy 0.3317 793
56
+ macro avg 0.1106 0.3333 0.1660 793
57
+ weighted avg 0.1100 0.3317 0.1652 793
58
+
59
+
60
+ ## GRU
61
+
62
+ ### test1
63
+ - Precision: 0.4470
64
+ - Recall: 0.4538
65
+ - F1: 0.4485
66
+ - Accuracy: 0.6064
67
+ - Confusion matrix: [[86, 69, 10], [97, 302, 31], [16, 34, 8]]
68
+
69
+ Full classification report:
70
+ precision recall f1-score support
71
+
72
+ positive 0.4322 0.5212 0.4725 165
73
+ neutral 0.7457 0.7023 0.7234 430
74
+ negative 0.1633 0.1379 0.1495 58
75
+
76
+ accuracy 0.6064 653
77
+ macro avg 0.4470 0.4538 0.4485 653
78
+ weighted avg 0.6147 0.6064 0.6090 653
79
+
80
+
81
+ ### test2
82
+ - Precision: 0.8557
83
+ - Recall: 0.8500
84
+ - F1: 0.8527
85
+ - Accuracy: 0.8880
86
+ - Confusion matrix: [[191, 19, 6], [20, 397, 14], [9, 15, 70]]
87
+
88
+ Full classification report:
89
+ precision recall f1-score support
90
+
91
+ positive 0.8682 0.8843 0.8761 216
92
+ neutral 0.9211 0.9211 0.9211 431
93
+ negative 0.7778 0.7447 0.7609 94
94
+
95
+ accuracy 0.8880 741
96
+ macro avg 0.8557 0.8500 0.8527 741
97
+ weighted avg 0.8875 0.8880 0.8877 741
98
+
99
+
100
+ ### test3
101
+ - Precision: 0.6896
102
+ - Recall: 0.6454
103
+ - F1: 0.6251
104
+ - Accuracy: 0.6456
105
+ - Confusion matrix: [[187, 58, 22], [21, 237, 5], [41, 134, 88]]
106
+
107
+ Full classification report:
108
+ precision recall f1-score support
109
+
110
+ positive 0.7510 0.7004 0.7248 267
111
+ neutral 0.5524 0.9011 0.6850 263
112
+ negative 0.7652 0.3346 0.4656 263
113
+
114
+ accuracy 0.6456 793
115
+ macro avg 0.6896 0.6454 0.6251 793
116
+ weighted avg 0.6899 0.6456 0.6256 793
117
+
118
+
119
+ ## CNN
120
+
121
+ ### test1
122
+ - Precision: 0.6103
123
+ - Recall: 0.4595
124
+ - F1: 0.4816
125
+ - Accuracy: 0.6692
126
+ - Confusion matrix: [[61, 103, 1], [59, 367, 4], [11, 38, 9]]
127
+
128
+ Full classification report:
129
+ precision recall f1-score support
130
+
131
+ positive 0.4656 0.3697 0.4122 165
132
+ neutral 0.7224 0.8535 0.7825 430
133
+ negative 0.6429 0.1552 0.2500 58
134
+
135
+ accuracy 0.6692 653
136
+ macro avg 0.6103 0.4595 0.4816 653
137
+ weighted avg 0.6505 0.6692 0.6416 653
138
+
139
+
140
+ ### test2
141
+ - Precision: 0.9077
142
+ - Recall: 0.8366
143
+ - F1: 0.8659
144
+ - Accuracy: 0.8988
145
+ - Confusion matrix: [[180, 33, 3], [9, 420, 2], [11, 17, 66]]
146
+
147
+ Full classification report:
148
+ precision recall f1-score support
149
+
150
+ positive 0.9000 0.8333 0.8654 216
151
+ neutral 0.8936 0.9745 0.9323 431
152
+ negative 0.9296 0.7021 0.8000 94
153
+
154
+ accuracy 0.8988 741
155
+ macro avg 0.9077 0.8366 0.8659 741
156
+ weighted avg 0.9000 0.8988 0.8960 741
157
+
158
+
159
+ ### test3
160
+ - Precision: 0.7336
161
+ - Recall: 0.5839
162
+ - F1: 0.5465
163
+ - Accuracy: 0.5839
164
+ - Confusion matrix: [[152, 109, 6], [5, 258, 0], [25, 185, 53]]
165
+
166
+ Full classification report:
167
+ precision recall f1-score support
168
+
169
+ positive 0.8352 0.5693 0.6771 267
170
+ neutral 0.4674 0.9810 0.6331 263
171
+ negative 0.8983 0.2015 0.3292 263
172
+
173
+ accuracy 0.5839 793
174
+ macro avg 0.7336 0.5839 0.5465 793
175
+ weighted avg 0.7341 0.5839 0.5471 793
176
+
177
+