qubvel-hf HF Staff committed on
Commit
c95fb3e
·
verified ·
1 Parent(s): b913c79

Fix weights

Browse files
Files changed (3) hide show
  1. config.json +344 -344
  2. model.safetensors +1 -1
  3. preprocessor_config.json +26 -26
config.json CHANGED
@@ -1,344 +1,344 @@
1
- {
2
- "activation_dropout": 0.0,
3
- "activation_function": "silu",
4
- "anchor_image_size": null,
5
- "architectures": [
6
- "DFineForObjectDetection"
7
- ],
8
- "attention_dropout": 0.0,
9
- "auxiliary_loss": true,
10
- "backbone": null,
11
- "backbone_config": {
12
- "depths": [
13
- 3,
14
- 4,
15
- 6,
16
- 3
17
- ],
18
- "downsample_in_bottleneck": false,
19
- "downsample_in_first_stage": false,
20
- "embedding_size": 32,
21
- "hidden_act": "relu",
22
- "hidden_sizes": [
23
- 128,
24
- 256,
25
- 512,
26
- 1024
27
- ],
28
- "initializer_range": 0.02,
29
- "layer_type": "basic",
30
- "model_type": "hgnet_v2",
31
- "num_channels": 3,
32
- "out_features": [
33
- "stage3",
34
- "stage4"
35
- ],
36
- "out_indices": [
37
- 3,
38
- 4
39
- ],
40
- "stage_downsample": [
41
- false,
42
- true,
43
- true,
44
- true
45
- ],
46
- "stage_in_channels": [
47
- 16,
48
- 64,
49
- 256,
50
- 512
51
- ],
52
- "stage_kernel_size": [
53
- 3,
54
- 3,
55
- 5,
56
- 5
57
- ],
58
- "stage_light_block": [
59
- false,
60
- false,
61
- true,
62
- true
63
- ],
64
- "stage_mid_channels": [
65
- 16,
66
- 32,
67
- 64,
68
- 128
69
- ],
70
- "stage_names": [
71
- "stem",
72
- "stage1",
73
- "stage2",
74
- "stage3",
75
- "stage4"
76
- ],
77
- "stage_num_blocks": [
78
- 1,
79
- 1,
80
- 2,
81
- 1
82
- ],
83
- "stage_numb_of_layers": [
84
- 3,
85
- 3,
86
- 3,
87
- 3
88
- ],
89
- "stage_out_channels": [
90
- 64,
91
- 256,
92
- 512,
93
- 1024
94
- ],
95
- "stem_channels": [
96
- 3,
97
- 16,
98
- 16
99
- ],
100
- "use_learnable_affine_block": true
101
- },
102
- "backbone_kwargs": null,
103
- "batch_norm_eps": 1e-05,
104
- "box_noise_scale": 1.0,
105
- "d_model": 128,
106
- "decoder_activation_function": "relu",
107
- "decoder_attention_heads": 8,
108
- "decoder_ffn_dim": 512,
109
- "decoder_in_channels": [
110
- 128,
111
- 128
112
- ],
113
- "decoder_layers": 3,
114
- "decoder_method": "default",
115
- "decoder_n_points": [
116
- 6,
117
- 6
118
- ],
119
- "decoder_offset_scale": 0.5,
120
- "depth_mult": 0.5,
121
- "dropout": 0.0,
122
- "encode_proj_layers": [
123
- 1
124
- ],
125
- "encoder_activation_function": "gelu",
126
- "encoder_attention_heads": 8,
127
- "encoder_ffn_dim": 512,
128
- "encoder_hidden_dim": 128,
129
- "encoder_in_channels": [
130
- 512,
131
- 1024
132
- ],
133
- "encoder_layers": 1,
134
- "eos_coefficient": 0.0001,
135
- "eval_idx": -1,
136
- "eval_size": null,
137
- "feat_strides": [
138
- 16,
139
- 32
140
- ],
141
- "focal_loss_alpha": 0.75,
142
- "focal_loss_gamma": 2.0,
143
- "freeze_backbone_batch_norms": true,
144
- "hidden_expansion": 0.34,
145
- "id2label": {
146
- "0": "person",
147
- "1": "bicycle",
148
- "2": "car",
149
- "3": "motorbike",
150
- "4": "aeroplane",
151
- "5": "bus",
152
- "6": "train",
153
- "7": "truck",
154
- "8": "boat",
155
- "9": "traffic light",
156
- "10": "fire hydrant",
157
- "11": "stop sign",
158
- "12": "parking meter",
159
- "13": "bench",
160
- "14": "bird",
161
- "15": "cat",
162
- "16": "dog",
163
- "17": "horse",
164
- "18": "sheep",
165
- "19": "cow",
166
- "20": "elephant",
167
- "21": "bear",
168
- "22": "zebra",
169
- "23": "giraffe",
170
- "24": "backpack",
171
- "25": "umbrella",
172
- "26": "handbag",
173
- "27": "tie",
174
- "28": "suitcase",
175
- "29": "frisbee",
176
- "30": "skis",
177
- "31": "snowboard",
178
- "32": "sports ball",
179
- "33": "kite",
180
- "34": "baseball bat",
181
- "35": "baseball glove",
182
- "36": "skateboard",
183
- "37": "surfboard",
184
- "38": "tennis racket",
185
- "39": "bottle",
186
- "40": "wine glass",
187
- "41": "cup",
188
- "42": "fork",
189
- "43": "knife",
190
- "44": "spoon",
191
- "45": "bowl",
192
- "46": "banana",
193
- "47": "apple",
194
- "48": "sandwich",
195
- "49": "orange",
196
- "50": "broccoli",
197
- "51": "carrot",
198
- "52": "hot dog",
199
- "53": "pizza",
200
- "54": "donut",
201
- "55": "cake",
202
- "56": "chair",
203
- "57": "sofa",
204
- "58": "pottedplant",
205
- "59": "bed",
206
- "60": "diningtable",
207
- "61": "toilet",
208
- "62": "tvmonitor",
209
- "63": "laptop",
210
- "64": "mouse",
211
- "65": "remote",
212
- "66": "keyboard",
213
- "67": "cell phone",
214
- "68": "microwave",
215
- "69": "oven",
216
- "70": "toaster",
217
- "71": "sink",
218
- "72": "refrigerator",
219
- "73": "book",
220
- "74": "clock",
221
- "75": "vase",
222
- "76": "scissors",
223
- "77": "teddy bear",
224
- "78": "hair drier",
225
- "79": "toothbrush"
226
- },
227
- "initializer_bias_prior_prob": null,
228
- "initializer_range": 0.01,
229
- "is_encoder_decoder": true,
230
- "label2id": {
231
- "aeroplane": 4,
232
- "apple": 47,
233
- "backpack": 24,
234
- "banana": 46,
235
- "baseball bat": 34,
236
- "baseball glove": 35,
237
- "bear": 21,
238
- "bed": 59,
239
- "bench": 13,
240
- "bicycle": 1,
241
- "bird": 14,
242
- "boat": 8,
243
- "book": 73,
244
- "bottle": 39,
245
- "bowl": 45,
246
- "broccoli": 50,
247
- "bus": 5,
248
- "cake": 55,
249
- "car": 2,
250
- "carrot": 51,
251
- "cat": 15,
252
- "cell phone": 67,
253
- "chair": 56,
254
- "clock": 74,
255
- "cow": 19,
256
- "cup": 41,
257
- "diningtable": 60,
258
- "dog": 16,
259
- "donut": 54,
260
- "elephant": 20,
261
- "fire hydrant": 10,
262
- "fork": 42,
263
- "frisbee": 29,
264
- "giraffe": 23,
265
- "hair drier": 78,
266
- "handbag": 26,
267
- "horse": 17,
268
- "hot dog": 52,
269
- "keyboard": 66,
270
- "kite": 33,
271
- "knife": 43,
272
- "laptop": 63,
273
- "microwave": 68,
274
- "motorbike": 3,
275
- "mouse": 64,
276
- "orange": 49,
277
- "oven": 69,
278
- "parking meter": 12,
279
- "person": 0,
280
- "pizza": 53,
281
- "pottedplant": 58,
282
- "refrigerator": 72,
283
- "remote": 65,
284
- "sandwich": 48,
285
- "scissors": 76,
286
- "sheep": 18,
287
- "sink": 71,
288
- "skateboard": 36,
289
- "skis": 30,
290
- "snowboard": 31,
291
- "sofa": 57,
292
- "spoon": 44,
293
- "sports ball": 32,
294
- "stop sign": 11,
295
- "suitcase": 28,
296
- "surfboard": 37,
297
- "teddy bear": 77,
298
- "tennis racket": 38,
299
- "tie": 27,
300
- "toaster": 70,
301
- "toilet": 61,
302
- "toothbrush": 79,
303
- "traffic light": 9,
304
- "train": 6,
305
- "truck": 7,
306
- "tvmonitor": 62,
307
- "umbrella": 25,
308
- "vase": 75,
309
- "wine glass": 40,
310
- "zebra": 22
311
- },
312
- "label_noise_ratio": 0.5,
313
- "layer_norm_eps": 1e-05,
314
- "layer_scale": 1,
315
- "learn_initial_query": false,
316
- "lqe_hidden_dim": 64,
317
- "lqe_layers": 2,
318
- "matcher_alpha": 0.25,
319
- "matcher_bbox_cost": 5.0,
320
- "matcher_class_cost": 2.0,
321
- "matcher_gamma": 2.0,
322
- "matcher_giou_cost": 2.0,
323
- "max_num_bins": 32,
324
- "model_type": "d_fine",
325
- "normalize_before": false,
326
- "num_denoising": 100,
327
- "num_feature_levels": 2,
328
- "num_queries": 300,
329
- "positional_encoding_temperature": 10000,
330
- "reg_scale": 4.0,
331
- "top_prob_values": 4,
332
- "torch_dtype": "float32",
333
- "transformers_version": "4.51.0.dev0",
334
- "up": 0.5,
335
- "use_focal_loss": true,
336
- "use_pretrained_backbone": false,
337
- "use_timm_backbone": false,
338
- "weight_loss_bbox": 5.0,
339
- "weight_loss_ddf": 1.5,
340
- "weight_loss_fgl": 0.15,
341
- "weight_loss_giou": 2.0,
342
- "weight_loss_vfl": 1.0,
343
- "with_box_refine": true
344
- }
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "silu",
4
+ "anchor_image_size": null,
5
+ "architectures": [
6
+ "DFineForObjectDetection"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "auxiliary_loss": true,
10
+ "backbone": null,
11
+ "backbone_config": {
12
+ "depths": [
13
+ 3,
14
+ 4,
15
+ 6,
16
+ 3
17
+ ],
18
+ "downsample_in_bottleneck": false,
19
+ "downsample_in_first_stage": false,
20
+ "embedding_size": 32,
21
+ "hidden_act": "relu",
22
+ "hidden_sizes": [
23
+ 128,
24
+ 256,
25
+ 512,
26
+ 1024
27
+ ],
28
+ "initializer_range": 0.02,
29
+ "layer_type": "basic",
30
+ "model_type": "hgnet_v2",
31
+ "num_channels": 3,
32
+ "out_features": [
33
+ "stage3",
34
+ "stage4"
35
+ ],
36
+ "out_indices": [
37
+ 3,
38
+ 4
39
+ ],
40
+ "stage_downsample": [
41
+ false,
42
+ true,
43
+ true,
44
+ true
45
+ ],
46
+ "stage_in_channels": [
47
+ 16,
48
+ 64,
49
+ 256,
50
+ 512
51
+ ],
52
+ "stage_kernel_size": [
53
+ 3,
54
+ 3,
55
+ 5,
56
+ 5
57
+ ],
58
+ "stage_light_block": [
59
+ false,
60
+ false,
61
+ true,
62
+ true
63
+ ],
64
+ "stage_mid_channels": [
65
+ 16,
66
+ 32,
67
+ 64,
68
+ 128
69
+ ],
70
+ "stage_names": [
71
+ "stem",
72
+ "stage1",
73
+ "stage2",
74
+ "stage3",
75
+ "stage4"
76
+ ],
77
+ "stage_num_blocks": [
78
+ 1,
79
+ 1,
80
+ 2,
81
+ 1
82
+ ],
83
+ "stage_numb_of_layers": [
84
+ 3,
85
+ 3,
86
+ 3,
87
+ 3
88
+ ],
89
+ "stage_out_channels": [
90
+ 64,
91
+ 256,
92
+ 512,
93
+ 1024
94
+ ],
95
+ "stem_channels": [
96
+ 3,
97
+ 16,
98
+ 16
99
+ ],
100
+ "use_learnable_affine_block": true
101
+ },
102
+ "backbone_kwargs": null,
103
+ "batch_norm_eps": 1e-05,
104
+ "box_noise_scale": 1.0,
105
+ "d_model": 128,
106
+ "decoder_activation_function": "relu",
107
+ "decoder_attention_heads": 8,
108
+ "decoder_ffn_dim": 512,
109
+ "decoder_in_channels": [
110
+ 128,
111
+ 128
112
+ ],
113
+ "decoder_layers": 3,
114
+ "decoder_method": "default",
115
+ "decoder_n_points": [
116
+ 6,
117
+ 6
118
+ ],
119
+ "decoder_offset_scale": 0.5,
120
+ "depth_mult": 0.5,
121
+ "dropout": 0.0,
122
+ "encode_proj_layers": [
123
+ 1
124
+ ],
125
+ "encoder_activation_function": "gelu",
126
+ "encoder_attention_heads": 8,
127
+ "encoder_ffn_dim": 512,
128
+ "encoder_hidden_dim": 128,
129
+ "encoder_in_channels": [
130
+ 512,
131
+ 1024
132
+ ],
133
+ "encoder_layers": 1,
134
+ "eos_coefficient": 0.0001,
135
+ "eval_idx": -1,
136
+ "eval_size": null,
137
+ "feat_strides": [
138
+ 16,
139
+ 32
140
+ ],
141
+ "focal_loss_alpha": 0.75,
142
+ "focal_loss_gamma": 2.0,
143
+ "freeze_backbone_batch_norms": true,
144
+ "hidden_expansion": 0.34,
145
+ "id2label": {
146
+ "0": "person",
147
+ "1": "bicycle",
148
+ "2": "car",
149
+ "3": "motorbike",
150
+ "4": "aeroplane",
151
+ "5": "bus",
152
+ "6": "train",
153
+ "7": "truck",
154
+ "8": "boat",
155
+ "9": "traffic light",
156
+ "10": "fire hydrant",
157
+ "11": "stop sign",
158
+ "12": "parking meter",
159
+ "13": "bench",
160
+ "14": "bird",
161
+ "15": "cat",
162
+ "16": "dog",
163
+ "17": "horse",
164
+ "18": "sheep",
165
+ "19": "cow",
166
+ "20": "elephant",
167
+ "21": "bear",
168
+ "22": "zebra",
169
+ "23": "giraffe",
170
+ "24": "backpack",
171
+ "25": "umbrella",
172
+ "26": "handbag",
173
+ "27": "tie",
174
+ "28": "suitcase",
175
+ "29": "frisbee",
176
+ "30": "skis",
177
+ "31": "snowboard",
178
+ "32": "sports ball",
179
+ "33": "kite",
180
+ "34": "baseball bat",
181
+ "35": "baseball glove",
182
+ "36": "skateboard",
183
+ "37": "surfboard",
184
+ "38": "tennis racket",
185
+ "39": "bottle",
186
+ "40": "wine glass",
187
+ "41": "cup",
188
+ "42": "fork",
189
+ "43": "knife",
190
+ "44": "spoon",
191
+ "45": "bowl",
192
+ "46": "banana",
193
+ "47": "apple",
194
+ "48": "sandwich",
195
+ "49": "orange",
196
+ "50": "broccoli",
197
+ "51": "carrot",
198
+ "52": "hot dog",
199
+ "53": "pizza",
200
+ "54": "donut",
201
+ "55": "cake",
202
+ "56": "chair",
203
+ "57": "sofa",
204
+ "58": "pottedplant",
205
+ "59": "bed",
206
+ "60": "diningtable",
207
+ "61": "toilet",
208
+ "62": "tvmonitor",
209
+ "63": "laptop",
210
+ "64": "mouse",
211
+ "65": "remote",
212
+ "66": "keyboard",
213
+ "67": "cell phone",
214
+ "68": "microwave",
215
+ "69": "oven",
216
+ "70": "toaster",
217
+ "71": "sink",
218
+ "72": "refrigerator",
219
+ "73": "book",
220
+ "74": "clock",
221
+ "75": "vase",
222
+ "76": "scissors",
223
+ "77": "teddy bear",
224
+ "78": "hair drier",
225
+ "79": "toothbrush"
226
+ },
227
+ "initializer_bias_prior_prob": null,
228
+ "initializer_range": 0.01,
229
+ "is_encoder_decoder": true,
230
+ "label2id": {
231
+ "aeroplane": 4,
232
+ "apple": 47,
233
+ "backpack": 24,
234
+ "banana": 46,
235
+ "baseball bat": 34,
236
+ "baseball glove": 35,
237
+ "bear": 21,
238
+ "bed": 59,
239
+ "bench": 13,
240
+ "bicycle": 1,
241
+ "bird": 14,
242
+ "boat": 8,
243
+ "book": 73,
244
+ "bottle": 39,
245
+ "bowl": 45,
246
+ "broccoli": 50,
247
+ "bus": 5,
248
+ "cake": 55,
249
+ "car": 2,
250
+ "carrot": 51,
251
+ "cat": 15,
252
+ "cell phone": 67,
253
+ "chair": 56,
254
+ "clock": 74,
255
+ "cow": 19,
256
+ "cup": 41,
257
+ "diningtable": 60,
258
+ "dog": 16,
259
+ "donut": 54,
260
+ "elephant": 20,
261
+ "fire hydrant": 10,
262
+ "fork": 42,
263
+ "frisbee": 29,
264
+ "giraffe": 23,
265
+ "hair drier": 78,
266
+ "handbag": 26,
267
+ "horse": 17,
268
+ "hot dog": 52,
269
+ "keyboard": 66,
270
+ "kite": 33,
271
+ "knife": 43,
272
+ "laptop": 63,
273
+ "microwave": 68,
274
+ "motorbike": 3,
275
+ "mouse": 64,
276
+ "orange": 49,
277
+ "oven": 69,
278
+ "parking meter": 12,
279
+ "person": 0,
280
+ "pizza": 53,
281
+ "pottedplant": 58,
282
+ "refrigerator": 72,
283
+ "remote": 65,
284
+ "sandwich": 48,
285
+ "scissors": 76,
286
+ "sheep": 18,
287
+ "sink": 71,
288
+ "skateboard": 36,
289
+ "skis": 30,
290
+ "snowboard": 31,
291
+ "sofa": 57,
292
+ "spoon": 44,
293
+ "sports ball": 32,
294
+ "stop sign": 11,
295
+ "suitcase": 28,
296
+ "surfboard": 37,
297
+ "teddy bear": 77,
298
+ "tennis racket": 38,
299
+ "tie": 27,
300
+ "toaster": 70,
301
+ "toilet": 61,
302
+ "toothbrush": 79,
303
+ "traffic light": 9,
304
+ "train": 6,
305
+ "truck": 7,
306
+ "tvmonitor": 62,
307
+ "umbrella": 25,
308
+ "vase": 75,
309
+ "wine glass": 40,
310
+ "zebra": 22
311
+ },
312
+ "label_noise_ratio": 0.5,
313
+ "layer_norm_eps": 1e-05,
314
+ "layer_scale": 1,
315
+ "learn_initial_query": false,
316
+ "lqe_hidden_dim": 64,
317
+ "lqe_layers": 2,
318
+ "matcher_alpha": 0.25,
319
+ "matcher_bbox_cost": 5.0,
320
+ "matcher_class_cost": 2.0,
321
+ "matcher_gamma": 2.0,
322
+ "matcher_giou_cost": 2.0,
323
+ "max_num_bins": 32,
324
+ "model_type": "d_fine",
325
+ "normalize_before": false,
326
+ "num_denoising": 100,
327
+ "num_feature_levels": 2,
328
+ "num_queries": 300,
329
+ "positional_encoding_temperature": 10000,
330
+ "reg_scale": 4.0,
331
+ "top_prob_values": 4,
332
+ "torch_dtype": "float32",
333
+ "transformers_version": "4.56.0.dev0",
334
+ "up": 0.5,
335
+ "use_focal_loss": true,
336
+ "use_pretrained_backbone": false,
337
+ "use_timm_backbone": false,
338
+ "weight_loss_bbox": 5.0,
339
+ "weight_loss_ddf": 1.5,
340
+ "weight_loss_fgl": 0.15,
341
+ "weight_loss_giou": 2.0,
342
+ "weight_loss_vfl": 1.0,
343
+ "with_box_refine": true
344
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2875ef5e84a226d66361582ef002e1a1991fc58b2f01cbd5abf1aa78224563ad
3
  size 15278996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19e06bdc873da819920a8d373b879721a5b9759d822f8213220bb09abbdab58b
3
  size 15278996
preprocessor_config.json CHANGED
@@ -1,26 +1,26 @@
1
- {
2
- "do_convert_annotations": true,
3
- "do_normalize": false,
4
- "do_pad": false,
5
- "do_rescale": true,
6
- "do_resize": true,
7
- "format": "coco_detection",
8
- "image_mean": [
9
- 0.485,
10
- 0.456,
11
- 0.406
12
- ],
13
- "image_processor_type": "RTDetrImageProcessor",
14
- "image_std": [
15
- 0.229,
16
- 0.224,
17
- 0.225
18
- ],
19
- "pad_size": null,
20
- "resample": 2,
21
- "rescale_factor": 0.00392156862745098,
22
- "size": {
23
- "height": 640,
24
- "width": 640
25
- }
26
- }
 
1
+ {
2
+ "do_convert_annotations": true,
3
+ "do_normalize": false,
4
+ "do_pad": false,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "format": "coco_detection",
8
+ "image_mean": [
9
+ 0.485,
10
+ 0.456,
11
+ 0.406
12
+ ],
13
+ "image_processor_type": "RTDetrImageProcessor",
14
+ "image_std": [
15
+ 0.229,
16
+ 0.224,
17
+ 0.225
18
+ ],
19
+ "pad_size": null,
20
+ "resample": 2,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "height": 640,
24
+ "width": 640
25
+ }
26
+ }