Bekhouche committed on
Commit 39f8461 · verified · 1 Parent(s): b2384cc

Upload Pipeline

Files changed (2)
  1. config.json +236 -254
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,254 +1,236 @@
- {
-   "inputs": [
-     "images"
-   ],
-   "modules": {
-     "avg_pool": {
-       "config": {
-         "args": {
-           "output_size": [
-             null,
-             1
-           ]
-         }
-       },
-       "type": "torch.nn.AdaptiveAvgPool2d"
-     },
-     "feature_extraction": {
-       "config": {
-         "args": {
-           "input_channel": 1,
-           "output_channel": 512,
-           "variant": "DTRB"
-         }
-       },
-       "type": "DeepTextRecognition.ResNetModel"
-     },
-     "max": {
-       "config": {
-         "args": {
-           "dim": 2
-         }
-       },
-       "type": "torch.max"
-     },
-     "permute": {
-       "config": {
-         "args": {
-           "dims": [
-             0,
-             3,
-             1,
-             2
-           ]
-         }
-       },
-       "type": "torch.permute"
-     },
-     "prediction": {
-       "config": {
-         "args": {
-           "hidden_size": 256,
-           "input_size": 256,
-           "num_classes": 38
-         }
-       },
-       "type": "DeepTextRecognition.AttentionModel"
-     },
-     "processing": {
-       "config": {
-         "args": {
-           "channels_size": 1,
-           "image_size": [
-             32,
-             100
-           ],
-           "padding": "left"
-         }
-       },
-       "type": "DeepTextRecognition.ImageProcessor"
-     },
-     "sequence_modeling": {
-       "config": {
-         "args": {
-           "hidden_sizes": [
-             256,
-             256
-           ],
-           "input_size": 512,
-           "output_size": 256
-         }
-       },
-       "type": "DeepTextRecognition.BiLSTMModel"
-     },
-     "squeeze": {
-       "config": {
-         "args": {
-           "dim": 3
-         }
-       },
-       "type": "torch.squeeze"
-     },
-     "tokenizer": {
-       "config": {
-         "args": {
-           "characters": [
-             "0",
-             "1",
-             "2",
-             "3",
-             "4",
-             "5",
-             "6",
-             "7",
-             "8",
-             "9",
-             "a",
-             "b",
-             "c",
-             "d",
-             "e",
-             "f",
-             "g",
-             "h",
-             "i",
-             "j",
-             "k",
-             "l",
-             "m",
-             "n",
-             "o",
-             "p",
-             "q",
-             "r",
-             "s",
-             "t",
-             "u",
-             "v",
-             "w",
-             "x",
-             "y",
-             "z"
-           ],
-           "max_length": 25
-         }
-       },
-       "type": "DeepTextRecognition.AttentionTokenizer"
-     },
-     "transformation": {
-       "config": {
-         "args": {
-           "F": 20,
-           "I_channel_num": 1,
-           "I_r_size": [
-             32,
-             100
-           ],
-           "I_size": [
-             32,
-             100
-           ]
-         }
-       },
-       "type": "DeepTextRecognition.TPSModel"
-     }
-   },
-   "order": [
-     "processing",
-     "transformation",
-     "feature_extraction",
-     "permute",
-     "avg_pool",
-     "squeeze",
-     "sequence_modeling",
-     "prediction",
-     "max",
-     "tokenizer"
-   ],
-   "outputs": [
-     "tokenizer:labels"
-   ],
-   "routing": {
-     "avg_pool": {
-       "inputs": [
-         "permute:permuted_features"
-       ],
-       "outputs": [
-         "avg_pool:pooled_features"
-       ]
-     },
-     "feature_extraction": {
-       "inputs": [
-         "transformation:transformed_images"
-       ],
-       "outputs": [
-         "feature_extraction:extracted_features"
-       ]
-     },
-     "max": {
-       "inputs": [
-         "prediction:predictions"
-       ],
-       "outputs": [
-         "max:none",
-         "max:predictions"
-       ]
-     },
-     "permute": {
-       "inputs": [
-         "feature_extraction:extracted_features"
-       ],
-       "outputs": [
-         "permute:permuted_features"
-       ]
-     },
-     "prediction": {
-       "inputs": [
-         "sequence_modeling:modeled_features"
-       ],
-       "outputs": [
-         "prediction:predictions"
-       ]
-     },
-     "processing": {
-       "inputs": [
-         "images"
-       ],
-       "outputs": [
-         "processing:processed_images"
-       ]
-     },
-     "sequence_modeling": {
-       "inputs": [
-         "squeeze:squeezed_features"
-       ],
-       "outputs": [
-         "sequence_modeling:modeled_features"
-       ]
-     },
-     "squeeze": {
-       "inputs": [
-         "avg_pool:pooled_features"
-       ],
-       "outputs": [
-         "squeeze:squeezed_features"
-       ]
-     },
-     "tokenizer": {
-       "inputs": [
-         "max:predictions"
-       ],
-       "outputs": [
-         "tokenizer:labels"
-       ]
-     },
-     "transformation": {
-       "inputs": [
-         "processing:processed_images"
-       ],
-       "outputs": [
-         "transformation:transformed_images"
-       ]
-     }
-   }
- }
+ {
+   "inputs": [
+     "images"
+   ],
+   "modules": {
+     "avg_pool": {
+       "config": {
+         "args": {
+           "output_size": [
+             null,
+             1
+           ]
+         }
+       },
+       "type": "DeepTextRecognition.AdaptiveAvgPoolModule"
+     },
+     "feature_extraction": {
+       "config": {
+         "args": {
+           "input_channel": 1,
+           "output_channel": 512,
+           "variant": "DTRB"
+         }
+       },
+       "type": "DeepTextRecognition.ResNetModel"
+     },
+     "permute": {
+       "config": {
+         "args": {
+           "dims": [
+             0,
+             3,
+             1,
+             2
+           ]
+         }
+       },
+       "type": "DeepTextRecognition.PermuteModule"
+     },
+     "prediction": {
+       "config": {
+         "args": {
+           "hidden_size": 256,
+           "input_size": 256,
+           "num_classes": 38
+         }
+       },
+       "type": "DeepTextRecognition.TextRecognitionAttentionModel"
+     },
+     "processing": {
+       "config": {
+         "args": {
+           "channels_size": 1,
+           "image_size": [
+             32,
+             100
+           ],
+           "padding": "left"
+         }
+       },
+       "type": "DeepTextRecognition.ImageProcessor"
+     },
+     "sequence_modeling": {
+       "config": {
+         "args": {
+           "hidden_sizes": [
+             256,
+             256
+           ],
+           "input_size": 512,
+           "output_size": 256
+         }
+       },
+       "type": "DeepTextRecognition.BiLSTMModel"
+     },
+     "squeeze": {
+       "config": {
+         "args": {
+           "dim": 3
+         }
+       },
+       "type": "DeepTextRecognition.SqueezeModule"
+     },
+     "tokenizer": {
+       "config": {
+         "args": {
+           "characters": [
+             "0",
+             "1",
+             "2",
+             "3",
+             "4",
+             "5",
+             "6",
+             "7",
+             "8",
+             "9",
+             "a",
+             "b",
+             "c",
+             "d",
+             "e",
+             "f",
+             "g",
+             "h",
+             "i",
+             "j",
+             "k",
+             "l",
+             "m",
+             "n",
+             "o",
+             "p",
+             "q",
+             "r",
+             "s",
+             "t",
+             "u",
+             "v",
+             "w",
+             "x",
+             "y",
+             "z"
+           ],
+           "max_length": 25
+         }
+       },
+       "type": "DeepTextRecognition.AttentionTokenizer"
+     },
+     "transformation": {
+       "config": {
+         "args": {
+           "F": 20,
+           "I_channel_num": 1,
+           "I_r_size": [
+             32,
+             100
+           ],
+           "I_size": [
+             32,
+             100
+           ]
+         }
+       },
+       "type": "DeepTextRecognition.TPSModel"
+     }
+   },
+   "order": [
+     "processing",
+     "transformation",
+     "feature_extraction",
+     "permute",
+     "avg_pool",
+     "squeeze",
+     "sequence_modeling",
+     "prediction",
+     "tokenizer"
+   ],
+   "outputs": [
+     "tokenizer:labels"
+   ],
+   "routing": {
+     "avg_pool": {
+       "inputs": [
+         "permute:permuted_features"
+       ],
+       "outputs": [
+         "avg_pool:pooled_features"
+       ]
+     },
+     "feature_extraction": {
+       "inputs": [
+         "transformation:transformed_images"
+       ],
+       "outputs": [
+         "feature_extraction:extracted_features"
+       ]
+     },
+     "permute": {
+       "inputs": [
+         "feature_extraction:extracted_features"
+       ],
+       "outputs": [
+         "permute:permuted_features"
+       ]
+     },
+     "prediction": {
+       "inputs": [
+         "sequence_modeling:modeled_features"
+       ],
+       "outputs": [
+         "prediction:predictions"
+       ]
+     },
+     "processing": {
+       "inputs": [
+         "images"
+       ],
+       "outputs": [
+         "processing:processed_images"
+       ]
+     },
+     "sequence_modeling": {
+       "inputs": [
+         "squeeze:squeezed_features"
+       ],
+       "outputs": [
+         "sequence_modeling:modeled_features"
+       ]
+     },
+     "squeeze": {
+       "inputs": [
+         "avg_pool:pooled_features"
+       ],
+       "outputs": [
+         "squeeze:squeezed_features"
+       ]
+     },
+     "tokenizer": {
+       "inputs": [
+         "prediction:predictions"
+       ],
+       "outputs": [
+         "tokenizer:labels"
+       ]
+     },
+     "transformation": {
+       "inputs": [
+         "processing:processed_images"
+       ],
+       "outputs": [
+         "transformation:transformed_images"
+       ]
+     }
+   }
+ }
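
The rewritten config swaps the raw torch ops (torch.permute, torch.nn.AdaptiveAvgPool2d, torch.squeeze, torch.max) for DeepTextRecognition wrapper modules and drops the standalone "max" step, so the tokenizer now reads "prediction:predictions" directly. For orientation, below is a minimal sketch of how such a pipeline config could be executed by walking "order" and "routing"; the executor, the registry argument, and the module call signatures are assumptions for illustration, not the actual DeepTextRecognition API.

import json

def run_pipeline(config_path, registry, images):
    # Hypothetical executor: build each module from its "type" string and
    # "config.args", then run the modules in "order", wiring tensors via "routing".
    with open(config_path) as f:
        cfg = json.load(f)

    # registry is assumed to map type strings (e.g. "DeepTextRecognition.ResNetModel")
    # to constructors; the real library may resolve types differently.
    modules = {
        name: registry[spec["type"]](**spec["config"]["args"])
        for name, spec in cfg["modules"].items()
    }

    # Intermediate values are keyed by "producer:output_name";
    # pipeline-level inputs go in under their plain names.
    values = {"images": images}

    for name in cfg["order"]:
        route = cfg["routing"][name]
        args = [values[key] for key in route["inputs"]]
        outs = modules[name](*args)
        if not isinstance(outs, (tuple, list)):
            outs = (outs,)
        for key, out in zip(route["outputs"], outs):
            values[key] = out

    # e.g. ["tokenizer:labels"] -> the decoded text labels
    return [values[key] for key in cfg["outputs"]]

Because every edge is named explicitly in "routing", removing the old "max" node only required deleting its module and routing entries, dropping it from "order", and repointing the tokenizer's input. The num_classes of 38 presumably covers the 36 characters plus the attention decoder's start and end tokens, though the config itself does not spell that out.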
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d7e8c504090d5988d78aa47de37297c860cf183f98190f22ae2c64451f527ede
- size 198655756
+ oid sha256:c9d06a0bcfdd11cdeb8ca6dcf671c71487f7a416c32d9d57fbc6c347af097d87
+ size 198657772