alexwengg committed on
Commit
40c18c2
·
verified ·
1 Parent(s): a0dc10c

Upload 10 files

Browse files
Melspectrogram_v2.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8a8227f0e06c52e639c4335a606c21f0b1a2fe4c1464fdc49dc8adc30abef86
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9ee8a634d9df8fe10688367fa81db20467e36c4444e82d6bcef70e37e210987
3
  size 243
Melspectrogram_v2.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e3b77fcd063d42c86b3d1fa633698de2fb56f2a9a82bd8affba183cba68ea11
3
- size 610
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeef2321b385057904ac30af06a109aec6c31513c5189712b14be6f0d026a665
3
+ size 400
Melspectrogram_v2.mlmodelc/metadata.json CHANGED
@@ -1,14 +1,14 @@
1
  [
2
  {
3
- "shortDescription" : "Dynamic Mel-Spectrogram Preprocessor (0.1-10s)",
4
  "metadataOutputVersion" : "3.0",
 
5
  "outputSchema" : [
6
  {
7
  "hasShapeFlexibility" : "0",
8
  "isOptional" : "0",
9
  "dataType" : "Float32",
10
  "formattedType" : "MultiArray (Float32)",
11
- "shortDescription" : "128-bin mel-spectrogram features",
12
  "shape" : "[]",
13
  "name" : "melspectrogram",
14
  "type" : "MultiArray"
@@ -18,19 +18,16 @@
18
  "isOptional" : "0",
19
  "dataType" : "Int32",
20
  "formattedType" : "MultiArray (Int32 1)",
21
- "shortDescription" : "Number of valid mel-spectrogram frames",
22
  "shape" : "[1]",
23
  "name" : "melspectrogram_length",
24
  "type" : "MultiArray"
25
  }
26
  ],
27
- "version" : "1.0",
28
  "modelParameters" : [
29
 
30
  ],
31
- "author" : "FluidAudio",
32
  "specificationVersion" : 6,
33
- "storagePrecision" : "Float16",
34
  "mlProgramOperationTypeHistogram" : {
35
  "Range1d" : 2,
36
  "Gather" : 3,
@@ -60,10 +57,10 @@
60
  "Mul" : 1
61
  },
62
  "computePrecision" : "Mixed (Float16, Float32, Int32)",
 
63
  "stateSchema" : [
64
 
65
  ],
66
- "isUpdatable" : "0",
67
  "availability" : {
68
  "macOS" : "12.0",
69
  "tvOS" : "15.0",
@@ -75,35 +72,35 @@
75
  "modelType" : {
76
  "name" : "MLModelType_mlProgram"
77
  },
 
 
 
 
 
78
  "inputSchema" : [
79
  {
80
  "dataType" : "Float32",
81
  "hasShapeFlexibility" : "1",
82
  "isOptional" : "0",
83
- "shapeFlexibility" : "1 × 1600...160000",
84
- "shapeRange" : "[[1, 1], [1600, 160000]]",
85
- "formattedType" : "MultiArray (Float32 1 × 1600)",
86
  "type" : "MultiArray",
87
- "shape" : "[1, 1600]",
88
  "name" : "audio_signal",
89
- "shortDescription" : "Raw audio waveform (16kHz, 0.1-10 seconds)"
90
  },
91
  {
92
  "hasShapeFlexibility" : "0",
93
  "isOptional" : "0",
94
  "dataType" : "Int32",
95
  "formattedType" : "MultiArray (Int32 1)",
96
- "shortDescription" : "Number of audio samples",
97
  "shape" : "[1]",
98
  "name" : "audio_length",
99
  "type" : "MultiArray"
100
  }
101
  ],
102
- "userDefinedMetadata" : {
103
- "com.github.apple.coremltools.source_dialect" : "TorchScript",
104
- "com.github.apple.coremltools.source" : "torch==2.5.0",
105
- "com.github.apple.coremltools.version" : "8.3.0"
106
- },
107
  "generatedClassName" : "Melspectrogram_v2",
108
  "method" : "predict"
109
  }
 
1
  [
2
  {
 
3
  "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float16",
5
  "outputSchema" : [
6
  {
7
  "hasShapeFlexibility" : "0",
8
  "isOptional" : "0",
9
  "dataType" : "Float32",
10
  "formattedType" : "MultiArray (Float32)",
11
+ "shortDescription" : "",
12
  "shape" : "[]",
13
  "name" : "melspectrogram",
14
  "type" : "MultiArray"
 
18
  "isOptional" : "0",
19
  "dataType" : "Int32",
20
  "formattedType" : "MultiArray (Int32 1)",
21
+ "shortDescription" : "",
22
  "shape" : "[1]",
23
  "name" : "melspectrogram_length",
24
  "type" : "MultiArray"
25
  }
26
  ],
 
27
  "modelParameters" : [
28
 
29
  ],
 
30
  "specificationVersion" : 6,
 
31
  "mlProgramOperationTypeHistogram" : {
32
  "Range1d" : 2,
33
  "Gather" : 3,
 
57
  "Mul" : 1
58
  },
59
  "computePrecision" : "Mixed (Float16, Float32, Int32)",
60
+ "isUpdatable" : "0",
61
  "stateSchema" : [
62
 
63
  ],
 
64
  "availability" : {
65
  "macOS" : "12.0",
66
  "tvOS" : "15.0",
 
72
  "modelType" : {
73
  "name" : "MLModelType_mlProgram"
74
  },
75
+ "userDefinedMetadata" : {
76
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
77
+ "com.github.apple.coremltools.source" : "torch==2.5.0",
78
+ "com.github.apple.coremltools.version" : "8.3.0"
79
+ },
80
  "inputSchema" : [
81
  {
82
  "dataType" : "Float32",
83
  "hasShapeFlexibility" : "1",
84
  "isOptional" : "0",
85
+ "shapeFlexibility" : "1 × 1...160000",
86
+ "shapeRange" : "[[1, 1], [1, 160000]]",
87
+ "formattedType" : "MultiArray (Float32 1 × 1)",
88
  "type" : "MultiArray",
89
+ "shape" : "[1, 1]",
90
  "name" : "audio_signal",
91
+ "shortDescription" : ""
92
  },
93
  {
94
  "hasShapeFlexibility" : "0",
95
  "isOptional" : "0",
96
  "dataType" : "Int32",
97
  "formattedType" : "MultiArray (Int32 1)",
98
+ "shortDescription" : "",
99
  "shape" : "[1]",
100
  "name" : "audio_length",
101
  "type" : "MultiArray"
102
  }
103
  ],
 
 
 
 
 
104
  "generatedClassName" : "Melspectrogram_v2",
105
  "method" : "predict"
106
  }
Melspectrogram_v2.mlmodelc/model.mil CHANGED
@@ -1,7 +1,7 @@
1
  program(1.0)
2
  [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3404.23.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
  {
4
- func main<ios15>(tensor<int32, [1]> audio_length, tensor<fp32, [1, ?]> audio_signal) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio_signal", [1, 1600]}}), ("RangeDims", {{"audio_signal", [[1, 1], [1600, 160000]]}})))] {
5
  tensor<int32, []> var_6 = const()[name = tensor<string, []>("op_6"), val = tensor<int32, []>(512)];
6
  tensor<int32, [1]> var_7 = add(x = audio_length, y = var_6)[name = tensor<string, []>("op_7")];
7
  tensor<int32, []> var_9 = const()[name = tensor<string, []>("op_9"), val = tensor<int32, []>(512)];
 
1
  program(1.0)
2
  [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3404.23.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
  {
4
+ func main<ios15>(tensor<int32, [1]> audio_length, tensor<fp32, [1, ?]> audio_signal) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"audio_signal", [1, 1]}}), ("RangeDims", {{"audio_signal", [[1, 1], [1, 160000]]}})))] {
5
  tensor<int32, []> var_6 = const()[name = tensor<string, []>("op_6"), val = tensor<int32, []>(512)];
6
  tensor<int32, [1]> var_7 = add(x = audio_length, y = var_6)[name = tensor<string, []>("op_7")];
7
  tensor<int32, []> var_9 = const()[name = tensor<string, []>("op_9"), val = tensor<int32, []>(512)];
ParakeetEncoder_v2.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f833910c665b12464e7a6cbd29f91b559ed928450506f5b28f8422e25619673f
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cefbb381c134972be702770aac5396402c51d93baf680dcdd5b81189511b1b7
3
  size 243
ParakeetEncoder_v2.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6a83d01a834eae99540b42ee87404447ac18fb1775e8ce4e93fd278304463c
3
- size 401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85298a1f0a959c85991a8a83ebcf541de3f84f99173664a5eeb108f16e4c080a
3
+ size 386
ParakeetEncoder_v2.mlmodelc/metadata.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Int8)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 126 × 1024)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 126, 1024]",
13
+ "name" : "encoder_output",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Int32",
20
+ "formattedType" : "MultiArray (Int32 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1]",
23
+ "name" : "encoder_output_length",
24
+ "type" : "MultiArray"
25
+ }
26
+ ],
27
+ "modelParameters" : [
28
+
29
+ ],
30
+ "specificationVersion" : 7,
31
+ "mlProgramOperationTypeHistogram" : {
32
+ "Transpose" : 171,
33
+ "Ios16.softmax" : 24,
34
+ "Split" : 24,
35
+ "Ios16.linear" : 193,
36
+ "Ios16.add" : 174,
37
+ "Select" : 72,
38
+ "Tile" : 1,
39
+ "Ios16.sigmoid" : 24,
40
+ "Ios16.logicalAnd" : 2,
41
+ "Pad" : 48,
42
+ "ExpandDims" : 5,
43
+ "Ios16.constexprAffineDequantize" : 342,
44
+ "Ios16.silu" : 72,
45
+ "Ios16.cast" : 4,
46
+ "Ios16.less" : 1,
47
+ "Ios16.conv" : 77,
48
+ "Ios16.layerNorm" : 120,
49
+ "SliceByIndex" : 48,
50
+ "Ios16.relu" : 3,
51
+ "Ios16.matmul" : 72,
52
+ "Ios16.reshape" : 145,
53
+ "Ios16.floor" : 3,
54
+ "Ios16.mul" : 99,
55
+ "Ios16.logicalNot" : 2
56
+ },
57
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
58
+ "isUpdatable" : "0",
59
+ "stateSchema" : [
60
+
61
+ ],
62
+ "availability" : {
63
+ "macOS" : "13.0",
64
+ "tvOS" : "16.0",
65
+ "visionOS" : "1.0",
66
+ "watchOS" : "9.0",
67
+ "iOS" : "16.0",
68
+ "macCatalyst" : "16.0"
69
+ },
70
+ "modelType" : {
71
+ "name" : "MLModelType_mlProgram"
72
+ },
73
+ "userDefinedMetadata" : {
74
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
75
+ "com.github.apple.coremltools.source" : "torch==2.5.0",
76
+ "com.github.apple.coremltools.version" : "8.3.0"
77
+ },
78
+ "inputSchema" : [
79
+ {
80
+ "hasShapeFlexibility" : "0",
81
+ "isOptional" : "0",
82
+ "dataType" : "Float32",
83
+ "formattedType" : "MultiArray (Float32 1 × 128 × 1001)",
84
+ "shortDescription" : "",
85
+ "shape" : "[1, 128, 1001]",
86
+ "name" : "audio_signal",
87
+ "type" : "MultiArray"
88
+ },
89
+ {
90
+ "hasShapeFlexibility" : "0",
91
+ "isOptional" : "0",
92
+ "dataType" : "Int32",
93
+ "formattedType" : "MultiArray (Int32 1)",
94
+ "shortDescription" : "",
95
+ "shape" : "[1]",
96
+ "name" : "length",
97
+ "type" : "MultiArray"
98
+ }
99
+ ],
100
+ "generatedClassName" : "ParakeetEncoder_v2",
101
+ "method" : "predict"
102
+ }
103
+ ]
ParakeetEncoder_v2.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
ParakeetEncoder_v2.mlmodelc/weights/weight.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcc9e458812c178cbdbbf35f7cda2f62b860218ce8aa50bbff811c3e043125b1
3
- size 591157632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23bd5658d6e4c716207873b3275fc6d365ce1a027eb8791eaada176fb67abf86
3
+ size 591108480