ireneisdoomed committed on
Commit
08d59da
·
verified ·
1 Parent(s): b36ccc4
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. README.md +217 -0
  3. classifier.skops +3 -0
  4. config.json +196 -0
  5. test.parquet +3 -0
  6. train.parquet +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ classifier.skops filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sklearn
3
+ tags:
4
+ - sklearn
5
+ - skops
6
+ - tabular-classification
7
+ model_format: skops
8
+ model_file: classifier.skops
9
+ widget:
10
+ - structuredData:
11
+ credibleSetConfidence:
12
+ - 0.75
13
+ - 0.25
14
+ - 0.75
15
+ distanceFootprintMean:
16
+ - 1.0
17
+ - 0.9948455095291138
18
+ - 0.997710108757019
19
+ distanceFootprintMeanNeighbourhood:
20
+ - 1.0
21
+ - 1.0
22
+ - 0.997715413570404
23
+ distanceSentinelFootprint:
24
+ - 1.0
25
+ - 0.9999213218688965
26
+ - 0.997715413570404
27
+ distanceSentinelFootprintNeighbourhood:
28
+ - 1.0
29
+ - 1.0
30
+ - 0.997715413570404
31
+ distanceSentinelTss:
32
+ - 0.9999350309371948
33
+ - 0.9999213218688965
34
+ - 0.997715413570404
35
+ distanceSentinelTssNeighbourhood:
36
+ - 1.0
37
+ - 1.0
38
+ - 0.9980311393737793
39
+ distanceTssMean:
40
+ - 0.9999350309371948
41
+ - 0.9947366714477539
42
+ - 0.997710108757019
43
+ distanceTssMeanNeighbourhood:
44
+ - 1.0
45
+ - 1.0
46
+ - 0.9980310797691345
47
+ eQtlColocClppMaximum:
48
+ - 0.0
49
+ - 0.06608512997627258
50
+ - 0.0
51
+ eQtlColocClppMaximumNeighbourhood:
52
+ - 0.0
53
+ - 1.0
54
+ - 0.0
55
+ eQtlColocH4Maximum:
56
+ - 0.0
57
+ - 0.0
58
+ - 0.0
59
+ eQtlColocH4MaximumNeighbourhood:
60
+ - 0.0
61
+ - 0.0
62
+ - 0.0
63
+ geneCount500kb:
64
+ - 15.0
65
+ - 8.0
66
+ - 30.0
67
+ geneId:
68
+ - ENSG00000169174
69
+ - ENSG00000084674
70
+ - ENSG00000055118
71
+ goldStandardSet:
72
+ - 1
73
+ - 1
74
+ - 1
75
+ pQtlColocClppMaximum:
76
+ - 1.0
77
+ - 0.0
78
+ - 0.0
79
+ pQtlColocClppMaximumNeighbourhood:
80
+ - 1.0
81
+ - 0.0
82
+ - 0.0
83
+ pQtlColocH4Maximum:
84
+ - 1.0
85
+ - 0.0
86
+ - 0.0
87
+ pQtlColocH4MaximumNeighbourhood:
88
+ - 1.0
89
+ - 0.0
90
+ - 0.0
91
+ proteinGeneCount500kb:
92
+ - 7.0
93
+ - 3.0
94
+ - 21.0
95
+ sQtlColocClppMaximum:
96
+ - 0.0
97
+ - 0.21970131993293762
98
+ - 0.0
99
+ sQtlColocClppMaximumNeighbourhood:
100
+ - 0.0
101
+ - 1.0
102
+ - 0.0
103
+ sQtlColocH4Maximum:
104
+ - 0.0
105
+ - 0.0
106
+ - 0.0
107
+ sQtlColocH4MaximumNeighbourhood:
108
+ - 0.0
109
+ - 0.0
110
+ - 0.0
111
+ studyLocusId:
112
+ - 02c442ea4fa5ab80586a6d1ff6afa843
113
+ - 235e8ce166619f33e27582fff5bc0c94
114
+ - 6ad8c76d115fbb76a676bd57e940ee4d
115
+ vepMaximum:
116
+ - 0.6600000262260437
117
+ - 0.6600000262260437
118
+ - 0.0
119
+ vepMaximumNeighbourhood:
120
+ - 1.0
121
+ - 1.0
122
+ - 0.0
123
+ vepMean:
124
+ - 0.6600000262260437
125
+ - 0.0039977929554879665
126
+ - 0.0
127
+ vepMeanNeighbourhood:
128
+ - 1.0
129
+ - 1.0
130
+ - 0.0
131
+ ---
132
+
133
+ # Model description
134
+
135
+ The locus-to-gene (L2G) model prioritises likely causal genes at each GWAS locus using features derived from genetic and functional genomics data. The main categories of predictive features are:
136
+
137
+ - Distance (from credible set variants to gene)
138
+ - Molecular QTL Colocalization
139
+ - Variant Pathogenicity (from VEP)
140
+
141
+ More information at: https://opentargets.github.io/gentropy/python_api/methods/l2g/_l2g/
142
+
143
+
144
+ ## Intended uses & limitations
145
+
146
+ [More Information Needed]
147
+
148
+ ## Training Procedure
149
+
150
+ Gradient Boosting Classifier (XGBoost)
151
+
152
+ ### Hyperparameters
153
+
154
+ <details>
155
+ <summary> Click to expand </summary>
156
+
157
+ | Hyperparameter | Value |
158
+ |-------------------------|-----------------|
159
+ | objective | binary:logistic |
160
+ | base_score | |
161
+ | booster | |
162
+ | callbacks | |
163
+ | colsample_bylevel | |
164
+ | colsample_bynode | |
165
+ | colsample_bytree | 0.8 |
166
+ | device | |
167
+ | early_stopping_rounds | |
168
+ | enable_categorical | False |
169
+ | eval_metric | aucpr |
170
+ | feature_types | |
171
+ | feature_weights | |
172
+ | gamma | |
173
+ | grow_policy | |
174
+ | importance_type | |
175
+ | interaction_constraints | |
176
+ | learning_rate | |
177
+ | max_bin | |
178
+ | max_cat_threshold | |
179
+ | max_cat_to_onehot | |
180
+ | max_delta_step | |
181
+ | max_depth | 5 |
182
+ | max_leaves | |
183
+ | min_child_weight | 10 |
184
+ | missing | nan |
185
+ | monotone_constraints | |
186
+ | multi_strategy | |
187
+ | n_estimators | |
188
+ | n_jobs | |
189
+ | num_parallel_tree | |
190
+ | random_state | 42 |
191
+ | reg_alpha | 1 |
192
+ | reg_lambda | 1.0 |
193
+ | sampling_method | |
194
+ | scale_pos_weight | 0.8 |
195
+ | subsample | 0.8 |
196
+ | tree_method | |
197
+ | validate_parameters | |
198
+ | verbosity | |
199
+ | eta | 0.05 |
200
+
201
+ </details>
202
+
203
+ # How to Get Started with the Model
204
+
205
+ To use the model, load it with the `LocusToGeneModel.load_from_hub` method. This returns a `LocusToGeneModel` object that can be used to make predictions on a feature matrix.
206
+ The model can then be used to make predictions using the `predict` method.
207
+
208
+ More information can be found at: https://opentargets.github.io/gentropy/python_api/methods/l2g/model/
209
+
210
+
211
+ # Citation
212
+
213
+ https://doi.org/10.1038/s41588-021-00945-5
214
+
215
+ # License
216
+
217
+ MIT
classifier.skops ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a3b3082fe8ffe4f9bb5d11bb9f80ef9acb4c64b71bb7ddf4c39f8a1a03b30c
3
+ size 236273
config.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sklearn": {
3
+ "columns": [
4
+ "studyLocusId",
5
+ "geneId",
6
+ "goldStandardSet",
7
+ "eQtlColocClppMaximum",
8
+ "pQtlColocClppMaximum",
9
+ "sQtlColocClppMaximum",
10
+ "eQtlColocH4Maximum",
11
+ "pQtlColocH4Maximum",
12
+ "sQtlColocH4Maximum",
13
+ "eQtlColocClppMaximumNeighbourhood",
14
+ "pQtlColocClppMaximumNeighbourhood",
15
+ "sQtlColocClppMaximumNeighbourhood",
16
+ "eQtlColocH4MaximumNeighbourhood",
17
+ "pQtlColocH4MaximumNeighbourhood",
18
+ "sQtlColocH4MaximumNeighbourhood",
19
+ "distanceSentinelFootprint",
20
+ "distanceSentinelFootprintNeighbourhood",
21
+ "distanceFootprintMean",
22
+ "distanceFootprintMeanNeighbourhood",
23
+ "distanceTssMean",
24
+ "distanceTssMeanNeighbourhood",
25
+ "distanceSentinelTss",
26
+ "distanceSentinelTssNeighbourhood",
27
+ "vepMaximum",
28
+ "vepMaximumNeighbourhood",
29
+ "vepMean",
30
+ "vepMeanNeighbourhood",
31
+ "geneCount500kb",
32
+ "proteinGeneCount500kb",
33
+ "credibleSetConfidence"
34
+ ],
35
+ "environment": [
36
+ "xgboost=3.0.3"
37
+ ],
38
+ "example_input": {
39
+ "credibleSetConfidence": [
40
+ 0.75,
41
+ 0.25,
42
+ 0.75
43
+ ],
44
+ "distanceFootprintMean": [
45
+ 1.0,
46
+ 0.9948455095291138,
47
+ 0.997710108757019
48
+ ],
49
+ "distanceFootprintMeanNeighbourhood": [
50
+ 1.0,
51
+ 1.0,
52
+ 0.997715413570404
53
+ ],
54
+ "distanceSentinelFootprint": [
55
+ 1.0,
56
+ 0.9999213218688965,
57
+ 0.997715413570404
58
+ ],
59
+ "distanceSentinelFootprintNeighbourhood": [
60
+ 1.0,
61
+ 1.0,
62
+ 0.997715413570404
63
+ ],
64
+ "distanceSentinelTss": [
65
+ 0.9999350309371948,
66
+ 0.9999213218688965,
67
+ 0.997715413570404
68
+ ],
69
+ "distanceSentinelTssNeighbourhood": [
70
+ 1.0,
71
+ 1.0,
72
+ 0.9980311393737793
73
+ ],
74
+ "distanceTssMean": [
75
+ 0.9999350309371948,
76
+ 0.9947366714477539,
77
+ 0.997710108757019
78
+ ],
79
+ "distanceTssMeanNeighbourhood": [
80
+ 1.0,
81
+ 1.0,
82
+ 0.9980310797691345
83
+ ],
84
+ "eQtlColocClppMaximum": [
85
+ 0.0,
86
+ 0.06608512997627258,
87
+ 0.0
88
+ ],
89
+ "eQtlColocClppMaximumNeighbourhood": [
90
+ 0.0,
91
+ 1.0,
92
+ 0.0
93
+ ],
94
+ "eQtlColocH4Maximum": [
95
+ 0.0,
96
+ 0.0,
97
+ 0.0
98
+ ],
99
+ "eQtlColocH4MaximumNeighbourhood": [
100
+ 0.0,
101
+ 0.0,
102
+ 0.0
103
+ ],
104
+ "geneCount500kb": [
105
+ 15.0,
106
+ 8.0,
107
+ 30.0
108
+ ],
109
+ "geneId": [
110
+ "ENSG00000169174",
111
+ "ENSG00000084674",
112
+ "ENSG00000055118"
113
+ ],
114
+ "goldStandardSet": [
115
+ 1,
116
+ 1,
117
+ 1
118
+ ],
119
+ "pQtlColocClppMaximum": [
120
+ 1.0,
121
+ 0.0,
122
+ 0.0
123
+ ],
124
+ "pQtlColocClppMaximumNeighbourhood": [
125
+ 1.0,
126
+ 0.0,
127
+ 0.0
128
+ ],
129
+ "pQtlColocH4Maximum": [
130
+ 1.0,
131
+ 0.0,
132
+ 0.0
133
+ ],
134
+ "pQtlColocH4MaximumNeighbourhood": [
135
+ 1.0,
136
+ 0.0,
137
+ 0.0
138
+ ],
139
+ "proteinGeneCount500kb": [
140
+ 7.0,
141
+ 3.0,
142
+ 21.0
143
+ ],
144
+ "sQtlColocClppMaximum": [
145
+ 0.0,
146
+ 0.21970131993293762,
147
+ 0.0
148
+ ],
149
+ "sQtlColocClppMaximumNeighbourhood": [
150
+ 0.0,
151
+ 1.0,
152
+ 0.0
153
+ ],
154
+ "sQtlColocH4Maximum": [
155
+ 0.0,
156
+ 0.0,
157
+ 0.0
158
+ ],
159
+ "sQtlColocH4MaximumNeighbourhood": [
160
+ 0.0,
161
+ 0.0,
162
+ 0.0
163
+ ],
164
+ "studyLocusId": [
165
+ "02c442ea4fa5ab80586a6d1ff6afa843",
166
+ "235e8ce166619f33e27582fff5bc0c94",
167
+ "6ad8c76d115fbb76a676bd57e940ee4d"
168
+ ],
169
+ "vepMaximum": [
170
+ 0.6600000262260437,
171
+ 0.6600000262260437,
172
+ 0.0
173
+ ],
174
+ "vepMaximumNeighbourhood": [
175
+ 1.0,
176
+ 1.0,
177
+ 0.0
178
+ ],
179
+ "vepMean": [
180
+ 0.6600000262260437,
181
+ 0.0039977929554879665,
182
+ 0.0
183
+ ],
184
+ "vepMeanNeighbourhood": [
185
+ 1.0,
186
+ 1.0,
187
+ 0.0
188
+ ]
189
+ },
190
+ "model": {
191
+ "file": "classifier.skops"
192
+ },
193
+ "model_format": "skops",
194
+ "task": "tabular-classification"
195
+ }
196
+ }
test.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d64f5159851082035fa93505d829d0ff4fc2cab45e72d294d91e474a4a86a54
3
+ size 1276740
train.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d6e795662f02b9d970d3147ffd0345e731922edd6a3f6d4f12571974fab7e37
3
+ size 4143971