ireneisdoomed committed on
Commit
34d2d81
·
verified ·
1 Parent(s): 666d1a6

test model

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. README.md +217 -0
  3. classifier_no_cross_val.skops +3 -0
  4. config.json +196 -0
  5. test.parquet +3 -0
  6. train.parquet +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ classifier_no_cross_val.skops filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sklearn
3
+ tags:
4
+ - sklearn
5
+ - skops
6
+ - tabular-classification
7
+ model_format: skops
8
+ model_file: classifier_no_cross_val.skops
9
+ widget:
10
+ - structuredData:
11
+ credibleSetConfidence:
12
+ - 0.75
13
+ - 0.75
14
+ - 0.25
15
+ distanceFootprintMean:
16
+ - 1.0
17
+ - 1.0
18
+ - 0.9948455095291138
19
+ distanceFootprintMeanNeighbourhood:
20
+ - 1.0
21
+ - 1.0
22
+ - 1.0
23
+ distanceSentinelFootprint:
24
+ - 1.0
25
+ - 1.0
26
+ - 0.9999213218688965
27
+ distanceSentinelFootprintNeighbourhood:
28
+ - 1.0
29
+ - 1.0
30
+ - 1.0
31
+ distanceSentinelTss:
32
+ - 0.9982281923294067
33
+ - 0.9999350309371948
34
+ - 0.9999213218688965
35
+ distanceSentinelTssNeighbourhood:
36
+ - 1.0
37
+ - 1.0
38
+ - 1.0
39
+ distanceTssMean:
40
+ - 0.9982281923294067
41
+ - 0.9999350309371948
42
+ - 0.9947366714477539
43
+ distanceTssMeanNeighbourhood:
44
+ - 1.0
45
+ - 1.0
46
+ - 1.0
47
+ eQtlColocClppMaximum:
48
+ - 0.949999988079071
49
+ - 0.0
50
+ - 0.06608512997627258
51
+ eQtlColocClppMaximumNeighbourhood:
52
+ - 1.0
53
+ - 0.0
54
+ - 1.0
55
+ eQtlColocH4Maximum:
56
+ - 1.0
57
+ - 0.0
58
+ - 0.0
59
+ eQtlColocH4MaximumNeighbourhood:
60
+ - 1.0
61
+ - 0.0
62
+ - 0.0
63
+ geneCount500kb:
64
+ - 20.0
65
+ - 15.0
66
+ - 8.0
67
+ geneId:
68
+ - ENSG00000087237
69
+ - ENSG00000169174
70
+ - ENSG00000084674
71
+ goldStandardSet:
72
+ - 1
73
+ - 1
74
+ - 1
75
+ pQtlColocClppMaximum:
76
+ - 0.0
77
+ - 1.0
78
+ - 0.0
79
+ pQtlColocClppMaximumNeighbourhood:
80
+ - 0.0
81
+ - 1.0
82
+ - 0.0
83
+ pQtlColocH4Maximum:
84
+ - 0.0
85
+ - 1.0
86
+ - 0.0
87
+ pQtlColocH4MaximumNeighbourhood:
88
+ - 0.0
89
+ - 1.0
90
+ - 0.0
91
+ proteinGeneCount500kb:
92
+ - 8.0
93
+ - 7.0
94
+ - 3.0
95
+ sQtlColocClppMaximum:
96
+ - 0.949999988079071
97
+ - 0.0
98
+ - 0.21970131993293762
99
+ sQtlColocClppMaximumNeighbourhood:
100
+ - 1.0
101
+ - 0.0
102
+ - 1.0
103
+ sQtlColocH4Maximum:
104
+ - 1.0
105
+ - 0.0
106
+ - 0.0
107
+ sQtlColocH4MaximumNeighbourhood:
108
+ - 1.0
109
+ - 0.0
110
+ - 0.0
111
+ studyLocusId:
112
+ - 005bc8624f8dd7f7c7bc63e651e9e59d
113
+ - 02c442ea4fa5ab80586a6d1ff6afa843
114
+ - 235e8ce166619f33e27582fff5bc0c94
115
+ vepMaximum:
116
+ - 0.33000001311302185
117
+ - 0.6600000262260437
118
+ - 0.6600000262260437
119
+ vepMaximumNeighbourhood:
120
+ - 1.0
121
+ - 1.0
122
+ - 1.0
123
+ vepMean:
124
+ - 0.33000001311302185
125
+ - 0.6600000262260437
126
+ - 0.0039977929554879665
127
+ vepMeanNeighbourhood:
128
+ - 1.0
129
+ - 1.0
130
+ - 1.0
131
+ ---
132
+
133
+ # Model description
134
+
135
+ The locus-to-gene (L2G) model prioritises likely causal genes at each GWAS locus using features derived from genetic and functional genomics data. The main categories of predictive features are:
136
+
137
+ - Distance (from credible set variants to gene)
138
+ - Molecular QTL Colocalization
139
+ - Variant Pathogenicity (from VEP)
140
+
141
+ More information at: https://opentargets.github.io/gentropy/python_api/methods/l2g/_l2g/
142
+
143
+
144
+ ## Intended uses & limitations
145
+
146
+ [More Information Needed]
147
+
148
+ ## Training Procedure
149
+
150
+ Gradient Boosting Classifier (XGBoost)
151
+
152
+ ### Hyperparameters
153
+
154
+ <details>
155
+ <summary> Click to expand </summary>
156
+
157
+ | Hyperparameter | Value |
158
+ |-------------------------|-----------------|
159
+ | objective | binary:logistic |
160
+ | base_score | |
161
+ | booster | |
162
+ | callbacks | |
163
+ | colsample_bylevel | |
164
+ | colsample_bynode | |
165
+ | colsample_bytree | 0.8 |
166
+ | device | |
167
+ | early_stopping_rounds | |
168
+ | enable_categorical | False |
169
+ | eval_metric | aucpr |
170
+ | feature_types | |
171
+ | feature_weights | |
172
+ | gamma | |
173
+ | grow_policy | |
174
+ | importance_type | |
175
+ | interaction_constraints | |
176
+ | learning_rate | |
177
+ | max_bin | |
178
+ | max_cat_threshold | |
179
+ | max_cat_to_onehot | |
180
+ | max_delta_step | |
181
+ | max_depth | 5 |
182
+ | max_leaves | |
183
+ | min_child_weight | 10 |
184
+ | missing | nan |
185
+ | monotone_constraints | |
186
+ | multi_strategy | |
187
+ | n_estimators | |
188
+ | n_jobs | |
189
+ | num_parallel_tree | |
190
+ | random_state | 777 |
191
+ | reg_alpha | 1 |
192
+ | reg_lambda | 1.0 |
193
+ | sampling_method | |
194
+ | scale_pos_weight | 0.8 |
195
+ | subsample | 0.8 |
196
+ | tree_method | |
197
+ | validate_parameters | |
198
+ | verbosity | |
199
+ | eta | 0.05 |
200
+
201
+ </details>
202
+
203
+ # How to Get Started with the Model
204
+
205
+ To use the model, load it with the `LocusToGeneModel.load_from_hub` method, which returns a `LocusToGeneModel` object.
206
+ Call that object's `predict` method on a feature matrix to make predictions.
207
+
208
+ More information can be found at: https://opentargets.github.io/gentropy/python_api/methods/l2g/model/
209
+
210
+
211
+ # Citation
212
+
213
+ https://doi.org/10.1038/s41588-021-00945-5
214
+
215
+ # License
216
+
217
+ MIT
classifier_no_cross_val.skops ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:477ea8d33c54a281fc707bbd1ece5d18102b2b294bcab07862d6e51037d17a5e
3
+ size 243893
config.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sklearn": {
3
+ "columns": [
4
+ "studyLocusId",
5
+ "geneId",
6
+ "goldStandardSet",
7
+ "eQtlColocClppMaximum",
8
+ "pQtlColocClppMaximum",
9
+ "sQtlColocClppMaximum",
10
+ "eQtlColocH4Maximum",
11
+ "pQtlColocH4Maximum",
12
+ "sQtlColocH4Maximum",
13
+ "eQtlColocClppMaximumNeighbourhood",
14
+ "pQtlColocClppMaximumNeighbourhood",
15
+ "sQtlColocClppMaximumNeighbourhood",
16
+ "eQtlColocH4MaximumNeighbourhood",
17
+ "pQtlColocH4MaximumNeighbourhood",
18
+ "sQtlColocH4MaximumNeighbourhood",
19
+ "distanceSentinelFootprint",
20
+ "distanceSentinelFootprintNeighbourhood",
21
+ "distanceFootprintMean",
22
+ "distanceFootprintMeanNeighbourhood",
23
+ "distanceTssMean",
24
+ "distanceTssMeanNeighbourhood",
25
+ "distanceSentinelTss",
26
+ "distanceSentinelTssNeighbourhood",
27
+ "vepMaximum",
28
+ "vepMaximumNeighbourhood",
29
+ "vepMean",
30
+ "vepMeanNeighbourhood",
31
+ "geneCount500kb",
32
+ "proteinGeneCount500kb",
33
+ "credibleSetConfidence"
34
+ ],
35
+ "environment": [
36
+ "xgboost=3.0.4"
37
+ ],
38
+ "example_input": {
39
+ "credibleSetConfidence": [
40
+ 0.75,
41
+ 0.75,
42
+ 0.25
43
+ ],
44
+ "distanceFootprintMean": [
45
+ 1.0,
46
+ 1.0,
47
+ 0.9948455095291138
48
+ ],
49
+ "distanceFootprintMeanNeighbourhood": [
50
+ 1.0,
51
+ 1.0,
52
+ 1.0
53
+ ],
54
+ "distanceSentinelFootprint": [
55
+ 1.0,
56
+ 1.0,
57
+ 0.9999213218688965
58
+ ],
59
+ "distanceSentinelFootprintNeighbourhood": [
60
+ 1.0,
61
+ 1.0,
62
+ 1.0
63
+ ],
64
+ "distanceSentinelTss": [
65
+ 0.9982281923294067,
66
+ 0.9999350309371948,
67
+ 0.9999213218688965
68
+ ],
69
+ "distanceSentinelTssNeighbourhood": [
70
+ 1.0,
71
+ 1.0,
72
+ 1.0
73
+ ],
74
+ "distanceTssMean": [
75
+ 0.9982281923294067,
76
+ 0.9999350309371948,
77
+ 0.9947366714477539
78
+ ],
79
+ "distanceTssMeanNeighbourhood": [
80
+ 1.0,
81
+ 1.0,
82
+ 1.0
83
+ ],
84
+ "eQtlColocClppMaximum": [
85
+ 0.949999988079071,
86
+ 0.0,
87
+ 0.06608512997627258
88
+ ],
89
+ "eQtlColocClppMaximumNeighbourhood": [
90
+ 1.0,
91
+ 0.0,
92
+ 1.0
93
+ ],
94
+ "eQtlColocH4Maximum": [
95
+ 1.0,
96
+ 0.0,
97
+ 0.0
98
+ ],
99
+ "eQtlColocH4MaximumNeighbourhood": [
100
+ 1.0,
101
+ 0.0,
102
+ 0.0
103
+ ],
104
+ "geneCount500kb": [
105
+ 20.0,
106
+ 15.0,
107
+ 8.0
108
+ ],
109
+ "geneId": [
110
+ "ENSG00000087237",
111
+ "ENSG00000169174",
112
+ "ENSG00000084674"
113
+ ],
114
+ "goldStandardSet": [
115
+ 1,
116
+ 1,
117
+ 1
118
+ ],
119
+ "pQtlColocClppMaximum": [
120
+ 0.0,
121
+ 1.0,
122
+ 0.0
123
+ ],
124
+ "pQtlColocClppMaximumNeighbourhood": [
125
+ 0.0,
126
+ 1.0,
127
+ 0.0
128
+ ],
129
+ "pQtlColocH4Maximum": [
130
+ 0.0,
131
+ 1.0,
132
+ 0.0
133
+ ],
134
+ "pQtlColocH4MaximumNeighbourhood": [
135
+ 0.0,
136
+ 1.0,
137
+ 0.0
138
+ ],
139
+ "proteinGeneCount500kb": [
140
+ 8.0,
141
+ 7.0,
142
+ 3.0
143
+ ],
144
+ "sQtlColocClppMaximum": [
145
+ 0.949999988079071,
146
+ 0.0,
147
+ 0.21970131993293762
148
+ ],
149
+ "sQtlColocClppMaximumNeighbourhood": [
150
+ 1.0,
151
+ 0.0,
152
+ 1.0
153
+ ],
154
+ "sQtlColocH4Maximum": [
155
+ 1.0,
156
+ 0.0,
157
+ 0.0
158
+ ],
159
+ "sQtlColocH4MaximumNeighbourhood": [
160
+ 1.0,
161
+ 0.0,
162
+ 0.0
163
+ ],
164
+ "studyLocusId": [
165
+ "005bc8624f8dd7f7c7bc63e651e9e59d",
166
+ "02c442ea4fa5ab80586a6d1ff6afa843",
167
+ "235e8ce166619f33e27582fff5bc0c94"
168
+ ],
169
+ "vepMaximum": [
170
+ 0.33000001311302185,
171
+ 0.6600000262260437,
172
+ 0.6600000262260437
173
+ ],
174
+ "vepMaximumNeighbourhood": [
175
+ 1.0,
176
+ 1.0,
177
+ 1.0
178
+ ],
179
+ "vepMean": [
180
+ 0.33000001311302185,
181
+ 0.6600000262260437,
182
+ 0.0039977929554879665
183
+ ],
184
+ "vepMeanNeighbourhood": [
185
+ 1.0,
186
+ 1.0,
187
+ 1.0
188
+ ]
189
+ },
190
+ "model": {
191
+ "file": "classifier_no_cross_val.skops"
192
+ },
193
+ "model_format": "skops",
194
+ "task": "tabular-classification"
195
+ }
196
+ }
test.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6585458cb016200bad8ba1e59aec1ecce849bca20a4ee583b0eec1d4c856bfc
3
+ size 700711
train.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db0511a3bd4e16d4587d6a2fe9a6a9639304074f5deb151bc5ced9a7af7c09f
3
+ size 4737931