Ali Kefia
committed on
Commit
·
4c31c97
1
Parent(s):
231da5b
ok
Browse files- .gitattributes +1 -0
- .mise.toml +1 -1
- data/eval.parquet +3 -0
- data/train.parquet +3 -0
- debug.py +13 -0
- imgs/confusion_matrix.png +0 -0
- imgs/roc_curve.png +0 -0
- model/model.pickle +2 -2
- out/confusion_matrix.png +0 -0
- out/preds.csv +0 -45
- out/roc_curve.png +0 -0
- prepare.py +45 -0
- train.py +12 -58
- usage.py +15 -23
- utils/__init__.py +0 -0
- utils/data.py +17 -0
- embed.py → utils/embed.py +0 -0
- utils/paths.py +9 -0
.gitattributes
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
*.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
2 |
*.pickle filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
*.csv filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
3 |
*.pickle filter=lfs diff=lfs merge=lfs -text
|
.mise.toml
CHANGED
@@ -6,7 +6,7 @@ EMBEDDING_MODEL_REV = "d8c86521100d3556476a063fc2342036d45c106f"
|
|
6 |
|
7 |
DATA_DIR = "{{config_root}}/data"
|
8 |
MODEL_DIR = "{{config_root}}/model"
|
9 |
-
|
10 |
|
11 |
[tasks.deps]
|
12 |
run = [
|
|
|
6 |
|
7 |
DATA_DIR = "{{config_root}}/data"
|
8 |
MODEL_DIR = "{{config_root}}/model"
|
9 |
+
IMGS_DIR = "{{config_root}}/imgs"
|
10 |
|
11 |
[tasks.deps]
|
12 |
run = [
|
data/eval.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ce81584baeb7eb8ca4322bc0f50af105ae3795229718cda1dfa1f600e945f3a
|
3 |
+
size 195251
|
data/train.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf199fc047485c2c453c4d9b80714261ed58152ef34c59903a64f9725d0e4956
|
3 |
+
size 6608000
|
debug.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import polars as pl
|
2 |
+
|
3 |
+
from utils.paths import DATA
|
4 |
+
|
5 |
+
|
6 |
+
def main() -> None:
    """Print the train and eval parquet datasets for a quick visual check."""
    for split in ("train", "eval"):
        frame = pl.read_parquet(DATA / f"{split}.parquet")
        print(frame)
|
10 |
+
|
11 |
+
|
12 |
+
if __name__ == "__main__":
|
13 |
+
main()
|
imgs/confusion_matrix.png
ADDED
![]() |
imgs/roc_curve.png
ADDED
![]() |
model/model.pickle
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73bf71607b6b99d8576a79ec96cdf97e008134e7d348477f93b8cdcf057db19e
|
3 |
+
size 3411728
|
out/confusion_matrix.png
DELETED
Binary file (16.4 kB)
|
|
out/preds.csv
DELETED
@@ -1,45 +0,0 @@
|
|
1 |
-
url,is_news_article,prediction,is_prediction_correct
|
2 |
-
https://quantumcomputingreport.com/quandela-launches-belenos-photonic-quantum-computer-with-doubling-of-qubit-count-and-4000x-power-increase/,true,true,true
|
3 |
-
https://www.nqcc.ac.uk/,false,false,true
|
4 |
-
https://quantumcomputingreport.com/qsensato-raises-e500k-560k-usd-to-advance-integrated-atomic-quantum-sensors-for-precision-sensing/,true,true,true
|
5 |
-
https://quantumcomputingreport.com/zurich-instruments-and-rohde-schwarz-join-australias-national-quantum-computing-testbed-facility/,true,true,true
|
6 |
-
https://quantumcomputingreport.com/hbku-launches-qatars-first-quantum-computing-laboratory-backed-by-10m-mod-grant/,true,true,true
|
7 |
-
https://quantumcomputingreport.com/quantinuum-releases-%ce%bbambeq-gen-ii-for-scalable-interpretable-quantum-nlp/,true,false,false
|
8 |
-
https://quantumcomputingreport.com/quobly-secures-e21m-23-7m-usd-to-industrialize-100-qubit-silicon-quantum-processor/,true,true,true
|
9 |
-
https://quantumcomputingreport.com/semiqon-and-nanoacademic-partner-to-advance-silicon-spin-qubit-research-and-education/,true,true,true
|
10 |
-
https://quantumcomputingreport.com/united-nations-itu-launches-quantum-for-good-to-align-innovation-with-global-impact/,true,false,false
|
11 |
-
https://quantumcomputingreport.com/microsoft-adds-post-quantum-cryptography-to-windows-insider-builds-and-linux/,true,true,true
|
12 |
-
https://www.nqcc.ac.uk/technology-and-research/our-research/,false,false,true
|
13 |
-
https://quantumcomputingreport.com/podcast-with-scott-davis-ceo-and-co-founder-of-vescent/,false,false,true
|
14 |
-
https://quantumzeitgeist.com/building-atoms-the-rise-of-nanotechnology-and-molecular-engineering/,false,true,false
|
15 |
-
https://quantumzeitgeist.com/networked-services-technologies-applications-and-challenges-for-advanced-communication/,false,false,true
|
16 |
-
https://quantumzeitgeist.com/amazon-braket-sdk-and-multi-platform-quantum-development/,false,true,false
|
17 |
-
https://quantumzeitgeist.com/pennylane-and-quantum-machine-learning/,false,false,true
|
18 |
-
https://quantumzeitgeist.com/quantum-physics-meets-spiritual-philosophy-exploring-the-intersection-of-string-theory-and-consciousness/,false,false,true
|
19 |
-
https://quantumzeitgeist.com/quantum-computing-transforms-financial-derivatives-pricing-for-complex-options-and-risk-analysis/,false,true,false
|
20 |
-
https://quantumzeitgeist.com/quantifying-quantum-correlations-in-symmetric-gaussian-states-with-universal-invariants/,true,false,false
|
21 |
-
https://www.horseandhound.co.uk/news/horse-life-threatening-stomach-tumour-saved-pioneering-surgery-894298,true,true,true
|
22 |
-
https://www.maddyness.com/2025/06/02/vivatech-startups-deals-annonces-ce-que-la-mission-french-tech-prevoit-pour-levenement/,false,false,true
|
23 |
-
https://www.cbsnews.com/sanfrancisco/news/padel-a-fast-growing-sport-has-become-a-new-obsession-for-silicon-valley/,false,true,false
|
24 |
-
https://www.cloudcomputing-news.net/news/microsoft-launches-its-first-cloud-region-in-malaysia/,true,true,true
|
25 |
-
https://padelmagazine.fr/best-padel-racket-awards-2025-les-meilleures-raquettes-de-lannee-devoilees/,false,false,true
|
26 |
-
https://www.horseandhound.co.uk/news/polly-dickson-obituary-894506,true,true,true
|
27 |
-
https://www.homeselect.paris/en/blog/devenir-proprietaire,false,false,true
|
28 |
-
https://www.maddyness.com/2020/10/23/salomon-aiach-interview-facebook-startups/,false,false,true
|
29 |
-
https://www.solarpowerportal.co.uk/grid-operators-must-work-together-in-aftermath-of-spain-and-portugal-blackout/,false,true,false
|
30 |
-
https://www.cloudcomputing-news.net/news/podcast/nginx-f5-api-proxy-podcast-apac-sprint-two-point-one-podcast-s02-e30/,false,false,true
|
31 |
-
https://www.farminguk.com/news/vegan-activists-attempt-to-shut-down-royal-highland-parade_66662.html,true,true,true
|
32 |
-
https://dairynews.today/news/world_milk_day_2025_health_innovation_and_sustainability_drive_india_s_milk_movement_9339211.html,false,true,false
|
33 |
-
"https://lerail.com/news/95810-signature-du-second-appel-%C3%A0-projets-gares-de-demain-entre-la-r%C3%A9gion-%C3%AEle-de-france,-%C3%AEle-de-france-mobilit%C3%A9s-et-sncf-gares-connexions",true,false,false
|
34 |
-
https://lerail.com/news/95984-drive-to-zero-2025,false,false,true
|
35 |
-
https://www.horseandhound.co.uk/news/farewell-to-twinshock-warrior-894106,true,true,true
|
36 |
-
https://www.farminguk.com/news/new-ai-driven-test-targets-silent-killer-in-uk-cattle_66604.html,true,true,true
|
37 |
-
https://www.maddyness.com/2019/05/02/growthhacking-chahab-nastar-scaleups/,false,false,true
|
38 |
-
https://www.businesstravelnews.com/Lodging/Hyatt-Creates-New-Unscripted-Collection-Brand,true,false,false
|
39 |
-
https://meuble-info.fr/falmec-gessi-le-duo-gagnant-du-point-deau/,true,false,false
|
40 |
-
https://www.cloudcomputing-news.net/news/podcast/supply-chain-automation-warehousing-distribution-rpa-best-dematic-podcast-s03-e10/,false,false,true
|
41 |
-
https://www.maddyness.com/2025/05/06/mon-petit-placement-tombe-dans-le-giron-de-malakoff-humanis/,true,false,false
|
42 |
-
https://lerail.com/technical-articles/79770-southco-s%C3%A9curisation-du-v%C3%A9hicule-%C3%A9lectrique-infrastructure-de-recharge-et-de-stockage-sur-batterie-de-r%C3%A9seau,false,false,true
|
43 |
-
https://www.watches-news.com/alpine-eagle-41-xp-cs-platinum/,true,true,true
|
44 |
-
https://www.imarcgroup.com/football-market,false,true,false
|
45 |
-
https://www.constructionnews.co.uk/contractors/balfour-beatty/balfour-beatty-court-battle-over-serious-trucks-cartel-ends-17-01-2025/,true,true,true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
out/roc_curve.png
DELETED
Binary file (29.3 kB)
|
|
prepare.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
import polars as pl
|
4 |
+
|
5 |
+
from utils.embed import embed as embed
|
6 |
+
from utils.paths import DATA
|
7 |
+
|
8 |
+
logger = logging.getLogger(__name__)
|
9 |
+
|
10 |
+
logging.basicConfig(level=logging.INFO)
|
11 |
+
|
12 |
+
|
13 |
+
def load_dataset(file_name: str) -> "pl.DataFrame":
    """Load a raw CSV file and shape it into the columns used downstream.

    The CSV is scanned lazily and collected at the end. The result has:

    * ``text``: ``meta_title``, ``meta_description`` and ``content`` joined
      with a blank line between them;
    * ``date``: the ``date`` string column parsed to a date;
    * ``is_news``, ``links``, ``paragraphs``: renamed from
      ``is_news_article``, ``link_count`` and ``paragraph_count``;
    * ``url``: passed through unchanged.

    Args:
        file_name: Location of the CSV file (callers in this file pass a
            path object built from ``DATA``).

    Returns:
        The collected frame with columns ``text``, ``is_news``, ``url``,
        ``date``, ``paragraphs`` and ``links``, in that order.
    """
    features = ["meta_title", "meta_description", "content"]
    renames = {
        "is_news_article": "is_news",
        "link_count": "links",
        "paragraph_count": "paragraphs",
    }
    return (
        pl.scan_csv(file_name)
        .with_columns(
            # concat_str propagates nulls by default: a single missing
            # feature would turn the whole text into null. Skip missing
            # parts instead so the remaining text is still usable.
            pl.concat_str(
                [pl.col(c) for c in features],
                separator="\n\n",
                ignore_nulls=True,
            ).alias("text"),
            pl.col("date").str.to_date().alias("date"),
        )
        .rename(renames)
        .select("text", "is_news", "url", "date", "paragraphs", "links")
        .collect()
    )
|
33 |
+
|
34 |
+
|
35 |
+
def main() -> None:
    """Build the parquet datasets from the train and eval CSV files.

    For each split, the CSV is loaded with ``load_dataset``, its ``text``
    column is passed to ``embed``, and the frame is written next to the CSV
    as ``<split>.parquet`` with the result stored in an ``embeds`` column.
    """
    for name in ("train", "eval"):
        df = load_dataset(DATA / f"{name}.csv")
        embeds = embed(df.get_column("text").to_list())
        # write_parquet returns None, so its result is deliberately not
        # bound to a name (the original rebound `df` to None here).
        df.with_columns(pl.Series(embeds).alias("embeds")).write_parquet(
            DATA / f"{name}.parquet"
        )
|
42 |
+
|
43 |
+
|
44 |
+
if __name__ == "__main__":
|
45 |
+
main()
|
train.py
CHANGED
@@ -1,63 +1,17 @@
|
|
1 |
import logging
|
2 |
-
import os
|
3 |
import pickle
|
4 |
-
from pathlib import Path
|
5 |
|
6 |
import matplotlib.pyplot as plt
|
7 |
-
import numpy as np
|
8 |
import polars as pl
|
9 |
import seaborn as sns
|
10 |
from numpy.typing import NDArray
|
11 |
-
from polars import DataFrame
|
12 |
from sklearn.metrics import auc, confusion_matrix, roc_curve
|
13 |
from sklearn.svm import SVC
|
14 |
|
15 |
-
from
|
16 |
-
|
17 |
-
logger = logging.getLogger(__name__)
|
18 |
|
19 |
logging.basicConfig(level=logging.INFO)
|
20 |
|
21 |
-
DATA = Path(os.environ["DATA_DIR"])
|
22 |
-
DATA.mkdir(parents=True, exist_ok=True)
|
23 |
-
MODEL = Path(os.environ["MODEL_DIR"])
|
24 |
-
MODEL.mkdir(parents=True, exist_ok=True)
|
25 |
-
OUT = Path(os.environ["OUT_DIR"])
|
26 |
-
OUT.mkdir(parents=True, exist_ok=True)
|
27 |
-
|
28 |
-
|
29 |
-
def embed(df: DataFrame):
|
30 |
-
logger.info(f"embed start {df.height}")
|
31 |
-
features = ["content", "meta_title", "meta_description"]
|
32 |
-
embeddings = []
|
33 |
-
for col in features:
|
34 |
-
train_texts = df.select(col).to_series().to_list()
|
35 |
-
embeddings.append(_embed(train_texts))
|
36 |
-
res = np.hstack(embeddings)
|
37 |
-
logger.info(f"embed done {res.shape}")
|
38 |
-
return res
|
39 |
-
|
40 |
-
|
41 |
-
def train(df: DataFrame, target: str):
|
42 |
-
logger.info(f"train start {df.height}")
|
43 |
-
X = embed(df)
|
44 |
-
y = df.select(target).to_numpy().ravel()
|
45 |
-
clf = SVC(kernel="linear", probability=True)
|
46 |
-
clf.fit(X, y)
|
47 |
-
logger.info("train done")
|
48 |
-
return clf
|
49 |
-
|
50 |
-
|
51 |
-
def save_prediction(eval_df: DataFrame, y_eval: NDArray, y_pred: NDArray) -> None:
|
52 |
-
pl.DataFrame(
|
53 |
-
{
|
54 |
-
"url": eval_df.select("url").to_series().to_list(),
|
55 |
-
"is_news_article": y_eval,
|
56 |
-
"prediction": y_pred,
|
57 |
-
"is_prediction_correct": y_eval == y_pred,
|
58 |
-
}
|
59 |
-
).write_csv(OUT / "preds.csv")
|
60 |
-
|
61 |
|
62 |
def save_roc_curve(clf, X: NDArray, y: NDArray):
|
63 |
probs = clf.predict_proba(X)[:, 1] # Probability for the positive class
|
@@ -76,7 +30,7 @@ def save_roc_curve(clf, X: NDArray, y: NDArray):
|
|
76 |
plt.title("Receiver Operating Characteristic (ROC)")
|
77 |
plt.legend(loc="lower right")
|
78 |
plt.tight_layout()
|
79 |
-
plt.savefig(
|
80 |
plt.close()
|
81 |
|
82 |
|
@@ -94,26 +48,26 @@ def save_confusion_matrix(y: NDArray, pred: NDArray):
|
|
94 |
plt.ylabel("Actual")
|
95 |
plt.title("Confusion Matrix")
|
96 |
plt.tight_layout()
|
97 |
-
plt.savefig(
|
98 |
plt.close()
|
99 |
|
100 |
|
101 |
def main() -> None:
|
102 |
-
|
103 |
-
|
104 |
-
clf
|
|
|
|
|
|
|
105 |
with open(MODEL / "model.pickle", "wb") as f:
|
106 |
pickle.dump(clf, f)
|
107 |
|
108 |
-
eval_df = pl.
|
109 |
-
|
110 |
-
|
111 |
-
eval_y = eval_df.select(target).to_numpy().ravel()
|
112 |
eval_pred = clf.predict(eval_X)
|
113 |
-
save_prediction(eval_df, eval_y, eval_pred)
|
114 |
save_confusion_matrix(eval_y, eval_pred)
|
115 |
save_roc_curve(clf, eval_X, eval_y)
|
116 |
-
logger.info("eval done")
|
117 |
|
118 |
|
119 |
if __name__ == "__main__":
|
|
|
1 |
import logging
|
|
|
2 |
import pickle
|
|
|
3 |
|
4 |
import matplotlib.pyplot as plt
|
|
|
5 |
import polars as pl
|
6 |
import seaborn as sns
|
7 |
from numpy.typing import NDArray
|
|
|
8 |
from sklearn.metrics import auc, confusion_matrix, roc_curve
|
9 |
from sklearn.svm import SVC
|
10 |
|
11 |
+
from utils.paths import DATA, IMGS, MODEL
|
|
|
|
|
12 |
|
13 |
logging.basicConfig(level=logging.INFO)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def save_roc_curve(clf, X: NDArray, y: NDArray):
|
17 |
probs = clf.predict_proba(X)[:, 1] # Probability for the positive class
|
|
|
30 |
plt.title("Receiver Operating Characteristic (ROC)")
|
31 |
plt.legend(loc="lower right")
|
32 |
plt.tight_layout()
|
33 |
+
plt.savefig(IMGS / "roc_curve.png")
|
34 |
plt.close()
|
35 |
|
36 |
|
|
|
48 |
plt.ylabel("Actual")
|
49 |
plt.title("Confusion Matrix")
|
50 |
plt.tight_layout()
|
51 |
+
plt.savefig(IMGS / "confusion_matrix.png")
|
52 |
plt.close()
|
53 |
|
54 |
|
55 |
def main() -> None:
    """Fit the classifier on the train split, save it, and plot eval metrics.

    Reads the ``embeds`` and ``is_news`` columns of ``train.parquet`` to fit a
    linear SVC, pickles the fitted model to ``MODEL / "model.pickle"``, then
    predicts on ``eval.parquet`` and saves the confusion matrix and ROC curve.
    """
    train_df = pl.read_parquet(DATA / "train.parquet")
    train_X = train_df.get_column("embeds").to_numpy()
    train_y = train_df.get_column("is_news").to_numpy()

    # probability=True is required because save_roc_curve calls predict_proba.
    clf = SVC(kernel="linear", probability=True)
    clf.fit(train_X, train_y)

    with (MODEL / "model.pickle").open("wb") as model_file:
        pickle.dump(clf, model_file)

    eval_df = pl.read_parquet(DATA / "eval.parquet")
    eval_X = eval_df.get_column("embeds").to_numpy()
    eval_y = eval_df.get_column("is_news").to_numpy()
    eval_pred = clf.predict(eval_X)

    save_confusion_matrix(eval_y, eval_pred)
    save_roc_curve(clf, eval_X, eval_y)
|
|
|
71 |
|
72 |
|
73 |
if __name__ == "__main__":
|
usage.py
CHANGED
@@ -1,45 +1,37 @@
|
|
1 |
-
import os
|
2 |
import pickle
|
3 |
from functools import cache
|
4 |
-
from pathlib import Path
|
5 |
|
6 |
-
import numpy as np
|
7 |
import polars as pl
|
8 |
from huggingface_hub import hf_hub_download
|
9 |
|
10 |
-
from embed import embed
|
11 |
-
|
12 |
-
DATA = Path(os.environ["DATA_DIR"])
|
13 |
-
|
14 |
-
features = ["content", "meta_title", "meta_description"]
|
15 |
|
16 |
|
17 |
@cache
|
18 |
def get_model():
|
19 |
-
file_name = hf_hub_download(
|
|
|
|
|
20 |
with open(file_name, "rb") as f:
|
21 |
return pickle.load(f)
|
22 |
|
23 |
|
24 |
-
def
|
25 |
-
df = pl.
|
26 |
-
return {
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
for f in features:
|
32 |
-
embeddings.append(embed([rec[f]]))
|
33 |
-
return np.hstack(embeddings)
|
34 |
|
35 |
|
36 |
def main():
|
37 |
model = get_model()
|
38 |
-
record =
|
39 |
-
embeds =
|
40 |
(pred,) = model.predict(embeds)
|
41 |
-
print(record["
|
42 |
-
print(f"is news (real): {record['is_news_article']}")
|
43 |
print(f"is news (pred): {pred}")
|
44 |
|
45 |
|
|
|
|
|
1 |
import pickle
|
2 |
from functools import cache
|
|
|
3 |
|
|
|
4 |
import polars as pl
|
5 |
from huggingface_hub import hf_hub_download
|
6 |
|
7 |
+
from utils.embed import embed
|
8 |
+
from utils.paths import DATA
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
@cache
def get_model():
    """Download the pickled classifier from the Hugging Face Hub and load it.

    Decorated with ``functools.cache``, so the download and unpickling happen
    only on the first call; later calls return the same object.
    """
    model_path = hf_hub_download(
        repo_id="opale-ai/news-classifier",
        filename="model/model.pickle",
        revision="main",
    )
    # NOTE(review): unpickling can execute arbitrary code, so this is only
    # safe for as long as the Hub repository above is trusted.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)
    return model
|
18 |
|
19 |
|
20 |
+
def get_record():
    """Return one randomly sampled eval row as a dict.

    Only the ``text`` and ``is_news`` entries of the sampled row are kept.
    """
    wanted = ("text", "is_news")
    eval_df = pl.read_parquet(DATA / "eval.parquet")
    # named=True yields the row as a {column: value} mapping in column order.
    sampled = eval_df.sample().row(0, named=True)
    return {key: value for key, value in sampled.items() if key in wanted}
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
def main():
    """Classify one random eval record and print its true and predicted label."""
    classifier = get_model()
    sample = get_record()
    vectors = embed([sample["text"]])
    # Single-element unpacking: one input text must yield exactly one prediction.
    [predicted] = classifier.predict(vectors)
    print(f"is news (real): {sample['is_news']}")
    print(f"is news (pred): {predicted}")
|
36 |
|
37 |
|
utils/__init__.py
ADDED
File without changes
|
utils/data.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
COLUMNS = [
|
2 |
+
"url",
|
3 |
+
"website",
|
4 |
+
"og_type",
|
5 |
+
"meta_description",
|
6 |
+
"meta_title",
|
7 |
+
"content",
|
8 |
+
"date",
|
9 |
+
"days_old",
|
10 |
+
"link_count",
|
11 |
+
"paragraph_count",
|
12 |
+
"average_links",
|
13 |
+
"text_to_html_ratio",
|
14 |
+
"css_title",
|
15 |
+
"is_news_article",
|
16 |
+
"reason",
|
17 |
+
]
|
embed.py → utils/embed.py
RENAMED
File without changes
|
utils/paths.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
def _env_dir(variable: str) -> Path:
    """Return the directory named by environment variable *variable*.

    The directory (and any missing parents) is created if it does not exist.
    Raises ``KeyError`` when the variable is not set.
    """
    directory = Path(os.environ[variable])
    directory.mkdir(parents=True, exist_ok=True)
    return directory


# Project directories, resolved from the environment at import time.
DATA = _env_dir("DATA_DIR")
MODEL = _env_dir("MODEL_DIR")
IMGS = _env_dir("IMGS_DIR")
|