-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdvc.yaml
104 lines (104 loc) · 3.4 KB
/
dvc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# DVC pipeline definition.
#
# Each entry under `stages` declares one pipeline step:
#   cmd    - shell command DVC runs for the stage
#   deps   - files the command reads; a change invalidates the stage
#   params - tracked parameter keys; a change invalidates the stage
#   outs   - paths the command produces and DVC tracks
#   plots  - outputs additionally registered as DVC plots
#
# Long commands use folded block scalars (`>-`): the lines are joined with
# single spaces and the trailing newline is stripped, so each command is
# still one single-line shell invocation.
stages:
  # Unzip the raw Alexa archive into data/extract-alexa/.
  extract-alexa:
    cmd: unzip data/raw/alexa_1M.zip -d data/extract-alexa/
    deps:
      - data/raw/alexa_1M.zip
    outs:
      - data/extract-alexa/

  # Run alexa_prepare.py on the extracted CSV. The CSV dependency lives
  # inside the output directory of `extract-alexa`, which is what orders
  # this stage after it.
  prepare-alexa:
    cmd: >-
      python src/preparation/alexa_prepare.py
      data/extract-alexa/alexa_1M.csv
      data/prepare-alexa/
    deps:
      - data/extract-alexa/alexa_1M.csv
      - src/preparation/alexa_prepare.py
    params:
      - preparation.seed
    outs:
      - data/prepare-alexa/

  # Run dga_prepare.py on the raw DGA domain list.
  prepare-dga:
    cmd: >-
      python src/preparation/dga_prepare.py
      data/raw/dga_domains.txt
      data/prepare-dga/
    deps:
      - data/raw/dga_domains.txt
      - src/preparation/dga_prepare.py
    outs:
      - data/prepare-dga/

  # Run words_prepare.py on the raw word list.
  prepare-words:
    cmd: >-
      python src/preparation/words_prepare.py
      data/raw/words.txt
      data/prepare-words/
    deps:
      - data/raw/words.txt
      - src/preparation/words_prepare.py
    outs:
      - data/prepare-words/

  # Run train_test_split.py on the prepared Alexa set; the literal `alexa`
  # is passed as the second argument.
  split-alexa:
    cmd: >-
      python src/preparation/train_test_split.py
      data/prepare-alexa/alexa_prepared.pkl
      alexa
      data/split-alexa/
    deps:
      - data/prepare-alexa/alexa_prepared.pkl
      - src/preparation/train_test_split.py
    outs:
      - data/split-alexa/

  # Same script as `split-alexa`, applied to the prepared DGA set with the
  # literal `dga` as the second argument.
  split-dga:
    cmd: >-
      python src/preparation/train_test_split.py
      data/prepare-dga/dga_prepared.pkl
      dga
      data/split-dga/
    deps:
      - data/prepare-dga/dga_prepared.pkl
      - src/preparation/train_test_split.py
    outs:
      - data/split-dga/

  # Run merge_sets.py on the Alexa and DGA training splits; the literal
  # `training` is passed as the third argument.
  merge-training:
    cmd: >-
      python src/preparation/merge_sets.py
      data/split-alexa/alexa_train.pkl
      data/split-dga/dga_train.pkl
      training
      data/merge-training/
    deps:
      - data/split-alexa/alexa_train.pkl
      - data/split-dga/dga_train.pkl
      - src/preparation/merge_sets.py
    outs:
      - data/merge-training/

  # Run merge_sets.py on the Alexa and DGA test splits; the literal `test`
  # is passed as the third argument.
  merge-test:
    cmd: >-
      python src/preparation/merge_sets.py
      data/split-alexa/alexa_test.pkl
      data/split-dga/dga_test.pkl
      test
      data/merge-test/
    deps:
      - data/split-alexa/alexa_test.pkl
      - data/split-dga/dga_test.pkl
      - src/preparation/merge_sets.py
    outs:
      - data/merge-test/

  # Run build_features.py. Argument order: merged training set, merged test
  # set, prepared Alexa set, prepared word list, output directory.
  build-features:
    cmd: >-
      python src/features/build_features.py
      data/merge-training/merged_training_set.pkl
      data/merge-test/merged_test_set.pkl
      data/prepare-alexa/alexa_prepared.pkl
      data/prepare-words/words_prepared.pkl
      data/build-features/
    deps:
      - data/merge-test/merged_test_set.pkl
      - data/merge-training/merged_training_set.pkl
      - data/prepare-alexa/alexa_prepared.pkl
      - data/prepare-words/words_prepared.pkl
      - src/features/build_features.py
    params:
      - features.alexa_vectorization.max_df
      - features.alexa_vectorization.min_df
      - features.alexa_vectorization.range_high
      - features.alexa_vectorization.range_low
      - features.words_vectorization.max_df
      - features.words_vectorization.min_df
      - features.words_vectorization.range_high
      - features.words_vectorization.range_low
    outs:
      - data/build-features/

  # Run train_model.py on the training feature set.
  train-model:
    cmd: >-
      python src/models/train_model.py
      data/build-features/training_set.pkl
      data/train-model/
    deps:
      - data/build-features/training_set.pkl
      - src/models/train_model.py
    params:
      - models.n_estimators
      - models.seed
    outs:
      - data/train-model/

  # Run evaluate_model.py on the trained model and the test feature set.
  # The output directory is registered under `plots` rather than `outs`.
  evaluate:
    cmd: >-
      python src/evaluation/evaluate_model.py
      data/train-model/trained_model.pkl
      data/build-features/test_set.pkl
      data/evaluate/
    deps:
      - data/build-features/test_set.pkl
      - data/train-model/trained_model.pkl
      - src/evaluation/evaluate_model.py
    plots:
      - data/evaluate/