-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdvc.lock
193 lines (193 loc) · 5.91 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
extract-alexa:
cmd: unzip data/raw/alexa_1M.zip -d data/extract-alexa/
deps:
- path: data/raw/alexa_1M.zip
md5: ac6ef564db027f3a84fd30f6e9745299
size: 10266213
outs:
- path: data/extract-alexa/
md5: e12e0e03430e729268f8a075f0287aae.dir
size: 22991158
nfiles: 1
prepare-alexa:
cmd: python src/preparation/alexa_prepare.py data/extract-alexa/alexa_1M.csv data/prepare-alexa/
deps:
- path: data/extract-alexa/alexa_1M.csv
md5: 135bcc36298ce986a3b0cc5d21ee430b
size: 22991158
- path: src/preparation/alexa_prepare.py
md5: 76e0da76accdf76601205a4bfb3762d6
size: 1700
params:
params.yaml:
preparation.seed: 0
outs:
- path: data/prepare-alexa/
md5: a300a7f00d99549d84ebb855f13a8a4c.dir
size: 23269201
nfiles: 1
prepare-dga:
cmd: python src/preparation/dga_prepare.py data/raw/dga_domains.txt data/prepare-dga/
deps:
- path: data/raw/dga_domains.txt
md5: d26191eb20cbddff6c1dac7049b77558
size: 41875
- path: src/preparation/dga_prepare.py
md5: 78e3bb8846cdacdf68dca394ade0d40a
size: 1125
outs:
- path: data/prepare-dga/
md5: e5d233c5cbe290b03d5ea4176e589751.dir
size: 81123
nfiles: 1
prepare-words:
cmd: python src/preparation/words_prepare.py data/raw/words.txt data/prepare-words/
deps:
- path: data/raw/words.txt
md5: bc663cb75feb6b860131c12db7fbe825
size: 4950996
- path: src/preparation/words_prepare.py
md5: 759cc0a5d617bbc8c259a8f5fa27ddee
size: 1029
outs:
- path: data/prepare-words/
md5: b305b5b29a37211d771f49d183b000d6.dir
size: 8399364
nfiles: 1
split-alexa:
cmd: python src/preparation/train_test_split.py data/prepare-alexa/alexa_prepared.pkl
alexa data/split-alexa/
deps:
- path: data/prepare-alexa/alexa_prepared.pkl
md5: d076c041988e29152e2c39863fbc670e
size: 23269201
- path: src/preparation/train_test_split.py
md5: f5d11d7d36097c8d648caa363b9531cc
size: 1029
outs:
- path: data/split-alexa/
md5: f641d20db7b2fc1e607ce1a4f9d641c7.dir
size: 23269987
nfiles: 2
split-dga:
cmd: python src/preparation/train_test_split.py data/prepare-dga/dga_prepared.pkl
dga data/split-dga/
deps:
- path: data/prepare-dga/dga_prepared.pkl
md5: 8c7a661ac83ec5b58abdb2b0bc5a678a
size: 81123
- path: src/preparation/train_test_split.py
md5: f5d11d7d36097c8d648caa363b9531cc
size: 1029
outs:
- path: data/split-dga/
md5: 12dc4771883829929b9e1ca6b5e405f4.dir
size: 82020
nfiles: 2
merge-training:
cmd: python src/preparation/merge_sets.py data/split-alexa/alexa_train.pkl data/split-dga/dga_train.pkl
training data/merge-training/
deps:
- path: data/split-alexa/alexa_train.pkl
md5: 4c20ede22a1a7c6f6e9cdbb10532b072
size: 20940112
- path: data/split-dga/dga_train.pkl
md5: 87f4f9f873ae049974ab2f39aeb9cb4f
size: 73964
- path: src/preparation/merge_sets.py
md5: 52cba0f0d2b96f596016b5e529a2034b
size: 863
outs:
- path: data/merge-training/
md5: aabd302ad409f42d9a621479735508b4.dir
size: 14737227
nfiles: 1
merge-test:
cmd: python src/preparation/merge_sets.py data/split-alexa/alexa_test.pkl data/split-dga/dga_test.pkl
test data/merge-test/
deps:
- path: data/split-alexa/alexa_test.pkl
md5: 590a53692b67eee8fddb7afebbec8e51
size: 2329875
- path: data/split-dga/dga_test.pkl
md5: e70a622979e09212795611792c8722b4
size: 8056
- path: src/preparation/merge_sets.py
md5: 52cba0f0d2b96f596016b5e529a2034b
size: 863
outs:
- path: data/merge-test/
md5: f1e99d25b0770b6b79f2f6544211a669.dir
size: 1639677
nfiles: 1
build-features:
cmd: python src/features/build_features.py data/merge-training/merged_training_set.pkl
data/merge-test/merged_test_set.pkl data/prepare-alexa/alexa_prepared.pkl data/prepare-words/words_prepared.pkl
data/build-features/
deps:
- path: data/merge-test/merged_test_set.pkl
md5: 6b476bb50dcb5b7ebb2b8e11cdbbb11a
size: 1639677
- path: data/merge-training/merged_training_set.pkl
md5: f025cac858e46bf1344995785fcab290
size: 14737227
- path: data/prepare-alexa/alexa_prepared.pkl
md5: d076c041988e29152e2c39863fbc670e
size: 23269201
- path: data/prepare-words/words_prepared.pkl
md5: 1a7965f16d2553a435cff9dfb224384c
size: 8399364
- path: src/features/build_features.py
md5: deb7f7d520aacd18c824b1a1a89a72c8
size: 4044
params:
params.yaml:
features.alexa_vectorization.max_df: 1.0
features.alexa_vectorization.min_df: 0.0001
features.alexa_vectorization.range_high: 5
features.alexa_vectorization.range_low: 3
features.words_vectorization.max_df: 1.0
features.words_vectorization.min_df: 1e-05
features.words_vectorization.range_high: 5
features.words_vectorization.range_low: 3
outs:
- path: data/build-features/
md5: 0aa778910d8e1719087eca0cbf5dbe7f.dir
size: 43632046
nfiles: 2
train-model:
cmd: python src/models/train_model.py data/build-features/training_set.pkl data/train-model/
deps:
- path: data/build-features/training_set.pkl
md5: c2074fde7ad7713ac7e865d042bb8525
size: 39258157
- path: src/models/train_model.py
md5: 9b78fac605e43aa1711edb4f73a0917f
size: 1580
params:
params.yaml:
models.n_estimators: 20
models.seed: 0
outs:
- path: data/train-model/
md5: 3db8c64632a3fa7aa525f7223334ea26.dir
size: 4045738
nfiles: 1
evaluate:
cmd: python src/evaluation/evaluate_model.py data/train-model/trained_model.pkl
data/build-features/test_set.pkl data/evaluate/
deps:
- path: data/build-features/test_set.pkl
md5: 775247e16834dca9c3a9ae1663bbd23a
size: 4373889
- path: data/train-model/trained_model.pkl
md5: af9ab83fd5076d1ba38b81595b6d3e66
size: 4045738
- path: src/evaluation/evaluate_model.py
md5: 9ef16d2fddec0afb44e881ae38f9cbdb
size: 1074
outs:
- path: data/evaluate/
md5: 848b8b8d4e3afce30523108382db6055.dir
size: 1020597
nfiles: 1