generated from etalab-ia/data_science_template
-
Notifications
You must be signed in to change notification settings - Fork 2
/
dvc.lock
347 lines (347 loc) · 11 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
schema: '2.0'
stages:
prepare:
cmd: python src/models_training/prepare.py data/salary_for_training/full_data_set.csv data/salary_for_training/prepared
deps:
- path: data/salary_for_training/full_data_set.csv
md5: cc2880d6e7f69f9d71d7f3243fa73518
size: 9510913
- path: src/models_training/prepare.py
md5: bd5536859714ee3b42ea6fdd9a2d2335
size: 1358
params:
params.yaml:
prepare.seed: 20170428
prepare.train_split: 0.8
outs:
- path: data/salary_for_training/prepared
md5: 6473c7a938de647ff441f19d89ab17e7.dir
size: 12492943
nfiles: 2
prepare_fdp:
cmd: python -m src.models_training.prepare data_dvc/salary/full_data_set.csv data_dvc/salary/prepared
deps:
- path: data_dvc/salary/full_data_set.csv
md5: 0256f85148aac849c82dd349b7df2633
size: 9510913
- path: src/models_training/prepare.py
md5: 0b4e07f1f2cfbea12ca3e9a134dde033
size: 1499
params:
params.yaml:
prepare.debug: false
prepare.seed: 42
prepare.train_split: 0.8
outs:
- path: data_dvc/salary/prepared
md5: 0f277dae0ad4ed4253b4681ebe23331f.dir
size: 12425044
nfiles: 2
featurize_fdp:
cmd: python -m src.models_training.featurization data_dvc/salary/prepared data_dvc/salary/features
featurize_fdp
deps:
- path: data_dvc/salary/prepared
md5: 0f277dae0ad4ed4253b4681ebe23331f.dir
size: 12425044
nfiles: 2
- path: src/augmentation
md5: 2809293a799a62a4dee1785a927b7efe.dir
size: 8573
nfiles: 5
- path: src/models_training/featurization.py
md5: c3ae75a6d32bf3d1e13c3039e6a931a7
size: 3024
- path: src/preprocessing
md5: 1043cd3b9f485d23b2bff8a495368402.dir
size: 3284284
nfiles: 10
params:
params.yaml:
featurize_fdp.data_augmentation: true
outs:
- path: data_dvc/salary/features
md5: f899ed0df5600549c157058dc3726fe1.dir
size: 1123912421
nfiles: 2
train_fpd:
cmd: python -m src.models_training.train data/salary_for_training/features fdp.json
model/fdp
deps:
- path: data/salary_for_training/features
md5: 005cba8c397903b2d0b4a30302ec2f0b.dir
size: 1125716330
nfiles: 1
- path: src/models_training/train.py
md5: bf5f36a3164f84e7ad5fbb8963fa08d6
size: 3181
- path: src/models_training/utils.py
md5: 4efa540f2e168d86dc08005299021556
size: 844
params:
params.yaml:
train.n_estimators: 200
train.n_iter: 100
train.optimize: true
outs:
- path: model/fdp
md5: c233dc66b513341bae6f6537d8ad37ac.dir
size: 12166376
nfiles: 5
prepare_cni:
cmd: python -m src.models_training.prepare data_dvc/cni_recto/full_data_set.csv data_dvc/cni_recto/prepared
deps:
- path: data_dvc/cni_recto/full_data_set.csv
md5: dc555776d6ed00338d5b6818fc7bd36f
size: 1413091
- path: src/models_training/prepare.py
md5: 0b4e07f1f2cfbea12ca3e9a134dde033
size: 1499
params:
params.yaml:
prepare.debug: false
prepare.seed: 42
prepare.train_split: 0.8
outs:
- path: data_dvc/cni_recto/prepared
md5: 23c0ff7c84a61bc02574446ed5353611.dir
size: 1866552
nfiles: 2
featurize_cni:
cmd: python -m src.models_training.featurization data_dvc/cni_recto/prepared data_dvc/cni_recto/features
featurize_cni
deps:
- path: data_dvc/cni_recto/prepared
md5: 23c0ff7c84a61bc02574446ed5353611.dir
size: 1866552
nfiles: 2
- path: src/augmentation
md5: 2809293a799a62a4dee1785a927b7efe.dir
size: 8573
nfiles: 5
- path: src/models_training/featurization.py
md5: c3ae75a6d32bf3d1e13c3039e6a931a7
size: 3024
- path: src/preprocessing
md5: 1043cd3b9f485d23b2bff8a495368402.dir
size: 3284284
nfiles: 10
params:
params.yaml:
featurize_cni.data_augmentation: false
outs:
- path: data_dvc/cni_recto/features
md5: 14df848e498c4b9c150b89fb64bb9b78.dir
size: 9282864
nfiles: 2
train_fdp:
cmd: python -m src.models_training.train data_dvc/salary/features data_dvc/salary/scheme.json
data_dvc/salary/model train_fdp
deps:
- path: data_dvc/salary/features
md5: f899ed0df5600549c157058dc3726fe1.dir
size: 1123912421
nfiles: 2
- path: data_dvc/salary/scheme.json
md5: 13c424e30841b81c7745436a75440531
size: 448
- path: src/models_training/train.py
md5: e6e5f3cf690974fc62d64c57b5dc3f82
size: 3531
- path: src/models_training/utils.py
md5: 84f6f9b5912986052a50cfbdf6f400ea
size: 1827
params:
params.yaml:
train_fdp.learning_rate: 0.06
train_fdp.max_depth: 65
train_fdp.max_leaf_nodes: 60
train_fdp.n_estimators: 200
train_fdp.n_iter: 50
train_fdp.optimize: false
outs:
- path: data_dvc/salary/model
md5: 6254866b8e6c8bf0da5d541fa73f3ad9.dir
size: 7906776
nfiles: 5
eval_fdp:
cmd: python -m src.models_training.eval data_dvc/salary/features data_dvc/salary/scheme.json
data_dvc/salary/model data_dvc/salary/results
deps:
- path: data_dvc/salary/features
md5: f899ed0df5600549c157058dc3726fe1.dir
size: 1123912421
nfiles: 2
- path: data_dvc/salary/model
md5: 6254866b8e6c8bf0da5d541fa73f3ad9.dir
size: 7906776
nfiles: 5
- path: data_dvc/salary/scheme.json
md5: 13c424e30841b81c7745436a75440531
size: 448
- path: src/models_training/eval.py
md5: aa714a415f83ba39a955c34790a0131b
size: 2034
- path: src/models_training/utils.py
md5: 84f6f9b5912986052a50cfbdf6f400ea
size: 1827
outs:
- path: data_dvc/salary/results/results.json
md5: bd0201a5fef7875ecd875e7c15f51c7c
size: 220
train_cni:
cmd: python -m src.models_training.train data_dvc/cni_recto/features data_dvc/cni_recto/scheme.json
data_dvc/cni_recto/model train_cni
deps:
- path: data_dvc/cni_recto/features
md5: 14df848e498c4b9c150b89fb64bb9b78.dir
size: 9282864
nfiles: 2
- path: data_dvc/cni_recto/scheme.json
md5: fc09dc8b7dbce83baa7b9f52718b94e5
size: 285
- path: src/models_training/train.py
md5: e6e5f3cf690974fc62d64c57b5dc3f82
size: 3531
- path: src/models_training/utils.py
md5: 84f6f9b5912986052a50cfbdf6f400ea
size: 1827
params:
params.yaml:
train_cni.learning_rate: 0.2
train_cni.max_depth: 3
train_cni.max_leaf_nodes: 12
train_cni.n_estimators: 200
train_cni.n_iter: 50
train_cni.optimize: false
outs:
- path: data_dvc/cni_recto/model
md5: 3487d2b3beb513a0ad8ad570bf0d0ccb.dir
size: 409088
nfiles: 3
eval_cni:
cmd: python -m src.models_training.eval data_dvc/cni_recto/features data_dvc/cni_recto/scheme.json
data_dvc/cni_recto/model data_dvc/cni_recto/results
deps:
- path: data_dvc/cni_recto/features
md5: 14df848e498c4b9c150b89fb64bb9b78.dir
size: 9282864
nfiles: 2
- path: data_dvc/cni_recto/model
md5: 3487d2b3beb513a0ad8ad570bf0d0ccb.dir
size: 409088
nfiles: 3
- path: data_dvc/cni_recto/scheme.json
md5: fc09dc8b7dbce83baa7b9f52718b94e5
size: 285
- path: src/models_training/eval.py
md5: aa714a415f83ba39a955c34790a0131b
size: 2034
- path: src/models_training/utils.py
md5: 84f6f9b5912986052a50cfbdf6f400ea
size: 1827
outs:
- path: data_dvc/cni_recto/results/results.json
md5: ddf90a3ede4a746669d18c02d24421c2
size: 130
prepare_quittances:
cmd: python -m src.models_training.prepare data_dvc/rent_receipts/full_data_set.csv data_dvc/rent_receipts/prepared
deps:
- path: data_dvc/rent_receipts/full_data_set.csv
md5: 21259821c56eab34cd179b8f26ed6fc6
size: 20584470
- path: src/models_training/prepare.py
md5: 0b4e07f1f2cfbea12ca3e9a134dde033
size: 1499
params:
params.yaml:
prepare.debug: false
prepare.seed: 42
prepare.train_split: 0.8
outs:
- path: data_dvc/rent_receipts/prepared
md5: 1f67d66c8e365c1b0433309969fe8bb7.dir
size: 27436611
nfiles: 2
featurize_quittances:
cmd: python -m src.models_training.featurization data_dvc/rent_receipts/prepared data_dvc/rent_receipts/features
featurize_quittances
deps:
- path: data_dvc/rent_receipts/prepared
md5: 1f67d66c8e365c1b0433309969fe8bb7.dir
size: 27436611
nfiles: 2
- path: src/augmentation
md5: 2809293a799a62a4dee1785a927b7efe.dir
size: 8573
nfiles: 5
- path: src/models_training/featurization.py
md5: c3ae75a6d32bf3d1e13c3039e6a931a7
size: 3024
- path: src/preprocessing
md5: 1043cd3b9f485d23b2bff8a495368402.dir
size: 3284284
nfiles: 10
params:
params.yaml:
featurize_quittances.data_augmentation: false
outs:
- path: data_dvc/rent_receipts/features
md5: 8ca2038f17e4e6184eb5b3ac909f921b.dir
size: 339124565
nfiles: 2
train_quittances:
cmd: python -m src.models_training.train data_dvc/rent_receipts/features data_dvc/rent_receipts/scheme.json
data_dvc/rent_receipts/model train_quittances
deps:
- path: data_dvc/rent_receipts/features
md5: 8ca2038f17e4e6184eb5b3ac909f921b.dir
size: 339124565
nfiles: 2
- path: data_dvc/rent_receipts/scheme.json
md5: 2f77f9d2d067a43e365de5f4fc9bf804
size: 575
- path: src/models_training/train.py
md5: e6e5f3cf690974fc62d64c57b5dc3f82
size: 3531
- path: src/models_training/utils.py
md5: 84f6f9b5912986052a50cfbdf6f400ea
size: 1827
params:
params.yaml:
train_quittances.learning_rate: 0.2
train_quittances.max_depth: 3
train_quittances.max_leaf_nodes: 12
train_quittances.n_estimators: 200
train_quittances.n_iter: 50
train_quittances.optimize: false
outs:
- path: data_dvc/rent_receipts/model
md5: 41822b08295359318c1d0c8cda086bde.dir
size: 1422016
nfiles: 6
eval_quittances:
cmd: python -m src.models_training.eval data_dvc/rent_receipts/features data_dvc/rent_receipts/scheme.json
data_dvc/rent_receipts/model data_dvc/rent_receipts/results
deps:
- path: data_dvc/rent_receipts/features
md5: 8ca2038f17e4e6184eb5b3ac909f921b.dir
size: 339124565
nfiles: 2
- path: data_dvc/rent_receipts/model
md5: 41822b08295359318c1d0c8cda086bde.dir
size: 1422016
nfiles: 6
- path: data_dvc/rent_receipts/scheme.json
md5: 2f77f9d2d067a43e365de5f4fc9bf804
size: 575
- path: src/models_training/eval.py
md5: aa714a415f83ba39a955c34790a0131b
size: 2034
- path: src/models_training/utils.py
md5: 84f6f9b5912986052a50cfbdf6f400ea
size: 1827
outs:
- path: data_dvc/rent_receipts/results/results.json
md5: b1766677ec3f4ed3e078f063b0c4a947
size: 277