# dvc.yaml (148 lines, 5.11 KB)
# Repository generated from the etalab-ia/data_science_template template.
# DVC pipeline definition.
#
# Three parallel chains of four stages each (prepare -> featurize -> train ->
# eval), one per dataset directory under data_dvc/:
#   *_fdp         -> data_dvc/salary
#   *_cni         -> data_dvc/cni_recto
#   *_quittances  -> data_dvc/rent_receipts
#
# Conventions visible in this file:
#   - Every prepare stage tracks the same shared params (prepare.seed,
#     prepare.train_split, prepare.debug).
#   - Featurize and train stages pass their own stage name as the last CLI
#     argument and track params under that same name (presumably the scripts
#     use it to select their params section; confirm in src/models_training).
#   - Eval stages track no params and publish <dataset>/results/results.json
#     as a metric with `cache: false`, i.e. kept out of the DVC cache.
#   - Long commands use `>-` folded scalars: the lines are joined with single
#     spaces, so each `cmd` is still one single-line command string.
stages:
  # ------------------------------------------------------------------ salary
  # Builds data_dvc/salary/prepared from the full CSV data set.
  prepare_fdp:
    cmd: >-
      python -m src.models_training.prepare
      data_dvc/salary/full_data_set.csv
      data_dvc/salary/prepared
    deps:
      - data_dvc/salary/full_data_set.csv
      - src/models_training/prepare.py
    params:
      - prepare.seed
      - prepare.train_split
      - prepare.debug
    outs:
      - data_dvc/salary/prepared

  # Turns the prepared data into features; re-runs when the augmentation or
  # preprocessing packages change.
  featurize_fdp:
    cmd: >-
      python -m src.models_training.featurization
      data_dvc/salary/prepared
      data_dvc/salary/features
      featurize_fdp
    deps:
      - data_dvc/salary/prepared
      - src/augmentation
      - src/preprocessing
      - src/models_training/featurization.py
    params:
      - featurize_fdp.data_augmentation
    outs:
      - data_dvc/salary/features

  # Trains a model from the features and the scheme.json file.
  train_fdp:
    cmd: >-
      python -m src.models_training.train
      data_dvc/salary/features
      data_dvc/salary/scheme.json
      data_dvc/salary/model
      train_fdp
    deps:
      - data_dvc/salary/scheme.json
      - data_dvc/salary/features
      - src/models_training/utils.py
      - src/models_training/train.py
    params:
      - train_fdp.optimize
      - train_fdp.n_iter
      - train_fdp.n_estimators
      - train_fdp.learning_rate
      - train_fdp.max_depth
      - train_fdp.max_leaf_nodes
    outs:
      - data_dvc/salary/model

  # Evaluates the trained model; results.json is tracked as a DVC metric.
  eval_fdp:
    cmd: >-
      python -m src.models_training.eval
      data_dvc/salary/features
      data_dvc/salary/scheme.json
      data_dvc/salary/model
      data_dvc/salary/results
    deps:
      - data_dvc/salary/scheme.json
      - data_dvc/salary/model
      - data_dvc/salary/features
      - src/models_training/utils.py
      - src/models_training/eval.py
    metrics:
      - data_dvc/salary/results/results.json:
          cache: false

  # --------------------------------------------------------------- cni_recto
  # Builds data_dvc/cni_recto/prepared from the full CSV data set.
  prepare_cni:
    cmd: >-
      python -m src.models_training.prepare
      data_dvc/cni_recto/full_data_set.csv
      data_dvc/cni_recto/prepared
    deps:
      - data_dvc/cni_recto/full_data_set.csv
      - src/models_training/prepare.py
    params:
      - prepare.seed
      - prepare.train_split
      - prepare.debug
    outs:
      - data_dvc/cni_recto/prepared

  # Turns the prepared data into features; re-runs when the augmentation or
  # preprocessing packages change.
  featurize_cni:
    cmd: >-
      python -m src.models_training.featurization
      data_dvc/cni_recto/prepared
      data_dvc/cni_recto/features
      featurize_cni
    deps:
      - data_dvc/cni_recto/prepared
      - src/augmentation
      - src/preprocessing
      - src/models_training/featurization.py
    params:
      - featurize_cni.data_augmentation
    outs:
      - data_dvc/cni_recto/features

  # Trains a model from the features and the scheme.json file.
  train_cni:
    cmd: >-
      python -m src.models_training.train
      data_dvc/cni_recto/features
      data_dvc/cni_recto/scheme.json
      data_dvc/cni_recto/model
      train_cni
    deps:
      - data_dvc/cni_recto/scheme.json
      - data_dvc/cni_recto/features
      - src/models_training/utils.py
      - src/models_training/train.py
    params:
      - train_cni.optimize
      - train_cni.n_iter
      - train_cni.n_estimators
      - train_cni.learning_rate
      - train_cni.max_depth
      - train_cni.max_leaf_nodes
    outs:
      - data_dvc/cni_recto/model

  # Evaluates the trained model; results.json is tracked as a DVC metric.
  eval_cni:
    cmd: >-
      python -m src.models_training.eval
      data_dvc/cni_recto/features
      data_dvc/cni_recto/scheme.json
      data_dvc/cni_recto/model
      data_dvc/cni_recto/results
    deps:
      - data_dvc/cni_recto/scheme.json
      - data_dvc/cni_recto/model
      - data_dvc/cni_recto/features
      - src/models_training/utils.py
      - src/models_training/eval.py
    metrics:
      - data_dvc/cni_recto/results/results.json:
          cache: false

  # ----------------------------------------------------------- rent_receipts
  # Builds data_dvc/rent_receipts/prepared from the full CSV data set.
  prepare_quittances:
    cmd: >-
      python -m src.models_training.prepare
      data_dvc/rent_receipts/full_data_set.csv
      data_dvc/rent_receipts/prepared
    deps:
      - data_dvc/rent_receipts/full_data_set.csv
      - src/models_training/prepare.py
    params:
      - prepare.seed
      - prepare.train_split
      - prepare.debug
    outs:
      - data_dvc/rent_receipts/prepared

  # Turns the prepared data into features; re-runs when the augmentation or
  # preprocessing packages change.
  featurize_quittances:
    cmd: >-
      python -m src.models_training.featurization
      data_dvc/rent_receipts/prepared
      data_dvc/rent_receipts/features
      featurize_quittances
    deps:
      - data_dvc/rent_receipts/prepared
      - src/augmentation
      - src/preprocessing
      - src/models_training/featurization.py
    params:
      - featurize_quittances.data_augmentation
    outs:
      - data_dvc/rent_receipts/features

  # Trains a model from the features and the scheme.json file.
  train_quittances:
    cmd: >-
      python -m src.models_training.train
      data_dvc/rent_receipts/features
      data_dvc/rent_receipts/scheme.json
      data_dvc/rent_receipts/model
      train_quittances
    deps:
      - data_dvc/rent_receipts/scheme.json
      - data_dvc/rent_receipts/features
      - src/models_training/utils.py
      - src/models_training/train.py
    params:
      - train_quittances.optimize
      - train_quittances.n_iter
      - train_quittances.n_estimators
      - train_quittances.learning_rate
      - train_quittances.max_depth
      - train_quittances.max_leaf_nodes
    outs:
      - data_dvc/rent_receipts/model

  # Evaluates the trained model; results.json is tracked as a DVC metric.
  eval_quittances:
    cmd: >-
      python -m src.models_training.eval
      data_dvc/rent_receipts/features
      data_dvc/rent_receipts/scheme.json
      data_dvc/rent_receipts/model
      data_dvc/rent_receipts/results
    deps:
      - data_dvc/rent_receipts/scheme.json
      - data_dvc/rent_receipts/model
      - data_dvc/rent_receipts/features
      - src/models_training/utils.py
      - src/models_training/eval.py
    metrics:
      - data_dvc/rent_receipts/results/results.json:
          cache: false