Skip to content

Commit

Permalink
fix: SparkML StandardScaler conversion fails when withStd or withMean is set to true (#555)
Browse files Browse the repository at this point in the history

Signed-off-by: Jason Wang <jasowang@microsoft.com>
  • Loading branch information
memoryz authored Jun 5, 2022
1 parent 0a40d6d commit 4e1fe87
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions onnxmltools/convert/sparkml/operator_converters/scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def convert_sparkml_scaler(scope, operator, container):
attrs = {'name': scope.get_unique_operator_name(op_type)}
if isinstance(op, StandardScalerModel):
C = operator.inputs[0].type.shape[1]
attrs['offset'] = op.mean if op.getOrDefault("withMean") else [0.0] * C
attrs['scale'] = [1.0 / x for x in op.std] if op.getOrDefault("withStd") else [1.0] * C
attrs['offset'] = op.mean.toArray() if op.getOrDefault("withMean") else [0.0] * C
attrs['scale'] = [1.0 / x for x in op.std.toArray()] if op.getOrDefault("withStd") else [1.0] * C
elif isinstance(op, MinMaxScalerModel):
epsilon = 1.0e-8 # to avoid dividing by zero
attrs['offset'] = [x for x in op.originalMin]
Expand Down
2 changes: 1 addition & 1 deletion tests/sparkml/test_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test_standard_scaler(self):
(1, Vectors.dense([2.0, 1.1, 1.0]),),
(2, Vectors.dense([3.0, 10.1, 3.0]),)
], ["id", "features"])
scaler = StandardScaler(inputCol='features', outputCol='scaled_features')
scaler = StandardScaler(inputCol='features', outputCol='scaled_features', withStd=True, withMean=True)
model = scaler.fit(data)

# the input names must match the inputCol(s) above
Expand Down

0 comments on commit 4e1fe87

Please sign in to comment.