From 80c4dbb93888df6ac6a062806b04ee902c7289aa Mon Sep 17 00:00:00 2001 From: "Steven S. Lyubomirsky" Date: Mon, 7 Dec 2020 17:10:32 -0800 Subject: [PATCH] Decouple data_layout from etab --- python/tvm/relay/frontend/keras.py | 138 ++++++++++++----------------- 1 file changed, 58 insertions(+), 80 deletions(-) diff --git a/python/tvm/relay/frontend/keras.py b/python/tvm/relay/frontend/keras.py index 54e4d0178e7ee..877d1815676d8 100644 --- a/python/tvm/relay/frontend/keras.py +++ b/python/tvm/relay/frontend/keras.py @@ -65,10 +65,8 @@ def _convert_recurrent_activation(inexpr, keras_layer): def _convert_activation( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument - if data_layout is None: - data_layout = etab.data_layout if isinstance(keras_layer, str): act_type = keras_layer else: @@ -127,12 +125,10 @@ def _convert_activation( ) -def _convert_advanced_activation(inexpr, keras_layer, etab, input_shape=None, data_layout=None): +def _convert_advanced_activation(inexpr, keras_layer, etab, data_layout, input_shape=None): act_type = type(keras_layer).__name__ if input_shape is None: input_shape = keras_layer.input_shape - if data_layout is None: - data_layout = etab.data_layout if act_type == "Softmax": axis = keras_layer.axis @@ -241,7 +237,7 @@ def _convert_permute( def _convert_embedding( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument indices = inexpr weightList = keras_layer.get_weights() @@ -252,7 +248,7 @@ def _convert_embedding( def _convert_dense( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument weightList = keras_layer.get_weights() weight = etab.new_const(weightList[0].transpose([1, 0])) @@ -278,18 +274,20 @@ def _convert_dense( else: act_type = keras_layer.activation.__name__ if act_type != "linear": - out = _convert_activation(out, act_type, etab) + out = _convert_activation(out, act_type, etab, data_layout) if input_dim > 2: out = _op.expand_dims(out, axis=0) return out -def _convert_convolution1d(inexpr, keras_layer, etab): +def _convert_convolution1d(inexpr, keras_layer, etab, data_layout, input_shape=None): + if input_shape is None: + input_shape = keras_layer.input_shape _check_data_format(keras_layer) weightList = keras_layer.get_weights() weight = weightList[0] - if etab.data_layout == "NWC": + if data_layout == "NWC": kernel_layout = "WIO" else: kernel_layout = "OIW" @@ -297,7 +295,7 @@ def _convert_convolution1d(inexpr, keras_layer, etab): "Kernel layout with {} is not supported for operator Convolution1D " "in frontend Keras." 
) - raise tvm.error.OpAttributeUnImplemented(msg.format(etab.data_layout)) + raise tvm.error.OpAttributeUnImplemented(msg.format(data_layout)) is_deconv = type(keras_layer).__name__ == "Conv1DTranspose" @@ -322,7 +320,7 @@ def _convert_convolution1d(inexpr, keras_layer, etab): "strides": [stride_w], "dilation": dilation, "padding": [0], - "data_layout": etab.data_layout, + "data_layout": data_layout, "kernel_layout": kernel_layout, } params["channels"] = n_filters @@ -331,7 +329,7 @@ def _convert_convolution1d(inexpr, keras_layer, etab): pass # calculate the padding values elif keras_layer.padding == "same": - in_w = keras_layer.input_shape[1] + in_w = input_shape[1] pad_w = _get_pad_pair(in_w, dilated_kernel_w, stride_w) params["padding"] = [pad_w[0], pad_w[1]] else: @@ -359,7 +357,7 @@ def _convert_convolution1d(inexpr, keras_layer, etab): return out -def _convert_convolution(inexpr, keras_layer, etab, input_shape=None, data_layout=None): +def _convert_convolution(inexpr, keras_layer, etab, data_layout, input_shape=None): _check_data_format(keras_layer) is_deconv = type(keras_layer).__name__ == "Conv2DTranspose" is_depthconv = type(keras_layer).__name__ == "DepthwiseConv2D" @@ -367,8 +365,6 @@ def _convert_convolution(inexpr, keras_layer, etab, input_shape=None, data_layou weight = weightList[0] if input_shape is None: input_shape = keras_layer.input_shape - if data_layout is None: - data_layout = etab.data_layout if data_layout == "NHWC": if is_depthconv: @@ -441,19 +437,17 @@ def _convert_convolution(inexpr, keras_layer, etab, input_shape=None, data_layou else: act_type = keras_layer.activation.__name__ if act_type != "linear": - out = _convert_activation(out, act_type, etab) + out = _convert_activation(out, act_type, etab, data_layout) return out -def _convert_convolution3d(inexpr, keras_layer, etab, input_shape=None, data_layout=None): +def _convert_convolution3d(inexpr, keras_layer, etab, data_layout, input_shape=None): _check_data_format(keras_layer) weightList = keras_layer.get_weights() weight = weightList[0] if input_shape is None: input_shape = keras_layer.input_shape - if data_layout is None: - data_layout = etab.data_layout if data_layout == "NDHWC": kernel_layout = "DHWIO" @@ -530,10 +524,8 @@ def _convert_convolution3d(inexpr, keras_layer, etab, input_shape=None, data_lay return out -def _convert_separable_convolution(inexpr, keras_layer, etab, input_shape=None, data_layout=None): +def _convert_separable_convolution(inexpr, keras_layer, etab, data_layout, input_shape=None): _check_data_format(keras_layer) - if data_layout is None: - data_layout = etab.data_layout if data_layout == "NHWC": kernel_layout = "HWOI" @@ -607,16 +599,14 @@ def _convert_separable_convolution(inexpr, keras_layer, etab, input_shape=None, else: act_type = keras_layer.activation.__name__ if act_type != "linear": - out = _convert_activation(out, act_type, etab) + out = _convert_activation(out, act_type, etab, data_layout) return out def _convert_flatten( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) - if data_layout is None: - data_layout = etab.data_layout # NCHW -> NHWC so that dense can be correctly converted if data_layout == "NCHW": @@ -625,11 +615,9 @@ def _convert_flatten( def _convert_pooling( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument 
_check_data_format(keras_layer) - if data_layout is None: - data_layout = etab.data_layout pool_type = type(keras_layer).__name__ # global pool in keras = global pool + flatten in relay @@ -640,11 +628,13 @@ def _convert_pooling( if pool_type == "GlobalMaxPooling2D": return _convert_flatten( - _op.nn.global_max_pool2d(inexpr, **global_pool_params), keras_layer, etab + _op.nn.global_max_pool2d(inexpr, **global_pool_params), + keras_layer, etab, data_layout ) if pool_type == "GlobalAveragePooling2D": return _convert_flatten( - _op.nn.global_avg_pool2d(inexpr, **global_pool_params), keras_layer, etab + _op.nn.global_avg_pool2d(inexpr, **global_pool_params), + keras_layer, etab, data_layout ) pool_h, pool_w = keras_layer.pool_size stride_h, stride_w = keras_layer.strides @@ -676,13 +666,13 @@ def _convert_pooling( ) -def _convert_pooling3d(inexpr, keras_layer, etab, input_shape=None, data_layout=None): +def _convert_pooling3d( + inexpr, keras_layer, etab, data_layout, input_shape=None +): # pylint: disable=unused-argument _check_data_format(keras_layer) pool_type = type(keras_layer).__name__ if input_shape is None: input_shape = keras_layer.input_shape - if data_layout is None: - data_layout = etab.data_layout if pool_type not in ["MaxPooling3D", "AveragePooling3D"]: raise tvm.error.OpNotImplemented( @@ -724,12 +714,9 @@ def _convert_pooling3d(inexpr, keras_layer, etab, input_shape=None, data_layout= def _convert_global_pooling3d( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) - if data_layout is None: - data_layout = etab.data_layout - pool_type = type(keras_layer).__name__ global_pool_params = {"layout": data_layout} @@ -746,11 +733,9 @@ def _convert_global_pooling3d( def _convert_upsample( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) - if data_layout is None: - data_layout = etab.data_layout upsample_type = type(keras_layer).__name__ params = {} if upsample_type == "UpSampling1D": @@ -779,11 +764,9 @@ def _convert_upsample( def _convert_upsample3d( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) - if data_layout is None: - data_layout = etab.data_layout params = {} d, h, w = keras_layer.size @@ -796,7 +779,7 @@ def _convert_upsample3d( def _convert_cropping( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) crop_type = type(keras_layer).__name__ @@ -817,9 +800,7 @@ def _convert_cropping( ) -def _convert_batchnorm(inexpr, keras_layer, etab, input_shape=None, data_layout=None): - if data_layout is None: - data_layout = etab.data_layout +def _convert_batchnorm(inexpr, keras_layer, etab, data_layout, input_shape=None): if input_shape is None: input_shape = keras_layer.input_shape if data_layout == "NCHW" or len(input_shape) < 4: @@ -855,11 +836,9 @@ def _convert_batchnorm(inexpr, keras_layer, etab, input_shape=None, data_layout= def _convert_padding( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument 
_check_data_format(keras_layer) - if data_layout is None: - data_layout = etab.data_layout padding_type = type(keras_layer).__name__ padding = keras_layer.padding @@ -889,11 +868,9 @@ def _convert_padding( def _convert_padding3d( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) - if data_layout is None: - data_layout = etab.data_layout padding = keras_layer.padding @@ -936,13 +913,11 @@ def _convert_padding3d( def _convert_concat( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) if input_shape is None: input_shape = keras_layer.input_shape - if data_layout is None: - data_layout = etab.data_layout if data_layout == "NHWC" or len(input_shape[0]) < 4: axis = -1 @@ -951,12 +926,12 @@ def _convert_concat( return _op.concatenate(_as_list(inexpr), axis=axis) -def _convert_reshape(inexpr, keras_layer, etab, input_shape=None, data_layout=None): +def _convert_reshape( + inexpr, keras_layer, etab, data_layout, input_shape=None +): # pylint: disable=unused-argument _check_data_format(keras_layer) if input_shape is None: input_shape = keras_layer.input_shape - if data_layout is None: - data_layout = etab.data_layout inshape = input_shape # includes batch tshape = keras_layer.target_shape # no batch @@ -987,7 +962,7 @@ def _convert_reshape(inexpr, keras_layer, etab, input_shape=None, data_layout=No def _convert_lstm( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) if input_shape is None: @@ -1017,16 +992,18 @@ def _convert_lstm( gates = _op.split(gate, indices_or_sections=4, axis=1) in_gate = _convert_recurrent_activation(gates[0], keras_layer) in_transform = _convert_recurrent_activation(gates[1], keras_layer) - next_c = in_transform * next_c + in_gate * _convert_activation(gates[2], keras_layer, None) + next_c = in_transform * next_c + in_gate * _convert_activation( + gates[2], keras_layer, etab, data_layout + ) out_gate = _convert_recurrent_activation(gates[3], keras_layer) - next_h = out_gate * _convert_activation(next_c, keras_layer, None) + next_h = out_gate * _convert_activation(next_c, keras_layer, etab, data_layout) out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0]) out = _op.reshape(next_h, newshape=out_shape) return [out, next_h, next_c] def _convert_simple_rnn( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument _check_data_format(keras_layer) if not isinstance(inexpr, list): @@ -1045,14 +1022,14 @@ def _convert_simple_rnn( prev_op = _op.nn.batch_flatten(prev_op) ixh2 = _op.nn.dense(prev_op, recurrent_weight, units=units) output = ixh + ixh2 - output = _convert_activation(output, keras_layer, None) + output = _convert_activation(output, keras_layer, etab, data_layout) out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0]) output = _op.reshape(output, newshape=out_shape) return [output, output] def _convert_gru( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument 
_check_data_format(keras_layer) if not isinstance(inexpr, list): @@ -1088,7 +1065,7 @@ def _convert_gru( rec_act_r = _convert_recurrent_activation(x_r + recurrent_r, keras_layer) units = keras_layer.units recurrent_h = _op.nn.dense(rec_act_r * h_tm1_op, rec_weights[1], units=units) - act_hh = _convert_activation(x_h + recurrent_h, keras_layer, None) + act_hh = _convert_activation(x_h + recurrent_h, keras_layer, etab, data_layout) # previous and candidate state mixed by update gate output = rec_act_z * h_tm1_op + (_expr.const(1.0, dtype="float32") - rec_act_z) * act_hh out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0]) @@ -1097,7 +1074,7 @@ def _convert_gru( def _convert_repeat_vector( - inexpr, keras_layer, etab, input_shape=None, data_layout=None + inexpr, keras_layer, etab, data_layout, input_shape=None ): # pylint: disable=unused-argument if input_shape is None: input_shape = keras_layer.input_shape @@ -1110,14 +1087,12 @@ def _convert_repeat_vector( return out -def _convert_time_distributed(inexpr, keras_layer, etab, input_shape=None, data_layout=None): +def _convert_time_distributed(inexpr, keras_layer, etab, data_layout, input_shape=None): # TimeDistributed: split input tensor along the second dimension (assumed to be time), # apply inner layer to each split individually, # and then combine the results if input_shape is None: input_shape = keras_layer.input_shape - if data_layout is None: - data_layout = etab.data_layout assert len(input_shape) >= 2, "Input to TimeDistributed must have at least two dimensions" @@ -1125,7 +1100,7 @@ def _convert_time_distributed(inexpr, keras_layer, etab, input_shape=None, data_ inner_input_shape = [d for (i, d) in enumerate(input_shape) if i != 1] # for NDHWC, inner data layout will drop the D - inner_data_layout = None + inner_data_layout = data_layout if data_layout == "NDHWC": inner_data_layout = "NHWC" @@ -1140,7 +1115,7 @@ def _convert_time_distributed(inexpr, keras_layer, etab, input_shape=None, data_ ) conversion_func = lambda expr: _convert_map[inner_layer_op_name]( - expr, inner_layer, etab, input_shape=inner_input_shape, data_layout=inner_data_layout + expr, inner_layer, etab, inner_data_layout, input_shape=inner_input_shape ) split_dim = input_shape[1] @@ -1170,7 +1145,7 @@ def _convert_time_distributed(inexpr, keras_layer, etab, input_shape=None, data_ return _expr.Let(split_var, split_input.astuple(), _op.stack(splits, axis=1)) -def _default_skip(inexpr, keras_layer, _): # pylint: disable=unused-argument +def _default_skip(inexpr, keras_layer, etab, data_layout): # pylint: disable=unused-argument """Layers that can be skipped because they are train time only.""" return inexpr @@ -1257,7 +1232,7 @@ def _check_unsupported_layers(model): ) -def keras_op_to_relay(inexpr, keras_layer, outname, etab): +def keras_op_to_relay(inexpr, keras_layer, outname, etab, data_layout): """Convert a Keras layer to a Relay expression and update the expression table. Parameters @@ -1273,13 +1248,16 @@ def keras_op_to_relay(inexpr, keras_layer, outname, etab): etab : relay.frontend.common.ExprTable The global expression table to be updated. 
+ + data_layout : str + The input data layout """ op_name = type(keras_layer).__name__ if op_name not in _convert_map: raise tvm.error.OpNotImplemented( "Operator {} is not supported for frontend Keras.".format(op_name) ) - outs = _convert_map[op_name](inexpr, keras_layer, etab) + outs = _convert_map[op_name](inexpr, keras_layer, etab, data_layout) outs = _as_list(outs) for t_idx, out in enumerate(outs): name = outname + ":" + str(t_idx) @@ -1352,7 +1330,6 @@ def _convert_input_layer(keras_layer): "NHWC", "NDHWC", ], "Layout must be one of 'NWC', 'NCHW', NHWC or NDHWC" - etab.data_layout = layout for keras_layer in model.layers: if isinstance(keras_layer, input_layer_class): _convert_input_layer(keras_layer) @@ -1407,7 +1384,8 @@ def _convert_input_layer(keras_layer): inexpr.append(expr) if len(inexpr) == 1: inexpr = inexpr[0] - keras_op_to_relay(inexpr, keras_layer, keras_layer.name + ":" + str(node_idx), etab) + keras_op_to_relay(inexpr, keras_layer, keras_layer.name + ":" + str(node_idx), + etab, layout) # model._output_coordinates contains out_node(oc[0]), node_index(oc[1]) and tensor_index(oc[2]) # Get all output nodes in etab using the name made from above values. # The out exprs were added to etab in keras_op_to_relay using this name.
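
Caller-side context for this refactor: the layout is still chosen once through the `layout` argument of `relay.frontend.from_keras` (asserted above to be one of "NWC", "NCHW", "NHWC", or "NDHWC"), but it is now threaded to every `_convert_*` helper as an explicit positional `data_layout` argument instead of being stored on the `ExprTable` (`etab`). A minimal usage sketch follows; `keras_model` and the input name "input_1" are hypothetical placeholders:

    from tvm import relay

    # The layout passed here is what each converter now receives directly as
    # its `data_layout` argument; nothing is read from etab.data_layout anymore.
    shape_dict = {"input_1": (1, 3, 224, 224)}  # N, C, H, W to match layout="NCHW"
    mod, params = relay.frontend.from_keras(keras_model, shape=shape_dict, layout="NCHW")

Passing the layout per call also lets wrappers such as TimeDistributed hand the inner layer a different layout (e.g. "NHWC" for the inner layer of an "NDHWC" input) without mutating shared converter state.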