diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index 36436ec368ff..102dce073729 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -43,6 +43,7 @@ Do not modify directly.* |||[9, 13]|**T** = tensor(double), tensor(float)| |||[7, 8]|**T** = tensor(double), tensor(float)| |BitShift|*in* X:**T**
*in* Y:**T**
*out* Z:**T**|11+|**T** = tensor(uint32), tensor(uint64), tensor(uint8)| +|BlackmanWindow|*in* size:**T1**
*out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)
**T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |Cast|*in* input:**T1**
*out* output:**T2**|13+|**T1** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[6, 12]|**T1** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |Ceil|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float)| @@ -69,6 +70,7 @@ Do not modify directly.* |Crop|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| |CumSum|*in* x:**T**
*in* axis:**T2**
*out* y:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int32), tensor(int64)| |||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int32), tensor(int64)| +|DFT|*in* input:**T1**
*in* dft_length:**T2**
*out* output:**T1**|17+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int32), tensor(int64)| |DepthToSpace|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float)| |||[11, 12]|**T** = tensor(double), tensor(float)| |||[1, 10]|**T** = tensor(double), tensor(float)| @@ -125,6 +127,8 @@ Do not modify directly.* |GreaterOrEqual|*in* A:**T**
*in* B:**T**
*out* C:**T1**|16+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[12, 15]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |GridSample|*in* X:**T1**
*in* grid:**T1**
*out* Y:**T2**|16+|**T1** = tensor(float)
**T2** = tensor(float)| +|HammingWindow|*in* size:**T1**
*out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)
**T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|HannWindow|*in* size:**T1**
*out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)
**T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |HardSigmoid|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float)| |Hardmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float)| |||[11, 12]|**T** = tensor(float)| @@ -186,6 +190,7 @@ Do not modify directly.* |MeanVarianceNormalization|*in* X:**T**
*out* Y:**T**

or

*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float)| |||[9, 12]|**T** = tensor(float)| |||[1, 8]|**T** = tensor(float)| +|MelWeightMatrix|*in* num_mel_bins:**T1**
*in* dft_length:**T1**
*in* sample_rate:**T1**
*in* lower_edge_hertz:**T2**
*in* upper_edge_hertz:**T2**
*out* output:**T3**|17+|**T1** = tensor(int32), tensor(int64)
**T2** = tensor(float)
**T3** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |Min|*in* data_0:**T**
*out* min:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||12|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||[8, 11]|**T** = tensor(double), tensor(float)| @@ -277,6 +282,7 @@ Do not modify directly.* |RoiAlign|*in* X:**T1**
*in* rois:**T1**
*in* batch_indices:**T2**
*out* Y:**T1**|16+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int64)| |||[10, 15]|**T1** = tensor(double), tensor(float)
**T2** = tensor(int64)| |Round|*in* X:**T**
*out* Y:**T**|11+|**T** = tensor(double), tensor(float), tensor(float16)| +|STFT|*in* signal:**T1**
*in* frame_step:**T2**
*in* window:**T1**
*in* frame_length:**T2**
*out* output:**T1**|17+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int32), tensor(int64)| |Scale|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| |ScaledTanh|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| |Scan|*in* initial_state_and_scan_inputs:**V**
*out* final_state_and_scan_outputs:**V**

or

*in* sequence_lens:**I**
*in* initial_state_and_scan_inputs:**V**
*out* final_state_and_scan_outputs:**V**|16+|**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc index 2068b3c3e3f1..d89d30b62c73 100644 --- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc @@ -41,16 +41,6 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, FastG class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, NGramRepeatBlock); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, BifurcationDetector); -#ifdef BUILD_MS_EXPERIMENTAL_OPS -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, DFT); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, IDFT); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, HannWindow); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, HammingWindow); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, BlackmanWindow); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, MelWeightMatrix); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSExperimentalDomain, 1, STFT); -#endif - // ******** Start: Quantization ******************* // class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, MatMulInteger16); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearGlobalAveragePool); @@ -224,16 +214,6 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - -#ifdef 
BUILD_MS_EXPERIMENTAL_OPS - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, -#endif // These ops were experimental ops in onnx domain which have been removed now. We add them here as // contrib ops to main backward compatibility BuildKernelCreateInfo, diff --git a/onnxruntime/contrib_ops/cpu/signal/dft.cc b/onnxruntime/contrib_ops/cpu/signal/dft.cc deleted file mode 100644 index 87aac4497654..000000000000 --- a/onnxruntime/contrib_ops/cpu/signal/dft.cc +++ /dev/null @@ -1,606 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#ifdef BUILD_MS_EXPERIMENTAL_OPS - -#include "core/providers/common.h" -#include "core/framework/op_kernel.h" -#include "core/util/math_cpuonly.h" -#include "Eigen/src/Core/Map.h" -#include "dft.h" -#include - -#include "core/platform/threadpool.h" - -#include -#include - -namespace onnxruntime { -namespace contrib { - -ONNX_OPERATOR_KERNEL_EX( - DFT, - kMSExperimentalDomain, - 1, - kCpuExecutionProvider, - KernelDefBuilder().TypeConstraint("T1", BuildKernelDefConstraints()) - .TypeConstraint("T2", BuildKernelDefConstraints()), - DFT); - -ONNX_OPERATOR_KERNEL_EX( - IDFT, - kMSExperimentalDomain, - 1, - kCpuExecutionProvider, - KernelDefBuilder().TypeConstraint("T1", BuildKernelDefConstraints()) - .TypeConstraint("T2", BuildKernelDefConstraints()), - IDFT); - -ONNX_OPERATOR_KERNEL_EX( - STFT, - kMSExperimentalDomain, - 1, - kCpuExecutionProvider, - KernelDefBuilder().MayInplace(0, 0).TypeConstraint("T1", BuildKernelDefConstraints()) - .TypeConstraint("T2", BuildKernelDefConstraints()), - STFT); - -// dedupe with the other one in window_functions.cc -template -static T get_scalar_value_from_tensor(const Tensor* tensor) { - ORT_ENFORCE(tensor->Shape().Size() == 1, "ratio input should have a single value."); - - auto data_type = 
tensor->DataType()->AsPrimitiveDataType()->GetDataType(); - switch (data_type) { - case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_INT32: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_INT64: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - default: - ORT_THROW("Unsupported input data type of ", data_type); - } -} - -static bool is_real_valued_signal(const onnxruntime::TensorShape & shape) { - return shape.NumDimensions() == 2 || shape[shape.NumDimensions() - 1] == 1; -} - -static bool is_complex_valued_signal(const onnxruntime::TensorShape& shape) { - return shape.NumDimensions() > 2 && shape[shape.NumDimensions() - 1] == 2; -} - -static bool is_power_of_2(size_t size) { - unsigned n_bits = 0; - while (size != 0) { - n_bits += size & 1; - size = size >> 1; - } - return n_bits == 1; -} - -static const unsigned char BitReverseTable256[] = -{ - 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, - 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, - 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, - 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, - 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, - 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, - 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, - 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, - 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 
0x31, 0xB1, 0x71, 0xF1, - 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, - 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, - 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, - 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, - 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, - 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, - 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF}; - -template -uint32_t bit_reverse(uint32_t num) { - uint32_t rev = (BitReverseTable256[num & 0xff] << 24) | - (BitReverseTable256[(num >> 8) & 0xff] << 16) | - (BitReverseTable256[(num >> 16) & 0xff] << 8) | - (BitReverseTable256[(num >> 24) & 0xff]); - return static_cast(((uint64_t)rev) >> (32 - TSignificantBits)); -} - -template -static inline T bit_reverse(T num, unsigned significant_bits) { - switch (significant_bits) { - case 0: return static_cast(bit_reverse<0>(static_cast(num))); - case 1: return static_cast(bit_reverse<1>(static_cast(num))); - case 2: return static_cast(bit_reverse<2>(static_cast(num))); - case 3: return static_cast(bit_reverse<3>(static_cast(num))); - case 4: return static_cast(bit_reverse<4>(static_cast(num))); - case 5: return static_cast(bit_reverse<5>(static_cast(num))); - case 6: return static_cast(bit_reverse<6>(static_cast(num))); - case 7: return static_cast(bit_reverse<7>(static_cast(num))); - case 8: return static_cast(bit_reverse<8>(static_cast(num))); - case 9: return static_cast(bit_reverse<9>(static_cast(num))); - case 10: return static_cast(bit_reverse<10>(static_cast(num))); - case 11: return static_cast(bit_reverse<11>(static_cast(num))); - case 12: return static_cast(bit_reverse<12>(static_cast(num))); - case 13: return 
static_cast(bit_reverse<13>(static_cast(num))); - case 14: return static_cast(bit_reverse<14>(static_cast(num))); - case 15: return static_cast(bit_reverse<15>(static_cast(num))); - case 16: return static_cast(bit_reverse<16>(static_cast(num))); - case 17: return static_cast(bit_reverse<17>(static_cast(num))); - case 18: return static_cast(bit_reverse<18>(static_cast(num))); - case 19: return static_cast(bit_reverse<19>(static_cast(num))); - case 20: return static_cast(bit_reverse<20>(static_cast(num))); - case 21: return static_cast(bit_reverse<21>(static_cast(num))); - case 22: return static_cast(bit_reverse<22>(static_cast(num))); - case 23: return static_cast(bit_reverse<23>(static_cast(num))); - case 24: return static_cast(bit_reverse<24>(static_cast(num))); - case 25: return static_cast(bit_reverse<25>(static_cast(num))); - case 26: return static_cast(bit_reverse<26>(static_cast(num))); - case 27: return static_cast(bit_reverse<27>(static_cast(num))); - case 28: return static_cast(bit_reverse<28>(static_cast(num))); - case 29: return static_cast(bit_reverse<29>(static_cast(num))); - case 30: return static_cast(bit_reverse<30>(static_cast(num))); - case 31: return static_cast(bit_reverse<31>(static_cast(num))); - case 32: return static_cast(bit_reverse<32>(static_cast(num))); - default: ORT_THROW("Unsupported bit size."); - } -} - -template -static T compute_angular_velocity(size_t number_of_samples, bool inverse) { - // Calculate fundamental angular velocity - static const T pi = static_cast(3.14159265); - static const T tau = 2 * pi; - T inverse_switch = inverse ? 
1.f : -1.f; - T angular_velocity = inverse_switch * tau / number_of_samples; - return angular_velocity; -} - -template -static Status fft_radix2(OpKernelContext* /*ctx*/, - const Tensor* X, Tensor* Y, - size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis, size_t dft_length, - const Tensor* window, bool is_onesided, bool inverse, - std::vector>& V, - std::vector>& temp_output) { - - // Get shape and significant bits - const auto& X_shape = X->Shape(); - size_t number_of_samples = static_cast(X_shape[axis]); - unsigned significant_bits = static_cast(log2(dft_length)); - - // Get data - auto* X_data = const_cast(reinterpret_cast(X->DataRaw())) + X_offset; - // Get window - U* window_data = nullptr; - if (window) { - window_data = const_cast(reinterpret_cast(window->DataRaw())); - } - - size_t Y_data_stride = 1; - std::complex* Y_data; - if (is_onesided) { - if (temp_output.size() != dft_length) { - temp_output = std::vector>(dft_length); - } - Y_data = temp_output.data(); - } else { - Y_data = reinterpret_cast*>(Y->MutableDataRaw()) + Y_offset; - Y_data_stride = Y_stride; - } - - auto angular_velocity = compute_angular_velocity(dft_length, inverse); - - // Create vandermonde matrix V ordered with the bit-reversed permutation - if (V.size() != dft_length) { - V = std::vector>(dft_length); // e^(i *2*pi / N * k) - for (size_t i = 0; i < dft_length; i++) { - size_t bit_reversed_index = bit_reverse(i, significant_bits); - V[bit_reversed_index] = std::complex(cos(i * angular_velocity), sin(i * angular_velocity)); - } - } - - for (size_t i = 0; i < dft_length; i++) { - size_t bit_reversed_index = bit_reverse(i, significant_bits); - auto x = (bit_reversed_index < number_of_samples) ? * (X_data + bit_reversed_index * X_stride) : 0; - auto window_element = window_data ? 
*(window_data + bit_reversed_index) : 1; - *(Y_data + i*Y_data_stride) = std::complex(1, 0) * x * window_element; - } - - // Run fft_radix2 - unsigned current_significant_bits = 0; - for (size_t i = 2; i <= dft_length; i <<= 1) { - size_t midpoint = i >> 1; - current_significant_bits++; - - for (size_t k = 0; k < midpoint; k++) { - auto first_idx = bit_reverse(k, current_significant_bits); - auto second_idx = bit_reverse(midpoint + k, current_significant_bits); - for (size_t j = 0; j < dft_length; j += i) { - auto even_index = k + j; - auto odd_index = k + j + midpoint; - std::complex* even = (Y_data + even_index * Y_data_stride); - std::complex* odd = (Y_data + odd_index * Y_data_stride); - std::complex first = *even + (V[first_idx] * *odd); - std::complex second = *even + (V[second_idx] * *odd); - *even = first; - *odd = second; - } - } - } - - // Scale the output if inverse - if (inverse) { - for (size_t i = 0; i < dft_length; i++) { - std::complex& val = *(Y_data + i * Y_data_stride); - val /= static_cast(dft_length); - } - } - - if (is_onesided) { - auto destination = reinterpret_cast*>(Y->MutableDataRaw()) + Y_offset; - for (size_t i = 0; i < dft_length; i++) { - *(destination + Y_stride * i) = *(Y_data + i * Y_data_stride); - } - } - - return Status::OK(); -} - -template -static Status dft_naive(const Tensor* X, Tensor* Y, - size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, int64_t axis, - size_t dft_length, const Tensor* window, bool inverse) { - // Get shape and significant bits - const auto& X_shape = X->Shape(); - size_t number_of_samples = static_cast(X_shape[axis]); - const auto& Y_shape = Y->Shape(); - size_t dft_output_size = static_cast(Y_shape[axis]); - - // Get data - auto* X_data = const_cast(reinterpret_cast(X->DataRaw())) + X_offset; - auto* Y_data = reinterpret_cast*>(Y->MutableDataRaw()) + Y_offset; - - U* window_data = nullptr; - if (window) { - window_data = const_cast(reinterpret_cast(window->DataRaw())); - } - - auto 
angular_velocity = compute_angular_velocity(dft_length, inverse); - - for (size_t i = 0; i < dft_output_size; i++) { - std::complex& out = *(Y_data + i*Y_stride); - out.real(0); - out.imag(0); - - for (size_t j = 0; j < dft_length; j++) { // vectorize over this loop - auto exponential = std::complex(cos(i * j * angular_velocity), sin(i * j * angular_velocity)); - auto window_element = window_data ? * (window_data + j) : 1; - auto x = (j < number_of_samples) ? *(X_data + j * X_stride) : 0; - auto element = x * window_element; - out += exponential * element; - } - - if (inverse) { - out /= static_cast(dft_length); - } - } - - return Status::OK(); -} - -template -static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Tensor* Y, int64_t axis, int64_t dft_length, const Tensor* window, bool is_onesided, bool inverse, - std::vector>& V, std::vector>& temp_output) { - // Get shape - const auto& X_shape = X->Shape(); - const auto& Y_shape = Y->Shape(); - - auto batch_and_signal_rank = X->Shape().NumDimensions(); - auto total_dfts = static_cast(X->Shape().Size() / X->Shape()[axis]); - - auto is_input_real = X->Shape().NumDimensions() == 2 || X->Shape()[X->Shape().NumDimensions() - 1] == 1; - auto complex_input_factor = is_input_real ? 
1 : 2; - if (X->Shape().NumDimensions() > 2) - { - total_dfts /= X->Shape()[X->Shape().NumDimensions() - 1]; - batch_and_signal_rank -= 1; - } - - // Calculate x/y offsets/strides - for (size_t i = 0; i < total_dfts; i++) - { - size_t X_offset = 0; - size_t X_stride = X_shape.SizeFromDimension(axis+1) / complex_input_factor; - size_t cumulative_packed_stride = total_dfts; - size_t temp = i; - for (size_t r = 0; r < batch_and_signal_rank; r++) { - if (r == static_cast(axis)) - { - continue; - } - cumulative_packed_stride /= X_shape[r]; - auto index = temp / cumulative_packed_stride; - temp -= (index * cumulative_packed_stride); - X_offset += index * X_shape.SizeFromDimension(r + 1) / complex_input_factor; - } - - size_t Y_offset = 0; - size_t Y_stride = Y_shape.SizeFromDimension(axis + 1) / 2; - cumulative_packed_stride = total_dfts; - temp = i; - for (size_t r = 0; r < batch_and_signal_rank; r++) { - if (r == static_cast(axis)) - { - continue; - } - cumulative_packed_stride /= X_shape[r]; - auto index = temp / cumulative_packed_stride; - temp -= (index * cumulative_packed_stride); - Y_offset += index * Y_shape.SizeFromDimension(r + 1) / 2; - } - - if (is_power_of_2(dft_length)) { - ORT_RETURN_IF_ERROR((fft_radix2(ctx, X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, is_onesided, inverse, V, temp_output))); - } else { - ORT_RETURN_IF_ERROR((dft_naive(X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, inverse))); - } - } - - return Status::OK(); -} - -static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, bool is_onesided, bool inverse) { - // Get input shape - const auto* X = ctx->Input(0); - const auto* dft_length = ctx->Input(1); - const auto& X_shape = X->Shape(); - const auto is_real_valued = is_real_valued_signal(X_shape); - const auto is_complex_valued = is_complex_valued_signal(X_shape); - - // Get the rank of the input tensor - // Ensure that the axis is in the valid range of [-rank, rank) - 
auto rank = static_cast(X_shape.GetDims().size()); - if (!(-rank <= axis && axis < rank)) { - ORT_RETURN_IF(!(-rank <= axis && axis < rank), - "axis attribute value ", - axis, - " is invalid for a tensor of rank ", - rank); - } - axis = (axis >= 0 ? axis : axis + rank); - - int64_t number_of_samples = static_cast(X_shape[axis]); - if (dft_length) { - const auto& dft_length_shape = dft_length->Shape(); - ORT_RETURN_IF(!dft_length_shape.IsScalar(), "dft_length must be a scalar value."); - number_of_samples = static_cast(get_scalar_value_from_tensor(dft_length)); - ORT_RETURN_IF(number_of_samples <= 0, "dft_length must be greater than zero."); - } - - // Get the DFT output size. Onesided will return only the unique values! - // note: x >> 1 === std::floor(x / 2.f) - auto dft_output_size = is_onesided ? - ((number_of_samples >> 1) + 1) : - number_of_samples; - - // Get output shape - auto Y_shape = onnxruntime::TensorShape(X_shape); - if (X_shape.NumDimensions() == 2) - { - Y_shape = onnxruntime::TensorShape({X_shape[0], dft_output_size, 2}); - } else - { - Y_shape[Y_shape.NumDimensions() - 1] = 2; - } - Y_shape[axis] = dft_output_size; - auto Y = ctx->Output(0, Y_shape); - - // Get data type - auto data_type = X->DataType(); - - auto element_size = data_type->Size(); - if (element_size == sizeof(float)) { - std::vector> V; - std::vector> temp_output; - if (is_real_valued) { - ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); - } else if (is_complex_valued) { - ORT_RETURN_IF_ERROR((discrete_fourier_transform>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); - } else { - ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. 
It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type); - } - } else if (element_size == sizeof(double)) { - std::vector> V; - std::vector> temp_output; - if (is_real_valued) { - ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); - } else if (is_complex_valued) { - ORT_RETURN_IF_ERROR((discrete_fourier_transform>(ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); - } else { - ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type); - } - } else { - ORT_THROW("Unsupported input data type of ", data_type); - } - - return Status::OK(); -} - -Status DFT::Compute(OpKernelContext* ctx) const { - ORT_RETURN_IF_ERROR( - discrete_fourier_transform(ctx, - axis_, - is_onesided_, - is_inverse_)); - return Status::OK(); -} - -Status IDFT::Compute(OpKernelContext* ctx) const { - ORT_RETURN_IF_ERROR( - discrete_fourier_transform(ctx, - axis_, - false /*is_onesided_*/, - true /*is_inverse_*/)); - return Status::OK(); -} - -template -static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_onesided, bool /*inverse*/) { - // Attr("onesided"): default = 1 - // Input(0, "signal") type = T1 - // Input(1, "frame_length") type = T2 - // Input(2, "window") type = T1, optional - // Input(3, "frame_step") type = T2 - // Output(0, "output") type = T1 - - // Get signal - const auto* signal = ctx->Input(0); - const auto frame_step = get_scalar_value_from_tensor(ctx->Input(1)); - const auto* window = ctx->Input(2); - const auto* frame_length_tensor = ctx->Input(3); - - // Get input signal shape - const auto& signal_shape = signal->Shape(); - const auto batch_size = signal_shape[0]; - const auto signal_size = signal_shape[1]; - const auto 
signal_components = - signal_shape.NumDimensions() == 2 ? 1 : signal_shape.NumDimensions() == 3 ? signal_shape[2] : 0; // error - ORT_ENFORCE(signal_components == 1 || signal_components == 2, "Ensure that the signal has either 1 or 2 components."); - - // Get the frame length - int64_t frame_length = std::numeric_limits::min(); - if (frame_length_tensor) - { - frame_length = get_scalar_value_from_tensor(frame_length_tensor); - } - - // Get window length - int64_t window_length = std::numeric_limits::min(); - if (window) { - window_length = window->Shape()[0]; - } - - // The frame_length and window inputs are generally used interchangably, and should match! - if (frame_length != std::numeric_limits::min() && - window_length != std::numeric_limits::min()) { - ORT_ENFORCE(frame_length == window_length, "If both frame_length and window are set, then the size of the window must be equal to the frame_length."); - } - - // Calculate the window size with preference to the window input. - const auto window_size = window ? window->Shape()[0] : frame_length; - ORT_ENFORCE(window_size < signal_size, "Ensure that the dft size is smaller than the signal."); - - // Calculate the number of dfts to run - const auto n_dfts = static_cast(std::floor((signal_size - window_size) / static_cast(frame_step)) + 1); - - // Calculate the output spectra length (onesided will return only the unique values) - // note: x >> 1 === std::floor(x / 2.f) - const auto dft_output_size = - is_onesided ? 
- (window_size >> 1) + 1 : - window_size; - - // Get/create the output mutable data - auto output_spectra_shape = onnxruntime::TensorShape({batch_size, n_dfts, dft_output_size, 2}); - auto Y = ctx->Output(0, output_spectra_shape); - auto Y_data = reinterpret_cast(Y->MutableDataRaw()); - - // Get/create the signal mutable data - auto* signal_data = const_cast(reinterpret_cast(signal->DataRaw())); - - // Define tensor shapes for each dft run - const int64_t output_components = 2; - auto dft_input_shape = onnxruntime::TensorShape({1, window_size, signal_components}); - auto dft_output_shape = onnxruntime::TensorShape({1, dft_output_size, output_components}); - - std::vector> V; - std::vector> temp_output; - - // Run each dft of each batch as if it was a real-valued batch size 1 dft operation - for (int64_t batch_idx = 0; batch_idx < batch_size; batch_idx++) { - for (int64_t i = 0; i < n_dfts; i++) { - auto input_frame_begin = - signal_data + - (batch_idx * signal_size * signal_components) + - (i * frame_step * signal_components); - - auto output_frame_begin = - Y_data + - (batch_idx * n_dfts * dft_output_size * output_components) + - (i * dft_output_size * output_components); - - // Tensors do not own the backing memory, so no worries on destruction - auto input = - onnxruntime::Tensor( - signal->DataType(), - dft_input_shape, - input_frame_begin, - signal->Location(), - 0); - - auto output = - onnxruntime::Tensor( - Y->DataType(), - dft_output_shape, - output_frame_begin, - Y->Location(), - 0); - - // Run individual dft - ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, &input, &output, 1, window_size, window, is_onesided, false, V, temp_output))); - } - } - - return Status::OK(); -} - -Status STFT::Compute(OpKernelContext* ctx) const { - // Attr("onesided"): default = 1 - // Input(0, "signal") type = T1 - // Input(1, "frame_length") type = T2 - // Input(2, "window") type = T1, optional - // Input(3, "frame_step") type = T2 - // Output(0, "output") type = T1 - - 
// Get signal shape - const auto* signal = ctx->Input(0); - const auto& signal_shape = signal->Shape(); - const auto is_real_valued = is_real_valued_signal(signal_shape); - const auto is_complex_valued = is_complex_valued_signal(signal_shape); - - // Get data type - auto data_type = signal->DataType(); - - const auto element_size = data_type->Size(); - if (element_size == sizeof(float)) { - if (is_real_valued) { - ORT_RETURN_IF_ERROR((short_time_fourier_transform(ctx, is_onesided_, false))); - } else if (is_complex_valued) { - ORT_RETURN_IF_ERROR((short_time_fourier_transform>(ctx, is_onesided_, false))); - } else { - ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type); - } - } else if (element_size == sizeof(double)) { - if (is_real_valued) { - ORT_RETURN_IF_ERROR((short_time_fourier_transform(ctx, is_onesided_, false))); - } else if (is_complex_valued) { - ORT_RETURN_IF_ERROR((short_time_fourier_transform>(ctx, is_onesided_, false))); - } else { - ORT_THROW("Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for complex inputs.", data_type); - } - } else { - ORT_THROW("Unsupported input data type of ", data_type); - } - - return Status::OK(); -} - -} // namespace contrib -} // namespace onnxruntime - -#endif diff --git a/onnxruntime/contrib_ops/cpu/signal/window_functions.cc b/onnxruntime/contrib_ops/cpu/signal/window_functions.cc deleted file mode 100644 index 29256adb264d..000000000000 --- a/onnxruntime/contrib_ops/cpu/signal/window_functions.cc +++ /dev/null @@ -1,334 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#ifdef BUILD_MS_EXPERIMENTAL_OPS - -#include "core/providers/common.h" -#include "core/framework/op_kernel.h" -#include "core/util/math_cpuonly.h" -#include "Eigen/src/Core/Map.h" -#include "window_functions.h" -#include - -#include "core/platform/threadpool.h" - -#include -#include - -namespace onnxruntime { -namespace contrib { - -ONNX_OPERATOR_KERNEL_EX( - HannWindow, - kMSExperimentalDomain, - 1, - kCpuExecutionProvider, - KernelDefBuilder().MayInplace(0, 0) - .TypeConstraint("T1", BuildKernelDefConstraints()) - .TypeConstraint("T2", BuildKernelDefConstraints()), - HannWindow); - -ONNX_OPERATOR_KERNEL_EX( - HammingWindow, - kMSExperimentalDomain, - 1, - kCpuExecutionProvider, - KernelDefBuilder().MayInplace(0, 0) - .TypeConstraint("T1", BuildKernelDefConstraints()) - .TypeConstraint("T2", BuildKernelDefConstraints()), - HammingWindow); - -ONNX_OPERATOR_KERNEL_EX( - BlackmanWindow, - kMSExperimentalDomain, - 1, - kCpuExecutionProvider, - KernelDefBuilder().MayInplace(0, 0) - .TypeConstraint("T1", BuildKernelDefConstraints()) - .TypeConstraint("T2", BuildKernelDefConstraints()), - BlackmanWindow); - - -ONNX_OPERATOR_KERNEL_EX( - MelWeightMatrix, - kMSExperimentalDomain, - 1, - kCpuExecutionProvider, - KernelDefBuilder().MayInplace(0, 0) - .TypeConstraint("T1", BuildKernelDefConstraints()) - .TypeConstraint("T2", BuildKernelDefConstraints()) - .TypeConstraint("T3", BuildKernelDefConstraints()), - MelWeightMatrix); - - -template -static Status cosine_sum_window(Tensor* Y, size_t size, float a0, float a1, float a2) { - auto* Y_data = reinterpret_cast(Y->MutableDataRaw()); - - // Calculate the radians to increment per sample - constexpr double pi = 3.14159265; - constexpr double tau = 2 * pi; - const double angular_increment = tau / size; - - for (size_t i = 0; i < size; i++) { - auto a2_component = a2 == 0 ? 
0 : (a2 * cos(2 * angular_increment * i)); - - T& value = *(Y_data + i); - value = static_cast(a0 - (a1 * cos(angular_increment * i)) + a2_component); - } - - return Status::OK(); -} - -template -static T get_scalar_value_from_tensor(const Tensor* tensor) { - ORT_ENFORCE(tensor->Shape().Size() == 1, "Tensor input should have a single value."); - auto data_type = tensor->DataType()->AsPrimitiveDataType()->GetDataType(); - switch (data_type) { - case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_INT32: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - case ONNX_NAMESPACE::TensorProto_DataType_INT64: - return static_cast(*reinterpret_cast(tensor->DataRaw())); - default: - ORT_THROW("Unsupported input data type of ", data_type); - } -} - -static Status create_cosine_sum_window( - OpKernelContext* ctx, - onnx::TensorProto_DataType output_datatype, - float a0, float a1, float a2) { - - // Get the size of the window - auto size = get_scalar_value_from_tensor(ctx->Input(0)); - - // Get the output tensor - auto Y_shape = onnxruntime::TensorShape({size}); - auto Y = ctx->Output(0, Y_shape); - - switch (output_datatype) { - case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT8: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT16: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT32: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - 
break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT64: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_UINT8: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_UINT16: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_UINT32: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_UINT64: { - ORT_RETURN_IF_ERROR((cosine_sum_window(Y, size, a0, a1, a2))); - break; - } - default: - ORT_THROW("Unsupported input data type of ", output_datatype); - } - - return Status::OK(); -} - -Status HannWindow::Compute(OpKernelContext* ctx) const { - // HannWindows are a special case of Cosine-Sum Windows which take the following form: - // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with values the following values for a_k: - float a0 = .5f; - float a1 = a0; - float a2 = 0; - return create_cosine_sum_window(ctx, data_type_, a0, a1, a2); -} - -Status HammingWindow::Compute(OpKernelContext* ctx) const { - // HammingWindows are a special case of Cosine-Sum Windows which take the following form: - // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with values the following values for a_k: - float a0 = 25.f / 46.f; - float a1 = 1 - a0; - float a2 = 0; - return create_cosine_sum_window(ctx, data_type_, a0, a1, a2); -} - -Status BlackmanWindow::Compute(OpKernelContext* ctx) const { - // BlackmanWindows are a special case of Cosine-Sum Windows which take the following form: - // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with values the following values for a_k: - float alpha = .16f; - float a2 = alpha / 2.f; - float a0 = .5f - a2; - float a1 = .5f; - return create_cosine_sum_window(ctx, data_type_, a0, a1, a2); -} - -static inline double hz_to_mel_scale(double hz) { - return 
2595 * std::log10(1 + hz / 700); -} - -static inline double mel_scale_to_hz(double mels) { - return 700 * (pow(10, (mels / 2595)) - 1); -} - -template -Status create_mel_weight_matrix(OpKernelContext* ctx, int64_t num_mel_bins, int64_t dft_length, int64_t sample_rate, float lower_edge_hertz, float upper_edge_hertz) { - // Determine the width of the spectrogram. - // This is determined as half the size of the fft size. The first element of the spectrum is always retained, - // and the remaining are halved. The second half can be discarded due to the conjugate symmetry of the output with real valued ffts. - // Taken together the formula for the size of the output will be std::floor(dft_length / 2) + 1. - int64_t num_spectrogram_bins = static_cast(std::floor(dft_length / 2 + 1)); - - // Checks - auto lowest_index = std::floor(((dft_length + 1) * lower_edge_hertz) / sample_rate); - auto highest_index = std::floor(((dft_length + 1) * upper_edge_hertz) / sample_rate); - ORT_ENFORCE(lowest_index >= 0 && lowest_index < num_spectrogram_bins, "lower_edge_hertz produces a mel triangle filter bank that is out of range given the dft_length and the sample_rate."); - ORT_ENFORCE(highest_index >= 0 && highest_index < num_spectrogram_bins, "upper_edge_hertz produces a mel triangle filter bank that is out of range given the dft_length and the sample_rate."); - - // Create the output shape - onnxruntime::TensorShape output_shape( - { - static_cast(num_spectrogram_bins), - num_mel_bins - }); - auto* Y = ctx->Output(0, output_shape); - - // Get the raw output data - auto* Y_data = reinterpret_cast(Y->MutableDataRaw()); - - // Set the weight matrix to 0 - memset(Y_data, 0, num_spectrogram_bins * num_mel_bins * sizeof(T)); - - // The mel filterbank is a triangular shaped peak with a height of 1 and a base equal to the size of the MEL range divided by - // the number of bins needed times 2. 
This triagle is then slid across the mel domain linearly, with a constant step size that - // is equal to half of the base of the triange. To accomodate N bins, N+2 data points will be needed to determine the - // start, center and end points of each mel triange filter. - // - // low_frequency where the mel triangle filter banks begin, and they end on the high_frequency_mel - // The range is divided evenly to create the needed points corresponding to the begin, center, end points of each triangle filterbank - std::vector frequency_bins(num_mel_bins + 2); - auto low_frequency_mel = hz_to_mel_scale(lower_edge_hertz); - auto high_frequency_mel = hz_to_mel_scale(upper_edge_hertz); - auto mel_step = (high_frequency_mel - low_frequency_mel) / static_cast(frequency_bins.size()); - - // Convert each point from mel scale back to hertz, and then compute the corresponding index in the fft - for (size_t i = 0; i < frequency_bins.size(); i++) { - auto hz = mel_scale_to_hz(low_frequency_mel + mel_step * i); - frequency_bins[i] = static_cast(std::floor(((dft_length + 1) * hz) / sample_rate)); - } - - for (size_t i = 0; i < static_cast(num_mel_bins); i++) { - auto lower_frequency_value = frequency_bins[i]; //left - auto center_frequency_point = frequency_bins[i+1]; //center - auto higher_frequency_point = frequency_bins[i+2]; //right - - auto low_to_center = center_frequency_point - lower_frequency_value; - if (low_to_center == 0) { - auto& current_element = *(Y_data + (center_frequency_point * num_mel_bins) + i); - current_element = static_cast(1); - } else { - for (size_t j = lower_frequency_value; j <= center_frequency_point; j++) { - auto& current_element = *(Y_data + (j * num_mel_bins) + i); - current_element = static_cast((j - lower_frequency_value) / static_cast(low_to_center)); - } - } - - auto center_to_high = higher_frequency_point - center_frequency_point; - if (center_to_high > 0) { - for (size_t j = center_frequency_point; j < higher_frequency_point; j++) { - auto& 
current_element = *(Y_data + (j * num_mel_bins) + i); - current_element = static_cast((higher_frequency_point - j) / static_cast(center_to_high)); - } - } - } - - return Status::OK(); -} - -static Status create_mel_weight_matrix(OpKernelContext* ctx, onnx::TensorProto_DataType output_datatype, - int64_t num_mel_bins, int64_t dft_length, int64_t sample_rate, float lower_edge_hertz, float upper_edge_hertz) { - switch (output_datatype) { - case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT8: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT16: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT32: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_INT64: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_UINT8: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_UINT16: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, 
upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_UINT32: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - case ONNX_NAMESPACE::TensorProto_DataType_UINT64: { - ORT_RETURN_IF_ERROR((create_mel_weight_matrix(ctx, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz))); - break; - } - default: - ORT_THROW("Unsupported input data type of ", output_datatype); - } - return Status::OK(); -} - -Status MelWeightMatrix::Compute(OpKernelContext* ctx) const { - const auto num_mel_bins = get_scalar_value_from_tensor(ctx->Input(0)); - const auto dft_length = get_scalar_value_from_tensor(ctx->Input(1)); - const auto sample_rate = get_scalar_value_from_tensor(ctx->Input(2)); - const auto lower_edge_hertz = get_scalar_value_from_tensor(ctx->Input(3)); - const auto upper_edge_hertz = get_scalar_value_from_tensor(ctx->Input(4)); - - ORT_RETURN_IF_ERROR(create_mel_weight_matrix(ctx, data_type_, num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz)); - return Status::OK(); -} - -} // namespace contrib -} // namespace onnxruntime - -#endif diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 8cf2d278e0ea..fb7bc16cc190 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -16,7 +16,6 @@ #include "core/graph/contrib_ops/range_schema_defs.h" #include "core/graph/op.h" #include "core/mlas/inc/mlas.h" -#include "core/graph/signal_ops/signal_defs.h" #include "core/graph/contrib_ops/onnx_function_util.h" #include "onnx/defs/function.h" @@ -370,7 +369,6 @@ void sparseCompatibleMatmulShapeInference( updateOutputShape(ctx, 0, resultShape, default_tensor_type); } - bool ParseScalar(const TensorProto* initializer, int& value) { std::vector parsed_data; if (initializer->data_type() == 
TensorProto::INT32) { @@ -2417,7 +2415,6 @@ void RegisterContribSchemas() { // } // updateOutputShape(ctx, 0, disentangled_attention_shape); propagateShapeFromInputToOutput(ctx, 0, 0); - }); ONNX_CONTRIB_OPERATOR_SCHEMA(Snpe) @@ -2535,10 +2532,6 @@ This op functions in much the same was as Dropout-11 and Dropout-13 do, execpt t RegisterNchwcSchemas(); } #endif - -#ifdef BUILD_MS_EXPERIMENTAL_OPS - onnxruntime::signal::RegisterSignalSchemas(); -#endif } } // namespace contrib diff --git a/onnxruntime/core/graph/signal_ops/signal_defs.cc b/onnxruntime/core/graph/signal_ops/signal_defs.cc deleted file mode 100644 index 27e077c9fefe..000000000000 --- a/onnxruntime/core/graph/signal_ops/signal_defs.cc +++ /dev/null @@ -1,738 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#ifdef BUILD_MS_EXPERIMENTAL_OPS - -#include "core/framework/tensorprotoutils.h" -#include "core/providers/common.h" -#include "core/graph/constants.h" -#include "core/graph/signal_ops/signal_defs.h" -#include "core/graph/op.h" -#include "onnx/defs/schema.h" -#include "onnx/defs/shape_inference.h" -#include "onnx/defs/tensor_proto_util.h" - -#include - -namespace onnxruntime { -namespace signal { - -using ONNX_NAMESPACE::AttributeProto; -using ONNX_NAMESPACE::OpSchema; -using ONNX_NAMESPACE::OPTIONAL_VALUE; - -template -static T get_scalar_value_from_tensor(const ONNX_NAMESPACE::TensorProto* t) { - if (t == nullptr) { - return T{}; - } - - auto data_type = t->data_type(); - switch (data_type) { - case ONNX_NAMESPACE::TensorProto::FLOAT: - return static_cast(ONNX_NAMESPACE::ParseData(t).at(0)); - case ONNX_NAMESPACE::TensorProto::DOUBLE: - return static_cast(ONNX_NAMESPACE::ParseData(t).at(0)); - case ONNX_NAMESPACE::TensorProto::INT32: - return static_cast(ONNX_NAMESPACE::ParseData(t).at(0)); - case ONNX_NAMESPACE::TensorProto::INT64: - return static_cast(ONNX_NAMESPACE::ParseData(t).at(0)); - default: - ORT_THROW("Unsupported input data type 
of ", data_type); - } -} - -inline const ONNX_NAMESPACE::TensorShapeProto* getOptionalInputShape(ONNX_NAMESPACE::InferenceContext& ctx, size_t n) { - const auto* input_type = ctx.getInputType(n); - - if (input_type == nullptr) { - return nullptr; - } - - const auto value_case = input_type->value_case(); - if (value_case != ONNX_NAMESPACE::TypeProto::kTensorType && value_case != ONNX_NAMESPACE::TypeProto::kSparseTensorType) { - fail_type_inference("Attribute expected to have tensor or sparse tensor type"); - } - if (value_case == ONNX_NAMESPACE::TypeProto::kTensorType) { - return &input_type->tensor_type().shape(); - } else { - return &input_type->sparse_tensor_type().shape(); - } -} - -std::function CosineSumWindowOpDocGenerator(const char* name) { - return [name](OpSchema& schema) { - std::string doc; - POPULATE_OP_DOC_STR( - doc = R"DOC( -Generates a {name} window as described in the paper https://ieeexplore.ieee.org/document/1455106. -)DOC"; - ReplaceAll(doc, "{name}", name);); - - schema.SetDoc(doc); - schema.Attr("output_datatype", - "The data type of the output tensor. " - "Strictly must be one of the values from DataType enum in TensorProto whose values correspond to T2. " - "The default value is 1 = FLOAT. ", - AttributeProto::INT, - static_cast(onnx::TensorProto_DataType::TensorProto_DataType_FLOAT)); - schema.Attr("periodic", - "If 1, returns a window to be used as periodic function. If 0, return a symmetric window. " - "When 'periodic' is specified, hann computes a window of length size + 1 and returns the first size points. " - "The default value is 1. ", - AttributeProto::INT, - static_cast(1)); - schema.Input(0, - "size", - "A scalar value indicating the length of the window.", - "T1", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable); - schema.Output(0, - "output", - "A Hann window with length: size. 
" - "The output has the shape: [size].", - "T2", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable); - schema.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { - // Update the output data type to the output_datatype - auto output_datatype = getAttribute(ctx, "output_datatype", - static_cast(onnx::TensorProto_DataType::TensorProto_DataType_FLOAT)); - updateOutputElemType(ctx, 0, static_cast(output_datatype)); - - if (!hasInputShape(ctx, 0)) { - // If no shape is available for the input, skip shape inference. - return; - } - - const auto* size = ctx.getInputData(0); - if (size == nullptr) { - // Size is not available, so return early - return; - } - - if (size->dims_size() != 0) { - fail_shape_inference("size input must be a scalar."); - } - - auto size_value = get_scalar_value_from_tensor(size); - if (size_value <= 0) { - fail_shape_inference("size input must be greater than 0."); - } - - ONNX_NAMESPACE::TensorShapeProto result_shape; - result_shape.add_dim()->set_dim_value(size_value); - updateOutputShape(ctx, 0, result_shape); - }); - }; -} - -void RegisterSignalSchemas() { - MS_SIGNAL_OPERATOR_SCHEMA(DFT) - .SetDomain(kMSExperimentalDomain) - .SinceVersion(1) - .SetDoc(R"DOC(DFT)DOC") - .Attr("onesided", - "If True (default), only values for half of the fft size are returned because the real-to-complex Fourier transform satisfies the conjugate symmetry." - "The output tensor will return the first floor(n_fft/2) + 1 values from the DFT." - "Values can be 0 or 1.", - AttributeProto::AttributeType::AttributeProto_AttributeType_INT, - static_cast(0)) - .Attr("axis", - "The axis on which to perform the DFT. By default this value is set to 0, which corresponds to the first dimension after the batch index." 
- "This value must be less than signal_dimN, where signal_dimN is the number of dimensions in the signal.", - AttributeProto::AttributeType::AttributeProto_AttributeType_INT, - static_cast(0)) - .Attr("inverse", - "Whether to perform the inverse discrete fourier transform. By default this value is set to 0, which corresponds to false.", - AttributeProto::INT, - static_cast(0)) - .Input(0, - "input", - "For real input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][1]. " - "For complex input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]. " - "The first dimension is the batch dimension. " - "The following N dimentions correspond to the signal's dimensions. " - "The final dimension represents the real and imaginary parts of the value in that order.", - "T1", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .Input(1, - "dft_length", - "The length of the signal." - "If greater than the axis dimension, the signal will be zero-padded up to dft_length. " - "If less than the axis dimension, only the first dft_length values will be used as the signal. " - "It's an optional value. ", - "T2", - OpSchema::Optional, - true, - 1, - OpSchema::NonDifferentiable) - .Output(0, - "output", - "The Fourier Transform of the input vector." - "If onesided is 0, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]. " - "If axis=0 and onesided is 1, the following shape is expected: [batch_idx][floor(signal_dim1/2)+1][signal_dim2]...[signal_dimN][2]. " - "If axis=1 and onesided is 1, the following shape is expected: [batch_idx][signal_dim1][floor(signal_dim2/2)+1]...[signal_dimN][2]. " - "If axis=N-1 and onesided is 1, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[floor(signal_dimN/2)+1][2]. 
" - "The signal_dim at the specified axis is equal to the dft_length.", - "T1") - .TypeConstraint( - "T1", - {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"}, - "Constrain input and output types to float tensors.") - .TypeConstraint( - "T2", - {"tensor(int32)", "tensor(int64)"}, - "Constrain scalar length types to int64_t.") - .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { - bool is_onesided = static_cast(getAttribute(ctx, "onesided", 0)); - bool inverse = static_cast(getAttribute(ctx, "inverse", 0)); - - if (inverse && is_onesided) { - fail_shape_inference("is_onesided and inverse attributes cannot be enabled at the same time"); - } - - propagateElemTypeFromInputToOutput(ctx, 0, 0); - if (!hasInputShape(ctx, 0)) { - // If no shape is available for the input, skip shape inference... - return; - } - - // In general the output shape will match the input shape exactly - // So initialize the output shape with the input shape - auto& input_shape = getInputShape(ctx, 0); - ONNX_NAMESPACE::TensorShapeProto result_shape_proto = input_shape; - - // Get the axis where the DFT will be performed. - auto axis = static_cast(getAttribute(ctx, "axis", 1)); - auto rank = input_shape.dim_size(); - - if (!(-rank <= axis && axis < rank)) { - fail_shape_inference( - "axis attribute value ", - axis, - " is invalid for a tensor of rank ", - rank); - } - - auto axis_idx = (axis >= 0 ? axis : axis + rank); - - // If dft_length is specified, then we should honor the shape. - // Set the output dimension to match the dft_length on the axis. - // If onesided this will be adjusted later on... - const ONNX_NAMESPACE::TensorProto* dft_length = nullptr; - if (ctx.getNumInputs() >= 2 && ctx.getInputType(1) != nullptr) { - dft_length = ctx.getInputData(1); - if (dft_length == nullptr) { - // If we cannot read the dft_length, we cannot infer shape - // return... 
- return; - } - } - - if (nullptr != dft_length) { - if (dft_length->dims_size() != 0) { - fail_shape_inference("dft_length input must be a scalar."); - } - auto dft_length_value = get_scalar_value_from_tensor(dft_length); - result_shape_proto.mutable_dim(axis_idx)->set_dim_value(dft_length_value); - } - // When DFT is onesided, the output shape is half the size of the input shape - // along the specified axis. - if (is_onesided) { - auto axis_dimension = result_shape_proto.dim(axis_idx); - // We need to update the output shape dimension along the specified axis, - // but sometimes the dimension will be a free dimension or be otherwise unset. - // Only perform inference when a input dimension value exists. - if (axis_dimension.has_dim_value()) { - auto original_signal_size = axis_dimension.dim_value(); - auto half_signal_size = (original_signal_size >> 1) + 1; - result_shape_proto.mutable_dim(axis_idx)->set_dim_value(half_signal_size); - } else { - // Clear the value and param (which would otherwie be inherited from the input). - result_shape_proto.mutable_dim(axis_idx)->clear_dim_value(); - result_shape_proto.mutable_dim(axis_idx)->clear_dim_param(); - } - } - - // Coerce the last dimension to 2. - auto dim_size = static_cast(result_shape_proto.dim_size()); - auto has_component_dimension = dim_size > 2; - - // This if check is retained in the contrib op and not the official spec for back compat - if (has_component_dimension) { - result_shape_proto.mutable_dim(static_cast(dim_size - 1))->set_dim_value(2); - } else { - result_shape_proto.add_dim()->set_dim_value(2); - } - - updateOutputShape(ctx, 0, result_shape_proto); - }); - - MS_SIGNAL_OPERATOR_SCHEMA(IDFT) - .SetDomain(kMSExperimentalDomain) - .SinceVersion(1) - .SetDoc(R"DOC(IDFT)DOC") - .Attr("axis", - "The axis on which to perform the DFT. By default this value is set to 0, which corresponds to the first dimension after the batch index." 
- "This value must be less than signal_dimN, where signal_dimN is the number of dimensions in the signal.", - AttributeProto::AttributeType::AttributeProto_AttributeType_INT, - static_cast(0)) - .Input(0, - "input", - "For real multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][1]." - "For complex multi-dimensional input, the following shape is expected: [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]." - "The first dimension is the batch dimension." - "The final dimension represents the real and imaginary parts of the value.", - "T1") - .Input(1, - "dft_length", - "The length of the signal." - "If greater than the axis dimension, the signal will be zero-padded up to dft_length. " - "If less than the axis dimension, only the first dft_length values will be used as the signal. " - "It's an optional value. ", - "T2", - OpSchema::Optional, - true, - 1, - OpSchema::NonDifferentiable) - .Output(0, - "output", - "The inverse discrete Fourier transform of the input. " - "The signal_dim at the specified axis is equal to the dft_length." 
- "The expected shape is [batch_idx][signal_dim1][signal_dim2]...[signal_dimN][2]" - "For all types of input, the last dimension of the output represents the components of a complex number.", - "T1", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .TypeConstraint( - "T1", - {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"}, - "Constrain input and output types to float tensors.") - .TypeConstraint( - "T2", - {"tensor(int64)"}, - "Constrain scalar length types to int64_t.") - .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { - propagateElemTypeFromInputToOutput(ctx, 0, 0); - const int64_t batch_ndim = 1; - - auto& input_shape = getInputShape(ctx, 0); - ONNX_NAMESPACE::TensorShapeProto result_shape = input_shape; - auto dim_size = static_cast(input_shape.dim_size()); - auto has_component_dimension = dim_size > 2; - - if (has_component_dimension) { - result_shape.mutable_dim(static_cast(dim_size - 1))->set_dim_value(2); - } else { - result_shape.add_dim()->set_dim_value(2); - } - - updateOutputShape(ctx, 0, result_shape); - }); - - MS_SIGNAL_OPERATOR_SCHEMA(STFT) - .SetDomain(kMSExperimentalDomain) - .SinceVersion(1) - .SetDoc(R"DOC(STFT)DOC") - .Attr( - "onesided", - "If onesided is 1, only values for w in [0, 1, 2, ..., floor(n_fft/2) + 1] are returned because " - "the real-to-complex Fourier transform satisfies the conjugate symmetry, i.e., X[m, w] = X[m,w] = " - "X[m,n_fft-w]*. Note if the input or window tensors are complex, then onesided output is not possible. " - "Enabling onesided with real inputs performs a Real-valued fast Fourier transform (RFFT)." - "When invoked with real or complex valued input, the default value is 1. " - "Values can be 0 or 1.", - AttributeProto::INT, - static_cast(1)) - .Input(0, - "signal", - "Input tensor representing a real or complex valued signal. " - "For real input, the following shape is expected: [batch_size][signal_length][1]. 
" - "For complex input, the following shape is expected: [batch_size][signal_length][2], where " - "[batch_size][signal_length][0] represents the real component and [batch_size][signal_length][1] " - "represents the imaginary component of the signal.", - "T1", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .Input(1, - "frame_step", - "The number of samples to step between successive DFTs.", - "T2", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .Input(2, - "window", - "A tensor representing the window that will be slid over the signal." - "The window must have rank 1 with shape: [window_shape]. " - "It's an optional value. ", - "T1", - OpSchema::Optional, - true, - 1, - OpSchema::NonDifferentiable) - .Input(3, - "frame_length", - "A scalar representing the size of the DFT. " - "It's an optional value.", - "T2", - OpSchema::Optional, - true, - 1, - OpSchema::NonDifferentiable) - .Output(0, - "output", - "The Short-time Fourier Transform of the signals." - "If onesided is 1, the output has the shape: [batch_size][frames][dft_unique_bins][2], where " - "dft_unique_bins is frame_length // 2 + 1 (the unique components of the DFT) " - "If onesided is 0, the output has the shape: [batch_size][frames][frame_length][2], where frame_length " - "is the length of the DFT.", - "T1", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .TypeConstraint( - "T1", - {"tensor(float)", - "tensor(float16)", - "tensor(double)", - "tensor(bfloat16)"}, - "Constrain signal and output to float tensors.") - .TypeConstraint( - "T2", - {"tensor(int32)", "tensor(int64)"}, - "Constrain scalar length types to int64_t.") - .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { - propagateElemTypeFromInputToOutput(ctx, 0, 0); - - // Get signal size - // The signal size is needed to perform inference because the size of the signal - // is needed to compute the number of DFTs in the output. 
- // - // 1) Check if shape exists, return if not - // 2) Get the shape - // 3) Check if signal dim value exists, return if not - if (!hasInputShape(ctx, 0)) { - return; - } - - auto& input_shape = getInputShape(ctx, 0); - auto signal_dim = input_shape.dim(1); - if (!signal_dim.has_dim_value()) { - return; - } - auto signal_size = signal_dim.dim_value(); - - // The frame step is a required input. - // Its value is needed to compute the number output nDFTs, so return early is missing. - const auto* frame_step = ctx.getInputData(1); - if (nullptr == frame_step) { - return; - } - auto frame_step_value = get_scalar_value_from_tensor(frame_step); - - // Determine the size of the DFT based on the 2 optional inputs window and frame_length. - // One must be set. - int64_t dft_size = -1; - const ONNX_NAMESPACE::TensorProto* frame_length = nullptr; - if (ctx.getNumInputs() >= 4 && ctx.getInputType(3) != nullptr) { - frame_length = ctx.getInputData(3); - if (frame_length == nullptr) { - // If we cannot read the frame_length, we cannot infer shape - // return... - return; - } - } - - const ONNX_NAMESPACE::TensorShapeProto* window_shape = nullptr; - if (ctx.getNumInputs() >= 3) { - window_shape = getOptionalInputShape(ctx, 2); - } else { - window_shape = nullptr; - } - - if (window_shape == nullptr && frame_length == nullptr) { - // STFT expects to have at least one of these inputs set: [window, frame_length], - // but they may not be available at shape inference time - return; - } else if (window_shape != nullptr && frame_length != nullptr) { - if (frame_length->dims_size() != 0) { - fail_shape_inference("frame_length input must be scalar."); - } - auto frame_length_value = get_scalar_value_from_tensor(frame_length); - - // Ensure that the window length and the dft_length match. 
- if (window_shape->dim_size() != 1) { - fail_shape_inference("window input must have rank = 1."); - } - if (window_shape->dim(0).has_dim_value()) { - auto window_length = window_shape->dim(0).dim_value(); - if (window_length != frame_length_value) { - fail_type_inference( - "If STFT has both a window input and frame_length specified, the dimension of the " - "window must match the frame_length specified!"); - } - } - - dft_size = frame_length_value; - } else if (window_shape != nullptr) { - // Ensure that the window length and the dft_length match. - if (window_shape->dim_size() != 1) { - fail_shape_inference("window input must have rank = 1."); - } - if (window_shape->dim(0).has_dim_value()) { - dft_size = window_shape->dim(0).dim_value(); - } else { - // Cannot determine the window size, and there is no frame_length, - // So shape inference cannot proceed. - return; - } - } else if (frame_length != nullptr) { - if (frame_length->dims_size() != 0) { - fail_shape_inference("frame_length input must be scalar."); - } - dft_size = get_scalar_value_from_tensor(frame_length); - } - - bool is_onesided = static_cast(getAttribute(ctx, "onesided", 0)); - if (is_onesided) { - dft_size = is_onesided ? 
((dft_size >> 1) + 1) : dft_size; - } - - auto n_dfts = static_cast((signal_size - dft_size) / static_cast(frame_step_value)) + 1; - - // The output has the following shape: [batch_size][frames][dft_unique_bins][2] - ONNX_NAMESPACE::TensorShapeProto result_shape_proto; - result_shape_proto.add_dim()->set_dim_value(input_shape.dim(0).dim_value()); // batch size - result_shape_proto.add_dim()->set_dim_value(n_dfts); - result_shape_proto.add_dim()->set_dim_value(dft_size); - result_shape_proto.add_dim()->set_dim_value(2); - updateOutputShape(ctx, 0, result_shape_proto); - }); - - // Window Functions - MS_SIGNAL_OPERATOR_SCHEMA(HannWindow) - .SetDomain(kMSExperimentalDomain) - .SinceVersion(1) - .FillUsing(CosineSumWindowOpDocGenerator("Hann")) - .TypeConstraint( - "T1", - {"tensor(int32)", "tensor(int64)"}, - "Constrain the input size to int64_t.") - .TypeConstraint( - "T2", - ONNX_NAMESPACE::OpSchema::all_numeric_types_with_bfloat(), - "Constrain output types to numeric tensors.") - .FunctionBody(R"ONNX( - { - A0 = Constant () - A1 = Constant () - A2 = Constant () - Zero = Constant () - One = Constant () - Two = Constant () - Tau = Constant () - Size_FP = Cast (size) - AngularIncrement = Div (Tau, Size_FP) - Range = Range (Zero, Size_FP, One) - RangeAngular = Mul (Range, AngularIncrement) - TwoRangeAngular = Mul (RangeAngular, Two) - CosTwoRangeAngular = Cos (TwoRangeAngular) - A2_Component = Mul (A2, CosTwoRangeAngular) - CosRangeAngular = Cos (RangeAngular) - A1_Component = Mul (A1, CosRangeAngular) - Temp0 = Add (A1_Component, A2_Component) - Temp1 = Sub (A0, Temp0) - output = Cast (Temp1) - } - )ONNX"); - - MS_SIGNAL_OPERATOR_SCHEMA(HammingWindow) - .SetDomain(kMSExperimentalDomain) - .SinceVersion(1) - .FillUsing(CosineSumWindowOpDocGenerator("Hamming")) - .TypeConstraint( - "T1", - {"tensor(int32)", "tensor(int64)"}, - "Constrain the input size to int64_t.") - .TypeConstraint( - "T2", - ONNX_NAMESPACE::OpSchema::all_numeric_types_with_bfloat(), - "Constrain 
output types to numeric tensors.") - .FunctionBody(R"ONNX( - { - A0 = Constant () - A1 = Constant () - A2 = Constant () - Zero = Constant () - One = Constant () - Two = Constant () - Tau = Constant () - Size_FP = Cast (size) - AngularIncrement = Div (Tau, Size_FP) - Range = Range (Zero, Size_FP, One) - RangeAngular = Mul (Range, AngularIncrement) - TwoRangeAngular = Mul (RangeAngular, Two) - CosTwoRangeAngular = Cos (TwoRangeAngular) - A2_Component = Mul (A2, CosTwoRangeAngular) - CosRangeAngular = Cos (RangeAngular) - A1_Component = Mul (A1, CosRangeAngular) - Temp0 = Add (A1_Component, A2_Component) - Temp1 = Sub (A0, Temp0) - output = Cast (Temp1) - } - )ONNX"); - - MS_SIGNAL_OPERATOR_SCHEMA(BlackmanWindow) - .SetDomain(kMSExperimentalDomain) - .SinceVersion(1) - .FillUsing(CosineSumWindowOpDocGenerator("Blackman")) - .TypeConstraint( - "T1", - {"tensor(int32)", "tensor(int64)"}, - "Constrain the input size to int64_t.") - .TypeConstraint( - "T2", - ONNX_NAMESPACE::OpSchema::all_numeric_types_with_bfloat(), - "Constrain output types to numeric tensors.") - .FunctionBody(R"ONNX( - { - A0 = Constant () - A1 = Constant () - A2 = Constant () - Zero = Constant () - One = Constant () - Two = Constant () - Tau = Constant () - Size_FP = Cast (size) - AngularIncrement = Div (Tau, Size_FP) - Range = Range (Zero, Size_FP, One) - RangeAngular = Mul (Range, AngularIncrement) - TwoRangeAngular = Mul (RangeAngular, Two) - CosTwoRangeAngular = Cos (TwoRangeAngular) - A2_Component = Mul (A2, CosTwoRangeAngular) - CosRangeAngular = Cos (RangeAngular) - A1_Component = Mul (A1, CosRangeAngular) - Temp0 = Add (A1_Component, A2_Component) - Temp1 = Sub (A0, Temp0) - output = Cast (Temp1) - } - )ONNX"); - - static const char* MelWeightMatrix_ver17_doc = R"DOC( -Generate a MelWeightMatrix that can be used to re-weight a Tensor containing a linearly sampled frequency spectra -(from DFT or STFT) into num_mel_bins frequency information based on the [lower_edge_hertz, upper_edge_hertz] 
range -on the mel scale. -This function defines the mel scale in terms of a frequency in hertz according to the following formula: - - mel(f) = 2595 * log10(1 + f/700) - -In the returned matrix, all the triangles (filterbanks) have a peak value of 1.0. - -The returned MelWeightMatrix can be used to right-multiply a spectrogram S of shape [frames, num_spectrogram_bins] of -linear scale spectrum values (e.g. STFT magnitudes) to generate a "mel spectrogram" M of shape [frames, num_mel_bins]. -)DOC"; - - MS_SIGNAL_OPERATOR_SCHEMA(MelWeightMatrix) - .SetDomain(kMSExperimentalDomain) - .SinceVersion(1) - .SetDoc(R"DOC(MelWeightMatrix)DOC") - .Attr("output_datatype", - "The data type of the output tensor. " - "Strictly must be one of the types from DataType enum in TensorProto.", - ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_INT, - static_cast(onnx::TensorProto_DataType::TensorProto_DataType_FLOAT)) - .Input(0, - "num_mel_bins", - "The number of bands in the mel spectrum.", - "T1") - .Input(1, - "dft_length", - "The size of the FFT.", - "T1") - .Input(2, - "sample_rate", - "", - "T1") - .Input(3, - "lower_edge_hertz", - "", - "T2") - .Input(4, - "upper_edge_hertz", - "", - "T2") - .Output(0, - "output", - "The MEL Matrix", - "T3") - .TypeConstraint( - "T1", - {"tensor(int32)", "tensor(int64)"}, - "Constrain to integer tensors.") - .TypeConstraint( - "T2", - {"tensor(float)", - "tensor(float16)", - "tensor(double)", - "tensor(bfloat16)"}, - "Constrain to float tensors") - .TypeConstraint( - "T3", - ONNX_NAMESPACE::OpSchema::all_numeric_types_with_bfloat(), - "Constrain to any numerical types.") - .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { - auto output_datatype = getAttribute( - ctx, "output_datatype", static_cast(onnx::TensorProto::DataType::TensorProto_DataType_FLOAT)); - updateOutputElemType(ctx, 0, static_cast(output_datatype)); - - if (!hasInputShape(ctx, 0) || !hasInputShape(ctx, 1)) { - return; - } - - 
const auto* num_mel_bins = ctx.getInputData(0); - const auto* dft_length = ctx.getInputData(1); - if (nullptr == num_mel_bins || nullptr == dft_length) { - return; - } - - int64_t num_mel_bins_value = -1; - int64_t dft_length_value = -1; - if (num_mel_bins->dims_size() != 0) { - fail_shape_inference("num_mel_bins input must be scalar."); - } - num_mel_bins_value = get_scalar_value_from_tensor(num_mel_bins); - - if (dft_length->dims_size() != 0) { - fail_shape_inference("dft_length input must be scalar."); - } - dft_length_value = get_scalar_value_from_tensor(dft_length); - - if (num_mel_bins_value > 0 && dft_length_value > 0) { - ONNX_NAMESPACE::TensorShapeProto result_shape; - result_shape.add_dim()->set_dim_value(static_cast((dft_length_value >> 1) + 1)); - result_shape.add_dim()->set_dim_value(num_mel_bins_value); - updateOutputShape(ctx, 0, result_shape); - } - }); -} - -} // namespace signal -} // namespace onnxruntime - -#endif diff --git a/onnxruntime/core/graph/signal_ops/signal_defs.h b/onnxruntime/core/graph/signal_ops/signal_defs.h deleted file mode 100644 index 6960ff33f6e6..000000000000 --- a/onnxruntime/core/graph/signal_ops/signal_defs.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#pragma once - -#if !defined(ORT_MINIMAL_BUILD) -#include "onnx/defs/schema.h" -#else -#include "onnx/defs/data_type_utils.h" -#endif -#include "onnx/onnx_pb.h" -#include "onnx/onnx-operators_pb.h" - -namespace onnxruntime { -namespace signal { -#define MS_SIGNAL_OPERATOR_SCHEMA(name) \ - MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name) -#define MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \ - MS_SIGNAL_OPERATOR_SCHEMA_UNIQ(Counter, name) -#define MS_SIGNAL_OPERATOR_SCHEMA_UNIQ(Counter, name) \ - static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \ - op_schema_register_once##name##Counter) ONNX_UNUSED = \ - ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) - -#define MS_SIGNAL_OPERATOR_SCHEMA_ELSEWHERE(name, schema_func) \ - MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(__COUNTER__, name, schema_func) -#define MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(Counter, name, schema_func) \ - MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) -#define MS_SIGNAL_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ - static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \ - op_schema_register_once##name##Counter) ONNX_UNUSED = \ - schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)) - -void RegisterSignalSchemas(); -} // namespace dml -} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 62e1d1f73f35..191f34439c7b 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -751,6 +751,14 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, int32_t, LessOrEqual); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 16, int64_t, LessOrEqual); +// Opset 17 +class 
ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, DFT); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, BlackmanWindow); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, HammingWindow); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, HannWindow); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, MelWeightMatrix); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 17, STFT); + // !!PLEASE READ BELOW!! Following that, add new entries above this comment /* *** IMPORTANT! *** @@ -1953,6 +1961,14 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { LessOrEqual)>, BuildKernelCreateInfo, + + // Opset 17 + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, }; for (auto& function_table_entry : function_table) { diff --git a/onnxruntime/core/providers/cpu/signal/dft.cc b/onnxruntime/core/providers/cpu/signal/dft.cc new file mode 100644 index 000000000000..97d7e19a7c4b --- /dev/null +++ b/onnxruntime/core/providers/cpu/signal/dft.cc @@ -0,0 +1,508 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/providers/cpu/signal/dft.h" + +#include +#include +#include +#include +#include + +#include "core/framework/op_kernel.h" +#include "core/platform/threadpool.h" +#include "core/providers/common.h" +#include "core/providers/cpu/signal/utils.h" +#include "core/util/math_cpuonly.h" +#include "Eigen/src/Core/Map.h" + +namespace onnxruntime { + +ONNX_CPU_OPERATOR_KERNEL(DFT, 17, + KernelDefBuilder() + .TypeConstraint("T1", BuildKernelDefConstraints()) + .TypeConstraint("T2", BuildKernelDefConstraints()), + DFT); + +ONNX_CPU_OPERATOR_KERNEL(STFT, 17, + KernelDefBuilder() + .MayInplace(0, 0) + .TypeConstraint("T1", BuildKernelDefConstraints()) + .TypeConstraint("T2", BuildKernelDefConstraints()), + STFT); + +static bool is_real_valued_signal(const onnxruntime::TensorShape& shape) { + return shape.NumDimensions() == 2 || shape[shape.NumDimensions() - 1] == 1; +} + +static bool is_complex_valued_signal(const onnxruntime::TensorShape& shape) { + return shape.NumDimensions() > 2 && shape[shape.NumDimensions() - 1] == 2; +} + +static bool is_power_of_2(size_t size) { + unsigned n_bits = 0; + while (size != 0) { + n_bits += size & 1; + size = size >> 1; + } + return n_bits == 1; +} + +static const unsigned char BitReverseTable256[] = { + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, + 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, + 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, + 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, + 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, + 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, + 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 
0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, + 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, + 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, + 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, + 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, + 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, + 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, + 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF}; + +template +static inline T bit_reverse(T num, unsigned significant_bits) { + if (significant_bits > 32) { + ORT_THROW("Unsupported bit size."); + } + uint32_t num_32 = static_cast(num); + uint32_t rev = (BitReverseTable256[num_32 & 0xff] << 24) | (BitReverseTable256[(num_32 >> 8) & 0xff] << 16) | + (BitReverseTable256[(num_32 >> 16) & 0xff] << 8) | (BitReverseTable256[(num_32 >> 24) & 0xff]); + return static_cast(((uint64_t)rev) >> (32 - significant_bits)); +} + +template +static T compute_angular_velocity(size_t number_of_samples, bool inverse) { + // Calculate fundamental angular velocity + static const T pi = static_cast(3.14159265); + static const T tau = 2 * pi; + T inverse_switch = inverse ? 
1.f : -1.f; + T angular_velocity = inverse_switch * tau / number_of_samples; + return angular_velocity; +} + +template +static std::complex compute_exponential(size_t index, const T angular_velocity) { + const T angle = static_cast(index) * angular_velocity; + return std::complex(cos(angle), sin(angle)); +} + +template +static Status fft_radix2(OpKernelContext* /*ctx*/, const Tensor* X, Tensor* Y, size_t X_offset, size_t X_stride, + size_t Y_offset, size_t Y_stride, int64_t axis, size_t dft_length, const Tensor* window, + bool is_onesided, bool inverse, InlinedVector>& V, + InlinedVector>& temp_output) { + // Get shape and significant bits + const auto& X_shape = X->Shape(); + size_t number_of_samples = static_cast(X_shape[axis]); + unsigned significant_bits = static_cast(log2(dft_length)); + + // Get data + auto* X_data = const_cast(reinterpret_cast(X->DataRaw())) + X_offset; + // Get window + U* window_data = nullptr; + if (window) { + window_data = const_cast(reinterpret_cast(window->DataRaw())); + } + + size_t Y_data_stride = 1; + std::complex* Y_data; + if (is_onesided) { + if (temp_output.size() != dft_length) { + temp_output.resize(dft_length); + } + Y_data = temp_output.data(); + } else { + Y_data = reinterpret_cast*>(Y->MutableDataRaw()) + Y_offset; + Y_data_stride = Y_stride; + } + + auto angular_velocity = compute_angular_velocity(dft_length, inverse); + + // Create vandermonde matrix V ordered with the bit-reversed permutation + if (V.size() != dft_length) { + V.resize(dft_length); + for (size_t i = 0; i < dft_length; i++) { + size_t bit_reversed_index = bit_reverse(i, significant_bits); + V[bit_reversed_index] = compute_exponential(i, angular_velocity); + } + } + + for (size_t i = 0; i < dft_length; i++) { + size_t bit_reversed_index = bit_reverse(i, significant_bits); + auto x = (bit_reversed_index < number_of_samples) ? *(X_data + bit_reversed_index * X_stride) : 0; + auto window_element = window_data ? 
*(window_data + bit_reversed_index) : 1; + *(Y_data + i * Y_data_stride) = std::complex(1, 0) * x * window_element; + } + + // Run fft_radix2 + unsigned current_significant_bits = 0; + for (size_t i = 2; i <= dft_length; i <<= 1) { + size_t midpoint = i >> 1; + current_significant_bits++; + + for (size_t k = 0; k < midpoint; k++) { + auto first_idx = bit_reverse(k, current_significant_bits); + auto second_idx = bit_reverse(midpoint + k, current_significant_bits); + for (size_t j = 0; j < dft_length; j += i) { + auto even_index = k + j; + auto odd_index = k + j + midpoint; + std::complex* even = (Y_data + even_index * Y_data_stride); + std::complex* odd = (Y_data + odd_index * Y_data_stride); + std::complex first = *even + (V[first_idx] * *odd); + std::complex second = *even + (V[second_idx] * *odd); + *even = first; + *odd = second; + } + } + } + + // Scale the output if inverse + if (inverse) { + for (size_t i = 0; i < dft_length; i++) { + std::complex& val = *(Y_data + i * Y_data_stride); + val /= static_cast(dft_length); + } + } + + if (is_onesided) { + const size_t output_size = (dft_length >> 1) + 1; + auto destination = reinterpret_cast*>(Y->MutableDataRaw()) + Y_offset; + for (size_t i = 0; i < output_size; i++) { + *(destination + Y_stride * i) = *(Y_data + i * Y_data_stride); + } + } + + return Status::OK(); +} + +template +static Status dft_naive(const Tensor* X, Tensor* Y, size_t X_offset, size_t X_stride, size_t Y_offset, size_t Y_stride, + int64_t axis, size_t dft_length, const Tensor* window, bool inverse) { + // Get shape and significant bits + const auto& X_shape = X->Shape(); + size_t number_of_samples = static_cast(X_shape[axis]); + const auto& Y_shape = Y->Shape(); + size_t dft_output_size = static_cast(Y_shape[axis]); + + // Get data + auto* X_data = const_cast(reinterpret_cast(X->DataRaw())) + X_offset; + auto* Y_data = reinterpret_cast*>(Y->MutableDataRaw()) + Y_offset; + + U* window_data = nullptr; + if (window) { + window_data = 
const_cast(reinterpret_cast(window->DataRaw())); + } + + auto angular_velocity = compute_angular_velocity(dft_length, inverse); + + for (size_t i = 0; i < dft_output_size; i++) { + std::complex& out = *(Y_data + i * Y_stride); + out.real(0); + out.imag(0); + + for (size_t j = 0; j < dft_length; j++) { // vectorize over this loop + auto exponential = compute_exponential(i * j, angular_velocity); + auto window_element = window_data ? *(window_data + j) : 1; + auto x = (j < number_of_samples) ? *(X_data + j * X_stride) : 0; + auto element = x * window_element; + out += exponential * element; + } + + if (inverse) { + out /= static_cast(dft_length); + } + } + + return Status::OK(); +} + +template +static Status discrete_fourier_transform(OpKernelContext* ctx, const Tensor* X, Tensor* Y, int64_t axis, + int64_t dft_length, const Tensor* window, bool is_onesided, bool inverse, + InlinedVector>& V, + InlinedVector>& temp_output) { + // Get shape + const auto& X_shape = X->Shape(); + const auto& Y_shape = Y->Shape(); + + auto batch_and_signal_rank = X->Shape().NumDimensions(); + auto total_dfts = static_cast(X->Shape().Size() / X->Shape()[axis]); + + auto is_input_real = X->Shape().NumDimensions() == 2 || X->Shape()[X->Shape().NumDimensions() - 1] == 1; + auto complex_input_factor = is_input_real ? 
1 : 2; + if (X->Shape().NumDimensions() > 2) { + total_dfts /= X->Shape()[X->Shape().NumDimensions() - 1]; + batch_and_signal_rank -= 1; + } + + // Calculate x/y offsets/strides + for (size_t i = 0; i < total_dfts; i++) { + size_t X_offset = 0; + size_t X_stride = X_shape.SizeFromDimension(axis + 1) / complex_input_factor; + size_t cumulative_packed_stride = total_dfts; + size_t temp = i; + for (size_t r = 0; r < batch_and_signal_rank; r++) { + if (r == static_cast(axis)) { + continue; + } + cumulative_packed_stride /= X_shape[r]; + auto index = temp / cumulative_packed_stride; + temp -= (index * cumulative_packed_stride); + X_offset += index * X_shape.SizeFromDimension(r + 1) / complex_input_factor; + } + + size_t Y_offset = 0; + size_t Y_stride = Y_shape.SizeFromDimension(axis + 1) / 2; + cumulative_packed_stride = total_dfts; + temp = i; + for (size_t r = 0; r < batch_and_signal_rank; r++) { + if (r == static_cast(axis)) { + continue; + } + cumulative_packed_stride /= X_shape[r]; + auto index = temp / cumulative_packed_stride; + temp -= (index * cumulative_packed_stride); + Y_offset += index * Y_shape.SizeFromDimension(r + 1) / 2; + } + + if (is_power_of_2(dft_length)) { + ORT_RETURN_IF_ERROR((fft_radix2(ctx, X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, + is_onesided, inverse, V, temp_output))); + } else { + ORT_RETURN_IF_ERROR( + (dft_naive(X, Y, X_offset, X_stride, Y_offset, Y_stride, axis, dft_length, window, inverse))); + } + } + + return Status::OK(); +} + +static Status discrete_fourier_transform(OpKernelContext* ctx, int64_t axis, bool is_onesided, bool inverse) { + // Get input shape + const auto* X = ctx->Input(0); + const auto* dft_length = ctx->Input(1); + const auto& X_shape = X->Shape(); + const auto is_real_valued = is_real_valued_signal(X_shape); + const auto is_complex_valued = is_complex_valued_signal(X_shape); + axis = HandleNegativeAxis(axis, X_shape.NumDimensions()); + + int64_t number_of_samples = 
static_cast(X_shape[axis]); + if (dft_length) { + const auto& dft_length_shape = dft_length->Shape(); + ORT_RETURN_IF(!dft_length_shape.IsScalar(), "dft_length must be a scalar value."); + number_of_samples = static_cast(signal::get_scalar_value_from_tensor(dft_length)); + ORT_RETURN_IF(number_of_samples <= 0, "dft_length must be greater than zero."); + } + + // Get the DFT output size. Onesided will return only the unique values! + // note: x >> 1 === std::floor(x / 2.f) + auto dft_output_size = is_onesided ? ((number_of_samples >> 1) + 1) : number_of_samples; + + // Get output shape + auto Y_shape = onnxruntime::TensorShape(X_shape); + if (X_shape.NumDimensions() == 2) { + Y_shape = onnxruntime::TensorShape({X_shape[0], dft_output_size, 2}); + } else { + Y_shape[Y_shape.NumDimensions() - 1] = 2; + } + Y_shape[axis] = dft_output_size; + auto Y = ctx->Output(0, Y_shape); + + // Get data type + auto data_type = X->DataType(); + + auto element_size = data_type->Size(); + if (element_size == sizeof(float)) { + InlinedVector> V; + InlinedVector> temp_output; + if (is_real_valued) { + ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, X, Y, axis, number_of_samples, nullptr, + is_onesided, inverse, V, temp_output))); + } else if (is_complex_valued) { + ORT_RETURN_IF_ERROR((discrete_fourier_transform>( + ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); + } else { + ORT_THROW( + "Unsupported input signal shape. The signal's first dimension must be the batch dimension and its second " + "dimension must be the signal length dimension. 
It may optionally include a 3rd dimension of size 2 for " + "complex inputs.", + data_type); + } + } else if (element_size == sizeof(double)) { + InlinedVector> V; + InlinedVector> temp_output; + if (is_real_valued) { + ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, X, Y, axis, number_of_samples, nullptr, + is_onesided, inverse, V, temp_output))); + } else if (is_complex_valued) { + ORT_RETURN_IF_ERROR((discrete_fourier_transform>( + ctx, X, Y, axis, number_of_samples, nullptr, is_onesided, inverse, V, temp_output))); + } else { + ORT_THROW( + "Unsupported input signal shape. The signal's first dimension must be the batch dimension and its second " + "dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for " + "complex inputs.", + data_type); + } + } else { + ORT_THROW("Unsupported input data type of ", data_type); + } + + return Status::OK(); +} + +Status DFT::Compute(OpKernelContext* ctx) const { + ORT_RETURN_IF_ERROR(discrete_fourier_transform(ctx, axis_, is_onesided_, is_inverse_)); + return Status::OK(); +} + +template +static Status short_time_fourier_transform(OpKernelContext* ctx, bool is_onesided, bool /*inverse*/) { + // Attr("onesided"): default = 1 + // Input(0, "signal") type = T1 + // Input(1, "frame_length") type = T2 + // Input(2, "window") type = T1, optional + // Input(3, "frame_step") type = T2 + // Output(0, "output") type = T1 + + // Get signal + const auto* signal = ctx->Input(0); + const auto frame_step = signal::get_scalar_value_from_tensor(ctx->Input(1)); + const auto* window = ctx->Input(2); + const auto* frame_length_tensor = ctx->Input(3); + + // Get input signal shape + const auto& signal_shape = signal->Shape(); + const auto batch_size = signal_shape[0]; + const auto signal_size = signal_shape[1]; + const auto signal_components = signal_shape.NumDimensions() == 2 ? 1 + : signal_shape.NumDimensions() == 3 ? 
signal_shape[2] + : 0; // error + ORT_ENFORCE(signal_components == 1 || signal_components == 2, + "signal shape must end in 1 (real) or 2 (real, imaginary)."); + + // Get the frame length + int64_t frame_length = std::numeric_limits::min(); + if (frame_length_tensor) { + frame_length = signal::get_scalar_value_from_tensor(frame_length_tensor); + } + + // Get window length + int64_t window_length = std::numeric_limits::min(); + if (window) { + window_length = window->Shape()[0]; + } + + // The frame_length and window inputs are generally used interchangeably, and should match! + if (frame_length != std::numeric_limits::min() && window_length != std::numeric_limits::min()) { + ORT_ENFORCE( + frame_length == window_length, + "If both frame_length and window are set, then the size of the window must be equal to the frame_length."); + } + + // Calculate the window size with preference to the window input. + const auto window_size = window ? window->Shape()[0] : frame_length; + ORT_ENFORCE(window_size < signal_size, "Ensure that the dft size is smaller than the signal."); + + // Calculate the number of dfts to run + const auto n_dfts = + static_cast(std::floor((signal_size - window_size) / static_cast(frame_step)) + 1); + + // Calculate the output spectra length (onesided will return only the unique values) + // note: x >> 1 === std::floor(x / 2.f) + const auto dft_output_size = is_onesided ? 
(window_size >> 1) + 1 : window_size; + + // Get/create the output mutable data + auto output_spectra_shape = onnxruntime::TensorShape({batch_size, n_dfts, dft_output_size, 2}); + auto Y = ctx->Output(0, output_spectra_shape); + auto Y_data = reinterpret_cast(Y->MutableDataRaw()); + + // Get/create the signal mutable data + auto* signal_data = const_cast(reinterpret_cast(signal->DataRaw())); + + // Define tensor shapes for each dft run + const int64_t output_components = 2; + auto dft_input_shape = onnxruntime::TensorShape({1, window_size, signal_components}); + auto dft_output_shape = onnxruntime::TensorShape({1, dft_output_size, output_components}); + + InlinedVector> V; + InlinedVector> temp_output; + + // Run each dft of each batch as if it was a real-valued batch size 1 dft operation + for (int64_t batch_idx = 0; batch_idx < batch_size; batch_idx++) { + for (int64_t i = 0; i < n_dfts; i++) { + auto input_frame_begin = + signal_data + (batch_idx * signal_size * signal_components) + (i * frame_step * signal_components); + + auto output_frame_begin = Y_data + (batch_idx * n_dfts * dft_output_size * output_components) + + (i * dft_output_size * output_components); + + // Tensors do not own the backing memory, so no worries on destruction + auto input = onnxruntime::Tensor(signal->DataType(), dft_input_shape, input_frame_begin, signal->Location(), 0); + + auto output = onnxruntime::Tensor(Y->DataType(), dft_output_shape, output_frame_begin, Y->Location(), 0); + + // Run individual dft + ORT_RETURN_IF_ERROR((discrete_fourier_transform(ctx, &input, &output, 1, window_size, window, is_onesided, + false, V, temp_output))); + } + } + + return Status::OK(); +} + +Status STFT::Compute(OpKernelContext* ctx) const { + // Attr("onesided"): default = 1 + // Input(0, "signal") type = T1 + // Input(1, "frame_length") type = T2 + // Input(2, "window") type = T1, optional + // Input(3, "frame_step") type = T2 + // Output(0, "output") type = T1 + + // Get signal shape + const 
auto* signal = ctx->Input(0); + const auto& signal_shape = signal->Shape(); + const auto is_real_valued = is_real_valued_signal(signal_shape); + const auto is_complex_valued = is_complex_valued_signal(signal_shape); + + // Get data type + auto data_type = signal->DataType(); + + const auto element_size = data_type->Size(); + if (element_size == sizeof(float)) { + if (is_real_valued) { + ORT_RETURN_IF_ERROR((short_time_fourier_transform(ctx, is_onesided_, false))); + } else if (is_complex_valued) { + ORT_RETURN_IF_ERROR((short_time_fourier_transform>(ctx, is_onesided_, false))); + } else { + ORT_THROW( + "Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second " + "dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for " + "complex inputs.", + data_type); + } + } else if (element_size == sizeof(double)) { + if (is_real_valued) { + ORT_RETURN_IF_ERROR((short_time_fourier_transform(ctx, is_onesided_, false))); + } else if (is_complex_valued) { + ORT_RETURN_IF_ERROR((short_time_fourier_transform>(ctx, is_onesided_, false))); + } else { + ORT_THROW( + "Unsupported input signal shape. The signal's first dimenstion must be the batch dimension and its second " + "dimension must be the signal length dimension. It may optionally include a 3rd dimension of size 2 for " + "complex inputs.", + data_type); + } + } else { + ORT_THROW("Unsupported input data type of ", data_type); + } + + return Status::OK(); +} + +} // namespace onnxruntime diff --git a/onnxruntime/contrib_ops/cpu/signal/dft.h b/onnxruntime/core/providers/cpu/signal/dft.h similarity index 68% rename from onnxruntime/contrib_ops/cpu/signal/dft.h rename to onnxruntime/core/providers/cpu/signal/dft.h index e177eb877ea7..71cac52e37e8 100644 --- a/onnxruntime/contrib_ops/cpu/signal/dft.h +++ b/onnxruntime/core/providers/cpu/signal/dft.h @@ -1,35 +1,28 @@ // Copyright (c) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. -#ifdef BUILD_MS_EXPERIMENTAL_OPS +#include "core/common/common.h" +#include "core/framework/op_kernel.h" namespace onnxruntime { -namespace contrib { class DFT final : public OpKernel { bool is_onesided_ = true; int64_t axis_ = 0; bool is_inverse_ = false; + public: explicit DFT(const OpKernelInfo& info) : OpKernel(info) { is_onesided_ = static_cast(info.GetAttrOrDefault("onesided", 0)); - axis_ = info.GetAttrOrDefault("axis", 0); + axis_ = info.GetAttrOrDefault("axis", 1); is_inverse_ = info.GetAttrOrDefault("inverse", 0); } Status Compute(OpKernelContext* ctx) const override; }; -class IDFT final : public OpKernel { - int64_t axis_ = 0; - public: - explicit IDFT(const OpKernelInfo& info) : OpKernel(info) { - axis_ = info.GetAttrOrDefault("axis", 0); - } - Status Compute(OpKernelContext* ctx) const override; -}; - class STFT final : public OpKernel { bool is_onesided_ = true; + public: explicit STFT(const OpKernelInfo& info) : OpKernel(info) { is_onesided_ = static_cast(info.GetAttrOrDefault("onesided", 1)); @@ -37,7 +30,4 @@ class STFT final : public OpKernel { Status Compute(OpKernelContext* ctx) const override; }; -} // namespace contrib } // namespace onnxruntime - -#endif diff --git a/onnxruntime/core/providers/cpu/signal/utils.h b/onnxruntime/core/providers/cpu/signal/utils.h new file mode 100644 index 000000000000..a5ff5df6e5d4 --- /dev/null +++ b/onnxruntime/core/providers/cpu/signal/utils.h @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+
+#pragma once
+
+#include "core/framework/tensor.h"
+
+namespace onnxruntime {
+namespace signal {
+
+// Reads the single element stored in `tensor` and converts it to T with static_cast.
+//
+// The tensor must be non-null and hold exactly one element whose element type is float, double,
+// int32 or int64. A null tensor or an element count other than one fails the ORT_ENFORCE checks;
+// any other element type is rejected through ORT_THROW.
+template <typename T>
+static T get_scalar_value_from_tensor(const Tensor* tensor) {
+  // The callers pass operator inputs straight in, so fail with a clear message instead of dereferencing null.
+  ORT_ENFORCE(tensor != nullptr, "Expected a scalar input tensor but the input is missing.");
+  // This helper is shared by every scalar input (size, num_mel_bins, sample_rate, ...), so the
+  // message must not name one specific input.
+  ORT_ENFORCE(tensor->Shape().Size() == 1, "Scalar input should have a single value.");
+  const auto data_type = tensor->GetElementType();
+  switch (data_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+      return static_cast<T>(*tensor->Data<float>());
+    case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE:
+      return static_cast<T>(*tensor->Data<double>());
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32:
+      return static_cast<T>(*tensor->Data<int32_t>());
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64:
+      return static_cast<T>(*tensor->Data<int64_t>());
+    default:
+      ORT_THROW("Unsupported input data type of ", data_type);
+  }
+}
+
+}  // namespace signal
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/signal/window_functions.cc b/onnxruntime/core/providers/cpu/signal/window_functions.cc
new file mode 100644
index 000000000000..4ddd76641a6e
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/signal/window_functions.cc
@@ -0,0 +1,216 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/cpu/signal/window_functions.h"
+
+#include <cmath>
+
+#include "core/providers/common.h"
+#include "core/providers/cpu/signal/utils.h"
+
+namespace onnxruntime {
+ONNX_CPU_OPERATOR_KERNEL(HannWindow, 17,
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)                                                     //
+                             .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())  //
+                             .TypeConstraint("T2",
+                                             BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t,
+                                                                       uint64_t, int8_t, int16_t, int32_t, int64_t>()),
+                         HannWindow);
+
+ONNX_CPU_OPERATOR_KERNEL(HammingWindow, 17,
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)                                                     //
+                             .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())  //
+                             .TypeConstraint("T2",
+                                             BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t,
+                                                                       uint64_t, int8_t, int16_t, int32_t, int64_t>()),
+                         HammingWindow);
+
+ONNX_CPU_OPERATOR_KERNEL(BlackmanWindow, 17,
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)                                                     //
+                             .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())  //
+                             .TypeConstraint("T2",
+                                             BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t,
+                                                                       uint64_t, int8_t, int16_t, int32_t, int64_t>()),
+                         BlackmanWindow);
+
+ONNX_CPU_OPERATOR_KERNEL(MelWeightMatrix, 17,
+                         KernelDefBuilder()
+                             .MayInplace(0, 0)                                                     //
+                             .TypeConstraint("T1", BuildKernelDefConstraints<int32_t, int64_t>())  //
+                             .TypeConstraint("T2", BuildKernelDefConstraints<float>())
+                             .TypeConstraint("T3",
+                                             BuildKernelDefConstraints<float, double, uint8_t, uint16_t, uint32_t,
+                                                                       uint64_t, int8_t, int16_t, int32_t, int64_t>()),
+                         MelWeightMatrix);
+
+// Fills Y with a cosine-sum window of `size` samples:
+//   w[i] = a0 - a1 * cos(2*pi*i/N) + a2 * cos(4*pi*i/N)
+// where N is `size` for a periodic window and `size - 1` for a symmetric one.
+// Each sample is computed in double precision and then converted to the output element type T.
+template <typename T>
+struct CosineSumWindow {
+  Status operator()(Tensor* Y, size_t size, float a0, float a1, float a2, bool is_periodic) {
+    auto* Y_data = reinterpret_cast<T*>(Y->MutableDataRaw());
+
+    // Nothing to write for an empty window; returning early also keeps `size - 1` below from wrapping around.
+    if (size == 0) {
+      return Status::OK();
+    }
+
+    // A symmetric single-sample window would divide by (size - 1) == 0 and produce NaN.
+    // Emit the conventional single-point window value of 1 instead.
+    if (size == 1 && !is_periodic) {
+      Y_data[0] = static_cast<T>(1);
+      return Status::OK();
+    }
+
+    // Calculate the radians to increment per sample
+    constexpr double pi = 3.14159265358979323846;
+    constexpr double tau = 2 * pi;
+    const size_t denominator = is_periodic ? size : size - 1;
+    const double angular_increment = tau / static_cast<double>(denominator);
+
+    for (size_t i = 0; i < size; i++) {
+      // Skip the second harmonic entirely for two-term windows (Hann, Hamming).
+      const double a2_component = (a2 == 0) ? 0.0 : (a2 * std::cos(2 * angular_increment * i));
+
+      Y_data[i] = static_cast<T>(a0 - (a1 * std::cos(angular_increment * i)) + a2_component);
+    }
+
+    return Status::OK();
+  }
+};
+
+// Reads the window size from input 0, allocates the 1-D output of that size and dispatches to
+// CosineSumWindow for the requested output element type.
+static Status create_cosine_sum_window(OpKernelContext* ctx, onnx::TensorProto_DataType output_datatype, float a0,
+                                       float a1, float a2, bool is_periodic) {
+  // Get the size of the window
+  const auto size = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(0));
+  // The size is converted to size_t below; reject negative values before they wrap around.
+  ORT_ENFORCE(size >= 0, "The window size must be non-negative, got ", size);
+
+  // Get the output tensor
+  auto Y_shape = TensorShape({size});
+  auto Y = ctx->Output(0, Y_shape);
+
+  utils::MLTypeCallDispatcher<float, double, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t>
+      dispatcher(output_datatype);
+  return dispatcher.InvokeRet<Status, CosineSumWindow>(Y, static_cast<size_t>(size), a0, a1, a2, is_periodic);
+}
+
+Status HannWindow::Compute(OpKernelContext* ctx) const {
+  // Hann windows are a special case of cosine-sum windows, which take the following form:
+  // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with the following values for a_k:
+  float a0 = .5f;
+  float a1 = a0;
+  float a2 = 0;
+  return create_cosine_sum_window(ctx, data_type_, a0, a1, a2, is_periodic_);
+}
+
+Status HammingWindow::Compute(OpKernelContext* ctx) const {
+  // Hamming windows are a special case of cosine-sum windows, which take the following form:
+  // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with the following values for a_k:
+  float a0 = 25.f / 46.f;
+  float a1 = 1 - a0;
+  float a2 = 0;
+  return create_cosine_sum_window(ctx, data_type_, a0, a1, a2, is_periodic_);
+}
+
+Status BlackmanWindow::Compute(OpKernelContext* ctx) const {
+  // Blackman windows are a special case of cosine-sum windows, which take the following form:
+  // w[n] = SUM_k=0_K( (-1)^k * a_k * cos(2*pi*k*n/N) ) with the following values for a_k:
+  float alpha = .16f;
+  float a2 = alpha / 2.f;
+  float a0 = .5f - a2;
+  float a1 = .5f;
+  return create_cosine_sum_window(ctx, data_type_, a0, a1, a2, is_periodic_);
+}
+
+// Converts a frequency in hertz to the mel scale.
+static inline double hz_to_mel_scale(double hz) { return 2595 * std::log10(1 + hz / 700); }
+
+// Converts a mel-scale value back to hertz (inverse of hz_to_mel_scale).
+static inline double mel_scale_to_hz(double mels) { return 700 * (std::pow(10, (mels / 2595)) - 1); }
+
+// Builds the [num_spectrogram_bins, num_mel_bins] matrix of triangular mel filters and writes it to output 0.
+template <typename T>
+struct CreateMelWeightMatrix {
+  Status operator()(OpKernelContext* ctx, int64_t num_mel_bins, int64_t dft_length, int64_t sample_rate,
+                    float lower_edge_hertz, float upper_edge_hertz) {
+    // Reject values that would otherwise wrap around in the size_t conversions below or divide by zero.
+    ORT_ENFORCE(num_mel_bins >= 0, "num_mel_bins must be non-negative, got ", num_mel_bins);
+    ORT_ENFORCE(dft_length >= 0, "dft_length must be non-negative, got ", dft_length);
+    ORT_ENFORCE(sample_rate > 0, "sample_rate must be positive, got ", sample_rate);
+
+    // Determine the width of the spectrogram.
+    // This is determined as half the size of the fft size. The first element of the spectrum is always retained,
+    // and the remaining are halved. The second half can be discarded due to the conjugate symmetry of the output with
+    // real valued ffts. Taken together the formula for the size of the output will be floor(dft_length / 2) + 1.
+    // Integer division already rounds down, so no std::floor is needed.
+    const int64_t num_spectrogram_bins = dft_length / 2 + 1;
+
+    // Checks
+    auto lowest_index = std::floor(((dft_length + 1) * lower_edge_hertz) / sample_rate);
+    auto highest_index = std::floor(((dft_length + 1) * upper_edge_hertz) / sample_rate);
+    ORT_ENFORCE(
+        lowest_index >= 0 && lowest_index < num_spectrogram_bins,
+        "lower_edge_hertz produces a mel triangle filter bank that is out of range given the dft_length and the "
+        "sample_rate.");
+    ORT_ENFORCE(
+        highest_index >= 0 && highest_index < num_spectrogram_bins,
+        "upper_edge_hertz produces a mel triangle filter bank that is out of range given the dft_length and the "
+        "sample_rate.");
+
+    // Create the output shape
+    TensorShape output_shape({num_spectrogram_bins, num_mel_bins});
+    auto* Y = ctx->Output(0, output_shape);
+
+    // Get the raw output data
+    auto* Y_data = reinterpret_cast<T*>(Y->MutableDataRaw());
+
+    // Set the weight matrix to 0
+    memset(Y_data, 0, num_spectrogram_bins * num_mel_bins * sizeof(T));
+
+    // The mel filterbank is a triangular shaped peak with a height of 1 and a base equal to the size of the MEL range
+    // divided by the number of bins needed times 2. This triangle is then slid across the mel domain linearly, with a
+    // constant step size that is equal to half of the base of the triangle. To accommodate N bins, N+2 data points will
+    // be needed to determine the start, center and end points of each mel triangle filter.
+    //
+    // The mel triangle filter banks begin at low_frequency_mel and end at high_frequency_mel.
+    // The range is divided evenly to create the needed points corresponding to the begin, center, end points of each
+    // triangle filterbank
+    InlinedVector<size_t> frequency_bins(num_mel_bins + 2);
+    auto low_frequency_mel = hz_to_mel_scale(lower_edge_hertz);
+    auto high_frequency_mel = hz_to_mel_scale(upper_edge_hertz);
+    auto mel_step = (high_frequency_mel - low_frequency_mel) / static_cast<double>(frequency_bins.size());
+
+    // Convert each point from mel scale back to hertz, and then compute the corresponding index in the fft
+    for (size_t i = 0; i < frequency_bins.size(); i++) {
+      auto hz = mel_scale_to_hz(low_frequency_mel + mel_step * i);
+      frequency_bins[i] = static_cast<size_t>(std::floor(((dft_length + 1) * hz) / sample_rate));
+    }
+
+    for (size_t i = 0; i < static_cast<size_t>(num_mel_bins); i++) {
+      auto lower_frequency_value = frequency_bins[i];       // left
+      auto center_frequency_point = frequency_bins[i + 1];  // center
+      auto higher_frequency_point = frequency_bins[i + 2];  // right
+
+      // Rising edge of the triangle: 0 at the left point up to 1 at the center point.
+      auto low_to_center = center_frequency_point - lower_frequency_value;
+      if (low_to_center == 0) {
+        auto& current_element = *(Y_data + (center_frequency_point * num_mel_bins) + i);
+        current_element = static_cast<T>(1);
+      } else {
+        for (size_t j = lower_frequency_value; j <= center_frequency_point; j++) {
+          auto& current_element = *(Y_data + (j * num_mel_bins) + i);
+          // Divide in floating point so that a narrow integer output type cannot truncate the divisor.
+          current_element = static_cast<T>((j - lower_frequency_value) / static_cast<double>(low_to_center));
+        }
+      }
+
+      // Falling edge of the triangle: 1 at the center point down towards 0 at the right point (exclusive).
+      auto center_to_high = higher_frequency_point - center_frequency_point;
+      if (center_to_high > 0) {
+        for (size_t j = center_frequency_point; j < higher_frequency_point; j++) {
+          auto& current_element = *(Y_data + (j * num_mel_bins) + i);
+          current_element = static_cast<T>((higher_frequency_point - j) / static_cast<double>(center_to_high));
+        }
+      }
+    }
+
+    return Status::OK();
+  }
+};
+
+// Dispatches to CreateMelWeightMatrix for the requested output element type.
+static Status create_mel_weight_matrix(OpKernelContext* ctx, onnx::TensorProto_DataType output_datatype,
+                                       int64_t num_mel_bins, int64_t dft_length, int64_t sample_rate,
+                                       float lower_edge_hertz, float upper_edge_hertz) {
+  utils::MLTypeCallDispatcher<float, double, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t>
+      dispatcher(output_datatype);
+  return dispatcher.InvokeRet<Status, CreateMelWeightMatrix>(ctx, num_mel_bins, dft_length, sample_rate,
+                                                             lower_edge_hertz, upper_edge_hertz);
+}
+
+Status MelWeightMatrix::Compute(OpKernelContext* ctx) const {
+  // Inputs 0-4 are scalars: num_mel_bins, dft_length, sample_rate, lower_edge_hertz, upper_edge_hertz.
+  const auto num_mel_bins = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(0));
+  const auto dft_length = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(1));
+  const auto sample_rate = signal::get_scalar_value_from_tensor<int64_t>(ctx->Input<Tensor>(2));
+  const auto lower_edge_hertz = signal::get_scalar_value_from_tensor<float>(ctx->Input<Tensor>(3));
+  const auto upper_edge_hertz = signal::get_scalar_value_from_tensor<float>(ctx->Input<Tensor>(4));
+
+  return create_mel_weight_matrix(ctx, data_type_, num_mel_bins, dft_length, sample_rate, lower_edge_hertz,
+                                  upper_edge_hertz);
+}
+}  // namespace onnxruntime
diff --git a/onnxruntime/contrib_ops/cpu/signal/window_functions.h b/onnxruntime/core/providers/cpu/signal/window_functions.h
similarity index 62%
rename from onnxruntime/contrib_ops/cpu/signal/window_functions.h
rename to onnxruntime/core/providers/cpu/signal/window_functions.h
index 81d8d3b48c65..994149b5ced0 100644
--- a/onnxruntime/contrib_ops/cpu/signal/window_functions.h
+++ b/onnxruntime/core/providers/cpu/signal/window_functions.h
@@ -1,40 +1,53 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
-#ifdef BUILD_MS_EXPERIMENTAL_OPS +#include "core/common/common.h" +#include "core/framework/op_kernel.h" namespace onnxruntime { -namespace contrib { class VariableOutputDataTypeBase : public OpKernel { protected: onnx::TensorProto_DataType data_type_; public: - VariableOutputDataTypeBase(const OpKernelInfo& info) : OpKernel(info) { - data_type_ = static_cast(info.GetAttrOrDefault("output_datatype", onnx::TensorProto_DataType::TensorProto_DataType_FLOAT)); + explicit VariableOutputDataTypeBase(const OpKernelInfo& info) : OpKernel(info) { + data_type_ = static_cast( // + info.GetAttrOrDefault("output_datatype", onnx::TensorProto_DataType::TensorProto_DataType_FLOAT)); } }; class HannWindow final : public VariableOutputDataTypeBase { public: explicit HannWindow(const OpKernelInfo& info) : VariableOutputDataTypeBase(info) { + is_periodic_ = static_cast(info.GetAttrOrDefault("periodic", 1)); } Status Compute(OpKernelContext* ctx) const override; + + private: + bool is_periodic_ = true; }; class HammingWindow final : public VariableOutputDataTypeBase { public: explicit HammingWindow(const OpKernelInfo& info) : VariableOutputDataTypeBase(info) { + is_periodic_ = static_cast(info.GetAttrOrDefault("periodic", 1)); } Status Compute(OpKernelContext* ctx) const override; + + private: + bool is_periodic_ = true; }; class BlackmanWindow final : public VariableOutputDataTypeBase { public: explicit BlackmanWindow(const OpKernelInfo& info) : VariableOutputDataTypeBase(info) { + is_periodic_ = static_cast(info.GetAttrOrDefault("periodic", 1)); } Status Compute(OpKernelContext* ctx) const override; + + private: + bool is_periodic_ = true; }; class MelWeightMatrix final : public VariableOutputDataTypeBase { @@ -44,7 +57,4 @@ class MelWeightMatrix final : public VariableOutputDataTypeBase { Status Compute(OpKernelContext* ctx) const override; }; -} // namespace contrib } // namespace onnxruntime - -#endif \ No newline at end of file diff --git 
a/onnxruntime/test/contrib_ops/signal_ops_test.cc b/onnxruntime/test/contrib_ops/signal_ops_test.cc deleted file mode 100644 index 3fe4ce75e604..000000000000 --- a/onnxruntime/test/contrib_ops/signal_ops_test.cc +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#ifdef BUILD_MS_EXPERIMENTAL_OPS - -#include "gtest/gtest.h" -#include "test/providers/provider_test_utils.h" - -namespace onnxruntime { -namespace test { - -static void TestNaiveDFTFloat(bool is_onesided) { - OpTester test("DFT", 1, onnxruntime::kMSExperimentalDomain); - - std::vector shape = {1, 5}; - std::vector output_shape = {1, 5, 2}; - output_shape[1] = is_onesided ? (1 + (shape[1] >> 1)) : shape[1]; - - std::vector input = {1, 2, 3, 4, 5}; - std::vector expected_output = { - 15.000000f, 0.0000000f, - -2.499999f, 3.4409550f, - -2.500000f, 0.8123000f, - -2.499999f, -0.812299f, - -2.500003f, -3.440953f - }; - - if (is_onesided) { - expected_output.resize(6); - } - test.AddInput("input", shape, input); - test.AddAttribute("onesided", static_cast(is_onesided)); - test.AddOutput("output", output_shape, expected_output); - test.Run(); -} - -static void TestRadix2DFTFloat(bool is_onesided) { - OpTester test("DFT", 1, onnxruntime::kMSExperimentalDomain); - - std::vector shape = {1, 8}; - std::vector output_shape = {1, 8, 2}; - output_shape[1] = is_onesided ? 
(1 + (shape[1] >> 1)) : shape[1]; - - std::vector input = {1, 2, 3, 4, 5, 6, 7, 8}; - std::vector expected_output = { - 36.000f, 0.000f, - -4.000f, 9.65685f, - -4.000f, 4.000f, - -4.000f, 1.65685f, - -4.000f, 0.000f, - -4.000f, -1.65685f, - -4.000f, -4.000f, - -4.000f, -9.65685f - }; - - if (is_onesided) { - expected_output.resize(10); - } - test.AddInput("input", shape, input); - test.AddAttribute("onesided", static_cast(is_onesided)); - test.AddOutput("output", output_shape, expected_output); - test.Run(); -} - -TEST(MLSignalOpTest, DFTFloat) { - TestNaiveDFTFloat(false); - TestNaiveDFTFloat(true); - TestRadix2DFTFloat(false); - TestRadix2DFTFloat(true); -} - -TEST(MLSignalOpTest, IDFTFloat) { - OpTester test("IDFT", 1, onnxruntime::kMSExperimentalDomain); - - std::vector shape = {1, 5, 2}; - std::vector input = - { - 15.000000f, 0.0000000f, - -2.499999f, 3.4409550f, - -2.500000f, 0.8123000f, - -2.499999f, -0.812299f, - -2.500003f, -3.440953f - }; - std::vector expected_output = - { - 1.000f, 0.000f, - 2.000f, 0.000f, - 3.000f, 0.000f, - 4.000f, 0.000f, - 5.000f, 0.000f - }; - - test.AddInput("input", shape, input); - test.AddOutput("output", shape, expected_output); - test.Run(); -} - -TEST(MLSignalOpTest, STFTFloat) { - OpTester test("STFT", 1, onnxruntime::kMSExperimentalDomain); - - std::vector signal(64, 1); - test.AddInput("signal", {1, 64}, signal); - std::vector window(16, 1); - test.AddInput("window", {16}, window); - test.AddInput("frame_length", {}, {16}); - test.AddInput("frame_step", {}, {8}); - - std::vector output_shape = {1, 7, 9, 2}; - std::vector expected_output = - { - 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, - 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, - 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 
0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, - 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, - 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, - 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, - 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f - }; - test.AddOutput("output", output_shape, expected_output); - test.Run(); -} - -TEST(MLSignalOpTest, HannWindowFloat) { - OpTester test("HannWindow", 1, onnxruntime::kMSExperimentalDomain); - - std::vector scalar_shape = {}; - std::vector output_shape = {32}; - std::vector expected_output = - { - 0.000000f, 0.009607f, 0.038060f, 0.084265f, 0.146447f, 0.222215f, 0.308658f, 0.402455f, - 0.500000f, 0.597545f, 0.691342f, 0.777785f, 0.853553f, 0.915735f, 0.961940f, 0.990393f, - 1.000000f, 0.990393f, 0.961940f, 0.915735f, 0.853553f, 0.777785f, 0.691342f, 0.597545f, - 0.500000f, 0.402455f, 0.308658f, 0.222215f, 0.146447f, 0.084265f, 0.038060f, 0.009607f - }; - - test.AddInput("size", scalar_shape, {32}); - test.AddOutput("output", output_shape, expected_output); - test.Run(); -} - -TEST(MLSignalOpTest, HammingWindowFloat) { - OpTester test("HammingWindow", 1, onnxruntime::kMSExperimentalDomain); - - std::vector scalar_shape = {}; - std::vector output_shape = {32}; - std::vector expected_output = - { - 0.086957f, 0.095728f, 0.121707f, 0.163894f, 0.220669f, 0.289848f, 0.368775f, 0.454415f, - 0.543478f, 0.632541f, 0.718182f, 0.797108f, 0.866288f, 0.923062f, 0.965249f, 0.991228f, - 1.000000f, 0.991228f, 0.965249f, 0.923062f, 0.866288f, 0.797108f, 0.718182f, 0.632541f, - 0.543478f, 
0.454415f, 0.368775f, 0.289848f, 0.220669f, 0.163894f, 0.121707f, 0.095728f - }; - - test.AddInput("size", scalar_shape, {32}); - test.AddOutput("output", output_shape, expected_output); - test.Run(); -} - -TEST(MLSignalOpTest, BlackmanWindowFloat) { - OpTester test("BlackmanWindow", 1, onnxruntime::kMSExperimentalDomain); - - std::vector scalar_shape = {}; - std::vector output_shape = {32}; - std::vector expected_output = - { - 0.000000f, 0.003518f, 0.014629f, 0.034880f, 0.066447f, 0.111600f, 0.172090f, 0.248544f, - 0.340000f, 0.443635f, 0.554773f, 0.667170f, 0.773553f, 0.866350f, 0.938508f, 0.984303f, - 1.000000f, 0.984303f, 0.938508f, 0.866350f, 0.773553f, 0.667170f, 0.554773f, 0.443635f, - 0.340000f, 0.248544f, 0.172090f, 0.111600f, 0.066447f, 0.034880f, 0.014629f, 0.003518f - }; - - test.AddInput("size", scalar_shape, {32}); - test.AddOutput("output", output_shape, expected_output); - test.Run(); -} - -TEST(MLSignalOpTest, MelWeightMatrixFloat) { - OpTester test("MelWeightMatrix", 1, onnxruntime::kMSExperimentalDomain); - - std::vector scalar_shape = {}; - std::vector output_shape = {9, 8}; - std::vector expected_output = - { - 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; - - test.AddInput("num_mel_bins", 
scalar_shape, {8}); - test.AddInput("dft_length", scalar_shape, {16}); - test.AddInput("sample_rate", scalar_shape, {8192}); - test.AddInput("lower_edge_hertz", scalar_shape, {0}); - test.AddInput("upper_edge_hertz", scalar_shape, {8192 / 2.f}); - test.AddOutput("output", output_shape, expected_output); - test.Run(); -} - -} // namespace test -} // namespace onnxruntime - -#endif \ No newline at end of file diff --git a/onnxruntime/test/providers/cpu/signal/signal_ops_test.cc b/onnxruntime/test/providers/cpu/signal/signal_ops_test.cc new file mode 100644 index 000000000000..2db126b14060 --- /dev/null +++ b/onnxruntime/test/providers/cpu/signal/signal_ops_test.cc @@ -0,0 +1,242 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include +#include + +#include "gtest/gtest.h" +#include "test/common/tensor_op_test_utils.h" +#include "test/providers/provider_test_utils.h" +#include "test/util/include/test_random_seed.h" + +using std::vector; + +namespace onnxruntime { +namespace test { + +static const int kMinOpsetVersion = 17; + +static void TestNaiveDFTFloat(bool onesided) { + OpTester test("DFT", kMinOpsetVersion); + + vector shape = {1, 5, 1}; + vector output_shape = {1, 5, 2}; + output_shape[1] = onesided ? (1 + (shape[1] >> 1)) : shape[1]; + + vector input = {1, 2, 3, 4, 5}; + vector expected_output = {15.000000f, 0.0000000f, -2.499999f, 3.4409550f, -2.500000f, + 0.8123000f, -2.499999f, -0.812299f, -2.500003f, -3.440953f}; + + if (onesided) { + expected_output.resize(6); + } + test.AddInput("input", shape, input); + test.AddAttribute("onesided", static_cast(onesided)); + test.AddOutput("output", output_shape, expected_output); + test.Run(); +} + +static void TestRadix2DFTFloat(bool onesided) { + OpTester test("DFT", kMinOpsetVersion); + + vector shape = {1, 8, 1}; + vector output_shape = {1, 8, 2}; + output_shape[1] = onesided ? 
(1 + (shape[1] >> 1)) : shape[1]; + + vector input = {1, 2, 3, 4, 5, 6, 7, 8}; + vector expected_output = {36.000f, 0.000f, -4.000f, 9.65685f, -4.000f, 4.000f, -4.000f, 1.65685f, + -4.000f, 0.000f, -4.000f, -1.65685f, -4.000f, -4.000f, -4.000f, -9.65685f}; + + if (onesided) { + expected_output.resize(10); + } + test.AddInput("input", shape, input); + test.AddAttribute("onesided", static_cast(onesided)); + test.AddOutput("output", output_shape, expected_output); + test.Run(); +} + +TEST(SignalOpsTest, DFTFloat_naive) { TestNaiveDFTFloat(false); } + +TEST(SignalOpsTest, DFTFloat_naive_onesided) { TestNaiveDFTFloat(true); } + +TEST(SignalOpsTest, DFTFloat_radix2) { TestRadix2DFTFloat(false); } + +TEST(SignalOpsTest, DFTFloat_radix2_onesided) { TestRadix2DFTFloat(true); } + +TEST(SignalOpsTest, DFTFloat_inverse) { + OpTester test("DFT", kMinOpsetVersion); + + vector shape = {1, 5, 2}; + vector input = {15.000000f, 0.0000000f, -2.499999f, 3.4409550f, -2.500000f, + 0.8123000f, -2.499999f, -0.812299f, -2.500003f, -3.440953f}; + vector expected_output = {1.000f, 0.000f, 2.000f, 0.000f, 3.000f, 0.000f, 4.000f, 0.000f, 5.000f, 0.000f}; + + test.AddInput("input", shape, input); + test.AddAttribute("inverse", static_cast(true)); + test.AddOutput("output", shape, expected_output); + test.Run(); +} + +// Tests that FFT(FFT(x), inverse=true) == x +static void TestDFTInvertible(bool complex) { + // TODO: test dft_length + class DFTInvertibleTester : public OpTester { + public: + DFTInvertibleTester(int64_t axis) : OpTester("DFT", kMinOpsetVersion), axis_(axis) {} + + protected: + void AddNodes(Graph& graph, vector& graph_inputs, vector& graph_outputs, + vector>& add_attribute_funcs) override { + // Create an intermediate output + vector intermediate_outputs{&graph.GetOrCreateNodeArg("dft_output", graph_outputs[0]->TypeAsProto())}; + + // call base implementation to add the DFT node. 
+ OpTester::AddNodes(graph, graph_inputs, intermediate_outputs, add_attribute_funcs); + OpTester::AddAttribute("axis", axis_); + + Node& inverse = graph.AddNode("inverse", "DFT", "inverse", intermediate_outputs, graph_outputs); + inverse.AddAttribute("inverse", static_cast(true)); + inverse.AddAttribute("axis", axis_); + } + + private: + int64_t axis_; + }; + + RandomValueGenerator random(GetTestRandomSeed()); + // TODO(garymm, smk2007): Add tests for different dft_length values. + const int64_t num_batches = 2; + for (int64_t axis = 1; axis < 2; axis += 1) { + for (int64_t signal_dim1 = 1; signal_dim1 <= 4; signal_dim1 += 1) { + for (int64_t signal_dim2 = 1; signal_dim2 <= 4; signal_dim2 += 1) { + DFTInvertibleTester test(axis); + vector input_shape{num_batches, signal_dim1, signal_dim2, 1 + complex}; + vector input_data = random.Uniform(input_shape, -100.f, 100.f); + test.AddInput("input", input_shape, input_data); + + vector output_shape(input_shape); + vector* output_data_p; + vector output_data; + if (complex) { + output_data_p = &input_data; + } else { // real -> (real, imaginary) with imaginary == 0. 
+ output_shape[3] = 2; + output_data.resize(input_data.size() * 2, 0); + for (size_t i = 0; i < input_data.size(); i += 1) { + output_data[i * 2] = input_data[i]; + } + output_data_p = &output_data; + } + test.AddOutput("output", output_shape, *output_data_p); + test.Run(); + } + } + } +} + +TEST(SignalOpsTest, DFT_invertible_real) { TestDFTInvertible(false); } + +TEST(SignalOpsTest, DFT_invertible_complex) { TestDFTInvertible(true); } + +TEST(SignalOpsTest, STFTFloat) { + OpTester test("STFT", kMinOpsetVersion); + + vector signal(64, 1); + test.AddInput("signal", {1, 64, 1}, signal); + test.AddInput("frame_step", {}, {8}); + vector window(16, 1); + test.AddInput("window", {16}, window); + test.AddInput("frame_length", {}, {16}); + + vector output_shape = {1, 7, 9, 2}; + vector expected_output = { + 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 16.000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f, + 0.0000f, 0.000f, 0.0000f, 0.000f, 0.0000f, 0.000f}; + test.AddOutput("output", output_shape, expected_output); + test.Run(); +} + 
+TEST(SignalOpsTest, HannWindowFloat) { + OpTester test("HannWindow", kMinOpsetVersion); + + vector scalar_shape = {}; + vector output_shape = {32}; + vector expected_output = {0.000000f, 0.009607f, 0.038060f, 0.084265f, 0.146447f, 0.222215f, 0.308658f, + 0.402455f, 0.500000f, 0.597545f, 0.691342f, 0.777785f, 0.853553f, 0.915735f, + 0.961940f, 0.990393f, 1.000000f, 0.990393f, 0.961940f, 0.915735f, 0.853553f, + 0.777785f, 0.691342f, 0.597545f, 0.500000f, 0.402455f, 0.308658f, 0.222215f, + 0.146447f, 0.084265f, 0.038060f, 0.009607f}; + + test.AddInput("size", scalar_shape, {32}); + test.AddOutput("output", output_shape, expected_output); + test.Run(); +} + +TEST(SignalOpsTest, HammingWindowFloat) { + OpTester test("HammingWindow", kMinOpsetVersion); + + vector scalar_shape = {}; + vector output_shape = {32}; + vector expected_output = // + {0.086957f, 0.095728f, 0.121707f, 0.163894f, 0.220669f, 0.289848f, 0.368775f, 0.454415f, + 0.543478f, 0.632541f, 0.718182f, 0.797108f, 0.866288f, 0.923062f, 0.965249f, 0.991228f, + 1.000000f, 0.991228f, 0.965249f, 0.923062f, 0.866288f, 0.797108f, 0.718182f, 0.632541f, + 0.543478f, 0.454415f, 0.368775f, 0.289848f, 0.220669f, 0.163894f, 0.121707f, 0.095728f}; + + test.AddInput("size", scalar_shape, {32}); + test.AddOutput("output", output_shape, expected_output); + test.Run(); +} + +TEST(SignalOpsTest, BlackmanWindowFloat) { + OpTester test("BlackmanWindow", kMinOpsetVersion); + + vector scalar_shape = {}; + vector output_shape = {32}; + vector expected_output = // + {0.000000f, 0.003518f, 0.014629f, 0.034880f, 0.066447f, 0.111600f, 0.172090f, 0.248544f, + 0.340000f, 0.443635f, 0.554773f, 0.667170f, 0.773553f, 0.866350f, 0.938508f, 0.984303f, + 1.000000f, 0.984303f, 0.938508f, 0.866350f, 0.773553f, 0.667170f, 0.554773f, 0.443635f, + 0.340000f, 0.248544f, 0.172090f, 0.111600f, 0.066447f, 0.034880f, 0.014629f, 0.003518f}; + + test.AddInput("size", scalar_shape, {32}); + test.AddOutput("output", output_shape, expected_output); + 
test.Run(); +} + +TEST(SignalOpsTest, MelWeightMatrixFloat) { + OpTester test("MelWeightMatrix", kMinOpsetVersion); + + vector scalar_shape = {}; + vector output_shape = {9, 8}; + vector expected_output = { + 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 1.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}; + + test.AddInput("num_mel_bins", scalar_shape, {8}); + test.AddInput("dft_length", scalar_shape, {16}); + test.AddInput("sample_rate", scalar_shape, {8192}); + test.AddInput("lower_edge_hertz", scalar_shape, {0}); + test.AddInput("upper_edge_hertz", scalar_shape, {8192 / 2.f}); + test.AddOutput("output", output_shape, expected_output); + test.Run(); +} + +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/testdata/kernel_def_hashes/onnx.cpu.json b/onnxruntime/test/testdata/kernel_def_hashes/onnx.cpu.json index 399e26fb35fd..d9271921c1e9 100644 --- a/onnxruntime/test/testdata/kernel_def_hashes/onnx.cpu.json +++ b/onnxruntime/test/testdata/kernel_def_hashes/onnx.cpu.json @@ -299,6 +299,10 @@ "BitShift ai.onnx CPUExecutionProvider", 8765933529403563240 ], + [ + "BlackmanWindow ai.onnx CPUExecutionProvider", + 4230790036355038984 + ], [ "Cast ai.onnx CPUExecutionProvider", 4892631558605514456 @@ -463,6 +467,10 @@ "Det ai.onnx 
CPUExecutionProvider", 4355346295804324544 ], + [ + "DFT ai.onnx CPUExecutionProvider", + 2809655513372322840 + ], [ "Div ai.onnx CPUExecutionProvider", 3765227735719542728 @@ -911,7 +919,7 @@ "GreaterOrEqual ai.onnx CPUExecutionProvider", 17416867432093505280 ], -[ + [ "GreaterOrEqual ai.onnx CPUExecutionProvider", 4445196831337347808 ], @@ -926,7 +934,7 @@ [ "GreaterOrEqual ai.onnx CPUExecutionProvider", 16172564801671050120 - ], + ], [ "GridSample ai.onnx CPUExecutionProvider", 15150264021585158264 @@ -939,6 +947,14 @@ "GRU ai.onnx CPUExecutionProvider", 2706165712066264784 ], + [ + "HammingWindow ai.onnx CPUExecutionProvider", + 7960927909626268504 + ], + [ + "HannWindow ai.onnx CPUExecutionProvider", + 11998243503561799520 + ], [ "Hardmax ai.onnx CPUExecutionProvider", 3471079605532327368 @@ -1018,7 +1034,7 @@ [ "LeakyRelu ai.onnx CPUExecutionProvider", 830582302303937272 - ], + ], [ "Less ai.onnx CPUExecutionProvider", 2529281912870061232 @@ -1090,7 +1106,7 @@ [ "LessOrEqual ai.onnx CPUExecutionProvider", 15565321713560893128 - ], + ], [ "Log ai.onnx CPUExecutionProvider", 268464912229648680 @@ -1287,6 +1303,10 @@ "MeanVarianceNormalization ai.onnx CPUExecutionProvider", 17242016597551698064 ], + [ + "MelWeightMatrix ai.onnx CPUExecutionProvider", + 1589563865873170600 + ], [ "Min ai.onnx CPUExecutionProvider", 5444634510407971152 @@ -1586,7 +1606,7 @@ [ "PRelu ai.onnx CPUExecutionProvider", 17872917958807301128 - ], + ], [ "QLinearConv ai.onnx CPUExecutionProvider", 1301685544574905024 @@ -2230,7 +2250,7 @@ [ "Scan ai.onnx CPUExecutionProvider", 220271302879298784 - ], + ], [ "Scatter ai.onnx CPUExecutionProvider", 15759064509848656392 @@ -2447,6 +2467,10 @@ "Squeeze ai.onnx CPUExecutionProvider", 16122603335179721968 ], + [ + "STFT ai.onnx CPUExecutionProvider", + 1739051453790648552 + ], [ "StringNormalizer ai.onnx CPUExecutionProvider", 7767393334034626736 @@ -2698,9 +2722,9 @@ [ "Where ai.onnx CPUExecutionProvider", 17544214758602217832 - ], + ], [ "Xor 
ai.onnx CPUExecutionProvider", 14631049987911195736 ] -] \ No newline at end of file +] diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 091c573d38ae..95b3516312f1 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -4,7 +4,6 @@ // // Tests that are failing temporarily and should be fixed "current_failing_tests": [ - "^test_(blackmanwindow|dft|hammingwindow|hannwindow|melweightmatrix|stft).*", // https://github.com/microsoft/onnxruntime/pull/11778 "^test_adagrad", "^test_adagrad_multiple", "^test_batchnorm_epsilon_old", diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc index 921e491b6351..8b2ec0246809 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_overrides.jsonc @@ -5,7 +5,9 @@ // Val: float, max absolute difference between expected and actual. "atol_overrides": { "test_dft": 1e-4, - "test_dft_axis": 1e-4 + "test_dft_axis": 1e-4, + "test_stft": 1e-4, + "test_stft_with_window": 1e-4 }, // Key: str, the name of the test as defined by ONNX without any device suffix. // Val: float, max relative difference between expected and actual.