Skip to content

Commit

Permalink
Add number normalization test and address followup for getJsonObject (#…
Browse files Browse the repository at this point in the history
…10800)

* Add number normalization test and address followup for getJsonObject

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>

* Address comment

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>

* reduce test cases

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>

---------

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
  • Loading branch information
thirtiseven authored May 13, 2024
1 parent c8e492e commit 3066373
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
25 changes: 25 additions & 0 deletions integration_tests/src/main/python/get_json_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,28 @@ def test_get_json_object_number_normalization_legacy():
conf={'spark.rapids.sql.expression.GetJsonObject': 'true',
'spark.rapids.sql.getJsonObject.legacy.enabled': 'true'})
assert([[row[1]] for row in gpu_result] == data)

@pytest.mark.parametrize('data_gen', [StringGen(r'''-?[1-9]\d{0,5}\.\d{1,20}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,20}\.\d{1,5}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,5}E-?\d{1,20}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,20}E-?\d{1,5}''', nullable=False)], ids=idfn)
def test_get_json_object_floating_normalization(data_gen):
    """Check that get_json_object(a, "$") on generated numeric strings yields
    numerically close results on the GPU and on the CPU.

    The two results are compared as floats rather than as strings, so textual
    differences in how each side renders the number are tolerated as long as
    the parsed values agree within math.isclose's default relative tolerance.
    """
    normalization = lambda spark: unary_op_df(spark, data_gen).selectExpr(
        'a',
        'get_json_object(a,"$")'
    ).collect()
    # Column 1 of each collected row is the get_json_object output.
    gpu_res = [row[1] for row in with_gpu_session(
        normalization,
        conf={'spark.rapids.sql.expression.GetJsonObject': 'true'})]
    cpu_res = [row[1] for row in with_cpu_session(normalization)]

    def json_string_to_float(x):
        # Infinite values show up as quoted strings in the output, which
        # float() cannot parse directly, so they are mapped by hand.
        if x == '"-Infinity"':
            return float('-inf')
        elif x == '"Infinity"':
            return float('inf')
        else:
            return float(x)

    # Guard against silently skipping rows (or hitting an unrelated IndexError)
    # when the two sessions return a different number of rows.
    assert len(gpu_res) == len(cpu_res), \
        f"row count mismatch: GPU returned {len(gpu_res)} rows, CPU returned {len(cpu_res)}"
    for gpu_val, cpu_val in zip(gpu_res, cpu_res):
        # verify the relative difference is < 1e-9 (default rel_tol of math.isclose)
        assert math.isclose(json_string_to_float(gpu_val), json_string_to_float(cpu_val)), \
            f"GPU result {gpu_val!r} is not close to CPU result {cpu_val!r}"
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ object JsonPathParser extends RegexParsers {

/**
 * Decides whether a parsed JSON path has too many instructions for the GPU path.
 *
 * @param instructions the parsed path instructions
 * @return true when the number of instructions exceeds JSONUtils.MAX_PATH_DEPTH
 */
def fallbackCheck(instructions: List[PathInstruction]): Boolean = {
  // JNI kernel has a limit on the number of nested nodes (JSONUtils.MAX_PATH_DEPTH),
  // fallback to CPU if we exceed that
  instructions.length > JSONUtils.MAX_PATH_DEPTH
}

def unzipInstruction(instruction: PathInstruction): (String, String, Long) = {
Expand Down

0 comments on commit 3066373

Please sign in to comment.