Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add number normalization test and address followup for getJsonObject #10800

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions integration_tests/src/main/python/get_json_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,31 @@ def test_get_json_object_number_normalization_legacy():
conf={'spark.rapids.sql.expression.GetJsonObject': 'true',
'spark.rapids.sql.getJsonObject.legacy.enabled': 'true'})
assert([[row[1]] for row in gpu_result] == data)

@pytest.mark.parametrize('data_gen', [StringGen(r'''-?[1-9]\d{0,20}\.\d{1,20}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,5}\.\d{1,20}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,20}\.\d{1,5}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,5}\.\d{1,5}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,20}E-?\d{1,20}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,5}E-?\d{1,20}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,20}E-?\d{1,5}''', nullable=False),
                                      StringGen(r'''-?[1-9]\d{0,5}E-?\d{1,5}''', nullable=False)], ids=idfn)
def test_get_json_object_floating_normalization(data_gen):
    """Compare GPU and CPU get_json_object output for floating point literals.

    Each generated string is a decimal or exponent-form number. It is passed
    through get_json_object(a, "$") on both the GPU and the CPU, and the two
    results are compared numerically rather than as strings.
    """
    normalization = lambda spark: unary_op_df(spark, data_gen).selectExpr(
        'a',
        'get_json_object(a,"$")'
    ).collect()
    gpu_res = [[row[1]] for row in with_gpu_session(
        normalization,
        conf={'spark.rapids.sql.expression.GetJsonObject': 'true'})]
    cpu_res = [[row[1]] for row in with_cpu_session(normalization)]

    def json_string_to_float(x):
        # Overflowing values come back as the quoted strings "Infinity" / "-Infinity",
        # which float() cannot parse directly.
        if x == '"-Infinity"':
            return float('-inf')
        elif x == '"Infinity"':
            return float('inf')
        else:
            return float(x)

    # Guard against one side silently returning fewer rows than the other.
    assert len(gpu_res) == len(cpu_res)
    # Verify the relative difference is within math.isclose's default
    # tolerance (rel_tol=1e-09); equal infinities are considered close.
    for gpu_row, cpu_row in zip(gpu_res, cpu_res):
        assert math.isclose(json_string_to_float(gpu_row[0]), json_string_to_float(cpu_row[0]))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Add a comment like:
verify the relative difference is < 1.0e-7

Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ object JsonPathParser extends RegexParsers {

/**
 * Decides whether a parsed JSON path is too deep for the JNI kernel.
 *
 * @param instructions the parsed path instructions
 * @return true when the number of instructions exceeds `JSONUtils.MAX_PATH_DEPTH`,
 *         meaning the expression should fall back to the CPU
 */
def fallbackCheck(instructions: List[PathInstruction]): Boolean = {
  // JNI kernel has a limit on nested nodes (JSONUtils.MAX_PATH_DEPTH, previously the
  // literal 16 here), fallback to CPU if we exceed that
  instructions.length > JSONUtils.MAX_PATH_DEPTH
}

def unzipInstruction(instruction: PathInstruction): (String, String, Long) = {
Expand Down