Merge remote-tracking branch 'origin/0812_release' into 1228-analyze-and-improve-silver-job-runs-performance
neilbest-db committed Jun 12, 2024
2 parents 2b2fe08 + da14f88 commit 5fb52bc
Showing 2 changed files with 22 additions and 7 deletions.
@@ -86,10 +86,12 @@ class SchemaScrubber(
         s"DUPLICATE FIELDS:\n" +
         s"${dups.mkString("\n")}"
       logger.log(Level.WARN, warnMsg)
+      val counterMap = scala.collection.mutable.Map[String, Int]().withDefaultValue(0)
       fields.map(f => {
-        val fieldName = if (caseSensitive) f.sanitizedField.name else f.sanitizedField.name.toLowerCase
+        val fieldName = if (caseSensitive) f.sanitizedField.name.trim else f.sanitizedField.name.toLowerCase.trim
         if (dups.contains(fieldName)) {
-          val generatedUniqueName = f.sanitizedField.name + "_UNIQUESUFFIX_" + f.originalField.name.hashCode.toString
+          counterMap(fieldName) += 1
+          val generatedUniqueName = f.sanitizedField.name.trim + "_UNIQUESUFFIX_" + f.originalField.name.trim.hashCode.toString + "_" + counterMap(fieldName)
           val uniqueColumnMapping = s"\n${f.originalField.name} --> ${generatedUniqueName}"
           logger.log(Level.WARN, uniqueColumnMapping)
           f.sanitizedField.copy(name = generatedUniqueName)
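For readers skimming the first hunk: the new counterMap appends a per-duplicate index on top of the existing hash-based suffix, so two fields whose trimmed names (and therefore hash codes) coincide can no longer receive identical generated names. Below is a minimal, self-contained sketch of that idea in plain Scala; the helper name uniquify, the sample field names, and the use of a single string for both the sanitized and the original name are illustrative simplifications, not part of the actual SchemaScrubber change.

import scala.collection.mutable

// Illustrative sketch only: mirrors the counterMap pattern added above,
// using plain strings instead of Overwatch's sanitized-field wrappers.
def uniquify(names: Seq[String], caseSensitive: Boolean = false): Seq[String] = {
  def key(n: String): String = if (caseSensitive) n.trim else n.trim.toLowerCase
  // Names whose trimmed (and, by default, lower-cased) form occurs more than once.
  val dups = names.groupBy(key).collect { case (k, vs) if vs.size > 1 => k }.toSet
  // Same shape as the added counterMap line: a per-name counter defaulting to 0.
  val counterMap = mutable.Map[String, Int]().withDefaultValue(0)
  names.map { n =>
    val k = key(n)
    if (dups.contains(k)) {
      counterMap(k) += 1
      s"${n.trim}_UNIQUESUFFIX_${n.trim.hashCode}_${counterMap(k)}"
    } else n
  }
}

// Hypothetical usage: "dup1" and "dup1 " trim to the same key, so they now get
// distinct indexed names instead of two identical hash-only ones:
// uniquify(Seq("dup1", "dup1 ", "good_col"))
//   == Seq("dup1_UNIQUESUFFIX_3095058_1", "dup1_UNIQUESUFFIX_3095058_2", "good_col")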
@@ -311,11 +311,24 @@ class SchemaToolsTest extends AnyFunSpec with SparkSessionTestWrapper with Given
     )
     val exceptionScrubber = SchemaScrubber(exceptions = Array(propertiesScrubException))
 
-    val expectedResString = "b_2_2_2 STRUCT<abc: STRING, c_1__45: BIGINT>,exception_parent " +
-      "STRUCT<dup1: BIGINT, dup2: BIGINT, xyz: STRUCT<_mixed: BIGINT, _bad: BIGINT, " +
-      "dup1_UNIQUESUFFIX_95946320: BIGINT, dup1_UNIQUESUFFIX_95946320: BIGINT, dup2_UNIQUESUFFIX_3095059: " +
-      "BIGINT, dup2_UNIQUESUFFIX_3095059: STRING, good_col: BIGINT, jkl: BIGINT, otherexcept: BIGINT>, " +
-      "zyx: BIGINT>,i_1 BIGINT,parentwspace STRING,validParent STRING"
+    // <<<<<<< HEAD
+    // val expectedResString = "b_2_2_2 STRUCT<abc: STRING, c_1__45: BIGINT>,exception_parent " +
+    //   "STRUCT<dup1: BIGINT, dup2: BIGINT, xyz: STRUCT<_mixed: BIGINT, _bad: BIGINT, " +
+    //   "dup1_UNIQUESUFFIX_95946320: BIGINT, dup1_UNIQUESUFFIX_95946320: BIGINT, dup2_UNIQUESUFFIX_3095059: " +
+    //   "BIGINT, dup2_UNIQUESUFFIX_3095059: STRING, good_col: BIGINT, jkl: BIGINT, otherexcept: BIGINT>, " +
+    //   "zyx: BIGINT>,i_1 BIGINT,parentwspace STRING,validParent STRING"
+    // =======
+
+    val expectedResString = Seq(
+      "b_2_2_2 STRUCT<abc: STRING, c_1__45: BIGINT>,exception_parent ",
+      "STRUCT<dup1: BIGINT, dup2: BIGINT, xyz: STRUCT<_mixed: BIGINT, _bad: BIGINT, ",
+      "dup1_UNIQUESUFFIX_95946320_1: BIGINT, dup1_UNIQUESUFFIX_95946320_2: BIGINT, dup2_UNIQUESUFFIX_3095059_1: ",
+      "BIGINT, dup2_UNIQUESUFFIX_3095059_2: STRING, good_col: BIGINT, jkl: BIGINT, otherexcept: BIGINT>, ",
+      "zyx: BIGINT>,i_1 BIGINT,parentwspace STRING,validParent STRING")
+      .mkString
+
+    // >>>>>>> origin/0812_release
+
     val ddlFromLogic = df.scrubSchema(exceptionScrubber).schema.toDDL
     assertResult(expectedResString) {
       ddlFromLogic
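A note on the updated expected string in the test hunk above: the numeric part of each generated suffix is simply String.hashCode of the trimmed original field name, and the new _1/_2 tail is the per-name counter. Assuming the test's original field trims to plain "dup2", a quick REPL check confirms the hash part:

// Sanity check for the dup2_UNIQUESUFFIX_3095059_1 / _2 names above
println("dup2".hashCode) // prints 3095059; the trailing _1/_2 comes from counterMap(fieldName)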
