diff --git a/pkg/planner/cascades/base/hash_equaler.go b/pkg/planner/cascades/base/hash_equaler.go index 455ebb948bb01..a09c2618fc80d 100644 --- a/pkg/planner/cascades/base/hash_equaler.go +++ b/pkg/planner/cascades/base/hash_equaler.go @@ -41,6 +41,8 @@ type Hasher interface { HashByte(val byte) HashBytes(val []byte) Reset() + SetCache([]byte) + Cache() []byte Sum64() uint64 } @@ -53,6 +55,9 @@ type Hash64a uint64 type hasher struct { // hash stores the hash value as it is incrementally computed. hash64a Hash64a + + // cache is the internal bytes slice that's will be reused for some special tmp encoding like datum. + cache []byte } // NewHashEqualer creates a new HashEqualer. @@ -65,6 +70,17 @@ func NewHashEqualer() Hasher { // Reset resets the Hasher to its initial state, reusing the internal bytes slice. func (h *hasher) Reset() { h.hash64a = offset64 + h.cache = h.cache[:0] +} + +// Cache returns the internal bytes slice for re-usage. +func (h *hasher) Cache() []byte { + return h.cache +} + +// SetCache sets the internal bytes slice for reu-sage. +func (h *hasher) SetCache(cache []byte) { + h.cache = cache } func (h *hasher) Sum64() uint64 { diff --git a/pkg/types/BUILD.bazel b/pkg/types/BUILD.bazel index f7bad15da90c9..4fb334fc30a85 100644 --- a/pkg/types/BUILD.bazel +++ b/pkg/types/BUILD.bazel @@ -53,6 +53,7 @@ go_library( "//pkg/parser/opcode", "//pkg/parser/terror", "//pkg/parser/types", + "//pkg/planner/cascades/base", "//pkg/util/collate", "//pkg/util/context", "//pkg/util/dbterror", diff --git a/pkg/types/datum.go b/pkg/types/datum.go index 34391839cf28f..21fe16994b32d 100644 --- a/pkg/types/datum.go +++ b/pkg/types/datum.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/parser/terror" "github.com/pingcap/tidb/pkg/parser/types" + "github.com/pingcap/tidb/pkg/planner/cascades/base" "github.com/pingcap/tidb/pkg/util/collate" "github.com/pingcap/tidb/pkg/util/hack" "github.com/pingcap/tidb/pkg/util/logutil" @@ -651,6 +652,48 @@ func (d *Datum) SetValue(val any, tp *types.FieldType) { } } +// Hash64ForDatum is a hash function for initialized by codec package. +var Hash64ForDatum func(h base.Hasher, d *Datum) + +// Hash64 implements base.HashEquals<0th> interface. +func (d *Datum) Hash64(h base.Hasher) { + Hash64ForDatum(h, d) +} + +// Equals implements base.HashEquals.<1st> interface. +func (d *Datum) Equals(other any) bool { + if other == nil { + return false + } + var d2 *Datum + switch x := other.(type) { + case *Datum: + d2 = x + case Datum: + d2 = &x + default: + return false + } + ok := d.k == d2.k && + d.decimal == d2.decimal && + d.length == d2.length && + d.i == d2.i && + d.collation == d2.collation && + string(d.b) == string(d2.b) + if !ok { + return false + } + // compare x + switch d.k { + case KindMysqlDecimal: + return d.GetMysqlDecimal().Compare(d2.GetMysqlDecimal()) == 0 + case KindMysqlTime: + return d.GetMysqlTime().Compare(d2.GetMysqlTime()) == 0 + default: + return true + } +} + // Compare compares datum to another datum. // Notes: don't rely on datum.collation to get the collator, it's tend to buggy. func (d *Datum) Compare(ctx Context, ad *Datum, comparer collate.Collator) (int, error) { diff --git a/pkg/util/codec/BUILD.bazel b/pkg/util/codec/BUILD.bazel index f2da06940be15..bf0cedeb4c827 100644 --- a/pkg/util/codec/BUILD.bazel +++ b/pkg/util/codec/BUILD.bazel @@ -14,6 +14,7 @@ go_library( deps = [ "//pkg/parser/mysql", "//pkg/parser/terror", + "//pkg/planner/cascades/base", "//pkg/types", "//pkg/util/chunk", "//pkg/util/collate", @@ -42,6 +43,7 @@ go_test( "//pkg/errctx", "//pkg/parser/mysql", "//pkg/parser/terror", + "//pkg/planner/cascades/base", "//pkg/testkit/testsetup", "//pkg/types", "//pkg/util/benchdaily", diff --git a/pkg/util/codec/codec.go b/pkg/util/codec/codec.go index b5d937c453ee3..8ebcaee493f75 100644 --- a/pkg/util/codec/codec.go +++ b/pkg/util/codec/codec.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/parser/terror" + "github.com/pingcap/tidb/pkg/planner/cascades/base" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/collate" @@ -1586,6 +1587,20 @@ func ConvertByCollationStr(str string, tp *types.FieldType) string { return string(hack.String(collator.Key(str))) } +// Hash64 is for datum hash64 calculation. +func Hash64(h base.Hasher, d *types.Datum) { + // let h.cache to receive datum hash value, which is potentially expendable. + // clean the cache before using it. + b := h.Cache()[:0] + b = HashCode(b, *d) + h.HashBytes(b) + h.SetCache(b) +} + +func init() { + types.Hash64ForDatum = Hash64 +} + // HashCode encodes a Datum into a unique byte slice. // It is mostly the same as EncodeValue, but it doesn't contain truncation or verification logic in order to make the encoding lossless. func HashCode(b []byte, d types.Datum) []byte { diff --git a/pkg/util/codec/codec_test.go b/pkg/util/codec/codec_test.go index 2cfcb7d5a0592..25f0611133852 100644 --- a/pkg/util/codec/codec_test.go +++ b/pkg/util/codec/codec_test.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/pkg/errctx" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/parser/terror" + "github.com/pingcap/tidb/pkg/planner/cascades/base" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/collate" @@ -1293,3 +1294,42 @@ func TestHashChunkColumns(t *testing.T) { require.Equal(t, rowHash[2].Sum64(), vecHash[2].Sum64()) } } + +func TestDatumHashEquals(t *testing.T) { + now := time.Now() + tests := []struct { + d1 types.Datum + d2 types.Datum + }{ + {types.NewIntDatum(1), types.NewIntDatum(1)}, + {types.NewUintDatum(1), types.NewUintDatum(1)}, + {types.NewFloat64Datum(1.1), types.NewFloat64Datum(1.1)}, + {types.NewStringDatum("abc"), types.NewStringDatum("abc")}, + {types.NewBytesDatum([]byte("abc")), types.NewBytesDatum([]byte("abc"))}, + {types.NewMysqlEnumDatum(types.Enum{Name: "a", Value: 1}), types.NewMysqlEnumDatum(types.Enum{Name: "a", Value: 1})}, + {types.NewMysqlSetDatum(types.Set{Name: "a", Value: 1}, "a"), types.NewMysqlSetDatum(types.Set{Name: "a", Value: 1}, "a")}, + {types.NewBinaryLiteralDatum([]byte{0x01}), types.NewBinaryLiteralDatum([]byte{0x01})}, + {types.NewMysqlBitDatum(types.NewBinaryLiteralFromUint(1, -1)), types.NewMysqlBitDatum(types.NewBinaryLiteralFromUint(1, -1))}, + {types.NewTimeDatum(types.NewTime(types.FromGoTime(now), mysql.TypeDatetime, 6)), types.NewTimeDatum(types.NewTime(types.FromGoTime(now), mysql.TypeDatetime, 6))}, + {types.NewDurationDatum(types.Duration{Duration: time.Second}), types.NewDurationDatum(types.Duration{Duration: time.Second})}, + {types.NewJSONDatum(types.CreateBinaryJSON("a")), types.NewJSONDatum(types.CreateBinaryJSON("a"))}, + {types.NewTimeDatum(types.NewTime(types.FromGoTime(now), mysql.TypeDatetime, 6)), types.NewTimeDatum(types.NewTime(types.FromGoTime(time.Now()), mysql.TypeDatetime, 6))}, + } + hasher1 := base.NewHashEqualer() + hasher2 := base.NewHashEqualer() + for _, tt := range tests[:(len(tests) - 1)] { + hasher1.Reset() + hasher2.Reset() + tt.d1.Hash64(hasher1) + tt.d2.Hash64(hasher2) + require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) + require.True(t, tt.d1.Equals(tt.d2)) + } + // the last test case is for the case that two datums are not equal + hasher1.Reset() + hasher2.Reset() + tests[len(tests)-1].d1.Hash64(hasher1) + tests[len(tests)-1].d2.Hash64(hasher2) + require.NotEqual(t, hasher1.Sum64(), hasher2.Sum64()) + require.False(t, tests[len(tests)-1].d1.Equals(tests[len(tests)-1].d2)) +}