diff --git a/src/google/protobuf/generated_message_tctable_impl.h b/src/google/protobuf/generated_message_tctable_impl.h
index a563c622fc9c..42cb26ff3d51 100644
--- a/src/google/protobuf/generated_message_tctable_impl.h
+++ b/src/google/protobuf/generated_message_tctable_impl.h
@@ -259,16 +259,12 @@ enum FieldType : uint16_t {
 }  // namespace field_layout
 
 #ifndef NDEBUG
-template <size_t align>
-void AlignFail(uintptr_t address) {
-  GOOGLE_ABSL_LOG(FATAL) << "Unaligned (" << align << ") access at " << address;
-
-  // Explicit abort to let compilers know this function does not return
-  abort();
-}
-
-extern template void AlignFail<4>(uintptr_t);
-extern template void AlignFail<8>(uintptr_t);
+PROTOBUF_EXPORT void AlignFail(std::integral_constant<size_t, 4>,
+                               std::uintptr_t address);
+PROTOBUF_EXPORT void AlignFail(std::integral_constant<size_t, 8>,
+                               std::uintptr_t address);
+inline void AlignFail(std::integral_constant<size_t, 1>,
+                      std::uintptr_t address) {}
 #endif
 
 // TcParser implements most of the parsing logic for tailcall tables.
@@ -365,29 +361,39 @@ class PROTOBUF_EXPORT TcParser final {
 
   // Manually unrolled and specialized Varint parsing.
   template <typename FieldType, int data_offset, int hasbit_idx>
-  static const char* SpecializedUnrolledVImpl1(PROTOBUF_TC_PARAM_DECL);
+  static const char* FastTV32S1(PROTOBUF_TC_PARAM_DECL);
+  template <int data_offset, int hasbit_idx>
+  static const char* FastTV64S1(PROTOBUF_TC_PARAM_DECL);
   template <int data_offset, int hasbit_idx>
-  static const char* SpecializedFastV8S1(PROTOBUF_TC_PARAM_DECL);
+  static const char* FastTV8S1(PROTOBUF_TC_PARAM_DECL);
 
   template <typename FieldType, int data_offset, int hasbit_idx>
   static constexpr TailCallParseFunc SingularVarintNoZag1() {
-    if (data_offset < 100) {
-      if (sizeof(FieldType) == 1) {
-        return &SpecializedFastV8S1<data_offset, hasbit_idx>;
+    if (sizeof(FieldType) == 1) {
+      if (data_offset < 100) {
+        return &FastTV8S1<data_offset, hasbit_idx>;
+      } else {
+        return &FastV8S1;
+      }
+    }
+    if (sizeof(FieldType) == 4) {
+      if (data_offset < 100) {
+        return &FastTV32S1<FieldType, data_offset, hasbit_idx>;
+      } else {  //
+        return &FastV32S1;
+      }
+    }
+    if (sizeof(FieldType) == 8) {
+      if (data_offset < 128) {
+        return &FastTV64S1<data_offset, hasbit_idx>;
+      } else {
+        return &FastV64S1;
       }
-      return &SpecializedUnrolledVImpl1<FieldType, data_offset, hasbit_idx>;
-    } else if (sizeof(FieldType) == 1) {
-      return &FastV8S1;
-    } else if (sizeof(FieldType) == 4) {
-      return &FastV32S1;
-    } else if (sizeof(FieldType) == 8) {
-      return &FastV64S1;
-    } else {
-      static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4 ||
-                        sizeof(FieldType) == 8,
-                    "");
-      return nullptr;
     }
+    static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4 ||
+                      sizeof(FieldType) == 8,
+                  "");
+    std::abort();  // unreachable
   }
 
   // Functions referenced by generated fast tables (closed enum):
@@ -482,7 +488,10 @@ class PROTOBUF_EXPORT TcParser final {
 #ifndef NDEBUG
     if (PROTOBUF_PREDICT_FALSE(
             reinterpret_cast<uintptr_t>(target) % alignof(T) != 0)) {
-      AlignFail(reinterpret_cast<uintptr_t>(target));
+      AlignFail(std::integral_constant<size_t, alignof(T)>(),
+                reinterpret_cast<uintptr_t>(target));
+      // Explicit abort to let compilers know this code-path does not return
+      abort();
     }
 #endif
     return *target;
@@ -495,7 +504,10 @@ class PROTOBUF_EXPORT TcParser final {
 #ifndef NDEBUG
     if (PROTOBUF_PREDICT_FALSE(
             reinterpret_cast<uintptr_t>(target) % alignof(T) != 0)) {
-      AlignFail(reinterpret_cast<uintptr_t>(target));
+      AlignFail(std::integral_constant<size_t, alignof(T)>(),
+                reinterpret_cast<uintptr_t>(target));
+      // Explicit abort to let compilers know this code-path does not return
+      abort();
     }
 #endif
     return *target;
@@ -537,7 +549,7 @@ class PROTOBUF_EXPORT TcParser final {
   };
   static TestMiniParseResult TestMiniParse(PROTOBUF_TC_PARAM_DECL);
   template <bool export_called_function>
-  static const char* MiniParseImpl(PROTOBUF_TC_PARAM_DECL);
+  static const char* MiniParse(PROTOBUF_TC_PARAM_DECL);
 
   template <typename TagType, bool group_coding, bool aux_is_table>
   static inline const char*
   SingularParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL);
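SingularVarintNoZag1() resolves its dispatch entirely at compile time: each instantiation collapses to a single `return &...;`, so the generated fast table stores a direct function pointer with no runtime branching. A minimal standalone model of that pattern (all names below are invented for illustration, not protobuf's API):

    #include <cstdint>
    #include <cstdlib>

    // Toy stand-ins for the specialized parse functions.
    inline const char* ParseU8(const char* p) { return p; }
    inline const char* ParseU32(const char* p) { return p; }
    inline const char* ParseU64(const char* p) { return p; }

    using ParseFn = const char* (*)(const char*);

    // Like SingularVarintNoZag1: a constexpr function template that picks one
    // concrete function pointer per field type; the branches are evaluated at
    // compile time and the chosen pointer is baked into the table.
    template <typename FieldType>
    constexpr ParseFn PickParser() {
      if (sizeof(FieldType) == 1) return &ParseU8;
      if (sizeof(FieldType) == 4) return &ParseU32;
      if (sizeof(FieldType) == 8) return &ParseU64;
      static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4 ||
                        sizeof(FieldType) == 8,
                    "");
      std::abort();  // unreachable for the sizes above, as in the patch
    }

    // The table entry is a plain pointer, resolved during compilation:
    constexpr ParseFn kFieldOneParser = PickParser<uint32_t>();  // &ParseU32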
@@ -714,12 +726,127 @@ class PROTOBUF_EXPORT TcParser final {
   static const char* MpFallback(PROTOBUF_TC_PARAM_DECL);
 };
 
+// Shift "byte" left by n * 7 bits, filling vacated bits with ones.
+template <int n>
+inline PROTOBUF_ALWAYS_INLINE uint64_t
+shift_left_fill_with_ones(uint64_t byte, uint64_t ones) {
+  return (byte << (n * 7)) | (ones >> (64 - (n * 7)));
+}
+
+// Shift "byte" left by n * 7 bits, filling vacated bits with ones, and
+// put the new value in res. Return whether the result was negative.
+template <int n>
+inline PROTOBUF_ALWAYS_INLINE bool shift_left_fill_with_ones_was_negative(
+    uint64_t byte, uint64_t ones, int64_t& res) {
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
+  // For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a
+  // substantial improvement from capturing the sign from the condition code
+  // register on x86-64.
+  bool sign_bit;
+  asm("shldq %3, %2, %1"
+      : "=@ccs"(sign_bit), "+r"(byte)
+      : "r"(ones), "i"(n * 7));
+  res = byte;
+  return sign_bit;
+#else
+  // Generic fallback:
+  res = shift_left_fill_with_ones<n>(byte, ones);
+  return static_cast<int64_t>(res) < 0;
+#endif
+}
+
+inline PROTOBUF_ALWAYS_INLINE std::pair<const char*, uint64_t>
+Parse64FallbackPair(const char* p, int64_t res1) {
+  auto ptr = reinterpret_cast<const int8_t*>(p);
+
+  // The algorithm relies on sign extension for each byte to set all high bits
+  // when the varint continues. It also relies on asserting all of the lower
+  // bits for each successive byte read. This allows the result to be
+  // aggregated using a bitwise AND. For example:
+  //
+  //          8       1          64     57 ... 24     17  16      9  8       1
+  // ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111  1111 1111  1aaa aaaa
+  // ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111  11bb bbbb  b111 1111
+  // ptr[2] = 0ccc cccc ; res3 = 0000 0000 ... 000c cccc  cc11 1111  1111 1111
+  //                             ---------------------------------------------
+  //        res1 & res2 & res3 = 0000 0000 ... 000c cccc  ccbb bbbb  baaa aaaa
+  //
+  // On x86-64, a shld from a single register filled with enough 1s in the
+  // high bits can accomplish all this in one instruction. It so happens that
+  // res1 has 57 high bits of ones, which is enough for the largest shift done.
+  //
+  // Just as importantly, by keeping results in res1, res2, and res3, we take
+  // advantage of the superscalar abilities of the CPU.
+  GOOGLE_ABSL_DCHECK_EQ(res1 >> 7, -1);
+  uint64_t ones = res1;  // save the high 1 bits from res1 (input to SHLD)
+  int64_t res2, res3;    // accumulated result chunks
+
+  if (!shift_left_fill_with_ones_was_negative<1>(ptr[1], ones, res2))
+    goto done2;
+  if (!shift_left_fill_with_ones_was_negative<2>(ptr[2], ones, res3))
+    goto done3;
+
+  // For the remainder of the chunks, check the sign of the AND result.
+  res1 &= shift_left_fill_with_ones<3>(ptr[3], ones);
+  if (res1 >= 0) goto done4;
+  res2 &= shift_left_fill_with_ones<4>(ptr[4], ones);
+  if (res2 >= 0) goto done5;
+  res3 &= shift_left_fill_with_ones<5>(ptr[5], ones);
+  if (res3 >= 0) goto done6;
+  res1 &= shift_left_fill_with_ones<6>(ptr[6], ones);
+  if (res1 >= 0) goto done7;
+  res2 &= shift_left_fill_with_ones<7>(ptr[7], ones);
+  if (res2 >= 0) goto done8;
+  res3 &= shift_left_fill_with_ones<8>(ptr[8], ones);
+  if (res3 >= 0) goto done9;
+
+  // For valid 64bit varints, the 10th byte/ptr[9] should be exactly 1. In
+  // this case, the continuation bit of ptr[8] already set the top bit of
+  // res3 correctly, so all we have to do is check that the expected case is
+  // true.
+  if (PROTOBUF_PREDICT_TRUE(ptr[9] == 1)) goto done10;
+
+  // A value of 0, however, represents an over-serialized varint. This case
+  // should not happen, but if it does (say, due to a nonconforming
+  // serializer), deassert the continuation bit that came from ptr[8].
+  if (ptr[9] == 0) {
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
+    // Use a small instruction since this is an uncommon code path.
+    asm("btcq $63,%0" : "+r"(res3));
+#else
+    res3 ^= static_cast<uint64_t>(1) << 63;
+#endif
+    goto done10;
+  }
+
+  // If the 10th byte/ptr[9] itself has any other value, then it is too big
+  // to fit in 64 bits. If the continue bit is set, it is an unterminated
+  // varint.
+  return {nullptr, 0};
+
+done2:
+  return {p + 2, res1 & res2};
+done3:
+  return {p + 3, res1 & res2 & res3};
+done4:
+  return {p + 4, res1 & res2 & res3};
+done5:
+  return {p + 5, res1 & res2 & res3};
+done6:
+  return {p + 6, res1 & res2 & res3};
+done7:
+  return {p + 7, res1 & res2 & res3};
+done8:
+  return {p + 8, res1 & res2 & res3};
+done9:
+  return {p + 9, res1 & res2 & res3};
+done10:
+  return {p + 10, res1 & res2 & res3};
+}
+
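To make the fill-with-ones arithmetic concrete, here is the two-byte varint for 300 decoded by hand with the same steps Parse64FallbackPair takes. This is a standalone sketch with a local helper, not the protobuf functions:

    #include <cassert>
    #include <cstdint>

    // Local copy of the shift_left_fill_with_ones<n> arithmetic, with n as a
    // runtime parameter for illustration only.
    uint64_t ShiftLeftFillWithOnes(uint64_t byte, uint64_t ones, int n) {
      return (byte << (n * 7)) | (ones >> (64 - n * 7));
    }

    int main() {
      // 300 encodes as the varint bytes {0xAC, 0x02}.
      const int8_t bytes[] = {static_cast<int8_t>(0xAC), 0x02};

      // res1 is the sign-extended first byte: its continuation bit becomes 57
      // high ones -- exactly the "ones" input the SHLD trick needs.
      int64_t res1 = bytes[0];  // 0xFFFF'FFFF'FFFF'FFAC
      assert(res1 >> 7 == -1);  // mirrors the DCHECK in Parse64FallbackPair
      uint64_t ones = static_cast<uint64_t>(res1);

      // Second byte, shifted up 7 with the vacated low bits filled with ones.
      // 0x02 has no continuation bit, so res2 is non-negative and we stop.
      int64_t res2 =
          static_cast<int64_t>(ShiftLeftFillWithOnes(bytes[1], ones, 1));
      assert(res2 >= 0);

      // The AND keeps only the payload bits asserted by both chunks.
      assert((static_cast<uint64_t>(res1) & static_cast<uint64_t>(res2)) == 300);
      return 0;
    }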
 // Notes:
 //   1) if data_offset is negative, it's read from data.offset()
 //   2) if hasbit_idx is negative, it's read from data.hasbit_idx()
 template <int data_offset, int hasbit_idx>
-PROTOBUF_NOINLINE const char* TcParser::SpecializedFastV8S1(
-    PROTOBUF_TC_PARAM_DECL) {
+PROTOBUF_NOINLINE const char* TcParser::FastTV8S1(PROTOBUF_TC_PARAM_DECL) {
   using TagType = uint8_t;
 
   // Special case for a varint bool field with a tag of 1 byte:
@@ -766,8 +893,40 @@ PROTOBUF_NOINLINE const char* TcParser::SpecializedFastV8S1(
 }
 
-template <typename FieldType, int data_offset, int hasbit_idx>
-PROTOBUF_NOINLINE const char* TcParser::SpecializedUnrolledVImpl1(
-    PROTOBUF_TC_PARAM_DECL) {
+template <int data_offset, int hasbit_idx>
+PROTOBUF_NOINLINE const char* TcParser::FastTV64S1(PROTOBUF_TC_PARAM_DECL) {
+  using TagType = uint8_t;
+  // super-early success test...
+  if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) {
+    ptr += sizeof(TagType);  // Consume tag
+    if (hasbit_idx < 32) {
+      hasbits |= (uint64_t{1} << hasbit_idx);
+    }
+    uint8_t value = data.data >> 8;
+    RefAt<uint64_t>(msg, data_offset) = value;
+    ptr += 1;
+    PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
+  }
+  if (PROTOBUF_PREDICT_FALSE(data.coded_tag<TagType>() != 0)) {
+    PROTOBUF_MUSTTAIL return MiniParse(PROTOBUF_TC_PARAM_PASS);
+  }
+  ptr += sizeof(TagType);  // Consume tag
+  if (hasbit_idx < 32) {
+    hasbits |= (uint64_t{1} << hasbit_idx);
+  }
+
+  auto tmp = Parse64FallbackPair(ptr, static_cast<int8_t>(data.data >> 8));
+  data.data = 0;  // Indicate to the compiler that we don't need this anymore.
+  ptr = tmp.first;
+  if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) {
+    return Error(PROTOBUF_TC_PARAM_PASS);
+  }
+
+  RefAt<uint64_t>(msg, data_offset) = static_cast<uint64_t>(tmp.second);
+  PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
+}
+
+template <typename FieldType, int data_offset, int hasbit_idx>
+PROTOBUF_NOINLINE const char* TcParser::FastTV32S1(PROTOBUF_TC_PARAM_DECL) {
   using TagType = uint8_t;
   // super-early success test...
   if (PROTOBUF_PREDICT_TRUE(((data.data) & 0x80FF) == 0)) {
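The "super-early success test" reads data.data after the dispatcher has already XORed two wire bytes into it (the test file's Xor2SerializedBytes, further down, reproduces that): the low byte is zero exactly when the one-byte tag matched, and bit 15 is clear exactly when the first value byte has no continuation bit, so a single `(data.data & 0x80FF) == 0` branch answers both questions. A simplified standalone model (the real TcFieldData packs further fields above these 16 bits):

    #include <cassert>
    #include <cstdint>

    // Simplified model: the dispatcher XORs the first two wire bytes into a
    // data word whose low byte holds the entry's expected one-byte tag.
    uint64_t XorWithExpected(uint8_t expected_tag, uint8_t b0, uint8_t b1) {
      uint64_t wire = b0 | uint64_t{b1} << 8;
      return wire ^ expected_tag;
    }

    bool SuperEarlySuccess(uint64_t data) {
      // Low byte == 0  <=>  the tag matched this field's entry.
      // Bit 15   == 0  <=>  the first value byte has no continuation bit.
      return (data & 0x80FF) == 0;
    }

    int main() {
      const uint8_t kTag = 0x08;  // field 1, wiretype varint
      assert(SuperEarlySuccess(XorWithExpected(kTag, 0x08, 0x05)));   // value 5
      assert(!SuperEarlySuccess(XorWithExpected(kTag, 0x08, 0x85)));  // multi-byte
      assert(!SuperEarlySuccess(XorWithExpected(kTag, 0x10, 0x05)));  // wrong tag
      return 0;
    }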
@@ -800,34 +959,30 @@ PROTOBUF_NOINLINE const char* TcParser::SpecializedUnrolledVImpl1(
   if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
     res = RotRight7AndReplaceLowByte(res, ptr[4]);
     if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
-      res = RotRight7AndReplaceLowByte(res, ptr[5]);
-      if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
-        res = RotRight7AndReplaceLowByte(res, ptr[6]);
-        if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
-          res = RotRight7AndReplaceLowByte(res, ptr[7]);
-          if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
-            res = RotRight7AndReplaceLowByte(res, ptr[8]);
-            if (PROTOBUF_PREDICT_FALSE(res & 0x80)) {
+      if (PROTOBUF_PREDICT_FALSE(ptr[5] & 0x80)) {
+        if (PROTOBUF_PREDICT_FALSE(ptr[6] & 0x80)) {
+          if (PROTOBUF_PREDICT_FALSE(ptr[7] & 0x80)) {
+            if (PROTOBUF_PREDICT_FALSE(ptr[8] & 0x80)) {
               if (ptr[9] & 0xFE) return Error(PROTOBUF_TC_PARAM_PASS);
-              res = RotateLeft(res, -7) & ~1;
-              res += ptr[9] & 1;
-              *out = RotateLeft(res, 63);
+              *out = RotateLeft(res, 28);
               ptr += 10;
-              PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
+              PROTOBUF_MUSTTAIL return ToTagDispatch(
+                  PROTOBUF_TC_PARAM_PASS);
             }
-            *out = RotateLeft(res, 56);
+            *out = RotateLeft(res, 28);
             ptr += 9;
-            PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
+            PROTOBUF_MUSTTAIL return ToTagDispatch(
+                PROTOBUF_TC_PARAM_PASS);
           }
-          *out = RotateLeft(res, 49);
+          *out = RotateLeft(res, 28);
           ptr += 8;
           PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
         }
-        *out = RotateLeft(res, 42);
+        *out = RotateLeft(res, 28);
        ptr += 7;
         PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
       }
-      *out = RotateLeft(res, 35);
+      *out = RotateLeft(res, 28);
       ptr += 6;
       PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_PASS);
     }
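The rewritten deep paths drop the old per-exit fix-ups (RotateLeft by 35/42/49/56/63) for one fixed RotateLeft(res, 28): once ptr[4] is folded in, res holds five 7-bit groups (35 bits), which already covers any 32-bit value, so ptr[5] onward only needs to be checked for termination, never accumulated. A standalone sketch of the rotate-and-replace accumulation, with local stand-ins for the helpers:

    #include <cassert>
    #include <cstdint>

    // Local stand-ins for the helpers used by FastTV32S1.
    uint64_t RotL(uint64_t v, int s) { return (v << s) | (v >> (64 - s)); }

    uint64_t RotRight7AndReplaceLowByte(uint64_t res, uint8_t byte) {
      // Rotate right by 7: the seven payload bits in the low byte wrap to the
      // top of the word, vacating the low byte for the next wire byte.
      res = RotL(res, 64 - 7);
      return (res & ~uint64_t{0xFF}) | byte;
    }

    int main() {
      // Payload bytes for 300: 0xAC (continuation bit set), then 0x02.
      uint64_t res = 0xAC;                          // first payload byte
      assert(res & 0x80);                           // varint continues
      res = RotRight7AndReplaceLowByte(res, 0x02);  // fold in second byte
      assert(!(res & 0x80));                        // terminal byte

      // A final RotateLeft by 7 * (accumulated_bytes - 1) lines the 7-bit
      // groups back up; with two bytes accumulated that is 7. FastTV32S1's
      // deep exits all have exactly five bytes accumulated, hence the single
      // constant 7 * 4 == 28.
      assert(static_cast<uint32_t>(RotL(res, 7)) == 300);
      return 0;
    }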
diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc
index 8af9851ebc6b..cd70aeb3515f 100644
--- a/src/google/protobuf/generated_message_tctable_lite.cc
+++ b/src/google/protobuf/generated_message_tctable_lite.cc
@@ -58,8 +58,18 @@ using FieldEntry = TcParseTableBase::FieldEntry;
 
 //////////////////////////////////////////////////////////////////////////////
 
 #ifndef NDEBUG
-template void AlignFail<4>(uintptr_t);
-template void AlignFail<8>(uintptr_t);
+void AlignFail(std::integral_constant<size_t, 4>, std::uintptr_t address) {
+  GOOGLE_ABSL_LOG(FATAL) << "Unaligned (4) access at " << address;
+
+  // Explicit abort to let compilers know this function does not return
+  abort();
+}
+void AlignFail(std::integral_constant<size_t, 8>, std::uintptr_t address) {
+  GOOGLE_ABSL_LOG(FATAL) << "Unaligned (8) access at " << address;
+
+  // Explicit abort to let compilers know this function does not return
+  abort();
+}
 #endif
 
 const char* TcParser::GenericFallbackLite(PROTOBUF_TC_PARAM_DECL) {
@@ -257,7 +267,7 @@ absl::string_view TcParser::FieldName(const TcParseTableBase* table,
 }
 
 template <bool export_called_function>
-inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParseImpl(
+inline PROTOBUF_ALWAYS_INLINE const char* TcParser::MiniParse(
     PROTOBUF_TC_PARAM_DECL) {
   TestMiniParseResult* test_out;
   if (export_called_function) {
@@ -342,13 +352,13 @@
 }
 
 PROTOBUF_NOINLINE const char* TcParser::MiniParse(PROTOBUF_TC_PARAM_DECL) {
-  PROTOBUF_MUSTTAIL return MiniParseImpl<false>(PROTOBUF_TC_PARAM_PASS);
+  PROTOBUF_MUSTTAIL return MiniParse<false>(PROTOBUF_TC_PARAM_PASS);
 }
 PROTOBUF_NOINLINE TcParser::TestMiniParseResult TcParser::TestMiniParse(
     PROTOBUF_TC_PARAM_DECL) {
   TestMiniParseResult result = {};
   data.data = reinterpret_cast<uintptr_t>(&result);
-  result.ptr = MiniParseImpl<true>(PROTOBUF_TC_PARAM_PASS);
+  result.ptr = MiniParse<true>(PROTOBUF_TC_PARAM_PASS);
   return result;
 }
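With the rename, the template becomes an overload of the exported MiniParse entry point, and TestMiniParse reuses the exact production code path by smuggling a pointer to its result struct through the otherwise-unused data word. The same hook pattern in miniature (names invented; only the shape follows the patch):

    #include <cassert>
    #include <cstdint>

    struct TestResult {
      const char* called = nullptr;
    };

    // One template with a compile-time flag: the instrumented instantiation
    // decodes a TestResult* from the data word and records diagnostics.
    template <bool export_called_function>
    const char* DispatchImpl(uint64_t data) {
      if (export_called_function) {
        auto* out = reinterpret_cast<TestResult*>(static_cast<uintptr_t>(data));
        out->called = "SomeFieldParser";
      }
      // ... normal dispatch work would happen here ...
      return "SomeFieldParser";
    }

    // Production path: the flag is false, so the branch above compiles away.
    const char* Dispatch(uint64_t data) { return DispatchImpl<false>(data); }

    // Test path: smuggle &result in, run the same logic, read the diagnostics.
    TestResult TestDispatch() {
      TestResult result;
      DispatchImpl<true>(reinterpret_cast<uintptr_t>(&result));
      return result;
    }

    int main() {
      assert(TestDispatch().called != nullptr);
      return 0;
    }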
@@ -678,119 +688,6 @@ PROTOBUF_NOINLINE const char* TcParser::FastF64P2(PROTOBUF_TC_PARAM_DECL) {
 
 namespace {
 
-// Shift "byte" left by n * 7 bits, filling vacated bits with ones.
-template <int n>
-inline PROTOBUF_ALWAYS_INLINE uint64_t
-shift_left_fill_with_ones(uint64_t byte, uint64_t ones) {
-  return (byte << (n * 7)) | (ones >> (64 - (n * 7)));
-}
-
-// Shift "byte" left by n * 7 bits, filling vacated bits with ones, and
-// put the new value in res. Return whether the result was negative.
-template <int n>
-inline PROTOBUF_ALWAYS_INLINE bool shift_left_fill_with_ones_was_negative(
-    uint64_t byte, uint64_t ones, int64_t& res) {
-#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
-  // For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a
-  // substantial improvement from capturing the sign from the condition code
-  // register on x86-64.
-  bool sign_bit;
-  asm("shldq %3, %2, %1"
-      : "=@ccs"(sign_bit), "+r"(byte)
-      : "r"(ones), "i"(n * 7));
-  res = byte;
-  return sign_bit;
-#else
-  // Generic fallback:
-  res = (byte << (n * 7)) | (ones >> (64 - (n * 7)));
-  return static_cast<int64_t>(res) < 0;
-#endif
-}
-
-inline PROTOBUF_ALWAYS_INLINE std::pair<const char*, uint64_t>
-Parse64FallbackPair(const char* p, int64_t res1) {
-  auto ptr = reinterpret_cast<const int8_t*>(p);
-
-  // The algorithm relies on sign extension for each byte to set all high bits
-  // when the varint continues. It also relies on asserting all of the lower
-  // bits for each successive byte read. This allows the result to be
-  // aggregated using a bitwise AND. For example:
-  //
-  //          8       1          64     57 ... 24     17  16      9  8       1
-  // ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111  1111 1111  1aaa aaaa
-  // ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111  11bb bbbb  b111 1111
-  // ptr[2] = 0ccc cccc ; res3 = 0000 0000 ... 000c cccc  cc11 1111  1111 1111
-  //                             ---------------------------------------------
-  //        res1 & res2 & res3 = 0000 0000 ... 000c cccc  ccbb bbbb  baaa aaaa
-  //
-  // On x86-64, a shld from a single register filled with enough 1s in the
-  // high bits can accomplish all this in one instruction. It so happens that
-  // res1 has 57 high bits of ones, which is enough for the largest shift done.
-  GOOGLE_ABSL_DCHECK_EQ(res1 >> 7, -1);
-  uint64_t ones = res1;  // save the high 1 bits from res1 (input to SHLD)
-  int64_t res2, res3;    // accumulated result chunks
-
-  if (!shift_left_fill_with_ones_was_negative<1>(ptr[1], ones, res2))
-    goto done2;
-  if (!shift_left_fill_with_ones_was_negative<2>(ptr[2], ones, res3))
-    goto done3;
-
-  // For the remainder of the chunks, check the sign of the AND result.
-  res1 &= shift_left_fill_with_ones<3>(ptr[3], ones);
-  if (res1 >= 0) goto done4;
-  res2 &= shift_left_fill_with_ones<4>(ptr[4], ones);
-  if (res2 >= 0) goto done5;
-  res3 &= shift_left_fill_with_ones<5>(ptr[5], ones);
-  if (res3 >= 0) goto done6;
-  res1 &= shift_left_fill_with_ones<6>(ptr[6], ones);
-  if (res1 >= 0) goto done7;
-  res2 &= shift_left_fill_with_ones<7>(ptr[7], ones);
-  if (res2 >= 0) goto done8;
-  res3 &= shift_left_fill_with_ones<8>(ptr[8], ones);
-  if (res3 >= 0) goto done9;
-
-  // For valid 64bit varints, the 10th byte/ptr[9] should be exactly 1. In
-  // this case, the continuation bit of ptr[8] already set the top bit of
-  // res3 correctly, so all we have to do is check that the expected case is
-  // true.
-  if (PROTOBUF_PREDICT_TRUE(ptr[9] == 1)) goto done10;
-
-  // A value of 0, however, represents an over-serialized varint. This case
-  // should not happen, but if it does (say, due to a nonconforming
-  // serializer), deassert the continuation bit that came from ptr[8].
-  if (ptr[9] == 0) {
-#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
-    // Use a small instruction since this is an uncommon code path.
-    asm("btcq $63,%0" : "+r"(res3));
-#else
-    res3 ^= static_cast<uint64_t>(1) << 63;
-#endif
-    goto done10;
-  }
-
-  // If the 10th byte/ptr[9] itself has any other value, then it is too big
-  // to fit in 64 bits. If the continue bit is set, it is an unterminated
-  // varint.
-  return {nullptr, 0};
-
-done2:
-  return {p + 2, res1 & res2};
-done3:
-  return {p + 3, res1 & res2 & res3};
-done4:
-  return {p + 4, res1 & res2 & res3};
-done5:
-  return {p + 5, res1 & res2 & res3};
-done6:
-  return {p + 6, res1 & res2 & res3};
-done7:
-  return {p + 7, res1 & res2 & res3};
-done8:
-  return {p + 8, res1 & res2 & res3};
-done9:
-  return {p + 9, res1 & res2 & res3};
-done10:
-  return {p + 10, res1 & res2 & res3};
-}
-
 template <typename Type>
 inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
                                                       Type* value) {
@@ -969,7 +866,7 @@ PROTOBUF_NOINLINE const char* TcParser::SingularVarBigint(
 }
 
 PROTOBUF_NOINLINE const char* TcParser::FastV8S1(PROTOBUF_TC_PARAM_DECL) {
-  PROTOBUF_MUSTTAIL return SpecializedFastV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS);
+  PROTOBUF_MUSTTAIL return FastTV8S1<-1, -1>(PROTOBUF_TC_PARAM_PASS);
 }
 PROTOBUF_NOINLINE const char* TcParser::FastV8S2(PROTOBUF_TC_PARAM_DECL) {
   PROTOBUF_MUSTTAIL return SingularVarint(
diff --git a/src/google/protobuf/generated_message_tctable_lite_test.cc b/src/google/protobuf/generated_message_tctable_lite_test.cc
index 3a33084fc3d3..85ffc3ff7ba7 100644
--- a/src/google/protobuf/generated_message_tctable_lite_test.cc
+++ b/src/google/protobuf/generated_message_tctable_lite_test.cc
@@ -33,6 +33,7 @@
 #include "google/protobuf/generated_message_tctable_impl.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "absl/types/optional.h"
 #include "google/protobuf/wire_format_lite.h"
 
 namespace google {
 namespace protobuf {
 namespace {
@@ -43,6 +44,210 @@ namespace {
 
 using ::testing::Eq;
 using ::testing::Not;
+using ::testing::Optional;
+
+// The fast parser's dispatch table Xors two bytes of incoming data with
+// the data in TcFieldData, so we reproduce that here:
+TcFieldData Xor2SerializedBytes(TcFieldData tfd, const char* ptr) {
+  uint64_t twobytes = 0xFF & ptr[0];
+  twobytes |= (0xFF & ptr[1]) << 8;
+  tfd.data ^= twobytes;
+  return tfd;
+}
+
+absl::optional<const char*> fallback_ptr_received;
+absl::optional<uint64_t> fallback_hasbits_received;
+absl::optional<uint32_t> fallback_tag_received;
+const char* FastParserGaveUp(::google::protobuf::MessageLite*, const char* ptr,
+                             ::google::protobuf::internal::ParseContext*,
+                             ::google::protobuf::internal::TcFieldData data,
+                             const ::google::protobuf::internal::TcParseTableBase*,
+                             uint64_t hasbits) {
+  fallback_ptr_received = ptr;
+  fallback_hasbits_received = hasbits;
+  fallback_tag_received = data.tag();
+  return nullptr;
+}
+
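The test below drives the parsers with bytes produced by WireFormatLite::WriteUInt64ToArray: a tag byte followed by base-128 digits, least significant group first, with the continuation bit set on every byte but the last. A minimal encoder for the payload bytes (EncodeVarint64 is a local sketch, not the protobuf API):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Minimal varint encoder: emit base-128 digits from the least significant
    // group up, setting the continuation bit on every byte but the last.
    size_t EncodeVarint64(uint64_t value, uint8_t* out) {
      size_t n = 0;
      do {
        uint8_t byte = value & 0x7F;
        value >>= 7;
        out[n++] = byte | (value != 0 ? 0x80 : 0);
      } while (value != 0);
      return n;
    }

    int main() {
      uint8_t buf[10];
      assert(EncodeVarint64(300, buf) == 2);
      assert(buf[0] == 0xAC && buf[1] == 0x02);

      // The all-ones value needs the full ten bytes, and the tenth byte is
      // exactly 1 -- the case Parse64FallbackPair fast-accepts above.
      assert(EncodeVarint64(~uint64_t{0}, buf) == 10);
      assert(buf[9] == 1);
      return 0;
    }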
+// To test that we aren't storing too much data, we set up a fake message area
+// and fill all its bytes with kDND.
+constexpr char kDND = 0x5A;  // "Do Not Disturb"
+
+// To retrieve data and see if it matches what we expect, we have this routine
+// which simultaneously reads the data we want, and sets it back to what it
+// was before the test, that is, to kDND. This makes it easier to test at the
+// end that all the original data is undisturbed.
+template <typename T>
+T ReadAndReset(char* p) {
+  T result;
+  memcpy(&result, p, sizeof(result));
+  memset(p, kDND, sizeof(result));
+  return result;
+}
+
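The loop in the test below uses next_i to step i through every power of two and every power-of-two-minus-one (0, 1, 2, 3, 4, 7, 8, 15, 16, ...), so each varint length is exercised at both of its boundary values. A standalone copy of that stepping logic, for checking the sequence:

    #include <cassert>
    #include <cstdint>

    uint64_t NextI(uint64_t i) {
      if ((i & (i + 1)) == 0) return i + 1;  // i + 1 is a power of two
      return i + (i - 1);                    // i is a power of two: 2 * i - 1
    }

    int main() {
      uint64_t i = 0;
      for (uint64_t expected : {1, 2, 3, 4, 7, 8, 15, 16, 31, 32}) {
        i = NextI(i);
        assert(i == expected);
      }
      return 0;
    }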
+TEST(FastVarints, NameHere) {
+  constexpr uint8_t kHasBitsOffset = 4;
+  constexpr uint8_t kHasBitIndex = 0;
+  constexpr uint8_t kFieldOffset = 24;
+
+  // clang-format off
+  const TcParseTable<0, 1, 0, 0, 2> parse_table = {
+      {
+          kHasBitsOffset,  //
+          0, 0, 0,         // no _extensions_
+          1, 0,            // max_field_number, fast_idx_mask
+          offsetof(decltype(parse_table), field_lookup_table),
+          0xFFFFFFFF - 1,  // skipmap
+          offsetof(decltype(parse_table), field_entries),
+          1,               // num_field_entries
+          0,               // num_aux_entries
+          offsetof(decltype(parse_table), field_names),  // no aux_entries
+          nullptr,         // default instance
+          FastParserGaveUp,  // fallback
+      },
+      // Fast Table:
+      {{
+          // optional int32 field = 1;
+          {TcParser::SingularVarintNoZag1<::uint32_t, kFieldOffset,
+                                          kHasBitIndex>(),
+           {/* coded_tag= */ 8, kHasBitIndex, /* aux_idx= */ 0, kFieldOffset}},
+      }},
+      // Field Lookup Table:
+      {{65535, 65535}},
+      // Field Entries:
+      {{
+          // This is set to kFkNone to force MiniParse to call the fallback
+          {kFieldOffset, kHasBitsOffset + 0, 0, (field_layout::kFkNone)},
+      }},
+      // no aux_entries
+      {{}},
+  };
+  // clang-format on
+  uint8_t serialize_buffer[64];
+
+  for (int size : {8, 32, 64, -8, -32, -64}) {
+    auto next_i = [](uint64_t i) {
+      // if i + 1 is a power of two, return that.
+      // (This will also match when i == -1, but for this loop we know that
+      // will not happen.)
+      if ((i & (i + 1)) == 0) return i + 1;
+      // otherwise, i is already a power of two, so advance to one less than
+      // the next power of two.
+      return i + (i - 1);
+    };
+    for (uint64_t i = 0; i + 1 != 0; i = next_i(i)) {
+      char fake_msg[64] = {
+          kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND,  //
+          kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND,  //
+          kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND,  //
+          kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND,  //
+          kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND,  //
+          kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND,  //
+          kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND,  //
+          kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND,  //
+      };
+      memset(&fake_msg[kHasBitsOffset], 0, sizeof(uint32_t));
+
+      auto serialize_ptr = WireFormatLite::WriteUInt64ToArray(
+          /* field_number= */ 1, i, serialize_buffer);
+      absl::string_view serialized{
+          reinterpret_cast<char*>(&serialize_buffer[0]),
+          static_cast<size_t>(serialize_ptr - serialize_buffer)};
+
+      const char* ptr = nullptr;
+      const char* end_ptr = nullptr;
+      ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(),
+                       /* aliasing= */ false, &ptr, serialized);
+#if 0  // FOR_DEBUGGING
+      GOOGLE_ABSL_LOG(ERROR) << "size=" << size << " i=" << i
+                             << " ptr points to "                 //
+                             << +ptr[0] << "," << +ptr[1] << ","  //
+                             << +ptr[2] << "," << +ptr[3] << ","  //
+                             << +ptr[4] << "," << +ptr[5] << ","  //
+                             << +ptr[6] << "," << +ptr[7] << ","  //
+                             << +ptr[8] << "," << +ptr[9] << "," << +ptr[10]
+                             << "\n";
+#endif
+      TailCallParseFunc fn = nullptr;
+      switch (size) {
+        case 8:
+          fn = &TcParser::FastV8S1;
+          break;
+        case -8:
+          fn = &TcParser::FastTV8S1<kFieldOffset, kHasBitIndex>;
+          break;
+        case 32:
+          fn = &TcParser::FastV32S1;
+          break;
+        case -32:
+          fn = &TcParser::FastTV32S1<uint32_t, kFieldOffset, kHasBitIndex>;
+          break;
+        case 64:
+          fn = &TcParser::FastV64S1;
+          break;
+        case -64:
+          fn = &TcParser::FastTV64S1<kFieldOffset, kHasBitIndex>;
+          break;
+      }
+      fallback_ptr_received = absl::nullopt;
+      fallback_hasbits_received = absl::nullopt;
+      fallback_tag_received = absl::nullopt;
+      end_ptr = fn(reinterpret_cast<MessageLite*>(fake_msg), ptr, &ctx,
+                   Xor2SerializedBytes(parse_table.fast_entries[0].bits, ptr),
+                   &parse_table.header, /*hasbits=*/0);
+      switch (size) {
+        case -8:
+        case 8: {
+          if (end_ptr == nullptr) {
+            // If end_ptr is nullptr, that means the FastParser gave up and
+            // tried to pass control to MiniParse.... which is expected
+            // anytime we encounter something other than 0 or 1 encodings.
+            // (Since FastV8S1 is only used for `bool` fields.)
+            EXPECT_NE(i, true);
+            EXPECT_NE(i, false);
+            EXPECT_THAT(fallback_hasbits_received, Optional(0));
+            // Like the mini-parser functions, and unlike the fast-parser
+            // functions, the fallback receives a ptr already incremented past
+            // the tag, and receives the actual tag in the `data` parameter.
+            EXPECT_THAT(fallback_ptr_received, Optional(ptr + 1));
+            EXPECT_THAT(fallback_tag_received, Optional(0x7F & *ptr));
+            continue;
+          }
+          ASSERT_EQ(end_ptr - ptr, serialized.size());
+
+          auto actual_field = ReadAndReset<uint8_t>(&fake_msg[kFieldOffset]);
+          EXPECT_EQ(actual_field, static_cast<uint8_t>(i))  //
+              << " hex: " << absl::StrCat(absl::Hex(actual_field));
+        }; break;
+        case -32:
+        case 32: {
+          ASSERT_EQ(end_ptr - ptr, serialized.size());
+
+          auto actual_field = ReadAndReset<uint32_t>(&fake_msg[kFieldOffset]);
+          EXPECT_EQ(actual_field, static_cast<uint32_t>(i))  //
+              << " hex: " << absl::StrCat(absl::Hex(actual_field));
+        }; break;
+        case -64:
+        case 64: {
+          ASSERT_EQ(end_ptr - ptr, serialized.size());
+
+          auto actual_field = ReadAndReset<uint64_t>(&fake_msg[kFieldOffset]);
+          EXPECT_EQ(actual_field, static_cast<uint64_t>(i))  //
+              << " hex: " << absl::StrCat(absl::Hex(actual_field));
+        }; break;
+      }
+      EXPECT_TRUE(!fallback_ptr_received);
+      EXPECT_TRUE(!fallback_hasbits_received);
+      EXPECT_TRUE(!fallback_tag_received);
+      auto hasbits = ReadAndReset<uint32_t>(&fake_msg[kHasBitsOffset]);
+      EXPECT_EQ(hasbits, 1 << kHasBitIndex);
+
+      int offset = 0;
+      for (char ch : fake_msg) {
+        EXPECT_EQ(ch, kDND) << " corruption of message at offset " << offset;
+        ++offset;
+      }
+    }
+  }
+}
 
 MATCHER_P3(IsEntryForFieldNum, table, field_num, field_numbers_table,
            absl::StrCat(negation ? "isn't " : "",