Skip to content

Commit

Permalink
Make TDP accept and discard garbage non-continuation bits on the 10th…
Browse files Browse the repository at this point in the history
… byte of a varint.

This is the behavior of the codegen parser and the reflection parser.

PiperOrigin-RevId: 504022814
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Jan 23, 2023
1 parent 46656ed commit 092e447
Show file tree
Hide file tree
Showing 5 changed files with 413 additions and 178 deletions.
22 changes: 12 additions & 10 deletions src/google/protobuf/generated_message_tctable_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -805,22 +805,24 @@ Parse64FallbackPair(const char* p, int64_t res1) {
// correctly, so all we have to do is check that the expected case is true.
if (PROTOBUF_PREDICT_TRUE(ptr[9] == 1)) goto done10;

// A value of 0, however, represents an over-serialized varint. This case
// should not happen, but if does (say, due to a nonconforming serializer),
// deassert the continuation bit that came from ptr[8].
if (ptr[9] == 0) {
if (PROTOBUF_PREDICT_FALSE(ptr[9] & 0x80)) {
// If the continue bit is set, it is an unterminated varint.
return {nullptr, 0};
}

// A zero value of the first bit of the 10th byte represents an
// over-serialized varint. This case should not happen, but if does (say, due
// to a nonconforming serializer), deassert the continuation bit that came
// from ptr[8].
if ((ptr[9] & 1) == 0) {
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
// Use a small instruction since this is an uncommon code path.
asm("btcq $63,%0" : "+r"(res3));
#else
res3 ^= static_cast<uint64_t>(1) << 63;
#endif
goto done10;
}

// If the 10th byte/ptr[9] itself has any other value, then it is too big to
// fit in 64 bits. If the continue bit is set, it is an unterminated varint.
return {nullptr, 0};
goto done10;

done2:
return {p + 2, res1 & res2};
Expand Down Expand Up @@ -963,7 +965,7 @@ PROTOBUF_NOINLINE const char* TcParser::FastTV32S1(PROTOBUF_TC_PARAM_DECL) {
if (PROTOBUF_PREDICT_FALSE(ptr[6] & 0x80)) {
if (PROTOBUF_PREDICT_FALSE(ptr[7] & 0x80)) {
if (PROTOBUF_PREDICT_FALSE(ptr[8] & 0x80)) {
if (ptr[9] & 0xFE) return Error(PROTOBUF_TC_PARAM_PASS);
if (ptr[9] & 0x80) return Error(PROTOBUF_TC_PARAM_PASS);
*out = RotateLeft(res, 28);
ptr += 10;
PROTOBUF_MUSTTAIL return ToTagDispatch(
Expand Down
4 changes: 3 additions & 1 deletion src/google/protobuf/generated_message_tctable_lite.cc
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,9 @@ inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
byte = (byte - 0x80) | *p++;
if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
byte = (byte - 0x80) | *p++;
// We only care about the continuation bit and the first bit
// of the 10th byte.
byte = (byte - 0x80) | (*p++ & 0x81);
if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
return nullptr;
}
Expand Down
225 changes: 125 additions & 100 deletions src/google/protobuf/generated_message_tctable_lite_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ TEST(FastVarints, NameHere) {
uint8_t serialize_buffer[64];

for (int size : {8, 32, 64, -8, -32, -64}) {
SCOPED_TRACE(size);
auto next_i = [](uint64_t i) {
// if i + 1 is a power of two, return that.
// (This will also match when i == -1, but for this loop we know that will
Expand All @@ -136,28 +137,48 @@ TEST(FastVarints, NameHere) {
return i + (i - 1);
};
for (uint64_t i = 0; i + 1 != 0; i = next_i(i)) {
char fake_msg[64] = {
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
};
memset(&fake_msg[kHasBitsOffset], 0, sizeof(uint32_t));

auto serialize_ptr = WireFormatLite::WriteUInt64ToArray(
/* field_number= */ 1, i, serialize_buffer);
absl::string_view serialized{
reinterpret_cast<char*>(&serialize_buffer[0]),
static_cast<size_t>(serialize_ptr - serialize_buffer)};

const char* ptr = nullptr;
const char* end_ptr = nullptr;
ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(),
/* aliasing= */ false, &ptr, serialized);
SCOPED_TRACE(i);
enum OverlongKind { kNotOverlong, kOverlong, kOverlongWithDroppedBits };
for (OverlongKind overlong :
{kNotOverlong, kOverlong, kOverlongWithDroppedBits}) {
SCOPED_TRACE(overlong);
alignas(16) char fake_msg[64] = {
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
};
memset(&fake_msg[kHasBitsOffset], 0, sizeof(uint32_t));

auto serialize_ptr = WireFormatLite::WriteUInt64ToArray(
/* field_number= */ 1, i, serialize_buffer);

if (overlong == kOverlong || overlong == kOverlongWithDroppedBits) {
// 1 for the tag plus 10 for the value
while (serialize_ptr - serialize_buffer < 11) {
serialize_ptr[-1] |= 0x80;
*serialize_ptr++ = 0;
}
if (overlong == kOverlongWithDroppedBits) {
// For this one we add some unused bits to the last byte.
// They should be dropped. Bits 1-6 are dropped. Bit 0 is used and
// bit 7 is checked for continuation.
serialize_ptr[-1] |= 0b0111'1110;
}
}

absl::string_view serialized{
reinterpret_cast<char*>(&serialize_buffer[0]),
static_cast<size_t>(serialize_ptr - serialize_buffer)};

const char* ptr = nullptr;
const char* end_ptr = nullptr;
ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(),
/* aliasing= */ false, &ptr, serialized);
#if 0 // FOR_DEBUGGING
GOOGLE_ABSL_LOG(ERROR) << "size=" << size << " i=" << i << " ptr points to " //
<< +ptr[0] << "," << +ptr[1] << "," //
Expand All @@ -166,84 +187,88 @@ TEST(FastVarints, NameHere) {
<< +ptr[6] << "," << +ptr[7] << "," //
<< +ptr[8] << "," << +ptr[9] << "," << +ptr[10] << "\n";
#endif
TailCallParseFunc fn = nullptr;
switch (size) {
case 8:
fn = &TcParser::FastV8S1;
break;
case -8:
fn = &TcParser::FastTV8S1<kFieldOffset, kHasBitIndex>;
break;
case 32:
fn = &TcParser::FastV32S1;
break;
case -32:
fn = &TcParser::FastTV32S1<uint32_t, kFieldOffset, kHasBitIndex>;
break;
case 64:
fn = &TcParser::FastV64S1;
break;
case -64:
fn = &TcParser::FastTV64S1<uint64_t, kFieldOffset, kHasBitIndex>;
break;
}
fallback_ptr_received = absl::nullopt;
fallback_hasbits_received = absl::nullopt;
fallback_tag_received = absl::nullopt;
end_ptr = fn(reinterpret_cast<MessageLite*>(fake_msg), ptr, &ctx,
Xor2SerializedBytes(parse_table.fast_entries[0].bits, ptr),
&parse_table.header, /*hasbits=*/0);
switch (size) {
case -8:
case 8: {
if (end_ptr == nullptr) {
// If end_ptr is nullptr, that means the FastParser gave up and
// tried to pass control to MiniParse.... which is expected anytime
// we encounter something other than 0 or 1 encodings. (Since
// FastV8S1 is only used for `bool` fields.)
EXPECT_NE(i, true);
EXPECT_NE(i, false);
EXPECT_THAT(fallback_hasbits_received, Optional(0));
// Like the mini-parser functions, and unlike the fast-parser
// functions, the fallback receives a ptr already incremented past
// the tag, and receives the actual tag in the `data` parameter.
EXPECT_THAT(fallback_ptr_received, Optional(ptr + 1));
EXPECT_THAT(fallback_tag_received, Optional(0x7F & *ptr));
continue;
}
ASSERT_EQ(end_ptr - ptr, serialized.size());

auto actual_field = ReadAndReset<uint8_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
case -32:
case 32: {
ASSERT_EQ(end_ptr - ptr, serialized.size());

auto actual_field = ReadAndReset<uint32_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
case -64:
case 64: {
ASSERT_EQ(end_ptr - ptr, serialized.size());

auto actual_field = ReadAndReset<uint64_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
}
EXPECT_TRUE(!fallback_ptr_received);
EXPECT_TRUE(!fallback_hasbits_received);
EXPECT_TRUE(!fallback_tag_received);
auto hasbits = ReadAndReset<uint32_t>(&fake_msg[kHasBitsOffset]);
EXPECT_EQ(hasbits, 1 << kHasBitIndex);

int offset = 0;
for (char ch : fake_msg) {
EXPECT_EQ(ch, kDND) << " corruption of message at offset " << offset;
++offset;
TailCallParseFunc fn = nullptr;
switch (size) {
case 8:
fn = &TcParser::FastV8S1;
break;
case -8:
fn = &TcParser::FastTV8S1<kFieldOffset, kHasBitIndex>;
break;
case 32:
fn = &TcParser::FastV32S1;
break;
case -32:
fn = &TcParser::FastTV32S1<uint32_t, kFieldOffset, kHasBitIndex>;
break;
case 64:
fn = &TcParser::FastV64S1;
break;
case -64:
fn = &TcParser::FastTV64S1<uint64_t, kFieldOffset, kHasBitIndex>;
break;
}
fallback_ptr_received = absl::nullopt;
fallback_hasbits_received = absl::nullopt;
fallback_tag_received = absl::nullopt;
end_ptr = fn(reinterpret_cast<MessageLite*>(fake_msg), ptr, &ctx,
Xor2SerializedBytes(parse_table.fast_entries[0].bits, ptr),
&parse_table.header, /*hasbits=*/0);
switch (size) {
case -8:
case 8: {
if (end_ptr == nullptr) {
// If end_ptr is nullptr, that means the FastParser gave up and
// tried to pass control to MiniParse.... which is expected
// anytime we encounter something other than 0 or 1 encodings.
// (Since FastV8S1 is only used for `bool` fields.)
if (overlong == kNotOverlong) {
EXPECT_NE(i, true);
EXPECT_NE(i, false);
}
EXPECT_THAT(fallback_hasbits_received, Optional(0));
// Like the mini-parser functions, and unlike the fast-parser
// functions, the fallback receives a ptr already incremented past
// the tag, and receives the actual tag in the `data` parameter.
EXPECT_THAT(fallback_ptr_received, Optional(ptr + 1));
EXPECT_THAT(fallback_tag_received, Optional(0x7F & *ptr));
continue;
}
ASSERT_EQ(end_ptr - ptr, serialized.size());

auto actual_field = ReadAndReset<uint8_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
case -32:
case 32: {
ASSERT_TRUE(end_ptr);
ASSERT_EQ(end_ptr - ptr, serialized.size());

auto actual_field = ReadAndReset<uint32_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
case -64:
case 64: {
ASSERT_EQ(end_ptr - ptr, serialized.size());

auto actual_field = ReadAndReset<uint64_t>(&fake_msg[kFieldOffset]);
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
}; break;
}
EXPECT_TRUE(!fallback_ptr_received);
EXPECT_TRUE(!fallback_hasbits_received);
EXPECT_TRUE(!fallback_tag_received);
auto hasbits = ReadAndReset<uint32_t>(&fake_msg[kHasBitsOffset]);
EXPECT_EQ(hasbits, 1 << kHasBitIndex);

int offset = 0;
for (char ch : fake_msg) {
EXPECT_EQ(ch, kDND) << " corruption of message at offset " << offset;
++offset;
}
}
}
}
Expand Down
Loading

0 comments on commit 092e447

Please sign in to comment.