Skip to content

Commit

Permalink
protoprint: try to preserve printable unicode code points in output (#…
Browse files Browse the repository at this point in the history
…529)

Don't be so eager to resort to octal escapes. This makes the output readable for languages that use code points outside the 7-bit ASCII range. It also preserves use of other printable unicode symbols, including emoji.
  • Loading branch information
jhump committed Aug 22, 2022
1 parent 8dab444 commit bccb0aa
Show file tree
Hide file tree
Showing 20 changed files with 294 additions and 213 deletions.
8 changes: 4 additions & 4 deletions desc/protoparse/test-source-info.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ desc_test_comments.proto:43:77
some detached comments

Leading detached comment [1]:
some detached comments
some detached comments with unicode 这个是值

Leading detached comment [2]:
Another field comment
Expand Down Expand Up @@ -381,7 +381,7 @@ desc_test_comments.proto:55:51
desc_test_comments.proto:55:9
desc_test_comments.proto:69:10
Leading comments:
Group comment
Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐

Trailing comments:
trailer for Extras
Expand Down Expand Up @@ -829,12 +829,12 @@ desc_test_comments.proto:103:18

> message_type[0] > oneof_decl[1] > options:
desc_test_comments.proto:105:17
desc_test_comments.proto:105:57
desc_test_comments.proto:105:89


> message_type[0] > oneof_decl[1] > options > oofubar[0]:
desc_test_comments.proto:105:17
desc_test_comments.proto:105:57
desc_test_comments.proto:105:89
Leading comments:
whoops?

Expand Down
70 changes: 64 additions & 6 deletions desc/protoprint/print.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"reflect"
"sort"
"strings"
"unicode"
"unicode/utf8"

"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/protoc-gen-go/descriptor"
Expand Down Expand Up @@ -1656,7 +1658,7 @@ func (p *Printer) printOption(name string, optVal interface{}, w *writer, indent
case string:
fmt.Fprintf(w, "%s", quotedString(optVal))
case []byte:
fmt.Fprintf(w, "%s", quotedString(string(optVal)))
fmt.Fprintf(w, "%s", quotedBytes(string(optVal)))
case bool:
fmt.Fprintf(w, "%v", optVal)
case ident:
Expand Down Expand Up @@ -1978,12 +1980,12 @@ func optionsAsElementAddrs(optionsTag int32, order int, opts map[int32][]option)
return optAddrs
}

// quotedString implements the text format for string literals for protocol
// buffers. This form is also acceptable for string literals in option values
// by the protocol buffer compiler, protoc.
func quotedString(s string) string {
// quotedBytes implements the text format for string literals for protocol
// buffers. Since the underlying data is a bytes field, this encodes all
// bytes outside the 7-bit ASCII printable range. To preserve unicode strings
// without byte escapes, use quotedString.
func quotedBytes(s string) string {
var b bytes.Buffer
// use WriteByte here to get any needed indent
b.WriteByte('"')
// Loop over the bytes, not the runes.
for i := 0; i < len(s); i++ {
Expand Down Expand Up @@ -2014,6 +2016,62 @@ func quotedString(s string) string {
return b.String()
}

// quotedString implements the text format for string literals for protocol
// buffers. This form is also acceptable for string literals in option values
// by the protocol buffer compiler, protoc.
func quotedString(s string) string {
var b bytes.Buffer
b.WriteByte('"')
// Loop over the bytes, not the runes.
for {
r, n := utf8.DecodeRuneInString(s)
if n == 0 {
break // end of string
}
if r == utf8.RuneError && n == 1 {
// Invalid UTF8! Use an octal byte escape to encode the bad byte.
fmt.Fprintf(&b, "\\%03o", s[0])
s = s[1:]
continue
}

// Divergence from C++: we don't escape apostrophes.
// There's no need to escape them, and the C++ parser
// copes with a naked apostrophe.
switch r {
case '\n':
b.WriteString("\\n")
case '\r':
b.WriteString("\\r")
case '\t':
b.WriteString("\\t")
case '"':
b.WriteString("\\")
case '\\':
b.WriteString("\\\\")
default:
if unicode.IsPrint(r) {
b.WriteRune(r)
} else {
// if it's not printable, use a unicode escape
if r > 0xffff {
fmt.Fprintf(&b, "\\U%08X", r)
} else if r > 0x7F {
fmt.Fprintf(&b, "\\u%04X", r)
} else {
fmt.Fprintf(&b, "\\%03o", byte(r))
}
}
}

s = s[n:]
}

b.WriteByte('"')

return b.String()
}

type elementAddr struct {
elementType int32
elementIndex int
Expand Down
13 changes: 13 additions & 0 deletions desc/protoprint/print_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,16 @@ func checkContents(t *testing.T, actualContents string, goldenFileName string) {

testutil.Eq(t, string(b), actualContents, "wrong file contents for %s", goldenFileName)
}

func TestQuoteString(t *testing.T) {
// other tests have examples of encountering invalid UTF8 and printable unicode
// so this is just for testing how unprintable valid unicode characters are rendered
s := quotedString("\x04")
testutil.Eq(t, "\"\\004\"", s)
s = quotedString("\x7F")
testutil.Eq(t, "\"\\177\"", s)
s = quotedString("\u2028")
testutil.Eq(t, "\"\\u2028\"", s)
s = quotedString("\U0010FFFF")
testutil.Eq(t, "\"\\U0010FFFF\"", s)
}
6 changes: 3 additions & 3 deletions desc/protoprint/testfiles/desc_test_comments-compact.proto
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ message Request {
// lead mfubar
option (testprotos.mfubar) = true; // trailing mfubar
// some detached comments
// some detached comments
// some detached comments with unicode 这个是值
// Another field comment
// label comment
optional string name = 2 [default = "fubar"];
extensions 100 to 200;
extensions 201 to 250 [(testprotos.exfubarb) = "\000\001\002\003\004\005\006\007", (testprotos.exfubar) = "splat!"];
reserved 10 to 20, 30 to 50;
reserved "foo", "bar", "baz";
// Group comment
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
optional group Extras = 3 {
// trailer for Extras
// this is a custom option
Expand Down Expand Up @@ -65,7 +65,7 @@ message Request {
// can be these or those
oneof xyz {
// whoops?
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
string these = 6;
int32 those = 7;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ message Request {
string these = 6;

// whoops?
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
}

// can be this or that
Expand All @@ -125,7 +125,7 @@ message Request {

// some detached comments

// some detached comments
// some detached comments with unicode 这个是值

// Another field comment

Expand All @@ -140,7 +140,7 @@ message Request {
json_name = "|foo|"
]; // field trailer #1...

// Group comment
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
optional group Extras = 3 {
// trailer for Extras

Expand Down
6 changes: 3 additions & 3 deletions desc/protoprint/testfiles/desc_test_comments-default.proto
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ message Request {

// some detached comments

// some detached comments
// some detached comments with unicode 这个是值

// Another field comment

Expand All @@ -55,7 +55,7 @@ message Request {

reserved "foo", "bar", "baz";

// Group comment
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
optional group Extras = 3 {
// trailer for Extras

Expand Down Expand Up @@ -123,7 +123,7 @@ message Request {
// can be these or those
oneof xyz {
// whoops?
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";

string these = 6;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ message Request {

/* some detached comments */

/* some detached comments */
/* some detached comments with unicode 这个是值 */

/* Another field comment */

Expand All @@ -57,7 +57,7 @@ message Request {

reserved "foo", "bar", "baz";

/* Group comment */
/* Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐 */
optional group Extras = 3 {
/* trailer for Extras */

Expand Down Expand Up @@ -125,7 +125,7 @@ message Request {
/* can be these or those */
oneof xyz {
/* whoops? */
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";

string these = 6;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ message Request {

// some detached comments

// some detached comments
// some detached comments with unicode 这个是值

// Another field comment

Expand All @@ -55,7 +55,7 @@ message Request {

reserved "foo", "bar", "baz";

// Group comment
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
optional group Extras = 3 {
// this is a custom option
option (testprotos.mfubar) = false;
Expand Down Expand Up @@ -117,7 +117,7 @@ message Request {
// can be these or those
oneof xyz {
// whoops?
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";

string these = 6;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ message Request {

reserved "foo", "bar", "baz";

// Group comment
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
optional group Extras = 3 {
option (testprotos.mfubar) = false;

Expand Down Expand Up @@ -95,7 +95,7 @@ message Request {

// can be these or those
oneof xyz {
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";

string these = 6;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ message Request {

/* some detached comments */

/* some detached comments */
/* some detached comments with unicode 这个是值 */

/* Another field comment */

/* label comment */
optional string name = 2 [default = "fubar"];

/* Group comment */
/* Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐 */
optional group Extras = 3 {
/* trailer for Extras */

Expand Down Expand Up @@ -78,7 +78,7 @@ message Request {
/* can be these or those */
oneof xyz {
/* whoops? */
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";

string these = 6;

Expand Down
4 changes: 2 additions & 2 deletions desc/protoprint/testfiles/desc_test_comments-sorted.proto
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ message Request {
// label comment
optional string name = 2 [default = "fubar"];

// Group comment
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
optional group Extras = 3 {
// trailer for Extras

Expand Down Expand Up @@ -61,7 +61,7 @@ message Request {
// can be these or those
oneof xyz {
// whoops?
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";

string these = 6;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ message Request {

// some detached comments

// some detached comments
// some detached comments with unicode 这个是值

// Another field comment

Expand All @@ -59,7 +59,7 @@ message Request {

reserved "foo", "bar", "baz";

// Group comment
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
optional group Extras = 3 {
// trailer for Extras

Expand Down Expand Up @@ -128,7 +128,7 @@ message Request {
// can be these or those
oneof xyz {
// whoops?
option (testprotos.oofubar) = "whoops!";
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";

string these = 6;

Expand Down
Loading

0 comments on commit bccb0aa

Please sign in to comment.