From 38f14be0d8d88f6c328f72748b8d8e448c083247 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Tue, 14 May 2024 12:24:31 -0700 Subject: [PATCH 1/2] Remove the branches from `len_utf8` This changes `len_utf8` to add all of the range comparisons together, rather than branching on each one. We should definitely test performance though, because it's possible that this will pessimize mostly-ASCII inputs that would have had a short branch-predicted path before. --- library/core/src/char/methods.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index a93b94867ce4c..2d05edc4a2180 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1739,15 +1739,9 @@ impl EscapeDebugExtArgs { #[inline] const fn len_utf8(code: u32) -> usize { - if code < MAX_ONE_B { - 1 - } else if code < MAX_TWO_B { - 2 - } else if code < MAX_THREE_B { - 3 - } else { - 4 - } + 1 + ((code >= MAX_ONE_B) as usize) + + ((code >= MAX_TWO_B) as usize) + + ((code >= MAX_THREE_B) as usize) } /// Encodes a raw u32 value as UTF-8 into the provided byte buffer, From ba2f5a9db9285a6527218c07f9577b6cf0357f93 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 15 May 2024 08:33:44 -0700 Subject: [PATCH 2/2] Try `len_utf8` with one branch for ASCII's sake --- library/core/src/char/methods.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 2d05edc4a2180..aa5d286388c89 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1739,9 +1739,11 @@ impl EscapeDebugExtArgs { #[inline] const fn len_utf8(code: u32) -> usize { - 1 + ((code >= MAX_ONE_B) as usize) - + ((code >= MAX_TWO_B) as usize) - + ((code >= MAX_THREE_B) as usize) + if code < MAX_ONE_B { + 1 + } else { + 2 + ((code >= MAX_TWO_B) as usize) + ((code >= MAX_THREE_B) as usize) + } } /// Encodes a raw 
u32 value as UTF-8 into the provided byte buffer,