diff --git a/src/libcore/tests/pattern.rs b/src/libcore/tests/pattern.rs index bb0a618f6b870..7fe274a79ed08 100644 --- a/src/libcore/tests/pattern.rs +++ b/src/libcore/tests/pattern.rs @@ -21,7 +21,7 @@ enum Step { Done } -use Step::*; +use self::Step::*; impl From for Step { fn from(x: SearchStep) -> Self { @@ -42,6 +42,12 @@ impl From> for Step { } } +// XXXManishearth these tests focus on single-character searching (CharSearcher) +// and on next()/next_match(), not next_reject(). This is because +// the memchr changes make next_match() for single chars complex, but next_reject() +// continues to use next() under the hood. We should add more test cases for all +// of these, as well as tests for StrSearcher and higher level tests for str::find() (etc) + #[test] fn test_simple_iteration() { search_asserts! ("abcdeabcd", 'a', "forward iteration for ASCII string", @@ -98,3 +104,149 @@ fn test_simple_search() { ); } +// Á, 각, ก, 😁 all end in 0x81 +// 🁀, ᘀ do not end in 0x81 but contain the byte +// ꁁ has 0x81 as its second and third bytes. +// +// The memchr-using implementation of next_match +// and next_match_back temporarily violates +// the property that the search is always on a unicode boundary, +// which is fine as long as this never reaches next() or next_back(). +// So we test if next() is correct after each next_match() as well. 
+const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a"; + +#[test] +fn test_stress_indices() { + // this isn't really a test, more of documentation on the indices of each character in the stresstest string + + search_asserts!(STRESS, 'x', "Indices of characters in stress test", + [next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next], + [Rejects(0, 2), // Á + Rejects(2, 3), // a + Rejects(3, 7), // 🁀 + Rejects(7, 8), // b + Rejects(8, 10), // Á + Rejects(10, 13), // ꁁ + Rejects(13, 14), // f + Rejects(14, 15), // g + Rejects(15, 19), // 😁 + Rejects(19, 22), // 각 + Rejects(22, 25), // ก + Rejects(25, 28), // ᘀ + Rejects(28, 31), // 각 + Rejects(31, 32), // a + Rejects(32, 34), // Á + Rejects(34, 37), // 각 + Rejects(37, 40), // ꁁ + Rejects(40, 43), // ก + Rejects(43, 47), // 😁 + Rejects(47, 48), // a + Done] + ); +} + +#[test] +fn test_forward_search_shared_bytes() { + search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character", + [next_match, next_match, next_match, next_match], + [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done] + ); + + search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character; check if next() still works", + [next_match, next, next_match, next, next_match, next, next_match], + [InRange(0, 2), Rejects(2, 3), InRange(8, 10), Rejects(10, 13), InRange(32, 34), Rejects(34, 37), Done] + ); + + search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character", + [next_match, next, next_match, next_match, next_match], + [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done] + ); + + search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character; check if next() still works", + [next_match, next, next_match, next, next_match, next, next_match], + [InRange(19, 22), Rejects(22, 25), InRange(28, 31), Rejects(31, 32), InRange(34, 37), Rejects(37, 40), Done] + ); + + search_asserts!(STRESS, 'ก', "Forward search for 
three-byte Thai character", + [next_match, next, next_match, next, next_match], + [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done] + ); + + search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character; check if next() still works", + [next_match, next, next_match, next, next_match], + [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done] + ); + + search_asserts!(STRESS, '😁', "Forward search for four-byte emoji", + [next_match, next, next_match, next, next_match], + [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done] + ); + + search_asserts!(STRESS, '😁', "Forward search for four-byte emoji; check if next() still works", + [next_match, next, next_match, next, next_match], + [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done] + ); + + search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes", + [next_match, next, next_match, next, next_match], + [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done] + ); + + search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes; check if next() still works", + [next_match, next, next_match, next, next_match], + [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done] + ); +} + +#[test] +fn test_reverse_search_shared_bytes() { + search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character", + [next_match_back, next_match_back, next_match_back, next_match_back], + [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done] + ); + + search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back], + [InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done] + ); + + search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character", + 
[next_match_back, next_back, next_match_back, next_match_back, next_match_back], + [InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done] + ); + + search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(34, 37), Rejects(32, 34), InRange(28, 31), Rejects(25, 28), InRange(19, 22), Rejects(15, 19), Done] + ); + + search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done] + ); + + search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done] + ); + + search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done] + ); + + search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done] + ); + + search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done] + ); + + search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(37, 40), 
Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done] + ); +}