Skip to content
This repository has been archived by the owner on Aug 8, 2023. It is now read-only.

Commit

Permalink
[core, test] Add BiDi unit test
Browse files Browse the repository at this point in the history
- Port of arabic.test.js from mapbox-gl-rtl-text
- Modify BiDi::getLine to remove trailing nulls in the event UBIDI_REMOVE_BIDI_CONTROLS causes the string to shorten.
- Patch vendored ICU to avoid undefined undefined bit shifting behavior (triggered sanitizer failure)
  • Loading branch information
ChrisLoer committed Oct 15, 2018
1 parent 9328c27 commit 37e88c0
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 5 deletions.
2 changes: 1 addition & 1 deletion circle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1063,7 +1063,7 @@ jobs:
JOBS: 1 # https://github.com/mapbox/mapbox-gl-native/issues/9108
command: |
xvfb-run --server-args="-screen 0 1024x768x24" \
build/qt-linux-x86_64/Release/mbgl-test --gtest_filter=-*.Load --gtest_filter=-Memory.Vector
build/qt-linux-x86_64/Release/mbgl-test --gtest_filter=-*.Load --gtest_filter=-Memory.Vector --gtest_filter=-BiDi.*
# ------------------------------------------------------------------------------
qt5-macos-debug:
Expand Down
1 change: 1 addition & 0 deletions cmake/test-files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ test/src/mbgl/test/util.cpp
test/src/mbgl/test/util.hpp

# text
test/text/bidi.test.cpp
test/text/cross_tile_symbol_index.test.cpp
test/text/glyph_manager.test.cpp
test/text/glyph_pbf.test.cpp
Expand Down
9 changes: 7 additions & 2 deletions platform/default/bidi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,13 @@ std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
// UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses
// UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that
// they don't show up on screen (some fonts have glyphs representing them)
ubidi_writeReordered(impl->bidiLine, mbgl::utf16char_cast<UChar*>(&outputText[0]), outputLength,
UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &errorCode);
int32_t finalLength = ubidi_writeReordered(impl->bidiLine,
mbgl::utf16char_cast<UChar*>(&outputText[0]),
outputLength,
UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS,
&errorCode);

outputText.resize(finalLength); // REMOVE_BIDI_CONTROLS may have shrunk the string

if (U_FAILURE(errorCode)) {
throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") +
Expand Down
18 changes: 18 additions & 0 deletions scripts/vendor/icu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,24 @@ for FILE in "${ALL[@]}"; do
sed 's/^#include \"\(unicode\/[^\"]\{1,\}\)\"/#include <\1>/' "common/$FILE" > "$DIR/$FILE"
done

# Apply patch from https://github.com/LibreOffice/core/blob/master/external/icu/icu4c-ubsan.patch.1
# Shifting signed int to a number greater than can be represented is undefined behavior
patch -p0 << PATCH
--- src/ubidiimp.h
+++ src/ubidiimp.h
@@ -198,8 +198,8 @@
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)
-#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
+#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31))
-#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31))
+#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31))
#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)
#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
PATCH

rm -rf common

file_list include src -name "*.h" -o -name "*.cpp"
90 changes: 90 additions & 0 deletions test/text/bidi.test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@

#include <mbgl/test/util.hpp>

#include <mbgl/text/bidi.hpp>

using namespace mbgl;

/*
These tests mirror the unit tests in mapbox-gl-rtl-text's arabic.test.js
The arabic text in results may appear to be backwards
This is because whatever you're viewing the text with is
applying the bidirectional algorithm a second time.
Although they may look the same as input in your viewer, the
characters in the test results are "presentation forms" of
the characters.
To closely inspect the inputs and outputs, use a binary/hex editor.
*/

TEST(BiDi, ArabicShaping) {
EXPECT_EQ(applyArabicShaping(u"اليَمَن‎‎"), u"ﺍﻟﻴﹷﻤﹷﻦ‎‎");
}

TEST(BiDi, Tashkeel) {
EXPECT_EQ(applyArabicShaping(u"سلام۳۹"), u"ﺳﻼﻡ۳۹");
}

TEST(BiDi, MixedShaping) {
EXPECT_EQ(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"),
u"ﻣﻜﺘﺒﺔ ﺍﻹﺳﻜﻨﺪﺭﻳﺔ‎‎ Maktabat al-Iskandarīyah");
}

TEST(BiDi, ReverseArabic) {
BiDi bidi;
EXPECT_EQ(bidi.processText(applyArabicShaping(u"سلام۳۹"), {}),
std::vector<std::u16string>{ u"۳۹ﻡﻼﺳ" });
}

TEST(BiDi, ReverseMixed) {
BiDi bidi;
EXPECT_EQ(bidi.processText(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"), {}),
std::vector<std::u16string>{ u" Maktabat al-Iskandarīyahﺔﻳﺭﺪﻨﻜﺳﻹﺍ ﺔﺒﺘﻜﻣ" });
}

TEST(BiDi, WithLineBreaks) {
BiDi bidi;
std::vector<std::u16string> expected;
expected.emplace_back(u" ﺔﻳﺭﺪﻨﻜﺳﻹﺍ ﺔﺒﺘﻜﻣ");
expected.emplace_back(u"Maktabat al-");
expected.emplace_back(u"Iskandarīyah");
EXPECT_EQ(bidi.processText(applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"), { 18, 30 }),
expected);
}

TEST(BiDi, StyledText) {
// This test uses line breaks that intentionally split/reorder/interleave styled blocks
// that are contiguous in the input
BiDi bidi;
std::vector<StyledText> expected;
StyledText input(
applyArabicShaping(u"مكتبة الإسكندرية‎‎ Maktabat al-Iskandarīyah"),
std::vector<uint8_t>{ 0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,4,5,5,5,5,6,6,6,6,6,6,6,6,6,6,7,7,7 }
);

expected.emplace_back(StyledText(
u"ﺔﺒﺘﻜﻣ",
std::vector<uint8_t>{ 0,0,0,0,0 }
));
EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size());
expected.emplace_back(StyledText(
u" ‎‎ﺔﻳﺭﺪﻨﻜﺳﻹﺍ ",
std::vector<uint8_t>{ 2,2,2,2,2,2,2,1,1,1,1,1,1 }
));
EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size());
expected.emplace_back(StyledText(
u"Maktabat al-",
std::vector<uint8_t>{ 2,3,3,3,3,3,4,5,5,5,5,6 }
));
EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size());
expected.emplace_back(StyledText(
u"Iskandarīyah",
std::vector<uint8_t>{ 6,6,6,6,6,6,6,6,6,7,7,7 }
));
EXPECT_EQ(expected.rbegin()->first.size(), expected.rbegin()->second.size());

EXPECT_EQ(bidi.processStyledText(input, { 5, 18, 30 }), expected);
}

4 changes: 2 additions & 2 deletions vendor/icu/src/ubidiimp.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ typedef struct Run {
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31))
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31))
#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)

#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
Expand Down

0 comments on commit 37e88c0

Please sign in to comment.