diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/WhitespaceStripper.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/WhitespaceStripper.java index c350c5491..121f152eb 100644 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/WhitespaceStripper.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/layout/WhitespaceStripper.java @@ -145,19 +145,55 @@ private static boolean stripWhitespace(InlineBox iB, boolean collapseLeading) { } return text.equals("") ? collapseLeading : collapseNext; } + + /** + * Collapse whitespace for normal or no-wrap modes. Much faster (15x in simple testing) + * than using multiple regular expressions. + * + * NOTE: Slightly different behavior to using regular expressions as definition of space characters + * differ, but I believe this is the correct definition according to CSS specifications. + * @param text + * @param collapseLeading + * @return + */ + private static String collapseWhitespaceNormalOrNoWrap(String text, boolean collapseLeading) { + char[] chs = text.toCharArray(); + StringBuilder builder = new StringBuilder(chs.length); + boolean spaceAdded = collapseLeading; + + for (int i = 0; i < chs.length; i++) { + char ch = chs[i]; + + if (spaceAdded) { + if (ch != '\n' && + ch != '\t' && + ch != ' ') { + builder.append(ch); + spaceAdded = false; + } + } else { + if (ch == '\n' || + ch == '\t' || + ch == ' ') { + builder.append(' '); + spaceAdded = true; + } else { + builder.append(ch); + } + } + } + + return builder.toString(); + } private static String collapseWhitespace(InlineBox iB, IdentValue whitespace, String text, boolean collapseLeading) { if (whitespace == IdentValue.NORMAL || whitespace == IdentValue.NOWRAP) { - text = linefeed_space_collapse.matcher(text).replaceAll(EOL); + return collapseWhitespaceNormalOrNoWrap(text, collapseLeading); } else if (whitespace == IdentValue.PRE) { text = space_before_linefeed_collapse.matcher(text).replaceAll(EOL); } - if (whitespace == IdentValue.NORMAL || whitespace == IdentValue.NOWRAP) { - text = linefeed_to_space.matcher(text).replaceAll(SPACE); - text = tab_to_space.matcher(text).replaceAll(SPACE); - text = space_collapse.matcher(text).replaceAll(SPACE); - } else if (whitespace == IdentValue.PRE || whitespace == IdentValue.PRE_WRAP) { + if (whitespace == IdentValue.PRE || whitespace == IdentValue.PRE_WRAP) { int tabSize = (int) iB.getStyle().asFloat(CSSName.TAB_SIZE); char[] tabs = new char[tabSize]; Arrays.fill(tabs, ' '); @@ -167,14 +203,6 @@ private static String collapseWhitespace(InlineBox iB, IdentValue whitespace, St text = space_collapse.matcher(text).replaceAll(SPACE); } - if (whitespace == IdentValue.NORMAL || whitespace == IdentValue.NOWRAP) { - // collapse first space against prev inline - if (text.startsWith(SPACE) && - collapseLeading) { - text = text.substring(1, text.length()); - } - } - return text; } }