Skip to content

Commit

Permalink
Merge pull request #1 from diffbot/performance-fixes
Browse files Browse the repository at this point in the history
Use a string function rather than regex for better performance
  • Loading branch information
praveen-diffbot committed Sep 17, 2024
2 parents cc3a2f5 + 5003bed commit e8fdf0b
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 5 deletions.
1 change: 1 addition & 0 deletions flexmark-html2md-converter/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
<version>0.64.8</version>
</parent>

<groupId>com.diffbot</groupId>
<artifactId>flexmark-html2md-converter</artifactId>
<name>flexmark-java HTML to Markdown extensible converter</name>
<description>flexmark-java customizable extension to convert HTML to Markdown</description>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1199,7 +1199,7 @@ private void handleTableCaption(Element element, HtmlNodeConverterContext contex
}

private void handleTableCell(Element element, HtmlNodeConverterContext context, HtmlMarkdownWriter out) {
String cellText = context.processTextNodes(element).trim().replaceAll("\\s*\n\\s*", " ");
String cellText = replaceMultipleBlankSpace(context.processTextNodes(element).trim());
int colSpan = 1;
int rowSpan = 1;
CellAlignment alignment = null;
Expand Down Expand Up @@ -1260,6 +1260,25 @@ private void handleTableCell(Element element, HtmlNodeConverterContext context,
}
}

/**
 * Collapses every run of whitespace in {@code cellText} into a single space character.
 *
 * <p>A character counts as whitespace when {@link Character#isWhitespace(char)} says so, which
 * covers spaces, tabs and line terminators but not non-breaking spaces. Runs at the start or end
 * of the text are reduced to one space rather than removed; trimming is left to the caller.
 *
 * <p>Implemented as a single pass over the characters instead of a regular expression, so no
 * pattern has to be compiled or matched per call.
 *
 * @param cellText text to normalize; must not be {@code null}
 * @return {@code cellText} with each whitespace run replaced by exactly one {@code ' '}
 */
private static String replaceMultipleBlankSpace(String cellText) {
    final int length = cellText.length();
    // The result can never be longer than the input, so one allocation is enough.
    final StringBuilder result = new StringBuilder(length);
    boolean previousWasWhitespace = false;

    // Index-based access avoids the full array copy that toCharArray() would make.
    for (int i = 0; i < length; i++) {
        final char c = cellText.charAt(i);
        if (!Character.isWhitespace(c)) {
            result.append(c);
            previousWasWhitespace = false;
        } else if (!previousWasWhitespace) {
            // Only the first whitespace character of a run emits a space.
            result.append(' ');
            previousWasWhitespace = true;
        }
    }

    return result.toString();
}

private boolean matchingText(Pattern pattern, String text, String[] match) {
Matcher matcher = pattern.matcher(text);
if (matcher.matches()) {
Expand Down
6 changes: 2 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,10 @@

<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
<id>diffbot-nexus-snapshots</id>
</snapshotRepository>
<repository>
<id>ossrh</id>
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
<id>diffbot-nexus-releases</id>
</repository>
</distributionManagement>

Expand Down

0 comments on commit e8fdf0b

Please sign in to comment.