Skip to content

Commit

Permalink
For locale-dependent files, do not verify contents with hash.
Browse files Browse the repository at this point in the history
Signed-off-by: Carroll <carrofin@amazon.com>
  • Loading branch information
finnegancarroll committed May 15, 2024
1 parent 3fcc4bc commit 9ae651e
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,17 @@
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.test.OpenSearchTestCase;
import org.junit.Before;

import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Locale;
import java.util.Map;

/**
* Parse sample Tika documents and assert that their contents have not changed according to previously recorded checksums.
* Uncaught changes to Tika parsing could potentially pose backwards-compatibility (bwc) issues.
* Note: In some cases Tika will access a user's locale to inform the parsing of a file.
* The checksums of these files are left empty, and we only validate that the parsed content is neither null nor empty.
*/
@SuppressFileSystems("ExtrasFS") // don't try to parse extraN
public class TikaDocTests extends OpenSearchTestCase {
Expand All @@ -59,11 +59,6 @@ public class TikaDocTests extends OpenSearchTestCase {
static final String TIKA_FILES = "/org/opensearch/ingest/attachment/test/tika-files/";
static final String TIKA_CHECKSUMS = "/org/opensearch/ingest/attachment/test/.checksums";

/**
 * Sets the JVM default locale to {@link Locale#ENGLISH} before each test method runs.
 * NOTE(review): presumably this pins the locale so that locale-sensitive parsing output
 * stays reproducible across machines — confirm against the checksum assertions.
 */
@Before
public void setLocale() {
Locale.setDefault(Locale.ENGLISH);
}

public void testParseSamples() throws Exception {
String checksumJson = Files.readString(PathUtils.get(getClass().getResource(TIKA_CHECKSUMS).toURI()));
Map<String, Object> checksums = XContentHelper.convertToMap(JsonXContent.jsonXContent, checksumJson, false);
Expand All @@ -73,7 +68,11 @@ public void testParseSamples() throws Exception {
String parsedContent = tryParse(doc);
assertNotNull(parsedContent);
assertFalse(parsedContent.isEmpty());
assertEquals(checksums.get(doc.getFileName().toString()), DigestUtils.sha1Hex(parsedContent));

String check = checksums.get(doc.getFileName().toString()).toString();
if (!check.isEmpty()) {
assertEquals(check, DigestUtils.sha1Hex(parsedContent));
}
}

stream.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
"testRTFWithCurlyBraces.rtf": "019cab63b73ff89d094823cf50c0a721bec08ee2",
"testFooter.ods": "846e1d0415b23fa27631b536b0cf566abbf8fcc1",
"testPPT.ppt": "933ee556884b1d9e28b801daa0d77bbaa4f4be62",
"testEXCEL-formats.xls": "3f3e2e5cd7d6527af8d15e5668dc2cf7c33b25fe",
"testEXCEL-formats.xls": "",
"testPPT_masterFooter.pptx": "29bb97006b3608b7db6ff72b94d20157878d94dd",
"testWORD_header_hyperlink.doc": "914bbec0730c54948ad307ea3e375ef0c100abf1",
"testRTFHyperlink.rtf": "2b2ffb1997aa495fbab1af490d134051de168c97",
Expand Down Expand Up @@ -112,7 +112,7 @@
"testPPT_embedded_two_slides.pptx": "0d760dbaf9d9d2f173dd40deecd0de5ecb885301",
"testPDF_bookmarks.pdf": "5fc486c443511452db4f1aa6530714c6aa49c831",
"test_recursive_embedded.docx": "afc32b07ce07ad273e5b3d1a43390a9d2b6dd0a9",
"testEXCEL-formats.xlsx": "801f4850a8e5dca36cd2e3544cb4e74d8f4265f5",
"testEXCEL-formats.xlsx": "",
"testPPT_masterText2.pptx": "2b01eab5d0349e3cfe791b28c70c2dbf4efc884d",
"test.doc": "774be3106edbb6d80be36dbb548d62401dcfa0fe",
"test_recursive_embedded_npe.docx": "afc32b07ce07ad273e5b3d1a43390a9d2b6dd0a9",
Expand Down Expand Up @@ -174,7 +174,7 @@
"testPPTX_Thumbnail.pptx": "6aa019154289317c7b7832fe46556e6d61cd0a9f",
"testRTFTableCellSeparation.rtf": "5647290a3197c1855fad10201dc7be60ea7b0e42",
"testRTFControls.rtf": "aee6afb80e8b09cf49f056020c037f70c2757e49",
"testEXCEL.xls": "b5b3302499974062a7b1abd4ed523e895785b702",
"testEXCEL.xls": "",
"testRTFJapanese.rtf": "08976f9a7d6d3a155cad84d7fa23295cb972a17a",
"testPageNumber.pdf": "96b03d2cc6782eba653af28228045964e68422b5",
"testOptionalHyphen.pdf": "12edd450ea76ea4e79f80ebd3442999ec2180dbc",
Expand All @@ -192,7 +192,7 @@
"testPPT.ppsm": "71333ef84f7825d8ad6aba2ba993d04b4bab41c6",
"boilerplate.html": "b3558f02c3179e4aeeb6057594d87bda79964e7b",
"testEXCEL_embeded.xls": "110247fc0a3936828c760e40975ff83e4578be76",
"testEXCEL.xlsx": "b39735e1498ec538615366b48dcfb67b558203b1",
"testEXCEL.xlsx": "",
"testPPT_2imgs.ppt": "9a68072ffcf171389e78cf8bc018c4b568a6202d",
"testComment.pptx": "6ae6052f469b8f901fd4fd8bc70f8e267255a58e",
"testPDF_Version.6.x.pdf": "03b60dfc8c103dbabeedfd682e979f96dd8983a2",
Expand Down

0 comments on commit 9ae651e

Please sign in to comment.