From 326f4aba1dfdb5c62fa2146dc7668b08a924d622 Mon Sep 17 00:00:00 2001
From: Simon Gene Gottlieb
Date: Thu, 22 Aug 2024 10:29:29 +0200
Subject: [PATCH] fix: parse files with '\r' symbols as line ending correctly

---
 src/stream.cpp                      | 17 ++++++++++++++++-
 src/stream.h                        |  1 +
 test/integration/load_node_test.cpp | 16 ++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/stream.cpp b/src/stream.cpp
index b1aa092f6..b70940ed7 100644
--- a/src/stream.cpp
+++ b/src/stream.cpp
@@ -262,7 +262,22 @@ char Stream::get() {
   AdvanceCurrent();
   m_mark.column++;
 
-  if (ch == '\n') {
+  // if line ending symbol is unknown, set it to the first
+  // encountered '\n' or '\r' symbol
+  if (!m_lineEndingSymbol) {
+    if (ch == '\n') { // line ending is '\n'
+      m_lineEndingSymbol = '\n';
+    } else if (ch == '\r') {
+      auto ch2 = peek();
+      if (ch2 == '\n') { // line ending is '\r\n'
+        m_lineEndingSymbol = '\n';
+      } else { // line ending is '\r'
+        m_lineEndingSymbol = '\r';
+      }
+    }
+  }
+
+  if (ch == m_lineEndingSymbol) {
     m_mark.column = 0;
     m_mark.line++;
   }
diff --git a/src/stream.h b/src/stream.h
index 2bc7a1521..214104ade 100644
--- a/src/stream.h
+++ b/src/stream.h
@@ -53,6 +53,7 @@ class Stream {
   Mark m_mark;
 
   CharacterSet m_charSet;
+  char m_lineEndingSymbol{}; // 0 means it is not determined yet, must be '\n' or '\r'
   mutable std::deque<char> m_readahead;
   unsigned char* const m_pPrefetched;
   mutable size_t m_nPrefetchedAvailable;
diff --git a/test/integration/load_node_test.cpp b/test/integration/load_node_test.cpp
index 9d0c790fd..1cc84a45a 100644
--- a/test/integration/load_node_test.cpp
+++ b/test/integration/load_node_test.cpp
@@ -360,5 +360,21 @@ TEST(LoadNodeTest, BlockCRNLEncoded) {
   EXPECT_EQ(1, node["followup"].as<int>());
 }
 
+TEST(LoadNodeTest, BlockCREncoded) {
+  Node node = Load(
+      "blockText: |\r"
+      " some arbitrary text \r"
+      " spanning some \r"
+      " lines, that are split \r"
+      " by CR and NL\r"
+      "followup: 1");
+  EXPECT_EQ(
+      "some arbitrary text \nspanning some \nlines, that are split \nby CR and "
+      "NL\n",
+      node["blockText"].as<std::string>());
+  EXPECT_EQ(1, node["followup"].as<int>());
+}
+
+
 } // namespace
 } // namespace YAML