From 7179129d9931dd9c7e0479dd7b4afb182dc9c500 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Fri, 13 Nov 2020 11:27:03 +0100 Subject: [PATCH] [ #324 ] ocamllex does not accept unicode char literals It seems that in OCaml a char is 8 bits wide, and Unicode characters are represented as their UTF-8 encoded strings. This means we cannot represent Unicode character sets in the ocamllex lexer definition. We can use string literals in some circumstances. For that reason, RAlts is now translated to a disjunction of char or string literals (the latter for Unicode chars) rather than to a @[charset]@. --- source/src/BNFC/Backend/OCaml/CFtoOCamlLex.hs | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/source/src/BNFC/Backend/OCaml/CFtoOCamlLex.hs b/source/src/BNFC/Backend/OCaml/CFtoOCamlLex.hs index 26af9ea7..b489a4e5 100644 --- a/source/src/BNFC/Backend/OCaml/CFtoOCamlLex.hs +++ b/source/src/BNFC/Backend/OCaml/CFtoOCamlLex.hs @@ -26,6 +26,7 @@ module BNFC.Backend.OCaml.CFtoOCamlLex (cf2ocamllex) where import Prelude hiding ((<>)) +import Data.Char (ord) import qualified Data.List as List import Text.PrettyPrint hiding (render) import qualified Text.PrettyPrint as PP @@ -169,10 +170,10 @@ mkRegexSingleLineComment s = cstring s <+> "(_ # '\\n')*" -- | Create regex for multiline comments. -- >>> mkRegexMultilineComment "" --- "