From da179d66f41b7dcefde4c5599451c058e62a89d1 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Wed, 31 Mar 2021 21:19:21 +0200 Subject: [PATCH] [ WIP #349 ] unify C and C++ lexer/parser: CPP/NoSTL part --- source/src/BNFC/Backend/C.hs | 6 +- source/src/BNFC/Backend/C/CFtoBisonC.hs | 223 ++++++++++++------ source/src/BNFC/Backend/C/CFtoFlexC.hs | 159 +++++++++---- source/src/BNFC/Backend/CPP/Makefile.hs | 20 +- source/src/BNFC/Backend/CPP/NoSTL.hs | 52 ++-- source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs | 2 +- source/src/BNFC/Backend/CPP/STL.hs | 20 +- .../src/BNFC/Backend/CPP/STL/CFtoBisonSTL.hs | 16 +- 8 files changed, 335 insertions(+), 163 deletions(-) diff --git a/source/src/BNFC/Backend/C.hs b/source/src/BNFC/Backend/C.hs index 8de4a3582..4c3a80a8d 100644 --- a/source/src/BNFC/Backend/C.hs +++ b/source/src/BNFC/Backend/C.hs @@ -32,9 +32,9 @@ makeC opts cf = do mkfile "Absyn.c" cfile mkfile "Buffer.h" bufferH mkfile "Buffer.c" $ bufferC "Buffer.h" - let (flex, env) = cf2flex prefix cf + let (flex, env) = cf2flex parserMode cf mkfile (name ++ ".l") flex - let bison = cf2Bison (linenumbers opts) prefix cf env + let bison = cf2Bison (linenumbers opts) parserMode cf env mkfile (name ++ ".y") bison let header = mkHeaderFile (linenumbers opts) cf (Map.elems env) mkfile "Parser.h" header @@ -54,6 +54,8 @@ makeC opts cf = do -- It should be a valid C identifier. prefix :: String prefix = snakeCase_ name ++ "_" + parserMode :: ParserMode + parserMode = CParser False prefix makefile :: String -> String -> String -> Doc diff --git a/source/src/BNFC/Backend/C/CFtoBisonC.hs b/source/src/BNFC/Backend/C/CFtoBisonC.hs index 7d1d42e3b..d9a5719a2 100644 --- a/source/src/BNFC/Backend/C/CFtoBisonC.hs +++ b/source/src/BNFC/Backend/C/CFtoBisonC.hs @@ -24,16 +24,22 @@ module BNFC.Backend.C.CFtoBisonC ) where +import Prelude hiding ((<>)) + import Data.Char (toLower) import Data.Foldable (toList) import Data.List (intercalate, nub) import Data.Maybe (fromMaybe) import qualified Data.Map as Map +import System.FilePath ((<.>)) import BNFC.CF import BNFC.Backend.Common.NamedVariables hiding (varName) +import BNFC.Backend.C.CFtoFlexC (ParserMode(..), cParser, parserHExt, parserName, parserPackage) +import BNFC.Backend.CPP.STL.STLUtils import BNFC.Options (RecordPositions(..)) -import BNFC.Utils ((+++)) +import BNFC.PrettyPrint +import BNFC.Utils ((+++), whenJust) --This follows the basic structure of CFtoHappy. @@ -44,11 +50,11 @@ type Action = String type MetaVar = String --The environment comes from the CFtoFlex -cf2Bison :: RecordPositions -> String -> CF -> SymMap -> String -cf2Bison rp name cf env = unlines - [ header name cf - , union (allParserCatsNorm cf) - , unionDependentCode name +cf2Bison :: RecordPositions -> ParserMode -> CF -> SymMap -> String +cf2Bison rp mode cf env = unlines + [ header mode cf + , render $ union mode $ allParserCatsNorm cf + , unionDependentCode mode , "%token _ERROR_" , tokens (map fst $ tokenPragmas cf) env , declarations cf @@ -57,22 +63,32 @@ cf2Bison rp name cf env = unlines , "" , "%%" , "" - , prRules (rulesForBison rp cf env) + , prRules $ rulesForBison rp mode cf env , "%%" , "" - , entryCode name cf + , entryCode mode cf ] + where + name = fromMaybe undefined $ parserName mode + -header :: String -> CF -> String -header name cf = unlines - [ "/* This Bison file was machine-generated by BNFC */" +header :: ParserMode -> CF -> String +header mode cf = unlines $ concat + [ [ "/* -*- c -*- This Bison file was machine-generated by BNFC */" , "" , "/* Generate header file for lexer. */" - , "%defines \"Bison.h\"" - , "" + , "%defines \"" ++ ("Bison" <.> h) ++ "\"" + ] + , whenJust (parserPackage mode) $ \ ns -> + [ "%name-prefix = \"" ++ ns ++ "yy\"" + , " /* From Bison 2.6: %define api.prefix {" ++ ns ++ "yy} */" + ] + , [ "" , "/* Reentrant parser */" , "%pure_parser" - -- This flag is deprecated in Bison 3.7, but older Bisons don't recognize + , " /* From Bison 2.3b (2008): %define api.pure full */" + -- The flag %pure_parser is deprecated with a warning since Bison 3.4, + -- but older Bisons like 2.3 (2006, shipped with macOS) don't recognize -- %define api.pure full , "%lex-param { yyscan_t scanner }" , "%parse-param { yyscan_t scanner }" @@ -89,7 +105,7 @@ header name cf = unlines , "#include " , "#include " , "#include " - , "#include \"Absyn.h\"" + , "#include \"" ++ ("Absyn" <.> h) ++ "\"" , "" , "#define YYMAXDEPTH 10000000" -- default maximum stack size is 10000, but right-recursion needs O(n) stack , "" @@ -99,31 +115,35 @@ header name cf = unlines , "typedef void* yyscan_t;" , "#endif" , "" - , "typedef struct " ++ name ++ "_buffer_state *YY_BUFFER_STATE;" - , "YY_BUFFER_STATE " ++ name ++ "_scan_string(const char *str, yyscan_t scanner);" - , "void " ++ name ++ "_delete_buffer(YY_BUFFER_STATE buf, yyscan_t scanner);" + -- , "typedef struct " ++ name ++ "_buffer_state *YY_BUFFER_STATE;" + , "typedef struct yy_buffer_state *YY_BUFFER_STATE;" + , "extern YY_BUFFER_STATE " ++ name ++ "_scan_string(const char *str, yyscan_t scanner);" + , "extern void " ++ name ++ "_delete_buffer(YY_BUFFER_STATE buf, yyscan_t scanner);" , "" , "extern void " ++ name ++ "lex_destroy(yyscan_t scanner);" , "extern char* " ++ name ++ "get_text(yyscan_t scanner);" , "" - , "extern yyscan_t " ++ name ++ "_init_lexer(FILE * inp);" + , "extern yyscan_t " ++ name ++ "_initialize_lexer(FILE * inp);" , "" , "/* List reversal functions. */" - , concatMap reverseList $ filter isList $ allParserCatsNorm cf + , concatMap (reverseList mode) $ filter isList $ allParserCatsNorm cf , "/* End C preamble code */" , "%}" ] + ] where eps = toList (allEntryPoints cf) -- Andreas, 2019-04-29, #210: Generate also parsers for CoercCat. -- WAS: (allCatsNorm cf) -- Found old comment: -- -- M.F. 2004-09-17 changed allEntryPoints to allCatsIdNorm. Seems to fix the [Ty2] bug. + h = parserHExt mode + name = fromMaybe undefined $ parserName mode -- | Code that needs the @YYSTYPE@ defined by the @%union@ pragma. -- -unionDependentCode :: String -> String -unionDependentCode name = unlines +unionDependentCode :: ParserMode -> String +unionDependentCode mode = unlines [ "%{" , errorHandler name , "int yyparse(yyscan_t scanner, YYSTYPE *result);" @@ -131,26 +151,29 @@ unionDependentCode name = unlines , "extern int yylex(YYSTYPE *lvalp, YYLTYPE *llocp, yyscan_t scanner);" , "%}" ] + where + name = fromMaybe undefined $ parserName mode -- TODO errorHandler :: String -> String errorHandler name = unlines [ "void yyerror(YYLTYPE *loc, yyscan_t scanner, YYSTYPE *result, const char *msg)" , "{" , " fprintf(stderr, \"error: %d,%d: %s at %s\\n\"," + -- , " loc->first_line, loc->first_column, msg, yyget_text(scanner));" , " loc->first_line, loc->first_column, msg, " ++ name ++ "get_text(scanner));" , "}" ] -- | Parser entry point code. -- -entryCode :: String -> CF -> String -entryCode name cf = unlines $ map (parseMethod cf name) eps +entryCode :: ParserMode -> CF -> String +entryCode mode cf = unlines $ map (parseMethod mode cf) eps where eps = toList (allEntryPoints cf) --This generates a parser method for each entry point. -parseMethod :: CF -> String -> Cat -> String -parseMethod cf name cat = unlines $ concat +parseMethod :: ParserMode -> CF -> Cat -> String +parseMethod mode cf cat = unlines $ concat [ [ unwords [ "/* Entrypoint: parse", dat, "from file. */" ] , dat ++ " p" ++ parser ++ "(FILE *inp)" ] @@ -162,10 +185,11 @@ parseMethod cf name cat = unlines $ concat , body True ] where + name = fromMaybe undefined $ parserName mode body stringParser = concat [ [ "{" , " YYSTYPE result;" - , " yyscan_t scanner = " ++ name ++ "_init_lexer(", file, ");" + , " yyscan_t scanner = " ++ name ++ "_initialize_lexer(", file, ");" , " if (!scanner) {" , " fprintf(stderr, \"Failed to initialize lexer.\\n\");" , " return 0;" @@ -190,47 +214,91 @@ parseMethod cf name cat = unlines $ concat file | stringParser = "0" | otherwise = "inp" ncat = normCat cat - dat = identCat ncat + dat0 = identCat ncat + dat = if cParser mode then dat0 else dat0 ++ "*" parser = identCat cat res0 = concat [ "result.", varName ncat ] - revRes = "reverse" ++ dat ++ "(" ++ res0 ++ ")" + revRes = "reverse" ++ dat0 ++ "(" ++ res0 ++ ")" res = if cat `elem` cfgReversibleCats cf then revRes else res0 --This method generates list reversal functions for each list type. -reverseList :: Cat -> String -reverseList c = unlines - [ - c' ++ " reverse" ++ c' ++ "(" ++ c' +++ "l)", - "{", - " " ++ c' +++"prev = 0;", - " " ++ c' +++"tmp = 0;", - " while (l)", - " {", - " tmp = l->" ++ v ++ ";", - " l->" ++ v +++ "= prev;", - " prev = l;", - " l = tmp;", - " }", - " return prev;", - "}" - ] - where - c' = identCat (normCat c) - v = map toLower c' ++ "_" - ---The union declaration is special to Bison/Yacc and gives the type of yylval. ---For efficiency, we may want to only include used categories here. -union :: [Cat] -> String -union cats = unlines $ concat - [ [ "/* The type of a parse result (yylval). */" ] - , [ "%union" +reverseList :: ParserMode -> Cat -> String +reverseList mode c0 = unlines + [ c' ++ " reverse" ++ c ++ "(" ++ c' +++ "l)" , "{" + , " " ++ c' +++"prev = 0;" + , " " ++ c' +++"tmp = 0;" + , " while (l)" + , " {" + , " tmp = l->" ++ v ++ ";" + , " l->" ++ v +++ "= prev;" + , " prev = l;" + , " l = tmp;" + , " }" + , " return prev;" + , "}" ] - , map (" " ++) unionBuiltinTokens - , concatMap mkPointer cats - , [ "}" + where + c = identCat (normCat c0) + c' = c ++ star + v = map toLower c ++ "_" + star = if cParser mode then "" else "*" + +-- | The union declaration is special to Bison/Yacc and gives the type of +-- yylval. For efficiency, we may want to only include used categories here. +-- +-- >>> let foo = Cat "Foo" +-- >>> union Nothing [foo, ListCat foo] +-- %union +-- { +-- int _int; +-- char _char; +-- double _double; +-- char* _string; +-- Foo* foo_; +-- ListFoo* listfoo_; +-- } +-- +-- If the given list of categories is contains coerced categories, those should +-- be normalized and duplicate removed +-- E.g. if there is both [Foo] and [Foo2] we should only print one pointer: +-- ListFoo* listfoo_; +-- +-- >>> let foo2 = CoercCat "Foo" 2 +-- >>> union (CppParser Nothing) [foo, ListCat foo, foo2, ListCat foo2] +-- %union +-- { +-- int _int; +-- char _char; +-- double _double; +-- char* _string; +-- Foo* foo_; +-- ListFoo* listfoo_; +-- } +union :: ParserMode -> [Cat] -> Doc +union mode cats = vcat + [ "%union" + , codeblock 2 $ map text unionBuiltinTokens ++ map mkPointer normCats ] - ] + where + normCats = nub (map normCat cats) + mkPointer s = scope <> text (identCat s) <> star <+> text (varName s) <> ";" + scope = text $ nsScope $ parserPackage mode + star = if cParser mode then empty else text "*" + +-- --The union declaration is special to Bison/Yacc and gives the type of yylval. +-- --For efficiency, we may want to only include used categories here. +-- union :: ParserMode -> [Cat] -> String +-- union parserMode cats = unlines $ concat +-- [ [ "/* The type of a parse result (yylval). */" ] +-- , [ "%union" +-- , "{" +-- ] +-- , map (" " ++) unionBuiltinTokens +-- , concatMap (mkPointer parserMode) cats +-- , [ "}" +-- ] +-- ] --This is a little weird because people can make [Exp2] etc. mkPointer :: Cat -> [String] mkPointer c @@ -290,18 +358,18 @@ startSymbol cf = "%start" +++ identCat (firstEntry cf) --The following functions are a (relatively) straightforward translation --of the ones in CFtoHappy.hs -rulesForBison :: RecordPositions -> CF -> SymMap -> Rules -rulesForBison rp cf env = map mkOne $ ruleGroups cf where - mkOne (cat,rules) = constructRule rp cf env rules cat +rulesForBison :: RecordPositions -> ParserMode -> CF -> SymMap -> Rules +rulesForBison rp mode cf env = map mkOne $ ruleGroups cf where + mkOne (cat,rules) = constructRule rp mode cf env rules cat -- For every non-terminal, we construct a set of rules. constructRule - :: RecordPositions -> CF -> SymMap + :: RecordPositions -> ParserMode -> CF -> SymMap -> [Rule] -- ^ List of alternatives for parsing ... -> NonTerminal -- ^ ... this non-terminal. -> (NonTerminal,[(Pattern,Action)]) -constructRule rp cf env rules nt = (nt,) $ - [ (p,) $ addResult $ generateAction rp (identCat (normCat nt)) (funRule r) b m +constructRule rp mode cf env rules nt = (nt,) $ + [ (p,) $ addResult $ generateAction rp mode (identCat (normCat nt)) (funRule r) b m | r0 <- rules , let (b,r) = if isConsFun (funRule r0) && valCat r0 `elem` cfgReversibleCats cf then (True, revSepListRule r0) @@ -320,28 +388,29 @@ constructRule rp cf env rules nt = (nt,) $ else a -- | Generates a string containing the semantic action. --- >>> generateAction NoRecordPositions "Foo" "Bar" False ["$1"] +-- >>> generateAction NoRecordPositions (CParser False "") "Foo" "Bar" False ["$1"] -- "make_Bar($1);" --- >>> generateAction NoRecordPositions "Foo" "_" False ["$1"] +-- >>> generateAction NoRecordPositions (CParser False "") "Foo" "_" False ["$1"] -- "$1;" --- >>> generateAction NoRecordPositions "ListFoo" "[]" False [] +-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "[]" False [] -- "0;" --- >>> generateAction NoRecordPositions "ListFoo" "(:[])" False ["$1"] +-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:[])" False ["$1"] -- "make_ListFoo($1, 0);" --- >>> generateAction NoRecordPositions "ListFoo" "(:)" False ["$1","$2"] +-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:)" False ["$1","$2"] -- "make_ListFoo($1, $2);" --- >>> generateAction NoRecordPositions "ListFoo" "(:)" True ["$1","$2"] +-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:)" True ["$1","$2"] -- "make_ListFoo($2, $1);" -generateAction :: IsFun a => RecordPositions -> String -> a -> Bool -> [MetaVar] -> Action -generateAction rp nt f b ms +generateAction :: IsFun a => RecordPositions -> ParserMode -> String -> a -> Bool -> [MetaVar] -> Action +generateAction rp mode nt f b ms | isCoercion f = unwords ms ++ ";" ++ loc | isNilFun f = "0;" - | isOneFun f = concat ["make_", nt, "(", intercalate ", " ms', ", 0);"] - | isConsFun f = concat ["make_", nt, "(", intercalate ", " ms', ");"] - | otherwise = concat ["make_", funName f, "(", intercalate ", " ms', ");", loc] + | isOneFun f = concat [new, nt, "(", intercalate ", " ms', ", 0);"] + | isConsFun f = concat [new, nt, "(", intercalate ", " ms', ");"] + | otherwise = concat [new, funName f, "(", intercalate ", " ms', ");", loc] where ms' = if b then reverse ms else ms loc = if rp == RecordPositions then " $$->line_number = @$.first_line; $$->char_number = @$.first_column;" else "" + new = if cParser mode then "make_" else "new " -- Generate patterns and a set of metavariables indicating -- where in the pattern the non-terminal diff --git a/source/src/BNFC/Backend/C/CFtoFlexC.hs b/source/src/BNFC/Backend/C/CFtoFlexC.hs index 5542adc05..e3ae54dfe 100644 --- a/source/src/BNFC/Backend/C/CFtoFlexC.hs +++ b/source/src/BNFC/Backend/C/CFtoFlexC.hs @@ -15,6 +15,7 @@ module BNFC.Backend.C.CFtoFlexC ( cf2flex + , ParserMode(..), parserName, parserPackage, cParser, parserHExt , preludeForBuffer -- C code defining a buffer for lexing string literals. , cMacros -- Lexer definitions. , commentStates -- Stream of names for lexer states for comments. @@ -26,22 +27,50 @@ module BNFC.Backend.C.CFtoFlexC import Prelude hiding ((<>)) import Data.Bifunctor (first) import Data.List (isInfixOf) -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Map as Map +import System.FilePath ((<.>)) import BNFC.CF import BNFC.Backend.C.RegToFlex import BNFC.Backend.Common.NamedVariables +import BNFC.Backend.CPP.STL.STLUtils (nsDefine, nsString) +import BNFC.Options (InPackage) import BNFC.PrettyPrint -import BNFC.Utils (cstring, unless, when) +import BNFC.Utils (cstring, unless, when, whenJust) + +data ParserMode + = CParser Bool String -- ^ @C@ (@False@) or @C++ no STL@ (@True@) mode, with @name@ to use as prefix. + | CppParser InPackage -- ^ @C++@ mode, with optional package name + +parserName :: ParserMode -> Maybe String +parserName = \case + CParser _ n -> Just n + CppParser _ -> Nothing + +parserPackage :: ParserMode -> InPackage +parserPackage = \case + CParser _ _ -> Nothing + CppParser p -> p + +cParser :: ParserMode -> Bool +cParser = \case + CParser b _ -> not b + CppParser _ -> False + +parserHExt :: ParserMode -> String +parserHExt = \case + CParser b _ -> if b then "H" else "h" + CppParser _ -> "H" -- | Entrypoint. -cf2flex :: String -> CF -> (String, SymMap) -- The environment is reused by the parser. -cf2flex name cf = (, env) $ unlines - [ prelude stringLiterals name +cf2flex :: ParserMode -> CF -> (String, SymMap) -- The environment is reused by the parser. +cf2flex mode cf = (, env) $ unlines + [ prelude stringLiterals mode , cMacros cf , lexSymbols env0 - , restOfFlex cf env + , restOfFlex (parserPackage mode) cf env + , footer -- mode ] where env = Map.fromList env1 @@ -50,8 +79,8 @@ cf2flex name cf = (, env) $ unlines makeSymEnv = zipWith $ \ s n -> (s, "_SYMB_" ++ show n) stringLiterals = isUsedCat cf (TokenCat catString) -prelude :: Bool -> String -> String -prelude stringLiterals name = unlines $ concat +prelude :: Bool -> ParserMode -> String +prelude stringLiterals mode = unlines $ concat [ [ "/* -*- c -*- This FLex file was machine-generated by the BNF converter */" , "" -- noinput and nounput are most often unused @@ -65,7 +94,9 @@ prelude stringLiterals name = unlines $ concat , "%option extra-type=\"Buffer\"" , "" ] - , [ "%top{" + , maybeToList $ ("%option prefix=\"" ++) . (++ "yy\"" ) <$> parserPackage mode + , when (cParser mode) + [ "%top{" , "/* strdup was not in the ISO C standard before 6/2019 (C2x), but in POSIX 1003.1." , " * See: https://en.cppreference.com/w/c/experimental/dynamic/strdup" , " * Setting _POSIX_C_SOURCE to 200809L activates strdup in string.h." @@ -73,14 +104,17 @@ prelude stringLiterals name = unlines $ concat -- The following #define needs to be at the top before the automatic #include , "#define _POSIX_C_SOURCE 200809L" , "}" - , "%{" - , "#include \"Absyn.h\"" - , "#include \"Bison.h\"" + ] + , [ "%{" + , "#include \"" ++ ("Absyn" <.> h) ++ "\"" + , "#include \"" ++ ("Bison" <.> h) ++ "\"" , "" - , "#define init_lexer " ++ name ++ "_init_lexer" + ] + , whenJust (parserName mode) $ \ name -> + [ "#define initialize_lexer " ++ name ++ "_initialize_lexer" , "" ] - , when stringLiterals $ preludeForBuffer "Buffer.h" + , when stringLiterals $ preludeForBuffer $ "Buffer" <.> h -- https://www.gnu.org/software/bison/manual/html_node/Token-Locations.html -- Flex is responsible for keeping tracking of the yylloc for Bison. -- Flex also doesn't do this automatically so we need this function @@ -104,6 +138,8 @@ prelude stringLiterals name = unlines $ concat , "%}" ] ] + where + h = parserHExt mode -- | Part of the lexer prelude needed when string literals are to be lexed. -- Defines an interface to the Buffer. @@ -163,35 +199,62 @@ lexSymbols ss = concatMap transSym ss where s' = escapeChars s -restOfFlex :: CF -> SymMap -> String -restOfFlex cf env = unlines $ concat - [ [ render $ lexComments Nothing (comments cf) +restOfFlex :: InPackage -> CF -> SymMap -> String +restOfFlex inPackage cf env = unlines $ concat + [ [ render $ lexComments $ comments cf , "" ] , userDefTokens - , ifC catString $ lexStrings "yylval" "_STRING_" "_ERROR_" - , ifC catChar $ lexChars "yylval" "_CHAR_" - , ifC catDouble [ "{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t yylval->_double = atof(yytext); return _DOUBLE_;" ] - , ifC catInteger [ "{DIGIT}+ \t yylval->_int = atoi(yytext); return _INTEGER_;" ] - , ifC catIdent [ "{LETTER}{IDENT}* \t yylval->_string = strdup(yytext); return _IDENT_;" ] + , ifC catString $ lexStrings (ns ++ "yylval") (nsDefine inPackage "_STRING_") (nsDefine inPackage "_ERROR_") + , ifC catChar $ lexChars (ns ++ "yylval") (nsDefine inPackage "_CHAR_") + , ifC catDouble [ "{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t " ++ ns ++ "yylval->_double = atof(yytext); return " ++ nsDefine inPackage "_DOUBLE_" ++ ";" ] + , ifC catInteger [ "{DIGIT}+ \t " ++ ns ++ "yylval->_int = atoi(yytext); return " ++ nsDefine inPackage "_INTEGER_" ++ ";" ] + , ifC catIdent [ "{LETTER}{IDENT}* \t " ++ ns ++ "yylval->_string = strdup(yytext); return " ++ nsDefine inPackage "_IDENT_" ++ ";" ] + -- , [ "\\n ++" ++ ns ++ "yy_mylinenumber ;" , [ "[ \\t\\r\\n\\f] \t /* ignore white space. */;" - , ". \t return _ERROR_;" - , "" - , "%% /* Initialization code. */" - , "" + , ". \t return " ++ nsDefine inPackage "_ERROR_" ++ ";" + , "%%" ] - , footer ] where - ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] - userDefTokens = - [ "" ++ printRegFlex exp ++ - " \t yylval->_string = strdup(yytext); return " ++ sName name ++ ";" - | (name, exp) <- tokenPragmas cf - ] - where sName n = fromMaybe n $ Map.lookup (Tokentype n) env - footer = - [ "yyscan_t init_lexer(FILE *inp)" + ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] + ns = nsString inPackage + userDefTokens = + [ "" ++ printRegFlex exp ++ + " \t " ++ ns ++ "yylval->_string = strdup(yytext); return " ++ sName name ++ ";" + | (name, exp) <- tokenPragmas cf + ] + where sName n = fromMaybe n $ Map.lookup (Tokentype n) env + + -- [ [ render $ lexComments inPackage $ comments cf + -- , "" + -- ] + -- , userDefTokens + -- , ifC catString $ lexStrings "yylval" "_STRING_" "_ERROR_" + -- , ifC catChar $ lexChars "yylval" "_CHAR_" + -- , ifC catDouble [ "{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t yylval->_double = atof(yytext); return _DOUBLE_;" ] + -- , ifC catInteger [ "{DIGIT}+ \t yylval->_int = atoi(yytext); return _INTEGER_;" ] + -- , ifC catIdent [ "{LETTER}{IDENT}* \t yylval->_string = strdup(yytext); return _IDENT_;" ] + -- , [ "[ \\t\\r\\n\\f] \t /* ignore white space. */;" + -- , ". \t return _ERROR_;" + -- , "" + -- , "%% /* Initialization code. */" + -- , "" + -- ] + -- , footer + -- ] + -- where + -- ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] + -- userDefTokens = + -- [ "" ++ printRegFlex exp ++ + -- " \t yylval->_string = strdup(yytext); return " ++ sName name ++ ";" + -- | (name, exp) <- tokenPragmas cf + -- ] + -- where sName n = fromMaybe n $ Map.lookup (Tokentype n) env + +footer :: String +footer = unlines + [ "yyscan_t initialize_lexer(FILE *inp)" , "{" , " yyscan_t scanner;" , " if (yylex_init_extra(NULL, &scanner)) return 0;" @@ -200,6 +263,22 @@ restOfFlex cf env = unlines $ concat , "}" ] +-- footer :: FlexMode -> String +-- footer mode +-- | cFlex mode = unlines +-- [ "yyscan_t initialize_lexer(FILE *inp)" +-- , "{" +-- , " yyscan_t scanner;" +-- , " if (yylex_init_extra(NULL, &scanner)) return 0;" +-- , " if (inp) yyrestart(inp, scanner);" +-- , " return scanner;" +-- , "}" +-- ] +-- | otherwise = unlines +-- [ "void " ++ ns ++ "initialize_lexer(FILE *inp) { yyrestart(inp); }" +-- , "int yywrap(void) { return 1; }" -- TODO: NEEDED? +-- ] + -- | Lexing of strings, converting escaped characters. lexStrings :: String -> String -> String -> [String] lexStrings yylval stringToken errorToken = @@ -238,15 +317,15 @@ lexChars yylval charToken = -- lexSingleComment or lexMultiComment on each comment delimiter or pair of -- delimiters. -- --- >>> lexComments (Just "myns.") ([("{-","-}")],["--"]) +-- >>> lexComments ([("{-","-}")],["--"]) -- "--"[^\n]* /* skip */; /* BNFC: comment "--" */ -- "{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */ -- "-}" BEGIN INITIAL; -- . /* skip */; -- [\n] /* skip */; -lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc -lexComments _ (m,s) = vcat $ concat - [ map lexSingleComment s +lexComments :: ([(String, String)], [String]) -> Doc +lexComments (m,s) = vcat $ concat + [ map lexSingleComment s , zipWith lexMultiComment m commentStates ] diff --git a/source/src/BNFC/Backend/CPP/Makefile.hs b/source/src/BNFC/Backend/CPP/Makefile.hs index 801a9f308..7315b700d 100644 --- a/source/src/BNFC/Backend/CPP/Makefile.hs +++ b/source/src/BNFC/Backend/CPP/Makefile.hs @@ -3,16 +3,16 @@ module BNFC.Backend.CPP.Makefile (makefile) where import BNFC.Backend.Common.Makefile import BNFC.PrettyPrint -makefile :: String -> String -> Doc -makefile name basename = vcat +makefile :: String -> String -> String -> Doc +makefile prefix name basename = vcat [ mkVar "CC" "g++ -g" , mkVar "CCFLAGS" "--ansi -W -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unneeded-internal-declaration" , "" , mkVar "FLEX" "flex" - , mkVar "FLEX_OPTS" ("-P" ++ name) + , mkVar "FLEX_OPTS" ("-P" ++ prefix) , "" , mkVar "BISON" "bison" - , mkVar "BISON_OPTS" ("-t -p" ++ name) + , mkVar "BISON_OPTS" ("-t -p" ++ prefix) , "" , mkVar "OBJS" "Absyn.o Buffer.o Lexer.o Parser.o Printer.o" , "" @@ -29,7 +29,7 @@ makefile name basename = vcat [ "Absyn.C", "Absyn.H" , "Buffer.C", "Buffer.H" , "Test.C" - , "Parser.C", "Parser.H", "ParserError.H", name ++ ".y" + , "Bison.H", "Parser.C", "Parser.H", "ParserError.H", name ++ ".y" , "Lexer.C", name ++ ".l" , "Skeleton.C", "Skeleton.H" , "Printer.C", "Printer.H" @@ -45,12 +45,12 @@ makefile name basename = vcat , mkRule "Buffer.o" [ "Buffer.C", "Buffer.H" ] [ "${CC} ${CCFLAGS} -c Buffer.C " ] , mkRule "Lexer.C" [ name ++ ".l" ] - [ "${FLEX} -oLexer.C " ++ name ++ ".l" ] - , mkRule "Parser.C" [ name ++ ".y" ] - [ "${BISON} " ++ name ++ ".y -o Parser.C" ] - , mkRule "Lexer.o" [ "Lexer.C", "Parser.H" ] + [ "${FLEX} ${FLEX_OPTS} -oLexer.C " ++ name ++ ".l" ] + , mkRule "Parser.C Bison.H" [ name ++ ".y" ] + [ "${BISON} ${BISON_OPTS} " ++ name ++ ".y -o Parser.C" ] + , mkRule "Lexer.o" [ "Lexer.C", "Bison.H" ] [ "${CC} ${CCFLAGS} -c Lexer.C " ] - , mkRule "Parser.o" [ "Parser.C", "Absyn.H" ] + , mkRule "Parser.o" [ "Parser.C", "Absyn.H", "Bison.H" ] [ "${CC} ${CCFLAGS} -c Parser.C" ] , mkRule "Printer.o" [ "Printer.C", "Printer.H", "Absyn.H" ] [ "${CC} ${CCFLAGS} -c Printer.C" ] diff --git a/source/src/BNFC/Backend/CPP/NoSTL.hs b/source/src/BNFC/Backend/CPP/NoSTL.hs index 90fb65182..d7f5a263e 100644 --- a/source/src/BNFC/Backend/CPP/NoSTL.hs +++ b/source/src/BNFC/Backend/CPP/NoSTL.hs @@ -15,11 +15,13 @@ import BNFC.CF import BNFC.Options import BNFC.Backend.Base import BNFC.Backend.C (bufferH, bufferC) -import BNFC.Backend.C.CFtoBisonC (unionBuiltinTokens) +import BNFC.Backend.C.CFtoBisonC (cf2Bison) +-- import BNFC.Backend.C.CFtoBisonC (unionBuiltinTokens) +import BNFC.Backend.C.CFtoFlexC (cf2flex, ParserMode(..)) import BNFC.Backend.CPP.Makefile import BNFC.Backend.CPP.NoSTL.CFtoCPPAbs -import BNFC.Backend.CPP.NoSTL.CFtoFlex -import BNFC.Backend.CPP.NoSTL.CFtoBison +-- import BNFC.Backend.CPP.NoSTL.CFtoFlex +-- import BNFC.Backend.CPP.NoSTL.CFtoBison import BNFC.Backend.CPP.STL.CFtoCVisitSkelSTL import BNFC.Backend.CPP.PrettyPrinter import qualified BNFC.Backend.Common.Makefile as Makefile @@ -31,9 +33,9 @@ makeCppNoStl opts cf = do mkfile "Absyn.C" cfile mkfile "Buffer.H" bufferH mkfile "Buffer.C" $ bufferC "Buffer.H" - let (flex, env) = cf2flex Nothing name cf + let (flex, env) = cf2flex parserMode cf mkfile (name ++ ".l") flex - let bison = cf2Bison name cf env + let bison = cf2Bison(linenumbers opts) parserMode cf env mkfile (name ++ ".y") bison let header = mkHeaderFile cf (allParserCats cf) (toList $ allEntryPoints cf) (Map.elems env) mkfile "Parser.H" header @@ -44,9 +46,17 @@ makeCppNoStl opts cf = do mkfile "Printer.H" prinH mkfile "Printer.C" prinC mkfile "Test.C" (cpptest cf) - Makefile.mkMakefile opts $ makefile name - where name = lang opts - + Makefile.mkMakefile opts $ makefile prefix name + where + name :: String + name = lang opts + -- The prefix is a string used by flex and bison + -- that is prepended to generated function names. + -- It should be a valid C identifier. + prefix :: String + prefix = snakeCase_ name ++ "_" + parserMode :: ParserMode + parserMode = CParser True prefix cpptest :: CF -> String cpptest cf = @@ -129,20 +139,22 @@ mkHeaderFile cf cats eps env = unlines $ concat [ [ "#ifndef PARSER_HEADER_FILE" , "#define PARSER_HEADER_FILE" , "" - ] - , map mkForwardDec $ nub $ map normCat cats - , [ "typedef union" - , "{" - ] - , map (" " ++) unionBuiltinTokens - , concatMap mkVar cats - , [ "} YYSTYPE;" - , "" - , "#define _ERROR_ 258" - , mkDefines (259 :: Int) env - , "extern YYSTYPE yylval;" + , "#include \"Absyn.H\"" , "" ] + -- , map mkForwardDec $ nub $ map normCat cats + -- , [ "typedef union" + -- , "{" + -- ] + -- , map (" " ++) unionBuiltinTokens + -- , concatMap mkVar cats + -- , [ "} YYSTYPE;" + -- , "" + -- , "#define _ERROR_ 258" + -- , mkDefines (259 :: Int) env + -- , "extern YYSTYPE yylval;" + -- , "" + -- ] , map mkFunc eps , [ "" , "#endif" diff --git a/source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs b/source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs index 9818e7b1b..1dc75bca0 100644 --- a/source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs +++ b/source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs @@ -49,7 +49,7 @@ prelude stringLiterals inPackage = unlines $ concat [ [ "/* This FLex file was machine-generated by the BNF converter */" ] , maybe [] (\ ns -> [ "%option prefix=\"" ++ ns ++ "yy\"" ]) inPackage , [ "%{" - , "#include " + -- , "#include " -- not needed, it seems , "#include \"Parser.H\"" , "extern int " ++ nsString inPackage ++ "yy_mylinenumber ;" --- hack to get line number. AR 2006 , "" diff --git a/source/src/BNFC/Backend/CPP/STL.hs b/source/src/BNFC/Backend/CPP/STL.hs index b930196c7..e778f59fa 100644 --- a/source/src/BNFC/Backend/CPP/STL.hs +++ b/source/src/BNFC/Backend/CPP/STL.hs @@ -20,9 +20,10 @@ import BNFC.Options import BNFC.Backend.Base import BNFC.Backend.C (bufferH, bufferC) import BNFC.Backend.C.CFtoBisonC (unionBuiltinTokens) +import BNFC.Backend.C.CFtoFlexC (cf2flex, ParserMode(..)) import BNFC.Backend.CPP.Makefile import BNFC.Backend.CPP.STL.CFtoSTLAbs -import BNFC.Backend.CPP.NoSTL.CFtoFlex +-- import BNFC.Backend.CPP.NoSTL.CFtoFlex import BNFC.Backend.CPP.STL.CFtoBisonSTL import BNFC.Backend.CPP.STL.CFtoCVisitSkelSTL import BNFC.Backend.CPP.PrettyPrinter @@ -36,9 +37,9 @@ makeCppStl opts cf = do mkfile "Absyn.C" cfile mkfile "Buffer.H" bufferH mkfile "Buffer.C" $ bufferC "Buffer.H" - let (flex, env) = cf2flex (inPackage opts) name cf + let (flex, env) = cf2flex (CppParser $ inPackage opts) cf mkfile (name ++ ".l") flex - let bison = cf2Bison (linenumbers opts) (inPackage opts) name cf env + let bison = cf2Bison (linenumbers opts) (inPackage opts) cf env mkfile (name ++ ".y") bison let header = mkHeaderFile (inPackage opts) cf (allParserCats cf) (toList $ allEntryPoints cf) (Map.elems env) mkfile "Parser.H" header @@ -50,8 +51,17 @@ makeCppStl opts cf = do mkfile "Printer.H" prinH mkfile "Printer.C" prinC mkfile "Test.C" (cpptest (inPackage opts) cf) - Makefile.mkMakefile opts $ makefile name - where name = lang opts + Makefile.mkMakefile opts $ makefile prefix name + where + name :: String + name = lang opts + -- The prefix is a string used by flex and bison + -- that is prepended to generated function names. + -- It should be a valid C identifier. + prefix :: String + prefix = snakeCase_ name ++ "_" + parserMode :: ParserMode + parserMode = CppParser (inPackage opts) printParseErrHeader :: Maybe String -> String printParseErrHeader inPackage = diff --git a/source/src/BNFC/Backend/CPP/STL/CFtoBisonSTL.hs b/source/src/BNFC/Backend/CPP/STL/CFtoBisonSTL.hs index f8794abcb..4c6815a2f 100644 --- a/source/src/BNFC/Backend/CPP/STL/CFtoBisonSTL.hs +++ b/source/src/BNFC/Backend/CPP/STL/CFtoBisonSTL.hs @@ -52,10 +52,10 @@ type Action = String type MetaVar = String --The environment comes from the CFtoFlex -cf2Bison :: RecordPositions -> Maybe String -> String -> CF -> SymMap -> String -cf2Bison rp inPackage name cf env +cf2Bison :: RecordPositions -> Maybe String -> CF -> SymMap -> String +cf2Bison rp inPackage cf env = unlines - [header inPackage name cf, + [header inPackage cf, render $ union inPackage (map TokenCat (positionCats cf) ++ allParserCats cf), maybe "" (\ns -> "%define api.prefix {" ++ ns ++ "yy}") inPackage, "%token _ERROR_", @@ -72,8 +72,8 @@ cf2Bison rp inPackage name cf env positionCats cf = filter (isPositionCat cf) $ fst (unzip (tokenPragmas cf)) -header :: Maybe String -> String -> CF -> String -header inPackage name cf = unlines +header :: Maybe String -> CF -> String +header inPackage cf = unlines [ "/* This Bison file was machine-generated by BNFC */" , "%{" , "#include " @@ -103,7 +103,7 @@ header inPackage name cf = unlines , "" , nsStart inPackage , unlines $ map parseResult dats - , unlines $ map (parseMethod cf inPackage name) eps + , unlines $ map (parseMethod cf inPackage) eps , nsEnd inPackage , "%}" ] @@ -126,8 +126,8 @@ parseResult cat = cat' = identCat cat --This generates a parser method for each entry point. -parseMethod :: CF -> Maybe String -> String -> Cat -> String -parseMethod cf inPackage _ cat = unlines $ concat +parseMethod :: CF -> Maybe String -> Cat -> String +parseMethod cf inPackage cat = unlines $ concat [ [ cat' ++ "* p" ++ par ++ "(FILE *inp)" , "{" , " " ++ ns ++ "yy_mylinenumber = 1;"