Skip to content

Commit

Permalink
[ WIP #349 ] unify C and C++ lexer/parser: CPP/NoSTL part
Browse files Browse the repository at this point in the history
  • Loading branch information
andreasabel committed Apr 1, 2021
1 parent cf5c1c1 commit da179d6
Show file tree
Hide file tree
Showing 8 changed files with 335 additions and 163 deletions.
6 changes: 4 additions & 2 deletions source/src/BNFC/Backend/C.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ makeC opts cf = do
mkfile "Absyn.c" cfile
mkfile "Buffer.h" bufferH
mkfile "Buffer.c" $ bufferC "Buffer.h"
let (flex, env) = cf2flex prefix cf
let (flex, env) = cf2flex parserMode cf
mkfile (name ++ ".l") flex
let bison = cf2Bison (linenumbers opts) prefix cf env
let bison = cf2Bison (linenumbers opts) parserMode cf env
mkfile (name ++ ".y") bison
let header = mkHeaderFile (linenumbers opts) cf (Map.elems env)
mkfile "Parser.h" header
Expand All @@ -54,6 +54,8 @@ makeC opts cf = do
-- It should be a valid C identifier.
prefix :: String
prefix = snakeCase_ name ++ "_"
parserMode :: ParserMode
parserMode = CParser False prefix


makefile :: String -> String -> String -> Doc
Expand Down
223 changes: 146 additions & 77 deletions source/src/BNFC/Backend/C/CFtoBisonC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,22 @@ module BNFC.Backend.C.CFtoBisonC
)
where

import Prelude hiding ((<>))

import Data.Char (toLower)
import Data.Foldable (toList)
import Data.List (intercalate, nub)
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map
import System.FilePath ((<.>))

import BNFC.CF
import BNFC.Backend.Common.NamedVariables hiding (varName)
import BNFC.Backend.C.CFtoFlexC (ParserMode(..), cParser, parserHExt, parserName, parserPackage)
import BNFC.Backend.CPP.STL.STLUtils
import BNFC.Options (RecordPositions(..))
import BNFC.Utils ((+++))
import BNFC.PrettyPrint
import BNFC.Utils ((+++), whenJust)

--This follows the basic structure of CFtoHappy.

Expand All @@ -44,11 +50,11 @@ type Action = String
type MetaVar = String

--The environment comes from the CFtoFlex
cf2Bison :: RecordPositions -> String -> CF -> SymMap -> String
cf2Bison rp name cf env = unlines
[ header name cf
, union (allParserCatsNorm cf)
, unionDependentCode name
cf2Bison :: RecordPositions -> ParserMode -> CF -> SymMap -> String
cf2Bison rp mode cf env = unlines
[ header mode cf
, render $ union mode $ allParserCatsNorm cf
, unionDependentCode mode
, "%token _ERROR_"
, tokens (map fst $ tokenPragmas cf) env
, declarations cf
Expand All @@ -57,22 +63,32 @@ cf2Bison rp name cf env = unlines
, ""
, "%%"
, ""
, prRules (rulesForBison rp cf env)
, prRules $ rulesForBison rp mode cf env
, "%%"
, ""
, entryCode name cf
, entryCode mode cf
]
where
name = fromMaybe undefined $ parserName mode


header :: String -> CF -> String
header name cf = unlines
[ "/* This Bison file was machine-generated by BNFC */"
header :: ParserMode -> CF -> String
header mode cf = unlines $ concat
[ [ "/* -*- c -*- This Bison file was machine-generated by BNFC */"
, ""
, "/* Generate header file for lexer. */"
, "%defines \"Bison.h\""
, ""
, "%defines \"" ++ ("Bison" <.> h) ++ "\""
]
, whenJust (parserPackage mode) $ \ ns ->
[ "%name-prefix = \"" ++ ns ++ "yy\""
, " /* From Bison 2.6: %define api.prefix {" ++ ns ++ "yy} */"
]
, [ ""
, "/* Reentrant parser */"
, "%pure_parser"
-- This flag is deprecated in Bison 3.7, but older Bisons don't recognize
, " /* From Bison 2.3b (2008): %define api.pure full */"
-- The flag %pure_parser is deprecated with a warning since Bison 3.4,
-- but older Bisons like 2.3 (2006, shipped with macOS) don't recognize
-- %define api.pure full
, "%lex-param { yyscan_t scanner }"
, "%parse-param { yyscan_t scanner }"
Expand All @@ -89,7 +105,7 @@ header name cf = unlines
, "#include <stdlib.h>"
, "#include <stdio.h>"
, "#include <string.h>"
, "#include \"Absyn.h\""
, "#include \"" ++ ("Absyn" <.> h) ++ "\""
, ""
, "#define YYMAXDEPTH 10000000" -- default maximum stack size is 10000, but right-recursion needs O(n) stack
, ""
Expand All @@ -99,58 +115,65 @@ header name cf = unlines
, "typedef void* yyscan_t;"
, "#endif"
, ""
, "typedef struct " ++ name ++ "_buffer_state *YY_BUFFER_STATE;"
, "YY_BUFFER_STATE " ++ name ++ "_scan_string(const char *str, yyscan_t scanner);"
, "void " ++ name ++ "_delete_buffer(YY_BUFFER_STATE buf, yyscan_t scanner);"
-- , "typedef struct " ++ name ++ "_buffer_state *YY_BUFFER_STATE;"
, "typedef struct yy_buffer_state *YY_BUFFER_STATE;"
, "extern YY_BUFFER_STATE " ++ name ++ "_scan_string(const char *str, yyscan_t scanner);"
, "extern void " ++ name ++ "_delete_buffer(YY_BUFFER_STATE buf, yyscan_t scanner);"
, ""
, "extern void " ++ name ++ "lex_destroy(yyscan_t scanner);"
, "extern char* " ++ name ++ "get_text(yyscan_t scanner);"
, ""
, "extern yyscan_t " ++ name ++ "_init_lexer(FILE * inp);"
, "extern yyscan_t " ++ name ++ "_initialize_lexer(FILE * inp);"
, ""
, "/* List reversal functions. */"
, concatMap reverseList $ filter isList $ allParserCatsNorm cf
, concatMap (reverseList mode) $ filter isList $ allParserCatsNorm cf
, "/* End C preamble code */"
, "%}"
]
]
where
eps = toList (allEntryPoints cf)
-- Andreas, 2019-04-29, #210: Generate also parsers for CoercCat.
-- WAS: (allCatsNorm cf)
-- Found old comment:
-- -- M.F. 2004-09-17 changed allEntryPoints to allCatsIdNorm. Seems to fix the [Ty2] bug.
h = parserHExt mode
name = fromMaybe undefined $ parserName mode

-- | Code that needs the @YYSTYPE@ defined by the @%union@ pragma.
--
unionDependentCode :: String -> String
unionDependentCode name = unlines
unionDependentCode :: ParserMode -> String
unionDependentCode mode = unlines
[ "%{"
, errorHandler name
, "int yyparse(yyscan_t scanner, YYSTYPE *result);"
, ""
, "extern int yylex(YYSTYPE *lvalp, YYLTYPE *llocp, yyscan_t scanner);"
, "%}"
]
where
name = fromMaybe undefined $ parserName mode -- TODO

errorHandler :: String -> String
errorHandler name = unlines
[ "void yyerror(YYLTYPE *loc, yyscan_t scanner, YYSTYPE *result, const char *msg)"
, "{"
, " fprintf(stderr, \"error: %d,%d: %s at %s\\n\","
-- , " loc->first_line, loc->first_column, msg, yyget_text(scanner));"
, " loc->first_line, loc->first_column, msg, " ++ name ++ "get_text(scanner));"
, "}"
]

-- | Parser entry point code.
--
entryCode :: String -> CF -> String
entryCode name cf = unlines $ map (parseMethod cf name) eps
entryCode :: ParserMode -> CF -> String
entryCode mode cf = unlines $ map (parseMethod mode cf) eps
where
eps = toList (allEntryPoints cf)

--This generates a parser method for each entry point.
parseMethod :: CF -> String -> Cat -> String
parseMethod cf name cat = unlines $ concat
parseMethod :: ParserMode -> CF -> Cat -> String
parseMethod mode cf cat = unlines $ concat
[ [ unwords [ "/* Entrypoint: parse", dat, "from file. */" ]
, dat ++ " p" ++ parser ++ "(FILE *inp)"
]
Expand All @@ -162,10 +185,11 @@ parseMethod cf name cat = unlines $ concat
, body True
]
where
name = fromMaybe undefined $ parserName mode
body stringParser = concat
[ [ "{"
, " YYSTYPE result;"
, " yyscan_t scanner = " ++ name ++ "_init_lexer(", file, ");"
, " yyscan_t scanner = " ++ name ++ "_initialize_lexer(", file, ");"
, " if (!scanner) {"
, " fprintf(stderr, \"Failed to initialize lexer.\\n\");"
, " return 0;"
Expand All @@ -190,47 +214,91 @@ parseMethod cf name cat = unlines $ concat
file | stringParser = "0"
| otherwise = "inp"
ncat = normCat cat
dat = identCat ncat
dat0 = identCat ncat
dat = if cParser mode then dat0 else dat0 ++ "*"
parser = identCat cat
res0 = concat [ "result.", varName ncat ]
revRes = "reverse" ++ dat ++ "(" ++ res0 ++ ")"
revRes = "reverse" ++ dat0 ++ "(" ++ res0 ++ ")"
res = if cat `elem` cfgReversibleCats cf then revRes else res0

--This method generates list reversal functions for each list type.
reverseList :: Cat -> String
reverseList c = unlines
[
c' ++ " reverse" ++ c' ++ "(" ++ c' +++ "l)",
"{",
" " ++ c' +++"prev = 0;",
" " ++ c' +++"tmp = 0;",
" while (l)",
" {",
" tmp = l->" ++ v ++ ";",
" l->" ++ v +++ "= prev;",
" prev = l;",
" l = tmp;",
" }",
" return prev;",
"}"
]
where
c' = identCat (normCat c)
v = map toLower c' ++ "_"

--The union declaration is special to Bison/Yacc and gives the type of yylval.
--For efficiency, we may want to only include used categories here.
union :: [Cat] -> String
union cats = unlines $ concat
[ [ "/* The type of a parse result (yylval). */" ]
, [ "%union"
reverseList :: ParserMode -> Cat -> String
reverseList mode c0 = unlines
[ c' ++ " reverse" ++ c ++ "(" ++ c' +++ "l)"
, "{"
, " " ++ c' +++"prev = 0;"
, " " ++ c' +++"tmp = 0;"
, " while (l)"
, " {"
, " tmp = l->" ++ v ++ ";"
, " l->" ++ v +++ "= prev;"
, " prev = l;"
, " l = tmp;"
, " }"
, " return prev;"
, "}"
]
, map (" " ++) unionBuiltinTokens
, concatMap mkPointer cats
, [ "}"
where
c = identCat (normCat c0)
c' = c ++ star
v = map toLower c ++ "_"
star = if cParser mode then "" else "*"

-- | The union declaration is special to Bison/Yacc and gives the type of
-- yylval. For efficiency, we may want to only include used categories here.
--
-- >>> let foo = Cat "Foo"
-- >>> union Nothing [foo, ListCat foo]
-- %union
-- {
-- int _int;
-- char _char;
-- double _double;
-- char* _string;
-- Foo* foo_;
-- ListFoo* listfoo_;
-- }
--
-- If the given list of categories is contains coerced categories, those should
-- be normalized and duplicate removed
-- E.g. if there is both [Foo] and [Foo2] we should only print one pointer:
-- ListFoo* listfoo_;
--
-- >>> let foo2 = CoercCat "Foo" 2
-- >>> union (CppParser Nothing) [foo, ListCat foo, foo2, ListCat foo2]
-- %union
-- {
-- int _int;
-- char _char;
-- double _double;
-- char* _string;
-- Foo* foo_;
-- ListFoo* listfoo_;
-- }
union :: ParserMode -> [Cat] -> Doc
union mode cats = vcat
[ "%union"
, codeblock 2 $ map text unionBuiltinTokens ++ map mkPointer normCats
]
]
where
normCats = nub (map normCat cats)
mkPointer s = scope <> text (identCat s) <> star <+> text (varName s) <> ";"
scope = text $ nsScope $ parserPackage mode
star = if cParser mode then empty else text "*"

-- --The union declaration is special to Bison/Yacc and gives the type of yylval.
-- --For efficiency, we may want to only include used categories here.
-- union :: ParserMode -> [Cat] -> String
-- union parserMode cats = unlines $ concat
-- [ [ "/* The type of a parse result (yylval). */" ]
-- , [ "%union"
-- , "{"
-- ]
-- , map (" " ++) unionBuiltinTokens
-- , concatMap (mkPointer parserMode) cats
-- , [ "}"
-- ]
-- ]
--This is a little weird because people can make [Exp2] etc.
mkPointer :: Cat -> [String]
mkPointer c
Expand Down Expand Up @@ -290,18 +358,18 @@ startSymbol cf = "%start" +++ identCat (firstEntry cf)

--The following functions are a (relatively) straightforward translation
--of the ones in CFtoHappy.hs
rulesForBison :: RecordPositions -> CF -> SymMap -> Rules
rulesForBison rp cf env = map mkOne $ ruleGroups cf where
mkOne (cat,rules) = constructRule rp cf env rules cat
rulesForBison :: RecordPositions -> ParserMode -> CF -> SymMap -> Rules
rulesForBison rp mode cf env = map mkOne $ ruleGroups cf where
mkOne (cat,rules) = constructRule rp mode cf env rules cat

-- For every non-terminal, we construct a set of rules.
constructRule
:: RecordPositions -> CF -> SymMap
:: RecordPositions -> ParserMode -> CF -> SymMap
-> [Rule] -- ^ List of alternatives for parsing ...
-> NonTerminal -- ^ ... this non-terminal.
-> (NonTerminal,[(Pattern,Action)])
constructRule rp cf env rules nt = (nt,) $
[ (p,) $ addResult $ generateAction rp (identCat (normCat nt)) (funRule r) b m
constructRule rp mode cf env rules nt = (nt,) $
[ (p,) $ addResult $ generateAction rp mode (identCat (normCat nt)) (funRule r) b m
| r0 <- rules
, let (b,r) = if isConsFun (funRule r0) && valCat r0 `elem` cfgReversibleCats cf
then (True, revSepListRule r0)
Expand All @@ -320,28 +388,29 @@ constructRule rp cf env rules nt = (nt,) $
else a

-- | Generates a string containing the semantic action.
-- >>> generateAction NoRecordPositions "Foo" "Bar" False ["$1"]
-- >>> generateAction NoRecordPositions (CParser False "") "Foo" "Bar" False ["$1"]
-- "make_Bar($1);"
-- >>> generateAction NoRecordPositions "Foo" "_" False ["$1"]
-- >>> generateAction NoRecordPositions (CParser False "") "Foo" "_" False ["$1"]
-- "$1;"
-- >>> generateAction NoRecordPositions "ListFoo" "[]" False []
-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "[]" False []
-- "0;"
-- >>> generateAction NoRecordPositions "ListFoo" "(:[])" False ["$1"]
-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:[])" False ["$1"]
-- "make_ListFoo($1, 0);"
-- >>> generateAction NoRecordPositions "ListFoo" "(:)" False ["$1","$2"]
-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:)" False ["$1","$2"]
-- "make_ListFoo($1, $2);"
-- >>> generateAction NoRecordPositions "ListFoo" "(:)" True ["$1","$2"]
-- >>> generateAction NoRecordPositions (CParser False "") "ListFoo" "(:)" True ["$1","$2"]
-- "make_ListFoo($2, $1);"
generateAction :: IsFun a => RecordPositions -> String -> a -> Bool -> [MetaVar] -> Action
generateAction rp nt f b ms
generateAction :: IsFun a => RecordPositions -> ParserMode -> String -> a -> Bool -> [MetaVar] -> Action
generateAction rp mode nt f b ms
| isCoercion f = unwords ms ++ ";" ++ loc
| isNilFun f = "0;"
| isOneFun f = concat ["make_", nt, "(", intercalate ", " ms', ", 0);"]
| isConsFun f = concat ["make_", nt, "(", intercalate ", " ms', ");"]
| otherwise = concat ["make_", funName f, "(", intercalate ", " ms', ");", loc]
| isOneFun f = concat [new, nt, "(", intercalate ", " ms', ", 0);"]
| isConsFun f = concat [new, nt, "(", intercalate ", " ms', ");"]
| otherwise = concat [new, funName f, "(", intercalate ", " ms', ");", loc]
where
ms' = if b then reverse ms else ms
loc = if rp == RecordPositions then " $$->line_number = @$.first_line; $$->char_number = @$.first_column;" else ""
new = if cParser mode then "make_" else "new "

-- Generate patterns and a set of metavariables indicating
-- where in the pattern the non-terminal
Expand Down
Loading

0 comments on commit da179d6

Please sign in to comment.