Skip to content

Commit

Permalink
[ re #349 ] C/C++ lexer: use flex's predefined start state INITIAL
Browse files Browse the repository at this point in the history
Use INITIAL instead of defining our own YYINITIAL.
This saves us the initial BEGIN and also works for the reentrant lexer.
  • Loading branch information
andreasabel committed Mar 25, 2021
1 parent 7fcef49 commit c95eaee
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 91 deletions.
49 changes: 24 additions & 25 deletions source/src/BNFC/Backend/C/CFtoFlexC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ cMacros cf = unlines
, "DIGIT [0-9]"
, "IDENT [a-zA-Z0-9'_]"
, unwords $ concat
[ [ "%START YYINITIAL CHAR CHARESC CHAREND STRING ESCAPED" ]
[ [ "%START CHAR CHARESC CHAREND STRING ESCAPED" ]
, take (numberOfBlockCommentForms cf) commentStates
]
, ""
Expand All @@ -150,7 +150,7 @@ lexSymbols :: KeywordEnv -> String
lexSymbols ss = concatMap transSym ss
where
transSym (s,r) =
"<YYINITIAL>\"" ++ s' ++ "\" \t return " ++ r ++ ";\n"
"<INITIAL>\"" ++ s' ++ "\" \t return " ++ r ++ ";\n"
where
s' = escapeChars s

Expand All @@ -162,11 +162,11 @@ restOfFlex cf env = unlines $ concat
, userDefTokens
, ifC catString $ lexStrings "yylval" "_STRING_" "_ERROR_"
, ifC catChar $ lexChars "yylval" "_CHAR_"
, ifC catDouble [ "<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t yylval._double = atof(yytext); return _DOUBLE_;" ]
, ifC catInteger [ "<YYINITIAL>{DIGIT}+ \t yylval._int = atoi(yytext); return _INTEGER_;" ]
, ifC catIdent [ "<YYINITIAL>{LETTER}{IDENT}* \t yylval._string = strdup(yytext); return _IDENT_;" ]
, [ "<YYINITIAL>[ \\t\\r\\n\\f] \t /* ignore white space. */;"
, "<YYINITIAL>. \t return _ERROR_;"
, ifC catDouble [ "<INITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t yylval._double = atof(yytext); return _DOUBLE_;" ]
, ifC catInteger [ "<INITIAL>{DIGIT}+ \t yylval._int = atoi(yytext); return _INTEGER_;" ]
, ifC catIdent [ "<INITIAL>{LETTER}{IDENT}* \t yylval._string = strdup(yytext); return _IDENT_;" ]
, [ "<INITIAL>[ \\t\\r\\n\\f] \t /* ignore white space. */;"
, "<INITIAL>. \t return _ERROR_;"
, ""
, "%% /* Initialization code. */"
, ""
Expand All @@ -176,7 +176,7 @@ restOfFlex cf env = unlines $ concat
where
ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
userDefTokens =
[ "<YYINITIAL>" ++ printRegFlex exp ++
[ "<INITIAL>" ++ printRegFlex exp ++
" \t yylval._string = strdup(yytext); return " ++ sName name ++ ";"
| (name, exp) <- tokenPragmas cf
]
Expand All @@ -190,16 +190,15 @@ restOfFlex cf env = unlines $ concat
" yylloc.first_column = 1;",
" yylloc.last_line = 1;",
" yylloc.last_column = 1;",
" BEGIN YYINITIAL;",
"}"
]

-- | Lexing of strings, converting escaped characters.
lexStrings :: String -> String -> String -> [String]
lexStrings yylval stringToken errorToken =
[ "<YYINITIAL>\"\\\"\" \t LITERAL_BUFFER_CREATE(); BEGIN STRING;"
[ "<INITIAL>\"\\\"\" \t LITERAL_BUFFER_CREATE(); BEGIN STRING;"
, "<STRING>\\\\ \t BEGIN ESCAPED;"
, "<STRING>\\\" \t " ++ yylval ++ "._string = LITERAL_BUFFER_HARVEST(); BEGIN YYINITIAL; return " ++ stringToken ++ ";"
, "<STRING>\\\" \t " ++ yylval ++ "._string = LITERAL_BUFFER_HARVEST(); BEGIN INITIAL; return " ++ stringToken ++ ";"
, "<STRING>. \t LITERAL_BUFFER_APPEND_CHAR(yytext[0]);"
, "<ESCAPED>n \t LITERAL_BUFFER_APPEND_CHAR('\\n'); BEGIN STRING;"
, "<ESCAPED>\\\" \t LITERAL_BUFFER_APPEND_CHAR('\"'); BEGIN STRING;"
Expand All @@ -212,13 +211,13 @@ lexStrings yylval stringToken errorToken =
-- | Lexing of characters, converting escaped characters.
lexChars :: String -> String -> [String]
lexChars yylval charToken =
[ "<YYINITIAL>\"'\" \tBEGIN CHAR;"
[ "<INITIAL>\"'\" \tBEGIN CHAR;"
, "<CHAR>\\\\ \t BEGIN CHARESC;"
, "<CHAR>[^'] \t BEGIN CHAREND; " ++ yylval ++ "._char = yytext[0]; return " ++ charToken ++ ";"
, "<CHARESC>n \t BEGIN CHAREND; " ++ yylval ++ "._char = '\\n'; return " ++ charToken ++ ";"
, "<CHARESC>t \t BEGIN CHAREND; " ++ yylval ++ "._char = '\\t'; return " ++ charToken ++ ";"
, "<CHARESC>. \t BEGIN CHAREND; " ++ yylval ++ "._char = yytext[0]; return " ++ charToken ++ ";"
, "<CHAREND>\"'\" \t BEGIN YYINITIAL;"
, "<CHAREND>\"'\" \t BEGIN INITIAL;"
]

-- ---------------------------------------------------------------------------
Expand All @@ -233,9 +232,9 @@ lexChars yylval charToken =
-- delimiters.
--
-- >>> lexComments (Just "myns.") ([("{-","-}")],["--"])
-- <YYINITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
-- <YYINITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <INITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
-- <INITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN INITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] /* skip */;
lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc
Expand All @@ -254,13 +253,13 @@ commentStates = map ("COMMENT" ++) $ "" : map show [1..]
-- comment.
--
-- >>> lexSingleComment "--"
-- <YYINITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
-- <INITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
--
-- >>> lexSingleComment "\""
-- <YYINITIAL>"\""[^\n]* /* skip */; /* BNFC: comment "\"" */
-- <INITIAL>"\""[^\n]* /* skip */; /* BNFC: comment "\"" */
lexSingleComment :: String -> Doc
lexSingleComment c =
"<YYINITIAL>" <> cstring c <> "[^\\n]*"
"<INITIAL>" <> cstring c <> "[^\\n]*"
<+> "/* skip */;"
<+> unless (containsCCommentMarker c) ("/* BNFC: comment" <+> cstring c <+> "*/")

Expand All @@ -276,22 +275,22 @@ containsCCommentMarker s = "/*" `isInfixOf` s || "*/" `isInfixOf` s
-- with another. However this seems rare.
--
-- >>> lexMultiComment ("{-", "-}") "COMMENT"
-- <YYINITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <INITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN INITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] /* skip */;
--
-- >>> lexMultiComment ("\"'", "'\"") "COMMENT"
-- <YYINITIAL>"\"'" BEGIN COMMENT; /* BNFC: block comment "\"'" "'\"" */
-- <COMMENT>"'\"" BEGIN YYINITIAL;
-- <INITIAL>"\"'" BEGIN COMMENT; /* BNFC: block comment "\"'" "'\"" */
-- <COMMENT>"'\"" BEGIN INITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] /* skip */;
lexMultiComment :: (String, String) -> String -> Doc
lexMultiComment (b,e) comment = vcat
[ "<YYINITIAL>" <> cstring b <+> "BEGIN" <+> text comment <> ";"
[ "<INITIAL>" <> cstring b <+> "BEGIN" <+> text comment <> ";"
<+> unless (containsCCommentMarker b || containsCCommentMarker e)
("/* BNFC: block comment" <+> cstring b <+> cstring e <+> "*/")
, commentTag <> cstring e <+> "BEGIN YYINITIAL;"
, commentTag <> cstring e <+> "BEGIN INITIAL;"
, commentTag <> ". /* skip */;"
, commentTag <> "[\\n] /* skip */;"
]
Expand Down
65 changes: 23 additions & 42 deletions source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ lexSymbols :: SymEnv -> String
lexSymbols ss = concatMap transSym ss
where
transSym (s,r) =
"<YYINITIAL>\"" ++ s' ++ "\" \t return " ++ r ++ ";\n"
"<INITIAL>\"" ++ s' ++ "\" \t return " ++ r ++ ";\n"
where
s' = escapeChars s

Expand All @@ -74,12 +74,12 @@ restOfFlex inPackage cf env = unlines $ concat
, userDefTokens
, ifC catString $ lexStrings (ns ++ "yylval") (nsDefine inPackage "_STRING_") (nsDefine inPackage "_ERROR_")
, ifC catChar $ lexChars (ns ++ "yylval") (nsDefine inPackage "_CHAR_")
, ifC catDouble [ "<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t " ++ ns ++ "yylval._double = atof(yytext); return " ++ nsDefine inPackage "_DOUBLE_" ++ ";" ]
, ifC catInteger [ "<YYINITIAL>{DIGIT}+ \t " ++ ns ++ "yylval._int = atoi(yytext); return " ++ nsDefine inPackage "_INTEGER_" ++ ";" ]
, ifC catIdent [ "<YYINITIAL>{LETTER}{IDENT}* \t " ++ ns ++ "yylval._string = strdup(yytext); return " ++ nsDefine inPackage "_IDENT_" ++ ";" ]
, ifC catDouble [ "<INITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t " ++ ns ++ "yylval._double = atof(yytext); return " ++ nsDefine inPackage "_DOUBLE_" ++ ";" ]
, ifC catInteger [ "<INITIAL>{DIGIT}+ \t " ++ ns ++ "yylval._int = atoi(yytext); return " ++ nsDefine inPackage "_INTEGER_" ++ ";" ]
, ifC catIdent [ "<INITIAL>{LETTER}{IDENT}* \t " ++ ns ++ "yylval._string = strdup(yytext); return " ++ nsDefine inPackage "_IDENT_" ++ ";" ]
, [ "\\n ++" ++ ns ++ "yy_mylinenumber ;"
, "<YYINITIAL>[ \\t\\r\\n\\f] \t /* ignore white space. */;"
, "<YYINITIAL>. \t return " ++ nsDefine inPackage "_ERROR_" ++ ";"
, "<INITIAL>[ \\t\\r\\n\\f] \t /* ignore white space. */;"
, "<INITIAL>. \t return " ++ nsDefine inPackage "_ERROR_" ++ ";"
, "%%"
]
, footer
Expand All @@ -88,13 +88,13 @@ restOfFlex inPackage cf env = unlines $ concat
ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
ns = nsString inPackage
userDefTokens =
[ "<YYINITIAL>" ++ printRegFlex exp ++
[ "<INITIAL>" ++ printRegFlex exp ++
" \t " ++ ns ++ "yylval._string = strdup(yytext); return " ++ sName name ++ ";"
| (name, exp) <- tokenPragmas cf
]
where sName n = fromMaybe n $ Map.lookup (Tokentype n) env
footer =
[ "void " ++ ns ++ "initialize_lexer(FILE *inp) { yyrestart(inp); BEGIN YYINITIAL; }"
[ "void " ++ ns ++ "initialize_lexer(FILE *inp) { yyrestart(inp); }"
, "int yywrap(void) { return 1; }"
]

Expand All @@ -111,9 +111,9 @@ restOfFlex inPackage cf env = unlines $ concat
-- delimiters.
--
-- >>> lexComments (Just "myns.") ([("{-","-}")],["--"])
-- <YYINITIAL>"--"[^\n]* ; // BNFC: comment "--";
-- <YYINITIAL>"{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}";
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <INITIAL>"--"[^\n]* ; // BNFC: comment "--";
-- <INITIAL>"{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}";
-- <COMMENT>"-}" BEGIN INITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] ++myns.yy_mylinenumber;
lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc
Expand All @@ -128,38 +128,19 @@ lexComments ns (m,s) = vcat $ concat
-- comment.
--
-- >>> lexSingleComment (Just "mypackage.") "--"
-- <YYINITIAL>"--"[^\n]* ; // BNFC: comment "--";
-- <INITIAL>"--"[^\n]* ; // BNFC: comment "--";
--
-- >>> lexSingleComment Nothing "--"
-- <YYINITIAL>"--"[^\n]* ; // BNFC: comment "--";
-- <INITIAL>"--"[^\n]* ; // BNFC: comment "--";
--
-- >>> lexSingleComment Nothing "\""
-- <YYINITIAL>"\""[^\n]* ; // BNFC: comment "\"";
-- <INITIAL>"\""[^\n]* ; // BNFC: comment "\"";
lexSingleComment :: Maybe String -> String -> Doc
lexSingleComment _ c =
"<YYINITIAL>" <> cstring c <> "[^\\n]*"
"<INITIAL>" <> cstring c <> "[^\\n]*"
<+> ";"
<+> "// BNFC: comment" <+> cstring c <> ";"

-- -- | Create a lexer rule for single-line comments.
-- -- The first argument is -- an optional c++ namespace
-- -- The second argument is the delimiter that marks the beginning of the
-- -- comment.
-- --
-- -- >>> lexSingleComment (Just "mypackage.") "--"
-- -- <YYINITIAL>"--"[^\n]*\n ++mypackage.yy_mylinenumber; // BNFC: comment "--";
-- --
-- -- >>> lexSingleComment Nothing "--"
-- -- <YYINITIAL>"--"[^\n]*\n ++yy_mylinenumber; // BNFC: comment "--";
-- --
-- -- >>> lexSingleComment Nothing "\""
-- -- <YYINITIAL>"\""[^\n]*\n ++yy_mylinenumber; // BNFC: comment "\"";
-- lexSingleComment :: Maybe String -> String -> Doc
-- lexSingleComment ns c =
-- "<YYINITIAL>" <> cstring c <> "[^\\n]*\\n"
-- <+> "++"<> text (fromMaybe "" ns)<>"yy_mylinenumber;"
-- <+> "// BNFC: comment" <+> cstring c <> ";"

-- | Create a lexer rule for multi-line comments.
-- The first argument is an optional C++ namespace.
-- The second argument is the pair of delimiters for the multi-line comment:
Expand All @@ -169,27 +150,27 @@ lexSingleComment _ c =
-- with another. However this seems rare.
--
-- >>> lexMultiComment Nothing ("{-", "-}") "COMMENT"
-- <YYINITIAL>"{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}";
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <INITIAL>"{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}";
-- <COMMENT>"-}" BEGIN INITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] ++yy_mylinenumber;
--
-- >>> lexMultiComment (Just "foo.") ("{-", "-}") "COMMENT"
-- <YYINITIAL>"{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}";
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <INITIAL>"{-" BEGIN COMMENT; // BNFC: block comment "{-" "-}";
-- <COMMENT>"-}" BEGIN INITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] ++foo.yy_mylinenumber;
--
-- >>> lexMultiComment Nothing ("\"'", "'\"") "COMMENT"
-- <YYINITIAL>"\"'" BEGIN COMMENT; // BNFC: block comment "\"'" "'\"";
-- <COMMENT>"'\"" BEGIN YYINITIAL;
-- <INITIAL>"\"'" BEGIN COMMENT; // BNFC: block comment "\"'" "'\"";
-- <COMMENT>"'\"" BEGIN INITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] ++yy_mylinenumber;
lexMultiComment :: Maybe String -> (String, String) -> String -> Doc
lexMultiComment ns (b,e) comment = vcat
[ "<YYINITIAL>" <> cstring b <+> "BEGIN" <+> text comment <> ";"
[ "<INITIAL>" <> cstring b <+> "BEGIN" <+> text comment <> ";"
<+> "// BNFC: block comment" <+> cstring b <+> cstring e <> ";"
, commentTag <> cstring e <+> "BEGIN YYINITIAL;"
, commentTag <> cstring e <+> "BEGIN INITIAL;"
, commentTag <> ". /* skip */;"
, commentTag <> "[\\n] ++" <> text (fromMaybe "" ns) <> "yy_mylinenumber;"
]
Expand Down
Loading

0 comments on commit c95eaee

Please sign in to comment.