Skip to content

Commit

Permalink
[ fix #349 ] Make parser reentrant by using reentrant lexer
Browse files Browse the repository at this point in the history
Phew! That took me a whole day to figure out, but in the end, there
aren't too many changes.

UPDATE: the lexer now also uses the yyextra argument instead of the global
variable literal_buffer.
  • Loading branch information
andreasabel committed Apr 2, 2021
1 parent e69474c commit aaa00fd
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 37 deletions.
50 changes: 36 additions & 14 deletions source/src/BNFC/Backend/C/CFtoBisonC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ header name cf = unlines
, "/* Generate header file for lexer. */"
, "%defines \"Bison.h\""
, ""
, "/* Reentrant parser */"
, "%pure_parser"
-- This flag is deprecated in Bison 3.7, but older Bisons don't recognize
-- %define api.pure full
, "%lex-param { yyscan_t scanner }"
, "%parse-param { yyscan_t scanner }"
, ""
, concat [ "/* Turn on line/column tracking in the ", name, "lloc structure: */" ]
, "%locations"
, ""
Expand All @@ -86,11 +93,20 @@ header name cf = unlines
, ""
, "#define YYMAXDEPTH 10000000" -- default maximum stack size is 10000, but right-recursion needs O(n) stack
, ""
, "/* The type yyscan_t is defined by flex, but we need it in the parser already. */"
, "#ifndef YY_TYPEDEF_YY_SCANNER_T"
, "#define YY_TYPEDEF_YY_SCANNER_T"
, "typedef void* yyscan_t;"
, "#endif"
, ""
, "typedef struct " ++ name ++ "_buffer_state *YY_BUFFER_STATE;"
, "YY_BUFFER_STATE " ++ name ++ "_scan_string(const char *str);"
, "void " ++ name ++ "_delete_buffer(YY_BUFFER_STATE buf);"
, "extern int yylex(void);"
, "extern int " ++ name ++ "_init_lexer(FILE * inp);"
, "YY_BUFFER_STATE " ++ name ++ "_scan_string(const char *str, yyscan_t scanner);"
, "void " ++ name ++ "_delete_buffer(YY_BUFFER_STATE buf, yyscan_t scanner);"
, ""
, "extern void " ++ name ++ "lex_destroy(yyscan_t scanner);"
, "extern char* " ++ name ++ "get_text(yyscan_t scanner);"
, ""
, "extern yyscan_t " ++ name ++ "_init_lexer(FILE * inp);"
, ""
, "/* List reversal functions. */"
, concatMap reverseList $ filter isList $ allParserCatsNorm cf
Expand All @@ -110,17 +126,18 @@ unionDependentCode :: String -> String
-- Builds the text of a "%{ ... %}" section: the output of 'errorHandler' for
-- the given name, followed by C prototypes for yyparse and yylex, joined with
-- 'unlines'.
--
-- NOTE(review): this is a rendered diff without +/- markers, so two yyparse
-- prototypes appear back to back. Presumably the one-argument prototype is the
-- removed line and the one taking a yyscan_t is its replacement -- confirm
-- against the commit.
unionDependentCode name = unlines
[ "%{"
, errorHandler name
, "int yyparse(YYSTYPE *result);"
, "int yyparse(yyscan_t scanner, YYSTYPE *result);"
, ""
-- Prototype of the lexer entry point: semantic value, location and scanner
-- are all passed as arguments.
, "extern int yylex(YYSTYPE *lvalp, YYLTYPE *llocp, yyscan_t scanner);"
, "%}"
]

-- Produces the C source text of the yyerror function; 'name' is spliced into
-- the identifiers that the C code refers to.
--
-- NOTE(review): both sides of the diff are shown here without +/- markers.
-- One variant takes (YYSTYPE *result, const char *str) and reads the position
-- and offending text from the variables <name>lloc and <name>text; the other
-- takes an additional YYLTYPE *loc and yyscan_t scanner and reads the position
-- from loc and the text via <name>get_text(scanner). Presumably the former is
-- the removed code and the latter the added code -- confirm against the commit.
errorHandler :: String -> String
errorHandler name = unlines
[ "void yyerror(YYSTYPE *result, const char *str)"
[ "void yyerror(YYLTYPE *loc, yyscan_t scanner, YYSTYPE *result, const char *msg)"
, "{"
, " extern char *" ++ name ++ "text;"
, " fprintf(stderr,\"error: %d,%d: %s at %s\\n\","
, " " ++ name ++ "lloc.first_line, " ++ name ++ "lloc.first_column, str, " ++ name ++ "text);"
, " fprintf(stderr, \"error: %d,%d: %s at %s\\n\","
, " loc->first_line, loc->first_column, msg, " ++ name ++ "get_text(scanner));"
, "}"
]

Expand All @@ -147,13 +164,18 @@ parseMethod cf name cat = unlines $ concat
where
body stringParser = concat
[ [ "{"
, concat [ " ", name, "_init_lexer(", file, ");" ]
, " YYSTYPE result;"
, " yyscan_t scanner = " ++ name ++ "_init_lexer(", file, ");"
, " if (!scanner) {"
, " fprintf(stderr, \"Failed to initialize lexer.\\n\");"
, " return 0;"
, " }"
]
, [ " YY_BUFFER_STATE buf = " ++ name ++ "_scan_string(str);" | stringParser ]
, [ " int error = yyparse(&result);" ]
, [ " " ++ name ++ "_delete_buffer(buf);" | stringParser ]
, [ " if (error)"
, [ " YY_BUFFER_STATE buf = " ++ name ++ "_scan_string(str, scanner);" | stringParser ]
, [ " int error = yyparse(scanner, &result);" ]
, [ " " ++ name ++ "_delete_buffer(buf, scanner);" | stringParser ]
, [ " " ++ name ++ "lex_destroy(scanner);"
, " if (error)"
, " { /* Failure */"
, " return 0;"
, " }"
Expand Down
51 changes: 28 additions & 23 deletions source/src/BNFC/Backend/C/CFtoFlexC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,19 @@ cf2flex name cf = (, env) $ unlines
prelude :: Bool -> String -> String
prelude stringLiterals name = unlines $ concat
[ [ "/* -*- c -*- This FLex file was machine-generated by the BNF converter */"
, ""
-- noinput and nounput are most often unused
-- https://stackoverflow.com/questions/39075510/option-noinput-nounput-what-are-they-for
, "%option noyywrap noinput nounput"
, "%top{"
, "%option reentrant bison-bridge bison-locations"
, ""
]
, when stringLiterals
[ "/* Additional data for the lexer: a buffer for lexing string literals. */"
, "%option extra-type=\"Buffer\""
, ""
]
, [ "%top{"
, "/* strdup was not in the ISO C standard before 6/2019 (C2x), but in POSIX 1003.1."
, " * See: https://en.cppreference.com/w/c/experimental/dynamic/strdup"
, " * Setting _POSIX_C_SOURCE to 200809L activates strdup in string.h."
Expand All @@ -68,8 +77,6 @@ prelude stringLiterals name = unlines $ concat
, "#include \"Absyn.h\""
, "#include \"Bison.h\""
, ""
, "#define yylval " ++ name ++ "lval"
, "#define yylloc " ++ name ++ "lloc"
, "#define init_lexer " ++ name ++ "_init_lexer"
, ""
]
Expand All @@ -92,7 +99,7 @@ prelude stringLiterals name = unlines $ concat
, " }"
, " }"
, "}"
, "#define YY_USER_ACTION update_loc(&yylloc, yytext);"
, "#define YY_USER_ACTION update_loc(yylloc, yytext);"
, ""
, "%}"
]
Expand All @@ -110,7 +117,7 @@ preludeForBuffer bufferH =
, "#define LITERAL_BUFFER_INITIAL_SIZE 1024"
, ""
, "/* The pointer to the literal buffer. */"
, "static Buffer literal_buffer = NULL;"
, "#define literal_buffer yyextra"
, ""
, "/* Initialize the literal buffer. */"
, "#define LITERAL_BUFFER_CREATE() literal_buffer = newBuffer(LITERAL_BUFFER_INITIAL_SIZE)"
Expand Down Expand Up @@ -164,9 +171,9 @@ restOfFlex cf env = unlines $ concat
, userDefTokens
, ifC catString $ lexStrings "yylval" "_STRING_" "_ERROR_"
, ifC catChar $ lexChars "yylval" "_CHAR_"
, ifC catDouble [ "<INITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t yylval._double = atof(yytext); return _DOUBLE_;" ]
, ifC catInteger [ "<INITIAL>{DIGIT}+ \t yylval._int = atoi(yytext); return _INTEGER_;" ]
, ifC catIdent [ "<INITIAL>{LETTER}{IDENT}* \t yylval._string = strdup(yytext); return _IDENT_;" ]
, ifC catDouble [ "<INITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? \t yylval->_double = atof(yytext); return _DOUBLE_;" ]
, ifC catInteger [ "<INITIAL>{DIGIT}+ \t yylval->_int = atoi(yytext); return _INTEGER_;" ]
, ifC catIdent [ "<INITIAL>{LETTER}{IDENT}* \t yylval->_string = strdup(yytext); return _IDENT_;" ]
, [ "<INITIAL>[ \\t\\r\\n\\f] \t /* ignore white space. */;"
, "<INITIAL>. \t return _ERROR_;"
, ""
Expand All @@ -179,28 +186,26 @@ restOfFlex cf env = unlines $ concat
-- Keep the lines 's' only if 'isUsedCat' reports the token category 'cat' as
-- used in the grammar 'cf'; otherwise contribute the empty list.
ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
-- One flex rule per (name, exp) pair from 'tokenPragmas cf': the pattern is
-- 'printRegFlex exp' in the INITIAL start condition, and the action stores a
-- strdup'ed copy of yytext in the _string member of yylval and returns the
-- token symbol for 'name'.
--
-- NOTE(review): rendered diff without +/- markers. The two action lines differ
-- only in "yylval._string" versus "yylval->_string"; presumably the "." form
-- is the removed line and the "->" form the added one -- confirm.
userDefTokens =
[ "<INITIAL>" ++ printRegFlex exp ++
" \t yylval._string = strdup(yytext); return " ++ sName name ++ ";"
" \t yylval->_string = strdup(yytext); return " ++ sName name ++ ";"
| (name, exp) <- tokenPragmas cf
]
-- Token symbol: the entry for 'Tokentype n' in 'env', or 'n' itself if absent.
where sName n = fromMaybe n $ Map.lookup (Tokentype n) env
-- C source lines defining init_lexer.
--
-- NOTE(review): rendered diff without +/- markers, so two list literals appear
-- back to back. The first defines "void init_lexer(FILE *inp)", which calls
-- yyrestart(inp) when inp is non-null and sets all four yylloc fields to 1.
-- The second defines "yyscan_t init_lexer(FILE *inp)", which creates a scanner
-- with yylex_init_extra(NULL, &scanner), returns 0 if that call reports
-- failure, calls yyrestart(inp, scanner) when inp is non-null, and returns the
-- scanner. Presumably the first is the removed code and the second the added
-- code -- confirm against the commit.
-- The second variant sets no location fields; presumably the initial location
-- is now provided by the parser -- TODO confirm.
footer =
[
"void init_lexer(FILE *inp)",
"{",
" if (inp) yyrestart(inp);",
" yylloc.first_line = 1;",
" yylloc.first_column = 1;",
" yylloc.last_line = 1;",
" yylloc.last_column = 1;",
"}"
[ "yyscan_t init_lexer(FILE *inp)"
, "{"
, " yyscan_t scanner;"
, " if (yylex_init_extra(NULL, &scanner)) return 0;"
, " if (inp) yyrestart(inp, scanner);"
, " return scanner;"
, "}"
]

-- | Lexing of strings, converting escaped characters.
lexStrings :: String -> String -> String -> [String]
lexStrings yylval stringToken errorToken =
[ "<INITIAL>\"\\\"\" \t LITERAL_BUFFER_CREATE(); BEGIN STRING;"
, "<STRING>\\\\ \t BEGIN ESCAPED;"
, "<STRING>\\\" \t " ++ yylval ++ "._string = LITERAL_BUFFER_HARVEST(); BEGIN INITIAL; return " ++ stringToken ++ ";"
, "<STRING>\\\" \t " ++ yylval ++ "->_string = LITERAL_BUFFER_HARVEST(); BEGIN INITIAL; return " ++ stringToken ++ ";"
, "<STRING>. \t LITERAL_BUFFER_APPEND_CHAR(yytext[0]);"
, "<ESCAPED>n \t LITERAL_BUFFER_APPEND_CHAR('\\n'); BEGIN STRING;"
, "<ESCAPED>\\\" \t LITERAL_BUFFER_APPEND_CHAR('\"'); BEGIN STRING;"
Expand All @@ -215,10 +220,10 @@ lexChars :: String -> String -> [String]
-- Flex rules for character literals. 'yylval' is spliced in as the expression
-- whose _char member receives the character; 'charToken' is the token returned.
-- The rules move through the start conditions CHAR (after the opening quote),
-- CHARESC (after a backslash) and CHAREND (awaiting the closing quote), then
-- return to INITIAL.
--
-- NOTE(review): rendered diff without +/- markers. The four rules that assign
-- the character appear twice, once with "._char" and once with "->_char";
-- presumably the "." lines are the removed ones and the "->" lines the added
-- ones -- confirm against the commit.
lexChars yylval charToken =
[ "<INITIAL>\"'\" \tBEGIN CHAR;"
, "<CHAR>\\\\ \t BEGIN CHARESC;"
, "<CHAR>[^'] \t BEGIN CHAREND; " ++ yylval ++ "._char = yytext[0]; return " ++ charToken ++ ";"
, "<CHARESC>n \t BEGIN CHAREND; " ++ yylval ++ "._char = '\\n'; return " ++ charToken ++ ";"
, "<CHARESC>t \t BEGIN CHAREND; " ++ yylval ++ "._char = '\\t'; return " ++ charToken ++ ";"
, "<CHARESC>. \t BEGIN CHAREND; " ++ yylval ++ "._char = yytext[0]; return " ++ charToken ++ ";"
, "<CHAR>[^'] \t BEGIN CHAREND; " ++ yylval ++ "->_char = yytext[0]; return " ++ charToken ++ ";"
, "<CHARESC>n \t BEGIN CHAREND; " ++ yylval ++ "->_char = '\\n'; return " ++ charToken ++ ";"
, "<CHARESC>t \t BEGIN CHAREND; " ++ yylval ++ "->_char = '\\t'; return " ++ charToken ++ ";"
, "<CHARESC>. \t BEGIN CHAREND; " ++ yylval ++ "->_char = yytext[0]; return " ++ charToken ++ ";"
, "<CHAREND>\"'\" \t BEGIN INITIAL;"
]

Expand Down

0 comments on commit aaa00fd

Please sign in to comment.