Skip to content

Commit

Permalink
Fix issue #146 in C and C++.
Browse files Browse the repository at this point in the history
Do some refactoring to remove some related duplication.
  • Loading branch information
gdetrez committed Jun 24, 2015
1 parent 316e5ed commit 9402769
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 60 deletions.
116 changes: 93 additions & 23 deletions source/src/BNFC/Backend/C/CFtoFlexC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@
**************************************************************
-}
module BNFC.Backend.C.CFtoFlexC (cf2flex) where
module BNFC.Backend.C.CFtoFlexC (cf2flex, lexComments, cMacros) where

import Data.Maybe (fromMaybe)

import BNFC.CF
import BNFC.Backend.CPP.NoSTL.RegToFlex
import BNFC.Backend.Common.NamedVariables
import BNFC.PrettyPrint
import BNFC.Utils (cstring)

--The environment must be returned for the parser to use.
cf2flex :: String -> CF -> (String, SymEnv)
Expand Down Expand Up @@ -85,8 +89,8 @@ prelude name = unlines
"%}"
]

--For now all categories are included.
--Optimally only the ones that are used should be generated.
-- For now all categories are included.
-- Optimally only the ones that are used should be generated.
cMacros :: String
cMacros = unlines
[
Expand All @@ -110,7 +114,7 @@ lexSymbols ss = concatMap transSym ss
restOfFlex :: CF -> SymEnv -> String
restOfFlex cf env = concat
[
lexComments (comments cf),
render $ lexComments Nothing (comments cf),
userDefTokens,
ifC catString strStates,
ifC catChar chStates,
Expand Down Expand Up @@ -157,26 +161,92 @@ restOfFlex cf env = concat
]
footer = "void initialize_lexer(FILE *inp) { yyrestart(inp); BEGIN YYINITIAL; }"

lexComments :: ([(String, String)], [String]) -> String
lexComments (m,s) =
(unlines (map lexSingleComment s))
++ (unlines (map lexMultiComment m))

lexSingleComment :: String -> String
lexSingleComment c =
"<YYINITIAL>\"" ++ c ++ "\"[^\\n]*\\n ++yy_mylinenumber; \t /* BNFC single-line comment */;"

--There might be a possible bug here if a language includes 2 multi-line comments.
--They could possibly start a comment with one character and end it with another.
--However this seems rare.
lexMultiComment :: (String, String) -> String
lexMultiComment (b,e) = unlines [
"<YYINITIAL>\"" ++ b ++ "\" \t BEGIN COMMENT;",
"<COMMENT>\"" ++ e ++ "\" \t BEGIN YYINITIAL;",
"<COMMENT>. \t /* BNFC multi-line comment */;",
"<COMMENT>[\\n] ++yy_mylinenumber ; \t /* BNFC multi-line comment */;"
]
-- ---------------------------------------------------------------------------
-- Comments

-- | Create flex rules for single-line and multi-lines comments.
-- The first argument is an optional namespace (for C++); the second
-- argument is the set of comment delimiters as returned by BNFC.CF.comments.
--
-- This function is only compiling the results of applying either
-- lexSingleComment or lexMultiComment on each comment delimiter or pair of
-- delimiters.
--
-- >>> lexComments (Just "myns.") ([("{-","-}")],["--"])
-- <YYINITIAL>"--"[^\n]*\n ++myns.yy_mylinenumber; // BNFC: comment "--";
-- <YYINITIAL>"{-" BEGIN COMMENT; // BNFC: comment "{-" "-}";
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] ++myns.yy_mylinenumber;
lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc
lexComments ns (m,s) =
vcat (map (lexSingleComment ns) s ++ map (lexMultiComment ns) m)

-- | Create a lexer rule for single-line comments.
-- The first argument is -- an optional c++ namespace
-- The second argument is the delimiter that marks the beginning of the
-- comment.
--
-- >>> lexSingleComment (Just "mypackage.") "--"
-- <YYINITIAL>"--"[^\n]*\n ++mypackage.yy_mylinenumber; // BNFC: comment "--";
--
-- >>> lexSingleComment Nothing "--"
-- <YYINITIAL>"--"[^\n]*\n ++yy_mylinenumber; // BNFC: comment "--";
--
-- >>> lexSingleComment Nothing "\""
-- <YYINITIAL>"\""[^\n]*\n ++yy_mylinenumber; // BNFC: comment "\"";
lexSingleComment :: Maybe String -> String -> Doc
lexSingleComment ns c =
"<YYINITIAL>" <> cstring c <> "[^\\n]*\\n"
<+> "++"<> text (fromMaybe "" ns)<>"yy_mylinenumber;"
<+> "// BNFC: comment" <+> cstring c <> ";"

-- | Create a lexer rule for multi-lines comments.
-- The first argument is -- an optional c++ namespace
-- The second arguments is the pair of delimiter for the multi-lines comment:
-- start deleminiter and end delimiter.
-- There might be a possible bug here if a language includes 2 multi-line
-- comments. They could possibly start a comment with one character and end it
-- with another. However this seems rare.
--
-- >>> lexMultiComment Nothing ("{-", "-}")
-- <YYINITIAL>"{-" BEGIN COMMENT; // BNFC: comment "{-" "-}";
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] ++yy_mylinenumber;
--
-- >>> lexMultiComment (Just "foo.") ("{-", "-}")
-- <YYINITIAL>"{-" BEGIN COMMENT; // BNFC: comment "{-" "-}";
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] ++foo.yy_mylinenumber;
--
-- >>> lexMultiComment Nothing ("\"'", "'\"")
-- <YYINITIAL>"\"'" BEGIN COMMENT; // BNFC: comment "\"'" "'\"";
-- <COMMENT>"'\"" BEGIN YYINITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] ++yy_mylinenumber;
lexMultiComment :: Maybe String -> (String, String) -> Doc
lexMultiComment ns (b,e) = vcat
[ "<YYINITIAL>" <> cstring b <+> "BEGIN COMMENT;"
<+> "// BNFC: comment" <+> cstring b <+> cstring e <> ";"
, "<COMMENT>" <> cstring e <+> "BEGIN YYINITIAL;"
, "<COMMENT>. /* skip */;"
, "<COMMENT>[\\n] ++"<> text (fromMaybe "" ns) <>"yy_mylinenumber;"
]

-- --There might be a possible bug here if a language includes 2 multi-line comments.
-- --They could possibly start a comment with one character and end it with another.
-- --However this seems rare.
-- --
-- lexMultiComment :: Maybe String -> (String, String) -> String
-- lexMultiComment inPackage (b,e) = unlines [
-- "<YYINITIAL>\"" ++ b ++ "\" \t BEGIN COMMENT;",
-- "<COMMENT>\"" ++ e ++ "\" \t BEGIN YYINITIAL;",
-- "<COMMENT>. \t /* BNFC multi-line comment */;",
-- "<COMMENT>[\\n] ++" ++ nsString inPackage ++ "yy_mylinenumber ; \t /* BNFC multi-line comment */;"
-- ---- "\\n ++yy_mylinenumber ;"
-- ]
--Helper function that escapes characters in strings
escapeChars :: String -> String
escapeChars [] = []
Expand Down
40 changes: 3 additions & 37 deletions source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@
module BNFC.Backend.CPP.NoSTL.CFtoFlex (cf2flex) where

import BNFC.CF
import BNFC.Backend.C.CFtoFlexC (lexComments, cMacros)
import BNFC.Backend.CPP.NoSTL.RegToFlex
import BNFC.Backend.Common.NamedVariables
import BNFC.Backend.CPP.STL.STLUtils
import BNFC.PrettyPrint

--The environment must be returned for the parser to use.
cf2flex :: Maybe String -> String -> CF -> (String, SymEnv)
Expand Down Expand Up @@ -81,20 +83,6 @@ prelude inPackage _ = unlines
"%}"
]

--For now all categories are included.
--Optimally only the ones that are used should be generated.
cMacros :: String
cMacros = unlines
[
"LETTER [a-zA-Z]",
"CAPITAL [A-Z]",
"SMALL [a-z]",
"DIGIT [0-9]",
"IDENT [a-zA-Z0-9'_]",
"%START YYINITIAL COMMENT CHAR CHARESC CHAREND STRING ESCAPED",
"%%"
]

lexSymbols :: SymEnv -> String
lexSymbols ss = concatMap transSym ss
where
Expand All @@ -106,7 +94,7 @@ lexSymbols ss = concatMap transSym ss
restOfFlex :: Maybe String -> CF -> SymEnv -> String
restOfFlex inPackage cf env = concat
[
lexComments inPackage (comments cf),
render $ lexComments inPackage (comments cf),
userDefTokens,
ifC catString strStates,
ifC catChar chStates,
Expand Down Expand Up @@ -159,28 +147,6 @@ restOfFlex inPackage cf env = concat
]


lexComments :: Maybe String -> ([(String, String)], [String]) -> String
lexComments inPackage (m,s) =
(unlines (map (lexSingleComment inPackage) s))
++ (unlines (map (lexMultiComment inPackage) m))

lexSingleComment :: Maybe String -> String -> String
lexSingleComment inPackage c =
"<YYINITIAL>\"" ++ c ++ "\"[^\\n]*\\n ++" ++ nsString inPackage ++ "yy_mylinenumber ; \t /* BNFC single-line comment */;"

--There might be a possible bug here if a language includes 2 multi-line comments.
--They could possibly start a comment with one character and end it with another.
--However this seems rare.
lexMultiComment :: Maybe String -> (String, String) -> String
lexMultiComment inPackage (b,e) = unlines [
"<YYINITIAL>\"" ++ b ++ "\" \t BEGIN COMMENT;",
"<COMMENT>\"" ++ e ++ "\" \t BEGIN YYINITIAL;",
"<COMMENT>. \t /* BNFC multi-line comment */;",
"<COMMENT>[\\n] ++" ++ nsString inPackage ++ "yy_mylinenumber ; \t /* BNFC multi-line comment */;"
---- "\\n ++yy_mylinenumber ;"
]


--Helper function that escapes characters in strings
escapeChars :: String -> String
escapeChars [] = []
Expand Down
12 changes: 12 additions & 0 deletions source/src/BNFC/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ module BNFC.Utils
, lowerCase, upperCase, mixedCase, camelCase, snakeCase
, replace, prParenth
, writeFileRep
, cstring
) where

import Control.Arrow ((&&&))
Expand Down Expand Up @@ -237,3 +238,14 @@ mixedCase = text . mkName [] MixedCase
-- my_ident
snakeCase :: String -> Doc
snakeCase = text . mkName [] SnakeCase

-- ESCAPING

-- | a function that renders a c-like string with escaped characters:
-- >>> cstring "foobar"
-- "foobar"
--
-- >>> cstring "foobar\""
-- "foobar\""
cstring :: String -> Doc
cstring = text . show

0 comments on commit 9402769

Please sign in to comment.