Skip to content

Commit

Permalink
[ haskell, BNFC#423 ] structured errors in the Haskell backend
Browse files Browse the repository at this point in the history
A new option "--errors" is introduced, which can change the parser
failure type from 'String' to a record type.
  • Loading branch information
Anton Vl. Kalinin committed Aug 8, 2022
1 parent 7018e84 commit 99f98a8
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 39 deletions.
22 changes: 12 additions & 10 deletions source/src/BNFC/Backend/Haskell.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import qualified BNFC.Backend.Common.Makefile as Makefile
import BNFC.CF
import BNFC.Options
( SharedOptions(..), TokenText(..), AlexVersion(..), HappyMode(..)
, isDefault, printOptions
, isDefault, printOptions, ErrorType (..)
)
import BNFC.Utils (when, table, getZonedTimeTruncatedToSeconds)

Expand Down Expand Up @@ -68,12 +68,12 @@ makeHaskell opts cf = do
-- Generate Happy parser and matching test program.
do
mkfile (happyFile opts) commentWithEmacsModeHint $
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) cf
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) (errorType opts) cf
-- liftIO $ printf "%s Tested with Happy 1.15\n" (happyFile opts)
mkfile (tFile opts) comment $ testfile opts cf

-- Both Happy parser and skeleton (template) rely on Err.
mkfile (errFile opts) comment $ mkErrM errMod
mapM_ (mkfile (errFile opts) comment) $ mkErrM errMod (errorType opts)
mkfile (templateFile opts) comment $ cf2Template (templateFileM opts) absMod (functor opts) cf

-- Generate txt2tags documentation.
Expand Down Expand Up @@ -335,7 +335,7 @@ testfile opts cf = unlines $ concat $
[ [ [ "import " , absFileM opts , " (" ++ if_glr impTopCat ++ ")" ] ]
, [ [ "import " , layoutFileM opts , " ( resolveLayout )" ] | lay ]
, [ [ "import " , alexFileM opts , " ( Token, mkPosToken )" ]
, [ "import " , happyFileM opts , " ( " ++ impParser ++ ", myLexer" ++ impParGLR ++ " )" ]
, [ "import " , happyFileM opts , " ( " ++ impParser ++ ", myLexer" ++ impParGLR ++ ", Err )" ]
, [ "import " , printerFileM opts , " ( Print, printTree )" ]
, [ "import " , templateFileM opts , " ()" ]
]
Expand All @@ -344,7 +344,6 @@ testfile opts cf = unlines $ concat $
, [ "import qualified Data.Map ( Map, lookup, toList )" | use_glr ]
, [ "import Data.Maybe ( fromJust )" | use_glr ]
, [ ""
, "type Err = Either String"
, if use_glr
then "type ParseFun a = [[Token]] -> (GLRResult, GLR_Output (Err a))"
else "type ParseFun a = [Token] -> Err a"
Expand All @@ -357,7 +356,7 @@ testfile opts cf = unlines $ concat $
, "runFile v p f = putStrLn f >> readFile f >>= run v p"
, ""
, "run :: (" ++ xpr ++ if_glr "TreeDecode a, " ++ "Print a, Show a) => Verbosity -> ParseFun a -> " ++ tokenTextType (tokenText opts) ++ " -> IO ()"
, (if use_glr then runGlr else runStd use_xml) myLLexer
, if use_glr then runGlr myLLexer else runStd use_xml myLLexer (errorType opts)
, "showTree :: (Show a, Print a) => Int -> a -> IO ()"
, "showTree v tree = do"
, " putStrV v $ \"\\n[Abstract Syntax]\\n\\n\" ++ show tree"
Expand Down Expand Up @@ -408,17 +407,20 @@ testfile opts cf = unlines $ concat $
(hasTopLevelLayout, layoutKeywords, _) = layoutPragmas cf
useTopLevelLayout = isJust hasTopLevelLayout

runStd :: Bool -> (String -> String) -> String
runStd xml myLLexer = unlines $ concat
runStd :: Bool -> (String -> String) -> ErrorType -> String
runStd xml myLLexer errorType = unlines $ concat
[ [ "run v p s ="
, " case p ts of"
, " Left err -> do"
, " putStrLn \"\\nParse Failed...\\n\""
, " putStrV v \"Tokens:\""
, " mapM_ (putStrV v . showPosToken . mkPosToken) ts"
-- , " putStrV v $ show ts"
, " putStrLn err"
, " exitFailure"
]
, case errorType of
ErrorTypeString -> [ " putStrLn err" ]
ErrorTypeStructured -> [ " putStrLn $ \"Error: \" ++ show err" ]
, [ " exitFailure"
, " Right tree -> do"
, " putStrLn \"\\nParse Successful!\""
, " showTree v tree"
Expand Down
7 changes: 5 additions & 2 deletions source/src/BNFC/Backend/Haskell/CFtoAlex3.hs
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,11 @@ restOfAlex tokenText cf = concat
, "-- A modified \"posn\" wrapper."
, "-------------------------------------------------------------------"
, ""
, "data Posn = Pn !Int !Int !Int"
, " deriving (Eq, Show, Ord)"
, "data Posn = Pn"
, " { pnAbsolute :: !Int"
, " , pnLine :: !Int"
, " , pnColumn :: !Int"
, " } deriving (Eq, Show, Ord)"
, ""
, "alexStartPos :: Posn"
, "alexStartPos = Pn 0 1 1"
Expand Down
122 changes: 100 additions & 22 deletions source/src/BNFC/Backend/Haskell/CFtoHappy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import Data.List (intersperse)
import BNFC.CF
import BNFC.Backend.Common.StrUtils (escapeChars)
import BNFC.Backend.Haskell.Utils
import BNFC.Options (HappyMode(..), TokenText(..))
import BNFC.Options (HappyMode(..), TokenText(..), ErrorType(..))
import BNFC.PrettyPrint
import BNFC.Utils

Expand All @@ -42,17 +42,18 @@ cf2Happy
-> HappyMode -- ^ Happy mode.
-> TokenText -- ^ Use @ByteString@ or @Text@?
-> Bool -- ^ AST is a functor?
-> ErrorType -- ^ The error type in the parser result type.
-> CF -- ^ Grammar.
-> String -- ^ Generated code.
cf2Happy name absName lexName mode tokenText functor cf = unlines
cf2Happy name absName lexName mode tokenText functor errorType cf = unlines
[ header name absName lexName tokenText eps
, render $ declarations mode functor eps
, render $ tokens cf functor
, delimiter
, specialRules absName functor tokenText cf
, render $ prRules absName functor (rulesForHappy absName functor cf)
, ""
, footer absName tokenText functor eps cf
, footer absName tokenText functor eps errorType cf
]
where
eps = toList $ allEntryPoints cf
Expand All @@ -66,7 +67,14 @@ header modName absName lexName tokenText eps = unlines $ concat
, "{-# LANGUAGE PatternSynonyms #-}"
, ""
, "module " ++ modName
, " ( happyError"
, " ( Err"
, " , Failure(..)"
, " , InvalidTokenFailure(..)"
, " , UnexpectedTokenFailure(..)"
, " , UnexpectedEofFailure(..)"
-- TODO: maybe we should stop exporting happyError, since there is no reason
-- to use it outside and its type can vary?
, " , happyError"
, " , myLexer"
]
, map ((" , " ++) . render . parserName) eps
Expand All @@ -91,6 +99,8 @@ header modName absName lexName tokenText eps = unlines $ concat
-- -- no lexer declaration
-- %monad { Err } { (>>=) } { return }
-- %tokentype {Token}
-- %errorhandlertype explist
-- %error { happyError }
--
-- >>> declarations Standard True [Cat "A", Cat "B", ListCat (Cat "B")]
-- %name pA_internal A
Expand All @@ -99,14 +109,18 @@ header modName absName lexName tokenText eps = unlines $ concat
-- -- no lexer declaration
-- %monad { Err } { (>>=) } { return }
-- %tokentype {Token}
-- %errorhandlertype explist
-- %error { happyError }
declarations :: HappyMode -> Bool -> [Cat] -> Doc
declarations mode functor ns = vcat
[ vcat $ map generateP ns
, case mode of
Standard -> "-- no lexer declaration"
GLR -> "%lexer { myLexer } { Err _ }",
"%monad { Err } { (>>=) } { return }",
"%tokentype" <+> braces (text tokenName)
GLR -> "%lexer { myLexer } { Err _ }"
, "%monad { Err } { (>>=) } { return }"
, "%tokentype" <+> braces (text tokenName)
, "%errorhandlertype explist"
, "%error { happyError }"
]
where
generateP n = "%name" <+> parserName n <> (if functor then "_internal" else "") <+> text (identCat n)
Expand Down Expand Up @@ -255,24 +269,88 @@ prRules absM functor = vsep . map prOne

-- Finally, some haskell code.

footer :: ModuleName -> TokenText -> Bool -> [Cat] -> CF -> String
footer absName tokenText functor eps _cf = unlines $ concat
footer :: ModuleName -> TokenText -> Bool -> [Cat] -> ErrorType -> CF -> String
footer absName tokenText functor eps errorType _cf = unlines $ concat
[ [ "{"
, ""
, "type Err = Either String"
, "-- | The parser failure type."
, "--"
, "-- It can contain fields of more specific failure record types, so that they"
, "-- could easily be extended with new fields."
, "data Failure"
, " = FailureInvalidToken !InvalidTokenFailure"
, " | FailureUnexpectedToken !UnexpectedTokenFailure"
, " | FailureUnexpectedEof !UnexpectedEofFailure"
, " deriving (Show, Eq)"
, ""
, "happyError :: [" ++ tokenName ++ "] -> Err a"
, "happyError ts = Left $"
, " \"syntax error at \" ++ tokenPos ts ++ "
, " case ts of"
, " [] -> []"
, " [Err _] -> \" due to lexer error\""
, unwords
[ " t:_ -> \" before `\" ++"
, "(prToken t)"
-- , tokenTextUnpack tokenText "(prToken t)"
, "++ \"'\""
]
, "-- | The lexer error type."
, "newtype InvalidTokenFailure = InvalidTokenFailure"
, " { itfPosn :: Posn -- ^ The position of the beginning of an invalid token."
, " } deriving (Show, Eq)"
, ""
, "-- | The parser error: no production is found to match a token."
, "data UnexpectedTokenFailure = UnexpectedTokenFailure"
, " { utfPosn :: !Posn -- ^ The position of the beginning of the unexpected token."
, " , utfTokenText :: !(" ++ tokenTextType tokenText ++ ")"
, " , utfExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar."
, " } deriving (Show, Eq)"
, ""
, "-- | The parser error: the end of file is encountered but a token is expected."
, "newtype UnexpectedEofFailure = UnexpectedEofFailure"
, " { ueofExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar."
, " } deriving (Show, Eq)"
, ""
]
, case errorType of
ErrorTypeStructured ->
[ "type Err = Either Failure"
, ""
, "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a"
, "happyError = Left . uncurry mkFailure"
]
ErrorTypeString ->
[ "type Err = Either String"
, ""
, "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a"
, "happyError = Left . failureToString . uncurry mkFailure"
, ""
, "failureToString :: Failure -> String"
, "failureToString f ="
, " \"syntax error at \" ++ pos ++ "
, " case f of"
, " FailureUnexpectedEof _ -> []"
, " FailureInvalidToken _ -> \" due to lexer error\""
, unwords
[ " FailureUnexpectedToken ut -> \" before `\" ++"
, tokenTextUnpack tokenText "(utfTokenText ut)"
, "++ \"'\""
]
, " where"
, " pos = case f of"
, " FailureInvalidToken it -> printPosn (itfPosn it)"
, " FailureUnexpectedToken ut -> printPosn (utfPosn ut)"
, " FailureUnexpectedEof _ -> \"end of file\""
]
, [ ""
, "mkFailure :: [" ++ tokenName ++ "] -> [String] -> Failure"
, "mkFailure ts expectedTokens = case ts of"
, " [] ->"
, " FailureUnexpectedEof"
, " UnexpectedEofFailure"
, " { ueofExpectedTokens = expectedTokens"
, " }"
, " [Err pos] ->"
, " FailureInvalidToken"
, " InvalidTokenFailure"
, " { itfPosn = pos"
, " }"
, " t : _ ->"
, " FailureUnexpectedToken"
, " UnexpectedTokenFailure"
, " { utfPosn = tokenPosn t"
, " , utfTokenText = tokenText t"
, " , utfExpectedTokens = expectedTokens"
, " }"
, ""
, "myLexer :: " ++ tokenTextType tokenText ++ " -> [" ++ tokenName ++ "]"
, "myLexer = tokens"
Expand Down
11 changes: 9 additions & 2 deletions source/src/BNFC/Backend/Haskell/MkErrM.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,16 @@
module BNFC.Backend.Haskell.MkErrM where

import BNFC.PrettyPrint
import BNFC.Options (ErrorType(..))

mkErrM :: String -> Doc
mkErrM errMod = vcat
-- | Creates @ErrM.hs@ file if needed.
--
-- It returns 'Nothing' if there is no need to create it.
mkErrM :: String -> ErrorType -> Maybe Doc
mkErrM _ ErrorTypeStructured = Nothing
-- ErrM.hs is only for backward compatibility with old code using string
-- errors, so that we don't create it in case of structured errors.
mkErrM errMod ErrorTypeString = Just $ vcat
[ "{-# LANGUAGE CPP #-}"
, ""
, "#if __GLASGOW_HASKELL__ >= 708"
Expand Down
4 changes: 2 additions & 2 deletions source/src/BNFC/Backend/HaskellGADT.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ makeHaskellGadt opts cf = do
mkHsFileHint (alexFile opts) $ cf2alex3 lexMod (tokenText opts) cf
liftIO $ putStrLn " (Use Alex 3 to compile.)"
mkHsFileHint (happyFile opts) $
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False cf
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False (errorType opts) cf
liftIO $ putStrLn " (Tested with Happy 1.15 - 1.20)"
mkHsFile (templateFile opts) $ cf2Template (templateFileM opts) absMod cf
mkHsFile (printerFile opts) $ cf2Printer StringToken False True prMod absMod cf
when (hasLayout cf) $ mkHsFile (layoutFile opts) $
cf2Layout layMod lexMod cf
mkHsFile (tFile opts) $ Haskell.testfile opts cf
mkHsFile (errFile opts) $ mkErrM errMod
mapM_ (mkHsFile (errFile opts)) $ mkErrM errMod (errorType opts)
Makefile.mkMakefile opts $ Haskell.makefile opts cf
case xml opts of
2 -> makeXML opts True cf
Expand Down
24 changes: 23 additions & 1 deletion source/src/BNFC/Options.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module BNFC.Options
, SharedOptions(..)
, defaultOptions, isDefault, printOptions
, AlexVersion(..), HappyMode(..), OCamlParser(..), JavaLexerParser(..)
, RecordPositions(..), TokenText(..)
, RecordPositions(..), TokenText(..), ErrorType(..)
, Ansi(..)
, InPackage
, removedIn290
Expand Down Expand Up @@ -83,6 +83,12 @@ instance Show Target where
show TargetPygments = "Pygments"
show TargetCheck = "Check LBNF file"

-- | Which error type to use in the generated parser result?
data ErrorType
= ErrorTypeString -- ^ Errors are plain strings.
| ErrorTypeStructured -- ^ Errors are values of a record/structure type.
deriving (Show,Eq,Ord)

-- | Which version of Alex is targeted?
data AlexVersion = Alex3
deriving (Show,Eq,Ord,Bounded,Enum)
Expand Down Expand Up @@ -139,6 +145,7 @@ data SharedOptions = Options
, glr :: HappyMode -- ^ Happy option @--glr@.
, xml :: Int -- ^ Options @--xml@, generate DTD and XML printers.
, agda :: Bool -- ^ Option @--agda@. Create bindings for Agda?
, errorType :: ErrorType -- ^ An error type to use in the parser result.
--- OCaml specific
, ocamlParser :: OCamlParser -- ^ Option @--menhir@ to switch to @Menhir@.
--- Java specific
Expand Down Expand Up @@ -173,6 +180,7 @@ defaultOptions = Options
, glr = Standard
, xml = 0
, agda = False
, errorType = ErrorTypeString
-- OCaml specific
, ocamlParser = OCamlYacc
-- Java specific
Expand Down Expand Up @@ -233,6 +241,9 @@ printOptions opts = unwords . concat $
, [ "--xml" | xml opts == 1 ]
, [ "--xmlt" | xml opts == 2 ]
, [ "--agda" | agda opts ]
, case errorType opts of
ErrorTypeString -> []
ErrorTypeStructured -> [ "--errors=structured" ]
-- C# options:
, [ "--vs" | visualStudio opts ]
, [ "--wfc" | wcf opts ]
Expand Down Expand Up @@ -376,6 +387,9 @@ specificOptions =
, ( Option [] ["generic"] (NoArg (\o -> pure o {generic = True}))
"Derive Data, Generic, and Typeable instances for AST types"
, haskellTargets )
, ( Option [] ["errors"] (ReqArg parseAndSetErrorType "TYPE")
"Set the parser error type. Valid values are `string' (default) and `structured'"
, [TargetHaskell] )
, ( Option [] ["xml"] (NoArg (\o -> pure o {xml = 1}))
"Also generate a DTD and an XML printer"
, haskellTargets )
Expand All @@ -387,6 +401,14 @@ specificOptions =
"Also generate Agda bindings for the abstract syntax"
, [TargetHaskell] )
]
where
parseAndSetErrorType arg o = (\t -> o {errorType = t}) <$> parseErrorType arg

parseErrorType s = case s of
"string" -> pure ErrorTypeString
"structured" -> pure ErrorTypeStructured
_ -> Left $ "Wrong error type: " ++ show s


-- | The list of specific options for a target.
specificOptions' :: Target -> [OptDescr (SharedOptions -> Either String SharedOptions)]
Expand Down
5 changes: 5 additions & 0 deletions source/test/BNFC/Backend/HaskellSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ spec = do
calc <- getCalc
makeHaskell calcOptions calc `shouldGenerate` "ErrM.hs"

it "does not generate a error module file for structured errors" $ do
let options = calcOptions { errorType = ErrorTypeStructured }
calc <- getCalc
makeHaskell options calc `shouldNotGenerate` "ErrM.hs"

context "with option -mMyMakefile and the Calc grammar" $ do
it "generates a Makefile" $ do
calc <- getCalc
Expand Down
Loading

0 comments on commit 99f98a8

Please sign in to comment.