Skip to content

Commit

Permalink
[ haskell, BNFC#423 ]: structured errors in the Haskell backend
Browse files Browse the repository at this point in the history
A new option "--errors" is introduced, which can change the parser
failure type from 'String' to a record type.
  • Loading branch information
Anton Vl. Kalinin committed Aug 8, 2022
1 parent 6cc56e4 commit 4215d4e
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 31 deletions.
4 changes: 2 additions & 2 deletions source/src/BNFC/Backend/Haskell.hs
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ makeHaskell opts cf = do
-- Generate Happy parser and matching test program.
do
mkfile (happyFile opts) commentWithEmacsModeHint $
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) cf
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) (errorType opts) cf
-- liftIO $ printf "%s Tested with Happy 1.15\n" (happyFile opts)
mkfile (tFile opts) comment $ testfile opts cf

-- Both Happy parser and skeleton (template) rely on Err.
mkfile (errFile opts) comment $ mkErrM errMod
mapM_ (mkfile (errFile opts) comment) $ mkErrM errMod (errorType opts)
mkfile (templateFile opts) comment $ cf2Template (templateFileM opts) absMod (functor opts) cf

-- Generate txt2tags documentation.
Expand Down
7 changes: 5 additions & 2 deletions source/src/BNFC/Backend/Haskell/CFtoAlex3.hs
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,11 @@ restOfAlex tokenText cf = concat
, "-- A modified \"posn\" wrapper."
, "-------------------------------------------------------------------"
, ""
, "data Posn = Pn !Int !Int !Int"
, " deriving (Eq, Show, Ord)"
, "data Posn = Pn"
, " { pnAbsolute :: !Int"
, " , pnLine :: !Int"
, " , pnColumn :: !Int"
, " } deriving (Eq, Show, Ord)"
, ""
, "alexStartPos :: Posn"
, "alexStartPos = Pn 0 1 1"
Expand Down
121 changes: 99 additions & 22 deletions source/src/BNFC/Backend/Haskell/CFtoHappy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import Data.List (intersperse)
import BNFC.CF
import BNFC.Backend.Common.StrUtils (escapeChars)
import BNFC.Backend.Haskell.Utils
import BNFC.Options (HappyMode(..), TokenText(..))
import BNFC.Options (HappyMode(..), TokenText(..), ErrorType(..))
import BNFC.PrettyPrint
import BNFC.Utils

Expand All @@ -42,17 +42,18 @@ cf2Happy
-> HappyMode -- ^ Happy mode.
-> TokenText -- ^ Use @ByteString@ or @Text@?
-> Bool -- ^ AST is a functor?
-> ErrorType -- ^ The error type in the parser result type.
-> CF -- ^ Grammar.
-> String -- ^ Generated code.
cf2Happy name absName lexName mode tokenText functor cf = unlines
cf2Happy name absName lexName mode tokenText functor errorType cf = unlines
[ header name absName lexName tokenText eps
, render $ declarations mode functor eps
, render $ tokens cf functor
, delimiter
, specialRules absName functor tokenText cf
, render $ prRules absName functor (rulesForHappy absName functor cf)
, ""
, footer absName tokenText functor eps cf
, footer absName tokenText functor eps errorType cf
]
where
eps = toList $ allEntryPoints cf
Expand All @@ -66,7 +67,13 @@ header modName absName lexName tokenText eps = unlines $ concat
, "{-# LANGUAGE PatternSynonyms #-}"
, ""
, "module " ++ modName
, " ( happyError"
, " ( Failure(..)"
, " , InvalidTokenFailure(..)"
, " , UnexpectedTokenFailure(..)"
, " , UnexpectedEofFailure(..)"
-- TODO: maybe we should stop exporting happyError, since there is no reason
-- to use it outside and its type can vary?
, " , happyError"
, " , myLexer"
]
, map ((" , " ++) . render . parserName) eps
Expand All @@ -91,6 +98,8 @@ header modName absName lexName tokenText eps = unlines $ concat
-- -- no lexer declaration
-- %monad { Err } { (>>=) } { return }
-- %tokentype {Token}
-- %errorhandlertype explist
-- %error { happyError }
--
-- >>> declarations Standard True [Cat "A", Cat "B", ListCat (Cat "B")]
-- %name pA_internal A
Expand All @@ -99,14 +108,18 @@ header modName absName lexName tokenText eps = unlines $ concat
-- -- no lexer declaration
-- %monad { Err } { (>>=) } { return }
-- %tokentype {Token}
-- %errorhandlertype explist
-- %error { happyError }
declarations :: HappyMode -> Bool -> [Cat] -> Doc
declarations mode functor ns = vcat
[ vcat $ map generateP ns
, case mode of
Standard -> "-- no lexer declaration"
GLR -> "%lexer { myLexer } { Err _ }",
"%monad { Err } { (>>=) } { return }",
"%tokentype" <+> braces (text tokenName)
GLR -> "%lexer { myLexer } { Err _ }"
, "%monad { Err } { (>>=) } { return }"
, "%tokentype" <+> braces (text tokenName)
, "%errorhandlertype explist"
, "%error { happyError }"
]
where
generateP n = "%name" <+> parserName n <> (if functor then "_internal" else "") <+> text (identCat n)
Expand Down Expand Up @@ -255,24 +268,88 @@ prRules absM functor = vsep . map prOne

-- Finally, some haskell code.

footer :: ModuleName -> TokenText -> Bool -> [Cat] -> CF -> String
footer absName tokenText functor eps _cf = unlines $ concat
footer :: ModuleName -> TokenText -> Bool -> [Cat] -> ErrorType -> CF -> String
footer absName tokenText functor eps errorType _cf = unlines $ concat
[ [ "{"
, ""
, "type Err = Either String"
, "-- | The parser failure type."
, "--"
, "-- It can contain fields of more specific failure record types, so that they"
, "-- could easily be extended with new fields."
, "data Failure"
, " = FailureInvalidToken !InvalidTokenFailure"
, " | FailureUnexpectedToken !UnexpectedTokenFailure"
, " | FailureUnexpectedEof !UnexpectedEofFailure"
, " deriving (Show, Eq)"
, ""
, "happyError :: [" ++ tokenName ++ "] -> Err a"
, "happyError ts = Left $"
, " \"syntax error at \" ++ tokenPos ts ++ "
, " case ts of"
, " [] -> []"
, " [Err _] -> \" due to lexer error\""
, unwords
[ " t:_ -> \" before `\" ++"
, "(prToken t)"
-- , tokenTextUnpack tokenText "(prToken t)"
, "++ \"'\""
]
, "-- | The lexer error type."
, "newtype InvalidTokenFailure = InvalidTokenFailure"
, " { itfPosn :: Posn -- ^ The position of the beginning of an invalid token."
, " } deriving (Show, Eq)"
, ""
, "-- | The parser error: no production is found to match a token."
, "data UnexpectedTokenFailure = UnexpectedTokenFailure"
, " { utfPosn :: !Posn -- ^ The position of the beginning of the unexpected token."
, " , utfTokenText :: !(" ++ tokenTextType tokenText ++ ")"
, " , utfExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar."
, " } deriving (Show, Eq)"
, ""
, "-- | The parser error: the end of file is encountered but a token is expected."
, "newtype UnexpectedEofFailure = UnexpectedEofFailure"
, " { ueofExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar."
, " } deriving (Show, Eq)"
, ""
]
, case errorType of
ErrorTypeStructured ->
[ "type Err = Either Failure"
, ""
, "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a"
, "happyError = Left . uncurry mkFailure"
]
ErrorTypeString ->
[ "type Err = Either String"
, ""
, "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a"
, "happyError = Left . failureToString . uncurry mkFailure"
, ""
, "failureToString :: Failure -> String"
, "failureToString f ="
, " \"syntax error at \" ++ pos ++ "
, " case f of"
, " FailureUnexpectedEof _ -> []"
, " FailureInvalidToken _ -> \" due to lexer error\""
, unwords
[ " FailureUnexpectedToken ut -> \" before `\" ++"
, tokenTextUnpack tokenText "(utfTokenText ut)"
, "++ \"'\""
]
, " where"
, " pos = case f of"
, " FailureInvalidToken it -> printPosn (itfPosn it)"
, " FailureUnexpectedToken ut -> printPosn (utfPosn ut)"
, " FailureUnexpectedEof _ -> \"end of file\""
]
, [ ""
, "mkFailure :: [" ++ tokenName ++ "] -> [String] -> Failure"
, "mkFailure ts expectedTokens = case ts of"
, " [] ->"
, " FailureUnexpectedEof"
, " UnexpectedEofFailure"
, " { ueofExpectedTokens = expectedTokens"
, " }"
, " [Err pos] ->"
, " FailureInvalidToken"
, " InvalidTokenFailure"
, " { itfPosn = pos"
, " }"
, " t : _ ->"
, " FailureUnexpectedToken"
, " UnexpectedTokenFailure"
, " { utfPosn = tokenPosn t"
, " , utfTokenText = tokenText t"
, " , utfExpectedTokens = expectedTokens"
, " }"
, ""
, "myLexer :: " ++ tokenTextType tokenText ++ " -> [" ++ tokenName ++ "]"
, "myLexer = tokens"
Expand Down
11 changes: 9 additions & 2 deletions source/src/BNFC/Backend/Haskell/MkErrM.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,16 @@
module BNFC.Backend.Haskell.MkErrM where

import BNFC.PrettyPrint
import BNFC.Options (ErrorType(..))

mkErrM :: String -> Doc
mkErrM errMod = vcat
-- | Creates @ErrM.hs@ file if needed.
--
-- It returns 'Nothing' if there is no need to create it.
mkErrM :: String -> ErrorType -> Maybe Doc
mkErrM _ ErrorTypeStructured = Nothing
-- ErrM.hs is only for backward compatibility with old code using string
-- errors, so that we don't create it in case of structured errors.
mkErrM errMod ErrorTypeString = Just $ vcat
[ "{-# LANGUAGE CPP #-}"
, ""
, "#if __GLASGOW_HASKELL__ >= 708"
Expand Down
4 changes: 2 additions & 2 deletions source/src/BNFC/Backend/HaskellGADT.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ makeHaskellGadt opts cf = do
mkHsFileHint (alexFile opts) $ cf2alex3 lexMod (tokenText opts) cf
liftIO $ putStrLn " (Use Alex 3 to compile.)"
mkHsFileHint (happyFile opts) $
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False cf
cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False (errorType opts) cf
liftIO $ putStrLn " (Tested with Happy 1.15 - 1.20)"
mkHsFile (templateFile opts) $ cf2Template (templateFileM opts) absMod cf
mkHsFile (printerFile opts) $ cf2Printer StringToken False True prMod absMod cf
when (hasLayout cf) $ mkHsFile (layoutFile opts) $
cf2Layout layMod lexMod cf
mkHsFile (tFile opts) $ Haskell.testfile opts cf
mkHsFile (errFile opts) $ mkErrM errMod
mapM_ (mkHsFile (errFile opts)) $ mkErrM errMod (errorType opts)
Makefile.mkMakefile opts $ Haskell.makefile opts cf
case xml opts of
2 -> makeXML opts True cf
Expand Down
24 changes: 23 additions & 1 deletion source/src/BNFC/Options.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module BNFC.Options
, SharedOptions(..)
, defaultOptions, isDefault, printOptions
, AlexVersion(..), HappyMode(..), OCamlParser(..), JavaLexerParser(..)
, RecordPositions(..), TokenText(..)
, RecordPositions(..), TokenText(..), ErrorType(..)
, InPackage
, removedIn290
, translateOldOptions
Expand Down Expand Up @@ -82,6 +82,12 @@ instance Show Target where
show TargetPygments = "Pygments"
show TargetCheck = "Check LBNF file"

-- | Which error type to use in the generated parser result?
data ErrorType
= ErrorTypeString -- ^ Errors are plain strings.
| ErrorTypeStructured -- ^ Errors are values of a record/structure type.
deriving (Show,Eq,Ord)

-- | Which version of Alex is targeted?
data AlexVersion = Alex3
deriving (Show,Eq,Ord,Bounded,Enum)
Expand Down Expand Up @@ -132,6 +138,7 @@ data SharedOptions = Options
, glr :: HappyMode -- ^ Happy option @--glr@.
, xml :: Int -- ^ Options @--xml@, generate DTD and XML printers.
, agda :: Bool -- ^ Option @--agda@. Create bindings for Agda?
, errorType :: ErrorType -- ^ An error type to use in the parser result.
--- OCaml specific
, ocamlParser :: OCamlParser -- ^ Option @--menhir@ to switch to @Menhir@.
--- Java specific
Expand Down Expand Up @@ -165,6 +172,7 @@ defaultOptions = Options
, glr = Standard
, xml = 0
, agda = False
, errorType = ErrorTypeString
-- OCaml specific
, ocamlParser = OCamlYacc
-- Java specific
Expand Down Expand Up @@ -224,6 +232,9 @@ printOptions opts = unwords . concat $
, [ "--xml" | xml opts == 1 ]
, [ "--xmlt" | xml opts == 2 ]
, [ "--agda" | agda opts ]
, case errorType opts of
ErrorTypeString -> []
ErrorTypeStructured -> [ "--errors=structured" ]
-- C# options:
, [ "--vs" | visualStudio opts ]
, [ "--wfc" | wcf opts ]
Expand Down Expand Up @@ -363,6 +374,9 @@ specificOptions =
, ( Option [] ["generic"] (NoArg (\o -> pure o {generic = True}))
"Derive Data, Generic, and Typeable instances for AST types"
, haskellTargets )
, ( Option [] ["errors"] (ReqArg parseAndSetErrorType "TYPE")
"Set the parser error type. Valid values are `string' (default) and `structured'"
, [TargetHaskell] )
, ( Option [] ["xml"] (NoArg (\o -> pure o {xml = 1}))
"Also generate a DTD and an XML printer"
, haskellTargets )
Expand All @@ -374,6 +388,14 @@ specificOptions =
"Also generate Agda bindings for the abstract syntax"
, [TargetHaskell] )
]
where
parseAndSetErrorType arg o = (\t -> o {errorType = t}) <$> parseErrorType arg

parseErrorType s = case s of
"string" -> pure ErrorTypeString
"structured" -> pure ErrorTypeStructured
_ -> Left $ "Wrong error type: " ++ show s


-- | The list of specific options for a target.
specificOptions' :: Target -> [OptDescr (SharedOptions -> Either String SharedOptions)]
Expand Down

0 comments on commit 4215d4e

Please sign in to comment.