Skip to content

Commit

Permalink
[ fixed #256 ] Regex: let - bind stronger than |; simpReg
Browse files Browse the repository at this point in the history
We simplify regular expressions to make it easier for backends to
produce lexer specifications.  In particular, we try to eliminate
iterated difference on character classes, which is often not supported.
  • Loading branch information
andreasabel committed Dec 15, 2019
1 parent ae542ee commit a3f66cb
Show file tree
Hide file tree
Showing 10 changed files with 426 additions and 105 deletions.
192 changes: 120 additions & 72 deletions docs/lbnf.rst
Original file line number Diff line number Diff line change
Expand Up @@ -980,86 +980,134 @@ Comments
The syntactic structure of BNF
==============================

Non-terminals are enclosed between ``<`` and
``>``. The symbols ``::=`` (production), ``::=`` (union) and ``ε`` (empty rule)
belong to the BNF notation. All other symbols are terminals.
Non-terminals are enclosed between ``<`` and ``>``.
The symbols ``::=`` (production), ``|`` (union) and ``ε`` (empty rule)
belong to the BNF notation.
All other symbols are terminals
(and occasionally ``::=`` and ``|`` occur as terminals, too).


::

<Grammar> ::= <ListDef>

<ListDef> ::= ε
| <Def> ; <ListDef>
<ListItem> ::= ε
| <Item> <ListItem>
<Def> ::= <Label> . <Cat> ::= <ListItem>
| comment <String>
| comment <String> <String>
| internal <Label> . <Cat> ::= <ListItem>
| token <Ident> <Reg>
| position token <Ident> <Reg>
| entrypoints <ListIdent>
| separator <MinimumSize> <Cat> <String>
| terminator <MinimumSize> <Cat> <String>
| coercions <Ident> <Integer>
| rules <Ident> ::= <ListRHS>
| layout <ListString>
| layout stop <ListString>
| layout toplevel
<Item> ::= <String>
| <Cat>
<Cat> ::= [ <Cat> ]
| <Ident>
<Label> ::= <LabelId>
| <LabelId> <ListProfItem>
| <LabelId> <LabelId> <ListProfItem>
<LabelId> ::= <Ident>
| -
| [ ]
| ( : )
| ( : [ ] )
<ProfItem> ::= ( [ <ListIntList> ] , [ <ListInteger> ] )
<IntList> ::= [ <ListInteger> ]

<ListInteger> ::= ε
| <Integer>
| <Integer> , <ListInteger>
<ListIntList> ::= ε
| <IntList>
| <IntList> , <ListIntList>
<ListProfItem> ::= <ProfItem>
| <ProfItem> <ListProfItem>
<ListString> ::= <String>
| <String> , <ListString>
<ListRHS> ::= <RHS>
| <RHS> | <ListRHS>
<ListDef>
::= ε
| <Def>
| <Def> ; <ListDef>
| ; <ListDef>

<Def>
::= entrypoints <ListIdent>

| <Label> . <Cat> ::= <ListItem>
| internal <Label> . <Cat> ::= <ListItem>

| separator <MinimumSize> <Cat> <String>
| terminator <MinimumSize> <Cat> <String>

| coercions <Ident> <Integer>

| rules <Ident> ::= <ListRHS>

| comment <String>
| comment <String> <String>

| token <Ident> <Reg>
| position token <Ident> <Reg>

| layout <ListString>
| layout stop <ListString>
| layout toplevel

<ListIdent>
::= <Ident>
| <Ident> , <ListIdent>

<ListItem>
::= ε
| <Item> <ListItem>

<Item>
::= <String>
| <Cat>

<Cat>
::= [ <Cat> ]
| <Ident>

<Label>
::= <LabelId>
| <LabelId> <ListProfItem>
| <LabelId> <LabelId> <ListProfItem>

<LabelId>
::= <Ident>
| -
| [ ]
| ( : )
| ( : [ ] )

<ProfItem>
::= ( [ <ListIntList> ] , [ <ListInteger> ] )

<IntList>
::= [ <ListInteger> ]

<ListInteger>
::= ε
| <Integer>
| <Integer> , <ListInteger>

<ListIntList>
::= ε
| <IntList>
| <IntList> , <ListIntList>

<ListProfItem>
::= <ProfItem>
| <ProfItem> <ListProfItem>

<ListString>
::= <String>
| <String> , <ListString>

<ListRHS>
::= <RHS>
| <RHS> | <ListRHS>

<RHS> ::= <ListItem>
<MinimumSize> ::= nonempty
| ε
<Reg2> ::= <Reg2> <Reg3>
| <Reg3>

<Reg1> ::= <Reg1> | <Reg2>
| <Reg2> − <Reg2>
| <Reg2>
<Reg3> ::= <Reg3> *
| <Reg3> +
| <Reg3> ?
| eps
| <Char>
| [ <String> ]
| { <String> }
| digit
| letter
| upper
| lower
| char
| ( <Reg> )
<Reg> ::= <Reg1>
<ListIdent> ::= <Ident>
| <Ident> , <ListIdent>

<MinimumSize>
::= ε
| nonempty

<Reg>
::= <Reg> | <Reg1>
| <Reg1>

<Reg1>
::= <Reg1> - <Reg2>
| <Reg2>

<Reg2>
::= <Reg2> <Reg3>
| <Reg3>

<Reg3>
::= <Reg3> *
| <Reg3> +
| <Reg3> ?
| eps
| <Char>
| [ <String> ]
| { <String> }
| digit
| letter
| upper
| lower
| char
| ( <Reg> )

.. [1]
Cambridge University Press, 1998.
2 changes: 2 additions & 0 deletions source/BNFC.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ Executable bnfc
BNFC.CF
BNFC.ToCNFCore
BNFC.MultiView
BNFC.Regex
BNFC.TypeChecker
BNFC.GetCF
BNFC.Lexing
Expand Down Expand Up @@ -269,6 +270,7 @@ Test-suite unit-tests
BNFC.CF
BNFC.ToCNFCore
BNFC.MultiView
BNFC.Regex
BNFC.TypeChecker
BNFC.GetCF
BNFC.Lexing
Expand Down
4 changes: 2 additions & 2 deletions source/src/AbsBNF.hs
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ data MinimumSize = MNonempty | MEmpty
deriving (Eq, Ord, Show, Read)

data Reg
= RSeq Reg Reg
| RAlt Reg Reg
= RAlt Reg Reg
| RMinus Reg Reg
| RSeq Reg Reg
| RStar Reg
| RPlus Reg
| ROpt Reg
Expand Down
33 changes: 18 additions & 15 deletions source/src/BNF.cf
Original file line number Diff line number Diff line change
Expand Up @@ -134,25 +134,28 @@ MNonempty. MinimumSize ::= "nonempty" ;
MEmpty. MinimumSize ::= ;

-- Regular expressions
RSeq. Reg2 ::= Reg2 Reg3 ; -- left-associative
RAlt. Reg1 ::= Reg1 "|" Reg2 ; -- left-associative
RMinus. Reg1 ::= Reg2 "-" Reg2 ; -- non-associative

RStar. Reg3 ::= Reg3 "*" ;
RPlus. Reg3 ::= Reg3 "+" ;
ROpt. Reg3 ::= Reg3 "?" ;
RAlt. Reg ::= Reg "|" Reg1 ; -- left-associative

REps. Reg3 ::= "eps" ;
RMinus. Reg1 ::= Reg1 "-" Reg2 ; -- left-associative

RChar. Reg3 ::= Char ; -- single character
RAlts. Reg3 ::= "[" String "]" ; -- list of alternative characters
RSeqs. Reg3 ::= "{" String "}" ; -- character sequence
RSeq. Reg2 ::= Reg2 Reg3 ; -- left-associative

RDigit. Reg3 ::= "digit" ;
RLetter. Reg3 ::= "letter" ;
RUpper. Reg3 ::= "upper" ;
RLower. Reg3 ::= "lower" ;
RAny. Reg3 ::= "char" ;
RStar. Reg3 ::= Reg3 "*" ;
RPlus. Reg3 ::= Reg3 "+" ;
ROpt. Reg3 ::= Reg3 "?" ;

REps. Reg3 ::= "eps" ; -- empty string, same as {""}

RChar. Reg3 ::= Char ; -- single character
RAlts. Reg3 ::= "[" String "]" ; -- list of alternative characters
RSeqs. Reg3 ::= "{" String "}" ; -- character sequence

RDigit. Reg3 ::= "digit" ;
RLetter. Reg3 ::= "letter" ;
RUpper. Reg3 ::= "upper" ;
RLower. Reg3 ::= "lower" ;
RAny. Reg3 ::= "char" ;

coercions Reg 3;

Expand Down
5 changes: 3 additions & 2 deletions source/src/BNFC/GetCF.hs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import ParBNF

import BNFC.CF
import BNFC.Options
import BNFC.Regex (simpReg)
import BNFC.TypeChecker
import BNFC.Utils

Expand Down Expand Up @@ -222,8 +223,8 @@ transDef = \case
[Right $ Rule (transLabel label) (transCat cat) (concatMap transItem items) Parsable]
Abs.Comment str -> [Left $ CommentS str]
Abs.Comments str0 str -> [Left $ CommentM (str0,str)]
Abs.Token ident reg -> [Left $ TokenReg (transIdent ident) False reg]
Abs.PosToken ident reg -> [Left $ TokenReg (transIdent ident) True reg]
Abs.Token ident reg -> [Left $ TokenReg (transIdent ident) False $ simpReg reg]
Abs.PosToken ident reg -> [Left $ TokenReg (transIdent ident) True $ simpReg reg]
Abs.Entryp idents -> [Left $ EntryPoints (map (strToCat .transIdent) idents)]
Abs.Internal label cat items ->
[Right $ Rule (transLabel label) (transCat cat) (concatMap transItem items) Internal]
Expand Down
Loading

0 comments on commit a3f66cb

Please sign in to comment.