Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate pxd #70

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/pegen/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def generate_python_code(
verbose_tokenizer,
verbose_parser,
skip_actions=args.skip_actions,
generate_pxd=args.generate_pxd,
)
return grammar, parser, tokenizer, gen
except Exception as err:
Expand Down Expand Up @@ -71,6 +72,11 @@ def generate_python_code(
action="store_true",
help="Suppress code emission for rule actions",
)
argparser.add_argument(
"--generate-pxd",
action="store_true",
help="Generate additional .pxd file for cython compilation",
)


def main() -> None:
Expand Down
13 changes: 12 additions & 1 deletion src/pegen/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.python_generator import PythonParserGenerator, PxdGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent
Expand All @@ -33,10 +33,17 @@ def build_python_generator(
grammar_file: str,
output_file: str,
skip_actions: bool = False,
generate_pxd: bool = False,
) -> ParserGenerator:
with open(output_file, "w") as file:
gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions
gen.generate(grammar_file)

if generate_pxd:
pxd_path = pathlib.Path(output_file).with_suffix(".pxd")
with pxd_path.open("w") as file:
PxdGenerator(grammar, file).generate(grammar_file)

return gen


Expand All @@ -46,6 +53,7 @@ def build_python_parser_and_generator(
verbose_tokenizer: bool = False,
verbose_parser: bool = False,
skip_actions: bool = False,
generate_pxd: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
"""Generate rules, python parser, tokenizer, parser generator for a given grammar

Expand All @@ -57,12 +65,15 @@ def build_python_parser_and_generator(
verbose_parser (bool, optional): Whether to display additional output
when generating the parser. Defaults to False.
skip_actions (bool, optional): Whether to pretend no rule has any actions.
generate_pxd (bool, optional): Whether to generate additional .pxd file
for cython compilation.
"""
grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
gen = build_python_generator(
grammar,
grammar_file,
output_file,
skip_actions=skip_actions,
generate_pxd=generate_pxd,
)
return grammar, parser, tokenizer, gen
31 changes: 31 additions & 0 deletions src/pegen/parser.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Cython declarations for the ``Parser`` extension type. Generated .pxd files
# cimport this class from ``pegen.parser``.
# NOTE(review): presumably this augments src/pegen/parser.py when that module
# is compiled with Cython -- confirm against the build setup.
cdef class Parser:
    # Keyword tables; the generated parser assigns sorted tuples to both.
    cdef tuple KEYWORDS
    cdef tuple SOFT_KEYWORDS

    # Per-instance state, typed for C-level attribute access.
    cdef object _tokenizer
    cdef bint _verbose
    cdef int _level
    cdef dict _cache  # NOTE(review): presumably the memoization cache -- confirm
    cdef int in_recursive_rule
    cdef object _mark # self._tokenizer.mark
    cdef object _reset # self._tokenizer.reset
    cdef bint call_invalid_rules

    # The method declarations below are intentionally left disabled
    # (commented out); only the attributes above are declared for now.

    # cdef str showpeek(self)

    # cdef object name(self)
    # cdef object number(self)
    # cdef object string(self)
    # cdef object op(self)
    # cdef object type_comment(self)
    # cdef object soft_keyword(self)
    # cdef object expect(self, str type)
    # cdef object expect_forced(self, object res, str expectation)

    # @cython.locals(mark=cython.int, ok=cython.bint)
    # cdef bint positive_lookahead(self, object func, *args)

    # @cython.locals(mark=cython.int, ok=cython.bint)
    # cdef bint negative_lookahead(self, object func, *args)

    # cdef SyntaxError make_syntax_error(self, str message, str filename = "<unknown>"):
133 changes: 111 additions & 22 deletions src/pegen/python_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

from typing import Any, Optional

from pegen.parser import memoize, memoize_left_rec, logger, Parser
from pegen.parser import memoize, memoize_left_rec, logger, Parser as {parser_name}

"""
MODULE_SUFFIX = """
Expand All @@ -44,6 +44,17 @@
from pegen.parser import simple_parser_main
simple_parser_main({class_name})
"""
PXD_PREFIX = """
# @generated by pegen from {filename}
# cython: annotation_typing = False

cimport cython
from pegen.parser cimport Parser as {parser_name}

cdef class Parser({parser_name}):
pass

"""


class InvalidNodeVisitor(GrammarVisitor):
Expand Down Expand Up @@ -222,7 +233,8 @@ def __init__(
def generate(self, filename: str) -> None:
header = self.grammar.metas.get("header", MODULE_PREFIX)
if header is not None:
self.print(header.rstrip("\n").format(filename=filename))
parser_name = self.grammar.metas.get("pegenclass", "Parser")
self.print(header.rstrip("\n").format(filename=filename, parser_name=parser_name))
subheader = self.grammar.metas.get("subheader", "")
if subheader:
self.print(subheader)
Expand All @@ -238,8 +250,11 @@ def generate(self, filename: str) -> None:

self.print()
with self.indent():
self.print(f"KEYWORDS = {tuple(sorted(self.callmakervisitor.keywords))}")
self.print(f"SOFT_KEYWORDS = {tuple(sorted(self.callmakervisitor.soft_keywords))}")
self.print("def __init__(self, *args, **kwargs):")
with self.indent():
self.print("super().__init__(*args, **kwargs)")
self.print(f"KEYWORDS = {tuple(sorted(self.callmakervisitor.keywords))}")
self.print(f"SOFT_KEYWORDS = {tuple(sorted(self.callmakervisitor.soft_keywords))}")

trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
if trailer is not None:
Expand Down Expand Up @@ -299,16 +314,34 @@ def visit_Rule(self, node: Rule) -> None:
if node.name.endswith("without_invalid"):
self.cleanup_statements.pop()

def visit_NamedItem(self, node: NamedItem) -> None:
def visit_NamedItem(self, node: NamedItem, is_gather: bool, print: bool = True) -> None:
name, call = self.callmakervisitor.visit(node.item)
if node.name:
name = node.name

if is_gather:
condition = "if {test} is None: break"
else:
condition = "if not {test}: break"

if not name:
self.print(call)
if not print:
return

self.print(condition.format(test=call))
else:
if name != "cut":
name = self.dedupe(name)
self.print(f"({name} := {call})")

if not print:
return

if call[-1] == ",":
# condition is never run, because 'call' is in form '(X,)'
self.print(f"{name} = {call[:-1]}")
else:
self.print(f"{name} = {call}")
self.print(condition.format(test=name))

def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
if is_loop:
Expand All @@ -319,28 +352,18 @@ def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
has_cut = any(isinstance(item.item, Cut) for item in node.items)
has_invalid = self.invalidvisitor.visit(node)

with self.local_variable_context():
if has_cut:
self.print("cut = False")
if is_loop:
self.print("while (")
else:
self.print("if (")

self.print("while 1:" + is_loop*" # recursive")
with self.indent():
first = True
if has_invalid:
self.print("self.call_invalid_rules")
first = False
self.print("if not self.call_invalid_rules: break")
for item in node.items:
if first:
first = False
else:
self.print("and")
self.visit(item)
if is_gather:
self.print("is not None")
self.visit(item, is_gather=is_gather)

self.print("):")
with self.indent():
action = node.action
if not action:
Expand Down Expand Up @@ -368,10 +391,76 @@ def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
if "UNREACHABLE" in action:
action = action.replace("UNREACHABLE", self.unreachable_formatting)
self.add_return(f"{action}")
self.print("break") # XXX: probably can be removed

self.print("self._reset(mark)")
# Skip remaining alternatives if a cut was reached.
if has_cut:
self.print("if cut:")
with self.indent():
self.add_return("None")


class PxdGenerator(PythonParserGenerator):
    """Emit the Cython ``.pxd`` declarations that accompany a generated parser.

    Reuses the grammar traversal of ``PythonParserGenerator`` but, instead of
    method bodies, prints for every rule a ``@cython.locals(...)`` decorator
    followed by a ``cdef`` method declaration.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        # Names already listed in the ``@cython.locals(...)`` call that is
        # currently being written; reset after every rule.
        self.defined_variables: Set[str] = set()

    def _declare_local(self, name: str, ctype: str) -> None:
        """Print ``name=ctype,`` unless *name* was already declared for this rule.

        All alternatives of a rule share a single ``@cython.locals(...)``
        call, so each keyword may appear in it only once.
        """
        if name in self.defined_variables:
            return
        self.defined_variables.add(name)
        self.print(f"{name}={ctype},")

    def generate(self, filename: str) -> None:
        """Write the complete ``.pxd`` content for the grammar.

        Args:
            filename: Grammar file name, substituted into the header template.
        """
        # TODO: add pxd subheader
        header = self.grammar.metas.get("pxdheader", PXD_PREFIX)
        if header is not None:
            parser_name = self.grammar.metas.get("pegenclass", "Parser")
            self.print(header.rstrip("\n").format(filename=filename, parser_name=parser_name))
        cls_name = self.grammar.metas.get("class", "GeneratedParser")
        self.print(f"cdef class {cls_name}(Parser):")
        # Iterate over a snapshot and re-check ``self.todo`` afterwards, so
        # entries that appear in it while visiting are still processed.
        while self.todo:
            for rulename, rule in list(self.todo.items()):
                del self.todo[rulename]
                self.print()
                with self.indent():
                    self.visit(rule)

        # KEYWORDS / SOFT_KEYWORDS are deliberately not declared here: the
        # base ``Parser`` declaration in pegen/parser.pxd already has them.

    def visit_Rule(self, node: Rule) -> None:
        """Print the ``@cython.locals`` decorator and ``cdef`` signature of a rule."""
        is_loop = node.is_loop()
        is_gather = node.is_gather()
        rhs = node.flatten()
        self.print("@cython.locals(")
        with self.indent():
            if node.name.endswith("without_invalid"):
                self._declare_local("_prev_call_invalid", "cython.bint")

            self._declare_local("mark", "cython.int")
            if self.alts_uses_locations(node.rhs.alts):
                self._declare_local("start_lineno", "cython.int")
                self._declare_local("start_col_offset", "cython.int")
            # Visiting the alternatives adds their locals to the same call.
            self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
        self.print(")")
        self.defined_variables.clear()

        node_type = node.type or "Any"
        # Loop rules are declared as returning ``list``, all others ``object``.
        return_type = "list" if is_loop else "object"
        self.print(f"cdef {return_type} {node.name}(self) # from {node_type}")

    def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
        """Declare the locals that one alternative contributes to its rule."""
        has_cut = any(isinstance(item.item, Cut) for item in node.items)
        with self.local_variable_context():
            if has_cut:
                self._declare_local("cut", "cython.bint")
            # ``print=False``: only collect the item names, emit no parser code.
            for item in node.items:
                self.visit(item, is_gather=is_gather, print=False)

            for name in self.local_variable_names:
                self._declare_local(name, "object")

            if node.action and "LOCATIONS" in node.action:
                self._declare_local("end_lineno", "cython.int")
                self._declare_local("end_col_offset", "cython.int")