Merge pull request #1459 from lark-parser/chanicpanic-share-nodes
Tiny refactor for PR #1451
erezsh committed Aug 30, 2024
2 parents 414e40f + 6b52f72 commit 906de31
Showing 4 changed files with 24 additions and 31 deletions.
31 changes: 13 additions & 18 deletions lark/parsers/earley.py
@@ -75,7 +75,7 @@ def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matc
self.term_matcher = term_matcher


- def predict_and_complete(self, i, to_scan, columns, transitives):
+ def predict_and_complete(self, i, to_scan, columns, transitives, node_cache):
"""The core Earley Predictor and Completer.
At each stage of the input, we handling any completed items (things
@@ -84,7 +84,6 @@ def predict_and_complete(self, i, to_scan, columns, transitives):
non-terminals are recursively processed until we reach a set of,
which can be added to the scan list for the next scanner cycle."""
# Held Completions (H in E.Scotts paper).
- node_cache = {}
held_completions = {}

column = columns[i]
@@ -203,7 +202,7 @@ def scan(i, token, to_scan):
for item in self.Set(to_scan):
if match(item.expect, token):
new_item = item.advance()
- label = (new_item.s, new_item.start, i)
+ label = (new_item.s, new_item.start, i + 1)
# 'terminals' may not contain token.type when using %declare
# Additionally, token is not always a Token
# For example, it can be a Tree when using TreeMatcher
@@ -227,7 +226,7 @@ def scan(i, token, to_scan):
expect = {i.expect.name for i in to_scan}
raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))

- return next_to_scan
+ return next_to_scan, node_cache


# Define parser functions
@@ -245,16 +244,17 @@ def scan(i, token, to_scan):
# step.
expects = {i.expect for i in to_scan}
i = 0
+ node_cache = {}
for token in lexer.lex(expects):
- self.predict_and_complete(i, to_scan, columns, transitives)
+ self.predict_and_complete(i, to_scan, columns, transitives, node_cache)

- to_scan = scan(i, token, to_scan)
+ to_scan, node_cache = scan(i, token, to_scan)
i += 1

expects.clear()
expects |= {i.expect for i in to_scan}

- self.predict_and_complete(i, to_scan, columns, transitives)
+ self.predict_and_complete(i, to_scan, columns, transitives, node_cache)

## Column is now the final column in the parse.
assert i == len(columns)-1
@@ -286,6 +286,9 @@ def parse(self, lexer, start):
if not solutions:
expected_terminals = [t.expect.name for t in to_scan]
raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
+ if len(solutions) > 1:
+     raise RuntimeError('Earley should not generate multiple start symbol items! Please report this bug.')
+ solution ,= solutions

if self.debug:
from .earley_forest import ForestToPyDotVisitor
@@ -294,8 +297,7 @@ def parse(self, lexer, start):
except ImportError:
logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
else:
- for i, s in enumerate(solutions):
-     debug_walker.visit(s, f"sppf{i}.png")
+ debug_walker.visit(solution, "sppf.png")


if self.Tree is not None:
@@ -304,14 +306,7 @@ def parse(self, lexer, start):
# to prevent a tree construction bug. See issue #1283
use_cache = not self.resolve_ambiguity
transformer = ForestToParseTree(self.Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity, use_cache)
- solutions = [transformer.transform(s) for s in solutions]
-
- if len(solutions) > 1 and not self.resolve_ambiguity:
-     t: Tree = self.Tree('_ambig', solutions)
-     t.expand_kids_by_data('_ambig') # solutions may themselves be _ambig nodes
-     return t
- return solutions[0]
+ return transformer.transform(solution)

# return the root of the SPPF
- # TODO return a list of solutions, or join them together somehow
- return solutions[0]
+ return solution
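
The heart of the earley.py change is that the SPPF node cache now lives for the whole parse instead of being rebuilt on every predict/complete pass: _parse creates node_cache once, predict_and_complete takes it as a parameter, and scan returns it along with next_to_scan. The epilogue of parse() is simplified accordingly: exactly one start-symbol item is expected, so the list handling and _ambig wrapping of multiple solutions give way to `solution ,= solutions`. Below is a minimal, self-contained sketch of the cache-threading pattern, using invented stand-in names (SymbolNode, make_node, parse_loop) rather than lark's real SPPF classes:

# Sketch of the cache-threading pattern from the diff above. Only the idea of
# a single per-parse cache keyed by (symbol, start, end) labels mirrors the
# change; the classes and helpers are illustrative.

class SymbolNode:
    def __init__(self, symbol: str, start: int, end: int):
        self.symbol, self.start, self.end = symbol, start, end

def make_node(label, node_cache):
    # Previously the cache was local to each predict_and_complete call, so the
    # same label could yield distinct nodes at different positions. With one
    # dict per parse, equal labels always resolve to the same shared node.
    if label not in node_cache:
        node_cache[label] = SymbolNode(*label)
    return node_cache[label]

def parse_loop(tokens):
    node_cache = {}                      # created once per parse, as in _parse
    for i, tok in enumerate(tokens):
        # predict_and_complete and scan would both receive node_cache here;
        # note that scan labels an advanced item with end position i + 1.
        make_node(('start', 0, i + 1), node_cache)
    return node_cache

assert len(parse_loop("aa")) == 2        # one shared node per distinct label
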
9 changes: 5 additions & 4 deletions lark/parsers/xearley.py
@@ -127,7 +127,7 @@ def scan(i, to_scan):
considered_rules=considered_rules
)

- return next_to_scan
+ return next_to_scan, node_cache


delayed_matches = defaultdict(list)
@@ -146,10 +146,11 @@ def scan(i, to_scan):
# processed down to terminals/empty nodes to be added to the scanner for the next
# step.
i = 0
+ node_cache = {}
for token in stream:
- self.predict_and_complete(i, to_scan, columns, transitives)
+ self.predict_and_complete(i, to_scan, columns, transitives, node_cache)

- to_scan = scan(i, to_scan)
+ to_scan, node_cache = scan(i, to_scan)

if token == '\n':
text_line += 1
@@ -158,7 +159,7 @@ def scan(i, to_scan):
text_column += 1
i += 1

- self.predict_and_complete(i, to_scan, columns, transitives)
+ self.predict_and_complete(i, to_scan, columns, transitives, node_cache)

## Column is now the final column in the parse.
assert i == len(columns)-1
2 changes: 1 addition & 1 deletion lark/utils.py
@@ -184,7 +184,7 @@ def is_id_start(s: str) -> bool:
return _test_unicode_category(s, _ID_START)


- def dedup_list(l: Sequence[T]) -> List[T]:
+ def dedup_list(l: Iterable[T]) -> List[T]:
"""Given a list (l) will removing duplicates from the list,
preserving the original order of the list. Assumes that
the list entries are hashable."""
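
The utils.py tweak only loosens the annotation: dedup_list now accepts any Iterable rather than requiring a Sequence, so generators and sets type-check as arguments. A short order-preserving sketch consistent with the docstring (the function body is untouched by this commit and may differ from the one-liner assumed here):

from typing import Iterable, List, TypeVar

T = TypeVar('T')

def dedup_list(l: Iterable[T]) -> List[T]:
    # Order-preserving de-duplication of hashable items: dict keeps insertion
    # order, so the first occurrence of each item wins.
    return list(dict.fromkeys(l))

# With the parameter typed as Iterable, a generator is now a valid argument:
assert dedup_list(x % 3 for x in range(10)) == [0, 1, 2]
assert dedup_list(["a", "b", "a"]) == ["a", "b"]
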
13 changes: 5 additions & 8 deletions tests/test_parser.py
@@ -836,14 +836,14 @@ def test_multiple_start_solutions(self):
tree = l.parse('x')

expected = Tree('_ambig', [
+ Tree('start', [Tree('a', ['x'])]),
Tree('start', ['x']),
- Tree('start', [Tree('a', ['x'])])]
- )
+ ])
self.assertEqual(tree, expected)

l = Lark(grammar, ambiguity='resolve', lexer=LEXER)
tree = l.parse('x')
- assert tree == Tree('start', ['x'])
+ assert tree == Tree('start', [Tree('a', ['x'])])


def test_cycle(self):
@@ -872,10 def test_cycle2(self):
tree = l.parse("ab")
expected = (
Tree('start', [
- Tree('_ambig', [
-     Tree('v', [Tree('v', [])]),
-     Tree('v', [Tree('v', [Tree('v', [])])])
- ])
+ Tree('v', [Tree('v', [])]),
])
)
self.assertEqual(tree, expected)
@@ -990,7 +987,7 @@ def test_consistent_derivation_order1(self):
''', lexer=LEXER)

tree = parser.parse('..')
- n = Tree('a', [Tree('b', [])])
+ n = Tree('a', [])
assert tree == Tree('start', [n, n])

_NAME = "TestFullEarley" + LEXER.capitalize()
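
The test updates record the behavioral side of node sharing: under ambiguity='explicit' the alternatives below '_ambig' come out in a different order, under ambiguity='resolve' the derivation through rule a is now picked, and test_cycle2 and test_consistent_derivation_order1 expect a single, stable derivation. A standalone sketch of the updated expectation in test_multiple_start_solutions, built directly with lark.Tree (the grammar and parser setup from the test are not reproduced here):

from lark import Tree

expected_explicit = Tree('_ambig', [
    Tree('start', [Tree('a', ['x'])]),   # derivation through rule a
    Tree('start', ['x']),                # direct 'x' derivation
])

expected_resolved = Tree('start', [Tree('a', ['x'])])

# Tree equality compares the rule name and children recursively, so the
# resolved tree matches the first alternative kept under '_ambig'.
assert expected_explicit.children[0] == expected_resolved
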
