From 728c937c14514ae4eea079b03150e6584205d414 Mon Sep 17 00:00:00 2001
From: Aravind Rao <palimar@google.com>
Date: Mon, 10 Apr 2017 15:13:31 -0700
Subject: [PATCH] Rewrite CONSTRUCT query

---
 bql/grammar/grammar.go      | 97 +++++++++++++++++++++++++++++++++----
 bql/grammar/grammar_test.go | 13 +++++
 bql/lexer/lexer.go          | 28 +++++++++--
 bql/lexer/lexer_test.go     | 16 ++++++
 4 files changed, 141 insertions(+), 13 deletions(-)

diff --git a/bql/grammar/grammar.go b/bql/grammar/grammar.go
index e36021e5..606ca7c4 100644
--- a/bql/grammar/grammar.go
+++ b/bql/grammar/grammar.go
@@ -204,15 +204,6 @@ func BQL() *Grammar {
 				},
 			},
 		},
-		"CONSTRUCT_FACTS": []*Clause{
-			{
-				Elements: []Element{
-					NewTokenType(lexer.ItemLBracket),
-					NewSymbol("CLAUSES"),
-					NewTokenType(lexer.ItemRBracket),
-				},
-			},
-		},
 		"CLAUSES": []*Clause{
 			{
 				Elements: []Element{
@@ -755,6 +746,94 @@ func BQL() *Grammar {
 			},
 			{},
 		},
+		"CONSTRUCT_FACTS": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemLBracket),
+					NewSymbol("CONSTRUCT_TRIPLES"),
+					NewTokenType(lexer.ItemRBracket),
+				},
+			},
+		},
+		"CONSTRUCT_TRIPLES": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemNode),
+					NewSymbol("CONSTRUCT_PREDICATE"),
+					NewSymbol("CONSTRUCT_OBJECT"),
+					NewSymbol("MORE_CONSTRUCT_TRIPLES"),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemBlankNode),
+					NewSymbol("CONSTRUCT_PREDICATE"),
+					NewSymbol("CONSTRUCT_OBJECT"),
+					NewSymbol("MORE_CONSTRUCT_TRIPLES"),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemBinding),
+					NewSymbol("CONSTRUCT_PREDICATE"),
+					NewSymbol("CONSTRUCT_OBJECT"),
+					NewSymbol("MORE_CONSTRUCT_TRIPLES"),
+				},
+			},
+		},
+		"CONSTRUCT_PREDICATE": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemPredicate),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemPredicateBound),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemBinding),
+				},
+			},
+		},
+		"CONSTRUCT_OBJECT": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemNode),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemPredicate),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemPredicateBound),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemLiteral),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemBinding),
+				},
+			},
+		},
+		"MORE_CONSTRUCT_TRIPLES": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemDot),
+					NewSymbol("CONSTRUCT_TRIPLES"),
+				},
+			},
+			{},
+		},
 	}
 }
 
diff --git a/bql/grammar/grammar_test.go b/bql/grammar/grammar_test.go
index 56cc70bd..7db5668e 100644
--- a/bql/grammar/grammar_test.go
+++ b/bql/grammar/grammar_test.go
@@ -116,6 +116,12 @@ func TestAcceptByParse(t *testing.T) {
 		// Test Construct clause.
 		`construct {?s "foo"@[,] ?o} into ?a from ?b where {?s "foo"@[,] ?o} having ?s = ?o;`,
 		`construct {?s "foo"@[,] ?o} into ?a from ?b where {?s "foo"@[,] ?o};`,
+		`construct {?s ?p ?o} into ?a from ?b where {?s "foo"@[,] ?o} having ?s = ?o;`,
+		`construct {?s ?p ?o.
+			    _:v "_subject"@[] ?s.
+			    _:v "_predicate"@[] ?p.
+			    _:v "_object"@[] ?o.
+			    _:v "some_pred"@[] ?k } into ?a from ?b where {?s "foo"@[,] ?o};`,
 
 	}
 	p, err := NewParser(BQL())
@@ -215,6 +221,13 @@ func TestRejectByParse(t *testing.T) {
 		`construct {?s "foo"@[,] ?o} into ?a where{?s "foo"@[,] ?o} having ?s = ?o;`,
 		// Construct clause without destination.
 		`construct {?s "foo"@[,] ?o} from ?b where{?s "foo"@[,] ?o} having ?s = ?o;`,
+		// Construct clause with badly formed blank node.
+		`construct {?s ?p ?o.
+			    _v "some_pred"@[] ?k } into ?a from ?b where {?s "foo"@[,] ?o};`,
+		// Construct clause with badly formed triple.
+		`construct {?s ?p ?o.
+		            _:v "some_pred"@[]} into ?a from ?b where {?s "foo"@[,] ?o};`,
+
 	}
 	p, err := NewParser(BQL())
 	if err != nil {
diff --git a/bql/lexer/lexer.go b/bql/lexer/lexer.go
index a2931acf..c28f4b40 100644
--- a/bql/lexer/lexer.go
+++ b/bql/lexer/lexer.go
@@ -33,7 +33,6 @@ const (
 	ItemError TokenType = iota
 	// ItemEOF indicates end of input to be scanned in BQL.
 	ItemEOF
-
 	// ItemQuery represents the select keyword in BQL.
 	ItemQuery
 	// ItemInsert represents insert keyword in BQL.
@@ -90,19 +89,18 @@ const (
 	ItemDesc
 	// ItemLimit represents the limit clause in BQL.
 	ItemLimit
-
 	// ItemBinding represents a variable binding in BQL.
 	ItemBinding
-
 	// ItemNode represents a BadWolf node in BQL.
 	ItemNode
+	// ItemBlankNode represents a blank BadWolf node in BQL.
+	ItemBlankNode
 	// ItemLiteral represents a BadWolf literal in BQL.
 	ItemLiteral
 	// ItemPredicate represents a BadWolf predicates in BQL.
 	ItemPredicate
 	// ItemPredicateBound represents a BadWolf predicate bound in BQL.
 	ItemPredicateBound
-
 	// ItemLBracket represents the left opening bracket token in BQL.
 	ItemLBracket
 	// ItemRBracket represents the right opening bracket token in BQL.
@@ -189,6 +187,8 @@ func (tt TokenType) String() string {
 		return "BINDING"
 	case ItemNode:
 		return "NODE"
+	case ItemBlankNode:
+		return "BLANK_NODE"
 	case ItemLiteral:
 		return "LITERAL"
 	case ItemPredicate:
@@ -248,6 +248,7 @@ const (
 	semicolon      = rune(';')
 	comma          = rune(',')
 	slash          = rune('/')
+	underscore     = rune('_')
 	backSlash      = rune('\\')
 	lt             = rune('<')
 	gt             = rune('>')
@@ -356,6 +357,9 @@ func lexToken(l *lexer) stateFn {
 				return lexBinding
 			case slash:
 				return lexNode
+			case underscore:
+				l.next()
+				return lexBlankNode
 			case quote:
 				return lexPredicateOrLiteral
 			}
@@ -612,6 +616,22 @@ func lexNode(l *lexer) stateFn {
 	return lexSpace
 }
 
+// lexBlankNode lexes a blank node of the form _:label. lexToken has already
+// called next once before dispatching here, so the first rune read must be the
+// colon; otherwise an error token is emitted and nil is returned. NOTE(review):
+// a bare "_:" with no label runes is still emitted as ItemBlankNode -- confirm.
+func lexBlankNode(l *lexer) stateFn {
+	if l.next() != colon {
+		l.emitError("blank node should start with _:")
+		return nil
+	}
+	for r := l.next(); r != eof && (unicode.IsLetter(r) || unicode.IsDigit(r) || r == underscore); r = l.next() {
+	}
+	l.backup()
+	l.emit(ItemBlankNode)
+	return lexSpace
+}
+
 // lexPredicateOrLiteral tries to lex a predicate or a literal out of the input.
 func lexPredicateOrLiteral(l *lexer) stateFn {
 	text := l.input[l.pos:]
diff --git a/bql/lexer/lexer_test.go b/bql/lexer/lexer_test.go
index 2122b01d..d0ef3913 100644
--- a/bql/lexer/lexer_test.go
+++ b/bql/lexer/lexer_test.go
@@ -102,6 +102,16 @@ func TestIndividualTokens(t *testing.T) {
 				{Type: ItemError, Text: "/_<foo",
 					ErrorMessage: "[lexer:0:6] node is not properly terminated; missing final > delimiter"},
 				{Type: ItemEOF}}},
+		{"_:v1 _:foo_bar",
+			[]Token{
+				{Type: ItemBlankNode, Text: "_:v1"},
+				{Type: ItemBlankNode, Text: "_:foo_bar"},
+				{Type: ItemEOF}}},
+		{"_v1",
+			[]Token{
+				{Type: ItemError, Text: "_v",
+					ErrorMessage: "[lexer:0:2] blank node should start with _:"},
+				{Type: ItemEOF}}},
 		{`"true"^^type:bool "1"^^type:int64"2"^^type:float64"t"^^type:text`,
 			[]Token{
 				{Type: ItemLiteral, Text: `"true"^^type:bool`},
@@ -214,6 +224,12 @@ func TestValidTokenQuery(t *testing.T) {
 			ItemRBracket, ItemInto, ItemBinding, ItemFrom, ItemBinding, ItemWhere,
 			ItemLBracket, ItemBinding, ItemPredicateBound, ItemBinding, ItemRBracket,
 			ItemSemicolon, ItemEOF}},
+		{`construct {_:v1 "predicate"@[] ?p.
+		             _:v1 "object"@[,] ?o} into ?a from ?b where {?s "foo"@[,] ?o};`, []TokenType{
+			ItemConstruct, ItemLBracket, ItemBlankNode, ItemPredicate, ItemBinding, ItemDot,
+			ItemBlankNode, ItemPredicateBound, ItemBinding, ItemRBracket, ItemInto, ItemBinding,
+			ItemFrom, ItemBinding, ItemWhere, ItemLBracket, ItemBinding, ItemPredicateBound,
+			ItemBinding, ItemRBracket, ItemSemicolon, ItemEOF}},
 	}
 	for _, test := range table {
 		_, c := lex(test.input, 0)