diff --git a/bql/grammar/grammar.go b/bql/grammar/grammar.go
index ab1c18e8..6e847129 100644
--- a/bql/grammar/grammar.go
+++ b/bql/grammar/grammar.go
@@ -88,6 +88,19 @@ func BQL() *Grammar {
 					NewTokenType(lexer.ItemSemicolon),
 				},
 			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemConstruct),
+					NewSymbol("CONSTRUCT_FACTS"),
+					NewTokenType(lexer.ItemInto),
+					NewSymbol("GRAPHS"),
+					NewTokenType(lexer.ItemFrom),
+					NewSymbol("GRAPHS"),
+					NewSymbol("WHERE"),
+					NewSymbol("HAVING"),
+					NewTokenType(lexer.ItemSemicolon),
+				},
+			},
 		},
 		"CREATE_GRAPHS": []*Clause{
 			{
@@ -733,6 +746,84 @@ func BQL() *Grammar {
 			},
 			{},
 		},
+		"CONSTRUCT_FACTS": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemLBracket),
+					NewSymbol("CONSTRUCT_TRIPLES"),
+					NewTokenType(lexer.ItemRBracket),
+				},
+			},
+		},
+		"CONSTRUCT_TRIPLES": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemNode),
+					NewSymbol("CONSTRUCT_PREDICATE"),
+					NewSymbol("CONSTRUCT_OBJECT"),
+					NewSymbol("MORE_CONSTRUCT_TRIPLES"),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemBlankNode),
+					NewSymbol("CONSTRUCT_PREDICATE"),
+					NewSymbol("CONSTRUCT_OBJECT"),
+					NewSymbol("MORE_CONSTRUCT_TRIPLES"),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemBinding),
+					NewSymbol("CONSTRUCT_PREDICATE"),
+					NewSymbol("CONSTRUCT_OBJECT"),
+					NewSymbol("MORE_CONSTRUCT_TRIPLES"),
+				},
+			},
+		},
+		"CONSTRUCT_PREDICATE": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemPredicate),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemBinding),
+				},
+			},
+		},
+		"CONSTRUCT_OBJECT": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemNode),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemPredicate),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemLiteral),
+				},
+			},
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemBinding),
+				},
+			},
+		},
+		"MORE_CONSTRUCT_TRIPLES": []*Clause{
+			{
+				Elements: []Element{
+					NewTokenType(lexer.ItemDot),
+					NewSymbol("CONSTRUCT_TRIPLES"),
+				},
+			},
+			{},
+		},
 	}
 }
diff --git a/bql/grammar/grammar_test.go b/bql/grammar/grammar_test.go
index 71211c22..7c3c5d02 100644
--- a/bql/grammar/grammar_test.go
+++ b/bql/grammar/grammar_test.go
@@ -113,6 +113,18 @@ func TestAcceptByParse(t *testing.T) {
 			/room<000> "connects_to"@[] /room<001>};`,
 		`delete data from ?world {/room<000> "named"@[] "Hallway"^^type:text.
 			/room<000> "connects_to"@[] /room<001>};`,
+		// Test Construct clause.
+		`construct {?s "new_predicate"@[] ?o} into ?a from ?b where {?s "old_predicate"@[,] ?o} having ?s = ?o;`,
+		`construct {?s "new_predicate"@[] ?o} into ?a from ?b where {?s "old_predicate"@[,] ?o};`,
+		`construct {?s ?p ?o} into ?a from ?b where {?n "_subject"@[] ?s.
+			?n "_predicate"@[] ?p.
+			?n "_object"@[] ?o};`,
+		`construct {?s ?p ?o.
+			_:v "_subject"@[] ?s.
+			_:v "_predicate"@[] ?p.
+			_:v "_object"@[] ?o} into ?a from ?b where {?n "_subject"@[] ?s.
+			?n "_predicate"@[] ?p.
+			?n "_object"@[] ?o};`,
 	}
 	p, err := NewParser(BQL())
 	if err != nil {
@@ -207,6 +219,17 @@ func TestRejectByParse(t *testing.T) {
 		// Drop graphs.
 		`drop graph ;`,
 		`drop graph ?a ?b, ?c;`,
+		// Construct clause without source.
+		`construct {?s "foo"@[,] ?o} into ?a where{?s "foo"@[,] ?o} having ?s = ?o;`,
+		// Construct clause without destination.
+		`construct {?s "foo"@[,] ?o} from ?b where{?s "foo"@[,] ?o} having ?s = ?o;`,
+		// Construct clause with badly formed blank node.
+		`construct {?s ?p ?o.
+			_v "some_pred"@[] ?k } into ?a from ?b where {?s "foo"@[,] ?o};`,
+		// Construct clause with badly formed triple.
+		`construct {?s ?p ?o.
+			_:v "some_pred"@[]} into ?a from ?b where {?s "foo"@[,] ?o};`,
+
 	}
 	p, err := NewParser(BQL())
 	if err != nil {
@@ -344,7 +367,7 @@ func TestRejectByParseAndSemantic(t *testing.T) {
 	}
 }
 
-func TestSemanticStatementGraphClausesLenghtCorrectness(t *testing.T) {
+func TestSemanticStatementGraphClausesLengthCorrectness(t *testing.T) {
 	table := []struct {
 		query string
 		want  int
diff --git a/bql/lexer/lexer.go b/bql/lexer/lexer.go
index 24416401..c0d1d3db 100644
--- a/bql/lexer/lexer.go
+++ b/bql/lexer/lexer.go
@@ -33,7 +33,6 @@ const (
 	ItemError TokenType = iota
 	// ItemEOF indicates end of input to be scanned in BQL.
 	ItemEOF
-
 	// ItemQuery represents the select keyword in BQL.
 	ItemQuery
 	// ItemInsert represents insert keyword in BQL.
@@ -42,6 +41,8 @@ const (
 	ItemDelete
 	// ItemCreate represents the creation of a graph in BQL.
 	ItemCreate
+	// ItemConstruct represents the construct keyword in BQL.
+	ItemConstruct
 	// ItemDrop represent the destruction of a graph in BQL.
 	ItemDrop
 	// ItemGraph represent the graph to be created of destroyed in BQL.
@@ -88,19 +89,18 @@ const (
 	ItemDesc
 	// ItemLimit represents the limit clause in BQL.
 	ItemLimit
-
 	// ItemBinding represents a variable binding in BQL.
 	ItemBinding
-
 	// ItemNode represents a BadWolf node in BQL.
 	ItemNode
+	// ItemBlankNode represents a blank BadWolf node in BQL.
+	ItemBlankNode
 	// ItemLiteral represents a BadWolf literal in BQL.
 	ItemLiteral
 	// ItemPredicate represents a BadWolf predicates in BQL.
 	ItemPredicate
 	// ItemPredicateBound represents a BadWolf predicate bound in BQL.
 	ItemPredicateBound
-
 	// ItemLBracket represents the left opening bracket token in BQL.
 	ItemLBracket
 	// ItemRBracket represents the right opening bracket token in BQL.
@@ -143,6 +143,8 @@ func (tt TokenType) String() string {
 		return "DELETE"
 	case ItemCreate:
 		return "CREATE"
+	case ItemConstruct:
+		return "CONSTRUCT"
 	case ItemDrop:
 		return "DROP"
 	case ItemGraph:
@@ -185,6 +187,8 @@ func (tt TokenType) String() string {
 		return "BINDING"
 	case ItemNode:
 		return "NODE"
+	case ItemBlankNode:
+		return "BLANK_NODE"
 	case ItemLiteral:
 		return "LITERAL"
 	case ItemPredicate:
@@ -244,6 +248,7 @@ const (
 	semicolon = rune(';')
 	comma     = rune(',')
 	slash     = rune('/')
+	underscore = rune('_')
 	backSlash = rune('\\')
 	lt        = rune('<')
 	gt        = rune('>')
@@ -256,6 +261,7 @@ const (
 	insert = "insert"
 	delete = "delete"
 	create = "create"
+	construct = "construct"
 	drop   = "drop"
 	graph  = "graph"
 	data   = "data"
@@ -303,7 +309,7 @@ func (t *Token) String() string {
 	return fmt.Sprintf("(%s, %s, %s)", t.Type, t.Text, t.ErrorMessage)
 }
 
-// stateFn represents the state of the scanner as a function that returns 
+// stateFn represents the state of the scanner as a function that returns
 // the next state.
 type stateFn func(*lexer) stateFn
@@ -351,6 +357,9 @@ func lexToken(l *lexer) stateFn {
 		case binding:
 			return lexBinding
 		case slash:
 			return lexNode
+		case underscore:
+			l.next()
+			return lexBlankNode
 		case quote:
 			return lexPredicateOrLiteral
 		}
@@ -403,7 +412,7 @@ func lexToken(l *lexer) stateFn {
 	return nil // Stop the run loop.
 }
 
-// isSingleSymbolToken check if a single char should be lexed.
+// isSingleSymbolToken checks if a single char should be lexed.
 func isSingleSymbolToken(l *lexer, tt TokenType, symbol rune) stateFn {
 	if r := l.peek(); r == symbol {
 		l.next()
@@ -462,6 +471,10 @@ func lexKeyword(l *lexer) stateFn {
 		consumeKeyword(l, ItemCreate)
 		return lexSpace
 	}
+	if strings.EqualFold(input, construct) {
+		consumeKeyword(l, ItemConstruct)
+		return lexSpace
+	}
 	if strings.EqualFold(input, drop) {
 		consumeKeyword(l, ItemDrop)
 		return lexSpace
@@ -603,6 +616,26 @@ func lexNode(l *lexer) stateFn {
 	return lexSpace
 }
 
+// lexBlankNode tries to lex a blank node out of the input.
+func lexBlankNode(l *lexer) stateFn {
+	if r := l.next(); r != colon {
+		l.emitError("blank node should start with _:")
+		return nil
+	}
+	if r := l.next(); !unicode.IsLetter(r) {
+		l.emitError("blank node label should begin with a letter")
+		return nil
+	}
+	for {
+		// Stop at eof or at the first rune that is not a letter, digit, or
+		// underscore; explicit parens keep the &&/|| precedence obvious.
+		if r := l.next(); r == eof || (!unicode.IsLetter(r) && !unicode.IsDigit(r) && r != underscore) {
+			l.backup()
+			l.emit(ItemBlankNode)
+			break
+		}
+	}
+	return lexSpace
+}
+
 // lexPredicateOrLiteral tries to lex a predicate or a literal out of the input.
 func lexPredicateOrLiteral(l *lexer) stateFn {
 	text := l.input[l.pos:]
@@ -618,7 +651,7 @@ func lexPredicateOrLiteral(l *lexer) stateFn {
 	return lexLiteral
 	}
 
-// lexPredicate lexes a predicate of out of the input.
+// lexPredicate lexes a predicate out of the input.
 func lexPredicate(l *lexer) stateFn {
 	l.next()
 	for done := false; !done; {
@@ -669,7 +702,7 @@ func lexPredicate(l *lexer) stateFn {
 	return lexSpace
 }
 
-// lexPredicate lexes a literal of out of the input.
+// lexLiteral lexes a literal out of the input.
 func lexLiteral(l *lexer) stateFn {
 	l.next()
 	for done := false; !done; {
diff --git a/bql/lexer/lexer_test.go b/bql/lexer/lexer_test.go
index ffc37d92..a0e47c8a 100644
--- a/bql/lexer/lexer_test.go
+++ b/bql/lexer/lexer_test.go
@@ -47,7 +47,7 @@ func TestIndividualTokens(t *testing.T) {
 			{Type: ItemEOF}}},
 		{`SeLeCt FrOm WhErE As BeFoRe AfTeR BeTwEeN CoUnT SuM GrOuP bY HaViNg LiMiT OrDeR AsC DeSc NoT AnD Or Id TyPe At DiStInCt InSeRt DeLeTe DaTa InTo
-		  CrEaTe DrOp GrApH`,
+		  cONsTruCT CrEaTe DrOp GrApH`,
 			[]Token{
 				{Type: ItemQuery, Text: "SeLeCt"},
 				{Type: ItemFrom, Text: "FrOm"},
@@ -76,6 +76,7 @@ func TestIndividualTokens(t *testing.T) {
 				{Type: ItemDelete, Text: "DeLeTe"},
 				{Type: ItemData, Text: "DaTa"},
 				{Type: ItemInto, Text: "InTo"},
+				{Type: ItemConstruct, Text: "cONsTruCT"},
 				{Type: ItemCreate, Text: "CrEaTe"},
 				{Type: ItemDrop, Text: "DrOp"},
 				{Type: ItemGraph, Text: "GrApH"},
@@ -101,6 +102,27 @@ func TestIndividualTokens(t *testing.T) {
 				{Type: ItemError, Text: "/_ delimiter"},
 				{Type: ItemEOF}}},
 
+		{"_:v1 _:foo_bar",
+			[]Token{
+				{Type: ItemBlankNode, Text: "_:v1"},
+				{Type: ItemBlankNode, Text: "_:foo_bar"},
+				{Type: ItemEOF}}},
+		{"_v1",
+			[]Token{
+				{Type: ItemError, Text: "_v",
+					ErrorMessage: "[lexer:0:2] blank node should start with _:"},
+				{Type: ItemEOF}}},
+
+		{"_:1v",
+			[]Token{
+				{Type: ItemError, Text: "_:1",
+					ErrorMessage: "[lexer:0:3] blank node label should begin with a letter"},
+				{Type: ItemEOF}}},
+		{"_:_",
+			[]Token{
+				{Type: ItemError, Text: "_:_",
+					ErrorMessage: "[lexer:0:3] blank node label should begin with a letter"},
+				{Type: ItemEOF}}},
 		{`"true"^^type:bool "1"^^type:int64"2"^^type:float64"t"^^type:text`,
 			[]Token{
 				{Type: ItemLiteral, Text: `"true"^^type:bool`},
@@ -208,6 +230,17 @@ func TestValidTokenQuery(t *testing.T) {
 			ItemBinding, ItemLT, ItemBinding, ItemAnd, ItemNot, ItemBinding, ItemOr,
 			ItemBinding, ItemEQ, ItemBinding, ItemLimit, ItemLiteral,
 			ItemSemicolon, ItemEOF}},
+		{`construct {?s "foo"@[] ?o} into ?a from ?b where {?s "foo"@[] ?o};`, []TokenType{
+			ItemConstruct, ItemLBracket, ItemBinding, ItemPredicate, ItemBinding,
+			ItemRBracket, ItemInto, ItemBinding, ItemFrom, ItemBinding, ItemWhere,
+			ItemLBracket, ItemBinding, ItemPredicate, ItemBinding, ItemRBracket,
+			ItemSemicolon, ItemEOF}},
+		{`construct {_:v1 "predicate"@[] ?p.
+			_:v1 "object"@[] ?o} into ?a from ?b where {?s "foo"@[] ?o};`, []TokenType{
+			ItemConstruct, ItemLBracket, ItemBlankNode, ItemPredicate, ItemBinding, ItemDot,
+			ItemBlankNode, ItemPredicate, ItemBinding, ItemRBracket, ItemInto, ItemBinding,
+			ItemFrom, ItemBinding, ItemWhere, ItemLBracket, ItemBinding, ItemPredicate,
+			ItemBinding, ItemRBracket, ItemSemicolon, ItemEOF}},
 	}
 	for _, test := range table {
 		_, c := lex(test.input, 0)