From 899321db55eef43c8f08c501692effb9e34563ed Mon Sep 17 00:00:00 2001 From: derekstride Date: Tue, 4 Jul 2023 11:32:02 -0400 Subject: [PATCH 1/4] feat(sets): Dedicated node with optional parens for set operations --- grammar.js | 75 ++++------ queries/highlights.scm | 6 +- test/corpus/create.txt | 33 ++--- test/corpus/cte.txt | 304 ++++++++++++++++++++++++++------------- test/corpus/select.txt | 144 ++++++++++++++----- test/highlight/union.sql | 16 +++ 6 files changed, 382 insertions(+), 196 deletions(-) create mode 100644 test/highlight/union.sql diff --git a/grammar.js b/grammar.js index 9d2d210..069cf72 100644 --- a/grammar.js +++ b/grammar.js @@ -593,12 +593,11 @@ module.exports = grammar({ ), _dml_read: $ => seq( - choice( - seq( - optional( - $._cte - ), + optional($._cte), + optional_parenthesis( + choice( $._select_statement, + $.set_operation, ), ), ), @@ -623,25 +622,30 @@ module.exports = grammar({ ')', ), - _select_statement: $ => seq( - $.select, - optional($.from), - repeat( + set_operation: $ => seq( + $._select_statement, + repeat1( seq( - choice( - seq( - $.keyword_union, - optional($.keyword_all), + field( + "operation", + choice( + seq($.keyword_union, optional($.keyword_all)), + $.keyword_except, + $.keyword_intersect, ), - $.keyword_except, - $.keyword_intersect, ), - $.select, - optional($.from), + $._select_statement, ), ), ), + _select_statement: $ => optional_parenthesis( + seq( + $.select, + optional($.from), + ), + ), + select: $ => seq( $.keyword_select, seq( @@ -759,33 +763,7 @@ module.exports = grammar({ ), ), - create_query: $ => choice( - $._select_statement, - seq( - $._cte, - $._select_statement, - ), - seq( - $._inner_create_query, - repeat( - seq( - $.keyword_union, - optional($.keyword_all), - $._inner_create_query, - ), - ) - ), - ), - - _inner_create_query: $ => choice( - seq( '(', $._select_statement, ')'), - seq( - '(', - $._cte, - $._select_statement, - ')', - ), - ), + create_query: $ => $._dml_read, create_view: $ => prec.right( seq( @@ -2496,6 +2474,15 @@ function unsigned_type($, type) { ) } +function optional_parenthesis(node) { + return prec.right( + choice( + node, + seq("(", node, ")"), + ), + ) +} + function parametric_type($, type, params = ['size']) { return prec.right(1, choice( diff --git a/queries/highlights.scm b/queries/highlights.scm index 26952ab..5cb4a06 100644 --- a/queries/highlights.scm +++ b/queries/highlights.scm @@ -165,12 +165,9 @@ (keyword_to) (keyword_schema) (keyword_owner) - (keyword_union) (keyword_all) (keyword_any) (keyword_some) - (keyword_except) - (keyword_intersect) (keyword_returning) (keyword_begin) (keyword_commit) @@ -314,6 +311,9 @@ (keyword_by) (keyword_on) (keyword_do) + (keyword_union) + (keyword_except) + (keyword_intersect) ] @keyword.operator [ diff --git a/test/corpus/create.txt b/test/corpus/create.txt index 8380c4c..b754f0f 100644 --- a/test/corpus/create.txt +++ b/test/corpus/create.txt @@ -1232,22 +1232,23 @@ UNION ALL (identifier)) (keyword_as) (create_query - (select - (keyword_select) - (select_expression - (term - (literal) - (keyword_as) - (identifier)))) - (keyword_union) - (keyword_all) - (select - (keyword_select) - (select_expression - (term - (literal) - (keyword_as) - (identifier)))))))) + (set_operation + (select + (keyword_select) + (select_expression + (term + (literal) + (keyword_as) + (identifier)))) + (keyword_union) + (keyword_all) + (select + (keyword_select) + (select_expression + (term + (literal) + (keyword_as) + (identifier))))))))) ================================================================================ Create view as select with cte diff --git a/test/corpus/cte.txt b/test/corpus/cte.txt index a092123..93a448b 100644 --- a/test/corpus/cte.txt +++ b/test/corpus/cte.txt @@ -231,52 +231,52 @@ FROM top_cte; -------------------------------------------------------------------------------- (program - (statement - (keyword_with) - (cte - (identifier) - (keyword_as) - (statement + (statement (keyword_with) (cte - (identifier) - (keyword_as) - (statement - (select - (keyword_select) - (select_expression - (term - (literal) - (keyword_as) - (identifier)) - (term - (literal) - (keyword_as) - (identifier)))))) + (identifier) + (keyword_as) + (statement + (keyword_with) + (cte + (identifier) + (keyword_as) + (statement + (select + (keyword_select) + (select_expression + (term + (literal) + (keyword_as) + (identifier)) + (term + (literal) + (keyword_as) + (identifier)))))) + (select + (keyword_select) + (select_expression + (term + (field + (identifier))) + (term + (field + (identifier))))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) (select - (keyword_select) - (select_expression - (term - (field - (identifier))) - (term - (field - (identifier))))) - (from - (keyword_from) - (relation - (object_reference - (identifier)))))) - (select - (keyword_select) - (select_expression - (term - (all_fields)))) - (from - (keyword_from) - (relation - (object_reference - (identifier)))))) + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) ================================================================================ Nested deeper @@ -298,64 +298,172 @@ FROM top_cte; -------------------------------------------------------------------------------- (program - (statement - (keyword_with) - (cte - (identifier) - (keyword_as) - (statement + (statement + (keyword_with) + (cte + (identifier) + (keyword_as) + (statement + (keyword_with) + (cte + (identifier) + (keyword_as) + (statement + (keyword_with) + (cte + (identifier) + (keyword_as) + (statement + (select + (keyword_select) + (select_expression + (term + (literal) + (keyword_as) + (identifier)) + (term + (literal) + (keyword_as) + (identifier)))))) + (select + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) + (select + (keyword_select) + (select_expression + (term + (field + (identifier))) + (term + (field + (identifier))))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) + (select + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) + +================================================================================ +CTE with parenthesized unions +================================================================================ + +with tb2 as ( + SELECT * FROM tb1 +) +( + (SELECT * FROM tb2) + UNION + (SELECT * FROM tb2) +) + +-------------------------------------------------------------------------------- + +(program + (statement (keyword_with) (cte - (identifier) - (keyword_as) - (statement - (keyword_with) - (cte - (identifier) - (keyword_as) - (statement + (identifier) + (keyword_as) + (statement (select - (keyword_select) - (select_expression + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) + (set_operation + (select + (keyword_select) + (select_expression (term - (literal) - (keyword_as) - (identifier)) + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))) + (keyword_union) + (select + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier))))))) + +================================================================================ +CTE with unions +================================================================================ + +with tb2 as ( + SELECT * FROM tb1 +) +(SELECT * FROM tb2) +UNION +(SELECT * FROM tb2) + +-------------------------------------------------------------------------------- + +(program + (statement + (keyword_with) + (cte + (identifier) + (keyword_as) + (statement + (select + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) + (set_operation + (select + (keyword_select) + (select_expression (term - (literal) - (keyword_as) - (identifier)))))) + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))) + (keyword_union) (select - (keyword_select) - (select_expression - (term - (all_fields)))) - (from - (keyword_from) - (relation - (object_reference - (identifier)))))) - (select - (keyword_select) - (select_expression - (term - (field - (identifier))) - (term - (field - (identifier))))) - (from - (keyword_from) - (relation - (object_reference - (identifier)))))) - (select - (keyword_select) - (select_expression - (term - (all_fields)))) - (from - (keyword_from) - (relation - (object_reference - (identifier)))))) + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier))))))) diff --git a/test/corpus/select.txt b/test/corpus/select.txt index 688e9c4..c0052cc 100644 --- a/test/corpus/select.txt +++ b/test/corpus/select.txt @@ -1746,18 +1746,91 @@ SELECT 1 UNION ALL SELECT 2; (program (statement - (select - (keyword_select) - (select_expression - (term - (literal)))) - (keyword_union) - (keyword_all) - (select - (keyword_select) - (select_expression - (term - (literal)))))) + (set_operation + (select + (keyword_select) + (select_expression + (term + value: (literal)))) + operation: (keyword_union) + operation: (keyword_all) + (select + (keyword_select) + (select_expression + (term + value: (literal))))))) + +================================================================================ +Union with parenthesis +================================================================================ + +(SELECT * FROM tb2) +UNION +(SELECT * FROM tb2) + +-------------------------------------------------------------------------------- + +(program + (statement + (set_operation + (select + (keyword_select) + (select_expression + (term + value: (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + name: (identifier)))) + operation: (keyword_union) + (select + (keyword_select) + (select_expression + (term + value: (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + name: (identifier))))))) + +================================================================================ +Union with many parenthesis +================================================================================ + +( + (SELECT * FROM tb2) + UNION + (SELECT * FROM tb2) +) + +-------------------------------------------------------------------------------- + +(program + (statement + (set_operation + (select + (keyword_select) + (select_expression + (term + value: (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + name: (identifier)))) + operation: (keyword_union) + (select + (keyword_select) + (select_expression + (term + value: (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + name: (identifier))))))) ================================================================================ Intersect @@ -1771,29 +1844,30 @@ SELECT b FROM two; (program (statement - (select - (keyword_select) - (select_expression - (term - (field - (identifier))))) - (from - (keyword_from) - (relation - (object_reference - (identifier)))) - (keyword_intersect) - (select - (keyword_select) - (select_expression - (term - (field - (identifier))))) - (from - (keyword_from) - (relation - (object_reference - (identifier)))))) + (set_operation + (select + (keyword_select) + (select_expression + (term + value: (field + name: (identifier))))) + (from + (keyword_from) + (relation + (object_reference + name: (identifier)))) + operation: (keyword_intersect) + (select + (keyword_select) + (select_expression + (term + value: (field + name: (identifier))))) + (from + (keyword_from) + (relation + (object_reference + name: (identifier))))))) ================================================================================ Simple CASE diff --git a/test/highlight/union.sql b/test/highlight/union.sql new file mode 100644 index 0000000..a3fea8e --- /dev/null +++ b/test/highlight/union.sql @@ -0,0 +1,16 @@ +(SELECT * FROM tb2) +-- <- punctuation.bracket +-- ^ keyword + -- ^ operator + -- ^ keyword + -- ^ type + -- ^ punctuation.bracket +UNION +-- ^ keyword.operator +(SELECT * FROM tb2) +-- <- punctuation.bracket +-- ^ keyword + -- ^ operator + -- ^ keyword + -- ^ type + -- ^ punctuation.bracket From 016fc4c4818e782c1d487602bb1304527d84b78e Mon Sep 17 00:00:00 2001 From: derekstride Date: Tue, 4 Jul 2023 11:33:34 -0400 Subject: [PATCH 2/4] chore: use optional_parenthesis method in more places --- grammar.js | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/grammar.js b/grammar.js index 069cf72..048b649 100644 --- a/grammar.js +++ b/grammar.js @@ -550,13 +550,8 @@ module.exports = grammar({ statement: $ => seq( choice( $._ddl_statement, - $._dml_read, $._dml_write, - seq( - '(', - $._dml_read, - ')', - ), + optional_parenthesis($._dml_read), ), ), @@ -1700,15 +1695,8 @@ module.exports = grammar({ ), _default_expression: $ => seq( - $.keyword_default, - choice( - seq( - '(', - $._inner_default_expression, - ')', - ), - $._inner_default_expression, - ) + $.keyword_default, + optional_parenthesis($._inner_default_expression), ), _inner_default_expression: $ => choice( $.literal, From 0f3f25e19492e66bcb77dc02a9466c21000f535a Mon Sep 17 00:00:00 2001 From: derekstride Date: Tue, 4 Jul 2023 14:15:47 -0400 Subject: [PATCH 3/4] feat: allow parenthesis around CTE --- grammar.js | 2 +- test/corpus/cte.txt | 78 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/grammar.js b/grammar.js index 048b649..186d6eb 100644 --- a/grammar.js +++ b/grammar.js @@ -588,7 +588,7 @@ module.exports = grammar({ ), _dml_read: $ => seq( - optional($._cte), + optional(optional_parenthesis($._cte)), optional_parenthesis( choice( $._select_statement, diff --git a/test/corpus/cte.txt b/test/corpus/cte.txt index 93a448b..c01f544 100644 --- a/test/corpus/cte.txt +++ b/test/corpus/cte.txt @@ -467,3 +467,81 @@ UNION (relation (object_reference (identifier))))))) + +================================================================================ +Parenthesis around CTE +================================================================================ + +( + with x as (select * from ints) +) +(select * from x); + +-------------------------------------------------------------------------------- + +(program + (statement + (keyword_with) + (cte + (identifier) + (keyword_as) + (statement + (select + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) + (select + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) + +================================================================================ +Parenthesis around everything +================================================================================ + +( + (with x as (select * from ints)) + (select * from x) +); + +-------------------------------------------------------------------------------- + +(program + (statement + (keyword_with) + (cte + (identifier) + (keyword_as) + (statement + (select + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) + (select + (keyword_select) + (select_expression + (term + (all_fields)))) + (from + (keyword_from) + (relation + (object_reference + (identifier)))))) From 5c01247696edd00687e427cf009451f3f2a0aba0 Mon Sep 17 00:00:00 2001 From: derekstride Date: Tue, 4 Jul 2023 14:39:52 -0400 Subject: [PATCH 4/4] refactor: add a `wrapped_in_parenthesis` helper function --- grammar.js | 170 +++++++++++++++++++++-------------------------------- 1 file changed, 67 insertions(+), 103 deletions(-) diff --git a/grammar.js b/grammar.js index 186d6eb..2c00c68 100644 --- a/grammar.js +++ b/grammar.js @@ -606,15 +606,12 @@ module.exports = grammar({ $.keyword_materialized, ), ), - '(', - alias( - choice( - $._dml_read, - $._dml_write, + wrapped_in_parenthesis( + alias( + choice($._dml_read, $._dml_write), + $.statement, ), - $.statement, ), - ')', ), set_operation: $ => seq( @@ -711,10 +708,7 @@ module.exports = grammar({ $.row_format, seq( $.keyword_tblproperties, - '(', - $.table_option, - repeat(seq(',', $.table_option)), - ')', + paren_list($.table_option, true), ), $.table_option, ), @@ -829,7 +823,7 @@ module.exports = grammar({ $.object_reference, choice( $.column_definitions, // TODO `default` will require own node type - seq('(', ')'), + wrapped_in_parenthesis(), ), $.keyword_returns, choice( @@ -879,11 +873,7 @@ module.exports = grammar({ seq( ':=', choice( - seq( - '(', - $.statement, - ')', - ), + wrapped_in_parenthesis($.statement), // TODO are there more possibilities here? We can't use `_expression` since // that includes subqueries $.literal, @@ -1492,10 +1482,7 @@ module.exports = grammar({ // the second argument is not a $.table_option _partition_spec: $ => seq( $.keyword_partition, - '(', - $.table_option, - repeat(seq(',', $.table_option)), - ')', + paren_list($.table_option, true), ), update: $ => seq( @@ -1581,10 +1568,7 @@ module.exports = grammar({ table_sort: $ => seq( $.keyword_sort, $.keyword_by, - '(', - $.identifier, - repeat(seq(',', ($.identifier))), - ')', + paren_list($.identifier, true), ), table_partition: $ => seq( @@ -1609,7 +1593,7 @@ module.exports = grammar({ choice( paren_list($.identifier),// postgres & Impala (CTAS) $.column_definitions, // impala/hive external tables - seq('(', $._key_value_pair, repeat(seq(',', $._key_value_pair)), ')',), // Spark SQL + paren_list($._key_value_pair, true), // Spark SQL ) ), @@ -1658,10 +1642,7 @@ module.exports = grammar({ column_definitions: $ => seq( '(', - $.column_definition, - repeat( - seq(',', $.column_definition), - ), + comma_list($.column_definition, true), optional($.constraints), ')', ), @@ -1896,20 +1877,18 @@ module.exports = grammar({ cast: $ => seq( field('name', $.keyword_cast), - '(', - seq( - field('parameter', $._expression), - $.keyword_as, - $._type, + wrapped_in_parenthesis( + seq( + field('parameter', $._expression), + $.keyword_as, + $._type, + ), ), - ')', ), filter_expression : $ => seq( $.keyword_filter, - '(', - $.where, - ')', + wrapped_in_parenthesis($.where), ), invocation: $ => prec(1, @@ -1928,17 +1907,17 @@ module.exports = grammar({ ) ), // _aggregate_function, e.g. group_concat - seq( - '(', - optional($.keyword_distinct), - field('parameter', $.term), - optional($.order_by), - optional(seq( - choice($.keyword_separator, ','), - alias($._literal_string, $.literal) - )), - optional($.limit), - ')', + wrapped_in_parenthesis( + seq( + optional($.keyword_distinct), + field('parameter', $.term), + optional($.order_by), + optional(seq( + choice($.keyword_separator, ','), + alias($._literal_string, $.literal) + )), + optional($.limit), + ), ), ), optional( @@ -2033,20 +2012,12 @@ module.exports = grammar({ $.window_specification, ), - window_specification: $ => seq( - '(', - seq( - optional( - $.partition_by, - ), - optional( - $.order_by - ), - optional( - $.window_frame, - ), - ), - ')', + window_specification: $ => wrapped_in_parenthesis( + seq( + optional($.partition_by), + optional($.order_by), + optional($.window_frame), + ), ), window_function: $ => seq( @@ -2091,11 +2062,7 @@ module.exports = grammar({ $.subquery, $.invocation, $.object_reference, - seq( - '(', - $.values, - ')', - ), + wrapped_in_parenthesis($.values), ), optional( seq( @@ -2132,9 +2099,9 @@ module.exports = grammar({ $.keyword_join, ), ), - '(', - field('index_name', $.identifier), - ')', + wrapped_in_parenthesis( + field('index_name', $.identifier), + ), ), join: $ => seq( @@ -2306,7 +2273,7 @@ module.exports = grammar({ $.array, $.interval, $.between_expression, - seq("(", $._expression, ")"), + wrapped_in_parenthesis($._expression), ) ), @@ -2404,12 +2371,12 @@ module.exports = grammar({ $.keyword_in, ), - subquery: $ => seq( - '(', - $.select, - optional($.from), - optional(";"), - ')', + subquery: $ => wrapped_in_parenthesis( + seq( + $.select, + optional($.from), + optional(";"), + ), ), list: $ => paren_list($._expression), @@ -2466,53 +2433,50 @@ function optional_parenthesis(node) { return prec.right( choice( node, - seq("(", node, ")"), + wrapped_in_parenthesis(node), ), ) } +function wrapped_in_parenthesis(node) { + if (node) { + return seq("(", node, ")"); + } + return seq("(", ")"); +} + function parametric_type($, type, params = ['size']) { return prec.right(1, choice( type, seq( type, - '(', - // first parameter is guaranteed, shift it out of the array - field(params.shift(), alias($._natural_number, $.literal)), - // then, fill in the ", next" until done - ...params.map(p => seq(',', field(p, alias($._natural_number, $.literal)))), - ')', + wrapped_in_parenthesis( + seq( + // first parameter is guaranteed, shift it out of the array + field(params.shift(), alias($._natural_number, $.literal)), + // then, fill in the ", next" until done + ...params.map(p => seq(',', field(p, alias($._natural_number, $.literal)))), + ), + ), ), ), ) } function comma_list(field, requireFirst) { + sequence = seq(field, repeat(seq(',', field))); + if (requireFirst) { - return seq( - field, - repeat( - seq(',', field) - ) - ); + return sequence; } - return optional( - seq( - field, - repeat( - seq(',', field) - ), - ), - ); + return optional(sequence); } function paren_list(field, requireFirst) { - return seq( - '(', + return wrapped_in_parenthesis( comma_list(field, requireFirst), - ')', ) }