Skip to content

Commit

Permalink
plan: build anti semi join for NOT EXISTS (#7842)
Browse files Browse the repository at this point in the history
  • Loading branch information
eurekaka committed Oct 9, 2018
1 parent c19f8fb commit d60a1a2
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 36 deletions.
2 changes: 2 additions & 0 deletions ast/expressions.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ type ExistsSubqueryExpr struct {
exprNode
// Sel is the subquery, may be rewritten to other type of expression.
Sel ExprNode
// Not is true if the expression is "NOT EXISTS".
Not bool
}

// Format the ExprNode into a Writer.
Expand Down
7 changes: 7 additions & 0 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,13 @@ Projection_7 12.50 root t1.a, t1.b, t2.a, t2.b
│ └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
└─TableReader_14 10000.00 root data:TableScan_13
└─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
explain select * from t t1 where not exists (select * from t t2 where t1.b = t2.b);
id count task operator info
HashLeftJoin_9 8000.00 root anti semi join, inner:TableReader_13, equal:[eq(t1.b, t2.b)]
├─TableReader_11 10000.00 root data:TableScan_10
│ └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
└─TableReader_13 10000.00 root data:TableScan_12
└─TableScan_12 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
drop table if exists t;
create table t(a bigint primary key);
explain select * from t where a = 1 and a = 2;
Expand Down
64 changes: 31 additions & 33 deletions cmd/explaintest/r/tpch.result
Original file line number Diff line number Diff line change
Expand Up @@ -1215,31 +1215,30 @@ id count task operator info
Projection_25 100.00 root tpch.supplier.s_name, 17_col_0
└─TopN_28 100.00 root 17_col_0:desc, tpch.supplier.s_name:asc, offset:0, count:100
└─HashAgg_31 320000.00 root group by:tpch.supplier.s_name, funcs:count(1), firstrow(tpch.supplier.s_name)
└─Selection_32 3786715.90 root not(16_aux_0)
└─IndexJoin_38 4733394.87 root left outer semi join, inner:IndexLookUp_37, outer key:l1.l_orderkey, inner key:l3.l_orderkey, other cond:ne(l3.l_suppkey, l1.l_suppkey)
├─IndexJoin_82 4733394.87 root semi join, inner:IndexLookUp_81, outer key:l1.l_orderkey, inner key:l2.l_orderkey, other cond:ne(l2.l_suppkey, l1.l_suppkey), ne(l2.l_suppkey, tpch.supplier.s_suppkey)
│ ├─HashLeftJoin_88 5916743.59 root inner join, inner:TableReader_117, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)]
│ │ ├─HashLeftJoin_93 147918589.81 root inner join, inner:TableReader_114, equal:[eq(l1.l_suppkey, tpch.supplier.s_suppkey)]
│ │ │ ├─IndexJoin_100 147918589.81 root inner join, inner:IndexLookUp_99, outer key:tpch.orders.o_orderkey, inner key:l1.l_orderkey
│ │ │ │ ├─TableReader_109 36517371.00 root data:Selection_108
│ │ │ │ │ └─Selection_108 36517371.00 cop eq(tpch.orders.o_orderstatus, "F")
│ │ │ │ │ └─TableScan_107 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
│ │ │ │ └─IndexLookUp_99 240004648.80 root
│ │ │ │ ├─IndexScan_96 1.00 cop table:l1, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
│ │ │ │ └─Selection_98 240004648.80 cop gt(l1.l_receiptdate, l1.l_commitdate)
│ │ │ │ └─TableScan_97 1.00 cop table:lineitem, keep order:false
│ │ │ └─TableReader_114 500000.00 root data:TableScan_113
│ │ │ └─TableScan_113 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false
│ │ └─TableReader_117 1.00 root data:Selection_116
│ │ └─Selection_116 1.00 cop eq(tpch.nation.n_name, "EGYPT")
│ │ └─TableScan_115 25.00 cop table:nation, range:[-inf,+inf], keep order:false
│ └─IndexLookUp_81 1.00 root
│ ├─IndexScan_79 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false
│ └─TableScan_80 1.00 cop table:lineitem, keep order:false
└─IndexLookUp_37 240004648.80 root
├─IndexScan_34 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false
└─Selection_36 240004648.80 cop gt(l3.l_receiptdate, l3.l_commitdate)
└─TableScan_35 1.00 cop table:lineitem, keep order:false
└─IndexJoin_37 3786715.90 root anti semi join, inner:IndexLookUp_36, outer key:l1.l_orderkey, inner key:l3.l_orderkey, other cond:ne(l3.l_suppkey, l1.l_suppkey), ne(l3.l_suppkey, tpch.supplier.s_suppkey)
├─IndexJoin_81 4733394.87 root semi join, inner:IndexLookUp_80, outer key:l1.l_orderkey, inner key:l2.l_orderkey, other cond:ne(l2.l_suppkey, l1.l_suppkey), ne(l2.l_suppkey, tpch.supplier.s_suppkey)
│ ├─HashLeftJoin_87 5916743.59 root inner join, inner:TableReader_116, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)]
│ │ ├─HashLeftJoin_92 147918589.81 root inner join, inner:TableReader_113, equal:[eq(l1.l_suppkey, tpch.supplier.s_suppkey)]
│ │ │ ├─IndexJoin_99 147918589.81 root inner join, inner:IndexLookUp_98, outer key:tpch.orders.o_orderkey, inner key:l1.l_orderkey
│ │ │ │ ├─TableReader_108 36517371.00 root data:Selection_107
│ │ │ │ │ └─Selection_107 36517371.00 cop eq(tpch.orders.o_orderstatus, "F")
│ │ │ │ │ └─TableScan_106 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
│ │ │ │ └─IndexLookUp_98 240004648.80 root
│ │ │ │ ├─IndexScan_95 1.00 cop table:l1, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
│ │ │ │ └─Selection_97 240004648.80 cop gt(l1.l_receiptdate, l1.l_commitdate)
│ │ │ │ └─TableScan_96 1.00 cop table:lineitem, keep order:false
│ │ │ └─TableReader_113 500000.00 root data:TableScan_112
│ │ │ └─TableScan_112 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false
│ │ └─TableReader_116 1.00 root data:Selection_115
│ │ └─Selection_115 1.00 cop eq(tpch.nation.n_name, "EGYPT")
│ │ └─TableScan_114 25.00 cop table:nation, range:[-inf,+inf], keep order:false
│ └─IndexLookUp_80 1.00 root
│ ├─IndexScan_78 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false
│ └─TableScan_79 1.00 cop table:lineitem, keep order:false
└─IndexLookUp_36 240004648.80 root
├─IndexScan_33 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false
└─Selection_35 240004648.80 cop gt(l3.l_receiptdate, l3.l_commitdate)
└─TableScan_34 1.00 cop table:lineitem, keep order:false
/*
Q22 Global Sales Opportunity Query
The Global Sales Opportunity Query identifies geographies where there are customers who may be likely to make a
Expand Down Expand Up @@ -1291,11 +1290,10 @@ Sort_32 1.00 root custsale.cntrycode:asc
└─Projection_34 1.00 root custsale.cntrycode, 28_col_0, 28_col_1
└─HashAgg_37 1.00 root group by:custsale.cntrycode, funcs:count(1), sum(custsale.c_acctbal), firstrow(custsale.cntrycode)
└─Projection_38 0.00 root substring(tpch.customer.c_phone, 1, 2), tpch.customer.c_acctbal
└─Selection_39 0.00 root not(26_aux_0)
└─HashLeftJoin_40 0.00 root left outer semi join, inner:TableReader_46, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
├─Selection_41 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21")
│ └─TableReader_44 0.00 root data:Selection_43
│ └─Selection_43 0.00 cop gt(tpch.customer.c_acctbal, NULL)
│ └─TableScan_42 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false
└─TableReader_46 75000000.00 root data:TableScan_45
└─TableScan_45 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
└─HashLeftJoin_39 0.00 root anti semi join, inner:TableReader_45, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
├─Selection_40 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21")
│ └─TableReader_43 0.00 root data:Selection_42
│ └─Selection_42 0.00 cop gt(tpch.customer.c_acctbal, NULL)
│ └─TableScan_41 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false
└─TableReader_45 75000000.00 root data:TableScan_44
└─TableScan_44 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
1 change: 1 addition & 0 deletions cmd/explaintest/t/explain_easy.test
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ explain select * from t where a = 1 and a = 1;
explain select * from t where a = 1 and a = 2;
explain select * from t where b = 1 and b = 2;
explain select * from t t1 join t t2 where t1.b = t2.b and t2.b is null;
explain select * from t t1 where not exists (select * from t t2 where t1.b = t2.b);

drop table if exists t;
create table t(a bigint primary key);
Expand Down
8 changes: 7 additions & 1 deletion parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -2348,7 +2348,13 @@ Expression:
}
|	"NOT" Expression %prec not
	{
		// NOT applied directly to an EXISTS subquery is folded into the
		// subquery node's Not flag instead of being wrapped in a unary NOT.
		// The flag is toggled rather than set so that stacked negations
		// such as NOT NOT EXISTS (...) cancel out correctly.
		expr, ok := $2.(*ast.ExistsSubqueryExpr)
		if ok {
			expr.Not = !expr.Not
			$$ = $2
		} else {
			$$ = &ast.UnaryOperationExpr{Op: opcode.Not, V: $2}
		}
	}
| BoolPri IsOrNotOp trueKwd %prec is
{
Expand Down
18 changes: 18 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2425,3 +2425,21 @@ func (s *testParserSuite) TestTablePartition(c *C) {
createTable := stmt.(*ast.CreateTableStmt)
c.Assert(createTable.Partition.Definitions[0].Comment, Equals, "check")
}

// TestNotExistsSubquery checks that a WHERE clause of the form
// "NOT EXISTS (subquery)" is parsed into an *ast.ExistsSubqueryExpr with its
// Not flag set, instead of some other expression node.
func (s *testParserSuite) TestNotExistsSubquery(c *C) {
	defer testleak.AfterTest(c)()
	// In this table the ok field holds the expected value of
	// ExistsSubqueryExpr.Not for the parsed WHERE clause.
	table := []testCase{
		{`select * from t1 where not exists (select * from t2 where t1.a = t2.a)`, true},
	}

	p := New()
	for _, tt := range table {
		stmts, err := p.Parse(tt.src, "", "")
		c.Assert(err, IsNil)
		// Guard the index and the type assertion below so that a parser
		// regression is reported as a failed assertion, not as a panic.
		c.Assert(len(stmts) > 0, IsTrue)

		sel, ok := stmts[0].(*ast.SelectStmt)
		c.Assert(ok, IsTrue)

		exists, ok := sel.Where.(*ast.ExistsSubqueryExpr)
		c.Assert(ok, IsTrue)
		c.Assert(exists.Not, Equals, tt.ok)
	}
}
4 changes: 2 additions & 2 deletions planner/core/expression_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ func (er *expressionRewriter) handleExistSubquery(v *ast.ExistsSubqueryExpr) (as
}
np = er.popExistsSubPlan(np)
if len(np.extractCorrelatedCols()) > 0 {
er.p, er.err = er.b.buildSemiApply(er.p, np, nil, er.asScalar, false)
er.p, er.err = er.b.buildSemiApply(er.p, np, nil, er.asScalar, v.Not)
if er.err != nil || !er.asScalar {
return v, true
}
Expand All @@ -562,7 +562,7 @@ func (er *expressionRewriter) handleExistSubquery(v *ast.ExistsSubqueryExpr) (as
er.err = errors.Trace(err)
return v, true
}
if len(rows) > 0 {
if (len(rows) > 0 && !v.Not) || (len(rows) == 0 && v.Not) {
er.ctxStack = append(er.ctxStack, expression.One.Clone())
} else {
er.ctxStack = append(er.ctxStack, expression.Zero.Clone())
Expand Down

0 comments on commit d60a1a2

Please sign in to comment.