Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: support join elimination rule #8021

Merged
merged 29 commits into from
Nov 9, 2018
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6820811
initialization
lzmhhh123 Oct 23, 2018
fa10389
debug
lzmhhh123 Oct 23, 2018
3394288
add tests
lzmhhh123 Oct 24, 2018
64996b5
Merge branch 'master' into dev/join_elimination
lzmhhh123 Oct 24, 2018
48df0c1
Update explain_easy.result
lzmhhh123 Oct 24, 2018
11fe6a7
Update explain_easy.result
lzmhhh123 Oct 24, 2018
05bc4eb
debug
lzmhhh123 Oct 24, 2018
438fafb
address comment
lzmhhh123 Oct 29, 2018
27f2b06
address comments
lzmhhh123 Oct 30, 2018
80b9125
Merge branch 'master' into dev/join_elimination
lzmhhh123 Oct 30, 2018
1b4903e
fix
lzmhhh123 Oct 30, 2018
9842cad
Merge branch 'dev/join_elimination' of https://github.com/lzmhhh123/t…
lzmhhh123 Oct 30, 2018
53d18e4
address comments
lzmhhh123 Nov 2, 2018
2cb071f
address comment
lzmhhh123 Nov 2, 2018
01649a1
change join eliminating rule order
lzmhhh123 Nov 2, 2018
4b4936d
Merge branch 'master' into dev/join_elimination
zz-jason Nov 2, 2018
83b589c
change rule order
lzmhhh123 Nov 5, 2018
89a6bb7
Merge branch 'master' into dev/join_elimination
lzmhhh123 Nov 5, 2018
155445e
address
lzmhhh123 Nov 5, 2018
129de34
Merge branch 'dev/join_elimination' of https://github.com/lzmhhh123/t…
lzmhhh123 Nov 5, 2018
91a5cdb
fix count args
lzmhhh123 Nov 5, 2018
03ad39a
add comments
lzmhhh123 Nov 5, 2018
d5a4164
improve
lzmhhh123 Nov 6, 2018
122f438
address comments
lzmhhh123 Nov 6, 2018
3c93712
ci
lzmhhh123 Nov 6, 2018
8fff6e8
address comments
lzmhhh123 Nov 7, 2018
684fd29
fix
lzmhhh123 Nov 9, 2018
f1de131
Merge branch 'master' into dev/join_elimination
zz-jason Nov 9, 2018
71e757d
Merge branch 'master' into dev/join_elimination
zz-jason Nov 9, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,19 @@ Projection_5 8000.00 root test.ta.a
└─TableReader_9 10000.00 root data:TableScan_8
└─TableScan_8 10000.00 cop table:ta, range:[-inf,+inf], keep order:false, stats:pseudo
rollback;
drop table if exists t1, t2;
create table t1(a int, b int, c int, primary key(a, b));
create table t2(a int, b int, c int, primary key(a));
explain select t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
id count task operator info
TableReader_7 10000.00 root data:TableScan_6
└─TableScan_6 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
id count task operator info
StreamAgg_19 8000.00 root group by:col_2, col_3, funcs:firstrow(col_0), firstrow(col_1)
└─IndexReader_20 8000.00 root index:StreamAgg_10
└─StreamAgg_10 8000.00 cop group by:test.t1.a, test.t1.b, funcs:firstrow(test.t1.a), firstrow(test.t1.b)
└─IndexScan_18 10000.00 cop table:t1, index:a, b, range:[NULL,+inf], keep order:true, stats:pseudo
drop table if exists t;
create table t(a int, nb int not null, nc int not null);
explain select ifnull(a, 0) from t;
Expand Down
7 changes: 7 additions & 0 deletions cmd/explaintest/t/explain_easy.test
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ insert tb values ('1');
explain select * from ta where a = 1;
rollback;

# outer join elimination
drop table if exists t1, t2;
create table t1(a int, b int, c int, primary key(a, b));
create table t2(a int, b int, c int, primary key(a));
explain select t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;

# https://github.com/pingcap/tidb/issues/7918
drop table if exists t;
create table t(a int, nb int not null, nc int not null);
Expand Down
4 changes: 4 additions & 0 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,13 @@ func (b *PlanBuilder) buildJoin(joinNode *ast.Join) (LogicalPlan, error) {
// Set join type.
switch joinNode.Tp {
case ast.LeftJoin:
// A left outer join needs to be checked for possible elimination.
b.optFlag = b.optFlag | flagEliminateOuterJoin
joinPlan.JoinType = LeftOuterJoin
resetNotNullFlag(joinPlan.schema, leftPlan.Schema().Len(), joinPlan.schema.Len())
case ast.RightJoin:
// A right outer join needs to be checked for possible elimination.
b.optFlag = b.optFlag | flagEliminateOuterJoin
joinPlan.JoinType = RightOuterJoin
resetNotNullFlag(joinPlan.schema, 0, leftPlan.Schema().Len())
default:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the optimization phase, semi joins are also considered, so I think we should add the optimization flag for these semi joins here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and please add some explain tests to cover these optimizations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After serious consideration, I think semi-joins can't be eliminated in any case, so I will remove semi-joins from the elimination rule.

Expand Down
46 changes: 46 additions & 0 deletions planner/core/logical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2021,3 +2021,49 @@ func (s *testPlanSuite) TestNameResolver(c *C) {
}
}
}

func (s *testPlanSuite) TestOuterJoinEliminator(c *C) {
defer testleak.AfterTest(c)()
tests := []struct {
sql string
best string
}{
// Test left outer join + distinct
{
sql: "select distinct t1.a, t1.b from t t1 left outer join t t2 on t1.b = t2.b",
best: "DataScan(t1)->Aggr(firstrow(t1.a),firstrow(t1.b))",
},
// Test right outer join + distinct
{
sql: "select distinct t2.a, t2.b from t t1 right outer join t t2 on t1.b = t2.b",
best: "DataScan(t2)->Aggr(firstrow(t2.a),firstrow(t2.b))",
},
// Test left outer join
{
sql: "select t1.b from t t1 left outer join t t2 on t1.a = t2.a",
best: "DataScan(t1)->Projection",
},
// Test right outer join
{
sql: "select t2.b from t t1 right outer join t t2 on t1.a = t2.a",
best: "DataScan(t2)->Projection",
},
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
}

for i, tt := range tests {
comment := Commentf("case:%v sql:%s", i, tt.sql)
stmt, err := s.ParseOneStmt(tt.sql, "", "")
c.Assert(err, IsNil, comment)
Preprocess(s.ctx, stmt, s.is, false)
builder := &PlanBuilder{
ctx: mockContext(),
is: s.is,
colMapper: make(map[*ast.ColumnNameExpr]int),
}
p, err := builder.Build(stmt)
c.Assert(err, IsNil)
p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan))
c.Assert(err, IsNil)
c.Assert(ToString(p), Equals, tt.best, comment)
}
}
2 changes: 2 additions & 0 deletions planner/core/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const (
flagEliminateAgg
flagEliminateProjection
flagMaxMinEliminate
flagEliminateOuterJoin
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
flagPredicatePushDown
flagPartitionProcessor
flagPushDownAgg
Expand All @@ -51,6 +52,7 @@ var optRuleList = []logicalOptRule{
&aggregationEliminator{},
&projectionEliminater{},
&maxMinEliminator{},
&outerJoinEliminator{},
&ppdSolver{},
&partitionProcessor{},
&aggregationPushDownSolver{},
Expand Down
165 changes: 165 additions & 0 deletions planner/core/rule_join_elimination.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Copyright 2018 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package core

import (
"github.com/pingcap/parser/ast"
"github.com/pingcap/tidb/expression"
)

type outerJoinEliminator struct {
cols [][]*expression.Column
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
schemas []*expression.Schema
}

// tryToEliminateOuterJoin will eliminate outer join plan base on the following rules
// 1. outer join elimination: For example left outer join, if the parent only use the
// columns from left table and the join key of right table(the inner table) is a unique
// key of the right table. the left outer join can be eliminated.
// 2. outer join elimination with distinct: For example left outer join. If the parent
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
// only use the columns from left table with 'distinct' label. The left outer join can
// be eliminated.
func (o *outerJoinEliminator) tryToEliminateOuterJoin(p *LogicalJoin) LogicalPlan {
switch p.JoinType {
case LeftOuterJoin:
return o.doEliminate(p, 1)
case RightOuterJoin:
return o.doEliminate(p, 0)
default:
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
return nil
}
}

func (o *outerJoinEliminator) doEliminate(p *LogicalJoin, innerChildIdx int) LogicalPlan {
// outer join elimination with distinct
if len(o.cols) > 0 {
cols := o.cols[len(o.cols)-1]
allColsInSchema := true
for _, col := range cols {
columnName := &ast.ColumnName{Schema: col.DBName, Table: col.TblName, Name: col.ColName}
if c, _ := p.children[1^innerChildIdx].Schema().FindColumn(columnName); c == nil {
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
allColsInSchema = false
break
}
}
if allColsInSchema == true {
return p.children[1^innerChildIdx]
}
}

// outer join elimination without distinct
// first, check whether the parent's schema columns are all in left or right
if len(o.schemas) == 0 {
return nil
}
for _, col := range o.schemas[len(o.schemas)-1].Columns {
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
columnName := &ast.ColumnName{Schema: col.DBName, Table: col.TblName, Name: col.ColName}
if c, _ := p.children[1^innerChildIdx].Schema().FindColumn(columnName); c == nil {
return nil
}
}
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
// second, check whether the other side join keys are unique keys
p.children[innerChildIdx].buildKeyInfo()
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
var joinKeys []*expression.Column
for _, eqCond := range p.EqualConditions {
joinKeys = append(joinKeys, eqCond.GetArgs()[innerChildIdx].(*expression.Column))
}
tmpSchema := expression.NewSchema(joinKeys...)
for _, keyInfo := range p.children[innerChildIdx].Schema().Keys {
joinKeysContainKeyInfo := true
for _, col := range keyInfo {
columnName := &ast.ColumnName{Schema: col.DBName, Table: col.TblName, Name: col.ColName}
if c, _ := tmpSchema.FindColumn(columnName); c == nil {
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
joinKeysContainKeyInfo = false
break
}
}
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
if joinKeysContainKeyInfo {
return p.children[1^innerChildIdx]
}
}
// Third, if p.children[innerChildIdx] is datasource, we must check specially index.
// Because buildKeyInfo() don't save the index without notnull flag.
// But in outer join, null==null don't return true. The null index do not affect the res.
if ds, ok := p.children[innerChildIdx].(*DataSource); ok {
eurekaka marked this conversation as resolved.
Show resolved Hide resolved
for _, path := range ds.possibleAccessPaths {
if path.isTablePath {
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
continue
}
idx := path.index
if !idx.Unique {
continue
}
joinKeysContainIndex := true
for _, idxCol := range idx.Columns {
columnName := &ast.ColumnName{Schema: ds.DBName, Table: ds.tableInfo.Name, Name: idxCol.Name}
if c, _ := tmpSchema.FindColumn(columnName); c == nil {
joinKeysContainIndex = false
break
}
}
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
if joinKeysContainIndex {
return p.children[1^innerChildIdx]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we remove other possible paths in this case?

}
}
}
return nil
}

func (o *outerJoinEliminator) optimize(p LogicalPlan) (LogicalPlan, error) {
// check the distinct
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
if agg, ok := p.(*LogicalAggregation); ok && len(agg.groupByCols) > 0 {
isDistinctAgg := true
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
for _, aggDesc := range agg.AggFuncs {
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
if aggDesc.Name != ast.AggFuncFirstRow &&
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
aggDesc.Name != ast.AggFuncMax &&
aggDesc.Name != ast.AggFuncMin {
XuHuaiyu marked this conversation as resolved.
Show resolved Hide resolved
isDistinctAgg = false
break
}
if _, ok := aggDesc.Args[0].(*expression.Column); !ok {
isDistinctAgg = false
break
}
}
if isDistinctAgg {
o.cols = append(o.cols, agg.groupByCols)
defer func() {
o.cols = o.cols[:len(o.cols)-1]
}()
}
}

newChildren := make([]LogicalPlan, 0, len(p.Children()))
for _, child := range p.Children() {
// if child is logical join, then save the parent schema
if _, ok := child.(*LogicalJoin); ok {
o.schemas = append(o.schemas, p.Schema())
defer func() {
o.schemas = o.schemas[:len(o.schemas)-1]
}()
eurekaka marked this conversation as resolved.
Show resolved Hide resolved
}
newChild, _ := o.optimize(child)
newChildren = append(newChildren, newChild)
}
p.SetChildren(newChildren...)
join, ok := p.(*LogicalJoin)
if !ok {
return p, nil
}
if proj := o.tryToEliminateOuterJoin(join); proj != nil {
return proj, nil
}
return p, nil
}