Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: support join elimination rule #8021

Merged
merged 29 commits into from
Nov 9, 2018
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6820811
initialization
lzmhhh123 Oct 23, 2018
fa10389
debug
lzmhhh123 Oct 23, 2018
3394288
add tests
lzmhhh123 Oct 24, 2018
64996b5
Merge branch 'master' into dev/join_elimination
lzmhhh123 Oct 24, 2018
48df0c1
Update explain_easy.result
lzmhhh123 Oct 24, 2018
11fe6a7
Update explain_easy.result
lzmhhh123 Oct 24, 2018
05bc4eb
debug
lzmhhh123 Oct 24, 2018
438fafb
address comment
lzmhhh123 Oct 29, 2018
27f2b06
address comments
lzmhhh123 Oct 30, 2018
80b9125
Merge branch 'master' into dev/join_elimination
lzmhhh123 Oct 30, 2018
1b4903e
fix
lzmhhh123 Oct 30, 2018
9842cad
Merge branch 'dev/join_elimination' of https://github.com/lzmhhh123/t…
lzmhhh123 Oct 30, 2018
53d18e4
address comments
lzmhhh123 Nov 2, 2018
2cb071f
address comment
lzmhhh123 Nov 2, 2018
01649a1
change join eliminating rule order
lzmhhh123 Nov 2, 2018
4b4936d
Merge branch 'master' into dev/join_elimination
zz-jason Nov 2, 2018
83b589c
change rule order
lzmhhh123 Nov 5, 2018
89a6bb7
Merge branch 'master' into dev/join_elimination
lzmhhh123 Nov 5, 2018
155445e
address
lzmhhh123 Nov 5, 2018
129de34
Merge branch 'dev/join_elimination' of https://github.com/lzmhhh123/t…
lzmhhh123 Nov 5, 2018
91a5cdb
fix count args
lzmhhh123 Nov 5, 2018
03ad39a
add comments
lzmhhh123 Nov 5, 2018
d5a4164
improve
lzmhhh123 Nov 6, 2018
122f438
address comments
lzmhhh123 Nov 6, 2018
3c93712
ci
lzmhhh123 Nov 6, 2018
8fff6e8
address comments
lzmhhh123 Nov 7, 2018
684fd29
fix
lzmhhh123 Nov 9, 2018
f1de131
Merge branch 'master' into dev/join_elimination
zz-jason Nov 9, 2018
71e757d
Merge branch 'master' into dev/join_elimination
zz-jason Nov 9, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,19 @@ Projection_5 8000.00 root test.ta.a
└─TableReader_9 10000.00 root data:TableScan_8
└─TableScan_8 10000.00 cop table:ta, range:[-inf,+inf], keep order:false, stats:pseudo
rollback;
drop table if exists t1, t2;
create table t1(a int, b int, c int, primary key(a, b));
create table t2(a int, b int, c int, primary key(a));
explain select t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
id count task operator info
TableReader_7 10000.00 root data:TableScan_6
└─TableScan_6 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
id count task operator info
StreamAgg_19 8000.00 root group by:col_2, col_3, funcs:firstrow(col_0), firstrow(col_1)
└─IndexReader_20 8000.00 root index:StreamAgg_10
└─StreamAgg_10 8000.00 cop group by:test.t1.a, test.t1.b, funcs:firstrow(test.t1.a), firstrow(test.t1.b)
└─IndexScan_18 10000.00 cop table:t1, index:a, b, range:[NULL,+inf], keep order:true, stats:pseudo
drop table if exists t;
create table t(a int, nb int not null, nc int not null);
explain select ifnull(a, 0) from t;
Expand Down
7 changes: 7 additions & 0 deletions cmd/explaintest/t/explain_easy.test
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ insert tb values ('1');
explain select * from ta where a = 1;
rollback;

# outer join elimination
drop table if exists t1, t2;
create table t1(a int, b int, c int, primary key(a, b));
create table t2(a int, b int, c int, primary key(a));
explain select t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;

# https://github.com/pingcap/tidb/issues/7918
drop table if exists t;
create table t(a int, nb int not null, nc int not null);
Expand Down
4 changes: 4 additions & 0 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,13 @@ func (b *PlanBuilder) buildJoin(joinNode *ast.Join) (LogicalPlan, error) {
// Set join type.
switch joinNode.Tp {
case ast.LeftJoin:
// A left outer join is a candidate for outer join elimination.
b.optFlag = b.optFlag | flagEliminateOuterJoin
joinPlan.JoinType = LeftOuterJoin
resetNotNullFlag(joinPlan.schema, leftPlan.Schema().Len(), joinPlan.schema.Len())
case ast.RightJoin:
// A right outer join is a candidate for outer join elimination.
b.optFlag = b.optFlag | flagEliminateOuterJoin
joinPlan.JoinType = RightOuterJoin
resetNotNullFlag(joinPlan.schema, 0, leftPlan.Schema().Len())
default:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the optimization phase, semi joins are also considered, so I think we should set the optimization flag for semi joins here as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and please add some explain tests to cover these optimizations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After serious consideration, I think semi-joins can't be eliminated in any case, so I will remove semi-joins from the elimination rule.

Expand Down
64 changes: 64 additions & 0 deletions planner/core/logical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2021,3 +2021,67 @@ func (s *testPlanSuite) TestNameResolver(c *C) {
}
}
}

func (s *testPlanSuite) TestOuterJoinEliminator(c *C) {
defer testleak.AfterTest(c)()
tests := []struct {
sql string
best string
}{
// Test left outer join + distinct
{
sql: "select distinct t1.a, t1.b from t t1 left outer join t t2 on t1.b = t2.b",
best: "DataScan(t1)->Aggr(firstrow(t1.a),firstrow(t1.b))",
},
// Test right outer join + distinct
{
sql: "select distinct t2.a, t2.b from t t1 right outer join t t2 on t1.b = t2.b",
best: "DataScan(t2)->Aggr(firstrow(t2.a),firstrow(t2.b))",
},
// Test duplicate agnostic agg functions on join
{
sql: "select max(t1.a), min(t1.b) from t t1 left join t t2 on t1.b = t2.b",
best: "DataScan(t1)->Aggr(max(t1.a),min(t1.b))->Projection",
},
{
sql: "select sum(distinct t1.a) from t t1 left join t t2 on t1.a = t2.a and t1.b = t2.b",
best: "DataScan(t1)->Aggr(sum(t1.a))->Projection",
},
{
sql: "select count(distinct t1.a, t1.b) from t t1 left join t t2 on t1.b = t2.b",
best: "DataScan(t1)->Aggr(count(t1.a, t1.b))->Projection",
},
// Test left outer join
{
sql: "select t1.b from t t1 left outer join t t2 on t1.a = t2.a",
best: "DataScan(t1)->Projection",
},
// Test right outer join
{
sql: "select t2.b from t t1 right outer join t t2 on t1.a = t2.a",
best: "DataScan(t2)->Projection",
},
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
// For complex join query
{
sql: "select max(t3.b) from (t t1 left join t t2 on t1.a = t2.a) right join t t3 on t1.b = t3.b",
best: "DataScan(t3)->TopN([t3.b true],0,1)->Aggr(max(t3.b))->Projection",
},
}

for i, tt := range tests {
comment := Commentf("case:%v sql:%s", i, tt.sql)
stmt, err := s.ParseOneStmt(tt.sql, "", "")
c.Assert(err, IsNil, comment)
Preprocess(s.ctx, stmt, s.is, false)
builder := &PlanBuilder{
ctx: mockContext(),
is: s.is,
colMapper: make(map[*ast.ColumnNameExpr]int),
}
p, err := builder.Build(stmt)
c.Assert(err, IsNil)
p, err = logicalOptimize(builder.optFlag, p.(LogicalPlan))
c.Assert(err, IsNil)
c.Assert(ToString(p), Equals, tt.best, comment)
}
}
2 changes: 2 additions & 0 deletions planner/core/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ const (
flagEliminateProjection
flagMaxMinEliminate
flagPredicatePushDown
flagEliminateOuterJoin
flagPartitionProcessor
flagPushDownAgg
flagPushDownTopN
Expand All @@ -52,6 +53,7 @@ var optRuleList = []logicalOptRule{
&projectionEliminater{},
&maxMinEliminator{},
&ppdSolver{},
&outerJoinEliminator{},
&partitionProcessor{},
&aggregationPushDownSolver{},
&pushDownTopNOptimizer{},
Expand Down
192 changes: 192 additions & 0 deletions planner/core/rule_join_elimination.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// Copyright 2018 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package core

import (
"github.com/pingcap/parser/ast"
"github.com/pingcap/tidb/expression"
)

// outerJoinEliminator is the logical optimization rule that removes outer
// joins whose inner side is not needed by the rest of the plan. It has no
// fields and therefore carries no state between invocations.
type outerJoinEliminator struct{}

// tryToEliminateOuterJoin will eliminate outer join plan base on the following rules
// 1. outer join elimination: For example left outer join, if the parent only use the
// columns from left table and the join key of right table(the inner table) is a unique
// key of the right table. the left outer join can be eliminated.
// 2. outer join elimination with duplicate agnostic aggregate functions: For example left outer join.
// If the parent only use the columns from left table with 'distinct' label. The left outer join can
// be eliminated.
func (o *outerJoinEliminator) tryToEliminateOuterJoin(p *LogicalJoin, aggCols []*expression.Column, parentSchema *expression.Schema) LogicalPlan {
var innerChildIdx int
switch p.JoinType {
case LeftOuterJoin:
innerChildIdx = 1
case RightOuterJoin:
innerChildIdx = 0
default:
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
return p
}

outerPlan := p.children[1^innerChildIdx]
innerPlan := p.children[innerChildIdx]
// outer join elimination with duplicate agnostic aggregate functions
if o.isAggColsAllFromOuterTable(outerPlan, aggCols) {
XuHuaiyu marked this conversation as resolved.
Show resolved Hide resolved
return outerPlan
}
// outer join elimination without duplicate agnostic aggregate functions
if !o.isParentColsAllFromOuterTable(outerPlan, parentSchema) {
return p
}
innerJoinKeys := o.extractInnerJoinKeys(p, innerChildIdx)
if o.isInnerJoinKeysContainUniqueKey(innerPlan, innerJoinKeys) {
return outerPlan
}
if o.isInnerJoinKeysContainIndex(innerPlan, innerJoinKeys) {
return outerPlan
}
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
return p
}

// extractInnerJoinKeys builds a schema from the inner-side column of every
// equal condition of join. The argument at position innerChildIdx of each
// condition is taken and asserted to be an *expression.Column; the assertion
// panics if it is any other expression type.
func (o *outerJoinEliminator) extractInnerJoinKeys(join *LogicalJoin, innerChildIdx int) *expression.Schema {
	var innerKeys []*expression.Column
	for i := range join.EqualConditions {
		args := join.EqualConditions[i].GetArgs()
		innerKeys = append(innerKeys, args[innerChildIdx].(*expression.Column))
	}
	return expression.NewSchema(innerKeys...)
}

// isAggColsAllFromOuterTable reports whether aggCols is non-empty and every
// column in it can be found in the schema of outerPlan when looked up by its
// database, table and column name. Any error returned by the lookup is
// ignored; only a nil result counts as "not found".
func (o *outerJoinEliminator) isAggColsAllFromOuterTable(outerPlan LogicalPlan, aggCols []*expression.Column) bool {
	for i := range aggCols {
		name := &ast.ColumnName{
			Schema: aggCols[i].DBName,
			Table:  aggCols[i].TblName,
			Name:   aggCols[i].ColName,
		}
		found, _ := outerPlan.Schema().FindColumn(name)
		if found == nil {
			return false
		}
	}
	// An empty column list never qualifies.
	return len(aggCols) > 0
}

// isParentColsAllFromOuterTable reports whether every column of parentSchema
// can be found in the schema of outerPlan when looked up by its database,
// table and column name. A nil parentSchema yields false; a non-nil schema
// with no columns yields true. Any error returned by the lookup is ignored;
// only a nil result counts as "not found".
func (o *outerJoinEliminator) isParentColsAllFromOuterTable(outerPlan LogicalPlan, parentSchema *expression.Schema) bool {
	if parentSchema == nil {
		return false
	}
	for _, parentCol := range parentSchema.Columns {
		found, _ := outerPlan.Schema().FindColumn(&ast.ColumnName{
			Schema: parentCol.DBName,
			Table:  parentCol.TblName,
			Name:   parentCol.ColName,
		})
		if found == nil {
			return false
		}
	}
	return true
}

// check whether one of unique keys sets is contained by inner join keys
func (o *outerJoinEliminator) isInnerJoinKeysContainUniqueKey(innerPlan LogicalPlan, joinKeys *expression.Schema) bool {
for _, keyInfo := range innerPlan.Schema().Keys {
joinKeysContainKeyInfo := true
for _, col := range keyInfo {
columnName := &ast.ColumnName{Schema: col.DBName, Table: col.TblName, Name: col.ColName}
if c, _ := joinKeys.FindColumn(columnName); c == nil {
joinKeysContainKeyInfo = false
break
}
}
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
if joinKeysContainKeyInfo {
return true
}
}
return false
}

// isInnerJoinKeysContainIndex reports whether joinKeys contains every column
// of at least one unique index of innerPlan. It returns false when innerPlan
// is not a *DataSource. Only the data source's possible access paths that are
// not table paths and whose index is marked Unique are considered. Index
// columns are matched against joinKeys by the data source's database name,
// its table name and the index column name; a lookup error is ignored and
// only a nil result counts as "not found".
func (o *outerJoinEliminator) isInnerJoinKeysContainIndex(innerPlan LogicalPlan, joinKeys *expression.Schema) bool {
	ds, isDataSource := innerPlan.(*DataSource)
	if !isDataSource {
		return false
	}
nextPath:
	for _, path := range ds.possibleAccessPaths {
		// The index is inspected only for non-table paths, thanks to short-circuiting.
		if path.isTablePath || !path.index.Unique {
			continue
		}
		for _, idxCol := range path.index.Columns {
			name := &ast.ColumnName{Schema: ds.DBName, Table: ds.tableInfo.Name, Name: idxCol.Name}
			if found, _ := joinKeys.FindColumn(name); found == nil {
				// This index has a column outside the join keys; try the next path.
				continue nextPath
			}
		}
		// Every column of this unique index is among the join keys.
		return true
	}
	return false
}

// isDuplicateAgnosticAgg reports whether p is a *LogicalAggregation whose
// aggregate functions are all duplicate agnostic, meaning each one either has
// its distinct flag set or is firstrow, max or min. Every argument of every
// aggregate function must also be a plain *expression.Column.
//
// On success it returns true together with the aggregation's group-by columns
// followed by all aggregate argument columns. Otherwise it returns false and
// a nil slice.
//
// The returned slice is a fresh copy and never shares a backing array with
// agg.groupByCols, so callers may keep it without affecting the aggregation.
func (o *outerJoinEliminator) isDuplicateAgnosticAgg(p LogicalPlan) (_ bool, cols []*expression.Column) {
	agg, ok := p.(*LogicalAggregation)
	if !ok {
		return false, nil
	}
	// Copy instead of aliasing agg.groupByCols: appending to an aliased slice
	// could write into spare capacity of the aggregation's backing array.
	cols = append([]*expression.Column(nil), agg.groupByCols...)
	for _, aggDesc := range agg.AggFuncs {
		if !aggDesc.HasDistinct &&
			aggDesc.Name != ast.AggFuncFirstRow &&
			aggDesc.Name != ast.AggFuncMax &&
			aggDesc.Name != ast.AggFuncMin {
			return false, nil
		}
		for _, expr := range aggDesc.Args {
			col, isCol := expr.(*expression.Column)
			if !isCol {
				// A non-column argument cannot be checked against the outer schema.
				return false, nil
			}
			cols = append(cols, col)
		}
	}
	return true, cols
}

// doOptimize applies outer join elimination to the plan tree rooted at p and
// returns the plan that should replace p.
//
// aggCols holds the columns collected from the closest duplicate agnostic
// aggregation at or above p; when p is itself such an aggregation, its
// columns replace the inherited ones for the subtree below. parentSchema is
// the schema of p's parent. Children are optimized first, each receiving p's
// schema as its parent schema, and are then set back on p. If p is a
// *LogicalJoin, the result of tryToEliminateOuterJoin is returned; any other
// plan is returned as is.
func (o *outerJoinEliminator) doOptimize(p LogicalPlan, aggCols []*expression.Column, parentSchema *expression.Schema) LogicalPlan {
	if isAgnostic, cols := o.isDuplicateAgnosticAgg(p); isAgnostic {
		aggCols = cols
	}

	children := p.Children()
	optimized := make([]LogicalPlan, 0, len(children))
	for _, child := range children {
		optimized = append(optimized, o.doOptimize(child, aggCols, p.Schema()))
	}
	p.SetChildren(optimized...)

	if join, ok := p.(*LogicalJoin); ok {
		return o.tryToEliminateOuterJoin(join, aggCols, parentSchema)
	}
	return p
}

// optimize runs outer join elimination over the whole plan rooted at p,
// starting with no aggregate columns and no parent schema. The returned error
// is always nil.
func (o *outerJoinEliminator) optimize(p LogicalPlan) (LogicalPlan, error) {
	optimized := o.doOptimize(p, nil, nil)
	return optimized, nil
}