Skip to content

Commit

Permalink
select/pairs: don't ignore provided bucket_id
Browse files Browse the repository at this point in the history
The bug is simple: crud ignores provided bucket_id, when unable to
determine it itself. For example, when no conditions are given or when
given condition involves a secondary index, which is not entirely in the
primary index.

It leads to incorrect select/pairs result: tuples are collected from all
replicasets, while should be collected from one storage pointed by
bucket_id. Second, it involves all replicasets into the request
processing (performs map-reduce) that may dramatically drop performance.

One existing test case was changed: 'test_opts_not_damaged' in
ipairs_test.lua. The crud.pairs() request in this test case was affected
by the problem and incorrect result was expected.

The idea of the fix is suggested by Michael Filonenko in PR #221.

Fixes #220
  • Loading branch information
Totktonada committed Oct 14, 2021
1 parent 68d574e commit fa62b8c
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 5 deletions.
3 changes: 2 additions & 1 deletion crud/select/compat/select.lua
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ local function build_select_iterator(space_name, user_conditions, opts)
-- set replicasets to select from
local replicasets_to_select = replicasets

if plan.sharding_key ~= nil and opts.force_map_call ~= true then
local bucket_id_is_known = plan.sharding_key ~= nil or opts.bucket_id ~= nil
if bucket_id_is_known and opts.force_map_call ~= true then
local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id)

local err
Expand Down
3 changes: 2 additions & 1 deletion crud/select/compat/select_old.lua
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ local function build_select_iterator(space_name, user_conditions, opts)
-- set replicasets to select from
local replicasets_to_select = replicasets

if plan.sharding_key ~= nil and opts.force_map_call ~= true then
local bucket_id_is_known = plan.sharding_key ~= nil or opts.bucket_id ~= nil
if bucket_id_is_known and opts.force_map_call ~= true then
local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id)

local err
Expand Down
87 changes: 85 additions & 2 deletions test/integration/pairs_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ pgroup.before_all(function(g)
datadir = fio.tempdir(),
server_command = helpers.entrypoint('srv_select'),
use_vshard = true,
replicasets = helpers.get_test_replicasets(),
replicasets = helpers.get_test_replicasets({
extra_storage_roles = {'statistics-storage'},
}),
env = {
['ENGINE'] = g.params.engine,
},
Expand Down Expand Up @@ -758,15 +760,19 @@ end
pgroup.test_opts_not_damaged = function(g)
local customers = helpers.insert_objects(g, 'customers', {
{
-- bucket_id is 477, storage is s-2
id = 1, name = "Elizabeth", last_name = "Jackson",
age = 12, city = "Los Angeles",
}, {
-- bucket_id is 401, storage is s-2
id = 2, name = "Mary", last_name = "Brown",
age = 46, city = "London",
}, {
-- bucket_id is 2804, storage is s-1
id = 3, name = "David", last_name = "Smith",
age = 33, city = "Los Angeles",
}, {
-- bucket_id is 1161, storage is s-2
id = 4, name = "William", last_name = "White",
age = 46, city = "Chicago",
},
Expand All @@ -775,7 +781,6 @@ pgroup.test_opts_not_damaged = function(g)
table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)

local expected_customers = {
{id = 3, name = "David", age = 33},
{id = 4, name = "William", age = 46},
}

Expand Down Expand Up @@ -805,3 +810,81 @@ pgroup.test_opts_not_damaged = function(g)
t.assert_equals(objects, expected_customers)
t.assert_equals(new_pairs_opts, pairs_opts)
end

-- gh-220: bucket_id argument is ignored when it cannot be deduced
-- from provided select/pairs conditions.
pgroup.test_pairs_no_map_reduce = function(g)
local customers = helpers.insert_objects(g, 'customers', {
{
-- bucket_id is 477, storage is s-2
id = 1, name = 'Elizabeth', last_name = 'Jackson',
age = 12, city = 'New York',
}, {
-- bucket_id is 401, storage is s-2
id = 2, name = 'Mary', last_name = 'Brown',
age = 46, city = 'Los Angeles',
}, {
-- bucket_id is 2804, storage is s-1
id = 3, name = 'David', last_name = 'Smith',
age = 33, city = 'Los Angeles',
}, {
-- bucket_id is 1161, storage is s-2
id = 4, name = 'William', last_name = 'White',
age = 81, city = 'Chicago',
},
})

table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)

local storages = {'s1-master', 's1-replica', 's2-master', 's2-replica'}
local stat_a = helpers.collect_storage_stat(g.cluster, storages)

-- Case: no conditions, just bucket id.
local rows = g.cluster.main_server.net_box:eval([[
local crud = require('crud')
return crud.pairs(...):totable()
]], {
'customers',
nil,
{bucket_id = 2804, timeout = 1},
})
t.assert_equals(rows, {
{3, 2804, 'David', 'Smith', 33, 'Los Angeles'},
})

local stat_b = helpers.collect_storage_stat(g.cluster, storages)
t.assert_equals(helpers.diff_storage_stat(stat_b, stat_a), {
['s-1'] = {
select_requests = 1,
},
['s-2'] = {
select_requests = 0,
},
})

-- Case: EQ on secondary index, which is not in the sharding
-- index (primary index in the case).
local rows = g.cluster.main_server.net_box:eval([[
local crud = require('crud')
return crud.pairs(...):totable()
]], {
'customers',
{{'==', 'age', 81}},
{bucket_id = 1161, timeout = 1},
})
t.assert_equals(rows, {
{4, 1161, 'William', 'White', 81, 'Chicago'},
})

local stat_c = helpers.collect_storage_stat(g.cluster, storages)
t.assert_equals(helpers.diff_storage_stat(stat_c, stat_b), {
['s-1'] = {
select_requests = 0,
},
['s-2'] = {
select_requests = 1,
},
})
end
76 changes: 75 additions & 1 deletion test/integration/select_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ pgroup.before_all(function(g)
datadir = fio.tempdir(),
server_command = helpers.entrypoint('srv_select'),
use_vshard = true,
replicasets = helpers.get_test_replicasets(),
replicasets = helpers.get_test_replicasets({
extra_storage_roles = {'statistics-storage'},
}),
env = {
['ENGINE'] = g.params.engine,
},
Expand Down Expand Up @@ -1581,3 +1583,75 @@ pgroup.test_opts_not_damaged = function(g)
t.assert_equals(err, nil)
t.assert_equals(new_select_opts, select_opts)
end

-- gh-220: bucket_id argument is ignored when it cannot be deduced
-- from provided select/pairs conditions.
pgroup.test_select_no_map_reduce = function(g)
local customers = helpers.insert_objects(g, 'customers', {
{
-- bucket_id is 477, storage is s-2
id = 1, name = 'Elizabeth', last_name = 'Jackson',
age = 12, city = 'New York',
}, {
-- bucket_id is 401, storage is s-2
id = 2, name = 'Mary', last_name = 'Brown',
age = 46, city = 'Los Angeles',
}, {
-- bucket_id is 2804, storage is s-1
id = 3, name = 'David', last_name = 'Smith',
age = 33, city = 'Los Angeles',
}, {
-- bucket_id is 1161, storage is s-2
id = 4, name = 'William', last_name = 'White',
age = 81, city = 'Chicago',
},
})

table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)

local storages = {'s1-master', 's1-replica', 's2-master', 's2-replica'}
local stat_a = helpers.collect_storage_stat(g.cluster, storages)

-- Case: no conditions, just bucket id.
local result, err = g.cluster.main_server.net_box:call('crud.select', {
'customers',
nil,
{bucket_id = 2804, timeout = 1},
})
t.assert_equals(err, nil)
t.assert_equals(result.rows, {
{3, 2804, 'David', 'Smith', 33, 'Los Angeles'},
})

local stat_b = helpers.collect_storage_stat(g.cluster, storages)
t.assert_equals(helpers.diff_storage_stat(stat_b, stat_a), {
['s-1'] = {
select_requests = 1,
},
['s-2'] = {
select_requests = 0,
},
})

-- Case: EQ on secondary index, which is not in the sharding
-- index (primary index in the case).
local result, err = g.cluster.main_server.net_box:call('crud.select', {
'customers',
{{'==', 'age', 81}},
{bucket_id = 1161, timeout = 1},
})
t.assert_equals(err, nil)
t.assert_equals(result.rows, {
{4, 1161, 'William', 'White', 81, 'Chicago'},
})

local stat_c = helpers.collect_storage_stat(g.cluster, storages)
t.assert_equals(helpers.diff_storage_stat(stat_c, stat_b), {
['s-1'] = {
select_requests = 0,
},
['s-2'] = {
select_requests = 1,
},
})
end

0 comments on commit fa62b8c

Please sign in to comment.