Skip to content

Commit

Permalink
select/pairs: don't ignore provided bucket_id
Browse files Browse the repository at this point in the history
The bug is simple: crud ignores provided bucket_id, when unable to
determine it itself. For example, when no conditions are given or when
given condition involves a secondary index, which is not entirely in the
primary index.

It leads to incorrect select/pairs result: tuples are collected from all
replicasets, while should be collected from one replicaset pointed by
bucket_id. Second, it involves all replicasets into the request
processing (performs map-reduce) that may dramatically drop performance.

One existing test case was changed: 'test_opts_not_damaged' in
ipairs_test.lua. The crud.pairs() request in this test case was affected
by the problem and incorrect result was expected.

The idea of the fix is suggested by Michael Filonenko in PR #221.

Nice suggestions were given by Sergey Bronnikov (see PR #222).

Fixes #220
  • Loading branch information
Totktonada committed Oct 19, 2021
1 parent a6e34e1 commit 7371998
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Fixed

* Damaging of opts table by CRUD methods.
* Ignoring of `bucket_id` option in `crud.select()`/`crud.pairs()` (#220).

### Added

Expand Down
34 changes: 33 additions & 1 deletion crud/select/compat/select.lua
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,40 @@ local function build_select_iterator(space_name, user_conditions, opts)
-- set replicasets to select from
local replicasets_to_select = replicasets

if plan.sharding_key ~= nil and opts.force_map_call ~= true then
-- Whether to call one storage replicaset or perform
-- map-reduce?
--
-- If map-reduce is requested explicitly, ignore provided
-- bucket_id and fetch data from all storage replicasets.
--
-- Otherwise:
--
-- 1. If particular replicaset is pointed by a caller (using
-- the bucket_id option[^1]), crud MUST fetch data only
-- from this storage replicaset: disregarding whether other
-- storages have tuples that fit given condition.
--
-- 2. If a replicaset may be deduced from conditions
-- (conditions -> sharding key -> bucket id -> replicaset),
-- fetch data only from the replicaset. It does not change
-- the result[^2], but significantly reduces network
-- pressure.
--
-- 3. Fallback to map-reduce otherwise.
--
-- [^1]: We can change meaning of this option in a future,
-- see gh-190. But now bucket_id points a storage
-- replicaset, not a virtual bucket.
--
-- [^2]: It is correct statement only if we'll turn a blind
-- eye to resharding. However, AFAIU, the optimization
-- does not make the result less consistent (sounds
-- weird, huh?).
local perform_map_reduce = opts.force_map_call == true or
(opts.bucket_id == nil and plan.sharding_key == nil)
if not perform_map_reduce then
local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id)
assert(bucket_id ~= nil)

local err
replicasets_to_select, err = common.get_replicasets_by_sharding_key(bucket_id)
Expand Down
7 changes: 6 additions & 1 deletion crud/select/compat/select_old.lua
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,13 @@ local function build_select_iterator(space_name, user_conditions, opts)
-- set replicasets to select from
local replicasets_to_select = replicasets

if plan.sharding_key ~= nil and opts.force_map_call ~= true then
-- See explanation of this logic in
-- crud/select/compat/select.lua.
local perform_map_reduce = opts.force_map_call == true or
(opts.bucket_id == nil and plan.sharding_key == nil)
if not perform_map_reduce then
local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id)
assert(bucket_id ~= nil)

local err
replicasets_to_select, err = common.get_replicasets_by_sharding_key(bucket_id)
Expand Down
90 changes: 89 additions & 1 deletion test/integration/pairs_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ local t = require('luatest')
local crud_utils = require('crud.common.utils')

local helpers = require('test.helper')
local storage_stat = require('test.helpers.storage_stat')

local pgroup = t.group('pairs', {
{engine = 'memtx'},
Expand All @@ -25,6 +26,13 @@ pgroup.before_all(function(g)
g.cluster:start()

g.space_format = g.cluster.servers[2].net_box.space.customers:format()

helpers.call_on_storages(g.cluster, function(server)
server.net_box:eval([[
local storage_stat = require('test.helpers.storage_stat')
storage_stat.init_on_storage()
]])
end)
end)

pgroup.after_all(function(g) helpers.stop_cluster(g.cluster) end)
Expand Down Expand Up @@ -758,15 +766,19 @@ end
pgroup.test_opts_not_damaged = function(g)
local customers = helpers.insert_objects(g, 'customers', {
{
-- bucket_id is 477, storage is s-2
id = 1, name = "Elizabeth", last_name = "Jackson",
age = 12, city = "Los Angeles",
}, {
-- bucket_id is 401, storage is s-2
id = 2, name = "Mary", last_name = "Brown",
age = 46, city = "London",
}, {
-- bucket_id is 2804, storage is s-1
id = 3, name = "David", last_name = "Smith",
age = 33, city = "Los Angeles",
}, {
-- bucket_id is 1161, storage is s-2
id = 4, name = "William", last_name = "White",
age = 46, city = "Chicago",
},
Expand All @@ -775,7 +787,6 @@ pgroup.test_opts_not_damaged = function(g)
table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)

local expected_customers = {
{id = 3, name = "David", age = 33},
{id = 4, name = "William", age = 46},
}

Expand Down Expand Up @@ -805,3 +816,80 @@ pgroup.test_opts_not_damaged = function(g)
t.assert_equals(objects, expected_customers)
t.assert_equals(new_pairs_opts, pairs_opts)
end

-- gh-220: bucket_id argument is ignored when it cannot be deduced
-- from provided select/pairs conditions.
pgroup.test_pairs_no_map_reduce = function(g)
local customers = helpers.insert_objects(g, 'customers', {
{
-- bucket_id is 477, storage is s-2
id = 1, name = 'Elizabeth', last_name = 'Jackson',
age = 12, city = 'New York',
}, {
-- bucket_id is 401, storage is s-2
id = 2, name = 'Mary', last_name = 'Brown',
age = 46, city = 'Los Angeles',
}, {
-- bucket_id is 2804, storage is s-1
id = 3, name = 'David', last_name = 'Smith',
age = 33, city = 'Los Angeles',
}, {
-- bucket_id is 1161, storage is s-2
id = 4, name = 'William', last_name = 'White',
age = 81, city = 'Chicago',
},
})

table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)

local stat_a = storage_stat.collect(g.cluster)

-- Case: no conditions, just bucket id.
local rows = g.cluster.main_server.net_box:eval([[
local crud = require('crud')
return crud.pairs(...):totable()
]], {
'customers',
nil,
{bucket_id = 2804, timeout = 1},
})
t.assert_equals(rows, {
{3, 2804, 'David', 'Smith', 33, 'Los Angeles'},
})

local stat_b = storage_stat.collect(g.cluster)
t.assert_equals(storage_stat.diff(stat_b, stat_a), {
['s-1'] = {
select_requests = 1,
},
['s-2'] = {
select_requests = 0,
},
})

-- Case: EQ on secondary index, which is not in the sharding
-- index (primary index in the case).
local rows = g.cluster.main_server.net_box:eval([[
local crud = require('crud')
return crud.pairs(...):totable()
]], {
'customers',
{{'==', 'age', 81}},
{bucket_id = 1161, timeout = 1},
})
t.assert_equals(rows, {
{4, 1161, 'William', 'White', 81, 'Chicago'},
})

local stat_c = storage_stat.collect(g.cluster)
t.assert_equals(storage_stat.diff(stat_c, stat_b), {
['s-1'] = {
select_requests = 0,
},
['s-2'] = {
select_requests = 1,
},
})
end
79 changes: 79 additions & 0 deletions test/integration/select_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ local crud = require('crud')
local crud_utils = require('crud.common.utils')

local helpers = require('test.helper')
local storage_stat = require('test.helpers.storage_stat')

local pgroup = t.group('select', {
{engine = 'memtx'},
Expand All @@ -26,6 +27,13 @@ pgroup.before_all(function(g)
g.cluster:start()

g.space_format = g.cluster.servers[2].net_box.space.customers:format()

helpers.call_on_storages(g.cluster, function(server)
server.net_box:eval([[
local storage_stat = require('test.helpers.storage_stat')
storage_stat.init_on_storage()
]])
end)
end)

pgroup.after_all(function(g) helpers.stop_cluster(g.cluster) end)
Expand Down Expand Up @@ -1581,3 +1589,74 @@ pgroup.test_opts_not_damaged = function(g)
t.assert_equals(err, nil)
t.assert_equals(new_select_opts, select_opts)
end

-- gh-220: bucket_id argument is ignored when it cannot be deduced
-- from provided select/pairs conditions.
pgroup.test_select_no_map_reduce = function(g)
local customers = helpers.insert_objects(g, 'customers', {
{
-- bucket_id is 477, storage is s-2
id = 1, name = 'Elizabeth', last_name = 'Jackson',
age = 12, city = 'New York',
}, {
-- bucket_id is 401, storage is s-2
id = 2, name = 'Mary', last_name = 'Brown',
age = 46, city = 'Los Angeles',
}, {
-- bucket_id is 2804, storage is s-1
id = 3, name = 'David', last_name = 'Smith',
age = 33, city = 'Los Angeles',
}, {
-- bucket_id is 1161, storage is s-2
id = 4, name = 'William', last_name = 'White',
age = 81, city = 'Chicago',
},
})

table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)

local stat_a = storage_stat.collect(g.cluster)

-- Case: no conditions, just bucket id.
local result, err = g.cluster.main_server.net_box:call('crud.select', {
'customers',
nil,
{bucket_id = 2804, timeout = 1},
})
t.assert_equals(err, nil)
t.assert_equals(result.rows, {
{3, 2804, 'David', 'Smith', 33, 'Los Angeles'},
})

local stat_b = storage_stat.collect(g.cluster)
t.assert_equals(storage_stat.diff(stat_b, stat_a), {
['s-1'] = {
select_requests = 1,
},
['s-2'] = {
select_requests = 0,
},
})

-- Case: EQ on secondary index, which is not in the sharding
-- index (primary index in the case).
local result, err = g.cluster.main_server.net_box:call('crud.select', {
'customers',
{{'==', 'age', 81}},
{bucket_id = 1161, timeout = 1},
})
t.assert_equals(err, nil)
t.assert_equals(result.rows, {
{4, 1161, 'William', 'White', 81, 'Chicago'},
})

local stat_c = storage_stat.collect(g.cluster)
t.assert_equals(storage_stat.diff(stat_c, stat_b), {
['s-1'] = {
select_requests = 0,
},
['s-2'] = {
select_requests = 1,
},
})
end

0 comments on commit 7371998

Please sign in to comment.