From 737199825364c13f80efdbd5fb54b625e8c79f60 Mon Sep 17 00:00:00 2001 From: Alexander Turenko Date: Thu, 14 Oct 2021 03:23:56 +0300 Subject: [PATCH] select/pairs: don't ignore provided bucket_id The bug is simple: crud ignores provided bucket_id, when unable to determine it itself. For example, when no conditions are given or when given condition involves a secondary index, which is not entirely in the primary index. It leads to incorrect select/pairs result: tuples are collected from all replicasets, while should be collected from one replicaset pointed by bucket_id. Second, it involves all replicasets into the request processing (performs map-reduce) that may dramatically drop performance. One existing test case was changed: 'test_opts_not_damaged' in ipairs_test.lua. The crud.pairs() request in this test case was affected by the problem and incorrect result was expected. The idea of the fix is suggested by Michael Filonenko in PR #221. Nice suggestions were given by Sergey Bronnikov (see PR #222). Fixes #220 --- CHANGELOG.md | 1 + crud/select/compat/select.lua | 34 +++++++++++- crud/select/compat/select_old.lua | 7 ++- test/integration/pairs_test.lua | 90 ++++++++++++++++++++++++++++++- test/integration/select_test.lua | 79 +++++++++++++++++++++++++++ 5 files changed, 208 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ae5c559..3cb7495d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed * Damaging of opts table by CRUD methods. +* Ignoring of `bucket_id` option in `crud.select()`/`crud.pairs()` (#220). ### Added diff --git a/crud/select/compat/select.lua b/crud/select/compat/select.lua index 5649d009..8df9655c 100644 --- a/crud/select/compat/select.lua +++ b/crud/select/compat/select.lua @@ -65,8 +65,40 @@ local function build_select_iterator(space_name, user_conditions, opts) -- set replicasets to select from local replicasets_to_select = replicasets - if plan.sharding_key ~= nil and opts.force_map_call ~= true then + -- Whether to call one storage replicaset or perform + -- map-reduce? + -- + -- If map-reduce is requested explicitly, ignore provided + -- bucket_id and fetch data from all storage replicasets. + -- + -- Otherwise: + -- + -- 1. If particular replicaset is pointed by a caller (using + -- the bucket_id option[^1]), crud MUST fetch data only + -- from this storage replicaset: disregarding whether other + -- storages have tuples that fit given condition. + -- + -- 2. If a replicaset may be deduced from conditions + -- (conditions -> sharding key -> bucket id -> replicaset), + -- fetch data only from the replicaset. It does not change + -- the result[^2], but significantly reduces network + -- pressure. + -- + -- 3. Fallback to map-reduce otherwise. + -- + -- [^1]: We can change meaning of this option in a future, + -- see gh-190. But now bucket_id points a storage + -- replicaset, not a virtual bucket. + -- + -- [^2]: It is correct statement only if we'll turn a blind + -- eye to resharding. However, AFAIU, the optimization + -- does not make the result less consistent (sounds + -- weird, huh?). + local perform_map_reduce = opts.force_map_call == true or + (opts.bucket_id == nil and plan.sharding_key == nil) + if not perform_map_reduce then local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id) + assert(bucket_id ~= nil) local err replicasets_to_select, err = common.get_replicasets_by_sharding_key(bucket_id) diff --git a/crud/select/compat/select_old.lua b/crud/select/compat/select_old.lua index 6dca8e82..84ae06a4 100644 --- a/crud/select/compat/select_old.lua +++ b/crud/select/compat/select_old.lua @@ -118,8 +118,13 @@ local function build_select_iterator(space_name, user_conditions, opts) -- set replicasets to select from local replicasets_to_select = replicasets - if plan.sharding_key ~= nil and opts.force_map_call ~= true then + -- See explanation of this logic in + -- crud/select/compat/select.lua. + local perform_map_reduce = opts.force_map_call == true or + (opts.bucket_id == nil and plan.sharding_key == nil) + if not perform_map_reduce then local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id) + assert(bucket_id ~= nil) local err replicasets_to_select, err = common.get_replicasets_by_sharding_key(bucket_id) diff --git a/test/integration/pairs_test.lua b/test/integration/pairs_test.lua index fdb5db5c..88ad8a9c 100644 --- a/test/integration/pairs_test.lua +++ b/test/integration/pairs_test.lua @@ -5,6 +5,7 @@ local t = require('luatest') local crud_utils = require('crud.common.utils') local helpers = require('test.helper') +local storage_stat = require('test.helpers.storage_stat') local pgroup = t.group('pairs', { {engine = 'memtx'}, @@ -25,6 +26,13 @@ pgroup.before_all(function(g) g.cluster:start() g.space_format = g.cluster.servers[2].net_box.space.customers:format() + + helpers.call_on_storages(g.cluster, function(server) + server.net_box:eval([[ + local storage_stat = require('test.helpers.storage_stat') + storage_stat.init_on_storage() + ]]) + end) end) pgroup.after_all(function(g) helpers.stop_cluster(g.cluster) end) @@ -758,15 +766,19 @@ end pgroup.test_opts_not_damaged = function(g) local customers = helpers.insert_objects(g, 'customers', { { + -- bucket_id is 477, storage is s-2 id = 1, name = "Elizabeth", last_name = "Jackson", age = 12, city = "Los Angeles", }, { + -- bucket_id is 401, storage is s-2 id = 2, name = "Mary", last_name = "Brown", age = 46, city = "London", }, { + -- bucket_id is 2804, storage is s-1 id = 3, name = "David", last_name = "Smith", age = 33, city = "Los Angeles", }, { + -- bucket_id is 1161, storage is s-2 id = 4, name = "William", last_name = "White", age = 46, city = "Chicago", }, @@ -775,7 +787,6 @@ pgroup.test_opts_not_damaged = function(g) table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end) local expected_customers = { - {id = 3, name = "David", age = 33}, {id = 4, name = "William", age = 46}, } @@ -805,3 +816,80 @@ pgroup.test_opts_not_damaged = function(g) t.assert_equals(objects, expected_customers) t.assert_equals(new_pairs_opts, pairs_opts) end + +-- gh-220: bucket_id argument is ignored when it cannot be deduced +-- from provided select/pairs conditions. +pgroup.test_pairs_no_map_reduce = function(g) + local customers = helpers.insert_objects(g, 'customers', { + { + -- bucket_id is 477, storage is s-2 + id = 1, name = 'Elizabeth', last_name = 'Jackson', + age = 12, city = 'New York', + }, { + -- bucket_id is 401, storage is s-2 + id = 2, name = 'Mary', last_name = 'Brown', + age = 46, city = 'Los Angeles', + }, { + -- bucket_id is 2804, storage is s-1 + id = 3, name = 'David', last_name = 'Smith', + age = 33, city = 'Los Angeles', + }, { + -- bucket_id is 1161, storage is s-2 + id = 4, name = 'William', last_name = 'White', + age = 81, city = 'Chicago', + }, + }) + + table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end) + + local stat_a = storage_stat.collect(g.cluster) + + -- Case: no conditions, just bucket id. + local rows = g.cluster.main_server.net_box:eval([[ + local crud = require('crud') + + return crud.pairs(...):totable() + ]], { + 'customers', + nil, + {bucket_id = 2804, timeout = 1}, + }) + t.assert_equals(rows, { + {3, 2804, 'David', 'Smith', 33, 'Los Angeles'}, + }) + + local stat_b = storage_stat.collect(g.cluster) + t.assert_equals(storage_stat.diff(stat_b, stat_a), { + ['s-1'] = { + select_requests = 1, + }, + ['s-2'] = { + select_requests = 0, + }, + }) + + -- Case: EQ on secondary index, which is not in the sharding + -- index (primary index in the case). + local rows = g.cluster.main_server.net_box:eval([[ + local crud = require('crud') + + return crud.pairs(...):totable() + ]], { + 'customers', + {{'==', 'age', 81}}, + {bucket_id = 1161, timeout = 1}, + }) + t.assert_equals(rows, { + {4, 1161, 'William', 'White', 81, 'Chicago'}, + }) + + local stat_c = storage_stat.collect(g.cluster) + t.assert_equals(storage_stat.diff(stat_c, stat_b), { + ['s-1'] = { + select_requests = 0, + }, + ['s-2'] = { + select_requests = 1, + }, + }) +end diff --git a/test/integration/select_test.lua b/test/integration/select_test.lua index 74192138..1c5e5d37 100644 --- a/test/integration/select_test.lua +++ b/test/integration/select_test.lua @@ -6,6 +6,7 @@ local crud = require('crud') local crud_utils = require('crud.common.utils') local helpers = require('test.helper') +local storage_stat = require('test.helpers.storage_stat') local pgroup = t.group('select', { {engine = 'memtx'}, @@ -26,6 +27,13 @@ pgroup.before_all(function(g) g.cluster:start() g.space_format = g.cluster.servers[2].net_box.space.customers:format() + + helpers.call_on_storages(g.cluster, function(server) + server.net_box:eval([[ + local storage_stat = require('test.helpers.storage_stat') + storage_stat.init_on_storage() + ]]) + end) end) pgroup.after_all(function(g) helpers.stop_cluster(g.cluster) end) @@ -1581,3 +1589,74 @@ pgroup.test_opts_not_damaged = function(g) t.assert_equals(err, nil) t.assert_equals(new_select_opts, select_opts) end + +-- gh-220: bucket_id argument is ignored when it cannot be deduced +-- from provided select/pairs conditions. +pgroup.test_select_no_map_reduce = function(g) + local customers = helpers.insert_objects(g, 'customers', { + { + -- bucket_id is 477, storage is s-2 + id = 1, name = 'Elizabeth', last_name = 'Jackson', + age = 12, city = 'New York', + }, { + -- bucket_id is 401, storage is s-2 + id = 2, name = 'Mary', last_name = 'Brown', + age = 46, city = 'Los Angeles', + }, { + -- bucket_id is 2804, storage is s-1 + id = 3, name = 'David', last_name = 'Smith', + age = 33, city = 'Los Angeles', + }, { + -- bucket_id is 1161, storage is s-2 + id = 4, name = 'William', last_name = 'White', + age = 81, city = 'Chicago', + }, + }) + + table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end) + + local stat_a = storage_stat.collect(g.cluster) + + -- Case: no conditions, just bucket id. + local result, err = g.cluster.main_server.net_box:call('crud.select', { + 'customers', + nil, + {bucket_id = 2804, timeout = 1}, + }) + t.assert_equals(err, nil) + t.assert_equals(result.rows, { + {3, 2804, 'David', 'Smith', 33, 'Los Angeles'}, + }) + + local stat_b = storage_stat.collect(g.cluster) + t.assert_equals(storage_stat.diff(stat_b, stat_a), { + ['s-1'] = { + select_requests = 1, + }, + ['s-2'] = { + select_requests = 0, + }, + }) + + -- Case: EQ on secondary index, which is not in the sharding + -- index (primary index in the case). + local result, err = g.cluster.main_server.net_box:call('crud.select', { + 'customers', + {{'==', 'age', 81}}, + {bucket_id = 1161, timeout = 1}, + }) + t.assert_equals(err, nil) + t.assert_equals(result.rows, { + {4, 1161, 'William', 'White', 81, 'Chicago'}, + }) + + local stat_c = storage_stat.collect(g.cluster) + t.assert_equals(storage_stat.diff(stat_c, stat_b), { + ['s-1'] = { + select_requests = 0, + }, + ['s-2'] = { + select_requests = 1, + }, + }) +end