From bc7ef2b8602686d1d81eb6ce0e58f42eb121b896 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Fri, 28 Jan 2022 19:37:53 +0300 Subject: [PATCH] Integrate CRUD statistics with metrics rock If `metrics` [1] is found, you can use metrics collectors to store statistics. `metrics >= 0.10.0` is required to use the metrics driver. (`metrics >= 0.9.0` is required to use summary quantiles with age buckets. `metrics >= 0.5.0, < 0.9.0` is unsupported due to a quantile overflow bug [2]. `metrics == 0.9.0` has a bug that does not permit creating a summary collector without quantiles [3]. In fact, a user may use `metrics >= 0.5.0`, `metrics != 0.9.0` if they want to use metrics without quantiles, and `metrics >= 0.9.0` if they want to use metrics with quantiles. But this is confusing, so let's use a single restriction for both cases.) The metrics are part of the global registry and can be exported together (e.g. to Prometheus) with default tools without any additional configuration. Disabling stats destroys the collectors. Metrics collectors are used by default if supported. To explicitly set the driver, call `crud.enable_stats{ driver = driver }` ('local' or 'metrics'). To enable quantiles, call `crud.enable_stats{ driver = 'metrics', quantiles = true }`. With quantiles, the `latency` statistic is changed to the 0.99 quantile of request execution time (with aging). Quantile computation increases performance overhead by about 10% when used in statistics. Add CI matrix to run tests with `metrics` installed. To get full coverage on coveralls, #248 must be resolved. 1. https://github.com/tarantool/metrics 2. https://github.com/tarantool/metrics/issues/235 3. 
https://github.com/tarantool/metrics/issues/262 Closes #224 --- .github/workflows/test_on_push.yaml | 21 +- CHANGELOG.md | 1 + CMakeLists.txt | 8 + README.md | 63 +++- crud/stats/local_registry.lua | 17 +- crud/stats/metrics_registry.lua | 464 +++++++++++++++++++++++++ crud/stats/module.lua | 97 +++++- test/integration/stats_test.lua | 513 ++++++++++++++++++++++++---- test/unit/stats_test.lua | 200 +++++++---- 9 files changed, 1234 insertions(+), 150 deletions(-) create mode 100644 crud/stats/metrics_registry.lua diff --git a/.github/workflows/test_on_push.yaml b/.github/workflows/test_on_push.yaml index bcf6d348c..eaa59f1f3 100644 --- a/.github/workflows/test_on_push.yaml +++ b/.github/workflows/test_on_push.yaml @@ -13,13 +13,24 @@ jobs: matrix: # We need 1.10.6 here to check that module works with # old Tarantool versions that don't have "tuple-keydef"/"tuple-merger" support. - tarantool-version: ["1.10.6", "1.10", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7"] + tarantool-version: ["1.10.6", "1.10", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8"] + metrics-version: [""] remove-merger: [false] + perf-test: [false] include: + - tarantool-version: "1.10" + metrics-version: "0.12.0" + perf-test: true - tarantool-version: "2.7" remove-merger: true + - tarantool-version: "2.8" + metrics-version: "0.1.8" + - tarantool-version: "2.8" + metrics-version: "0.10.0" - tarantool-version: "2.8" coveralls: true + metrics-version: "0.12.0" + perf-test: true fail-fast: false runs-on: [ubuntu-latest] steps: @@ -47,6 +58,10 @@ jobs: tarantool --version ./deps.sh + - name: Install metrics + if: matrix.metrics-version != '' + run: tarantoolctl rocks install metrics ${{ matrix.metrics-version }} + - name: Remove external merger if needed if: ${{ matrix.remove-merger }} run: rm .rocks/lib/tarantool/tuple/merger.so @@ -62,6 +77,10 @@ jobs: - name: Run tests and code coverage analysis run: make -C build coverage + - name: Run performance tests + run: make -C build performance + if: ${{ 
matrix.perf-test }} + - name: Send code coverage to coveralls.io run: make -C build coveralls if: ${{ matrix.coveralls }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a3d9325f..3667a1b90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added * Statistics for CRUD operations on router (#224). +* Integrate CRUD statistics with [`metrics`](https://github.com/tarantool/metrics) (#224). ### Changed diff --git a/CMakeLists.txt b/CMakeLists.txt index 714474a18..2e11fbf3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,14 @@ add_custom_target(luatest COMMENT "Run regression tests" ) +set(PERFORMANCE_TESTS_SUBDIR "test/performance") + +add_custom_target(performance + COMMAND PERF_MODE_ON=true ${LUATEST} -v -c ${PERFORMANCE_TESTS_SUBDIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Run performance tests" +) + add_custom_target(coverage COMMAND ${LUACOV} ${PROJECT_SOURCE_DIR} && grep -A999 '^Summary' ${CODE_COVERAGE_REPORT} DEPENDS ${CODE_COVERAGE_STATS} diff --git a/README.md b/README.md index 506b4ef74..4d535b147 100644 --- a/README.md +++ b/README.md @@ -606,8 +606,24 @@ crud.disable_stats() crud.reset_stats() ``` -Format is as follows. +If [`metrics`](https://github.com/tarantool/metrics) `0.10.0` or greater +found, metrics collectors will be used by default to store statistics +instead of local collectors. You can manually choose driver if needed. ```lua +-- Use metrics collectors. (Default if metrics found). +crud.enable_stats({ driver = 'metrics' }) + +-- Use metrics collectors with 0.99 quantile. +crud.enable_stats({ driver = 'metrics', quantiles = true }) + +-- Use simple local collectors. +crud.enable_stats({ driver = 'local' }) +``` +Performance overhead is 3-7% in case of `local` driver and +5-10% in case of `metrics` driver, up to 20% for `metrics` with quantiles. + +Format is as follows. 
+``` crud.stats() --- - spaces: @@ -657,9 +673,44 @@ Possible statistics operation labels are Each operation section contains of different collectors for success calls and error (both error throw and `nil, err`) returns. `count` is total requests count since instance start -or stats restart. `latency` is average time of requests execution, +or stats restart. `latency` is 0.99 quantile of request execution +time if `metrics` driver used and quantiles enabled, +otherwise `latency` is total average. `time` is total time of requests execution. +In [`metrics`](https://www.tarantool.io/en/doc/latest/book/monitoring/) +registry statistics are stored as `tnt_crud_stats` metrics +with `operation`, `status` and `name` labels. Collector +`tnt_crud_space_not_found` stores count of calls to unknown spaces, +`tnt_crud_schema_reloads` stores count of schema reloads in calls. +``` +metrics:collect() +--- +- - label_pairs: + status: ok + operation: insert + name: customers + value: 221411 + metric_name: tnt_crud_stats_count + - label_pairs: + status: ok + operation: insert + name: customers + value: 10.49834896344692 + metric_name: tnt_crud_stats_sum + - label_pairs: + status: ok + operation: insert + name: customers + quantile: 0.99 + value: 0.00023606420935973 + metric_name: tnt_crud_stats + - label_pairs: [] + value: 3 + metric_name: tnt_crud_space_not_found +... +``` + `select` section additionally contains `details` collectors. ```lua crud.stats('my_space').select.details @@ -674,9 +725,17 @@ crud.stats('my_space').select.details is a count of tuples fetched from storages during execution, `tuples_lookup` is a count of tuples looked up on storages while collecting response for call. +In [`metrics`](https://www.tarantool.io/en/doc/latest/book/monitoring/) +registry they are stored as `tnt_crud_map_reduces`, +`tnt_crud_tuples_fetched` and `tnt_crud_tuples_lookup` metrics +with `{ operation = 'select', name = space_name }` labels. 
Statistics are preserved between package reloads or [Tarantool Cartridge role reloads](https://www.tarantool.io/en/doc/latest/book/cartridge/cartridge_api/modules/cartridge.roles/#reload). +Beware that metrics 0.12.0 and below do not support +preserving stats between role reload +(see [tarantool/metrics#334](https://github.com/tarantool/metrics/issues/334)), +thus this feature will be unsupported for `metrics` driver. ## Cartridge roles diff --git a/crud/stats/local_registry.lua b/crud/stats/local_registry.lua index fe1d437f6..777cf346b 100644 --- a/crud/stats/local_registry.lua +++ b/crud/stats/local_registry.lua @@ -2,6 +2,8 @@ -- @module crud.stats.local_registry -- +local errors = require('errors') + local dev_checks = require('crud.common.dev_checks') local op_module = require('crud.stats.operation') local registry_common = require('crud.stats.registry_common') @@ -9,6 +11,7 @@ local stash = require('crud.stats.stash') local registry = {} local internal = stash.get('local_registry') +local StatsLocalError = errors.new_class('StatsLocalError', {capture_stack = false}) --- Initialize local metrics registry. -- @@ -17,9 +20,19 @@ local internal = stash.get('local_registry') -- -- @function init -- --- @treturn boolean Returns true. +-- @tab opts -- -function registry.init() +-- @bool opts.quantiles +-- Quantiles is not supported for local, only `false` is valid. +-- +-- @treturn boolean Returns `true`. +-- +function registry.init(opts) + dev_checks({ quantiles = 'boolean' }) + + StatsLocalError:assert(opts.quantiles == false, + "Quantiles are not supported for 'local' statistics registry") + internal.registry = {} internal.registry.spaces = {} internal.registry.space_not_found = 0 diff --git a/crud/stats/metrics_registry.lua b/crud/stats/metrics_registry.lua new file mode 100644 index 000000000..6e2a685f9 --- /dev/null +++ b/crud/stats/metrics_registry.lua @@ -0,0 +1,464 @@ +---- Internal module used to store statistics in `metrics` registry. 
+-- @module crud.stats.metrics_registry +-- + +local is_package, metrics = pcall(require, 'metrics') + +local dev_checks = require('crud.common.dev_checks') +local op_module = require('crud.stats.operation') +local registry_common = require('crud.stats.registry_common') +local stash = require('crud.stats.stash') + +local registry = {} +-- Used to cache collectors. +local internal = stash.get('metrics_registry') + +local metric_name = { + -- Summary collector for all operations. + stats = 'tnt_crud_stats', + -- `*_count` and `*_sum` are automatically created + -- by summary collector. + stats_count = 'tnt_crud_stats_count', + stats_sum = 'tnt_crud_stats_sum', + + -- Counter collector for spaces not found. + space_not_found = 'tnt_crud_space_not_found', + + -- Counter collector for schema reloads. + schema_reloads = 'tnt_crud_schema_reloads', + + -- Counter collectors for select/pairs details. + details = { + tuples_fetched = 'tnt_crud_tuples_fetched', + tuples_lookup = 'tnt_crud_tuples_lookup', + map_reduces = 'tnt_crud_map_reduces', + } +} + +local LATENCY_QUANTILE = 0.99 + +-- Increasing tolerance threshold affects performance. +local DEFAULT_QUANTILES = { + [LATENCY_QUANTILE] = 1e-2, +} + +local DEFAULT_AGE_PARAMS = { + age_buckets_count = 2, + max_age_time = 60, +} + +--- Check if application supports metrics rock for registry +-- +-- `metrics >= 0.10.0` is required. +-- `metrics >= 0.9.0` is required to use summary quantiles with +-- age buckets. `metrics >= 0.5.0, < 0.9.0` is unsupported +-- due to quantile overflow bug +-- (https://github.com/tarantool/metrics/issues/235). +-- `metrics == 0.9.0` has bug that do not permits +-- to create summary collector without quantiles +-- (https://github.com/tarantool/metrics/issues/262). +-- In fact, user may use `metrics >= 0.5.0`, `metrics != 0.9.0` +-- if he wants to use metrics without quantiles, and `metrics >= 0.9.0` +-- if he wants to use metrics with quantiles. 
But this is confusing, +-- so we use a single restriction solving both cases. +-- +-- @function is_supported +-- +-- @treturn boolean Returns `true` if `metrics >= 0.10.0` found, `false` otherwise. +-- +function registry.is_supported() + if is_package == false then + return false + end + + -- Only metrics >= 0.10.0 supported. + if metrics.unregister_callback == nil then + return false + end + + return true +end + +--- Initialize collectors in global metrics registry +-- +-- Registries are not meant to used explicitly +-- by users, init is not guaranteed to be idempotent. +-- Destroy collectors only through this registry methods. +-- +-- @function init +-- +-- @tab opts +-- +-- @bool opts.quantiles +-- If `true`, computes latency as 0.99 quantile with aging. +-- +-- @treturn boolean Returns `true`. +-- +function registry.init(opts) + dev_checks({ quantiles = 'boolean' }) + + internal.opts = table.deepcopy(opts) + + local quantile_params = nil + local age_params = nil + if opts.quantiles == true then + quantile_params = DEFAULT_QUANTILES + age_params = DEFAULT_AGE_PARAMS + end + + internal.registry = {} + internal.registry[metric_name.stats] = metrics.summary( + metric_name.stats, + 'CRUD router calls statistics', + quantile_params, + age_params) + + internal.registry[metric_name.space_not_found] = metrics.counter( + metric_name.space_not_found, + 'Spaces not found during CRUD calls') + + internal.registry[metric_name.schema_reloads] = metrics.counter( + metric_name.schema_reloads, + 'Schema reloads performed in operation calls') + + internal.registry[metric_name.details.tuples_fetched] = metrics.counter( + metric_name.details.tuples_fetched, + 'Tuples fetched from CRUD storages during select/pairs') + + internal.registry[metric_name.details.tuples_lookup] = metrics.counter( + metric_name.details.tuples_lookup, + 'Tuples looked up on CRUD storages while collecting response during select/pairs') + + internal.registry[metric_name.details.map_reduces] = metrics.counter( 
+ metric_name.details.map_reduces, + 'Map reduces planned during CRUD select/pairs') + + return true +end + +--- Unregister collectors in global metrics registry. +-- +-- Registries are not meant to used explicitly +-- by users, destroy is not guaranteed to be idempotent. +-- Destroy collectors only through this registry methods. +-- +-- @function destroy +-- +-- @treturn boolean Returns `true`. +-- +function registry.destroy() + for _, c in pairs(internal.registry) do + metrics.registry:unregister(c) + end + + internal.registry = nil + internal.opts = nil + + return true +end + +--- Compute `latency` field of an observation. +-- +-- If it is a `{ time = ..., count = ... }` observation, +-- compute latency as overall average and store it +-- inside observation object. +-- +-- @function compute_obs_latency +-- @local +-- +-- @tab obs +-- Objects from `registry_common` +-- `stats.spaces[name][op][status]`. +-- If something like `details` collector +-- passed, do nothing. +-- +local function compute_obs_latency(obs) + if obs.count == nil or obs.time == nil then + return + end + + if obs.count == 0 then + obs.latency = 0 + else + obs.latency = obs.time / obs.count + end +end + +--- Compute `latency` field of each observation. +-- +-- If quantiles disabled, we need to compute +-- latency as overall average from `time` and +-- `count` values. +-- +-- @function compute_latencies +-- @local +-- +-- @tab stats +-- Object from registry_common stats. +-- +local function compute_latencies(stats) + for _, space_stats in pairs(stats.spaces) do + for _, op_stats in pairs(space_stats) do + for _, obs in pairs(op_stats) do + compute_obs_latency(obs) + end + end + end +end + +--- Get copy of global metrics registry. +-- +-- Registries are not meant to used explicitly +-- by users, get is not guaranteed to work without init. 
+-- +-- @function get +-- +-- @string[opt] space_name +-- If specified, returns table with statistics +-- of operations on table, separated by operation type and +-- execution status. If there wasn't any requests for table, +-- returns `{}`. If not specified, returns table with statistics +-- about all existing spaces, count of calls to spaces +-- that wasn't found and count of schema reloads. +-- +-- @treturn table Returns copy of metrics registry. +function registry.get(space_name) + dev_checks('?string') + + local stats = { + spaces = {}, + space_not_found = 0, + schema_reloads = 0, + } + + -- Fill operation basic statistics values. + for _, obs in ipairs(internal.registry[metric_name.stats]:collect()) do + local op = obs.label_pairs.operation + local status = obs.label_pairs.status + local name = obs.label_pairs.name + + if space_name ~= nil and name ~= space_name then + goto stats_continue + end + + registry_common.init_collectors_if_required(stats.spaces, name, op) + local space_stats = stats.spaces[name] + + -- metric_name.stats presents only if quantiles enabled. + if obs.metric_name == metric_name.stats then + if obs.label_pairs.quantile == LATENCY_QUANTILE then + space_stats[op][status].latency = obs.value + end + elseif obs.metric_name == metric_name.stats_sum then + space_stats[op][status].time = obs.value + elseif obs.metric_name == metric_name.stats_count then + space_stats[op][status].count = obs.value + end + + :: stats_continue :: + end + + if not internal.opts.quantiles then + compute_latencies(stats) + end + + -- Fill select/pairs detail statistics values. 
+ for stat_name, metric_name in pairs(metric_name.details) do + for _, obs in ipairs(internal.registry[metric_name]:collect()) do + local name = obs.label_pairs.name + local op = obs.label_pairs.operation + + if space_name ~= nil and name ~= space_name then + goto details_continue + end + + registry_common.init_collectors_if_required(stats.spaces, name, op) + stats.spaces[name][op].details[stat_name] = obs.value + + :: details_continue :: + end + end + + if space_name ~= nil then + return stats.spaces[space_name] or {} + end + + local _, not_found_obs = next(internal.registry[metric_name.space_not_found]:collect()) + if not_found_obs ~= nil then + stats.space_not_found = not_found_obs.value + end + + local _, reload_obs = next(internal.registry[metric_name.schema_reloads]:collect()) + if reload_obs ~= nil then + stats.schema_reloads = reload_obs.value + end + + return stats +end + +--- Check if space statistics are present in registry. +-- +-- @function is_unknown_space +-- +-- @string space_name +-- Name of space. +-- +-- @treturn boolean `true` if no stats for the space are found, `false` otherwise. +-- +function registry.is_unknown_space(space_name) + dev_checks('string') + + for _, obs in ipairs(internal.registry[metric_name.stats]:collect()) do + local name = obs.label_pairs.name + + if name == space_name then + return false + end + end + + for _, metric_name in pairs(metric_name.details) do + for _, obs in ipairs(internal.registry[metric_name]:collect()) do + local name = obs.label_pairs.name + + if name == space_name then + return false + end + end + end + + return true +end + +--- Increase requests count and update latency info. +-- +-- @function observe +-- +-- @number latency +-- Time of call execution. +-- +-- @string space_name +-- Name of space. +-- +-- @string op +-- Label of registry collectors. +-- Use `require('crud.stats.module').op` to pick one. +-- +-- @string status +-- `'ok'` if no errors on execution, `'error'` otherwise. 
+-- +-- @treturn boolean Returns `true`. +-- +function registry.observe(latency, space_name, op, status) + dev_checks('number', 'string', 'string', 'string') + + -- Use `operation` label to be consistent with `tnt_stats_op_*` labels. + -- Use `name` label to be consistent with `tnt_space_*` labels. + -- Use `status` label to be consistent with `tnt_vinyl_*` and HTTP metrics labels. + local label_pairs = { operation = op, name = space_name, status = status } + + internal.registry[metric_name.stats]:observe(latency, label_pairs) + + return true +end + +--- Increase count of "space not found" collector by one. +-- +-- @function observe_space_not_found +-- +-- @treturn boolean Returns `true`. +-- +function registry.observe_space_not_found() + internal.registry[metric_name.space_not_found]:inc(1) + + return true +end + +--- Increase statistics of storage select/pairs calls. +-- +-- @function observe_fetch +-- +-- @number tuples_fetched +-- Count of tuples fetched during storage call. +-- +-- @number tuples_lookup +-- Count of tuples looked up on storages while collecting response. +-- +-- @string space_name +-- Name of space. +-- +-- @treturn boolean Returns `true`. +-- +function registry.observe_fetch(tuples_fetched, tuples_lookup, space_name) + dev_checks('number', 'number', 'string') + + local label_pairs = { name = space_name, operation = op_module.SELECT } + + internal.registry[metric_name.details.tuples_fetched]:inc(tuples_fetched, label_pairs) + internal.registry[metric_name.details.tuples_lookup]:inc(tuples_lookup, label_pairs) + + return true +end + +--- Increase statistics of planned map reduces during select/pairs. +-- +-- @function observe_map_reduces +-- +-- @number count +-- Count of map reduces planned. +-- +-- @string space_name +-- Name of space. +-- +-- @treturn boolean Returns `true`. 
+-- +function registry.observe_map_reduces(count, space_name) + dev_checks('number', 'string') + + local label_pairs = { name = space_name, operation = op_module.SELECT } + internal.registry[metric_name.details.map_reduces]:inc(count, label_pairs) + + return true +end + +--- Increase statistics of schema reloads. +-- +-- @function observe_schema_reloads +-- +-- @number count +-- Schema reloads performed. +-- +-- @treturn boolean Returns `true`. +-- +function registry.observe_schema_reloads(count) + dev_checks('number') + + internal.registry[metric_name.schema_reloads]:inc(count) + + return true +end + +-- Workaround for https://github.com/tarantool/metrics/issues/334 . +-- This workaround does not prevent observations reset between role reloads, +-- but it fixes collector unlink from registry. Without this workaround, +-- we will continue to use cached collectors that are already cleaned up +-- from registry and changes will not appear in metrics export output. +local function workaround_role_reload() + if not registry.is_supported() then + return + end + + -- Check if this registry was enabled before reload. + if internal.registry == nil then + return + end + + -- Check if base collector is in metrics package registry. + -- If it's not, then registry has been cleaned up on role reload. 
+ if metrics.registry:find('summary', metric_name.stats) == nil then + registry.init(internal.opts) + end +end + +workaround_role_reload() + +return registry \ No newline at end of file diff --git a/crud/stats/module.lua b/crud/stats/module.lua index 519450e5e..cf455f7c0 100644 --- a/crud/stats/module.lua +++ b/crud/stats/module.lua @@ -11,7 +11,6 @@ local dev_checks = require('crud.common.dev_checks') local fiber_context = require('crud.common.fiber_context') local utils = require('crud.common.utils') local op_module = require('crud.stats.operation') -local registry = require('crud.stats.local_registry') local stash = require('crud.stats.stash') local StatsError = errors.new_class('StatsError', {capture_stack = false}) @@ -19,6 +18,23 @@ local StatsError = errors.new_class('StatsError', {capture_stack = false}) local stats = {} local internal = stash.get('internal') +local local_registry = require('crud.stats.local_registry') +local metrics_registry = require('crud.stats.metrics_registry') + +local drivers = { + ['local'] = local_registry, +} +if metrics_registry.is_supported() then + drivers['metrics'] = metrics_registry +end + +function internal:get_registry() + if self.driver == nil then + return nil + end + return drivers[self.driver] +end + --- Check if statistics module was enabled. -- -- @function is_enabled @@ -26,7 +42,7 @@ local internal = stash.get('internal') -- @treturn boolean Returns `true` or `false`. -- function stats.is_enabled() - return internal.is_enabled == true + return internal.driver ~= nil end --- Initializes statistics registry, enables callbacks and wrappers. @@ -35,20 +51,64 @@ end -- -- @function enable -- +-- @tab[opt] opts +-- +-- @string[opt] opts.driver +-- `'local'` or `'metrics'`. +-- If `'local'`, stores statistics in local registry (some Lua tables) +-- and computes latency as overall average. `'metrics'` requires +-- `metrics >= 0.10.0` installed and stores statistics in +-- global metrics registry (integrated with exporters). 
+-- `'metrics'` driver supports computing latency as 0.99 quantile with aging. +-- If `'metrics'` driver is available, it is used by default, +-- otherwise `'local'` is used. +-- +-- @bool[opt=false] opts.quantiles +-- If `'metrics'` driver used, you can enable +-- computing requests latency as 0.99 quantile with aging. +-- Performance overhead for enabling is near 10%. +-- -- @treturn boolean Returns `true`. -- -function stats.enable() - if stats.is_enabled() then - return true - end +function stats.enable(opts) + checks({ driver = '?string', quantiles = '?boolean' }) StatsError:assert( rawget(_G, 'crud') ~= nil, "Can be enabled only on crud router" ) - internal.is_enabled = true - registry.init() + opts = table.deepcopy(opts) or {} + if opts.driver == nil then + if drivers.metrics ~= nil then + opts.driver = 'metrics' + else + opts.driver = 'local' + end + end + + StatsError:assert( + drivers[opts.driver] ~= nil, + 'Unsupported driver: %s', opts.driver + ) + + if opts.quantiles == nil then + opts.quantiles = false + end + + -- Do not reinit if called with same options. + if internal.driver == opts.driver + and internal.quantiles == opts.quantiles then + return true + end + + -- Disable old driver registry, if another one was requested. 
+ stats.disable() + + internal.driver = opts.driver + internal.quantiles = opts.quantiles + + internal:get_registry().init({ quantiles = internal.quantiles }) return true end @@ -67,8 +127,8 @@ function stats.reset() return true end - registry.destroy() - registry.init() + internal:get_registry().destroy() + internal:get_registry().init({ quantiles = internal.quantiles }) return true end @@ -86,8 +146,9 @@ function stats.disable() return true end - registry.destroy() - internal.is_enabled = false + internal:get_registry().destroy() + internal.driver = nil + internal.quantiles = nil return true end @@ -115,13 +176,15 @@ function stats.get(space_name) return {} end - return registry.get(space_name) + return internal:get_registry().get(space_name) end local function wrap_tail(space_name, op, opts, start_time, call_status, ...) local finish_time = clock.monotonic() local latency = finish_time - start_time + local registry = internal:get_registry() + local err = nil local status = 'ok' if call_status == false then @@ -274,7 +337,7 @@ local function update_fetch_stats(storage_stats, space_name) return true end - registry.observe_fetch( + internal:get_registry().observe_fetch( storage_stats.tuples_fetched, storage_stats.tuples_lookup, space_name @@ -329,7 +392,9 @@ stats.op = op_module --- Stats module internal state (for debug/test). -- --- @tfield[opt] boolean is_enabled Is currently enabled. +-- @tfield[opt] string driver Current statistics registry driver (if nil, stats disabled). +-- +-- @tfield[opt] boolean quantiles Whether quantiles are computed. 
stats.internal = internal return stats diff --git a/test/integration/stats_test.lua b/test/integration/stats_test.lua index 91d961492..c4ec9af5a 100644 --- a/test/integration/stats_test.lua +++ b/test/integration/stats_test.lua @@ -4,7 +4,15 @@ local t = require('luatest') local stats_registry_common = require('crud.stats.registry_common') -local g = t.group('stats_integration') +local pgroup = t.group('stats_integration', { + { driver = 'local' }, + { driver = 'metrics', quantiles = false }, + { driver = 'metrics', quantiles = true }, +}) +local group_metrics = t.group('stats_metrics_integration', { + { driver = 'metrics', quantiles = false }, + { driver = 'metrics', quantiles = true }, +}) local helpers = require('test.helper') local space_id = 542 @@ -13,7 +21,8 @@ local unknown_space_name = 'non_existing_space' local new_space_name = 'newspace' local schema_change_space_name = 'change_customers' -g.before_all(function(g) + +local function before_all(g) g.cluster = helpers.Cluster:new({ datadir = fio.tempdir(), server_command = helpers.entrypoint('srv_stats'), @@ -23,28 +32,63 @@ g.before_all(function(g) g.cluster:start() g.router = g.cluster:server('router').net_box - helpers.prepare_simple_functions(g.router) - g.router:eval("require('crud').enable_stats()") - t.assert_equals(helpers.is_space_exist(g.router, space_name), true) t.assert_equals(helpers.is_space_exist(g.router, unknown_space_name), false) -end) -g.after_all(function(g) + if g.params.driver == 'metrics' then + local is_metrics_supported = g.router:eval([[ + return require('crud.stats.metrics_registry').is_supported() + ]]) + t.skip_if(is_metrics_supported == false, 'Metrics registry is unsupported') + end +end + +local function after_all(g) helpers.stop_cluster(g.cluster) -end) +end + +local function get_stats(g, space_name) + return g.router:eval("return require('crud').stats(...)", { space_name }) +end + +local function enable_stats(g, params) + params = params or g.params + 
g.router:eval("require('crud').enable_stats(...)", { params }) +end -g.before_each(function(g) +local function disable_stats(g) + g.router:eval("require('crud').disable_stats()") +end + +local function before_each(g) g.router:eval("crud = require('crud')") + enable_stats(g) helpers.truncate_space_on_cluster(g.cluster, space_name) helpers.drop_space_on_cluster(g.cluster, new_space_name) helpers.drop_space_on_cluster(g.cluster, schema_change_space_name) -end) +end -function g:get_stats(space_name) - return self.router:eval("return require('crud').stats(...)", { space_name }) +local function get_metrics(g) + return g.router:eval("return require('metrics').collect()") end +pgroup.before_all(before_all) + +pgroup.after_all(after_all) + +pgroup.before_each(before_each) + +pgroup.after_each(disable_stats) + + +group_metrics.before_all(before_all) + +group_metrics.after_all(after_all) + +group_metrics.before_each(before_each) + +group_metrics.after_each(disable_stats) + local function create_new_space(g) helpers.call_on_storages(g.cluster, function(server) @@ -435,6 +479,39 @@ local select_cases = { }, } +-- luacheck: max comment line length 150 +-- Based on https://github.com/tarantool/crud/blob/76e33749226d5fd1195e2628502a9e01d6a616fa/test/integration/updated_shema_test.lua#L622 +local function perform_insert_call_with_schema_reload(g) + -- create space w/ bucket_id index + helpers.call_on_servers(g.cluster, {'s1-master', 's2-master'}, function(server) + server.net_box:call('create_space') + server.net_box:call('create_bucket_id_index') + end) + + -- value should be string error + local obj, err = g.router:call( + 'crud.insert_object', { schema_change_space_name, { id = 11, value = 123 } } + ) + + t.assert_equals(obj, nil) + t.assert_is_not(err, nil) + t.assert_str_contains(err.err, "type does not match one required by operation: expected string") + + -- set value type to unsigned + helpers.call_on_servers(g.cluster, {'s1-master', 's2-master'}, function(server) + 
server.net_box:call('set_value_type_to_unsigned') + end) + + -- check that schema changes were applied + -- insert value unsigned - OK + local obj, err = g.router:call( + 'crud.insert_object', { schema_change_space_name, { id = 11, value = 123 } } + ) + + t.assert_is_not(obj, nil) + t.assert_equals(err, nil) +end + -- Generate non-null stats for all cases. local function generate_stats(g) for _, case in pairs(simple_operation_cases) do @@ -477,6 +554,9 @@ local function generate_stats(g) local case = unknown_space_cases.insert local _, err = g.router:call(case.func, case.args) t.assert_not_equals(err, nil) + + -- Generate non-null schema reloads. + perform_insert_call_with_schema_reload(g) end @@ -486,12 +566,12 @@ for name, case in pairs(simple_operation_cases) do local test_name = ('test_%s'):format(name) if case.prepare ~= nil then - g.before_test(test_name, case.prepare) + pgroup.before_test(test_name, case.prepare) end - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Collect stats before call. - local stats_before = g:get_stats(space_name) + local stats_before = get_stats(g, space_name) t.assert_type(stats_before, 'table') -- Call operation. @@ -517,7 +597,7 @@ for name, case in pairs(simple_operation_cases) do end -- Collect stats after call. - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_type(stats_after, 'table') t.assert_not_equals(stats_after[case.op], nil) @@ -566,9 +646,9 @@ end for name, case in pairs(unknown_space_cases) do local test_name = ('test_%s_on_unknown_space'):format(name) - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Collect statss before call. - local stats_before = g:get_stats() + local stats_before = get_stats(g) t.assert_type(stats_before, 'table') -- Call operation. @@ -582,7 +662,7 @@ for name, case in pairs(unknown_space_cases) do t.assert_not_equals(err, nil) -- Collect stats after call. 
- local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_type(stats_after, 'table') t.assert_equals(stats_after.space_not_found - stats_before.space_not_found, 1, @@ -596,14 +676,14 @@ end for name, case in pairs(select_cases) do local test_name = ('test_%s_details'):format(name) - g.before_test(test_name, prepare_select_data) + pgroup.before_test(test_name, prepare_select_data) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = 'select' local space_name = space_name -- Collect stats before call. - local stats_before = g:get_stats(space_name) + local stats_before = get_stats(g, space_name) t.assert_type(stats_before, 'table') -- Call operation. @@ -617,7 +697,7 @@ for name, case in pairs(select_cases) do t.assert_equals(err, nil) -- Collect stats after call. - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_type(stats_after, 'table') local op_before = set_defaults_if_empty(stats_before, op) @@ -640,49 +720,52 @@ for name, case in pairs(select_cases) do end -g.test_resolve_name_from_id = function(g) +pgroup.test_resolve_name_from_id = function(g) local op = 'len' g.router:call('crud.len', { space_id }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, "Statistics is filled by name") end -g.before_test( +pgroup.before_test( 'test_role_reload_do_not_reset_observations', generate_stats) -g.test_role_reload_do_not_reset_observations = function(g) - local stats_before = g:get_stats() +pgroup.test_role_reload_do_not_reset_observations = function(g) + t.xfail_if(g.params.driver == 'metrics', + 'See https://github.com/tarantool/metrics/issues/334') + + local stats_before = get_stats(g) helpers.reload_roles(g.cluster:server('router')) - local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_equals(stats_after, stats_before) end -g.before_test( +pgroup.before_test( 
'test_module_reload_do_not_reset_observations', generate_stats) -g.test_module_reload_do_not_reset_observations = function(g) - local stats_before = g:get_stats() +pgroup.test_module_reload_do_not_reset_observations = function(g) + local stats_before = get_stats(g) g.router:eval([[ package.loaded['crud'] = nil crud = require('crud') ]]) - local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_equals(stats_after, stats_before) end -g.test_spaces_created_in_runtime_supported_with_stats = function(g) +pgroup.test_spaces_created_in_runtime_supported_with_stats = function(g) local op = 'insert' - local stats_before = g:get_stats(new_space_name) + local stats_before = get_stats(g, new_space_name) local op_before = set_defaults_if_empty(stats_before, op) create_new_space(g) @@ -690,7 +773,7 @@ g.test_spaces_created_in_runtime_supported_with_stats = function(g) local _, err = g.router:call('crud.insert', { new_space_name, { 1, box.NULL }}) t.assert_equals(err, nil) - local stats_after = g:get_stats(new_space_name) + local stats_after = get_stats(g, new_space_name) local op_after = stats_after[op] t.assert_type(op_after, 'table', "'insert' stats found for new space") t.assert_type(op_after.ok, 'table', "success 'insert' stats found for new space") @@ -699,7 +782,7 @@ g.test_spaces_created_in_runtime_supported_with_stats = function(g) end -g.before_test( +pgroup.before_test( 'test_spaces_dropped_in_runtime_supported_with_stats', function(g) create_new_space(g) @@ -708,9 +791,9 @@ g.before_test( t.assert_equals(err, nil) end) -g.test_spaces_dropped_in_runtime_supported_with_stats = function(g) +pgroup.test_spaces_dropped_in_runtime_supported_with_stats = function(g) local op = 'insert' - local stats_before = g:get_stats(new_space_name) + local stats_before = get_stats(g, new_space_name) local op_before = set_defaults_if_empty(stats_before, op) t.assert_type(op_before, 'table', "'insert' stats found for new space") @@ -719,7 +802,7 @@ 
g.test_spaces_dropped_in_runtime_supported_with_stats = function(g) local _, err = g.router:call('crud.insert', { new_space_name, { 2, box.NULL }}) t.assert_not_equals(err, nil, "Should trigger 'space not found' error") - local stats_after = g:get_stats(new_space_name) + local stats_after = get_stats(g, new_space_name) local op_after = stats_after[op] t.assert_type(op_after, 'table', "'insert' stats found for dropped new space") t.assert_type(op_after.error, 'table', "error 'insert' stats found for dropped new space") @@ -728,46 +811,334 @@ g.test_spaces_dropped_in_runtime_supported_with_stats = function(g) end --- luacheck: max comment line length 150 --- Based on https://github.com/tarantool/crud/blob/76e33749226d5fd1195e2628502a9e01d6a616fa/test/integration/updated_shema_test.lua#L622 -local function perform_insert_call_with_schema_reload(g) - -- create space w/ bucket_id index - helpers.call_on_servers(g.cluster, {'s1-master', 's2-master'}, function(server) - server.net_box:call('create_space') - server.net_box:call('create_bucket_id_index') - end) +pgroup.test_call_with_schema_reload_increments_counter = function(g) + local stats_before = get_stats(g) - -- value should be string error - local obj, err = g.router:call( - 'crud.insert_object', { schema_change_space_name, { id = 11, value = 123 } } - ) + perform_insert_call_with_schema_reload(g) - t.assert_equals(obj, nil) - t.assert_is_not(err, nil) - t.assert_str_contains(err.err, "type does not match one required by operation: expected string") + local stats_after = get_stats(g) - -- set value type to unsigned - helpers.call_on_servers(g.cluster, {'s1-master', 's2-master'}, function(server) - server.net_box:call('set_value_type_to_unsigned') - end) + t.assert_gt(stats_after.schema_reloads, stats_before.schema_reloads, + "Schema reloads counter incremented") +end - -- check that schema changes were applied - -- insert value unsigned - OK - local obj, err = g.router:call( - 'crud.insert_object', { 
schema_change_space_name, { id = 11, value = 123 } } - ) - t.assert_is_not(obj, nil) +-- https://github.com/tarantool/metrics/blob/fc5a67072340b12f983f09b7d383aca9e2f10cf1/test/utils.lua#L22-L31 +local function find_obs(metric_name, label_pairs, observations) + for _, obs in pairs(observations) do + local same_label_pairs = pcall(t.assert_equals, obs.label_pairs, label_pairs) + if obs.metric_name == metric_name and same_label_pairs then + return obs + end + end + + return { value = 0 } +end + +-- https://github.com/tarantool/metrics/blob/fc5a67072340b12f983f09b7d383aca9e2f10cf1/test/utils.lua#L55-L63 +local function find_metric(metric_name, metrics_data) + local m = {} + for _, v in ipairs(metrics_data) do + if v.metric_name == metric_name then + table.insert(m, v) + end + end + return #m > 0 and m or nil +end + +local function get_unique_label_values(metrics_data, label_key) + local label_values_map = {} + for _, v in ipairs(metrics_data) do + local label_pairs = v.label_pairs or {} + if label_pairs[label_key] ~= nil then + label_values_map[label_pairs[label_key]] = true + end + end + + local label_values = {} + for k, _ in pairs(label_values_map) do + table.insert(label_values, k) + end + + return label_values +end + +local function validate_metrics(g, metrics) + local quantile_stats + if g.params.quantiles == true then + quantile_stats = find_metric('tnt_crud_stats', metrics) + t.assert_type(quantile_stats, 'table', '`tnt_crud_stats` summary metrics found') + end + + local stats_count = find_metric('tnt_crud_stats_count', metrics) + t.assert_type(stats_count, 'table', '`tnt_crud_stats` summary metrics found') + + local stats_sum = find_metric('tnt_crud_stats_sum', metrics) + t.assert_type(stats_sum, 'table', '`tnt_crud_stats` summary metrics found') + + + local expected_operations = { 'insert', 'get', 'replace', 'update', + 'upsert', 'delete', 'select', 'truncate', 'len', 'borders' } + + if g.params.quantiles == true then + 
t.assert_items_equals(get_unique_label_values(quantile_stats, 'operation'), expected_operations, + 'Metrics are labelled with operation') + end + + t.assert_items_equals(get_unique_label_values(stats_count, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(stats_sum, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + + local expected_statuses = { 'ok', 'error' } + + if g.params.quantiles == true then + t.assert_items_equals( + get_unique_label_values(quantile_stats, 'status'), + expected_statuses, + 'Metrics are labelled with status') + end + + t.assert_items_equals(get_unique_label_values(stats_count, 'status'), expected_statuses, + 'Metrics are labelled with status') + + t.assert_items_equals(get_unique_label_values(stats_sum, 'status'), expected_statuses, + 'Metrics are labelled with status') + + + local expected_names = { space_name, schema_change_space_name } + local expected_names_select = { space_name } + + if g.params.quantiles == true then + t.assert_items_equals( + get_unique_label_values(quantile_stats, 'name'), + expected_names, + 'Metrics are labelled with space name (only existing spaces)') + end + + t.assert_items_equals(get_unique_label_values(stats_count, 'name'), + expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + t.assert_items_equals( + get_unique_label_values(stats_sum, 'name'), + expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + if g.params.quantiles == true then + local expected_quantiles = { 0.99 } + t.assert_items_equals(get_unique_label_values(quantile_stats, 'quantile'), expected_quantiles, + 'Quantile metrics presents') + end + + + local tuples_fetched = find_metric('tnt_crud_tuples_fetched', metrics) + t.assert_type(tuples_fetched, 'table', '`tnt_crud_tuples_fetched` metrics found') + + t.assert_items_equals(get_unique_label_values(tuples_fetched, 'operation'), { 
'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(tuples_fetched, 'name'), expected_names_select, + 'Metrics are labelled with space name (only existing spaces)') + + + local tuples_lookup = find_metric('tnt_crud_tuples_lookup', metrics) + t.assert_type(tuples_lookup, 'table', '`tnt_crud_tuples_lookup` metrics found') + + t.assert_items_equals( get_unique_label_values(tuples_lookup, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(tuples_lookup, 'name'), expected_names_select, + 'Metrics are labelled with space name (only existing spaces)') + + + local map_reduces = find_metric('tnt_crud_map_reduces', metrics) + t.assert_type(map_reduces, 'table', '`tnt_crud_map_reduces` metrics found') + + t.assert_items_equals(get_unique_label_values(map_reduces, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(map_reduces, 'name'), expected_names_select, + 'Metrics are labelled with space name (only existing spaces)') + + + local space_not_found = find_metric('tnt_crud_space_not_found', metrics) + t.assert_type(space_not_found, 'table', '`tnt_crud_space_not_found` metrics found') + + + local schema_reloads = find_metric('tnt_crud_schema_reloads', metrics) + t.assert_type(schema_reloads, 'table', '`tnt_crud_schema_reloads` metrics found') +end + +local function check_updated_per_call(g) + local metrics_before = get_metrics(g) + local stats_labels = { operation = 'select', status = 'ok', name = space_name } + local details_labels = { operation = 'select', name = space_name } + + local count_before = find_obs('tnt_crud_stats_count', stats_labels, metrics_before) + local time_before = find_obs('tnt_crud_stats_sum', stats_labels, metrics_before) + local tuples_lookup_before = find_obs('tnt_crud_tuples_lookup', details_labels, metrics_before) + local tuples_fetched_before = 
find_obs('tnt_crud_tuples_fetched', details_labels, metrics_before) + local map_reduces_before = find_obs('tnt_crud_map_reduces', details_labels, metrics_before) + + local case = select_cases['select_by_secondary_index'] + local _, err = g.router:call(case.func, { space_name, case.conditions }) t.assert_equals(err, nil) + + local metrics_after = get_metrics(g) + local count_after = find_obs('tnt_crud_stats_count', stats_labels, metrics_after) + local time_after = find_obs('tnt_crud_stats_sum', stats_labels, metrics_after) + local tuples_lookup_after = find_obs('tnt_crud_tuples_lookup', details_labels, metrics_after) + local tuples_fetched_after = find_obs('tnt_crud_tuples_fetched', details_labels, metrics_after) + local map_reduces_after = find_obs('tnt_crud_map_reduces', details_labels, metrics_after) + + t.assert_equals(count_after.value - count_before.value, 1, + '`select` metrics count increased') + t.assert_ge(time_after.value - time_before.value, 0, + '`select` total time increased') + t.assert_ge(tuples_lookup_after.value - tuples_lookup_before.value, case.tuples_lookup, + '`select` tuples lookup expected change') + t.assert_ge(tuples_fetched_after.value - tuples_fetched_before.value, case.tuples_fetched, + '`select` tuples feched expected change') + t.assert_ge(map_reduces_after.value - map_reduces_before.value, case.tuples_lookup, + '`select` map reduces expected change') end -g.test_call_with_schema_reload_increments_counter = function(g) - local stats_before = get_stats(g) - perform_insert_call_with_schema_reload(g) +group_metrics.before_test( + 'test_stats_stored_in_global_metrics_registry', + generate_stats) - local stats_after = get_stats(g) +group_metrics.test_stats_stored_in_global_metrics_registry = function(g) + local metrics = get_metrics(g) + validate_metrics(g, metrics) +end - t.assert_gt(stats_after.schema_reloads, stats_before.schema_reloads, - "Schema reloads counter incremented") + +group_metrics.before_test('test_metrics_updated_per_call', 
generate_stats) + +group_metrics.test_metrics_updated_per_call = check_updated_per_call + + +group_metrics.before_test( + 'test_space_not_found_metrics_updated_per_call', + generate_stats) + +group_metrics.test_space_not_found_metrics_updated_per_call = function(g) + local metrics_before = get_metrics(g) + + local space_not_found_before = find_obs('tnt_crud_space_not_found', {}, metrics_before) + + local case = unknown_space_cases.insert + local _, err = g.router:call(case.func, case.args) + t.assert_not_equals(err, nil) + + local metrics_after = get_metrics(g) + local space_not_found_after = find_obs('tnt_crud_space_not_found', {}, metrics_after) + + t.assert_equals(space_not_found_after.value - space_not_found_before.value, 1, + '`tnt_crud_space_not_found` metrics count increased') +end + + +group_metrics.before_test( + 'test_metrics_collectors_destroyed_if_stats_disabled', + generate_stats) + +group_metrics.test_metrics_collectors_destroyed_if_stats_disabled = function(g) + disable_stats(g) + + local metrics = get_metrics(g) + + local stats = find_metric('tnt_crud_stats', metrics) + t.assert_equals(stats, nil, '`tnt_crud_stats` summary metrics not found') + + local stats_count = find_metric('tnt_crud_stats_count', metrics) + t.assert_equals(stats_count, nil, '`tnt_crud_stats` summary metrics not found') + + local stats_sum = find_metric('tnt_crud_stats_sum', metrics) + t.assert_equals(stats_sum, nil, '`tnt_crud_stats` summary metrics not found') + + local tuples_fetched = find_metric('tnt_crud_tuples_fetched', metrics) + t.assert_equals(tuples_fetched, nil, '`tnt_crud_tuples_fetched` metrics not found') + + local tuples_lookup = find_metric('tnt_crud_tuples_lookup', metrics) + t.assert_equals(tuples_lookup, nil, '`tnt_crud_tuples_lookup` metrics not found') + + local map_reduces = find_metric('tnt_crud_map_reduces', metrics) + t.assert_equals(map_reduces, nil, '`tnt_crud_map_reduces` metrics not found') + + local space_not_found = 
find_metric('tnt_crud_space_not_found', metrics) + t.assert_equals(space_not_found, nil, '`tnt_crud_space_not_found` metrics not found') +end + + +group_metrics.before_test( + 'test_stats_stored_in_metrics_registry_after_switch_to_metrics_driver', + disable_stats) + +group_metrics.test_stats_stored_in_metrics_registry_after_switch_to_metrics_driver = function(g) + enable_stats(g, { driver = 'local' }) + -- Switch to metrics driver. + enable_stats(g) + + generate_stats(g) + local metrics = get_metrics(g) + validate_metrics(g, metrics) +end + +group_metrics.before_test( + 'test_role_reload_do_not_reset_metrics_observations', + generate_stats) + +group_metrics.test_role_reload_do_not_reset_metrics_observations = function(g) + t.xfail('See https://github.com/tarantool/metrics/issues/334') + + helpers.reload_roles(g.cluster:server('router')) + g.router:eval("crud = require('crud')") + local metrics = get_metrics(g) + validate_metrics(g, metrics) +end + + +group_metrics.before_test( + 'test_module_reload_do_not_reset_metrics_observations', + generate_stats) + +group_metrics.test_module_reload_do_not_reset_metrics_observations = function(g) + g.router:eval([[ + package.loaded['crud'] = nil + package.loaded['metrics'] = nil + crud = require('crud') + ]]) + + local metrics = get_metrics(g) + validate_metrics(g, metrics) +end + +group_metrics.before_test( + 'test_stats_changed_in_metrics_registry_after_role_reload', + prepare_select_data) + +group_metrics.test_stats_changed_in_metrics_registry_after_role_reload = function(g) + helpers.reload_roles(g.cluster:server('router')) + g.router:eval("crud = require('crud')") + check_updated_per_call(g) +end + + +group_metrics.before_test( + 'test_stats_changed_in_metrics_registry_after_module_reload', + prepare_select_data) + +group_metrics.test_stats_changed_in_metrics_registry_after_module_reload = function(g) + g.router:eval([[ + package.loaded['crud'] = nil + package.loaded['metrics'] = nil + crud = require('crud') + ]]) + + 
check_updated_per_call(g) end diff --git a/test/unit/stats_test.lua b/test/unit/stats_test.lua index d1e3479b2..592bece58 100644 --- a/test/unit/stats_test.lua +++ b/test/unit/stats_test.lua @@ -6,14 +6,19 @@ local t = require('luatest') local stats_module = require('crud.stats.module') local utils = require('crud.common.utils') -local g = t.group('stats_unit') +local pgroup = t.group('stats_unit', { + { driver = 'local' }, + { driver = 'metrics', quantiles = false }, + { driver = 'metrics', quantiles = true }, +}) +local group_driver = t.group('stats_driver_unit') local helpers = require('test.helper') local space_id = 542 local space_name = 'customers' local unknown_space_name = 'non_existing_space' -g.before_all(function(g) +local function before_all(g) -- Enable test cluster for "is space exist?" checks. g.cluster = helpers.Cluster:new({ datadir = fio.tempdir(), @@ -29,47 +34,63 @@ g.before_all(function(g) t.assert_equals(helpers.is_space_exist(g.router, space_name), true) t.assert_equals(helpers.is_space_exist(g.router, unknown_space_name), false) -end) -g.after_all(function(g) - helpers.stop_cluster(g.cluster) -end) + g.is_metrics_supported = g.router:eval([[ + return require('crud.stats.metrics_registry').is_supported() + ]]) --- Reset statistics between tests, reenable if needed. 
-g.before_each(function(g) - g:enable_stats() -end) + if g.params ~= nil and g.params.driver == 'metrics' then + t.skip_if(g.is_metrics_supported == false, 'Metrics registry is unsupported') + end +end -g.after_each(function(g) - g:disable_stats() -end) +local function after_all(g) + helpers.stop_cluster(g.cluster) +end -function g:get_stats(space_name) - return self.router:eval("return stats_module.get(...)", { space_name }) +local function get_stats(g, space_name) + return g.router:eval("return stats_module.get(...)", { space_name }) end -function g:enable_stats() - self.router:eval("stats_module.enable()") +local function enable_stats(g, params) + params = params or g.params + g.router:eval("stats_module.enable(...)", { params }) end -function g:disable_stats() - self.router:eval("stats_module.disable()") +local function disable_stats(g) + g.router:eval("stats_module.disable()") end -function g:reset_stats() - self.router:eval("return stats_module.reset()") +local function reset_stats(g) + g.router:eval("return stats_module.reset()") end -g.test_get_format_after_enable = function(g) - local stats = g:get_stats() +pgroup.before_all(before_all) + +pgroup.after_all(after_all) + +-- Reset statistics between tests, reenable if needed. 
+pgroup.before_each(enable_stats) + +pgroup.after_each(disable_stats) + + +group_driver.before_all(before_all) + +group_driver.after_all(after_all) + +group_driver.after_each(disable_stats) + +pgroup.test_get_format_after_enable = function(g) + local stats = get_stats(g) t.assert_type(stats, 'table') t.assert_equals(stats.spaces, {}) t.assert_equals(stats.space_not_found, 0) end -g.test_get_by_space_name_format_after_enable = function(g) - local stats = g:get_stats(space_name) +pgroup.test_get_by_space_name_format_after_enable = function(g) + local stats = get_stats(g, space_name) t.assert_type(stats, 'table') t.assert_equals(stats, {}) @@ -127,7 +148,7 @@ for name, case in pairs(observe_cases) do for _, op in pairs(case.operations) do local test_name = ('test_%s_%s'):format(op, name) - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Call wrapped functions on server side. -- Collect execution times from outside. local run_count = 10 @@ -153,10 +174,10 @@ for name, case in pairs(observe_cases) do local total_time = fun.foldl(function(acc, x) return acc + x end, 0, time_diffs) -- Validate stats format after execution. 
- local total_stats = g:get_stats() + local total_stats = get_stats(g) t.assert_type(total_stats, 'table', 'Total stats present after observations') - local space_stats = g:get_stats(space_name) + local space_stats = get_stats(g, space_name) t.assert_type(space_stats, 'table', 'Space stats present after observations') t.assert_equals(total_stats.spaces[space_name], space_stats, @@ -252,7 +273,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do for name_tail, return_case in pairs(preserve_return_cases) do local test_name = ('test_%s%s'):format(name_head, name_tail) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -276,7 +297,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do local test_name = ('test_%spairs_wrapper_preserves_return_values'):format(name_head) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local input = { a = 'a', b = 'b' } @@ -306,7 +327,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do for name_tail, throw_case in pairs(preserve_throw_cases) do local test_name = ('test_%s%s'):format(name_head, name_tail) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -362,7 +383,7 @@ local error_cases = { for name, case in pairs(error_cases) do local test_name = ('test_%s_increases_space_not_found_count'):format(name) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -391,7 +412,7 @@ for name, case in pairs(error_cases) do t.assert_str_contains(err_msg, case.msg, "Error preserved") - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats.space_not_found, 1) t.assert_equals(stats.spaces[unknown_space_name], nil, @@ -399,13 +420,13 @@ for name, case in pairs(error_cases) do end end -g.test_stats_is_empty_after_disable = function(g) - 
g:disable_stats() +pgroup.test_stats_is_empty_after_disable = function(g) + disable_stats(g) local op = stats_module.op.INSERT g.router:eval(call_wrapped, { 'return_true', op, {}, space_name }) - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats, {}) end @@ -413,57 +434,57 @@ local function prepare_non_default_stats(g) local op = stats_module.op.INSERT g.router:eval(call_wrapped, { 'return_true', op, {}, space_name }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_equals(stats[op].ok.count, 1, 'Non-zero stats prepared') return stats end -g.test_enable_is_idempotent = function(g) +pgroup.test_enable_is_idempotent = function(g) local stats_before = prepare_non_default_stats(g) - g:enable_stats() + enable_stats(g) - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_equals(stats_after, stats_before, 'Stats have not been reset') end -g.test_reset = function(g) +pgroup.test_reset = function(g) prepare_non_default_stats(g) - g:reset_stats() + reset_stats(g) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_equals(stats, {}, 'Stats have been reset') end -g.test_reset_for_disabled_stats_does_not_init_module = function(g) - g:disable_stats() +pgroup.test_reset_for_disabled_stats_does_not_init_module = function(g) + disable_stats(g) - local stats_before = g:get_stats() + local stats_before = get_stats(g) t.assert_equals(stats_before, {}, "Stats is empty") - g:reset_stats() + reset_stats(g) - local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_equals(stats_after, {}, "Stats is still empty") end -g.test_enabling_stats_on_non_router_throws_error = function(g) +pgroup.test_enabling_stats_on_non_router_throws_error = function(g) local storage = g.cluster:server('s1-master').net_box t.assert_error(storage.eval, storage, " require('crud.stats.module').enable() ") end -g.test_stats_fetch_callback = 
function(g) +pgroup.test_stats_fetch_callback = function(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ stats_module.get_fetch_callback()(...) ]], { storage_cursor_stats, space_name }) local op = stats_module.op.SELECT - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, 'Fetch stats update inits SELECT collectors') @@ -476,8 +497,8 @@ g.test_stats_fetch_callback = function(g) 'tuples_lookup is inremented by expected value') end -g.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) - g:disable_stats() +pgroup.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) + disable_stats(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ stats_module.get_fetch_callback()(...) ]], @@ -486,7 +507,7 @@ g.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) t.success('No unexpected errors') end -g.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) +pgroup.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ @@ -516,7 +537,7 @@ local eval_with_fiber_context_setup = [[ return stats_module.wrap(func, op)(space_name) ]] -g.test_map_reduce_increment = function(g) +pgroup.test_map_reduce_increment = function(g) local op = stats_module.op.SELECT local inc = 1 @@ -524,13 +545,13 @@ g.test_map_reduce_increment = function(g) { op, space_name, 'map_reduces', inc }) t.assert_equals(err, nil) - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats.spaces[space_name][op].details.map_reduces, inc, "Counter of map reduces incremented") end -g.test_schema_reload = function(g) +pgroup.test_schema_reload = function(g) local op = stats_module.op.INSERT local inc = 1 @@ -544,10 +565,71 @@ g.test_schema_reload = 
function(g) "Counter of map reduces incremented") end -g.test_resolve_name_from_id = function(g) +pgroup.test_resolve_name_from_id = function(g) local op = stats_module.op.LEN g.router:eval(call_wrapped, { 'return_true', stats_module.op.LEN, {}, space_id }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, "Statistics is filled by name") end + +group_driver.test_default_driver = function(g) + enable_stats(g) + + local driver = g.router:eval(" return stats_module.internal.driver ") + + if g.is_metrics_supported then + t.assert_equals(driver, 'metrics') + else + t.assert_equals(driver, 'local') + end +end + +group_driver.test_default_quantiles = function(g) + enable_stats(g) + + local quantiles = g.router:eval(" return stats_module.internal.quantiles ") + t.assert_equals(quantiles, false) +end + +group_driver.before_test( + 'test_stats_reenable_with_different_driver_reset_stats', + function(g) + t.skip_if(g.is_metrics_supported == false, 'Metrics registry is unsupported') + end +) + +group_driver.test_stats_reenable_with_different_driver_reset_stats = function(g) + enable_stats(g, { driver = 'metrics' }) + + prepare_non_default_stats(g) + + enable_stats(g, { driver = 'local' }) + local stats = get_stats(g) + t.assert_equals(stats.spaces, {}, 'Stats have been reset') +end + +group_driver.test_unknown_driver_throws_error = function(g) + t.assert_error_msg_contains( + 'Unsupported driver: unknown', + enable_stats, g, { driver = 'unknown' }) +end + +group_driver.before_test( + 'test_stats_enable_with_metrics_throws_error_if_unsupported', + function(g) + t.skip_if(g.is_metrics_supported == true, 'Metrics registry is supported') + end +) + +group_driver.test_stats_enable_with_metrics_throws_error_if_unsupported = function(g) + t.assert_error_msg_contains( + 'Unsupported driver: metrics', + enable_stats, g, { driver = 'metrics' }) +end + 
+group_driver.test_stats_enable_with_local_throws_error_if_quantiles_enabled = function(g) + t.assert_error_msg_contains( + 'Quantiles are not supported', + enable_stats, g, { driver = 'local', quantiles = true }) +end