From 0a9a7718590b5aa0c0854c1391bad3f3c7620598 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 9 Aug 2023 17:04:15 -0400 Subject: [PATCH] [wip] add a couch_scanner app couch_scanner will run in the background and scans all the dbs and ddocs, first trying to compile them against quickjs and the existing sm engine then possibly even run a sample of docs through the map/reduce functions and compare the results. since there could be a large number of dbs and scanning would happened at a low background rate, there is a periodic checkpointing mechanism to save the currently processed db shard checkpoint. --- rebar.config.script | 1 + rel/reltool.config | 2 + src/couch_scanner/README.md | 4 + src/couch_scanner/src/couch_scanner.app.src | 29 ++++ src/couch_scanner/src/couch_scanner.erl | 28 ++++ src/couch_scanner/src/couch_scanner_app.erl | 23 +++ .../src/couch_scanner_checkpoint.erl | 89 ++++++++++ .../src/couch_scanner_server.erl | 154 ++++++++++++++++++ src/couch_scanner/src/couch_scanner_sup.erl | 34 ++++ .../test/eunit/couch_scanner_test.erl | 34 ++++ 10 files changed, 398 insertions(+) create mode 100644 src/couch_scanner/README.md create mode 100644 src/couch_scanner/src/couch_scanner.app.src create mode 100644 src/couch_scanner/src/couch_scanner.erl create mode 100644 src/couch_scanner/src/couch_scanner_app.erl create mode 100644 src/couch_scanner/src/couch_scanner_checkpoint.erl create mode 100644 src/couch_scanner/src/couch_scanner_server.erl create mode 100644 src/couch_scanner/src/couch_scanner_sup.erl create mode 100644 src/couch_scanner/test/eunit/couch_scanner_test.erl diff --git a/rebar.config.script b/rebar.config.script index 6b6c7202d4c..68cee3584bc 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -142,6 +142,7 @@ SubDirs = [ "src/smoosh", "src/weatherreport", "src/couch_prometheus", + "src/couch_scanner", "rel" ]. diff --git a/rel/reltool.config b/rel/reltool.config index cac5f0c4d02..98f44f46150 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -64,6 +64,7 @@ snappy, weatherreport, couch_prometheus, + couch_scanner, %% extra nouveau, @@ -128,6 +129,7 @@ {app, snappy, [{incl_cond, include}]}, {app, weatherreport, [{incl_cond, include}]}, {app, couch_prometheus, [{incl_cond, include}]}, + {app, couch_scanner, [{incl_cond, include}]}, %% extra {app, nouveau, [{incl_cond, include}]}, diff --git a/src/couch_scanner/README.md b/src/couch_scanner/README.md new file mode 100644 index 00000000000..9b8bf66b724 --- /dev/null +++ b/src/couch_scanner/README.md @@ -0,0 +1,4 @@ +Couch Scanner +================ + +Traverse all dbs periodically and emit various reports diff --git a/src/couch_scanner/src/couch_scanner.app.src b/src/couch_scanner/src/couch_scanner.app.src new file mode 100644 index 00000000000..961e9e80e31 --- /dev/null +++ b/src/couch_scanner/src/couch_scanner.app.src @@ -0,0 +1,29 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_scanner, [ + {description, "CouchDB Scanner"}, + {vsn, git}, + {registered, [ + couch_scanner_server + ]}, + {applications, [ + kernel, + stdlib, + crypto, + config, + couch_log, + couch_stats, + fabric + ]}, + {mod, {couch_scanner_app, []}} +]}. diff --git a/src/couch_scanner/src/couch_scanner.erl b/src/couch_scanner/src/couch_scanner.erl new file mode 100644 index 00000000000..1e7741d4259 --- /dev/null +++ b/src/couch_scanner/src/couch_scanner.erl @@ -0,0 +1,28 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_scanner). + +-export([ + enable/0, + disable/0, + status/0 +]). + +enable() -> + couch_scanner_server:enable(). + +disable() -> + couch_scanner_server:disable(). + +status() -> + couch_scanner_server:status(). diff --git a/src/couch_scanner/src/couch_scanner_app.erl b/src/couch_scanner/src/couch_scanner_app.erl new file mode 100644 index 00000000000..23c4093dc41 --- /dev/null +++ b/src/couch_scanner/src/couch_scanner_app.erl @@ -0,0 +1,23 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_scanner_app). + +-behaviour(application). + +-export([start/2, stop/1]). + +start(_StartType, _StartArgs) -> + couch_scanner_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/couch_scanner/src/couch_scanner_checkpoint.erl b/src/couch_scanner/src/couch_scanner_checkpoint.erl new file mode 100644 index 00000000000..4031bd55db9 --- /dev/null +++ b/src/couch_scanner/src/couch_scanner_checkpoint.erl @@ -0,0 +1,89 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_scanner_checkpoint). + +-export([ + write/1, + read/0, + reset/0 +]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(DOC_ID, <>). + +% Public API + +write(#{} = State) -> + case enabled() of + true -> with_db(fun(Db) -> update_doc(Db, ?DOC_ID, State) end); + false -> ok + end. + +read() -> + case enabled() of + true -> with_db(fun(Db) -> load_doc(Db, ?DOC_ID) end); + false -> not_found + end. + +reset() -> + case enabled() of + true -> with_db(fun(Db) -> delete_doc(Db, ?DOC_ID) end); + false -> ok + end. + +% Private functions + +delete_doc(Db, DocId) -> + case couch_db:open_doc(Db, DocId, []) of + {ok, #doc{revs = {_, RevList}}} -> + {ok, _} = couch_db:delete_doc(Db, DocId, RevList), + ok; + {not_found, _} -> + not_found + end. + +update_doc(Db, DocId, #{} = Body) -> + EJsonBody = ?JSON_DECODE(?JSON_ENCODE(Body#{<<"_id">> => DocId})), + Doc = couch_doc:from_json_obj(EJsonBody), + case couch_db:open_doc(Db, DocId, []) of + {ok, #doc{revs = Revs}} -> + {ok, _} = couch_db:update_doc(Db, Doc#doc{revs = Revs}, []); + {not_found, _} -> + {ok, _} = couch_db:update_doc(Db, Doc, []) + end, + ok. + +load_doc(Db, DocId) -> + case couch_db:open_doc(Db, DocId, [ejson_body]) of + {ok, #doc{body = EJsonBody}} -> + ?JSON_DECODE(?JSON_ENCODE(EJsonBody), [return_maps]); + {not_found, _} -> + not_found + end. + +with_db(Fun) -> + DbName = config:get("mem3", "shards_db", "_dbs"), + case mem3_util:ensure_exists(DbName) of + {ok, Db} -> + try + Fun(Db) + after + catch couch_db:close(Db) + end; + Else -> + throw(Else) + end. + +enabled() -> + config:get_boolean("couch_scanner", "persist", true). diff --git a/src/couch_scanner/src/couch_scanner_server.erl b/src/couch_scanner/src/couch_scanner_server.erl new file mode 100644 index 00000000000..da9cc1a1898 --- /dev/null +++ b/src/couch_scanner/src/couch_scanner_server.erl @@ -0,0 +1,154 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_scanner_server). + +%-include_lib("couch/include/couch_db.hrl"). + +-export([ + start_link/0, + enable/0, + disable/0, + status/0 +]). + +-export([ + init/1, + handle_continue/2, + handle_call/3, + handle_cast/2, + handle_info/2 +]). + +-export([ + handle_config_change/5, + handle_config_terminate/3 +]). + +-define(CHECKPOINT_INTERVAL_MSEC, 180000). +-define(CONFIG_RELISTEN_MSEC, 5000). + +-record(st, { + enabled, + started, + db_cursor, + checkpoint_st, + cluster_state +}). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +enable() -> + gen_server:call(?MODULE, enable). + +disable() -> + gen_server:call(?MODULE, disable). + +status() -> + gen_server:call(?MODULE, status). + +init(_Args) -> + process_flag(trap_exit, true), + ok = config:listen_for_changes(?MODULE, nil), + St = #st{ + enabled = config:get_boolean("couch_scanner", "enabled", false), + started = 0, + db_cursor = <<>>, + checkpoint_st = #{}, + cluster_state = {[node() | nodes()], mem3:nodes()} + }, + {ok, St, {continue, unpersist}}. + +handle_continue(unpersist, #st{} = St) -> + UnpersistState = couch_scanner_persist:unpersist(), + couch_log:info("~p : unpersist state ~p", [?MODULE, UnpersistState]), + schedule_checkpoint(), + St1 = start(St), + {noreply, St1}. + +handle_call(enable, _From, #st{enabled = true} = St) -> + {reply, ok, St}; +handle_call(enable, _From, #st{enabled = false} = St) -> + couch_log:info("~p : enable", [?MODULE]), + {reply, ok, start(St#st{enabled = true})}; +handle_call(disable, _From, #st{enabled = false} = St) -> + {reply, ok, St}; +handle_call(disable, _From, #st{enabled = true} = St) -> + couch_log:info("~p : disable", [?MODULE]), + {reply, ok, stop(St#st{enabled = false})}; +handle_call(status, _From, #st{} = St) -> + {reply, St, St}. + +handle_cast(Msg, #st{} = St) -> + couch_log:error("~p : unknown cast ~p", [?MODULE, Msg]), + {noreply, St}. + +handle_info(checkpoint, #st{} = St) -> + St1 = checkpoint(St), + schedule_checkpoint(), + {noreply, St1}; +handle_info(restart_config, #st{} = St) -> + k = config:listen_for_changes(?MODULE, nil), + {noreply, St}; +handle_info(Msg, St) -> + couch_log:error("~p : unknown info message ~p", [?MODULE, Msg]), + {noreply, St}. + +handle_config_change("couch_scanner", "enabled", "true", _, S) -> + enable(), + {ok, S}; +handle_config_change("couch_scanner", "enabled", "false", _, S) -> + disable(), + {ok, S}; +handle_config_change(_, _, _, _, _) -> + {ok, nil}. + +handle_config_terminate(_Server, stop, _State) -> + ok; +handle_config_terminate(_Server, _Reason, _State) -> + erlang:send_after(?CONFIG_RELISTEN_MSEC, whereis(?MODULE), restart_config). + +schedule_checkpoint() -> + erlang:send_after(?CHECKPOINT_INTERVAL_MSEC, self(), checkpoint). + +start(#st{enabled = false} = St) -> + St; +start(#st{enabled = true} = St) -> + % check config module + % check if resuming from state + % if stale reset + % spawn db folder + % spawn isolated runner for each module + St#st{started = erlang:system_time(second)}. + +stop(#st{enabled = false} = St) -> + St; +stop(#st{enabled = true} = St) -> + % maybe checkpoint + % stop runners + % stop db folder + St#st{enabled = false}. + +checkpoint(#st{enabled = false} = St) -> + St; +checkpoint(#st{enabled = true} = St) -> + #st{checkpoint_st = PrevCheckpointSt} = St, + #st{db_cursor = DbCursor} = St, + CheckpointSt = #{db_cursor => DbCursor}, + case PrevCheckpointSt == CheckpointSt of + true -> + St; + false -> + ok = couch_scanner_persist:persist(CheckpointSt), + St#st{checkpoint_st = CheckpointSt} + end. diff --git a/src/couch_scanner/src/couch_scanner_sup.erl b/src/couch_scanner/src/couch_scanner_sup.erl new file mode 100644 index 00000000000..7baf0037a9d --- /dev/null +++ b/src/couch_scanner/src/couch_scanner_sup.erl @@ -0,0 +1,34 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_scanner_sup). + +-behaviour(supervisor). + +-export([ + start_link/0, + init/1 +]). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + Children = [ + #{ + id => couch_scanner_server, + start => {couch_scanner_server, start_link, []}, + shutdown => 5000 + } + ], + SupFlags = #{strategy => rest_for_one, intensity => 25, period => 1}, + {ok, {SupFlags, Children}}. diff --git a/src/couch_scanner/test/eunit/couch_scanner_test.erl b/src/couch_scanner/test/eunit/couch_scanner_test.erl new file mode 100644 index 00000000000..6b4ecd99df4 --- /dev/null +++ b/src/couch_scanner/test/eunit/couch_scanner_test.erl @@ -0,0 +1,34 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_scanner_test). + +-include_lib("couch/include/couch_eunit.hrl"). + +couch_scanner_test_() -> + { + setup, + fun start_couch/0, + fun stop_couch/1, + with([ + ?TDEF(t_scanner) + ]) + }. + +start_couch() -> + test_util:start_config(). + +stop_couch(Ctx) -> + test_util:stop_couch(Ctx). + +t_scanner(_Ctx) -> + ok.