Skip to content

Commit

Permalink
[wip] add a couch_scanner app
Browse files Browse the repository at this point in the history
couch_scanner will run in the background and scans all the dbs and ddocs, first
trying to compile them against quickjs and the existing sm engine then possibly
even run a sample of docs through the map/reduce functions and compare the
results.

since there could be a large number of dbs and scanning would happened at a low
background rate, there is a periodic checkpointing mechanism to save the
currently processed db shard checkpoint.
  • Loading branch information
nickva committed Oct 24, 2023
1 parent aae8961 commit 0a9a771
Show file tree
Hide file tree
Showing 10 changed files with 398 additions and 0 deletions.
1 change: 1 addition & 0 deletions rebar.config.script
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ SubDirs = [
"src/smoosh",
"src/weatherreport",
"src/couch_prometheus",
"src/couch_scanner",
"rel"
].

Expand Down
2 changes: 2 additions & 0 deletions rel/reltool.config
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
snappy,
weatherreport,
couch_prometheus,
couch_scanner,

%% extra
nouveau,
Expand Down Expand Up @@ -128,6 +129,7 @@
{app, snappy, [{incl_cond, include}]},
{app, weatherreport, [{incl_cond, include}]},
{app, couch_prometheus, [{incl_cond, include}]},
{app, couch_scanner, [{incl_cond, include}]},

%% extra
{app, nouveau, [{incl_cond, include}]},
Expand Down
4 changes: 4 additions & 0 deletions src/couch_scanner/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Couch Scanner
================

Traverse all dbs periodically and emit various reports
29 changes: 29 additions & 0 deletions src/couch_scanner/src/couch_scanner.app.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

{application, couch_scanner, [
{description, "CouchDB Scanner"},
{vsn, git},
{registered, [
couch_scanner_server
]},
{applications, [
kernel,
stdlib,
crypto,
config,
couch_log,
couch_stats,
fabric
]},
{mod, {couch_scanner_app, []}}
]}.
28 changes: 28 additions & 0 deletions src/couch_scanner/src/couch_scanner.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_scanner).

-export([
enable/0,
disable/0,
status/0
]).

enable() ->
couch_scanner_server:enable().

disable() ->
couch_scanner_server:disable().

status() ->
couch_scanner_server:status().
23 changes: 23 additions & 0 deletions src/couch_scanner/src/couch_scanner_app.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_scanner_app).

-behaviour(application).

-export([start/2, stop/1]).

start(_StartType, _StartArgs) ->
couch_scanner_sup:start_link().

stop(_State) ->
ok.
89 changes: 89 additions & 0 deletions src/couch_scanner/src/couch_scanner_checkpoint.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_scanner_checkpoint).

-export([
write/1,
read/0,
reset/0
]).

-include_lib("couch/include/couch_db.hrl").

-define(DOC_ID, <<?LOCAL_DOC_PREFIX, "scanner-checkpoint">>).

% Public API

write(#{} = State) ->
case enabled() of
true -> with_db(fun(Db) -> update_doc(Db, ?DOC_ID, State) end);
false -> ok
end.

read() ->
case enabled() of
true -> with_db(fun(Db) -> load_doc(Db, ?DOC_ID) end);
false -> not_found
end.

reset() ->
case enabled() of
true -> with_db(fun(Db) -> delete_doc(Db, ?DOC_ID) end);
false -> ok
end.

% Private functions

delete_doc(Db, DocId) ->
case couch_db:open_doc(Db, DocId, []) of
{ok, #doc{revs = {_, RevList}}} ->
{ok, _} = couch_db:delete_doc(Db, DocId, RevList),
ok;
{not_found, _} ->
not_found
end.

update_doc(Db, DocId, #{} = Body) ->
EJsonBody = ?JSON_DECODE(?JSON_ENCODE(Body#{<<"_id">> => DocId})),
Doc = couch_doc:from_json_obj(EJsonBody),
case couch_db:open_doc(Db, DocId, []) of
{ok, #doc{revs = Revs}} ->
{ok, _} = couch_db:update_doc(Db, Doc#doc{revs = Revs}, []);
{not_found, _} ->
{ok, _} = couch_db:update_doc(Db, Doc, [])
end,
ok.

load_doc(Db, DocId) ->
case couch_db:open_doc(Db, DocId, [ejson_body]) of
{ok, #doc{body = EJsonBody}} ->
?JSON_DECODE(?JSON_ENCODE(EJsonBody), [return_maps]);
{not_found, _} ->
not_found
end.

with_db(Fun) ->
DbName = config:get("mem3", "shards_db", "_dbs"),
case mem3_util:ensure_exists(DbName) of
{ok, Db} ->
try
Fun(Db)
after
catch couch_db:close(Db)
end;
Else ->
throw(Else)
end.

enabled() ->
config:get_boolean("couch_scanner", "persist", true).
154 changes: 154 additions & 0 deletions src/couch_scanner/src/couch_scanner_server.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_scanner_server).

%-include_lib("couch/include/couch_db.hrl").

-export([
start_link/0,
enable/0,
disable/0,
status/0
]).

-export([
init/1,
handle_continue/2,
handle_call/3,
handle_cast/2,
handle_info/2
]).

-export([
handle_config_change/5,
handle_config_terminate/3
]).

-define(CHECKPOINT_INTERVAL_MSEC, 180000).
-define(CONFIG_RELISTEN_MSEC, 5000).

-record(st, {
enabled,
started,
db_cursor,
checkpoint_st,
cluster_state
}).

start_link() ->
gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

enable() ->
gen_server:call(?MODULE, enable).

disable() ->
gen_server:call(?MODULE, disable).

status() ->
gen_server:call(?MODULE, status).

init(_Args) ->
process_flag(trap_exit, true),
ok = config:listen_for_changes(?MODULE, nil),
St = #st{
enabled = config:get_boolean("couch_scanner", "enabled", false),
started = 0,
db_cursor = <<>>,
checkpoint_st = #{},
cluster_state = {[node() | nodes()], mem3:nodes()}
},
{ok, St, {continue, unpersist}}.

handle_continue(unpersist, #st{} = St) ->
UnpersistState = couch_scanner_persist:unpersist(),
couch_log:info("~p : unpersist state ~p", [?MODULE, UnpersistState]),
schedule_checkpoint(),
St1 = start(St),
{noreply, St1}.

handle_call(enable, _From, #st{enabled = true} = St) ->
{reply, ok, St};
handle_call(enable, _From, #st{enabled = false} = St) ->
couch_log:info("~p : enable", [?MODULE]),
{reply, ok, start(St#st{enabled = true})};
handle_call(disable, _From, #st{enabled = false} = St) ->
{reply, ok, St};
handle_call(disable, _From, #st{enabled = true} = St) ->
couch_log:info("~p : disable", [?MODULE]),
{reply, ok, stop(St#st{enabled = false})};
handle_call(status, _From, #st{} = St) ->
{reply, St, St}.

handle_cast(Msg, #st{} = St) ->
couch_log:error("~p : unknown cast ~p", [?MODULE, Msg]),
{noreply, St}.

handle_info(checkpoint, #st{} = St) ->
St1 = checkpoint(St),
schedule_checkpoint(),
{noreply, St1};
handle_info(restart_config, #st{} = St) ->
k = config:listen_for_changes(?MODULE, nil),
{noreply, St};
handle_info(Msg, St) ->
couch_log:error("~p : unknown info message ~p", [?MODULE, Msg]),
{noreply, St}.

handle_config_change("couch_scanner", "enabled", "true", _, S) ->
enable(),
{ok, S};
handle_config_change("couch_scanner", "enabled", "false", _, S) ->
disable(),
{ok, S};
handle_config_change(_, _, _, _, _) ->
{ok, nil}.

handle_config_terminate(_Server, stop, _State) ->
ok;
handle_config_terminate(_Server, _Reason, _State) ->
erlang:send_after(?CONFIG_RELISTEN_MSEC, whereis(?MODULE), restart_config).

schedule_checkpoint() ->
erlang:send_after(?CHECKPOINT_INTERVAL_MSEC, self(), checkpoint).

start(#st{enabled = false} = St) ->
St;
start(#st{enabled = true} = St) ->
% check config module
% check if resuming from state
% if stale reset
% spawn db folder
% spawn isolated runner for each module
St#st{started = erlang:system_time(second)}.

stop(#st{enabled = false} = St) ->
St;
stop(#st{enabled = true} = St) ->
% maybe checkpoint
% stop runners
% stop db folder
St#st{enabled = false}.

checkpoint(#st{enabled = false} = St) ->
St;
checkpoint(#st{enabled = true} = St) ->
#st{checkpoint_st = PrevCheckpointSt} = St,
#st{db_cursor = DbCursor} = St,
CheckpointSt = #{db_cursor => DbCursor},
case PrevCheckpointSt == CheckpointSt of
true ->
St;
false ->
ok = couch_scanner_persist:persist(CheckpointSt),
St#st{checkpoint_st = CheckpointSt}
end.
34 changes: 34 additions & 0 deletions src/couch_scanner/src/couch_scanner_sup.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_scanner_sup).

-behaviour(supervisor).

-export([
start_link/0,
init/1
]).

start_link() ->
supervisor:start_link({local, ?MODULE}, ?MODULE, []).

init([]) ->
Children = [
#{
id => couch_scanner_server,
start => {couch_scanner_server, start_link, []},
shutdown => 5000
}
],
SupFlags = #{strategy => rest_for_one, intensity => 25, period => 1},
{ok, {SupFlags, Children}}.
Loading

0 comments on commit 0a9a771

Please sign in to comment.