Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Quick & dirty metric for background update status (#15740)
Browse files Browse the repository at this point in the history
* Quick & dirty metric for background update status

* Changelog

* Remove debug

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>

* Actually write to _aborted

---------

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>
  • Loading branch information
David Robertson and Mathieu Velten authored Jun 7, 2023
1 parent e536f02 commit d162aec
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 1 deletion.
1 change: 1 addition & 0 deletions changelog.d/15740.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Expose a metric reporting the database background update status.
2 changes: 2 additions & 0 deletions synapse/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def collect() -> Iterable[Metric]:

@attr.s(slots=True, hash=True, auto_attribs=True)
class LaterGauge(Collector):
"""A Gauge which periodically calls a user-provided callback to produce metrics."""

name: str
desc: str
labels: Optional[Sequence[str]] = attr.ib(hash=False)
Expand Down
30 changes: 30 additions & 0 deletions synapse/storage/background_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from enum import IntEnum
from types import TracebackType
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -136,6 +137,15 @@ def total_items_per_ms(self) -> Optional[float]:
return float(self.total_item_count) / float(self.total_duration_ms)


class UpdaterStatus(IntEnum):
    """Integer codes summarising the state of a background updater.

    The values are plain integers so that a status can be reported directly
    as a numeric metric.
    """

    # Use negative values for error conditions.
    ABORTED = -1

    # Non-error states.
    DISABLED = 0
    NOT_STARTED = 1
    RUNNING_UPDATE = 2
    COMPLETE = 3


class BackgroundUpdater:
"""Background updates are updates to the database that run in the
background. Each update processes a batch of data at once. We attempt to
Expand All @@ -158,11 +168,16 @@ def __init__(self, hs: "HomeServer", database: "DatabasePool"):

self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {}
self._background_update_handlers: Dict[str, _BackgroundUpdateHandler] = {}
# TODO: all these bool flags make me feel icky---can we combine into a status
# enum?
self._all_done = False

# Whether we're currently running updates
self._running = False

# Marker to be set if we abort and halt all background updates.
self._aborted = False

# Whether background updates are enabled. This allows us to
# enable/disable background updates via the admin API.
self.enabled = True
Expand All @@ -175,6 +190,20 @@ def __init__(self, hs: "HomeServer", database: "DatabasePool"):
self.sleep_duration_ms = hs.config.background_updates.sleep_duration_ms
self.sleep_enabled = hs.config.background_updates.sleep_enabled

def get_status(self) -> UpdaterStatus:
    """Collapse the updater's boolean flags into one `UpdaterStatus` value.

    The result is an integer enum so that it can be used as a metric. The
    flags are consulted in a fixed priority order: an abort wins over being
    disabled, which wins over completion, which wins over an in-progress
    run. If none of the flags apply, the updater has not started.
    """
    # TODO: a status for "have seen at least one failure, but haven't aborted yet".
    if self._aborted:
        status = UpdaterStatus.ABORTED
    elif not self.enabled:
        status = UpdaterStatus.DISABLED
    elif self._all_done:
        status = UpdaterStatus.COMPLETE
    elif self._running:
        status = UpdaterStatus.RUNNING_UPDATE
    else:
        status = UpdaterStatus.NOT_STARTED
    return status

def register_update_controller_callbacks(
self,
on_update: ON_UPDATE_CALLBACK,
Expand Down Expand Up @@ -296,6 +325,7 @@ async def run_background_updates(self, sleep: bool) -> None:
except Exception:
back_to_back_failures += 1
if back_to_back_failures >= 5:
self._aborted = True
raise RuntimeError(
"5 back-to-back background update failures; aborting."
)
Expand Down
8 changes: 7 additions & 1 deletion synapse/storage/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
current_context,
make_deferred_yieldable,
)
from synapse.metrics import register_threadpool
from synapse.metrics import LaterGauge, register_threadpool
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage.background_updates import BackgroundUpdater
from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
Expand Down Expand Up @@ -547,6 +547,12 @@ def __init__(
self._db_pool = make_pool(hs.get_reactor(), database_config, engine)

self.updates = BackgroundUpdater(hs, self)
LaterGauge(
"synapse_background_update_status",
"Background update status",
[],
self.updates.get_status,
)

self._previous_txn_total_time = 0.0
self._current_txn_total_time = 0.0
Expand Down

0 comments on commit d162aec

Please sign in to comment.