From 1dd457a52e69d71362836805c88530ac4487ee4a Mon Sep 17 00:00:00 2001 From: Philip Iezzi Date: Wed, 6 Sep 2023 23:19:29 +0200 Subject: [PATCH] Database cleanup job now also purges old messages, introduced `MESSAGE_RETENTION` env var get ready for v0.6.1 release --- .env.example | 1 + CHANGELOG.md | 13 +++++++++++++ README.md | 5 ++++- app/message.py | 13 +++++++++++++ cleanup.py | 13 ++++++++++--- .../migrations/20230906_01_Cle31-update-indexes.sql | 5 +++++ 6 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 database/migrations/20230906_01_Cle31-update-indexes.sql diff --git a/.env.example b/.env.example index f951c3a..d1c20ea 100644 --- a/.env.example +++ b/.env.example @@ -27,3 +27,4 @@ LOG_CONSOLE=True # True or False (default: False) - Output logs to console (stde # SYSLOG=True # True or False (default: False) - Send logs to syslog # SENTRY_DSN=https://**********.ingest.sentry.io/XXXXXXXXXXXXXXXX # Your Sentry DSN (default: None) # SENTRY_ENVIRONMENT=prod +# MESSAGE_RETENTION=90 # How many days to keep messages in the database (default: 0, never delete) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd95fae..671b6e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # CHANGELOG +## [v0.6.1](https://github.com/onlime/policyd-rate-guard/releases/tag/v0.6.1) (2023-09-06) + +**Improved:** + +- Code cleanup: Using relative imports. +- Code cleanup: Simplified parsing of data using dict comprehension in `Handler`. +- Refactoring: moved `PrefixedLogger` class into its own file. + +**Added:** + +- Database cleanup job now also purges old messages, if enabled through `MESSAGE_RETENTION` env var. +- Introduced new environment variable `MESSAGE_RETENTION` to control number of days to keep messages in the database. Defaults to `0` (keep forever). 
+ ## [v0.6.0](https://github.com/onlime/policyd-rate-guard/releases/tag/v0.6.0) (2023-09-01) **Improved:** diff --git a/README.md b/README.md index e5a10b1..3b215a8 100644 --- a/README.md +++ b/README.md @@ -227,6 +227,8 @@ PolicydRateGuard can be fully configured through environment variables in `.env` Your Sentry DSN in the following form: `https://**********.ingest.sentry.io/XXXXXXXXXXXXXXXX`. Defaults to `None` (commented out). - `SENTRY_ENVIRONMENT` Sentry environment. Suggested values: `dev` or `prod`, but can be any custom string. Defaults to `dev`. +- `MESSAGE_RETENTION` + How many days to keep messages in the database. Defaults to `0` (never delete). You may also tune the database connection pooling by modifying the following environment variables (defaults are fine for most environments, and you'll find e detailed description in the [DBUtils PooledDB](https://webwareforpython.github.io/DBUtils/main.html#pooleddb-pooled-db-1) usage docs): @@ -284,7 +286,7 @@ $ cp yoyo.ini.docker yoyo.ini # & Adjust the settings (venv)$ python3 run.py ``` -To cleanup (reset all counters and quotas) the database, run: +To clean up (reset all counters and quotas and purge old messages if `MESSAGE_RETENTION` is set) the database, run: ```bash (venv)$ python3 cleanup.py @@ -401,6 +403,7 @@ Planned features (coming soon): - [x] **Sentry** integration for exception reporting - [x] **Ansible role** for easy production deployment - [x] **Github workflow** for CI/testing +- [x] **Message retention**: Expire/purge old messages, configurable via env var `MESSAGE_RETENTION` (defaults to keep forever) - [ ] Implement a **configurable webhook API** call for notification to sender on reaching quota limit (on first block) to external service. - [ ] **Publish package** to [PyPI](https://pypi.org/) (Might need some restructuring. Any help greatly appreciated!)
diff --git a/app/message.py b/app/message.py index 2552be2..3f64629 100644 --- a/app/message.py +++ b/app/message.py @@ -76,3 +76,16 @@ def is_blocked(self) -> bool: def get_props_description(self, props: list = ['sender', 'rcpt_count', 'from_addr', 'client_address', 'client_name'], separator: str = ' '): return separator.join(f"{name}={getattr(self, name)}" for name in props) + + @staticmethod + def purge_old_messages(db_pool: object, logger: object, days: int = 90) -> None: + """Purge old messages""" + logger.debug('Purge old messages') + db = db_pool.connection() + try: + deleted = db.cursor().execute('DELETE FROM `messages` WHERE `created_at` < DATE_SUB(CURDATE(), INTERVAL %s DAY)', (days,)) + db.commit() + if deleted > 0: + logger.info('Deleted {} old messages (retention: {} days)'.format(deleted, days)) + finally: + db.close() diff --git a/cleanup.py b/cleanup.py index e5637ba..82aa577 100644 --- a/cleanup.py +++ b/cleanup.py @@ -2,6 +2,7 @@ from app.logging import get_logger from app.db import DbConnectionPool from app.ratelimit import Ratelimit +from app.message import Message class Cleaner: @@ -9,13 +10,19 @@ def __init__(self, conf: object = None) -> None: self.conf = conf or Config() self.logger = get_logger(self.conf) self.db_pool = DbConnectionPool(self.conf) + self.logger.debug('Cleaning up database') + self.reset_counters() self.cleanup() - def cleanup(self) -> None: - """Cleanup database""" - self.logger.debug('Cleaning up database') + def reset_counters(self) -> None: + """Reset counters""" Ratelimit.reset_all_counters(self.db_pool, self.logger) + def cleanup(self) -> None: + """Cleanup database""" + message_retention = int(self.conf.get('MESSAGE_RETENTION', 0)) + if message_retention > 0: + Message.purge_old_messages(self.db_pool, self.logger, message_retention) if __name__ == '__main__': # pragma: no cover Cleaner() diff --git a/database/migrations/20230906_01_Cle31-update-indexes.sql b/database/migrations/20230906_01_Cle31-update-indexes.sql new 
file mode 100644 index 0000000..c845a34 --- /dev/null +++ b/database/migrations/20230906_01_Cle31-update-indexes.sql @@ -0,0 +1,5 @@ +-- update indexes +-- depends: 20230827_01_o7RGx-update-table-ratelimits + +ALTER TABLE `ratelimits` RENAME INDEX `idx_sender` TO `ratelimits_sender_unique`; +ALTER TABLE `messages` ADD INDEX `messages_created_at_index` (`created_at`);