Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Delete messages from device_inbox table when deleting device #10969

Merged
merged 22 commits into from
Oct 27, 2021
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10969.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a long-standing bug where messages in the `device_inbox` table for deleted devices would persist indefinitely. Contributed by @dklimpel and @JohannesKleine.
76 changes: 75 additions & 1 deletion synapse/storage/databases/main/deviceinbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
from synapse.logging.opentracing import log_kv, set_tag, trace
from synapse.replication.tcp.streams import ToDeviceStream
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import DatabasePool
from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.engines import PostgresEngine
from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator
from synapse.types import JsonDict
from synapse.util import json_encoder
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.caches.stream_change_cache import StreamChangeCache
Expand Down Expand Up @@ -552,6 +553,7 @@ def _add_messages_to_local_device_inbox_txn(

class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox"

def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)
Expand All @@ -567,6 +569,11 @@ def __init__(self, database: DatabasePool, db_conn, hs):
self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox
)

self.db_pool.updates.register_background_update_handler(
self.REMOVE_DELETED_DEVICES,
self._remove_deleted_devices_from_device_inbox,
)

async def _background_drop_index_device_inbox(self, progress, batch_size):
def reindex_txn(conn):
txn = conn.cursor()
Expand All @@ -579,6 +586,73 @@ def reindex_txn(conn):

return 1

async def _remove_deleted_devices_from_device_inbox(
self, progress: JsonDict, batch_size: int
) -> int:
"""A background update that deletes all device_inboxes for deleted devices.

This should only need to be run once (when users upgrade to v1.45.0)

Args:
progress: JsonDict used to store progress of this background update
batch_size: the maximum number of rows to retrieve in a single select query

Returns:
The number of deleted rows
"""

def _remove_deleted_devices_from_device_inbox_txn(
txn: LoggingTransaction,
) -> int:

sql = """
WITH get_devices AS
(SELECT device_inbox.device_id, device_inbox.user_id
FROM device_inbox
WHERE (device_inbox.device_id, device_inbox.user_id)
NOT IN
(SELECT device_id, user_id FROM devices)
LIMIT ?)
SELECT DISTINCT * FROM get_devices;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My concern with this approach is that it starts off fast (as it will quickly find rows to delete), but will become increasingly heavyweight until at the end it will do a full sequential scan of device_inbox.

I think a simple fix for that is to order by stream_id and then track the latest stream_id that was reached (via the progress json):

SELECT device_id, user_id, stream_id
FROM device_inbox
WHERE
    stream_id >= ?
    AND (device_id, user_id) NOT IN (
        SELECT device_id, user_id FROM devices
    )
ORDER BY stream_id
LIMIT ?

(Note that since stream_id is not unique we need to use an inclusive stream_id >= ? clause, since we might not have deleted all dead device messages for the stream_id returned from the previous query)

I would also then delete only rows matching the (user_id, device_id, stream_id) tuple, to avoid problems of deleting a large number of rows all at once due to a single device having lots of device messages (which I think is common).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. I work on it. This is similar to my first approach in a543a27#diff-3811c5dc2d8444a4922451939dbd64b55056168b6055094c27207cd7ff809552
Also: comment

But we have more informations now.

"""

txn.execute(sql, (batch_size,))
rows = txn.fetchall()

num_deleted = 0
for row in rows:
num_deleted += self.db_pool.simple_delete_txn(
txn,
"device_inbox",
{"device_id": row[0], "user_id": row[1]},
)

if rows:
self.db_pool.updates._background_update_progress_txn(
txn,
self.REMOVE_DELETED_DEVICES,
{"device_id": rows[-1][0], "user_id": rows[-1][1]},
)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if it necessary to call the update pogress.
No intermediate status is required for processing this task.
If I am add something like WHERE uiser_id > to the SQL query, the costs rise immensely.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you run my explain query on your database to get a better cost comparison?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


return num_deleted

number_deleted = await self.db_pool.runInteraction(
"_remove_deleted_devices_from_device_inbox",
_remove_deleted_devices_from_device_inbox_txn,
)

# The task is finished when no more lines are deleted.
# The `batch_size` only specifies how many devices are cleaned per run.
# More than one line is deleted in the deviceinbox per run and device,
# so it is possible that the number of deleted lines is larger
# than the batch size.
if not number_deleted:
await self.db_pool.updates._end_background_update(
self.REMOVE_DELETED_DEVICES
)

return number_deleted


class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore):
pass
35 changes: 21 additions & 14 deletions synapse/storage/databases/main/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,19 +1121,14 @@ async def store_device(
raise StoreError(500, "Problem storing device.")

async def delete_device(self, user_id: str, device_id: str) -> None:
"""Delete a device.
"""Delete a device and its device_inbox.

Args:
user_id: The ID of the user which owns the device
device_id: The ID of the device to delete
"""
await self.db_pool.simple_delete_one(
table="devices",
keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False},
desc="delete_device",
)

self.device_id_exists_cache.invalidate((user_id, device_id))
await self.delete_devices(user_id, [device_id])

async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
"""Deletes several devices.
Expand All @@ -1142,13 +1137,25 @@ async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
user_id: The ID of the user which owns the devices
device_ids: The IDs of the devices to delete
"""
await self.db_pool.simple_delete_many(
table="devices",
column="device_id",
iterable=device_ids,
keyvalues={"user_id": user_id, "hidden": False},
desc="delete_devices",
)

def _delete_devices_txn(txn: LoggingTransaction) -> None:
self.db_pool.simple_delete_many_txn(
txn,
table="devices",
column="device_id",
values=device_ids,
keyvalues={"user_id": user_id, "hidden": False},
)

self.db_pool.simple_delete_many_txn(
txn,
table="device_inbox",
column="device_id",
values=device_ids,
keyvalues={"user_id": user_id},
)

await self.db_pool.runInteraction("delete_devices", _delete_devices_txn)
for device_id in device_ids:
self.device_id_exists_cache.invalidate((user_id, device_id))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/* Copyright 2021 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


-- Remove messages from the device_inbox table which were orphaned
-- when a device was deleted using Synapse earlier than 1.46.0.
-- This runs as background task, but may take a bit to finish.

INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(6402, 'remove_deleted_devices_from_device_inbox', '{}');
31 changes: 31 additions & 0 deletions tests/handlers/test_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,37 @@ def test_delete_device(self):
# we'd like to check the access token was invalidated, but that's a
# bit of a PITA.

def test_delete_device_and_device_inbox(self):
self._record_users()

# add an device_inbox
self.get_success(
self.store.db_pool.simple_insert(
"device_inbox",
{
"user_id": user1,
"device_id": "abc",
"stream_id": 1,
"message_json": "{}",
},
)
)

# delete the device
self.get_success(self.handler.delete_device(user1, "abc"))

# check that the device_inbox was deleted
res = self.get_success(
self.store.db_pool.simple_select_one(
table="device_inbox",
keyvalues={"user_id": user1, "device_id": "abc"},
retcols=("user_id", "device_id"),
allow_none=True,
desc="get_device_id_from_device_inbox",
)
)
self.assertIsNone(res)

def test_update_device(self):
self._record_users()

Expand Down
90 changes: 90 additions & 0 deletions tests/storage/databases/main/test_deviceinbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from synapse.rest import admin
from synapse.rest.client import devices

from tests.unittest import HomeserverTestCase


class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase):

servlets = [
admin.register_servlets,
devices.register_servlets,
]

def prepare(self, reactor, clock, hs):
self.store = hs.get_datastore()
self.user_id = self.register_user("foo", "pass")

def test_background_remove_deleted_devices_from_device_inbox(self):
"""Test that the background task to delete old device_inboxes works properly."""

# create a valid device
self.get_success(
self.store.store_device(self.user_id, "cur_device", "display_name")
)

# Add device_inbox to devices
self.get_success(
self.store.db_pool.simple_insert(
"device_inbox",
{
"user_id": self.user_id,
"device_id": "cur_device",
"stream_id": 1,
"message_json": "{}",
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"device_inbox",
{
"user_id": self.user_id,
"device_id": "old_device",
"stream_id": 2,
"message_json": "{}",
},
)
)

# Insert and run the background update.
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": "remove_deleted_devices_from_device_inbox",
"progress_json": "{}",
},
)
)

# ... and tell the DataStore that it hasn't finished all updates yet
self.store.db_pool.updates._all_done = False

self.wait_for_background_updates()

# Make sure the background task deleted old device_inbox
res = self.get_success(
self.store.db_pool.simple_select_onecol(
table="device_inbox",
keyvalues={},
retcol="device_id",
desc="get_device_id_from_device_inbox",
)
)
self.assertEqual(1, len(res))
self.assertEqual(res[0], "cur_device")