From 32768e96d44d0f3febae8c372e8c1569ea31788e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 24 Jul 2019 11:37:43 +0100 Subject: [PATCH 1/6] Add function to get all forgotten rooms for user This will allow us to efficiently filter out rooms that have been forgotten in other queries without having to join against the `room_memberships` table. --- synapse/storage/roommember.py | 43 +++++++++++++++++++ .../schema/delta/56/room_membership_idx.sql | 25 +++++++++++ 2 files changed, 68 insertions(+) create mode 100644 synapse/storage/schema/delta/56/room_membership_idx.sql diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index b3c002b9eb92..bc77705e972d 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -639,6 +639,39 @@ def f(txn): count = yield self.runInteraction("did_forget_membership", f) defer.returnValue(count == 0) + @cached() + def get_forgotten_rooms_for_user(self, user_id): + """Gets all rooms the user has forgotten. + + Args: + user_id (str) + + Returns: + Deferred[set[str]] + """ + + def _get_forgotten_rooms_for_user_txn(txn): + # This is a slightly convoluted query that first looks up all rooms + # that the user has forgotten in the past, then rechecks that list + # to see if any have subsequently been updated. This is done so that + # we can use a partial index on `forgotten = 1` on the assumption + # that few users will actually forget many rooms. + sql = """ + SELECT room_id, ( + SELECT count(*) FROM room_memberships + WHERE room_id = m.room_id AND user_id = m.user_id AND forgotten = 0 + ) AS count + FROM room_memberships AS m + WHERE user_id = ? AND forgotten = 1 + GROUP BY room_id, user_id; + """ + txn.execute(sql, (user_id,)) + return set(row[0] for row in txn if row[1] == 0) + + return self.runInteraction( + "get_forgotten_rooms_for_user", _get_forgotten_rooms_for_user_txn + ) + @defer.inlineCallbacks def get_rooms_user_has_been_in(self, user_id): """Get all rooms that the user has ever been in. @@ -670,6 +703,13 @@ def __init__(self, db_conn, hs): _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME, self._background_current_state_membership, ) + self.register_background_index_update( + "room_membership_forgotten_idx", + index_name="room_memberships_user_room_forgotten", + table="room_memberships", + columns=["user_id", "room_id"], + where_clause="forgotten = 1", + ) def _store_room_members_txn(self, txn, events, backfilled): """Store a room member in the database. @@ -771,6 +811,9 @@ def f(txn): txn.execute(sql, (user_id, room_id)) self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id)) + self._invalidate_cache_and_stream( + txn, self.get_forgotten_rooms_for_user, (user_id,) + ) return self.runInteraction("forget_membership", f) diff --git a/synapse/storage/schema/delta/56/room_membership_idx.sql b/synapse/storage/schema/delta/56/room_membership_idx.sql new file mode 100644 index 000000000000..fc0b49884328 --- /dev/null +++ b/synapse/storage/schema/delta/56/room_membership_idx.sql @@ -0,0 +1,25 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We add membership to current state so that we don't need to join against +-- room_memberships, which can be surprisingly costly (we do such queries +-- very frequently). +-- This will be null for non-membership events and the content.membership key +-- for membership events. (Will also be null for membership events until the +-- background update job has finished). + +-- Adds an index on room_memberships for fetching all forgotten rooms for a user +INSERT INTO background_updates (update_name, progress_json) VALUES + ('room_membership_forgotten_idx', '{}'); From 62921fb53e773c0510aacf36345c5301b4688088 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 24 Jul 2019 11:45:58 +0100 Subject: [PATCH 2/6] Remove join on room_memberships when fetching rooms for user. --- synapse/storage/roommember.py | 58 ++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index bc77705e972d..7852d3866aef 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -256,28 +256,35 @@ def get_invite_for_user_in_room(self, user_id, room_id): defer.returnValue(invite) defer.returnValue(None) + @defer.inlineCallbacks def get_rooms_for_user_where_membership_is(self, user_id, membership_list): """ Get all the rooms for this user where the membership for this user matches one in the membership list. + Filters out forgotten rooms. + Args: user_id (str): The user ID. membership_list (list): A list of synapse.api.constants.Membership values which the user must be in. + Returns: - A list of dictionary objects, with room_id, membership and sender - defined. + Deferred[list[RoomsForUser]] """ if not membership_list: return defer.succeed(None) - return self.runInteraction( + rooms = yield self.runInteraction( "get_rooms_for_user_where_membership_is", self._get_rooms_for_user_where_membership_is_txn, user_id, membership_list, ) + # Now we filter out forgotten rooms + forgotten_rooms = yield self.get_forgotten_rooms_for_user(user_id) + return [room for room in rooms if room.room_id not in forgotten_rooms] + def _get_rooms_for_user_where_membership_is_txn( self, txn, user_id, membership_list ): @@ -287,26 +294,33 @@ def _get_rooms_for_user_where_membership_is_txn( results = [] if membership_list: - where_clause = "user_id = ? AND (%s) AND forgotten = 0" % ( - " OR ".join(["m.membership = ?" for _ in membership_list]), - ) - - args = [user_id] - args.extend(membership_list) + if self._current_state_events_membership_up_to_date: + sql = """ + SELECT room_id, e.sender, c.membership, event_id, e.stream_ordering + FROM current_state_events AS c + INNER JOIN events AS e USING (room_id, event_id) + WHERE + c.type = 'm.room.member' + AND state_key = ? + AND c.membership IN (%s) + """ % ( + ",".join("?" * len(membership_list)) + ) + else: + sql = """ + SELECT room_id, e.sender, m.membership, event_id, e.stream_ordering + FROM current_state_events AS c + INNER JOIN room_memberships AS m USING (room_id, event_id) + INNER JOIN events AS e USING (room_id, event_id) + WHERE + c.type = 'm.room.member' + AND state_key = ? + AND m.membership IN (%s) + """ % ( + ",".join("?" * len(membership_list)) + ) - sql = ( - "SELECT m.room_id, m.sender, m.membership, m.event_id, e.stream_ordering" - " FROM current_state_events as c" - " INNER JOIN room_memberships as m" - " ON m.event_id = c.event_id" - " INNER JOIN events as e" - " ON e.event_id = c.event_id" - " AND m.room_id = c.room_id" - " AND m.user_id = c.state_key" - " WHERE c.type = 'm.room.member' AND %s" - ) % (where_clause,) - - txn.execute(sql, args) + txn.execute(sql, (user_id, *membership_list)) results = [RoomsForUser(**r) for r in self.cursor_to_dict(txn)] if do_invite: From 0c4a99607e839c5a363351238a6a555015c8ddfc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 24 Jul 2019 11:49:15 +0100 Subject: [PATCH 3/6] Remove join when calculating room summaries. --- synapse/storage/roommember.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 7852d3866aef..bfb834ccca76 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -179,19 +179,27 @@ def _get_room_summary_txn(txn): # we order by membership and then fairly arbitrarily by event_id so # heroes are consistent - sql = """ - SELECT m.user_id, m.membership, m.event_id - FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? - ORDER BY - CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, - m.event_id ASC - LIMIT ? - """ + if self._current_state_events_membership_up_to_date: + sql = """ + SELECT state_key, membership, event_id + FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? + ORDER BY + CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, + event_id ASC + LIMIT ? + """ + else: + sql = """ + SELECT c.state_key, m.membership, c.event_id + FROM room_memberships as m + INNER JOIN current_state_events as c USING (room_id, event_id) + WHERE c.type = 'm.room.member' AND c.room_id = ? + ORDER BY + CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC, + c.event_id ASC + LIMIT ? + """ # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user. txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6)) From c1598030671ee39364863fef579d041989bf852e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 24 Jul 2019 11:51:44 +0100 Subject: [PATCH 4/6] Newsfile --- changelog.d/5752.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5752.misc diff --git a/changelog.d/5752.misc b/changelog.d/5752.misc new file mode 100644 index 000000000000..5e15dfd5faa7 --- /dev/null +++ b/changelog.d/5752.misc @@ -0,0 +1 @@ +Reduce database IO usage by optimising queries for current membership. From 105e7f6ed3a08bcbf0fac2c7749ccb29f39d1492 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 29 Jul 2019 16:09:48 +0100 Subject: [PATCH 5/6] Remove lost comment --- synapse/storage/schema/delta/56/room_membership_idx.sql | 7 ------- 1 file changed, 7 deletions(-) diff --git a/synapse/storage/schema/delta/56/room_membership_idx.sql b/synapse/storage/schema/delta/56/room_membership_idx.sql index fc0b49884328..92ab1f5e65c2 100644 --- a/synapse/storage/schema/delta/56/room_membership_idx.sql +++ b/synapse/storage/schema/delta/56/room_membership_idx.sql @@ -13,13 +13,6 @@ * limitations under the License. */ --- We add membership to current state so that we don't need to join against --- room_memberships, which can be surprisingly costly (we do such queries --- very frequently). --- This will be null for non-membership events and the content.membership key --- for membership events. (Will also be null for membership events until the --- background update job has finished). - -- Adds an index on room_memberships for fetching all forgotten rooms for a user INSERT INTO background_updates (update_name, progress_json) VALUES ('room_membership_forgotten_idx', '{}'); From df3a5db629daa384cdf291f5ecbb0ff1721c80df Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 29 Jul 2019 16:40:25 +0100 Subject: [PATCH 6/6] Expand comment --- synapse/storage/roommember.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index bfb834ccca76..59ea7277fe43 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -678,6 +678,11 @@ def _get_forgotten_rooms_for_user_txn(txn): # to see if any have subsequently been updated. This is done so that # we can use a partial index on `forgotten = 1` on the assumption # that few users will actually forget many rooms. + # + # Note that a room is considered "forgotten" if *all* membership + # events for that user and room have the forgotten field set (as + # when a user forgets a room we update all rows for that user and + # room, not just the current one). sql = """ SELECT room_id, ( SELECT count(*) FROM room_memberships