Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix MSC3030 /timestamp_to_event returning outliers that it has no idea whether are near a gap or not #14215

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/14215.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint returning potentially inaccurate closest events with `outliers` present.
59 changes: 38 additions & 21 deletions synapse/storage/databases/main/events_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,12 +1976,17 @@ async def is_event_next_to_backward_gap(self, event: EventBase) -> bool:

Args:
room_id: room where the event lives
event_id: event to check
event: event to check (can't be an `outlier`)

Returns:
Boolean indicating whether it's an extremity
"""

assert not event.internal_metadata.is_outlier(), (
"is_event_next_to_backward_gap(...) can't be used with `outlier` events. "
"This function relies on `event_backward_extremities` which won't be filled in for `outliers`."
)

def is_event_next_to_backward_gap_txn(txn: LoggingTransaction) -> bool:
# If the event in question has any of its prev_events listed as a
# backward extremity, it's next to a gap.
Expand Down Expand Up @@ -2031,12 +2036,17 @@ async def is_event_next_to_forward_gap(self, event: EventBase) -> bool:

Args:
room_id: room where the event lives
event_id: event to check
event: event to check (can't be an `outlier`)

Returns:
Boolean indicating whether it's an extremity
"""

assert not event.internal_metadata.is_outlier(), (
"is_event_next_to_forward_gap(...) can't be used with `outlier` events. "
"This function relies on `event_edges` and `event_forward_extremities` which won't be filled in for `outliers`."
)

def is_event_next_to_gap_txn(txn: LoggingTransaction) -> bool:
# If the event in question is a forward extremity, we will just
# consider any potential forward gap as not a gap since it's one of
Expand Down Expand Up @@ -2117,13 +2127,33 @@ async def get_event_id_for_timestamp(
The closest event_id otherwise None if we can't find any event in
the given direction.
"""
if direction == "b":
# Find closest event *before* a given timestamp. We use descending
# (which gives values largest to smallest) because we want the
# largest possible timestamp *before* the given timestamp.
comparison_operator = "<="
order = "DESC"
else:
# Find closest event *after* a given timestamp. We use ascending
# (which gives values smallest to largest) because we want the
# closest possible timestamp *after* the given timestamp.
comparison_operator = ">="
order = "ASC"

sql_template = """
sql_template = f"""
SELECT event_id FROM events
LEFT JOIN rejections USING (event_id)
WHERE
origin_server_ts %s ?
AND room_id = ?
room_id = ?
AND origin_server_ts {comparison_operator} ?
/**
* Make sure the event isn't an `outlier` because we have no way
* to later check whether it's next to a gap. `outliers` do not
* have entries in the `event_edges`, `event_forward_extremeties`,
* and `event_backward_extremities` tables to check against
* (used by `is_event_next_to_backward_gap` and `is_event_next_to_forward_gap`).
*/
AND NOT outlier
/* Make sure event is not rejected */
AND rejections.event_id IS NULL
/**
Expand All @@ -2133,27 +2163,14 @@ async def get_event_id_for_timestamp(
* Finally, we can tie-break based on when it was received on the server
* (`stream_ordering`).
*/
ORDER BY origin_server_ts %s, depth %s, stream_ordering %s
ORDER BY origin_server_ts {order}, depth {order}, stream_ordering {order}
LIMIT 1;
"""

def get_event_id_for_timestamp_txn(txn: LoggingTransaction) -> Optional[str]:
if direction == "b":
# Find closest event *before* a given timestamp. We use descending
# (which gives values largest to smallest) because we want the
# largest possible timestamp *before* the given timestamp.
comparison_operator = "<="
order = "DESC"
else:
# Find closest event *after* a given timestamp. We use ascending
# (which gives values smallest to largest) because we want the
# closest possible timestamp *after* the given timestamp.
comparison_operator = ">="
order = "ASC"

txn.execute(
sql_template % (comparison_operator, order, order, order),
(timestamp, room_id),
sql_template,
(room_id, timestamp),
)
row = txn.fetchone()
if row:
Expand Down
65 changes: 65 additions & 0 deletions tests/rest/client/test_rooms.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
)
from synapse.api.errors import Codes, HttpResponseException
from synapse.appservice import ApplicationService
from synapse.events import EventBase
from synapse.events.snapshot import EventContext
from synapse.handlers.pagination import PurgeStatus
from synapse.rest import admin
from synapse.rest.client import account, directory, login, profile, register, room, sync
Expand All @@ -51,6 +53,7 @@
from tests.http.server._base import make_request_with_cancellation_test
from tests.storage.test_stream import PaginationTestCase
from tests.test_utils import make_awaitable
from tests.test_utils.event_injection import create_event

PATH_PREFIX = b"/_matrix/client/api/v1"

Expand Down Expand Up @@ -3486,3 +3489,65 @@ def test_400_missing_param_without_id_access_token(self) -> None:
)
self.assertEqual(channel.code, 400)
self.assertEqual(channel.json_body["errcode"], "M_MISSING_PARAM")


class TimestampLookupTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
room.register_servlets,
login.register_servlets,
]

def default_config(self) -> JsonDict:
config = super().default_config()
config["experimental_features"] = {"msc3030_enabled": True}
return config

def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self._storage_controllers = self.hs.get_storage_controllers()

self.room_owner = self.register_user("room_owner", "test")
self.room_owner_tok = self.login("room_owner", "test")

def _inject_outlier(self, room_id: str) -> EventBase:
event, _context = self.get_success(
create_event(
self.hs,
room_id=room_id,
type="m.test",
sender="@test_remote_user:remote",
)
)

event.internal_metadata.outlier = True
self.get_success(
self._storage_controllers.persistence.persist_event(
event, EventContext.for_outlier(self._storage_controllers)
)
)
return event

def test_no_outliers(self) -> None:
"""
Test to make sure `/timestamp_to_event` does not return `outlier` events.
We're unable to determine whether an `outlier` is next to a gap so we
don't know whether it's actually the closest event. Instead, let's just
ignore `outliers` with this endpoint.
This test is really seeing that we choose the non-`outlier` event behind the
`outlier`. Since the gap checking logic considers the latest message in the room
as *not* next to a gap, asking over federation does not come into play here.
"""
room_id = self.helper.create_room_as(self.room_owner, tok=self.room_owner_tok)

outlier_event = self._inject_outlier(room_id)

channel = self.make_request(
"GET",
f"/_matrix/client/unstable/org.matrix.msc3030/rooms/{room_id}/timestamp_to_event?dir=b&ts={outlier_event.origin_server_ts}",
access_token=self.room_owner_tok,
)
self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body)

# Make sure the outlier event is not returned
self.assertNotEqual(channel.json_body["event_id"], outlier_event.event_id)