Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix MSC3030 /timestamp_to_event returning outliers that it has no idea whether are near a gap or not #14215

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/14215.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint returning potentially inaccurate closest events with `outliers` present.
59 changes: 38 additions & 21 deletions synapse/storage/databases/main/events_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,12 +1976,17 @@ async def is_event_next_to_backward_gap(self, event: EventBase) -> bool:

Args:
room_id: room where the event lives
event_id: event to check
event: event to check (can't be an `outlier`)

Returns:
Boolean indicating whether it's an extremity
"""

assert not event.internal_metadata.is_outlier(), (
"is_event_next_to_backward_gap(...) can't be used with `outlier` events. "
"This function relies on `event_backward_extremities` which won't be filled in for `outliers`."
)

def is_event_next_to_backward_gap_txn(txn: LoggingTransaction) -> bool:
# If the event in question has any of its prev_events listed as a
# backward extremity, it's next to a gap.
Expand Down Expand Up @@ -2031,12 +2036,17 @@ async def is_event_next_to_forward_gap(self, event: EventBase) -> bool:

Args:
room_id: room where the event lives
event_id: event to check
event: event to check (can't be an `outlier`)

Returns:
Boolean indicating whether it's an extremity
"""

assert not event.internal_metadata.is_outlier(), (
"is_event_next_to_forward_gap(...) can't be used with `outlier` events. "
"This function relies on `event_edges` and `event_forward_extremities` which won't be filled in for `outliers`."
)

def is_event_next_to_gap_txn(txn: LoggingTransaction) -> bool:
# If the event in question is a forward extremity, we will just
# consider any potential forward gap as not a gap since it's one of
Expand Down Expand Up @@ -2117,13 +2127,33 @@ async def get_event_id_for_timestamp(
The closest event_id otherwise None if we can't find any event in
the given direction.
"""
if direction == "b":
# Find closest event *before* a given timestamp. We use descending
# (which gives values largest to smallest) because we want the
# largest possible timestamp *before* the given timestamp.
comparison_operator = "<="
order = "DESC"
else:
# Find closest event *after* a given timestamp. We use ascending
# (which gives values smallest to largest) because we want the
# closest possible timestamp *after* the given timestamp.
comparison_operator = ">="
order = "ASC"

sql_template = """
sql_template = f"""
SELECT event_id FROM events
LEFT JOIN rejections USING (event_id)
WHERE
origin_server_ts %s ?
AND room_id = ?
room_id = ?
AND origin_server_ts {comparison_operator} ?
/**
* Make sure the event isn't an `outlier` because we have no way
* to later check whether it's next to a gap. `outliers` do not
* have entries in the `event_edges`, `event_forward_extremeties`,
* and `event_backward_extremities` tables to check against
* (used by `is_event_next_to_backward_gap` and `is_event_next_to_forward_gap`).
*/
AND outlier = ? /* False */
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor Author

@MadLittleMods MadLittleMods Oct 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only new thing in get_event_id_for_timestamp(...) is here. The rest is just moving stuff around with the f-string

/* Make sure event is not rejected */
AND rejections.event_id IS NULL
/**
Expand All @@ -2133,27 +2163,14 @@ async def get_event_id_for_timestamp(
* Finally, we can tie-break based on when it was received on the server
* (`stream_ordering`).
*/
ORDER BY origin_server_ts %s, depth %s, stream_ordering %s
ORDER BY origin_server_ts {order}, depth {order}, stream_ordering {order}
LIMIT 1;
"""

def get_event_id_for_timestamp_txn(txn: LoggingTransaction) -> Optional[str]:
if direction == "b":
# Find closest event *before* a given timestamp. We use descending
# (which gives values largest to smallest) because we want the
# largest possible timestamp *before* the given timestamp.
comparison_operator = "<="
order = "DESC"
else:
# Find closest event *after* a given timestamp. We use ascending
# (which gives values smallest to largest) because we want the
# closest possible timestamp *after* the given timestamp.
comparison_operator = ">="
order = "ASC"

txn.execute(
sql_template % (comparison_operator, order, order, order),
(timestamp, room_id),
sql_template,
(room_id, timestamp, False),
)
row = txn.fetchone()
if row:
Expand Down
61 changes: 61 additions & 0 deletions tests/rest/client/test_rooms.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
)
from synapse.api.errors import Codes, HttpResponseException
from synapse.appservice import ApplicationService
from synapse.events import EventBase
from synapse.events.snapshot import EventContext
from synapse.handlers.pagination import PurgeStatus
from synapse.rest import admin
from synapse.rest.client import account, directory, login, profile, register, room, sync
Expand All @@ -51,6 +53,7 @@
from tests.http.server._base import make_request_with_cancellation_test
from tests.storage.test_stream import PaginationTestCase
from tests.test_utils import make_awaitable
from tests.test_utils.event_injection import create_event

PATH_PREFIX = b"/_matrix/client/api/v1"

Expand Down Expand Up @@ -3486,3 +3489,61 @@ def test_400_missing_param_without_id_access_token(self) -> None:
)
self.assertEqual(channel.code, 400)
self.assertEqual(channel.json_body["errcode"], "M_MISSING_PARAM")


class TimestampLookupTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
room.register_servlets,
login.register_servlets,
]

def default_config(self) -> JsonDict:
config = super().default_config()
config["experimental_features"] = {"msc3030_enabled": True}
return config

def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self._storage_controllers = self.hs.get_storage_controllers()

self.room_owner = self.register_user("room_owner", "test")
self.room_owner_tok = self.login("room_owner", "test")

def _inject_outlier(self, room_id: str) -> EventBase:
event, _context = self.get_success(
create_event(
self.hs,
room_id=room_id,
type="m.test",
sender="@test_remote_user:remote",
)
)

event.internal_metadata.outlier = True
self.get_success(
self._storage_controllers.persistence.persist_event(
event, EventContext.for_outlier(self._storage_controllers)
)
)
return event

def test_no_outliers(self) -> None:
"""
Test to make sure `/timestamp_to_event` does not return `outlier` events.
We're unable to determine whether an `outlier` is next to a gap so we
don't know whether it's actually the closest event. Instead, let's just
ignore `outliers` with this endpoint and just ask over federation.
"""
room_id = self.helper.create_room_as(self.room_owner, tok=self.room_owner_tok)

outlier_event = self._inject_outlier(room_id)

channel = self.make_request(
"GET",
f"/_matrix/client/unstable/org.matrix.msc3030/rooms/{room_id}/timestamp_to_event?dir=b&ts={outlier_event.origin_server_ts}",
access_token=self.room_owner_tok,
)
self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body)

# Make sure the outlier event is not returned
self.assertNotEqual(channel.json_body["event_id"], outlier_event.event_id)