Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Reorganise the database schema directories (#9932)
Browse files Browse the repository at this point in the history
The hope here is that by moving all the schema files into synapse/storage/schema, it gets a bit easier for newcomers to navigate.

It certainly got easier for me to write a helpful README. There's more to do on that front, but I'll follow up with other PRs for that.
  • Loading branch information
richvdh authored May 7, 2021
1 parent 8771b13 commit 25f43fa
Show file tree
Hide file tree
Showing 284 changed files with 81 additions and 47 deletions.
1 change: 1 addition & 0 deletions changelog.d/9932.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Move database schema files into a common directory.
21 changes: 0 additions & 21 deletions synapse/storage/databases/main/schema/full_schemas/README.md

This file was deleted.

48 changes: 25 additions & 23 deletions synapse/storage/prepare_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,13 @@
from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import BaseDatabaseEngine
from synapse.storage.engines.postgres import PostgresEngine
from synapse.storage.schema import SCHEMA_VERSION
from synapse.storage.types import Cursor

logger = logging.getLogger(__name__)


# Remember to update this number every time a change is made to database
# schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 59

dir_path = os.path.abspath(os.path.dirname(__file__))
schema_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "schema")


class PrepareDatabaseException(Exception):
Expand Down Expand Up @@ -167,23 +164,29 @@ def _setup_new_database(
Example directory structure:
schema/
schema/
common/
delta/
...
full_schemas/
11/
foo.sql
main/
delta/
...
full_schemas/
3/
test.sql
...
11/
foo.sql
bar.sql
...
In the example foo.sql and bar.sql would be run, and then any delta files
for versions strictly greater than 11.
Note: we apply the full schemas and deltas from the top level `schema/`
folder as well those in the data stores specified.
Note: we apply the full schemas and deltas from the `schema/common`
folder as well those in the databases specified.
Args:
cur: a database cursor
Expand All @@ -195,12 +198,12 @@ def _setup_new_database(
# configured to our liking.
database_engine.check_new_database(cur)

current_dir = os.path.join(dir_path, "schema", "full_schemas")
full_schemas_dir = os.path.join(schema_path, "common", "full_schemas")

# First we find the highest full schema version we have
valid_versions = []

for filename in os.listdir(current_dir):
for filename in os.listdir(full_schemas_dir):
try:
ver = int(filename)
except ValueError:
Expand All @@ -218,15 +221,13 @@ def _setup_new_database(

logger.debug("Initialising schema v%d", max_current_ver)

# Now lets find all the full schema files, both in the global schema and
# in data store schemas.
directories = [os.path.join(current_dir, str(max_current_ver))]
# Now let's find all the full schema files, both in the common schema and
# in database schemas.
directories = [os.path.join(full_schemas_dir, str(max_current_ver))]
directories.extend(
os.path.join(
dir_path,
"databases",
schema_path,
database,
"schema",
"full_schemas",
str(max_current_ver),
)
Expand Down Expand Up @@ -357,6 +358,9 @@ def _upgrade_existing_database(
check_database_before_upgrade(cur, database_engine, config)

start_ver = current_version

# if we got to this schema version by running a full_schema rather than a series
# of deltas, we should not run the deltas for this version.
if not upgraded:
start_ver += 1

Expand Down Expand Up @@ -385,12 +389,10 @@ def _upgrade_existing_database(
# directories for schema updates.

# First we find the directories to search in
delta_dir = os.path.join(dir_path, "schema", "delta", str(v))
delta_dir = os.path.join(schema_path, "common", "delta", str(v))
directories = [delta_dir]
for database in databases:
directories.append(
os.path.join(dir_path, "databases", database, "schema", "delta", str(v))
)
directories.append(os.path.join(schema_path, database, "delta", str(v)))

# Used to check if we have any duplicate file names
file_name_counter = Counter() # type: CounterType[str]
Expand Down Expand Up @@ -621,8 +623,8 @@ def _get_or_create_schema_state(
txn: Cursor, database_engine: BaseDatabaseEngine
) -> Optional[Tuple[int, List[str], bool]]:
# Bluntly try creating the schema_version tables.
schema_path = os.path.join(dir_path, "schema", "schema_version.sql")
executescript(txn, schema_path)
sql_path = os.path.join(schema_path, "common", "schema_version.sql")
executescript(txn, sql_path)

txn.execute("SELECT version, upgraded FROM schema_version")
row = txn.fetchone()
Expand Down
37 changes: 37 additions & 0 deletions synapse/storage/schema/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Synapse Database Schemas

This directory contains the schema files used to build Synapse databases.

Synapse supports splitting its datastore across multiple physical databases (which can
be useful for large installations), and the schema files are therefore split according
to the logical database they are apply to.

At the time of writing, the following "logical" databases are supported:

* `state` - used to store Matrix room state (more specifically, `state_groups`,
their relationships and contents.)
* `main` - stores everything else.

Addionally, the `common` directory contains schema files for tables which must be
present on *all* physical databases.

## Full schema dumps

In the `full_schemas` directories, only the most recently-numbered snapshot is useful
(`54` at the time of writing). Older snapshots (eg, `16`) are present for historical
reference only.

## Building full schema dumps

If you want to recreate these schemas, they need to be made from a database that
has had all background updates run.

To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
`full.sql.postgres` and `full.sql.sqlite` files.

Ensure postgres is installed, then run:

./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/

NB at the time of writing, this script predates the split into separate `state`/`main`
databases so will require updates to handle that correctly.
17 changes: 17 additions & 0 deletions synapse/storage/schema/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Remember to update this number every time a change is made to database
# schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 59
4 changes: 1 addition & 3 deletions tests/storage/test_cleanup_extrems.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,8 @@ def run_background_update(self):
)

schema_path = os.path.join(
prepare_database.dir_path,
"databases",
prepare_database.schema_path,
"main",
"schema",
"delta",
"54",
"delete_forward_extremities.sql",
Expand Down

0 comments on commit 25f43fa

Please sign in to comment.