chore: update alembic migration to use new database (#3841)
- Upgrade SQLAlchemy for the processing container so it matches the other containers.
- Upgrade Alembic for local development.
- Factor out "persistence_schema" into a constant to avoid naming issues.
- Update Alembic to support both the new and legacy ORM for now. The legacy ORM can be removed in a future migration.
- Add an Alembic migration for the redesign database changes. This will not affect the legacy database.
- Modify `migrate_redesign_write` so it no longer creates the new database tables. This should be handled by the automated migration processes in GHA.
- Fix the legacy ORM to match the actual database.
- Update `create_db` to use the new ORM.
- Add Make recipe `db/check` to make it easier to detect if the ORM is different from the database.
- Add a try..except block in `DatabaseProvider._drop_schema` to prevent breaking if the schema does not exist.
- Add support for creating both the legacy and new database.
- Remove CORPORA_LOCAL_DEV from setup_dev_data.sh.
Bento007 authored Jan 9, 2023
1 parent a50a8c5 commit 4e6c4e2
Showing 15 changed files with 153 additions and 47 deletions.
4 changes: 4 additions & 0 deletions backend/Makefile
@@ -46,6 +46,10 @@ db/new_migration_auto:
# Usage: make db/new_migration_auto MESSAGE="purpose_of_migration"
PYTHONPATH=.. alembic -c=./database/database.ini revision --autogenerate --message "$(MESSAGE)"

+db/check:
+# Check if the database needs to be migrated due to changes in the schema.
+PYTHONPATH=.. alembic -c=./database/database.ini check

# interactive mode usage: AWS_PROFILE=single-cell-dev DEPLOYMENT_STAGE=dev make db/connect
# ARGS usage: AWS_PROFILE=single-cell-dev DEPLOYMENT_STAGE=dev make db/connect ARGS="-c \"select * from dataset_artifact where filetype='CXG'\""
db/connect:
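The new `db/check` recipe wraps Alembic's `check` command, which was added in Alembic 1.9 (hence the `alembic>=1.9, <2` pin in requirements.txt below). A minimal sketch of the equivalent programmatic call, assuming Alembic 1.9+ and the same ini path the Makefile passes with `-c`:

```python
# Sketch only: runs the same comparison `make db/check` performs via the CLI.
from alembic import command
from alembic.config import Config
from alembic.util.exc import AutogenerateDiffsDetected

config = Config("./database/database.ini")  # the ini the Makefile references
try:
    command.check(config)  # compares the live database against target_metadata
    print("database matches the ORM")
except AutogenerateDiffsDetected as err:
    print(f"migration needed: {err}")
```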
4 changes: 2 additions & 2 deletions backend/common/corpora_orm.py
@@ -529,5 +529,5 @@ class DbGenesetDatasetLink(Base, AuditMixin):

__tablename__ = "geneset_dataset_link"

-geneset_id = Column(String, ForeignKey("geneset.id"), index=True, nullable=False)
-dataset_id = Column(String, ForeignKey("dataset.id"), index=True, nullable=False)
+geneset_id = Column(String, ForeignKey("geneset.id"), nullable=False)
+dataset_id = Column(String, ForeignKey("dataset.id"), nullable=False)
10 changes: 6 additions & 4 deletions backend/database/env.py
@@ -5,7 +5,8 @@
from sqlalchemy import engine_from_config, pool

from backend.common.corpora_config import CorporaDbConfig
-from backend.common.corpora_orm import DbCollection
+from backend.common.corpora_orm import Base as corpora_orm
+from backend.layers.persistence import orm as persistence_orm

# this is the Alembic Config object, which provides access to the values within the .ini file in use.
config = context.config
@@ -18,7 +19,8 @@
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
-target_metadata = DbCollection.metadata
+# TODO remove support for corpora_orm.metadata once the old database is fully deprecated.
+target_metadata = [corpora_orm.metadata, persistence_orm.metadata]

# other values from the config, defined by the needs of env.py, can be acquired:
# my_important_option = config.get_main_option("my_important_option")
@@ -38,7 +40,7 @@ def run_migrations_offline():
"""

url = config.get_main_option("sqlalchemy.url")
-context.configure(url=url, target_metadata=target_metadata, literal_binds=True)
+context.configure(url=url, target_metadata=target_metadata, literal_binds=True, include_schemas=True)

with context.begin_transaction():
context.run_migrations()
@@ -69,7 +71,7 @@ def run_migrations_online():
connectable = engine_from_config(alembic_config, prefix="sqlalchemy.", poolclass=pool.NullPool)

with connectable.connect() as connection:
-context.configure(connection=connection, target_metadata=target_metadata)
+context.configure(connection=connection, target_metadata=target_metadata, include_schemas=True)

with context.begin_transaction():
context.run_migrations()
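Alembic accepts a list of `MetaData` objects as `target_metadata`, and `include_schemas=True` makes autogenerate inspect tables outside the default schema. One caveat: with `include_schemas=True`, autogenerate scans every schema in the database. A hypothetical `include_name` hook (Alembic 1.5+) could narrow it to the two schemas these ORMs actually own; this is a sketch, not part of the commit:

```python
# Hypothetical filter for env.py; not in this commit. Restricts autogenerate
# to the legacy default schema and the new persistence_schema.
def include_name(name, type_, parent_names):
    if type_ == "schema":
        return name in (None, "persistence_schema")  # None = the default schema
    return True

context.configure(
    connection=connection,
    target_metadata=target_metadata,
    include_schemas=True,
    include_name=include_name,
)
```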
89 changes: 89 additions & 0 deletions backend/database/versions/33_c5aaf6e2ca9e_redesign.py
@@ -0,0 +1,89 @@
"""redesign
Revision ID: 33_c5aaf6e2ca9e
Revises: 32_c27083d1a76d
Create Date: 2023-01-05 16:06:27.723131
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "33_c5aaf6e2ca9e"
down_revision = "32_c27083d1a76d"
branch_labels = None
depends_on = None


def upgrade():
op.execute("CREATE SCHEMA persistence_schema")
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"Collection",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("version_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("originally_published_at", sa.DateTime(), nullable=True),
sa.Column("tombstoned", sa.BOOLEAN(), nullable=True),
sa.PrimaryKeyConstraint("id"),
schema="persistence_schema",
)
op.create_table(
"CollectionVersion",
sa.Column("version_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("collection_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("metadata", postgresql.JSON(astext_type=sa.Text()), nullable=True),
sa.Column("owner", sa.String(), nullable=True),
sa.Column("curator_name", sa.String(), nullable=True),
sa.Column("publisher_metadata", postgresql.JSON(astext_type=sa.Text()), nullable=True),
sa.Column("published_at", sa.DateTime(), nullable=True),
sa.Column("created_at", sa.DateTime(), nullable=True),
sa.Column("datasets", postgresql.ARRAY(postgresql.UUID(as_uuid=True)), nullable=True),
sa.PrimaryKeyConstraint("version_id"),
schema="persistence_schema",
)
op.create_table(
"Dataset",
sa.Column("dataset_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("dataset_version_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("published_at", sa.DateTime(), nullable=True),
sa.PrimaryKeyConstraint("dataset_id"),
schema="persistence_schema",
)
op.create_table(
"DatasetArtifact",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("type", sa.Enum("RAW_H5AD", "H5AD", "RDS", "CXG", name="datasetartifacttype"), nullable=True),
sa.Column("uri", sa.String(), nullable=True),
sa.PrimaryKeyConstraint("id"),
schema="persistence_schema",
)
op.create_table(
"DatasetVersion",
sa.Column("version_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("dataset_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("collection_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("created_at", sa.DateTime(), nullable=True),
sa.Column("metadata", postgresql.JSON(astext_type=sa.Text()), nullable=True),
sa.Column("artifacts", postgresql.ARRAY(postgresql.UUID(as_uuid=True)), nullable=True),
sa.Column("status", postgresql.JSON(astext_type=sa.Text()), nullable=True),
sa.ForeignKeyConstraint(
["dataset_id"],
["persistence_schema.Dataset.dataset_id"],
),
sa.PrimaryKeyConstraint("version_id"),
schema="persistence_schema",
)
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table("DatasetVersion", schema="persistence_schema")
op.drop_table("DatasetArtifact", schema="persistence_schema")
op.drop_table("Dataset", schema="persistence_schema")
op.drop_table("CollectionVersion", schema="persistence_schema")
op.drop_table("Collection", schema="persistence_schema")
# ### end Alembic commands ###
sa.Enum(name="datasetartifacttype").drop(op.get_bind(), checkfirst=False)
op.execute("DROP SCHEMA persistence_schema")
1 change: 1 addition & 0 deletions backend/layers/persistence/constants.py
@@ -0,0 +1 @@
SCHEMA_NAME = "persistence_schema"
3 changes: 2 additions & 1 deletion backend/layers/persistence/orm.py
@@ -4,8 +4,9 @@
from sqlalchemy.schema import MetaData

from backend.layers.common.entities import DatasetArtifactType
+from backend.layers.persistence.constants import SCHEMA_NAME

-metadata = MetaData(schema="persistence_schema")
+metadata = MetaData(schema=SCHEMA_NAME)
mapper_registry = registry(metadata=metadata)


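Because the schema is set on the `MetaData` object itself, every table registered against it is automatically qualified with `persistence_schema`, which is why the migration above can emit `schema="persistence_schema"` on each table. A minimal standalone illustration, not code from the repo:

```python
# Standalone illustration of MetaData-level schema qualification.
from sqlalchemy import Column, MetaData, String, Table

SCHEMA_NAME = "persistence_schema"
metadata = MetaData(schema=SCHEMA_NAME)

example = Table("Example", metadata, Column("id", String, primary_key=True))
print(example.fullname)  # -> persistence_schema.Example
```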
21 changes: 13 additions & 8 deletions backend/layers/persistence/persistence.py
@@ -6,7 +6,7 @@
from typing import Any, Iterable, List, Optional

from sqlalchemy import create_engine
-from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.exc import SQLAlchemyError, ProgrammingError
from sqlalchemy.orm import sessionmaker

from backend.common.corpora_config import CorporaDbConfig
@@ -32,6 +32,7 @@
DatasetVersionId,
)
from backend.layers.business.exceptions import CollectionIsPublishedException
+from backend.layers.persistence.constants import SCHEMA_NAME
from backend.layers.persistence.orm import (
Collection as CollectionTable,
CollectionVersion as CollectionVersionTable,
@@ -45,27 +46,31 @@


class DatabaseProvider(DatabaseProviderInterface):
-def __init__(self, database_uri: str = None, schema_name: str = "persistence_schema") -> None:
+def __init__(self, database_uri: str = None, schema_name: str = SCHEMA_NAME) -> None:
if not database_uri:
database_uri = CorporaDbConfig().database_uri
self._engine = create_engine(database_uri, connect_args={"connect_timeout": 5})
self._session_maker = sessionmaker(bind=self._engine)
+self._schema_name = schema_name
try:
-self._create_schema(schema_name)
+self._create_schema()
except Exception:
pass

def _drop_schema(self, schema_name: str):
def _drop_schema(self):
from sqlalchemy.schema import DropSchema

-self._engine.execute(DropSchema(schema_name, cascade=True))
+try:
+    self._engine.execute(DropSchema(self._schema_name, cascade=True))
+except ProgrammingError:
+    pass

def _create_schema(self, schema_name: str):
def _create_schema(self):
from sqlalchemy.schema import CreateSchema
from backend.layers.persistence.orm import metadata

-self._engine.execute(CreateSchema(schema_name))
-metadata.schema = schema_name
+self._engine.execute(CreateSchema(self._schema_name))
+metadata.schema = self._schema_name
metadata.create_all(bind=self._engine)

@contextmanager
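The `ProgrammingError` guard matters because Postgres raises an error when dropping a schema that does not exist, and `_drop_schema` is now called from `create_db` and test teardown, where the schema may already be gone. The pattern in isolation, as a sketch against SQLAlchemy 1.4 (the version pinned in this commit) with an illustrative URI:

```python
# Sketch of the drop-then-recreate pattern used by DatabaseProvider,
# assuming SQLAlchemy 1.4 and a local Postgres.
from sqlalchemy import create_engine
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.schema import CreateSchema, DropSchema

engine = create_engine("postgresql://postgres:secret@localhost")  # illustrative
try:
    engine.execute(DropSchema("persistence_schema", cascade=True))
except ProgrammingError:
    pass  # schema did not exist; nothing to drop
engine.execute(CreateSchema("persistence_schema"))
```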
2 changes: 1 addition & 1 deletion backend/portal/pipeline/processing/requirements.txt
@@ -9,4 +9,4 @@ pyarrow>=1.0
pydantic>=1.9.0
python-json-logger
SQLAlchemy-Utils>=0.36.8
-SQLAlchemy>=1.3.17,<2
+SQLAlchemy>=1.4.0,<1.5
29 changes: 22 additions & 7 deletions backend/scripts/create_db.py
@@ -1,20 +1,35 @@
"""
Drops and recreates all tables according to corpora_orm.py
Drops and recreates all tables according to corpora_orm.py and orm.py
"""

from sqlalchemy import create_engine

from backend.common.corpora_config import CorporaDbConfig
from backend.common.corpora_orm import Base
def legacy_db():
from sqlalchemy import create_engine

from backend.common.corpora_config import CorporaDbConfig
from backend.common.corpora_orm import Base

def create_db():
engine = create_engine(CorporaDbConfig().database_uri)
print("Dropping tables")
print("legacy db: Dropping tables")
Base.metadata.drop_all(engine)
print("Recreating tables")
print("legacy db: Recreating tables")
Base.metadata.create_all(engine)


def current_db():
from backend.layers.persistence.persistence import DatabaseProvider

db_provider = DatabaseProvider()
print("current db: Dropping tables")
db_provider._drop_schema()
print("current db: Recreating tables")
db_provider._create_schema()


def create_db():
legacy_db()
current_db()


if __name__ == "__main__":
create_db()
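`create_db` now rebuilds both databases in one pass: the legacy tables via the old `Base.metadata`, then the `persistence_schema` tables via `DatabaseProvider`. It is presumably still invoked as a module, e.g. `python3 -m backend.scripts.create_db`; the exact entry point in local tooling is an assumption here.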
4 changes: 3 additions & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-alembic
+alembic>=1.9, <2
anndata==0.8.0
allure-pytest<3
black==22.3.0 # Must be kept in sync with black version in .pre-commit-config.yaml
@@ -20,5 +20,7 @@ python-json-logger
requests>=2.22.0
rsa>=4.7 # not directly required, pinned by Snyk to avoid a vulnerability
s3fs==0.4.2
+SQLAlchemy-Utils>=0.36.8
+SQLAlchemy>=1.4.0,<1.5
tenacity
tiledb==0.16.5 # Portal's tiledb version should always be the same or older than Explorer's
14 changes: 0 additions & 14 deletions scripts/cxg_admin_scripts/migrate.py
@@ -430,18 +430,6 @@ def migrate_redesign_write(ctx):
# database_uri = f"postgresql://postgres:secret@localhost"
engine = create_engine(database_uri, connect_args={"connect_timeout": 5})

-# engine.execute(schema.CreateSchema('persistence_schema'))
-# metadata_obj.create_all(bind=engine)
-
-# from sqlalchemy.schema import DropSchema
-# engine.execute(DropSchema("persistence_schema", cascade=True))
-
-from sqlalchemy.schema import CreateSchema
-from backend.layers.persistence.orm import metadata
-
-engine.execute(CreateSchema("persistence_schema"))
-metadata.create_all(bind=engine)

with Session(engine) as session:

for collection in collections:
@@ -491,8 +479,6 @@ def migrate_redesign_write(ctx):
if not dataset_version.get("status"):
continue

-metadata = dataset_version["metadata"]

dataset_version_row = DatasetVersionRow(
version_id=dataset_version["version_id"],
dataset_id=dataset_version["dataset_id"],
6 changes: 3 additions & 3 deletions scripts/setup_dev_data.sh
@@ -38,7 +38,7 @@ ${local_aws} s3api create-bucket --bucket artifact-bucket &>/dev/null || true
${local_aws} s3api create-bucket --bucket cellxgene-bucket &>/dev/null || true
${local_aws} secretsmanager create-secret --name corpora/backend/test/auth0-secret &>/dev/null || true
${local_aws} secretsmanager create-secret --name corpora/cicd/test/auth0-secret &>/dev/null || true
-${local_aws} secretsmanager create-secret --name corpora/backend/test/database_local &>/dev/null || true
+${local_aws} secretsmanager create-secret --name corpora/backend/test/database &>/dev/null || true
${local_aws} secretsmanager create-secret --name corpora/backend/test/config &>/dev/null || true

echo "Creating default state machine"
@@ -81,7 +81,8 @@ ${local_aws} secretsmanager update-secret --secret-id corpora/cicd/test/auth0-se
"grant_type": ""
}' || true

-${local_aws} secretsmanager update-secret --secret-id corpora/backend/test/database_local --secret-string '{"database_uri": "postgresql://corpora:test_pw@database.corporanet.local:5432"}' || true
+${local_aws} secretsmanager update-secret --secret-id corpora/backend/test/database --secret-string '{"database_uri": "postgresql://corpora:test_pw@database.corporanet.local:5432"}' || true
${local_aws} secretsmanager update-secret --secret-id corpora/backend/test/config --secret-string '{"upload_sfn_arn": "arn:aws:states:us-west-2:000000000000:stateMachine:uploader-dev-sfn", "curator_role_arn":"test_curation_role"}' || true

# Make a 1mb data file
Expand All @@ -91,7 +92,6 @@ ${local_aws} s3 cp fake-h5ad-file.h5ad s3://corpora-data-dev/
rm fake-h5ad-file.h5ad

echo "Populating test db"
-export CORPORA_LOCAL_DEV=true
export BOTO_ENDPOINT_URL=${LOCALSTACK_URL}
cd $(dirname ${BASH_SOURCE[0]})/..
python3 -m scripts.populate_db
1 change: 1 addition & 0 deletions tests/unit/backend/fixtures/test_db.py
@@ -49,6 +49,7 @@ def populate_test_data(self):
del self.session

def _populate_test_data(self):
+# TODO update for the redesign
self._create_test_collections()
self._create_test_collection_links()
self._create_test_datasets()
6 changes: 3 additions & 3 deletions tests/unit/backend/layers/business/test_business.py
@@ -61,11 +61,11 @@ def setUpClass(cls) -> None:
if cls.run_as_integration:
database_uri = os.environ.get("DB_URI", "postgresql://postgres:secret@localhost")
cls.database_provider = DatabaseProvider(database_uri=database_uri)
cls.database_provider._drop_schema("persistence_schema")
cls.database_provider._drop_schema()

def setUp(self) -> None:
if self.run_as_integration:
self.database_provider._create_schema("persistence_schema")
self.database_provider._create_schema()
else:
self.database_provider = DatabaseProviderMock()

@@ -127,7 +127,7 @@ def mock_config_fn(name):

def tearDown(self):
if self.run_as_integration:
self.database_provider._drop_schema("persistence_schema")
self.database_provider._drop_schema()

@classmethod
def tearDownClass(cls) -> None:
6 changes: 3 additions & 3 deletions tests/unit/backend/layers/common/base_test.py
@@ -70,7 +70,7 @@ def setUpClass(cls) -> None:
if cls.run_as_integration:
database_uri = os.environ.get("DB_URI", "postgresql://postgres:secret@localhost")
cls.database_provider = DatabaseProvider(database_uri=database_uri)
cls.database_provider._drop_schema("persistence_schema")
cls.database_provider._drop_schema()

def setUp(self):
super().setUp()
@@ -86,7 +86,7 @@ def mock_config_fn(name):
mock_config.start()

if self.run_as_integration:
self.database_provider._create_schema("persistence_schema")
self.database_provider._create_schema()
else:
self.database_provider = DatabaseProviderMock()

@@ -134,7 +134,7 @@ def mock_config_fn(name):
def tearDown(self):
super().tearDown()
if self.run_as_integration:
self.database_provider._drop_schema("persistence_schema")
self.database_provider._drop_schema()

@classmethod
def tearDownClass(cls) -> None:
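Both test bases now follow the same lifecycle: drop the schema once per class, recreate it before each test, drop it after. Condensed into a hypothetical standalone test case (the class name and integration flag are illustrative, not from the repo):

```python
# Hypothetical condensed version of the integration-test schema lifecycle.
import os
import unittest

from backend.layers.persistence.persistence import DatabaseProvider


class ExampleIntegrationTest(unittest.TestCase):
    run_as_integration = os.environ.get("INTEGRATION_TEST") == "1"  # illustrative flag

    @classmethod
    def setUpClass(cls) -> None:
        if cls.run_as_integration:
            uri = os.environ.get("DB_URI", "postgresql://postgres:secret@localhost")
            cls.database_provider = DatabaseProvider(database_uri=uri)
            cls.database_provider._drop_schema()  # start from a clean slate

    def setUp(self) -> None:
        if self.run_as_integration:
            self.database_provider._create_schema()  # fresh tables per test

    def tearDown(self) -> None:
        if self.run_as_integration:
            self.database_provider._drop_schema()  # safe even if already dropped
```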
