diff --git a/backend/Makefile b/backend/Makefile index dd641c01e2547..89b3f75beba96 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -46,6 +46,10 @@ db/new_migration_auto: # Usage: make db/new_migration_auto MESSAGE="purpose_of_migration" PYTHONPATH=.. alembic -c=./database/database.ini revision --autogenerate --message "$(MESSAGE)" +db/check: + # Check if the database needs to be migrated due to changes in the schema. + PYTHONPATH=.. alembic -c=./database/database.ini check + # interactive mode usage: AWS_PROFILE=single-cell-dev DEPLOYMENT_STAGE=dev make db/connect # ARGS usage: AWS_PROFILE=single-cell-dev DEPLOYMENT_STAGE=dev make db/connect ARGS="-c \"select * from dataset_artifact where filetype='CXG'\"" db/connect: diff --git a/backend/common/corpora_orm.py b/backend/common/corpora_orm.py index c43c93b1f1d91..3aaac7f050ad6 100644 --- a/backend/common/corpora_orm.py +++ b/backend/common/corpora_orm.py @@ -529,5 +529,5 @@ class DbGenesetDatasetLink(Base, AuditMixin): __tablename__ = "geneset_dataset_link" - geneset_id = Column(String, ForeignKey("geneset.id"), index=True, nullable=False) - dataset_id = Column(String, ForeignKey("dataset.id"), index=True, nullable=False) + geneset_id = Column(String, ForeignKey("geneset.id"), nullable=False) + dataset_id = Column(String, ForeignKey("dataset.id"), nullable=False) diff --git a/backend/database/env.py b/backend/database/env.py index e84771f9161fb..eaedd9ccb437b 100644 --- a/backend/database/env.py +++ b/backend/database/env.py @@ -5,7 +5,8 @@ from sqlalchemy import engine_from_config, pool from backend.common.corpora_config import CorporaDbConfig -from backend.common.corpora_orm import DbCollection +from backend.common.corpora_orm import Base as corpora_orm +from backend.layers.persistence import orm as persistence_orm # this is the Alembic Config object, which provides access to the values within the .ini file in use. 
config = context.config @@ -18,7 +19,8 @@ # for 'autogenerate' support # from myapp import mymodel # target_metadata = mymodel.Base.metadata -target_metadata = DbCollection.metadata +# TODO remove support for corpora_orm.metadata once the old database is fully deprecated. +target_metadata = [corpora_orm.metadata, persistence_orm.metadata] # other values from the config, defined by the needs of env.py, can be acquired: # my_important_option = config.get_main_option("my_important_option") @@ -38,7 +40,7 @@ def run_migrations_offline(): """ url = config.get_main_option("sqlalchemy.url") - context.configure(url=url, target_metadata=target_metadata, literal_binds=True) + context.configure(url=url, target_metadata=target_metadata, literal_binds=True, include_schemas=True) with context.begin_transaction(): context.run_migrations() @@ -69,7 +71,7 @@ def run_migrations_online(): connectable = engine_from_config(alembic_config, prefix="sqlalchemy.", poolclass=pool.NullPool) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure(connection=connection, target_metadata=target_metadata, include_schemas=True) with context.begin_transaction(): context.run_migrations() diff --git a/backend/database/versions/33_c5aaf6e2ca9e_redesign.py b/backend/database/versions/33_c5aaf6e2ca9e_redesign.py new file mode 100644 index 0000000000000..841cc08db352e --- /dev/null +++ b/backend/database/versions/33_c5aaf6e2ca9e_redesign.py @@ -0,0 +1,89 @@ +"""redesign + +Revision ID: 33_c5aaf6e2ca9e +Revises: 32_c27083d1a76d +Create Date: 2023-01-05 16:06:27.723131 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = "33_c5aaf6e2ca9e" +down_revision = "32_c27083d1a76d" +branch_labels = None +depends_on = None + + +def upgrade(): + op.execute("CREATE SCHEMA persistence_schema") + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "Collection", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("version_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("originally_published_at", sa.DateTime(), nullable=True), + sa.Column("tombstoned", sa.BOOLEAN(), nullable=True), + sa.PrimaryKeyConstraint("id"), + schema="persistence_schema", + ) + op.create_table( + "CollectionVersion", + sa.Column("version_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("collection_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("metadata", postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column("owner", sa.String(), nullable=True), + sa.Column("curator_name", sa.String(), nullable=True), + sa.Column("publisher_metadata", postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column("published_at", sa.DateTime(), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("datasets", postgresql.ARRAY(postgresql.UUID(as_uuid=True)), nullable=True), + sa.PrimaryKeyConstraint("version_id"), + schema="persistence_schema", + ) + op.create_table( + "Dataset", + sa.Column("dataset_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("dataset_version_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("published_at", sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint("dataset_id"), + schema="persistence_schema", + ) + op.create_table( + "DatasetArtifact", + sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("type", sa.Enum("RAW_H5AD", "H5AD", "RDS", "CXG", name="datasetartifacttype"), nullable=True), + sa.Column("uri", sa.String(), nullable=True), + sa.PrimaryKeyConstraint("id"), + 
schema="persistence_schema", + ) + op.create_table( + "DatasetVersion", + sa.Column("version_id", postgresql.UUID(as_uuid=True), nullable=False), + sa.Column("dataset_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("collection_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=True), + sa.Column("metadata", postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column("artifacts", postgresql.ARRAY(postgresql.UUID(as_uuid=True)), nullable=True), + sa.Column("status", postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.ForeignKeyConstraint( + ["dataset_id"], + ["persistence_schema.Dataset.dataset_id"], + ), + sa.PrimaryKeyConstraint("version_id"), + schema="persistence_schema", + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("DatasetVersion", schema="persistence_schema") + op.drop_table("DatasetArtifact", schema="persistence_schema") + op.drop_table("Dataset", schema="persistence_schema") + op.drop_table("CollectionVersion", schema="persistence_schema") + op.drop_table("Collection", schema="persistence_schema") + # ### end Alembic commands ### + sa.Enum(name="datasetartifacttype").drop(op.get_bind(), checkfirst=False) + op.execute("DROP SCHEMA persistence_schema") diff --git a/backend/layers/persistence/constants.py b/backend/layers/persistence/constants.py new file mode 100644 index 0000000000000..deb3cd22e9fc4 --- /dev/null +++ b/backend/layers/persistence/constants.py @@ -0,0 +1 @@ +SCHEMA_NAME = "persistence_schema" diff --git a/backend/layers/persistence/orm.py b/backend/layers/persistence/orm.py index b8a997efd31c9..f41edb6273c1b 100644 --- a/backend/layers/persistence/orm.py +++ b/backend/layers/persistence/orm.py @@ -4,8 +4,9 @@ from sqlalchemy.schema import MetaData from backend.layers.common.entities import DatasetArtifactType +from backend.layers.persistence.constants import SCHEMA_NAME 
-metadata = MetaData(schema="persistence_schema") +metadata = MetaData(schema=SCHEMA_NAME) mapper_registry = registry(metadata=metadata) diff --git a/backend/layers/persistence/persistence.py b/backend/layers/persistence/persistence.py index 30fdd0030c221..9b5cbb95b8843 100644 --- a/backend/layers/persistence/persistence.py +++ b/backend/layers/persistence/persistence.py @@ -6,7 +6,7 @@ from typing import Any, Iterable, List, Optional from sqlalchemy import create_engine -from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.exc import SQLAlchemyError, ProgrammingError from sqlalchemy.orm import sessionmaker from backend.common.corpora_config import CorporaDbConfig @@ -32,6 +32,7 @@ DatasetVersionId, ) from backend.layers.business.exceptions import CollectionIsPublishedException +from backend.layers.persistence.constants import SCHEMA_NAME from backend.layers.persistence.orm import ( Collection as CollectionTable, CollectionVersion as CollectionVersionTable, @@ -45,27 +46,31 @@ class DatabaseProvider(DatabaseProviderInterface): - def __init__(self, database_uri: str = None, schema_name: str = "persistence_schema") -> None: + def __init__(self, database_uri: str = None, schema_name: str = SCHEMA_NAME) -> None: if not database_uri: database_uri = CorporaDbConfig().database_uri self._engine = create_engine(database_uri, connect_args={"connect_timeout": 5}) self._session_maker = sessionmaker(bind=self._engine) + self._schema_name = schema_name try: - self._create_schema(schema_name) + self._create_schema() except Exception: pass - def _drop_schema(self, schema_name: str): + def _drop_schema(self): from sqlalchemy.schema import DropSchema - self._engine.execute(DropSchema(schema_name, cascade=True)) + try: + self._engine.execute(DropSchema(self._schema_name, cascade=True)) + except ProgrammingError: + pass - def _create_schema(self, schema_name: str): + def _create_schema(self): from sqlalchemy.schema import CreateSchema from backend.layers.persistence.orm import 
metadata - self._engine.execute(CreateSchema(schema_name)) - metadata.schema = schema_name + self._engine.execute(CreateSchema(self._schema_name)) + metadata.schema = self._schema_name metadata.create_all(bind=self._engine) @contextmanager diff --git a/backend/portal/pipeline/processing/requirements.txt b/backend/portal/pipeline/processing/requirements.txt index 9ae32ffca9151..91e2c6da7549f 100644 --- a/backend/portal/pipeline/processing/requirements.txt +++ b/backend/portal/pipeline/processing/requirements.txt @@ -9,4 +9,4 @@ pyarrow>=1.0 pydantic>=1.9.0 python-json-logger SQLAlchemy-Utils>=0.36.8 -SQLAlchemy>=1.3.17,<2 +SQLAlchemy>=1.4.0,<1.5 diff --git a/backend/scripts/create_db.py b/backend/scripts/create_db.py index 445b0c11a84a5..c81fbcfb298be 100644 --- a/backend/scripts/create_db.py +++ b/backend/scripts/create_db.py @@ -1,20 +1,35 @@ """ -Drops and recreates all tables according to corpora_orm.py +Drops and recreates all tables according to corpora_orm.py and orm.py """ -from sqlalchemy import create_engine -from backend.common.corpora_config import CorporaDbConfig -from backend.common.corpora_orm import Base +def legacy_db(): + from sqlalchemy import create_engine + from backend.common.corpora_config import CorporaDbConfig + from backend.common.corpora_orm import Base -def create_db(): engine = create_engine(CorporaDbConfig().database_uri) - print("Dropping tables") + print("legacy db: Dropping tables") Base.metadata.drop_all(engine) - print("Recreating tables") + print("legacy db: Recreating tables") Base.metadata.create_all(engine) +def current_db(): + from backend.layers.persistence.persistence import DatabaseProvider + + db_provider = DatabaseProvider() + print("current db: Dropping tables") + db_provider._drop_schema() + print("current db: Recreating tables") + db_provider._create_schema() + + +def create_db(): + legacy_db() + current_db() + + if __name__ == "__main__": create_db() diff --git a/requirements.txt b/requirements.txt index 
76be703197c1b..c25e0389092c5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -alembic +alembic>=1.9, <2 anndata==0.8.0 allure-pytest<3 black==22.3.0 # Must be kept in sync with black version in .pre-commit-config.yaml @@ -20,5 +20,7 @@ python-json-logger requests>=2.22.0 rsa>=4.7 # not directly required, pinned by Snyk to avoid a vulnerability s3fs==0.4.2 +SQLAlchemy-Utils>=0.36.8 +SQLAlchemy>=1.4.0,<1.5 tenacity tiledb==0.16.5 # Portal's tiledb version should always be the same or older than Explorer's diff --git a/scripts/cxg_admin_scripts/migrate.py b/scripts/cxg_admin_scripts/migrate.py index f1cde0ac6aa44..7f39c7e7365b1 100644 --- a/scripts/cxg_admin_scripts/migrate.py +++ b/scripts/cxg_admin_scripts/migrate.py @@ -430,18 +430,6 @@ def migrate_redesign_write(ctx): # database_uri = f"postgresql://postgres:secret@localhost" engine = create_engine(database_uri, connect_args={"connect_timeout": 5}) - # engine.execute(schema.CreateSchema('persistence_schema')) - # metadata_obj.create_all(bind=engine) - - # from sqlalchemy.schema import DropSchema - # engine.execute(DropSchema("persistence_schema", cascade=True)) - - from sqlalchemy.schema import CreateSchema - from backend.layers.persistence.orm import metadata - - engine.execute(CreateSchema("persistence_schema")) - metadata.create_all(bind=engine) - with Session(engine) as session: for collection in collections: @@ -491,8 +479,6 @@ def migrate_redesign_write(ctx): if not dataset_version.get("status"): continue - metadata = dataset_version["metadata"] - dataset_version_row = DatasetVersionRow( version_id=dataset_version["version_id"], dataset_id=dataset_version["dataset_id"], diff --git a/scripts/setup_dev_data.sh b/scripts/setup_dev_data.sh index acc7bcc5d708e..8830c022a94cd 100755 --- a/scripts/setup_dev_data.sh +++ b/scripts/setup_dev_data.sh @@ -38,7 +38,7 @@ ${local_aws} s3api create-bucket --bucket artifact-bucket &>/dev/null || true ${local_aws} s3api create-bucket --bucket 
cellxgene-bucket &>/dev/null || true ${local_aws} secretsmanager create-secret --name corpora/backend/test/auth0-secret &>/dev/null || true ${local_aws} secretsmanager create-secret --name corpora/cicd/test/auth0-secret &>/dev/null || true -${local_aws} secretsmanager create-secret --name corpora/backend/test/database_local &>/dev/null || true +${local_aws} secretsmanager create-secret --name corpora/backend/test/database &>/dev/null || true ${local_aws} secretsmanager create-secret --name corpora/backend/test/config &>/dev/null || true echo "Creating default state machine" @@ -81,7 +81,8 @@ ${local_aws} secretsmanager update-secret --secret-id corpora/cicd/test/auth0-se "grant_type": "" }' || true -${local_aws} secretsmanager update-secret --secret-id corpora/backend/test/database_local --secret-string '{"database_uri": "postgresql://corpora:test_pw@database.corporanet.local:5432"}' || true +${local_aws} secretsmanager update-secret --secret-id corpora/backend/test/database --secret-string '{"database_uri": + "postgresql://corpora:test_pw@database.corporanet.local:5432"}' || true ${local_aws} secretsmanager update-secret --secret-id corpora/backend/test/config --secret-string '{"upload_sfn_arn": "arn:aws:states:us-west-2:000000000000:stateMachine:uploader-dev-sfn", "curator_role_arn":"test_curation_role"}' || true # Make a 1mb data file @@ -91,7 +92,6 @@ ${local_aws} s3 cp fake-h5ad-file.h5ad s3://corpora-data-dev/ rm fake-h5ad-file.h5ad echo "Populating test db" -export CORPORA_LOCAL_DEV=true export BOTO_ENDPOINT_URL=${LOCALSTACK_URL} cd $(dirname ${BASH_SOURCE[0]})/.. 
python3 -m scripts.populate_db diff --git a/tests/unit/backend/fixtures/test_db.py b/tests/unit/backend/fixtures/test_db.py index 806aa42ce37c8..6c2295332cda6 100644 --- a/tests/unit/backend/fixtures/test_db.py +++ b/tests/unit/backend/fixtures/test_db.py @@ -49,6 +49,7 @@ def populate_test_data(self): del self.session def _populate_test_data(self): + # TODO update for the redesign self._create_test_collections() self._create_test_collection_links() self._create_test_datasets() diff --git a/tests/unit/backend/layers/business/test_business.py b/tests/unit/backend/layers/business/test_business.py index 52e6192fc40b2..a8f8d0ac8e15e 100644 --- a/tests/unit/backend/layers/business/test_business.py +++ b/tests/unit/backend/layers/business/test_business.py @@ -61,11 +61,11 @@ def setUpClass(cls) -> None: if cls.run_as_integration: database_uri = os.environ.get("DB_URI", "postgresql://postgres:secret@localhost") cls.database_provider = DatabaseProvider(database_uri=database_uri) - cls.database_provider._drop_schema("persistence_schema") + cls.database_provider._drop_schema() def setUp(self) -> None: if self.run_as_integration: - self.database_provider._create_schema("persistence_schema") + self.database_provider._create_schema() else: self.database_provider = DatabaseProviderMock() @@ -127,7 +127,7 @@ def mock_config_fn(name): def tearDown(self): if self.run_as_integration: - self.database_provider._drop_schema("persistence_schema") + self.database_provider._drop_schema() @classmethod def tearDownClass(cls) -> None: diff --git a/tests/unit/backend/layers/common/base_test.py b/tests/unit/backend/layers/common/base_test.py index 41f7762f485e7..4b0dfa2922d09 100644 --- a/tests/unit/backend/layers/common/base_test.py +++ b/tests/unit/backend/layers/common/base_test.py @@ -70,7 +70,7 @@ def setUpClass(cls) -> None: if cls.run_as_integration: database_uri = os.environ.get("DB_URI", "postgresql://postgres:secret@localhost") cls.database_provider = 
DatabaseProvider(database_uri=database_uri) - cls.database_provider._drop_schema("persistence_schema") + cls.database_provider._drop_schema() def setUp(self): super().setUp() @@ -86,7 +86,7 @@ def mock_config_fn(name): mock_config.start() if self.run_as_integration: - self.database_provider._create_schema("persistence_schema") + self.database_provider._create_schema() else: self.database_provider = DatabaseProviderMock() @@ -134,7 +134,7 @@ def mock_config_fn(name): def tearDown(self): super().tearDown() if self.run_as_integration: - self.database_provider._drop_schema("persistence_schema") + self.database_provider._drop_schema() @classmethod def tearDownClass(cls) -> None: