diff --git a/mwdb/model/blob.py b/mwdb/model/blob.py index cf094e63b..f8adb75b0 100644 --- a/mwdb/model/blob.py +++ b/mwdb/model/blob.py @@ -11,9 +11,9 @@ class TextBlob(Object): - blob_name = db.Column(db.String, index=True) + blob_name = db.Column(db.String(collation="C"), index=True) blob_size = db.Column(db.Integer, index=True) - blob_type = db.Column(db.String(32), index=True) + blob_type = db.Column(db.String(32, collation="C"), index=True) _content = db.Column("content", db.String()) last_seen = db.Column(db.DateTime, index=True) diff --git a/mwdb/model/config.py b/mwdb/model/config.py index 3070a5f17..162959691 100644 --- a/mwdb/model/config.py +++ b/mwdb/model/config.py @@ -11,8 +11,8 @@ class Config(Object): - family = db.Column(db.String(32), index=True) - config_type = db.Column(db.String(32), index=True) + family = db.Column(db.String(32, collation="C"), index=True) + config_type = db.Column(db.String(32, collation="C"), index=True) _cfg = db.Column("cfg", JSONB) __mapper_args__ = { diff --git a/mwdb/model/file.py b/mwdb/model/file.py index c1a2357b5..f57d9f8bb 100644 --- a/mwdb/model/file.py +++ b/mwdb/model/file.py @@ -33,17 +33,19 @@ class EmptyFileError(ValueError): class File(Object): - file_name = db.Column(db.String, index=True) + file_name = db.Column(db.String(collation="C"), index=True) file_size = db.Column(db.Integer, index=True) file_type = db.Column(db.Text, index=True) - md5 = db.Column(db.String(32), index=True) - crc32 = db.Column(db.String(8), index=True) - sha1 = db.Column(db.String(40), index=True) - sha256 = db.Column(db.String(64), index=True, unique=True) - sha512 = db.Column(db.String(128), index=True) - ssdeep = db.Column(db.String(255), index=True) + md5 = db.Column(db.String(32, collation="C"), index=True) + crc32 = db.Column(db.String(8, collation="C"), index=True) + sha1 = db.Column(db.String(40, collation="C"), index=True) + sha256 = db.Column(db.String(64, collation="C"), index=True, unique=True) + sha512 = db.Column(db.String(128, collation="C"), index=True) + ssdeep = db.Column(db.String(255, collation="C"), index=True) alt_names = db.Column( - MutableList.as_mutable(ARRAY(db.String)), nullable=False, server_default="{}" + MutableList.as_mutable(ARRAY(db.String(collation="C"))), + nullable=False, + server_default="{}", ) __mapper_args__ = { diff --git a/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py b/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py new file mode 100644 index 000000000..cf8590126 --- /dev/null +++ b/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py @@ -0,0 +1,70 @@ +"""Set collation of fields that may be searched by prefix + +Revision ID: 6fc42e070495 +Revises: 1a46a79d9108 +Create Date: 2024-07-12 09:35:20.591920 + +""" +import logging + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "6fc42e070495" +down_revision = "1a46a79d9108" +branch_labels = None +depends_on = None + +current_change = 1 +total_changes = 13 + +logger = logging.getLogger("alembic") + + +def alter_column(table, column, type_): + global current_change + logger.info( + f"[{current_change}/{total_changes}] Setting collation of {table}.{column}" + ) + op.alter_column(table, column, type_=type_) + current_change += 1 + + +def upgrade(): + logger.info("Changing column collation, this may take a while...") + alter_column("object", "blob_name", type_=sa.String(collation="C")) + alter_column("object", "blob_type", type_=sa.String(32, collation="C")) + alter_column("object", "family", type_=sa.String(32, collation="C")) + alter_column("object", "config_type", type_=sa.String(32, collation="C")) + alter_column("object", "file_name", type_=sa.String(collation="C")) + alter_column("object", "md5", type_=sa.String(32, collation="C")) + alter_column("object", "crc32", type_=sa.String(8, collation="C")) + alter_column("object", "sha1", type_=sa.String(40, collation="C")) + alter_column("object", "sha256", type_=sa.String(64, collation="C")) + alter_column("object", "sha512", type_=sa.String(128, collation="C")) + alter_column("object", "ssdeep", type_=sa.String(255, collation="C")) + alter_column( + "object", "alt_names", type_=postgresql.ARRAY(sa.String(collation="C")) + ) + alter_column("tag", "tag", type_=sa.String(collation="C")) + op.execute("ANALYZE") + + +def downgrade(): + logger.info("Changing column collation, this may take a while...") + alter_column("object", "blob_name", type_=sa.String()) + alter_column("object", "blob_type", type_=sa.String(32)) + alter_column("object", "family", type_=sa.String(32)) + alter_column("object", "config_type", type_=sa.String(32)) + alter_column("object", "file_name", type_=sa.String()) + alter_column("object", "md5", type_=sa.String(32)) + alter_column("object", "crc32", type_=sa.String(8)) + alter_column("object", "sha1", type_=sa.String(40)) + alter_column("object", "sha256", type_=sa.String(64)) + alter_column("object", "sha512", type_=sa.String(128)) + alter_column("object", "ssdeep", type_=sa.String(255)) + alter_column("object", "alt_names", type_=postgresql.ARRAY(sa.String())) + alter_column("tag", "tag", type_=sa.String()) + op.execute("ANALYZE") diff --git a/mwdb/model/tag.py b/mwdb/model/tag.py index c24aa19b5..bef7a4dc7 100644 --- a/mwdb/model/tag.py +++ b/mwdb/model/tag.py @@ -8,7 +8,7 @@ class Tag(db.Model): ) id = db.Column(db.Integer, primary_key=True) - tag = db.Column(db.String, nullable=False, index=True) + tag = db.Column(db.String(collation="C"), nullable=False, index=True) object_id = db.Column( db.Integer, db.ForeignKey("object.id", ondelete="CASCADE"),