Skip to content

Commit

Permalink
Set collation of some string fields to 'C' (#959)
Browse files Browse the repository at this point in the history
* Set collation of some string fields to 'C'

* Apply suggestions from code review
  • Loading branch information
psrok1 authored Jul 12, 2024
1 parent 5f3c778 commit 416ab4b
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 13 deletions.
4 changes: 2 additions & 2 deletions mwdb/model/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@


class TextBlob(Object):
blob_name = db.Column(db.String, index=True)
blob_name = db.Column(db.String(collation="C"), index=True)
blob_size = db.Column(db.Integer, index=True)
blob_type = db.Column(db.String(32), index=True)
blob_type = db.Column(db.String(32, collation="C"), index=True)
_content = db.Column("content", db.String())
last_seen = db.Column(db.DateTime, index=True)

Expand Down
4 changes: 2 additions & 2 deletions mwdb/model/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@


class Config(Object):
family = db.Column(db.String(32), index=True)
config_type = db.Column(db.String(32), index=True)
family = db.Column(db.String(32, collation="C"), index=True)
config_type = db.Column(db.String(32, collation="C"), index=True)
_cfg = db.Column("cfg", JSONB)

__mapper_args__ = {
Expand Down
18 changes: 10 additions & 8 deletions mwdb/model/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,19 @@ class EmptyFileError(ValueError):


class File(Object):
file_name = db.Column(db.String, index=True)
file_name = db.Column(db.String(collation="C"), index=True)
file_size = db.Column(db.Integer, index=True)
file_type = db.Column(db.Text, index=True)
md5 = db.Column(db.String(32), index=True)
crc32 = db.Column(db.String(8), index=True)
sha1 = db.Column(db.String(40), index=True)
sha256 = db.Column(db.String(64), index=True, unique=True)
sha512 = db.Column(db.String(128), index=True)
ssdeep = db.Column(db.String(255), index=True)
md5 = db.Column(db.String(32, collation="C"), index=True)
crc32 = db.Column(db.String(8, collation="C"), index=True)
sha1 = db.Column(db.String(40, collation="C"), index=True)
sha256 = db.Column(db.String(64, collation="C"), index=True, unique=True)
sha512 = db.Column(db.String(128, collation="C"), index=True)
ssdeep = db.Column(db.String(255, collation="C"), index=True)
alt_names = db.Column(
MutableList.as_mutable(ARRAY(db.String)), nullable=False, server_default="{}"
MutableList.as_mutable(ARRAY(db.String(collation="C"))),
nullable=False,
server_default="{}",
)

__mapper_args__ = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Set collation of fields that may be searched by prefix
Revision ID: 6fc42e070495
Revises: 1a46a79d9108
Create Date: 2024-07-12 09:35:20.591920
"""
import logging

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "6fc42e070495"
down_revision = "1a46a79d9108"
branch_labels = None
depends_on = None

# Progress bookkeeping for the log lines emitted by alter_column():
# current_change is the 1-based number of the next column alteration and
# total_changes is the number of alter_column() calls made by upgrade()
# (and, symmetrically, by downgrade()).
current_change = 1
total_changes = 13

# Log through the "alembic" logger so progress lines are emitted alongside
# Alembic's own output (presumably using its logging configuration).
logger = logging.getLogger("alembic")


def alter_column(table, column, type_):
    """Change the type of a single column and log the migration progress.

    Emits a ``[current/total]`` progress line through the module-level
    logger and then alters the column via Alembic's ``op.alter_column``.

    :param table: name of the table that owns the column
    :param column: name of the column to alter
    :param type_: new SQLAlchemy type for the column (carrying the wanted
        collation, if any)
    """
    global current_change
    # Lazy %-style arguments: the rendered message is identical to the
    # previous f-string, but formatting is skipped when INFO is disabled.
    logger.info(
        "[%s/%s] Setting collation of %s.%s",
        current_change,
        total_changes,
        table,
        column,
    )
    op.alter_column(table, column, type_=type_)
    # Wrap around after the last change so that a downgrade() executed in the
    # same interpreter as an upgrade() (or the other way round) starts
    # counting from 1 again instead of logging e.g. "[14/13]".
    current_change = current_change % total_changes + 1


def upgrade():
    """Switch the prefix-searchable string columns to the "C" collation.

    Every listed column is altered in order through alter_column(), after
    which ANALYZE is executed.
    """
    logger.info("Changing column collation, this may take a while...")
    # (table, column, new type) triples, applied in this exact order.
    collated_columns = (
        ("object", "blob_name", sa.String(collation="C")),
        ("object", "blob_type", sa.String(32, collation="C")),
        ("object", "family", sa.String(32, collation="C")),
        ("object", "config_type", sa.String(32, collation="C")),
        ("object", "file_name", sa.String(collation="C")),
        ("object", "md5", sa.String(32, collation="C")),
        ("object", "crc32", sa.String(8, collation="C")),
        ("object", "sha1", sa.String(40, collation="C")),
        ("object", "sha256", sa.String(64, collation="C")),
        ("object", "sha512", sa.String(128, collation="C")),
        ("object", "ssdeep", sa.String(255, collation="C")),
        ("object", "alt_names", postgresql.ARRAY(sa.String(collation="C"))),
        ("tag", "tag", sa.String(collation="C")),
    )
    for table_name, column_name, column_type in collated_columns:
        alter_column(table_name, column_name, type_=column_type)
    op.execute("ANALYZE")


def downgrade():
    """Revert the columns touched by upgrade() to types without a collation.

    Every listed column is altered in order through alter_column(), after
    which ANALYZE is executed.
    """
    logger.info("Changing column collation, this may take a while...")
    # Column name -> original (collation-less) type for the "object" table,
    # in the order the alterations are applied.
    object_column_types = {
        "blob_name": sa.String(),
        "blob_type": sa.String(32),
        "family": sa.String(32),
        "config_type": sa.String(32),
        "file_name": sa.String(),
        "md5": sa.String(32),
        "crc32": sa.String(8),
        "sha1": sa.String(40),
        "sha256": sa.String(64),
        "sha512": sa.String(128),
        "ssdeep": sa.String(255),
        "alt_names": postgresql.ARRAY(sa.String()),
    }
    for column_name, plain_type in object_column_types.items():
        alter_column("object", column_name, type_=plain_type)
    alter_column("tag", "tag", type_=sa.String())
    op.execute("ANALYZE")
2 changes: 1 addition & 1 deletion mwdb/model/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Tag(db.Model):
)

id = db.Column(db.Integer, primary_key=True)
tag = db.Column(db.String, nullable=False, index=True)
tag = db.Column(db.String(collation="C"), nullable=False, index=True)
object_id = db.Column(
db.Integer,
db.ForeignKey("object.id", ondelete="CASCADE"),
Expand Down

0 comments on commit 416ab4b

Please sign in to comment.