Skip to content

Commit

Permalink
SYS-1601: Add command to reprocess image derivatives (#122)
Browse files Browse the repository at this point in the history
* add reprocess_derivative_images command
* add more logging
* refactoring and (currently non-working) tests
* add fixed tests for reprocess_derivative_images tests
* test cleanup and fixed file deletion logic
* fix too-broad test teardown, and improve logging
  • Loading branch information
ztucker4 committed May 10, 2024
1 parent e94c3f2 commit 65bea6d
Show file tree
Hide file tree
Showing 5 changed files with 317 additions and 2 deletions.
2 changes: 1 addition & 1 deletion charts/prod-ohstaff-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ replicaCount: 1

image:
repository: uclalibrary/oral-history-staff-ui
tag: v1.1.1
tag: v1.1.2
pullPolicy: Always

nameOverride: ""
Expand Down
162 changes: 162 additions & 0 deletions oh_staff_ui/management/commands/reprocess_derivative_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import logging
from pathlib import Path
from django.core.management.base import BaseCommand, CommandParser
from django.core.management.base import CommandError
from django.http import HttpRequest
from django.db.models.query import QuerySet
from django.conf import settings
from oh_staff_ui.classes.ImageFileHandler import ImageFileHandler
from oh_staff_ui.classes.OralHistoryFile import OralHistoryFile
from oh_staff_ui.models import MediaFile, MediaFileType

# For handling command-line processing
from django.contrib.auth.models import User


logger = logging.getLogger(__name__)


def reprocess_derivative_images(master_images: QuerySet, request: HttpRequest) -> None:
    """Recreate the submaster and thumbnail derivatives for each master image.

    For every master in ``master_images`` a submaster and a thumbnail are
    generated with ``ImageFileHandler`` and saved through
    ``OralHistoryFile.process_media_file`` with the master as their parent.

    Temporary files returned by the handler are removed after *each* master
    (whether or not it succeeded), so a run over many masters does not leave
    the intermediate files of every master except the last one behind.

    Args:
        master_images: Master image MediaFile objects to reprocess.
        request: Request handed to each OralHistoryFile that is created.

    Raises:
        CommandError, ValueError: Logged and re-raised; processing stops at
            the first master that fails.
    """
    for master_image in master_images:
        try:
            _recreate_derivatives_for_master(master_image, request)
        except (CommandError, ValueError) as ex:
            logger.error(ex)
            raise


def _recreate_derivatives_for_master(master_image, request: HttpRequest) -> None:
    """Generate and save the submaster and thumbnail for one master MediaFile."""
    # Names of temporary files created so far; only these need cleaning up,
    # which avoids probing for variables that may never have been assigned.
    temp_file_names = []
    try:
        logger.info(
            f"Recreating derivatives for master image {master_image.file.name}."
        )
        master_id = master_image.id
        # Create an OHF for the master file so we can use ImageFileHandler
        master_file = OralHistoryFile(
            master_image.item.id,
            Path(settings.MEDIA_ROOT).joinpath(master_image.file.name),
            master_image.file_type,
            "master",
            request,
        )
        handler = ImageFileHandler(master_file)

        # Submaster - generate and save
        submaster_file_name = handler.create_submaster()
        temp_file_names.append(submaster_file_name)
        submaster_file = OralHistoryFile(
            item_id=master_image.item.id,
            file_name=submaster_file_name,
            file_type=MediaFileType.objects.get(file_code="image_submaster"),
            file_use="submaster",
            request=request,
        )
        submaster_file.process_media_file(parent_id=master_id)
        logger.info(f"Submaster image {submaster_file_name} created.")

        # Thumbnail - generate and save
        thumbnail_file_name = handler.create_thumbnail()
        temp_file_names.append(thumbnail_file_name)
        thumbnail_file = OralHistoryFile(
            item_id=master_image.item.id,
            file_name=thumbnail_file_name,
            file_type=MediaFileType.objects.get(file_code="image_thumbnail"),
            file_use="thumbnail",
            request=request,
        )
        thumbnail_file.process_media_file(parent_id=master_id)
        logger.info(f"Thumbnail image {thumbnail_file_name} created.")
        logger.info(
            f"Finished creating derivative images for master file {master_image.file.name}."
        )
    finally:
        # Delete the temporary files;
        # don't throw FileNotFoundError if for some reason one doesn't exist.
        for temp_file_name in temp_file_names:
            Path(temp_file_name).unlink(missing_ok=True)


def delete_existing_derivative_images(master_images: QuerySet) -> None:
    """Remove submaster and thumbnail derivatives for each master's item.

    For every master image, all MediaFile rows of type ``image_submaster`` or
    ``image_thumbnail`` belonging to the same project item are deleted, along
    with their files under ``settings.MEDIA_ROOT`` when those files exist.

    Raises:
        CommandError: If a file is still on disk after its deletion was
            requested. Logged and re-raised, as is ValueError.
    """
    derivative_codes = ["image_submaster", "image_thumbnail"]
    try:
        for master in master_images:
            item = master.item
            logger.info(
                f"Deleting existing derivative images for project item {item} "
                f"(ID {item.id})."
            )
            derivatives = MediaFile.objects.filter(
                item=item,
                file_type__file_code__in=derivative_codes,
            )
            for derivative in derivatives:
                # Capture these up front, before the file field and the row
                # are deleted, so they remain available for logging.
                name = derivative.file.name
                pk = derivative.id
                on_disk = Path(settings.MEDIA_ROOT) / name

                if on_disk.exists():
                    # Delete the associated file, then verify it is gone.
                    derivative.file.delete()
                    if on_disk.exists():
                        raise CommandError(
                            f"Failed to delete {name} from file system."
                        )
                    logger.info(f"Deleted {name} from file system.")
                else:
                    logger.info(
                        f"File {name}, for MediaFile {pk}, "
                        "does not exist in the file system."
                    )

                # Either way the file is now absent; drop the database row.
                derivative.delete()
                logger.info(f"Deleted MediaFile {pk} from database.")

            logger.info(
                f"Finished deleting existing derivative images for project item {item} "
                f"(ID {item.id})."
            )

    except (CommandError, ValueError) as ex:
        logger.error(ex)
        raise


def get_mock_request() -> HttpRequest:
    """Build a bare HttpRequest owned by the generic data-entry user.

    Command-line processing has no real request, so an empty one is created
    and its ``user`` is set to the User named "oralhistory data entry".
    """
    command_line_user = User.objects.get(username="oralhistory data entry")
    request = HttpRequest()
    request.user = command_line_user
    return request


class Command(BaseCommand):
    """Delete and regenerate image derivatives for one project item or all.

    Existing submaster/thumbnail derivatives of the selected master images are
    deleted first, then recreated from the masters.
    """

    help = "Django management command to reprocess derivative images"

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the required ``--project_item_id`` argument."""
        parser.add_argument(
            "--project_item_id",
            type=str,
            help="ID of ProjectItem to reprocess derivative images for, or 'ALL' to reprocess all.",
            required=True,
        )

    def handle(self, *args, **options) -> None:
        """Select the master images and reprocess their derivatives.

        Raises:
            CommandError: If ``--project_item_id`` is neither ``ALL`` nor an
                integer, or if deleting/reprocessing fails.
        """
        project_item_id = options["project_item_id"]
        master_images = MediaFile.objects.filter(
            file_type__file_code="image_master"
        ).order_by("id")

        if project_item_id != "ALL":
            # Report bad input as a CommandError so the user gets a clear
            # message rather than a bare ValueError traceback.
            try:
                item_id = int(project_item_id)
            except (TypeError, ValueError) as err:
                raise CommandError(
                    f"Invalid --project_item_id {project_item_id!r}: "
                    "expected an integer ID or 'ALL'."
                ) from err
            master_images = master_images.filter(item__id=item_id)

        master_image_count = master_images.count()
        logger.info(f"Found {master_image_count} master images to reprocess")

        delete_existing_derivative_images(master_images)
        reprocess_derivative_images(master_images, get_mock_request())
6 changes: 6 additions & 0 deletions oh_staff_ui/templates/oh_staff_ui/release_notes.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
<h3>Release Notes</h3>
<hr/>

<h4>1.1.2</h4>
<p><i>May 8, 2024</i></p>
<ul>
<li>Added command to reprocess all derivative images.</li>
</ul>

<h4>1.1.1</h4>
<p><i>May 8, 2024</i></p>
<ul>
Expand Down
145 changes: 145 additions & 0 deletions oh_staff_ui/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@
get_bad_arg_error_xml,
get_bad_verb_error_xml,
)
from oh_staff_ui.management.commands.reprocess_derivative_images import (
reprocess_derivative_images,
delete_existing_derivative_images,
)


class MediaFileTestCase(TestCase):
Expand Down Expand Up @@ -1564,3 +1568,144 @@ def test_get_full_file_name_audio_submaster_older_file(self):
"https://testing/oralhistory/21198-zz00094qtd-3-submaster.mp3/playlist.m3u8",
)
self.assertEqual(full_file_name, "oh_wowza/21198-zz00094qtd-3-submaster.mp3")


class ReprocessDerivativeImagesTestCase(TestCase):
    """Tests for deleting and recreating derivative images from a master."""

    # Load the lookup tables needed for these tests.
    fixtures = [
        "item-status-data.json",
        "item-type-data.json",
        "media-file-type-data.json",
    ]

    @classmethod
    def setUpTestData(cls):
        cls.user = User.objects.create_user("tester")
        cls.mock_request = HttpRequest()
        cls.mock_request.user = User.objects.get(username=cls.user.username)

    def create_master_and_derivatives(self, user, mock_request):
        """Create an item with a master image and its derivatives.

        Returns:
            Tuple of (item, thumbnail create_date, submaster create_date).
        """
        item = ProjectItem.objects.create(
            ark="fake/abcdef",
            created_by=user,
            last_modified_by=user,
            title="Fake title",
            type=ItemType.objects.get(type="Audio"),
        )
        file_type = MediaFileType.objects.get(file_code="image_master")
        master_image_file = OralHistoryFile(
            item.id,
            "samples/sample_marbles.tif",
            file_type,
            "master",
            mock_request,
        )

        handler = ImageFileHandler(master_image_file)
        handler.process_files()

        # get created date from derivative images, so we can check if they are updated
        thumbnail_create_date = MediaFile.objects.get(
            item=item, file_type__file_code="image_thumbnail"
        ).create_date
        submaster_create_date = MediaFile.objects.get(
            item=item, file_type__file_code="image_submaster"
        ).create_date

        return item, thumbnail_create_date, submaster_create_date

    def _derivative_paths(self):
        """Return (thumbnail_path, submaster_path) expected for the fake item."""
        thumbnail_path = Path(
            f"{settings.MEDIA_ROOT}/oh_static/nails/fake-abcdef-1-thumbnail.jpg"
        )
        submaster_path = Path(
            f"{settings.MEDIA_ROOT}/oh_static/submasters/fake-abcdef-1-submaster.jpg"
        )
        return thumbnail_path, submaster_path

    def test_delete_existing_derivative_images(self):
        self.item, self.thumbnail_created_date, self.submaster_created_date = (
            self.create_master_and_derivatives(self.user, self.mock_request)
        )

        # check that the thumbnail and submaster images are created
        self.assertEqual(MediaFile.objects.filter(item=self.item).count(), 3)

        # check that the thumbnail and submaster images exist on disk
        thumbnail_path, submaster_path = self._derivative_paths()
        self.assertTrue(thumbnail_path.is_file())
        self.assertTrue(submaster_path.is_file())

        # get queryset with our fake image file
        image_file_qset = MediaFile.objects.filter(
            item=self.item, file_type__file_code="image_master"
        )
        # delete derivative images
        delete_existing_derivative_images(image_file_qset)
        # check that the thumbnail and submaster images are deleted
        self.assertFalse(
            MediaFile.objects.filter(
                item=self.item, file_type__file_code="image_thumbnail"
            ).exists()
        )
        self.assertFalse(
            MediaFile.objects.filter(
                item=self.item, file_type__file_code="image_submaster"
            ).exists()
        )
        # check that the files on disk are deleted
        self.assertFalse(thumbnail_path.is_file())
        self.assertFalse(submaster_path.is_file())

    def test_reprocess_derivative_images(self):
        self.item, self.thumbnail_created_date, self.submaster_created_date = (
            self.create_master_and_derivatives(self.user, self.mock_request)
        )
        # get queryset with our fake master file
        image_file_qset = MediaFile.objects.filter(
            item=self.item, file_type__file_code="image_master"
        )
        # delete the existing derivative images
        delete_existing_derivative_images(image_file_qset)
        # reprocess the image file
        reprocess_derivative_images(image_file_qset, self.mock_request)

        # check that we now have a single thumbnail and submaster image
        self.assertEqual(
            MediaFile.objects.filter(
                item=self.item, file_type__file_code="image_thumbnail"
            ).count(),
            1,
        )
        self.assertEqual(
            MediaFile.objects.filter(
                item=self.item, file_type__file_code="image_submaster"
            ).count(),
            1,
        )

        # check that the created date of the derivative images has been updated
        thumbnail_create_date_updated = MediaFile.objects.get(
            item=self.item, file_type__file_code="image_thumbnail"
        ).create_date
        submaster_create_date_updated = MediaFile.objects.get(
            item=self.item, file_type__file_code="image_submaster"
        ).create_date
        self.assertNotEqual(self.thumbnail_created_date, thumbnail_create_date_updated)
        self.assertNotEqual(self.submaster_created_date, submaster_create_date_updated)

        # check that the files on disk exist using Path
        thumbnail_path, submaster_path = self._derivative_paths()
        self.assertTrue(thumbnail_path.is_file())
        self.assertTrue(submaster_path.is_file())

    def tearDown(self) -> None:
        # If a test failed before assigning self.item there is nothing to
        # clean up; bail out so the original failure is not masked by an
        # AttributeError raised here.
        item = getattr(self, "item", None)
        if item is None:
            return
        media_files = MediaFile.objects.filter(item=item)
        # delete files on disk first
        for mf in media_files:
            if mf.file:
                mf.file.delete()
        # delete MediaFile objects starting with the parent - CASCADE will delete children
        for parent_mf in media_files.filter(parent__isnull=True):
            parent_mf.delete()
4 changes: 3 additions & 1 deletion oh_staff_ui/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,9 @@ def show_log(request, line_count: int = 200) -> HttpResponse:
@login_required
def upload_file(request: HttpRequest, item_id: int) -> HttpResponse:
item = ProjectItem.objects.get(pk=item_id)
files = MediaFile.objects.filter(item=item)
files = MediaFile.objects.filter(item=item).order_by(
"sequence", "file_type__file_code"
)
file_errors = MediaFileError.objects.filter(item=item).order_by("create_date")
if request.method == "POST":
# Pass item_id and request to submitted form to help with validation.
Expand Down

0 comments on commit 65bea6d

Please sign in to comment.