From 7e2b774c03dc9a95d56e3659123c24f1846fa3f5 Mon Sep 17 00:00:00 2001 From: Matthew Heguy <10162554+mheguy@users.noreply.github.com> Date: Thu, 25 Jul 2024 22:30:33 -0400 Subject: [PATCH] Fix time formatting and other small format changes --- sgu/data/templates/dumbest.j2x | 1 + sgu/data/templates/email.j2x | 1 + sgu/data/templates/interview.j2x | 2 +- sgu/data/templates/logical_fallacy.j2x | 2 +- sgu/data/templates/news.j2x | 4 ++-- sgu/data/templates/noisy.j2x | 2 +- sgu/data/templates/quickie.j2x | 2 +- sgu/data/templates/quote.j2x | 2 +- sgu/data/templates/science_or_fiction.j2x | 2 +- sgu/data/templates/tiktok.j2x | 2 +- sgu/data/templates/unknown.j2x | 2 +- sgu/data/templates/whats_the_word.j2x | 2 +- sgu/episode_segments.py | 4 ++-- sgu/transcript_formatting.py | 27 +++++++++++++++-------- sgu/wiki.py | 5 ++--- 15 files changed, 35 insertions(+), 25 deletions(-) diff --git a/sgu/data/templates/dumbest.j2x b/sgu/data/templates/dumbest.j2x index 3f50804..5dfbc7f 100644 --- a/sgu/data/templates/dumbest.j2x +++ b/sgu/data/templates/dumbest.j2x @@ -10,4 +10,5 @@ |article_title = ((( article_title ))) |publication = ((( publication ))) }} + ((( transcript ))) diff --git a/sgu/data/templates/email.j2x b/sgu/data/templates/email.j2x index af0fb1c..c1c586d 100644 --- a/sgu/data/templates/email.j2x +++ b/sgu/data/templates/email.j2x @@ -7,4 +7,5 @@ text: ((( item ))) ((* endfor *)) --> + ((( transcript ))) diff --git a/sgu/data/templates/interview.j2x b/sgu/data/templates/interview.j2x index b1f53fa..6814024 100644 --- a/sgu/data/templates/interview.j2x +++ b/sgu/data/templates/interview.j2x @@ -1,4 +1,4 @@ {{anchor|interview}} == Interview with ((( name ))) ( ((( start_time ))) ) == -((( source ))) + ((( transcript ))) diff --git a/sgu/data/templates/logical_fallacy.j2x b/sgu/data/templates/logical_fallacy.j2x index 4f6a284..e6d2c39 100644 --- a/sgu/data/templates/logical_fallacy.j2x +++ b/sgu/data/templates/logical_fallacy.j2x @@ -1,4 +1,4 @@ {{anchor|ntlf}} == Name That Logical Fallacy ( ((( start_time ))) ) == -((( source ))) + ((( transcript ))) diff --git a/sgu/data/templates/news.j2x b/sgu/data/templates/news.j2x index 647bbba..4c01119 100644 --- a/sgu/data/templates/news.j2x +++ b/sgu/data/templates/news.j2x @@ -1,9 +1,9 @@ {{anchor|news((( item_number )))}} -=== News Item #((( item_number ))) - ((( topic ))) ( ((( start_time ))) ) === +== News Item #((( item_number ))) - ((( topic ))) ( ((( start_time ))) ) == {{shownotes |weblink = ((( url ))) |article_title = ((( topic ))) ((# TODO: Replace with title #)) |publication = ((( publication ))) }} -((( source ))) + ((( transcript ))) diff --git a/sgu/data/templates/noisy.j2x b/sgu/data/templates/noisy.j2x index baa3f10..1b344cf 100644 --- a/sgu/data/templates/noisy.j2x +++ b/sgu/data/templates/noisy.j2x @@ -1,5 +1,5 @@ {{anchor|wtn}} {{anchor|futureWTN}} == Who's That Noisy? + Announcements ( ((( start_time ))) ) == -((( source ))) + ((( transcript ))) diff --git a/sgu/data/templates/quickie.j2x b/sgu/data/templates/quickie.j2x index 7b753d3..071c751 100644 --- a/sgu/data/templates/quickie.j2x +++ b/sgu/data/templates/quickie.j2x @@ -1,4 +1,4 @@ {{anchor|quickie}} == ((( title ))): ((( subject ))) ( ((( start_time ))) ) == -((( source ))) + ((( transcript ))) diff --git a/sgu/data/templates/quote.j2x b/sgu/data/templates/quote.j2x index 607d4a0..e4dc226 100644 --- a/sgu/data/templates/quote.j2x +++ b/sgu/data/templates/quote.j2x @@ -1,10 +1,10 @@ {{anchor|qow}} == Skeptical Quote of the Week ( ((( start_time ))) ) == -((( source ))) {{qow |text = ((( quote ))) |author = ((( attribution ))) |lived = |desc = }} + ((( transcript ))) diff --git a/sgu/data/templates/science_or_fiction.j2x b/sgu/data/templates/science_or_fiction.j2x index fa4a873..ca05d0c 100644 --- a/sgu/data/templates/science_or_fiction.j2x +++ b/sgu/data/templates/science_or_fiction.j2x @@ -1,7 +1,6 @@ {{anchor|sof}} {{anchor|theme}} == Science or Fiction ( ((( start_time ))) ) == -((( source ))) {{SOFinfo |theme = ((( theme ))) |hiddentheme = @@ -31,4 +30,5 @@ |win = |swept = }} + ((( transcript ))) diff --git a/sgu/data/templates/tiktok.j2x b/sgu/data/templates/tiktok.j2x index 385a6c0..7fa0779 100644 --- a/sgu/data/templates/tiktok.j2x +++ b/sgu/data/templates/tiktok.j2x @@ -1,5 +1,5 @@ {{anchor|tiktok}} == From TikTok: ((( title ))) ( ((( start_time ))) ) == -((( source ))) ((( url ))) + ((( transcript ))) diff --git a/sgu/data/templates/unknown.j2x b/sgu/data/templates/unknown.j2x index 235ebcf..92fc4c9 100644 --- a/sgu/data/templates/unknown.j2x +++ b/sgu/data/templates/unknown.j2x @@ -1,3 +1,3 @@ == ((( title ))) ( ((( start_time ))) ) == -((( source ))) + ((( transcript ))) diff --git a/sgu/data/templates/whats_the_word.j2x b/sgu/data/templates/whats_the_word.j2x index 9f59adf..48c1b5d 100644 --- a/sgu/data/templates/whats_the_word.j2x +++ b/sgu/data/templates/whats_the_word.j2x @@ -1,5 +1,5 @@ {{anchor|wtw}} == What's the Word? ( ((( start_time ))) ) == -((( source ))) * ((( word ))) + ((( transcript ))) diff --git a/sgu/episode_segments.py b/sgu/episode_segments.py index 52ed8af..0beb045 100644 --- a/sgu/episode_segments.py +++ b/sgu/episode_segments.py @@ -10,7 +10,7 @@ from sgu.exceptions import StartTimeNotFoundError from sgu.helpers import are_strings_in_string, find_single_element, string_is_url from sgu.template_environment import template_env -from sgu.transcript_formatting import format_transcript_for_wiki +from sgu.transcript_formatting import format_time, format_transcript_for_wiki if TYPE_CHECKING: from sgu.transcription import DiarizedTranscript @@ -62,7 +62,7 @@ def to_wiki(self) -> str: template = template_env.get_template(f"{self.template_name}.j2x") template_values = self.get_template_values() return template.render( - start_time=self.start_time, + start_time=format_time(self.start_time), transcript=format_transcript_for_wiki(self.transcript), source=f"", **template_values, diff --git a/sgu/transcript_formatting.py b/sgu/transcript_formatting.py index 2ca3f33..f6f6671 100644 --- a/sgu/transcript_formatting.py +++ b/sgu/transcript_formatting.py @@ -9,19 +9,28 @@ def format_transcript_for_wiki(transcript: "DiarizedTranscript") -> str: transcript = _join_speaker_segments(transcript) _abbreviate_speakers(transcript) - text_segments: list[str] = [] - for transcript_chunk in transcript: - start_time = _format_time(transcript_chunk["start"]) - end_time = _format_time(transcript_chunk["end"]) + text_segments = [ + f"'''{transcript_chunk['speaker']}''':{transcript_chunk['text']}" for transcript_chunk in transcript + ] - text_segments.append(f"") - text_segments.append(f"'''{transcript_chunk['speaker']}''':{transcript_chunk['text']}
") + return "\n\n".join(text_segments) - return "\n".join(text_segments) +def format_time(time: float | None) -> str: + """Format a float time to h:mm:ss or mm:ss if < 1 hour.""" + if not time: + return "???" -def _format_time(time: float) -> str: - return f"{int(time) // 3600:02d}:{int(time) // 60 % 60:02d}:{int(time) % 60:02d}" + hour_count = int(time) // 3600 + + hour = "" + if hour_count: + hour = f"{hour_count}:" + + minutes = f"{int(time) // 60 % 60:02d}:" + seconds = f"{int(time) % 60:02d}" + + return f"{hour}{minutes}{seconds}" def _join_speaker_segments(transcript: "DiarizedTranscript") -> "DiarizedTranscript": diff --git a/sgu/wiki.py b/sgu/wiki.py index 45fb913..bda9500 100644 --- a/sgu/wiki.py +++ b/sgu/wiki.py @@ -47,7 +47,8 @@ async def create_podcast_wiki_page(client: "Session", podcast: "PodcastEpisode") # Above: Generic actions # Below: Wiki-specific actions - # convert segments to wiki + # we must grab speaker data before we convert transcript to wiki + speakers = {s["speaker"].lower() for s in episode_data.transcript} wiki_segments = "\n".join(s.to_wiki() for s in episode_segments) qotw_segment = _extract_quote_of_the_week_for_wiki(episode_segments) @@ -58,8 +59,6 @@ async def create_podcast_wiki_page(client: "Session", podcast: "PodcastEpisode") episode_image_url = get_episode_image_url(episode_data.show_notes) episode_icon_name = _upload_image_to_wiki(client, episode_image_url, episode_data.podcast.episode_number) - speakers = {s["speaker"].lower() for s in episode_data.transcript} - logger.debug("Creating wiki page...") wiki_page = _construct_wiki_page(episode_data, episode_icon_name, wiki_segments, qotw_segment, speakers) _edit_page(client, page_text=wiki_page) # TODO: Change for "Create page"