From 7e2b774c03dc9a95d56e3659123c24f1846fa3f5 Mon Sep 17 00:00:00 2001
From: Matthew Heguy <10162554+mheguy@users.noreply.github.com>
Date: Thu, 25 Jul 2024 22:30:33 -0400
Subject: [PATCH] Fix time formatting and other small format changes
---
sgu/data/templates/dumbest.j2x | 1 +
sgu/data/templates/email.j2x | 1 +
sgu/data/templates/interview.j2x | 2 +-
sgu/data/templates/logical_fallacy.j2x | 2 +-
sgu/data/templates/news.j2x | 4 ++--
sgu/data/templates/noisy.j2x | 2 +-
sgu/data/templates/quickie.j2x | 2 +-
sgu/data/templates/quote.j2x | 2 +-
sgu/data/templates/science_or_fiction.j2x | 2 +-
sgu/data/templates/tiktok.j2x | 2 +-
sgu/data/templates/unknown.j2x | 2 +-
sgu/data/templates/whats_the_word.j2x | 2 +-
sgu/episode_segments.py | 4 ++--
sgu/transcript_formatting.py | 27 +++++++++++++++--------
sgu/wiki.py | 5 ++---
15 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/sgu/data/templates/dumbest.j2x b/sgu/data/templates/dumbest.j2x
index 3f50804..5dfbc7f 100644
--- a/sgu/data/templates/dumbest.j2x
+++ b/sgu/data/templates/dumbest.j2x
@@ -10,4 +10,5 @@
|article_title = ((( article_title )))
|publication = ((( publication )))
}}
+
((( transcript )))
diff --git a/sgu/data/templates/email.j2x b/sgu/data/templates/email.j2x
index af0fb1c..c1c586d 100644
--- a/sgu/data/templates/email.j2x
+++ b/sgu/data/templates/email.j2x
@@ -7,4 +7,5 @@
text: ((( item )))
((* endfor *))
-->
+
((( transcript )))
diff --git a/sgu/data/templates/interview.j2x b/sgu/data/templates/interview.j2x
index b1f53fa..6814024 100644
--- a/sgu/data/templates/interview.j2x
+++ b/sgu/data/templates/interview.j2x
@@ -1,4 +1,4 @@
{{anchor|interview}}
== Interview with ((( name ))) ( ((( start_time ))) ) ==
-((( source )))
+
((( transcript )))
diff --git a/sgu/data/templates/logical_fallacy.j2x b/sgu/data/templates/logical_fallacy.j2x
index 4f6a284..e6d2c39 100644
--- a/sgu/data/templates/logical_fallacy.j2x
+++ b/sgu/data/templates/logical_fallacy.j2x
@@ -1,4 +1,4 @@
{{anchor|ntlf}}
== Name That Logical Fallacy ( ((( start_time ))) ) ==
-((( source )))
+
((( transcript )))
diff --git a/sgu/data/templates/news.j2x b/sgu/data/templates/news.j2x
index 647bbba..4c01119 100644
--- a/sgu/data/templates/news.j2x
+++ b/sgu/data/templates/news.j2x
@@ -1,9 +1,9 @@
{{anchor|news((( item_number )))}}
-=== News Item #((( item_number ))) - ((( topic ))) ( ((( start_time ))) ) ===
+== News Item #((( item_number ))) - ((( topic ))) ( ((( start_time ))) ) ==
{{shownotes
|weblink = ((( url )))
|article_title = ((( topic ))) ((# TODO: Replace with title #))
|publication = ((( publication )))
}}
-((( source )))
+
((( transcript )))
diff --git a/sgu/data/templates/noisy.j2x b/sgu/data/templates/noisy.j2x
index baa3f10..1b344cf 100644
--- a/sgu/data/templates/noisy.j2x
+++ b/sgu/data/templates/noisy.j2x
@@ -1,5 +1,5 @@
{{anchor|wtn}}
{{anchor|futureWTN}}
== Who's That Noisy? + Announcements ( ((( start_time ))) ) ==
-((( source )))
+
((( transcript )))
diff --git a/sgu/data/templates/quickie.j2x b/sgu/data/templates/quickie.j2x
index 7b753d3..071c751 100644
--- a/sgu/data/templates/quickie.j2x
+++ b/sgu/data/templates/quickie.j2x
@@ -1,4 +1,4 @@
{{anchor|quickie}}
== ((( title ))): ((( subject ))) ( ((( start_time ))) ) ==
-((( source )))
+
((( transcript )))
diff --git a/sgu/data/templates/quote.j2x b/sgu/data/templates/quote.j2x
index 607d4a0..e4dc226 100644
--- a/sgu/data/templates/quote.j2x
+++ b/sgu/data/templates/quote.j2x
@@ -1,10 +1,10 @@
{{anchor|qow}}
== Skeptical Quote of the Week ( ((( start_time ))) ) ==
-((( source )))
{{qow
|text = ((( quote )))
|author = ((( attribution )))
|lived =
|desc =
}}
+
((( transcript )))
diff --git a/sgu/data/templates/science_or_fiction.j2x b/sgu/data/templates/science_or_fiction.j2x
index fa4a873..ca05d0c 100644
--- a/sgu/data/templates/science_or_fiction.j2x
+++ b/sgu/data/templates/science_or_fiction.j2x
@@ -1,7 +1,6 @@
{{anchor|sof}}
{{anchor|theme}}
== Science or Fiction ( ((( start_time ))) ) ==
-((( source )))
{{SOFinfo
|theme = ((( theme )))
|hiddentheme =
@@ -31,4 +30,5 @@
|win =
|swept =
}}
+
((( transcript )))
diff --git a/sgu/data/templates/tiktok.j2x b/sgu/data/templates/tiktok.j2x
index 385a6c0..7fa0779 100644
--- a/sgu/data/templates/tiktok.j2x
+++ b/sgu/data/templates/tiktok.j2x
@@ -1,5 +1,5 @@
{{anchor|tiktok}}
== From TikTok: ((( title ))) ( ((( start_time ))) ) ==
-((( source )))
((( url )))
+
((( transcript )))
diff --git a/sgu/data/templates/unknown.j2x b/sgu/data/templates/unknown.j2x
index 235ebcf..92fc4c9 100644
--- a/sgu/data/templates/unknown.j2x
+++ b/sgu/data/templates/unknown.j2x
@@ -1,3 +1,3 @@
== ((( title ))) ( ((( start_time ))) ) ==
-((( source )))
+
((( transcript )))
diff --git a/sgu/data/templates/whats_the_word.j2x b/sgu/data/templates/whats_the_word.j2x
index 9f59adf..48c1b5d 100644
--- a/sgu/data/templates/whats_the_word.j2x
+++ b/sgu/data/templates/whats_the_word.j2x
@@ -1,5 +1,5 @@
{{anchor|wtw}}
== What's the Word? ( ((( start_time ))) ) ==
-((( source )))
* ((( word )))
+
((( transcript )))
diff --git a/sgu/episode_segments.py b/sgu/episode_segments.py
index 52ed8af..0beb045 100644
--- a/sgu/episode_segments.py
+++ b/sgu/episode_segments.py
@@ -10,7 +10,7 @@
from sgu.exceptions import StartTimeNotFoundError
from sgu.helpers import are_strings_in_string, find_single_element, string_is_url
from sgu.template_environment import template_env
-from sgu.transcript_formatting import format_transcript_for_wiki
+from sgu.transcript_formatting import format_time, format_transcript_for_wiki
if TYPE_CHECKING:
from sgu.transcription import DiarizedTranscript
@@ -62,7 +62,7 @@ def to_wiki(self) -> str:
template = template_env.get_template(f"{self.template_name}.j2x")
template_values = self.get_template_values()
return template.render(
- start_time=self.start_time,
+ start_time=format_time(self.start_time),
transcript=format_transcript_for_wiki(self.transcript),
source=f"",
**template_values,
diff --git a/sgu/transcript_formatting.py b/sgu/transcript_formatting.py
index 2ca3f33..f6f6671 100644
--- a/sgu/transcript_formatting.py
+++ b/sgu/transcript_formatting.py
@@ -9,19 +9,28 @@ def format_transcript_for_wiki(transcript: "DiarizedTranscript") -> str:
transcript = _join_speaker_segments(transcript)
_abbreviate_speakers(transcript)
- text_segments: list[str] = []
- for transcript_chunk in transcript:
- start_time = _format_time(transcript_chunk["start"])
- end_time = _format_time(transcript_chunk["end"])
+ text_segments = [
+ f"'''{transcript_chunk['speaker']}''':{transcript_chunk['text']}" for transcript_chunk in transcript
+ ]
- text_segments.append(f"")
- text_segments.append(f"'''{transcript_chunk['speaker']}''':{transcript_chunk['text']}
")
+ return "\n\n".join(text_segments)
- return "\n".join(text_segments)
-def _format_time(time: float) -> str:
-    return f"{int(time) // 3600:02d}:{int(time) // 60 % 60:02d}:{int(time) % 60:02d}"
+def format_time(time: float | None) -> str:
+    """Format a time in seconds as h:mm:ss, or mm:ss when under one hour.
+
+    Fractional seconds are truncated. ``None`` (an unknown time) yields "???";
+    0 is a valid time and is rendered as "00:00".
+    """
+    if time is None:
+        return "???"
+
+    minutes, seconds = divmod(int(time), 60)
+    hours, minutes = divmod(minutes, 60)
+    if hours:
+        return f"{hours}:{minutes:02d}:{seconds:02d}"
+    return f"{minutes:02d}:{seconds:02d}"
def _join_speaker_segments(transcript: "DiarizedTranscript") -> "DiarizedTranscript":
diff --git a/sgu/wiki.py b/sgu/wiki.py
index 45fb913..bda9500 100644
--- a/sgu/wiki.py
+++ b/sgu/wiki.py
@@ -47,7 +47,8 @@ async def create_podcast_wiki_page(client: "Session", podcast: "PodcastEpisode")
# Above: Generic actions
# Below: Wiki-specific actions
- # convert segments to wiki
+ # we must grab speaker data before we convert transcript to wiki
+ speakers = {s["speaker"].lower() for s in episode_data.transcript}
wiki_segments = "\n".join(s.to_wiki() for s in episode_segments)
qotw_segment = _extract_quote_of_the_week_for_wiki(episode_segments)
@@ -58,8 +59,6 @@ async def create_podcast_wiki_page(client: "Session", podcast: "PodcastEpisode")
episode_image_url = get_episode_image_url(episode_data.show_notes)
episode_icon_name = _upload_image_to_wiki(client, episode_image_url, episode_data.podcast.episode_number)
- speakers = {s["speaker"].lower() for s in episode_data.transcript}
-
logger.debug("Creating wiki page...")
wiki_page = _construct_wiki_page(episode_data, episode_icon_name, wiki_segments, qotw_segment, speakers)
_edit_page(client, page_text=wiki_page) # TODO: Change for "Create page"