Skip to content

Commit

Permalink
Clean up wiki module
Browse files: browse the repository at this point in the history
  • Loading branch information
mheguy committed Jul 26, 2024
1 parent 7e2b774 commit 6d0dbfe
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 30 deletions.
17 changes: 15 additions & 2 deletions sgu/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@

from sgu.config import CUSTOM_HEADERS
from sgu.custom_logger import logger
from sgu.data_gathering import gather_data
from sgu.parsers.episode_data import convert_episode_data_to_episode_segments
from sgu.parsers.rss_feed import get_podcast_episodes
from sgu.transcription_splitting import add_transcript_to_segments
from sgu.wiki import create_podcast_wiki_page, episode_has_wiki_page

load_dotenv()
Expand All @@ -14,7 +17,8 @@
async def main() -> None:
"""Main function that starts the program and processes podcast episodes.
This function retrieves podcast episodes from an RSS feed, checks if each episode has a wiki page,
This function retrieves podcast episodes from an RSS feed,
checks if each episode has a wiki page,
and creates a wiki page for episodes that don't have one.
"""
logger.success("Starting...")
Expand All @@ -35,7 +39,16 @@ async def main() -> None:
logger.info("Episode has a wiki page. Stopping.")
break

await create_podcast_wiki_page(client, podcast_episode)
logger.debug("Gathering all data...")
episode_data = await gather_data(client, podcast_episode)

logger.debug("Converting data to segments...")
episode_segments = convert_episode_data_to_episode_segments(episode_data)

logger.debug("Merging transcript into episode segments...")
episode_segments = add_transcript_to_segments(episode_data.transcript, episode_segments)

await create_podcast_wiki_page(client, episode_data, episode_segments)

break # TODO: Maybe remove this at some point. It's just making sure that we don't process multiple episodes

Expand Down
30 changes: 2 additions & 28 deletions sgu/wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,47 +6,23 @@

from sgu.config import WIKI_API_BASE, WIKI_EPISODE_URL_BASE
from sgu.custom_logger import logger
from sgu.data_gathering import gather_data
from sgu.episode_segments import BaseSegment, QuoteSegment
from sgu.parsers.episode_data import convert_episode_data_to_episode_segments
from sgu.episode_segments import BaseSegment, QuoteSegment, Segments
from sgu.parsers.show_notes import get_episode_image_url
from sgu.template_environment import template_env
from sgu.transcription_splitting import add_transcript_to_segments

if TYPE_CHECKING:
from requests import Session

from sgu.data_gathering import EpisodeData
from sgu.parsers.rss_feed import PodcastEpisode


# region public functions


async def create_podcast_wiki_page(client: "Session", podcast: "PodcastEpisode"):
async def create_podcast_wiki_page(client: "Session", episode_data: "EpisodeData", episode_segments: Segments) -> None:
"""Creates a wiki page for a podcast episode.
This function gathers all the necessary data for the episode, merges the data into segments,
and converts the segments into wiki page content.
Args:
client (requests.Session): The HTTP client session.
podcast (PodcastEpisode): The podcast episode.
Returns:
str: The wiki page content.
"""
logger.debug("Gathering all data...")
episode_data = await gather_data(client, podcast)

logger.debug("Converting data to segments...")
episode_segments = convert_episode_data_to_episode_segments(episode_data)

logger.debug("Merging transcript into episode segments...")
episode_segments = add_transcript_to_segments(episode_data.transcript, episode_segments)

# Above: Generic actions
# Below: Wiki-specific actions
# we must grab speaker data before we convert transcript to wiki
speakers = {s["speaker"].lower() for s in episode_data.transcript}
wiki_segments = "\n".join(s.to_wiki() for s in episode_segments)
Expand Down Expand Up @@ -94,8 +70,6 @@ def log_into_wiki(client: "Session") -> str:

# endregion
# region private functions


def _find_image_upload(client: "Session", episode_number: str) -> str:
params = {"action": "query", "list": "allimages", "aiprefix": episode_number, "format": "json"}
response = client.get(WIKI_API_BASE, params=params)
Expand Down

0 comments on commit 6d0dbfe

Please sign in to comment.