From d6a33f1d21a8cbb34b584895554147ad97e97a72 Mon Sep 17 00:00:00 2001 From: boxydog Date: Fri, 1 Dec 2023 11:27:16 -0600 Subject: [PATCH] Medium post importer (from medium export) --- docs/content.rst | 4 +- docs/importer.rst | 13 +- pelican/tests/content/medium_post_content.txt | 4 + ...2017-04-21_-medium-post--d1bf01d62ba3.html | 72 ++++++++ pelican/tests/test_generators.py | 37 +++- pelican/tests/test_importer.py | 83 +++++++++ pelican/tools/pelican_import.py | 165 +++++++++++++++++- 7 files changed, 357 insertions(+), 21 deletions(-) create mode 100644 pelican/tests/content/medium_post_content.txt create mode 100644 pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html diff --git a/docs/content.rst b/docs/content.rst index cacacea9a..46db11405 100644 --- a/docs/content.rst +++ b/docs/content.rst @@ -439,8 +439,8 @@ For **Markdown**, one must rely on an extension. For example, using the `mdx_inc Importing an existing site ========================== -It is possible to import your site from WordPress, Tumblr, Dotclear, and RSS -feeds using a simple script. See :ref:`import`. +It is possible to import your site from several other blogging sites +(like WordPress, Tumblr, ..) using a simple script. See :ref:`import`. Translations ============ diff --git a/docs/importer.rst b/docs/importer.rst index 997a46323..093ef465e 100644 --- a/docs/importer.rst +++ b/docs/importer.rst @@ -11,6 +11,7 @@ software to reStructuredText or Markdown. 
The supported import formats are: - Blogger XML export - Dotclear export +- Medium export - Tumblr API - WordPress XML export - RSS/Atom feed @@ -65,6 +66,7 @@ Optional arguments -h, --help Show this help message and exit --blogger Blogger XML export (default: False) --dotclear Dotclear export (default: False) + --medium Medium export (default: False) --tumblr Tumblr API (default: False) --wpfile WordPress XML export (default: False) --feed Feed to parse (default: False) @@ -80,8 +82,7 @@ Optional arguments (default: False) --filter-author Import only post from the specified author --strip-raw Strip raw HTML code that can't be converted to markup - such as flash embeds or iframes (wordpress import - only) (default: False) + such as flash embeds or iframes (default: False) --wp-custpost Put wordpress custom post types in directories. If used with --dir-cat option directories will be created as "/post_type/category/" (wordpress import only) @@ -113,6 +114,14 @@ For Dotclear:: $ pelican-import --dotclear -o ~/output ~/backup.txt +For Medium:: + + $ pelican-import --medium -o ~/output ~/medium-export/posts/ + +The Medium export is a zip file. Unzip it, and point this tool to the +"posts" subdirectory. For more information on how to export, see +https://help.medium.com/hc/en-us/articles/115004745787-Export-your-account-data. + For Tumblr:: $ pelican-import --tumblr -o ~/output --blogname= diff --git a/pelican/tests/content/medium_post_content.txt b/pelican/tests/content/medium_post_content.txt new file mode 100644 index 000000000..5e21881cb --- /dev/null +++ b/pelican/tests/content/medium_post_content.txt @@ -0,0 +1,4 @@ + +

Title header

A paragraph of content.

Paragraph number two.

A list:

  1. One.
  2. Two.
  3. Three.

A link: link text.

Header 2

A block quote:

quote words strong words

after blockquote

A figure caption.

A final note: Cross-Validated has sometimes been helpful.


Next: Next post +

+

By User Name on .

Canonical link

Exported from Medium on December 1, 2023.

diff --git a/pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html b/pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html new file mode 100644 index 000000000..02d272dc0 --- /dev/null +++ b/pelican/tests/content/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html @@ -0,0 +1,72 @@ +A title diff --git a/pelican/tests/test_generators.py b/pelican/tests/test_generators.py index af6f5b1ab..8c257b550 100644 --- a/pelican/tests/test_generators.py +++ b/pelican/tests/test_generators.py @@ -264,6 +264,7 @@ def test_generate_feeds_override_url(self): def test_generate_context(self): articles_expected = [ + ["A title", "published", "medium_posts", "article"], ["Article title", "published", "Default", "article"], [ "Article with markdown and summary metadata multi", @@ -391,13 +392,24 @@ def test_generate_categories(self): # terms of process order will define the name for that category categories = [cat.name for cat, _ in self.generator.categories] categories_alternatives = ( - sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]), - sorted(["Default", "TestCategory", "yeah", "test", "指導書"]), + sorted( + ["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"] + ), + sorted( + ["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"] + ), ) self.assertIn(sorted(categories), categories_alternatives) # test for slug categories = [cat.slug for cat, _ in self.generator.categories] - categories_expected = ["default", "testcategory", "yeah", "test", "zhi-dao-shu"] + categories_expected = [ + "default", + "testcategory", + "medium_posts", + "yeah", + "test", + "zhi-dao-shu", + ] self.assertEqual(sorted(categories), sorted(categories_expected)) def test_do_not_use_folder_as_category(self): @@ -549,7 +561,8 @@ def test_period_archives_context(self): granularity: {period["period"] for period in periods} for granularity, periods in period_archives.items() } - expected = {"year": {(1970,), (2010,), (2012,), 
(2014,)}} + self.maxDiff = None + expected = {"year": {(1970,), (2010,), (2012,), (2014,), (2017,)}} self.assertEqual(expected, abbreviated_archives) # Month archives enabled: @@ -570,7 +583,7 @@ def test_period_archives_context(self): for granularity, periods in period_archives.items() } expected = { - "year": {(1970,), (2010,), (2012,), (2014,)}, + "year": {(1970,), (2010,), (2012,), (2014,), (2017,)}, "month": { (1970, "January"), (2010, "December"), @@ -578,6 +591,7 @@ def test_period_archives_context(self): (2012, "November"), (2012, "October"), (2014, "February"), + (2017, "April"), }, } self.assertEqual(expected, abbreviated_archives) @@ -602,7 +616,7 @@ def test_period_archives_context(self): for granularity, periods in period_archives.items() } expected = { - "year": {(1970,), (2010,), (2012,), (2014,)}, + "year": {(1970,), (2010,), (2012,), (2014,), (2017,)}, "month": { (1970, "January"), (2010, "December"), @@ -610,6 +624,7 @@ def test_period_archives_context(self): (2012, "November"), (2012, "October"), (2014, "February"), + (2017, "April"), }, "day": { (1970, "January", 1), @@ -619,6 +634,7 @@ def test_period_archives_context(self): (2012, "October", 30), (2012, "October", 31), (2014, "February", 9), + (2017, "April", 21), }, } self.assertEqual(expected, abbreviated_archives) @@ -836,8 +852,12 @@ def test_standard_metadata_in_default_metadata(self): categories = sorted([category.name for category, _ in generator.categories]) categories_expected = [ - sorted(["Default", "TestCategory", "yeah", "test", "指導書"]), - sorted(["Default", "TestCategory", "Yeah", "test", "指導書"]), + sorted( + ["Default", "TestCategory", "medium_posts", "yeah", "test", "指導書"] + ), + sorted( + ["Default", "TestCategory", "medium_posts", "Yeah", "test", "指導書"] + ), ] self.assertIn(categories, categories_expected) @@ -864,6 +884,7 @@ def test_article_order_by(self): generator.generate_context() expected = [ + "A title", "An Article With Code Block To Test Typogrify Ignore", "Article 
title", "Article with Nonconformant HTML meta tags", diff --git a/pelican/tests/test_importer.py b/pelican/tests/test_importer.py index 05ef5bbdb..916c1183d 100644 --- a/pelican/tests/test_importer.py +++ b/pelican/tests/test_importer.py @@ -21,6 +21,10 @@ get_attachments, tumblr2fields, wp2fields, + mediumpost2fields, + mediumposts2fields, + strip_medium_post_content, + medium_slug, ) from pelican.utils import path_to_file_url, slugify @@ -708,3 +712,82 @@ def get_posts(api_key, blogname, offset=0): posts, posts, ) + + +class TestMediumImporter(TestCaseWithCLocale): + def setUp(self): + super().setUp() + self.test_content_root = "pelican/tests/content" + # The content coming out of parsing is similar, but not the same. + # Beautiful soup rearranges the order of attributes, for example. + # So, we keep a copy of the content for the test. + content_filename = f"{self.test_content_root}/medium_post_content.txt" + with open(content_filename, encoding="utf-8") as the_content_file: + # Many editors and scripts add a final newline, so live with that + # in our test + the_content = the_content_file.read() + assert the_content[-1] == "\n" + the_content = the_content[:-1] + self.post_tuple = ( + "A title", + the_content, + # slug: + "2017-04-21-medium-post", + "2017-04-21 17:11", + "User Name", + None, + (), + "published", + "article", + "html", + ) + + def test_mediumpost2field(self): + """Parse one post""" + post_filename = f"{self.test_content_root}/medium_posts/2017-04-21_-medium-post--d1bf01d62ba3.html" + val = mediumpost2fields(post_filename) + self.assertEqual(self.post_tuple, val, val) + + def test_mediumposts2field(self): + """Parse all posts in an export directory""" + posts = [ + fields + for fields in mediumposts2fields(f"{self.test_content_root}/medium_posts") + ] + self.assertEqual(1, len(posts)) + self.assertEqual(self.post_tuple, posts[0]) + + def test_strip_content(self): + """Strip out unhelpful tags""" + html_doc = ( + "
This keeps lots of tags, but not " + "the
section
tags
" + ) + soup = BeautifulSoup(html_doc, "html.parser") + self.assertEqual( + "This keeps lots of tags, but not the section tags", + strip_medium_post_content(soup), + ) + + def test_medium_slug(self): + # Remove hex stuff at the end + self.assertEqual( + "2017-04-27_A-long-title", + medium_slug( + "medium-export/posts/2017-04-27_A-long-title--2971442227dd.html" + ), + ) + # Remove "--DRAFT" at the end + self.assertEqual( + "2017-04-27_A-long-title", + medium_slug("medium-export/posts/2017-04-27_A-long-title--DRAFT.html"), + ) + # Remove both (which happens) + self.assertEqual( + "draft_How-to-do", medium_slug("draft_How-to-do--DRAFT--87225c81dddd.html") + ) + # If no hex stuff, leave it alone + self.assertEqual( + "2017-04-27_A-long-title", + medium_slug("medium-export/posts/2017-04-27_A-long-title.html"), + ) diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 681a5c453..eb343860d 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -15,6 +15,8 @@ from urllib.parse import quote, urlparse, urlsplit, urlunsplit from urllib.request import urlretrieve +import dateutil.parser + # because logging.setLoggerClass has to be called before logging.getLogger from pelican.log import init from pelican.settings import DEFAULT_CONFIG @@ -114,19 +116,25 @@ def _multi_replace(dic, string): return content -def xml_to_soup(xml): - """Opens an xml file""" +def _import_bs4(): + """Import and return bs4, otherwise sys.exit.""" try: - from bs4 import BeautifulSoup + import bs4 except ImportError: error = ( 'Missing dependency "BeautifulSoup4" and "lxml" required to ' "import XML files." 
) sys.exit(error) + return bs4 + + +def file_to_soup(xml, features="xml"): + """Reads a file, returns soup.""" + bs4 = _import_bs4() with open(xml, encoding="utf-8") as infile: xmlfile = infile.read() - soup = BeautifulSoup(xmlfile, "xml") + soup = bs4.BeautifulSoup(xmlfile, features) return soup @@ -140,7 +148,7 @@ def get_filename(post_name, post_id): def wp2fields(xml, wp_custpost=False): """Opens a wordpress XML file, and yield Pelican fields""" - soup = xml_to_soup(xml) + soup = file_to_soup(xml) items = soup.rss.channel.findAll("item") for item in items: if item.find("status").string in ["publish", "draft"]: @@ -210,7 +218,7 @@ def wp2fields(xml, wp_custpost=False): def blogger2fields(xml): """Opens a blogger XML file, and yield Pelican fields""" - soup = xml_to_soup(xml) + soup = file_to_soup(xml) entries = soup.feed.findAll("entry") for entry in entries: raw_kind = entry.find( @@ -536,6 +544,133 @@ def tumblr2fields(api_key, blogname): posts = _get_tumblr_posts(api_key, blogname, offset) +def strip_medium_post_content(soup) -> str: + """Strip some tags and attributes from medium post content. + + For example, the 'section' and 'div' tags cause trouble while rendering. + + The problem with these tags is you can get a section divider (--------------) + that is not between two pieces of content. For example: + + Some text. + + .. container:: section-divider + + -------------- + + .. container:: section-content + + More content. + + In this case, pandoc complains: "Unexpected section title or transition." + + Also, the "id" and "name" attributes in tags cause similar problems. They show + up in .rst as extra junk that separates transitions. 
+ """ + # Remove tags + # section and div cause problems + # footer also can cause problems, and has nothing we want to keep + # See https://stackoverflow.com/a/8439761 + invalid_tags = ["section", "div", "footer"] + for tag in invalid_tags: + for match in soup.findAll(tag): + match.replaceWithChildren() + + # Remove attributes + # See https://stackoverflow.com/a/9045719 + invalid_attributes = ["name", "id", "class"] + bs4 = _import_bs4() + for tag in soup.descendants: + if isinstance(tag, bs4.element.Tag): + tag.attrs = { + key: value + for key, value in tag.attrs.items() + if key not in invalid_attributes + } + + # Get the string of all content, keeping other tags + all_content = "".join(str(element) for element in soup.contents) + return all_content + + +def mediumpost2fields(filepath: str) -> tuple: + """Take an HTML post from a medium export, return Pelican fields.""" + + soup = file_to_soup(filepath, "html.parser") + if not soup: + raise ValueError(f"{filepath} could not be parsed by beautifulsoup") + kind = "article" + + content = soup.find("section", class_="e-content") + if not content: + raise ValueError(f"{filepath}: Post has no content") + + title = soup.find("title").string or "" + + raw_date = soup.find("time", class_="dt-published") + date = None + if raw_date: + # This datetime can include timezone, e.g., "2017-04-21T17:11:55.799Z" + # python before 3.11 can't parse the timezone using datetime.fromisoformat + # See also https://docs.python.org/3.10/library/datetime.html#datetime.datetime.fromisoformat + # "This does not support parsing arbitrary ISO 8601 strings" + # So, we use dateutil.parser, which can handle it. 
+ date_object = dateutil.parser.parse(raw_date.attrs["datetime"]) + date = date_object.strftime("%Y-%m-%d %H:%M") + status = "published" + else: + status = "draft" + author = soup.find("a", class_="p-author h-card") + if author: + author = author.string + + # Now that we're done with classes, we can strip the content + content = strip_medium_post_content(content) + + # medium HTML export doesn't have tag or category + # RSS feed has tags, but it doesn't have all the posts. + tags = () + + slug = medium_slug(filepath) + + # TODO: make the fields a python dataclass + return ( + title, + content, + slug, + date, + author, + None, + tags, + status, + kind, + "html", + ) + + +def medium_slug(filepath: str) -> str: + """Make the filepath of a medium exported file into a slug.""" + # slug: filename without extension + slug = os.path.basename(filepath) + slug = os.path.splitext(slug)[0] + # A medium export filename looks like date_-title-...html + # But, RST doesn't like "_-" (see https://github.com/sphinx-doc/sphinx/issues/4350) + # so get rid of it + slug = slug.replace("_-", "-") + # drop the hex string medium puts on the end of the filename, why keep it. 
# e.g., "-a8a8a8a8" or "---a9a9a9a9" + # also: drafts don't need "--DRAFT" + slug = re.sub(r"((-)+([0-9a-f]+|DRAFT))+$", "", slug) + return slug + + +def mediumposts2fields(medium_export_dir: str): + """Take HTML posts in a medium export directory, and yield Pelican fields.""" + for file in os.listdir(medium_export_dir): + filename = os.fsdecode(file) + yield mediumpost2fields(os.path.join(medium_export_dir, filename)) + + def feed2fields(file): """Read a feed and yield pelican fields""" import feedparser @@ -711,7 +846,7 @@ def get_attachments(xml): """returns a dictionary of posts that have attachments with a list of the attachment_urls """ - soup = xml_to_soup(xml) + soup = file_to_soup(xml) items = soup.rss.channel.findAll("item") names = {} attachments = [] @@ -837,6 +972,9 @@ def fields2pelican( posts_require_pandoc.append(filename) slug = not disable_slugs and filename or None + assert slug is None or filename == os.path.basename( + filename + ), f"filename is not a basename: {filename}" if wp_attach and attachments: try: @@ -984,6 +1122,9 @@ def main(): parser.add_argument( "--dotclear", action="store_true", dest="dotclear", help="Dotclear export" ) + parser.add_argument( + "--medium", action="store_true", dest="medium", help="Medium export" + ) parser.add_argument( "--tumblr", action="store_true", dest="tumblr", help="Tumblr export" ) @@ -1069,6 +1210,8 @@ def main(): input_type = "blogger" elif args.dotclear: input_type = "dotclear" + elif args.medium: + input_type = "medium" elif args.tumblr: input_type = "tumblr" elif args.wpfile: @@ -1077,8 +1220,8 @@ def main(): input_type = "feed" else: error = ( - "You must provide either --blogger, --dotclear, " - "--tumblr, --wpfile or --feed options" + "You must provide one of --blogger, --dotclear, " + "--medium, --tumblr, --wpfile or --feed options" ) exit(error) @@ -1097,12 +1240,16 @@ def main(): fields = blogger2fields(args.input) elif input_type == "dotclear": fields = dc2fields(args.input) + elif 
input_type == "medium": + fields = mediumposts2fields(args.input) elif input_type == "tumblr": fields = tumblr2fields(args.input, args.blogname) elif input_type == "wordpress": fields = wp2fields(args.input, args.wp_custpost or False) elif input_type == "feed": fields = feed2fields(args.input) + else: + raise ValueError(f"Unhandled input_type {input_type}") if args.wp_attach: attachments = get_attachments(args.input)