Skip to content

Commit

Permalink
Convert Wordpress caption to figure
Browse files Browse the repository at this point in the history
In Wordpress, inserting image with a caption can look like:

[caption id="attachment_42" caption="Image Description"]<a ...><img ... /></a>[/caption]
[caption id="attachment_42"]<a ...><img ... /></a> Image Description[/caption]
[caption id="attachment_42"]<img ... > Image Description[/caption]

Replace by an HTML figure tag
  • Loading branch information
mart-e committed Jun 5, 2023
1 parent 1f6b344 commit 0fa27fc
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 1 deletion.
47 changes: 46 additions & 1 deletion pelican/tests/content/wordpressexport.xml
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,52 @@ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></con
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[3]]></wp:meta_value>
</wp:postmeta>
</item>
</item>
<item>
<title>Caption on image</title>
<link>http://thisisa.test/?p=176</link>
<pubDate>Thu, 01 Jan 1970 00:00:00 +0000</pubDate>
<dc:creator>bob</dc:creator>
<guid isPermaLink="false">http://thisisa.test/?p=176</guid>
<description></description>
<content:encoded><![CDATA[Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
[caption attachment_id="42" align="aligncenter" width="300" caption="This is a pelican"]<img src="/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png"/>[/caption]
[caption attachment_id="43" align="aligncenter" width="300"]<img src="/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png" width="300" height="216" class="size-medium wp-image-1055" /> This also a pelican[/caption]
[caption attachment_id="44" align="aligncenter" width="300"]<a href="https://getpelican.com/"><img src="/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png" alt=""/> Yet another pelican[/caption]
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>176</wp:post_id>
<wp:post_date>2012-02-16 15:52:55</wp:post_date>
<wp:post_date_gmt>0000-00-00 00:00:00</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status>
<wp:ping_status>open</wp:ping_status>
<wp:post_name>caption-on-image</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_parent>0</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type>post</wp:post_type>
<wp:post_password></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<category domain="category" nicename="category-2"><![CDATA[Category 2]]></category>
<wp:postmeta>
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[3]]></wp:meta_value>
</wp:postmeta>
</item>
<item>
<title>A custom post in category 4</title>
<link>http://thisisa.test/?p=175</link>
Expand Down
27 changes: 27 additions & 0 deletions pelican/tests/test_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,33 @@ def r(f):
escaped_quotes = re.search(r'\\[\'"“”‘’]', md)
self.assertFalse(escaped_quotes)

def test_convert_caption_to_figure(self):
def r(f):
with open(f, encoding='utf-8') as infile:
return infile.read()
silent_f2p = mute(True)(fields2pelican)
test_post = filter(
lambda p: p[0].startswith("Caption on image"),
self.posts)
with temporary_folder() as temp:
md = [r(f) for f in silent_f2p(test_post, 'markdown', temp)][0]

caption = re.search(r'\[caption', md)
self.assertFalse(caption)
images = re.findall(r'\<img src="(.*?)"', md)
self.assertEqual(images, [
'/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png',
'/theme/img/xpelican-3.png.pagespeed.ic.m-NAIdRCOM.png',
'/theme/img/xpelican.png.pagespeed.ic.Rjep0025-y.png'
])

figcaptions = re.findall(r'\<figcaption\>(.*?)\<\/figcaption\>', md)
self.assertEqual(figcaptions, [
'This is a pelican',
'This also a pelican',
'Yet another pelican'
])


class TestBuildHeader(unittest.TestCase):
def test_build_header(self):
Expand Down
7 changes: 7 additions & 0 deletions pelican/tools/pelican_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ def _multi_replace(dic, string):
return re.sub(pattern, lambda m: dic[m.group()], string)
content = _multi_replace(pre_tags, content)

# convert [caption] tags into <figure>
content = re.sub(
r'\[caption(?:.*?)(?:caption=\"(.*?)\")?\]'
r'((?:\<a(?:.*?)\>)?(?:\<img.*?\>)(?:\<\/a\>)?)\s?(.*?)\[\/caption\]',
r'<figure>\n\2\n<figcaption>\1\3</figcaption>\n</figure>',
content)

return content


Expand Down

0 comments on commit 0fa27fc

Please sign in to comment.