Skip to content

Commit

Permalink
fix #1146 update lxml with html5lib
Browse files (browse the repository at this point in the history)
  • Loading branch information
Nandaka committed Aug 16, 2022
1 parent 03636a5 commit 0999bd4
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions PixivImage.py
Original file line number Diff line number Diff line change
@@ -254,7 +254,7 @@ def ParseInfo(self, page, writeRawJSON):

# Strip HTML tags from caption once they have been collected by the above statement.
if self.stripHTMLTagsFromCaption:
- self.imageCaption = BeautifulSoup(self.imageCaption, "lxml").text
+ self.imageCaption = BeautifulSoup(self.imageCaption, features="html5lib").text

# Issue #1064
if "titleCaptionTranslation" in root:
@@ -268,7 +268,7 @@ def ParseInfo(self, page, writeRawJSON):
self.translated_work_caption = root["titleCaptionTranslation"]["workCaption"]
self.parse_url_from_caption(self.translated_work_caption)
if self.stripHTMLTagsFromCaption:
- self.translated_work_caption = BeautifulSoup(self.translated_work_caption, "lxml").text
+ self.translated_work_caption = BeautifulSoup(self.translated_work_caption, features="html5lib").text

def parse_url_from_caption(self, caption_to_parse):
parsed = BeautifulSoup(caption_to_parse, features="html5lib")
Expand Down

0 comments on commit 0999bd4

Please sign in to comment.