Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added image embedding support for epub #84

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions ebook/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ def chapter_html(story, titleprefix=None, normalize=False):
# This is a Section
chapters.extend(chapter_html(chapter, titleprefix=title, normalize=normalize))
else:
# Add all pictures on this chapter as well.
for image in chapter.images:
# For/else syntax, check if the image path already exists, if it doesn't add the image.
# Duplicates are not allowed in the format.
for other_file in chapters:
if other_file.path == image.path:
break
else:
chapters.append(EpubFile(path=image.path, contents=image.contents, filetype=image.content_type))

title = titleprefix and f'{titleprefix}: {title}' or title
contents = chapter.contents
if normalize:
Expand Down
3 changes: 2 additions & 1 deletion examples/pale-withextras.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"content_title_selector": "h1.entry-title",
"content_text_selector": ".entry-content",
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
"next_selector": "a[rel=\"next\"]"
"next_selector": "a[rel=\"next\"]",
"image_selector": ".entry-content img"
}
7 changes: 5 additions & 2 deletions examples/pale.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
"url": "https://palewebserial.wordpress.com/table-of-contents/",
"title": "Pale",
"author": "Wildbow",
"content_selector": "#main",
"content_title_selector": "h1.entry-title",
"content_text_selector": ".entry-content",
"chapter_selector": "article .entry-content > p a",
"content_selector": "article .entry-content",
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']"
"filter_selector": ".sharedaddy, style, a[href*='palewebserial.wordpress.com']",
"image_selector": ".entry-content img"
}
3 changes: 2 additions & 1 deletion examples/unsong.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"content_title_selector": ".pjgm-posttitle",
"content_text_selector": ".pjgm-postcontent",
"filter_selector": ".sharedaddy",
"next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])"
"next_selector": "a[rel=\"next\"]:not([href*=\"prologue\"])",
"cover_url": "https://i.imgur.com/d9LvKMc.png"
}
8 changes: 8 additions & 0 deletions sites/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,19 @@ def _default_uuid_string(self):
return str(uuid.UUID(int=rd.getrandbits(8*16), version=4))


@attr.s
class Image:
    """An image gathered for a chapter, ready to be packaged with it."""

    # Location of the image inside the generated book.
    path = attr.ib()
    # Raw payload of the image as it was downloaded.
    contents = attr.ib()
    # Media type reported for the payload (e.g. taken from Content-Type).
    content_type = attr.ib()


@attr.s
class Chapter:
    """A single chapter of a story together with the images it references."""

    # Display title of the chapter.
    title = attr.ib()
    # Chapter body markup.
    contents = attr.ib()
    # Date associated with the chapter; False when none was supplied.
    date = attr.ib(default=False)
    # Image objects belonging to this chapter; the factory gives every
    # instance its own list instead of one shared default.
    images = attr.ib(default=attr.Factory(list))


@attr.s
Expand Down
37 changes: 36 additions & 1 deletion sites/arbitrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import json
import re
import os.path
from . import register, Site, Section, Chapter
import urllib
from . import register, Site, Section, Chapter, Image

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -42,6 +43,9 @@ class SiteDefinition:
filter_selector = attr.ib(default=False)
cover_url = attr.ib(default='')

# If present, use to also download the images and embed them into the epub.
image_selector = attr.ib(default=False)


@register
class Arbitrary(Site):
Expand Down Expand Up @@ -132,11 +136,42 @@ def _chapter(self, url, definition, title=False):

self._clean(content)

images = []
if definition.image_selector:
images = self.load_images(content, definition.image_selector)

chapters.append(Chapter(
title=title,
contents=content.prettify(),
# TODO: better date detection
date=datetime.datetime.now(),
images=images
))

return chapters

def load_images(self, content, selector):
    """Download the images matched by ``selector`` and relink them locally.

    Every element of ``content`` matched by ``selector`` that carries an
    absolute http(s) ``src`` is fetched through ``self.session``. After a
    successful download the element's ``src`` is rewritten to
    ``'../chapter_images/<url path>'`` and any ``srcset`` attribute is
    dropped. Elements whose image cannot be fetched are left untouched.

    Args:
        content: Parsed markup exposing ``select()``; modified in place.
        selector: CSS selector identifying the image elements.

    Returns:
        A list of ``Image`` objects, one per distinct local path.
    """
    # A bare ``import urllib`` does not guarantee that the ``parse``
    # submodule is loaded, so import it explicitly here.
    import mimetypes
    import urllib.parse

    images = []
    fetched_paths = set()
    for image in content.select(selector):
        if not image.has_attr('src'):
            continue

        image_url = image['src']
        url = urllib.parse.urlparse(image_url)
        relative_path = url.path.strip('/')
        if url.scheme not in ('http', 'https') or not relative_path:
            # data: URIs, relative links and bare hosts either cannot be
            # fetched or cannot be mapped to a file name; keep the markup.
            logger.warning("Skipping image with unsupported src: %s", image_url[:80])
            continue
        local_path = 'chapter_images/' + relative_path

        # The same picture may appear several times in one chapter:
        # download and register it only once, but relink every element.
        if local_path not in fetched_paths:
            image_res = self.session.get(image_url)
            # NOTE(review): assumes a requests-style response (``ok``).
            if not image_res.ok:
                # Never store an error page as image data.
                logger.warning("Could not download image: %s", image_url)
                continue

            content_type = (
                image_res.headers.get('Content-Type')
                or mimetypes.guess_type(local_path)[0]
                or 'application/octet-stream'
            )
            # Keep only the bare media type, without parameters such as
            # '; charset=...'.
            content_type = content_type.split(';', 1)[0].strip()

            images.append(Image(
                path=local_path,
                contents=image_res.content,
                content_type=content_type
            ))
            fetched_paths.add(local_path)

        # Replace 'src'.
        image['src'] = '../' + local_path
        if image.has_attr('srcset'):
            del image['srcset']

    return images
Loading