
Commit

Merge pull request #2243 from NilanEkanayake/dev
Fix Syosetu and Fanstrans
dipu-bd committed Feb 12, 2024
2 parents a35ee6e + 79c47f4 commit 6eac1c0
Showing 2 changed files with 34 additions and 19 deletions.
3 changes: 3 additions & 0 deletions sources/en/f/fanstrans.py
@@ -26,6 +26,8 @@ def initialize(self) -> None:
                 r"^Get on Patreon",
                 r"^Check out other novels on Fan’s Translation~",
                 r"^to get Notification for latest Chapter Releases",
+                r"^Can’t wait to read more? Want to show your support? Click",
+                r"^to be a sponsor and get additional chapters ahead of time!",
             ]
         )
         self.cleaner.bad_tags.update(["a"])
@@ -36,6 +38,7 @@ class FansTranslations(Crawler):
 
     def initialize(self) -> None:
         self.cleaner.bad_tags.update(["h3"])
+        self.init_executor(4)
 
     def search_novel(self, query):
         query = query.lower().replace(" ", "+")
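Note (not part of the commit): the two new bad_text_regex entries strip sponsor/announcement lines from chapter bodies, and init_executor(4) appears to cap this source at four concurrent download workers; the exact behaviour depends on lncrawl's Crawler base class. A minimal standalone sketch of that bounded-concurrency idea, assuming requests is available (download_all is a hypothetical helper, not lncrawl API):

    # Sketch only: roughly what a 4-worker limit means for chapter downloads.
    # This is an assumption about intent, not lncrawl's actual implementation.
    from concurrent.futures import ThreadPoolExecutor

    import requests

    def download_all(urls):
        # At most 4 requests are in flight at any time.
        with ThreadPoolExecutor(max_workers=4) as pool:
            return list(pool.map(lambda u: requests.get(u, timeout=30).text, urls))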
50 changes: 31 additions & 19 deletions sources/jp/s/syosetu.py
@@ -11,6 +11,9 @@ class SyosetuCrawler(Crawler):
     has_mtl = True
     base_url = "https://ncode.syosetu.com/"
 
+    def initialize(self) -> None:
+        self.init_executor(2)
+
     def search_novel(self, query):
         soup = self.get_soup(search_url % quote_plus(query))
         results = []
@@ -45,28 +48,37 @@ def read_novel_info(self):
             self.novel_author = author_tag.text.strip()
 
         # Syosetu calls parts "chapters"
+        soups = []
+        pager_last = soup.select_one("a[class='novelview_pager-last']")
+        if pager_last and 'href' in pager_last.attrs:
+            page_num = int(pager_last["href"].split("=")[-1])
+            for x in range(1, page_num + 1):
+                soup = self.get_soup(f'{self.novel_url}?p={x}')
+                soups.append(soup)
+        else:
+            soups.append(soup)
+
+        volume_id = 0
         chapter_id = 0
-        volume = {"id": 0}
-        self.volumes.append(volume)
-        for tag in soup.select(".index_box .chapter_title, .index_box .subtitle a"):
-            if 'chapter_title' in tag.attrs.get('class', ''):
-                # Part/volume (there might be none)
-                volume = {
-                    "id": volume['id'] + 1,
-                    "title": tag.text.strip(),
-                }
-                self.volumes.append(volume)
-            elif tag.name == "a":
-                # Chapter
-                chapter_id += 1
-                self.chapters.append(
-                    {
+        self.volumes.append({'id': 0})
+        for soup in soups:
+            for tag in soup.select(".index_box .chapter_title, .index_box .subtitle a"):
+                if 'chapter_title' in tag.attrs.get('class', ''):
+                    # Part/volume (there might be none)
+                    volume_id += 1
+                    self.volumes.append({
+                        'id': volume_id,
+                        'title': tag.text.strip(),
+                    })
+                elif tag.name == "a":
+                    # Chapter
+                    chapter_id += 1
+                    self.chapters.append({
                         "id": chapter_id,
-                        "volume": volume['id'],
-                        "title": tag.text.strip() or ("Chapter %d" % chapter_id),
+                        "volume": volume_id,
+                        "title": tag.text.strip(),
                         "url": self.absolute_url(tag["href"]),
-                    }
-                )
+                    })
 
     def download_chapter_body(self, chapter):
         soup = self.get_soup(chapter["url"])
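Note (not part of the commit): the updated read_novel_info walks every page of a paginated table of contents instead of only the first one. It reads the last page number from the "novelview_pager-last" link and fetches ?p=1 through ?p=N. A minimal standalone sketch of that pagination step, assuming requests and BeautifulSoup are available (fetch_toc_pages is a hypothetical helper, not part of the crawler):

    # Sketch only: fetch every TOC page the way the updated crawler does.
    import requests
    from bs4 import BeautifulSoup

    def fetch_toc_pages(novel_url):
        first = BeautifulSoup(requests.get(novel_url).text, "html.parser")
        pager_last = first.select_one("a[class='novelview_pager-last']")
        if not (pager_last and "href" in pager_last.attrs):
            return [first]  # single-page TOC
        # The "last page" link ends with "?p=<last page number>".
        page_num = int(pager_last["href"].split("=")[-1])
        return [
            BeautifulSoup(requests.get(f"{novel_url}?p={x}").text, "html.parser")
            for x in range(1, page_num + 1)
        ]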

