Skip to content

Commit

Permalink
Fix page number retrieval (#25)
Browse files Browse the repository at this point in the history
* Added new page number retrieval logic

* Excluded utils and bumped version

* Fixed name variable xpath
  • Loading branch information
JonathanVusich authored May 12, 2019
1 parent 8840995 commit 56588b3
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pcpartpicker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
from .parts import *

__name__ = ["pcpartpicker"]
- __version__ = '1.2.2'
+ __version__ = '1.2.3'
__author__ = 'Jonathan Vusich'
__email__ = 'jonathanvusich@gmail.com'
2 changes: 1 addition & 1 deletion pcpartpicker/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def html_to_tokens(parse_args: Tuple[str, List[str]]) -> Tuple[str, List[List[st
part, raw_html = parse_args
html = [lxml.html.fromstring(html) for html in raw_html]
tags = [page.xpath(
- 'tr/td/a/p | tr/td[contains(@class, "td__spec")] | tr/td[@class="td__price"]')
+ 'tr/td/a/div[@class="td__nameWrapper"]/p | tr/td[contains(@class, "td__spec")] | tr/td[@class="td__price"]')
for page in html]
return part, [parse_elements(elements) for elements in tags]

Expand Down
9 changes: 6 additions & 3 deletions pcpartpicker/scraper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
from typing import List, Tuple, Iterable, Dict
import logging
+ import lxml.html

import aiohttp

Expand Down Expand Up @@ -63,10 +64,12 @@ async def _retrieve_page_numbers(self, session: aiohttp.ClientSession, part: str
"""

data: dict = await self._retrieve_page_data(session, part)
- num = data["result"]["paging_data"]["page_blocks"][-1]["page"]
- return [x for x in range(1, num + 1)]
+ num_data = data["result"]["paging_row"]
+ html_tags = lxml.html.fromstring(num_data)
+ tags = html_tags.xpath('section/ul/li')
+ return [x for x in range(1, len(tags) + 1)]

- async def _retrieve_page_data(self, session: aiohttp.ClientSession, part: str, page_num: int = 1) -> str:
+ async def _retrieve_page_data(self, session: aiohttp.ClientSession, part: str, page_num: int = 1) -> dict:
"""
Hidden method that retrieves page data for a given part type and page number.
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def read(file_name: str):

setup(
name="pcpartpicker",
- version="1.2.2",
+ version="1.2.3",
author="Jonathan Vusich",
author_email="jonathanvusich@gmail.com",
description="A fast, simple API for PCPartPicker.com.",
Expand All @@ -17,7 +17,7 @@ def read(file_name: str):
license="GPL",
keywords="pcpartpicker api webscraper",
url="https://github.com/JonathanVusich/pcpartpicker",
- packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
+ packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests", "utils"]),
install_requires=read("requirements.txt"),
classifiers=[
"Development Status :: 5 - Production/Stable",
Expand Down

0 comments on commit 56588b3

Please sign in to comment.