Skip to content

Commit

Permalink
test: Update walkpdf to fix PyPDF deprecation warnings (#934)
Browse files (browse the repository at this point in the history)
  • Loading branch information
kesara authored Nov 7, 2022
1 parent 1381bb8 commit 9308e40
Showing 1 changed file with 9 additions and 9 deletions.
18 changes: 9 additions & 9 deletions xml2rfc/walkpdf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,8 +21,8 @@ def walk(obj, seen):
d, i = walk(obj[key], seen)
dobj[k] = d
iobj += i
-if hasattr(obj, 'extractText'):
-dobj['text'] = obj.extractText()
+if hasattr(obj, 'extract_text'):
+dobj['text'] = obj.extract_text()
elif isinstance(obj, pypdf2.generic.ArrayObject):
dobj = []
for o in obj:
Expand All @@ -41,7 +41,7 @@ def walk(obj, seen):
dobj = str(obj)
if (obj.idnum, obj.generation) not in seen:
seen.add((obj.idnum, obj.generation))
-d, i = walk(obj.getObject(), seen)
+d, i = walk(obj.get_object(), seen)
if isinstance(d, dict):
d['IdNum'] = obj.idnum
d['Generation'] = obj.generation
Expand All @@ -63,17 +63,17 @@ def pyobj(filename=None, bytes=None):
seen = set()
#
pdffile = io.BytesIO(bytes) if bytes else io.open(filename, 'br')
-reader = pypdf2.PdfFileReader(pdffile, strict=False)
-info = reader.getDocumentInfo()
+reader = pypdf2.PdfReader(pdffile, strict=False)
+info = reader.metadata
doc = {}
for key in info.keys():
k = key[1:] if key.startswith('/') else key
doc[k] = info[key]
iobj = []
pages = []
-for num in range(reader.getNumPages()):
-page = reader.getPage(num)
-obj = page.getObject()
+for num in range(len(reader.pages)):
+page = reader.pages[num]
+obj = page.get_object()
d, i = walk(obj, seen)
#pages[num+1] = d
pages.append(d)
Expand Down Expand Up @@ -110,4 +110,4 @@ def main():
print('Wrote: %s' % x.name)

if __name__ == "__main__":
-main()
+main()

0 comments on commit 9308e40

Please sign in to comment.