From 505abf4da08d17b4d151259708dd5e924ef17bae Mon Sep 17 00:00:00 2001 From: guanjihuan Date: Wed, 13 Mar 2024 02:45:53 +0800 Subject: [PATCH] 0.1.91 --- PyPI/setup.cfg | 2 +- PyPI/src/guan.egg-info/PKG-INFO | 2 +- PyPI/src/guan/others.py | 21 +++++++++++---------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/PyPI/setup.cfg b/PyPI/setup.cfg index 1270ae1..5e9d9f1 100644 --- a/PyPI/setup.cfg +++ b/PyPI/setup.cfg @@ -1,7 +1,7 @@ [metadata] # replace with your username: name = guan -version = 0.1.90 +version = 0.1.91 author = guanjihuan author_email = guanjihuan@163.com description = An open source python package diff --git a/PyPI/src/guan.egg-info/PKG-INFO b/PyPI/src/guan.egg-info/PKG-INFO index 35f1c5f..f4a4492 100644 --- a/PyPI/src/guan.egg-info/PKG-INFO +++ b/PyPI/src/guan.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: guan -Version: 0.1.90 +Version: 0.1.91 Summary: An open source python package Home-page: https://py.guanjihuan.com Author: guanjihuan diff --git a/PyPI/src/guan/others.py b/PyPI/src/guan/others.py index 0383914..133d9c3 100644 --- a/PyPI/src/guan/others.py +++ b/PyPI/src/guan/others.py @@ -751,24 +751,25 @@ def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1): def get_links_from_pdf(pdf_path, link_starting_form=''): import PyPDF2 import re - pdfReader = PyPDF2.PdfFileReader(pdf_path) - pages = pdfReader.getNumPages() + reader = PyPDF2.PdfReader(pdf_path) + pages = len(reader.pages) i0 = 0 links = [] for page in range(pages): - pageSliced = pdfReader.getPage(page) - pageObject = pageSliced.getObject() + pageSliced = reader.pages[page] + pageObject = pageSliced.get_object() if '/Annots' in pageObject.keys(): ann = pageObject['/Annots'] old = '' for a in ann: - u = a.getObject() + u = a.get_object() if '/A' in u.keys(): - if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']): - if u['/A']['/URI'] != old: - links.append(u['/A']['/URI']) - i0 += 1 - old = u['/A']['/URI'] + if '/URI' in u['/A']: + if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']): + if u['/A']['/URI'] != old: + links.append(u['/A']['/URI']) + i0 += 1 + old = u['/A']['/URI'] return links # 通过Sci-Hub网站下载文献