0.1.91

2024-03-13 02:45:53 +08:00
parent d17d4433f7
commit 505abf4da0
3 changed files with 13 additions and 12 deletions
--- a/PyPI/setup.cfg
+++ b/PyPI/setup.cfg
@@ -1,7 +1,7 @@
 [metadata]
 # replace with your username:
 name = guan
-version = 0.1.90
+version = 0.1.91
 author = guanjihuan
 author_email = guanjihuan@163.com
 description = An open source python package
--- a/PyPI/src/guan.egg-info/PKG-INFO
+++ b/PyPI/src/guan.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: guan
-Version: 0.1.90
+Version: 0.1.91
 Summary: An open source python package
 Home-page: https://py.guanjihuan.com
 Author: guanjihuan
--- a/PyPI/src/guan/others.py
+++ b/PyPI/src/guan/others.py
@@ -751,24 +751,25 @@ def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1):
 def get_links_from_pdf(pdf_path, link_starting_form=''):
    import PyPDF2
    import re
-    pdfReader = PyPDF2.PdfFileReader(pdf_path)
+    reader = PyPDF2.PdfReader(pdf_path)
-    pages = pdfReader.getNumPages()
+    pages = len(reader.pages)
    i0 = 0
    links = []
    for page in range(pages):
-        pageSliced = pdfReader.getPage(page)
+        pageSliced = reader.pages[page]
-        pageObject = pageSliced.getObject()
+        pageObject = pageSliced.get_object() 
        if '/Annots' in pageObject.keys():
            ann = pageObject['/Annots']
            old = ''
            for a in ann:
-                u = a.getObject()
+                u = a.get_object() 
                if '/A' in u.keys():
-                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
+                    if '/URI' in u['/A']: 
-                        if u['/A']['/URI'] != old:
+                        if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
-                            links.append(u['/A']['/URI']) 
+                            if u['/A']['/URI'] != old:
-                            i0 += 1
+                                links.append(u['/A']['/URI']) 
-                            old = u['/A']['/URI']
+                                i0 += 1
                                old = u['/A']['/URI']
    return links
 # 通过Sci-Hub网站下载文献