0.1.91
This commit is contained in:
		| @@ -1,7 +1,7 @@ | |||||||
| [metadata] | [metadata] | ||||||
| # replace with your username: | # replace with your username: | ||||||
| name = guan | name = guan | ||||||
| version = 0.1.90 | version = 0.1.91 | ||||||
| author = guanjihuan | author = guanjihuan | ||||||
| author_email = guanjihuan@163.com | author_email = guanjihuan@163.com | ||||||
| description = An open source python package | description = An open source python package | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| Metadata-Version: 2.1 | Metadata-Version: 2.1 | ||||||
| Name: guan | Name: guan | ||||||
| Version: 0.1.90 | Version: 0.1.91 | ||||||
| Summary: An open source python package | Summary: An open source python package | ||||||
| Home-page: https://py.guanjihuan.com | Home-page: https://py.guanjihuan.com | ||||||
| Author: guanjihuan | Author: guanjihuan | ||||||
|   | |||||||
| @@ -751,24 +751,25 @@ def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1): | |||||||
| def get_links_from_pdf(pdf_path, link_starting_form=''): | def get_links_from_pdf(pdf_path, link_starting_form=''): | ||||||
|     import PyPDF2 |     import PyPDF2 | ||||||
|     import re |     import re | ||||||
|     pdfReader = PyPDF2.PdfFileReader(pdf_path) |     reader = PyPDF2.PdfReader(pdf_path) | ||||||
|     pages = pdfReader.getNumPages() |     pages = len(reader.pages) | ||||||
|     i0 = 0 |     i0 = 0 | ||||||
|     links = [] |     links = [] | ||||||
|     for page in range(pages): |     for page in range(pages): | ||||||
|         pageSliced = pdfReader.getPage(page) |         pageSliced = reader.pages[page] | ||||||
|         pageObject = pageSliced.getObject() |         pageObject = pageSliced.get_object()  | ||||||
|         if '/Annots' in pageObject.keys(): |         if '/Annots' in pageObject.keys(): | ||||||
|             ann = pageObject['/Annots'] |             ann = pageObject['/Annots'] | ||||||
|             old = '' |             old = '' | ||||||
|             for a in ann: |             for a in ann: | ||||||
|                 u = a.getObject() |                 u = a.get_object()  | ||||||
|                 if '/A' in u.keys(): |                 if '/A' in u.keys(): | ||||||
|                     if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']): |                     if '/URI' in u['/A']:  | ||||||
|                         if u['/A']['/URI'] != old: |                         if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']): | ||||||
|                             links.append(u['/A']['/URI'])  |                             if u['/A']['/URI'] != old: | ||||||
|                             i0 += 1 |                                 links.append(u['/A']['/URI'])  | ||||||
|                             old = u['/A']['/URI'] |                                 i0 += 1 | ||||||
|  |                                 old = u['/A']['/URI'] | ||||||
|     return links |     return links | ||||||
|  |  | ||||||
| # 通过Sci-Hub网站下载文献 | # 通过Sci-Hub网站下载文献 | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user