0.1.91
This commit is contained in:
		| @@ -1,7 +1,7 @@ | ||||
| [metadata] | ||||
| # replace with your username: | ||||
| name = guan | ||||
| version = 0.1.90 | ||||
| version = 0.1.91 | ||||
| author = guanjihuan | ||||
| author_email = guanjihuan@163.com | ||||
| description = An open source python package | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| Metadata-Version: 2.1 | ||||
| Name: guan | ||||
| Version: 0.1.90 | ||||
| Version: 0.1.91 | ||||
| Summary: An open source python package | ||||
| Home-page: https://py.guanjihuan.com | ||||
| Author: guanjihuan | ||||
|   | ||||
| @@ -751,19 +751,20 @@ def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1): | ||||
| def get_links_from_pdf(pdf_path, link_starting_form=''): | ||||
|     import PyPDF2 | ||||
|     import re | ||||
|     pdfReader = PyPDF2.PdfFileReader(pdf_path) | ||||
|     pages = pdfReader.getNumPages() | ||||
|     reader = PyPDF2.PdfReader(pdf_path) | ||||
|     pages = len(reader.pages) | ||||
|     i0 = 0 | ||||
|     links = [] | ||||
|     for page in range(pages): | ||||
|         pageSliced = pdfReader.getPage(page) | ||||
|         pageObject = pageSliced.getObject() | ||||
|         pageSliced = reader.pages[page] | ||||
|         pageObject = pageSliced.get_object()  | ||||
|         if '/Annots' in pageObject.keys(): | ||||
|             ann = pageObject['/Annots'] | ||||
|             old = '' | ||||
|             for a in ann: | ||||
|                 u = a.getObject() | ||||
|                 u = a.get_object()  | ||||
|                 if '/A' in u.keys(): | ||||
|                     if '/URI' in u['/A']:  | ||||
|                         if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']): | ||||
|                             if u['/A']['/URI'] != old: | ||||
|                                 links.append(u['/A']['/URI'])  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user