From 7cb875ade866f084006763f76c4527aa59632924 Mon Sep 17 00:00:00 2001
From: guanjihuan
Date: Sat, 7 Oct 2023 01:21:31 +0800
Subject: [PATCH] 0.0.186

---
 API_Reference/API_Reference.py              |  8 ++++++-
 Source_Code/PyPI/setup.cfg                  |  2 +-
 Source_Code/PyPI/src/guan.egg-info/PKG-INFO |  2 +-
 Source_Code/PyPI/src/guan/__init__.py       | 25 +++++++++++++++++++++++-
 4 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/API_Reference/API_Reference.py b/API_Reference/API_Reference.py
index 9b767aa..1c8b87b 100644
--- a/API_Reference/API_Reference.py
+++ b/API_Reference/API_Reference.py
@@ -1,4 +1,4 @@
-# API Reference shows all functions in Guan package. The current version is guan-0.0.185, updated on December 05, 2023.
+# API Reference shows all functions in Guan package. The current version is guan-0.0.186, updated on December 07, 2023.
 
 import guan
 
@@ -917,6 +917,12 @@ guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fi
 # 将PDF文件转成文本
 content = guan.pdf_to_text(pdf_path)
 
+# Get the number of pages in a PDF file
+num_pages = guan.get_pdf_page_number(pdf_path)
+
+# Get the text of a specific page of a PDF file (page_num starts from 1)
+page_text = guan.pdf_to_txt_for_a_specific_page(pdf_path, page_num=1)
+
 # 获取PDF文献中的链接。例如: link_starting_form='https://doi.org'
 links = guan.get_links_from_pdf(pdf_path, link_starting_form='')
 
diff --git a/Source_Code/PyPI/setup.cfg b/Source_Code/PyPI/setup.cfg
index abb7a1c..17f4a0a 100644
--- a/Source_Code/PyPI/setup.cfg
+++ b/Source_Code/PyPI/setup.cfg
@@ -1,7 +1,7 @@
 [metadata]
 # replace with your username:
 name = guan
-version = 0.0.185
+version = 0.0.186
 author = guanjihuan
 author_email = guanjihuan@163.com
 description = An open source python package
diff --git a/Source_Code/PyPI/src/guan.egg-info/PKG-INFO b/Source_Code/PyPI/src/guan.egg-info/PKG-INFO
index 7453d84..3d15882 100644
--- a/Source_Code/PyPI/src/guan.egg-info/PKG-INFO
+++ b/Source_Code/PyPI/src/guan.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: guan
-Version: 0.0.185
+Version: 0.0.186
 Summary: An open source python package
 Home-page: https://py.guanjihuan.com
 Author: guanjihuan
diff --git a/Source_Code/PyPI/src/guan/__init__.py b/Source_Code/PyPI/src/guan/__init__.py
index 31ab481..8b6be8e 100644
--- a/Source_Code/PyPI/src/guan/__init__.py
+++ b/Source_Code/PyPI/src/guan/__init__.py
@@ -1,6 +1,6 @@
 # Guan is an open-source python package developed and maintained by https://www.guanjihuan.com/about (Ji-Huan Guan, 关济寰). The primary location of this package is on website https://py.guanjihuan.com. GitHub link: https://github.com/guanjihuan/py.guanjihuan.com.
 
-# The current version is guan-0.0.185, updated on December 05, 2023.
+# The current version is guan-0.0.186, updated on December 07, 2023.
 
 # Installation: pip install --upgrade guan
 
@@ -4038,6 +4038,29 @@ def pdf_to_text(pdf_path):
                 content = content + x.get_text().strip()
     return content
 
+# Get the number of pages in a PDF file
+def get_pdf_page_number(pdf_path):
+    import PyPDF2
+    # Use a context manager so the file handle is closed even if parsing fails
+    with open(pdf_path, 'rb') as pdf_file:
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
+        num_pages = len(pdf_reader.pages)
+    return num_pages
+
+# Get the text of a specific page of a PDF file (page_num is 1-based; raises IndexError when it is out of range)
+def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1):
+    import PyPDF2
+    with open(pdf_path, 'rb') as pdf_file:
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
+        num_pages = len(pdf_reader.pages)
+        page_index = page_num - 1
+        # Validate up front so a bad page number raises a clear error instead of leaving page_text unassigned (a negative index would otherwise select a page from the end)
+        if page_index not in range(num_pages):
+            raise IndexError('page_num must be between 1 and {}, got {}'.format(num_pages, page_num))
+        # Extract the text before the with-block closes the file
+        page_text = pdf_reader.pages[int(page_index)].extract_text()
+    return page_text
+
 # 获取PDF文献中的链接。例如: link_starting_form='https://doi.org'
 def get_links_from_pdf(pdf_path, link_starting_form=''):
     import PyPDF2