0.0.186
This commit is contained in:
parent
652a9d9ce7
commit
7cb875ade8
@ -1,4 +1,4 @@
|
|||||||
# API Reference shows all functions in Guan package. The current version is guan-0.0.185, updated on December 05, 2023.
|
# API Reference shows all functions in Guan package. The current version is guan-0.0.186, updated on December 07, 2023.
|
||||||
|
|
||||||
import guan
|
import guan
|
||||||
|
|
||||||
@ -917,6 +917,12 @@ guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fi
|
|||||||
# 将PDF文件转成文本
|
# 将PDF文件转成文本
|
||||||
content = guan.pdf_to_text(pdf_path)
|
content = guan.pdf_to_text(pdf_path)
|
||||||
|
|
||||||
|
# 获取PDF文件页数
|
||||||
|
num_pages = guan.get_pdf_page_number(pdf_path)
|
||||||
|
|
||||||
|
# 获取PDF文件指定页面的内容
|
||||||
|
page_text = guan.pdf_to_txt_for_a_specific_page(pdf_path, page_num=1)
|
||||||
|
|
||||||
# 获取PDF文献中的链接。例如: link_starting_form='https://doi.org'
|
# 获取PDF文献中的链接。例如: link_starting_form='https://doi.org'
|
||||||
links = guan.get_links_from_pdf(pdf_path, link_starting_form='')
|
links = guan.get_links_from_pdf(pdf_path, link_starting_form='')
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
# replace with your username:
|
# replace with your username:
|
||||||
name = guan
|
name = guan
|
||||||
version = 0.0.185
|
version = 0.0.186
|
||||||
author = guanjihuan
|
author = guanjihuan
|
||||||
author_email = guanjihuan@163.com
|
author_email = guanjihuan@163.com
|
||||||
description = An open source python package
|
description = An open source python package
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
Metadata-Version: 2.1
|
Metadata-Version: 2.1
|
||||||
Name: guan
|
Name: guan
|
||||||
Version: 0.0.185
|
Version: 0.0.186
|
||||||
Summary: An open source python package
|
Summary: An open source python package
|
||||||
Home-page: https://py.guanjihuan.com
|
Home-page: https://py.guanjihuan.com
|
||||||
Author: guanjihuan
|
Author: guanjihuan
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# Guan is an open-source python package developed and maintained by https://www.guanjihuan.com/about (Ji-Huan Guan, 关济寰). The primary location of this package is on website https://py.guanjihuan.com. GitHub link: https://github.com/guanjihuan/py.guanjihuan.com.
|
# Guan is an open-source python package developed and maintained by https://www.guanjihuan.com/about (Ji-Huan Guan, 关济寰). The primary location of this package is on website https://py.guanjihuan.com. GitHub link: https://github.com/guanjihuan/py.guanjihuan.com.
|
||||||
|
|
||||||
# The current version is guan-0.0.185, updated on December 05, 2023.
|
# The current version is guan-0.0.186, updated on December 07, 2023.
|
||||||
|
|
||||||
# Installation: pip install --upgrade guan
|
# Installation: pip install --upgrade guan
|
||||||
|
|
||||||
@ -4038,6 +4038,27 @@ def pdf_to_text(pdf_path):
|
|||||||
content = content + x.get_text().strip()
|
content = content + x.get_text().strip()
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
# 获取PDF文件页数
|
||||||
|
def get_pdf_page_number(pdf_path):
    """Return the number of pages in a PDF file.

    Args:
        pdf_path: Path of the PDF file; it is opened in binary read mode.

    Returns:
        The length of the ``pages`` collection of the ``PyPDF2.PdfReader``
        built from the file.
    """
    import PyPDF2
    # Use a context manager so the file handle is always released, even if
    # PyPDF2 fails while parsing (previously the handle was never closed).
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
    return num_pages
|
||||||
|
|
||||||
|
# 获取PDF文件指定页面的内容
|
||||||
|
def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1):
    """Extract the text of a single page of a PDF file.

    Args:
        pdf_path: Path of the PDF file; it is opened in binary read mode.
        page_num: 1-based number of the page to read (1 is the first page).

    Returns:
        The value returned by ``extract_text()`` for the requested page.

    Raises:
        IndexError: If ``page_num`` does not designate an existing page
            (less than 1 or greater than the number of pages).
    """
    import PyPDF2
    # The context manager closes the file even when parsing or text
    # extraction raises.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
        page_index = page_num - 1  # convert to the 0-based index used by PyPDF2
        # Membership in range() accepts exactly the values the former
        # page-by-page equality scan matched; anything else used to end in
        # an UnboundLocalError, so report it explicitly instead.
        if page_index not in range(num_pages):
            raise IndexError(
                'page_num must be between 1 and {}, got {!r}'.format(num_pages, page_num)
            )
        page = pdf_reader.pages[int(page_index)]
        page_text = page.extract_text()
    return page_text
|
||||||
|
|
||||||
# 获取PDF文献中的链接。例如: link_starting_form='https://doi.org'
|
# 获取PDF文献中的链接。例如: link_starting_form='https://doi.org'
|
||||||
def get_links_from_pdf(pdf_path, link_starting_form=''):
|
def get_links_from_pdf(pdf_path, link_starting_form=''):
|
||||||
import PyPDF2
|
import PyPDF2
|
||||||
|
Loading…
x
Reference in New Issue
Block a user