diff --git a/PyPI/setup.cfg b/PyPI/setup.cfg index 546a0bf..da2dbfc 100644 --- a/PyPI/setup.cfg +++ b/PyPI/setup.cfg @@ -1,7 +1,7 @@ [metadata] # replace with your username: name = guan -version = 0.1.196 +version = 0.1.198 author = guanjihuan author_email = guanjihuan@163.com description = An open source python package diff --git a/PyPI/src/guan.egg-info/PKG-INFO b/PyPI/src/guan.egg-info/PKG-INFO index d711977..24e9eec 100644 --- a/PyPI/src/guan.egg-info/PKG-INFO +++ b/PyPI/src/guan.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: guan -Version: 0.1.196 +Version: 0.1.198 Summary: An open source python package Home-page: https://py.guanjihuan.com Author: guanjihuan diff --git a/PyPI/src/guan/file_reading_and_writing.py b/PyPI/src/guan/file_reading_and_writing.py index 77287f8..5ed1e11 100644 --- a/PyPI/src/guan/file_reading_and_writing.py +++ b/PyPI/src/guan/file_reading_and_writing.py @@ -471,7 +471,7 @@ def open_file(filename='a', file_format='.txt', mode='add'): f = open(filename+file_format, 'w', encoding='UTF-8') return f -# 打印到TXT文件 +# 打印到TXT文件(补充内容) def print_to_file(*args, filename='print_result', file_format='.txt', print_on=True): if print_on==True: for arg in args: @@ -483,7 +483,12 @@ def print_to_file(*args, filename='print_result', file_format='.txt', print_on=T f.write('\n') f.close() -# 读取文本文件内容。如果文件不存在,返回空字符串 +# 写入到文本文件(覆盖内容) +def write_text_file(content, filename='a', file_format='.txt'): + with open(filename+file_format, 'w', encoding='UTF-8') as f: + f.write(content) + +# 读取文本文件内容(如果文件不存在,返回空字符串) def read_text_file(file_path='./a.txt', make_file=None): import os if not os.path.exists(file_path): diff --git a/PyPI/src/guan/others.py b/PyPI/src/guan/others.py index 1b6406e..a0f49d3 100644 --- a/PyPI/src/guan/others.py +++ b/PyPI/src/guan/others.py @@ -28,6 +28,17 @@ def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fil with open(output_file, 'wb') as combined_file: output_pdf.write(combined_file) +# 使用pdfplumber将PDF文件转成文本 +def pdf_to_text_with_pdfplumber(pdf_path): + import pdfplumber + with pdfplumber.open(pdf_path) as pdf: + all_text = [] + for page in pdf.pages: + text = page.extract_text() + all_text.append(text) + content = "\n\n".join(all_text) + return content + # 使用pdfminer3k将PDF文件转成文本 def pdf_to_text_with_pdfminer3k(pdf_path): from pdfminer.pdfparser import PDFParser, PDFDocument