This commit is contained in:
2026-01-08 12:19:44 +08:00
parent fd43e303fa
commit b8fea19fc3
4 changed files with 20 additions and 4 deletions

View File

@@ -28,6 +28,17 @@ def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fil
with open(output_file, 'wb') as combined_file:
output_pdf.write(combined_file)
# Convert a PDF file to text using pdfplumber
def pdf_to_text_with_pdfplumber(pdf_path):
    """Extract the text of every page of a PDF and join it into one string.

    Args:
        pdf_path: Location of the PDF; handed unchanged to ``pdfplumber.open``.

    Returns:
        A single string holding the text of all pages in page order, with
        consecutive pages separated by one blank line. A page that yields no
        text contributes an empty string, so page separators are preserved.
    """
    # Imported at call time rather than at module import.
    import pdfplumber

    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may return None for a page without extractable text
        # (observed on older pdfplumber releases); normalise to "" so that
        # str.join below does not raise TypeError.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n\n".join(page_texts)
# 使用pdfminer3k将PDF文件转成文本
def pdf_to_text_with_pdfminer3k(pdf_path):
from pdfminer.pdfparser import PDFParser, PDFDocument