2021-09-25 17:14:59 +08:00

128 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Guan is an open-source Python package developed and maintained by https://www.guanjihuan.com. The primary home of this package is the website https://py.guanjihuan.com.
# others
## download
def download_with_scihub(address=None, num=1):
    """Download one or more papers as PDF files through Sci-Hub.

    Each address is posted to https://sci-hub.st/ as the ``request`` form
    field. The PDF link is taken from the ``src`` attribute of the first
    ``<iframe>`` in the returned page, and the PDF is streamed into a file in
    the current working directory, named after the last path component of
    that link. Progress is reported with ``print``.

    Parameters:
        address: Value to submit to Sci-Hub. Only used when ``num == 1``;
            otherwise it is ignored and the addresses are read with ``input``.
        num: Number of papers. When ``num != 1`` (or ``address`` is ``None``)
            the user is prompted ``num`` times.

    Raises:
        ValueError: If the returned page has no ``<iframe>`` with a ``src``
            attribute, or no file name ending in ``pdf`` can be found in the
            link.
    """
    from bs4 import BeautifulSoup
    import re
    import requests
    import os
    if num == 1 and address is not None:
        address_array = [address]
    else:
        address_array = [input('\nInput') for _ in range(num)]
    for address in address_array:
        response = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse', response)
        print('Address', response.url)
        soup = BeautifulSoup(response.text, features='lxml')
        # Fail with a clear message instead of "NoneType is not subscriptable"
        # when the page carries no embedded PDF frame.
        iframe = soup.iframe
        pdf_URL = iframe.get('src') if iframe is not None else None
        if not pdf_URL:
            raise ValueError('No PDF frame found on the page returned for %r' % (address,))
        # Links that do not already start with the scheme get it prepended.
        if not pdf_URL.startswith('https:'):
            pdf_URL = 'https:' + pdf_URL
        print('PDF address', pdf_URL)
        # Search the reversed URL so the non-greedy match stops at the LAST
        # "/" before "pdf"; reversing back and dropping the leading "/"
        # leaves "<name>pdf" without any trailing fragment or query.
        name_match = re.search('fdp.*?/', pdf_URL[::-1])
        if name_match is None:
            raise ValueError('Cannot derive a PDF file name from %r' % (pdf_URL,))
        name = name_match.group()[::-1][1:]
        print('PDF name', name)
        print('Directory', os.getcwd())
        print('\nDownloading...')
        # The context manager releases the streamed connection even if
        # writing fails part-way through.
        with requests.get(pdf_URL, stream=True) as pdf_response:
            with open(name, 'wb') as f:
                for chunk in pdf_response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
## audio
def str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak a string with pyttsx3 and/or save the speech to ``str.mp3``.

    Parameters:
        str: Text to synthesize.
        rate: Value assigned to the engine's ``"rate"`` property.
        voice: Index into the engine's ``'voices'`` list.
        read: When equal to 1, the text is spoken aloud.
        save: When equal to 1, the speech is written to ``'str.mp3'``.
        print_text: When equal to 1, the text is printed first.
    """
    import pyttsx3
    # Local alias so the body does not keep using a name that shadows the builtin.
    message = str
    if print_text == 1:
        print(message)
    speaker = pyttsx3.init()
    chosen_voice = speaker.getProperty('voices')[voice]
    speaker.setProperty('voice', chosen_voice.id)
    speaker.setProperty("rate", rate)
    if save == 1:
        speaker.save_to_file(message, 'str.mp3')
        speaker.runAndWait()
        print('MP3 file saved!')
    if read == 1:
        speaker.say(message)
        speaker.runAndWait()
def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak the contents of a UTF-8 text file and/or save the speech to a file.

    Parameters:
        txt_path: Path of the text file; it is read with UTF-8 encoding.
        rate: Value assigned to the engine's ``"rate"`` property.
        voice: Index into the engine's ``'voices'`` list.
        read: When equal to 1, the text is spoken aloud.
        save: When equal to 1, the speech is written to ``<name>.mp3``
            (a relative path), where ``<name>`` is the file name of
            ``txt_path`` without its directory and extension.
        print_text: When equal to 1, the text is printed first.
    """
    import pyttsx3
    # Read inside a context manager so the file handle is always closed.
    with open(txt_path, 'r', encoding='utf-8') as f:
        text = f.read()
    if print_text == 1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save == 1:
        import os
        import re
        # Accept both "/" and "\" as separators on every platform. A comma is
        # a legal file-name character and must not split the path, and the
        # extension is removed whatever its length.
        base_name = re.split(r'[/\\]', txt_path)[-1]
        file_name = os.path.splitext(base_name)[0]
        engine.save_to_file(text, file_name + '.mp3')
        engine.runAndWait()
        print('MP3 file saved!')
    if read == 1:
        engine.say(text)
        engine.runAndWait()
def pdf_to_text(pdf_path):
    """Extract the text of a PDF file with pdfminer and return it as one string.

    Every page is laid out with default ``LAParams``; the stripped text of
    each ``LTTextBox`` found is concatenated, with no separator, in page and
    layout order.

    Side effects: sets ``logging.Logger.propagate`` to ``False`` and raises
    the root logger level to ``ERROR``.

    Parameters:
        pdf_path: Path of the PDF file, opened in binary mode.

    Returns:
        The concatenated text of all text boxes in the document.

    Raises:
        PDFTextExtractionNotAllowed: If the document reports that it is not
            extractable.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    # Keep the file open for the whole extraction (pages are processed while
    # iterating) and guarantee that it is closed afterwards, even on error.
    with open(pdf_path, 'rb') as pdf_file:
        parser = PDFParser(pdf_file)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # Collect the pieces and join once instead of growing a string.
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            pieces.extend(
                x.get_text().strip() for x in layout if isinstance(x, LTTextBox)
            )
    return ''.join(pieces)
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak the text of a PDF file and/or save the speech to a file.

    The text is obtained from ``pdf_to_text(pdf_path)`` and every newline is
    replaced by a space before it is handed to the speech engine.

    Parameters:
        pdf_path: Path of the PDF file.
        rate: Value assigned to the engine's ``"rate"`` property.
        voice: Index into the engine's ``'voices'`` list.
        read: When equal to 1, the text is spoken aloud.
        save: When equal to 1, the speech is written to ``<name>.mp3``
            (a relative path), where ``<name>`` is the file name of
            ``pdf_path`` without its directory and extension.
        print_text: When equal to 1, the text is printed first.
    """
    import pyttsx3
    text = pdf_to_text(pdf_path)
    text = text.replace('\n', ' ')
    if print_text == 1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save == 1:
        import os
        import re
        # Accept both "/" and "\" as separators on every platform. A comma is
        # a legal file-name character and must not split the path, and the
        # extension is removed whatever its length.
        base_name = re.split(r'[/\\]', pdf_path)[-1]
        file_name = os.path.splitext(base_name)[0]
        engine.save_to_file(text, file_name + '.mp3')
        engine.runAndWait()
        print('MP3 file saved!')
    if read == 1:
        engine.say(text)
        engine.runAndWait()