128 lines
4.3 KiB
Python
128 lines
4.3 KiB
Python
# Guan is an open-source Python package developed and maintained by https://www.guanjihuan.com. The primary location of this package is the website https://py.guanjihuan.com.
|
||
|
||
# others
|
||
|
||
## download
|
||
|
||
def download_with_scihub(address=None, num=1):
    """Download one or more papers as PDF files through Sci-Hub.

    Args:
        address: Value sent to Sci-Hub as the ``request`` form field. It is
            used only when ``num == 1``; if it is ``None`` (or ``num`` is not
            1) the values are read interactively with ``input``.
        num: Number of papers to download.

    Each PDF is written to a file named after the PDF URL, relative to the
    current working directory. Progress is reported with ``print``. A paper
    whose PDF address cannot be determined, or whose download request fails,
    is reported and skipped so that the remaining papers are still processed.
    """
    from bs4 import BeautifulSoup
    from urllib.parse import urljoin
    import re
    import requests
    import os
    if num == 1 and address is not None:
        address_array = [address]
    else:
        address_array = [input('\nInput:') for _ in range(num)]
    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        # The PDF is embedded in an <iframe>; without it there is nothing to download.
        iframe = soup.iframe
        if iframe is None or not iframe.get('src'):
            print('PDF address not found, skipped:', address)
            continue
        pdf_URL = iframe['src']
        if pdf_URL.startswith('//'):
            # Protocol-relative address: add the scheme.
            pdf_URL = 'https:'+pdf_URL
        elif not re.match(r'https?:', pdf_URL):
            # Relative address: resolve it against the page it came from.
            pdf_URL = urljoin(r.url, pdf_URL)
        print('PDF address:', pdf_URL)
        # Search the reversed URL so that the match runs from the last 'pdf'
        # back to the nearest preceding '/', i.e. it is the file name.
        name_match = re.search(r'fdp.*?/', pdf_URL[::-1])
        if name_match is None:
            print('PDF name not found, skipped:', pdf_URL)
            continue
        name = name_match.group()[::-1][1:]
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        # The context manager releases the streamed connection when done.
        with requests.get(pdf_URL, stream=True) as response:
            if not response.ok:
                # Do not save an HTTP error page under a .pdf name.
                print('Download failed:', response)
                continue
            with open(name, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
|
||
|
||
## audio
|
||
|
||
def str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak a string with pyttsx3 and/or save the speech to 'str.mp3'.

    Args:
        str: Text to convert to speech.
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: Speak the text aloud when equal to 1.
        save: Save the speech to the file 'str.mp3' when equal to 1.
        print_text: Print the text first when equal to 1.
    """
    import pyttsx3
    message = str
    if print_text == 1:
        print(message)
    tts = pyttsx3.init()
    # Select the voice by position in the list reported by the engine.
    chosen_voice = tts.getProperty('voices')[voice]
    tts.setProperty('voice', chosen_voice.id)
    tts.setProperty("rate", rate)
    if save == 1:
        tts.save_to_file(message, 'str.mp3')
        tts.runAndWait()
        print('MP3 file saved!')
    if read == 1:
        tts.say(message)
        tts.runAndWait()
|
||
|
||
def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak the content of a UTF-8 text file and/or save the speech to a file.

    Args:
        txt_path: Path of the text file; it is read with UTF-8 encoding.
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: Speak the text aloud when equal to 1.
        save: When equal to 1, save the speech to '<name>.mp3', where <name>
            is the file name of ``txt_path`` without directory and extension.
        print_text: Print the text first when equal to 1.
    """
    import pyttsx3
    import os
    import re
    # Close the file as soon as its content has been read.
    with open(txt_path, 'r', encoding='utf-8') as f:
        text = f.read()
    if print_text == 1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save == 1:
        # Accept both '/' and '\' as directory separators on every platform,
        # then drop the extension whatever its length.
        base_name = re.split(r'[/\\]', txt_path)[-1]
        file_name = os.path.splitext(base_name)[0]
        engine.save_to_file(text, file_name+'.mp3')
        engine.runAndWait()
        print('MP3 file saved!')
    if read == 1:
        engine.say(text)
        engine.runAndWait()
|
||
|
||
def pdf_to_text(pdf_path):
    """Extract the text of a PDF file with pdfminer.

    Args:
        pdf_path: Path of the PDF file.

    Returns:
        The stripped text of every text box on every page, concatenated
        without any separator.

    Raises:
        PDFTextExtractionNotAllowed: If the document is not extractable.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    # NOTE(review): these two statements change the logging configuration of
    # the whole process (a class attribute of Logger and the root logger
    # level), not only of this function.
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    # Keep the file open for the whole extraction and close it afterwards,
    # including when an exception is raised.
    with open(pdf_path, 'rb') as pdf_file:
        parser = PDFParser(pdf_file)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # Collect the pieces and join once instead of growing a string.
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    return ''.join(pieces)
|
||
|
||
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak the text of a PDF file and/or save the speech to a file.

    The text is obtained with ``pdf_to_text`` and its newlines are replaced
    by spaces.

    Args:
        pdf_path: Path of the PDF file.
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: Speak the text aloud when equal to 1.
        save: When equal to 1, save the speech to '<name>.mp3', where <name>
            is the file name of ``pdf_path`` without directory and extension.
        print_text: Print the text first when equal to 1.
    """
    import pyttsx3
    import os
    import re
    text = pdf_to_text(pdf_path)
    text = text.replace('\n', ' ')
    if print_text == 1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save == 1:
        # Accept both '/' and '\' as directory separators on every platform,
        # then drop the extension whatever its length.
        base_name = re.split(r'[/\\]', pdf_path)[-1]
        file_name = os.path.splitext(base_name)[0]
        engine.save_to_file(text, file_name+'.mp3')
        engine.runAndWait()
        print('MP3 file saved!')
    if read == 1:
        engine.say(text)
        engine.runAndWait()