version 0.0.51
This commit is contained in:
@@ -1,130 +0,0 @@
|
||||
# Guan is an open-source Python package developed and maintained by https://www.guanjihuan.com/about. The primary location of this package is the website https://py.guanjihuan.com.
|
||||
|
||||
# others
|
||||
|
||||
import guan
|
||||
|
||||
## download
|
||||
|
||||
def download_with_scihub(address=None, num=1):
    """Download one or more PDF files through the Sci-Hub mirror sci-hub.st.

    Args:
        address: Value posted to Sci-Hub as the ``request`` form field
            (presumably a DOI, URL or title -- confirm against callers).
            It is only used when ``num == 1``; in every other case the
            values are read interactively with ``input``.
        num: Number of documents to download.

    Each PDF is written to the current working directory under the file
    name taken from the PDF address. Progress is reported with ``print``.
    An address whose result page has no ``<iframe src=...>`` is reported
    and skipped so the remaining addresses are still processed.
    """
    from bs4 import BeautifulSoup
    from urllib.parse import urljoin, urlsplit
    import os
    import re
    import requests

    if num == 1 and address is not None:
        address_array = [address]
    else:
        address_array = [input('\nInput:') for _ in range(num)]

    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')

        # The PDF location is read from the first <iframe> of the result page.
        iframe = soup.iframe
        pdf_URL = iframe.get('src') if iframe is not None else None
        if not pdf_URL:
            print('PDF address not found, skipped:', address)
            continue

        # Resolve scheme-relative ('//host/x.pdf'), site-relative ('/x.pdf')
        # and absolute sources alike against the page that was returned.
        pdf_URL = urljoin(r.url, pdf_URL)
        print('PDF address:', pdf_URL)

        # Searching the reversed URL finds the last 'pdf' and the '/' that
        # precedes it, i.e. the file name without any trailing '#...' part.
        match = re.search(r'fdp.*?/', pdf_URL[::-1])
        if match:
            name = match.group()[::-1][1:]
        else:
            # No 'pdf' in the address: fall back to the last path component.
            name = os.path.basename(urlsplit(pdf_URL).path) or 'download.pdf'
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')

        # Context managers close both the HTTP response and the output file.
        with requests.get(pdf_URL, stream=True) as pdf_response, open(name, 'wb') as f:
            for chunk in pdf_response.iter_content(chunk_size=8192):
                f.write(chunk)
        print('Completed!\n')

    if num != 1:
        print('All completed!\n')
|
||||
|
||||
## audio
|
||||
|
||||
def str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak a string with pyttsx3 and/or save the speech to 'str.mp3'.

    Args:
        str: Text to convert (the parameter name shadows the builtin; it is
            kept because callers may pass it by keyword).
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the list returned by the engine's 'voices' property.
        read: When equal to 1, the text is spoken aloud.
        save: When equal to 1, the speech is written to 'str.mp3'.
        print_text: When equal to 1, the text is printed first.
    """
    import pyttsx3

    message = str
    if print_text == 1:
        print(message)

    # Configure the speech engine: pick the voice by index, then the rate.
    speaker = pyttsx3.init()
    installed_voices = speaker.getProperty('voices')
    chosen_voice = installed_voices[voice]
    speaker.setProperty('voice', chosen_voice.id)
    speaker.setProperty("rate", rate)

    # Saving happens before reading, each with its own runAndWait() call.
    if save == 1:
        speaker.save_to_file(message, 'str.mp3')
        speaker.runAndWait()
        print('MP3 file saved!')

    if read == 1:
        speaker.say(message)
        speaker.runAndWait()
|
||||
|
||||
def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak the contents of a UTF-8 text file and/or save the speech as MP3.

    Args:
        txt_path: Path of the text file to read (decoded as UTF-8).
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the list returned by the engine's 'voices' property.
        read: When equal to 1, the text is spoken aloud.
        save: When equal to 1, the speech is written to '<name>.mp3', where
            <name> is the file name of ``txt_path`` without its extension.
            The file is created relative to the current working directory.
        print_text: When equal to 1, the text is printed first.
    """
    import os
    import re
    import pyttsx3

    # Read inside a context manager so the file handle is always closed.
    with open(txt_path, 'r', encoding='utf-8') as f:
        text = f.read()
    if print_text == 1:
        print(text)

    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)

    if save == 1:
        # Split on both '/' and '\' so Windows-style paths work on any OS;
        # a comma is a legal file-name character and is not a separator.
        base_name = re.split(r'[/\\]', os.fspath(txt_path))[-1]
        # Drop the real extension instead of assuming it is 4 characters long.
        file_name = os.path.splitext(base_name)[0]
        engine.save_to_file(text, file_name + '.mp3')
        engine.runAndWait()
        print('MP3 file saved!')

    if read == 1:
        engine.say(text)
        engine.runAndWait()
|
||||
|
||||
def pdf_to_text(pdf_path):
    """Extract the text of a PDF file with pdfminer and return it as one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The stripped text of every ``LTTextBox`` found on every page,
        concatenated in page order with no separator between boxes.

    Raises:
        PDFTextExtractionNotAllowed: If the document reports that it is not
            extractable.
    """
    # NOTE(review): these import locations (PDFDocument in pdfparser,
    # doc.initialize()) look like the older pdfminer3k API -- confirm which
    # pdfminer distribution the package depends on.
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging

    # Kept from the original: these two statements change logging state for
    # the whole process (class attribute and root logger level).
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)

    # The file stays open for the whole extraction and is closed afterwards,
    # including when an exception is raised.
    with open(pdf_path, 'rb') as pdf_file:
        parser = PDFParser(pdf_file)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # Collect the pieces and join once instead of repeated concatenation.
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            pieces.extend(
                x.get_text().strip() for x in layout if isinstance(x, LTTextBox)
            )
    return ''.join(pieces)
|
||||
|
||||
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak the text of a PDF file and/or save the speech as MP3.

    The text is obtained from ``guan.pdf_to_text`` and every newline in it is
    replaced by a space before it is handed to the speech engine.

    Args:
        pdf_path: Path of the PDF file to read.
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the list returned by the engine's 'voices' property.
        read: When equal to 1, the text is spoken aloud.
        save: When equal to 1, the speech is written to '<name>.mp3', where
            <name> is the file name of ``pdf_path`` without its extension.
            The file is created relative to the current working directory.
        print_text: When equal to 1, the text is printed first.
    """
    import os
    import re
    import pyttsx3

    text = guan.pdf_to_text(pdf_path)
    text = text.replace('\n', ' ')
    if print_text == 1:
        print(text)

    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)

    if save == 1:
        # Split on both '/' and '\' so Windows-style paths work on any OS;
        # a comma is a legal file-name character and is not a separator.
        base_name = re.split(r'[/\\]', os.fspath(pdf_path))[-1]
        # Drop the real extension instead of assuming it is 4 characters long.
        file_name = os.path.splitext(base_name)[0]
        engine.save_to_file(text, file_name + '.mp3')
        engine.runAndWait()
        print('MP3 file saved!')

    if read == 1:
        engine.say(text)
        engine.runAndWait()
|
||||
Reference in New Issue
Block a user