version 0.0.51

This commit is contained in:
2022-01-19 21:22:54 +08:00
parent 0b8cf206f7
commit e77297648a
28 changed files with 1666 additions and 1564 deletions

View File

@@ -1,130 +0,0 @@
# Guan is an open-source python package developed and maintained by https://www.guanjihuan.com/about. The primary location of this package is on website https://py.guanjihuan.com.
# others
import guan
## download
def download_with_scihub(address=None, num=1):
from bs4 import BeautifulSoup
import re
import requests
import os
if num==1 and address!=None:
address_array = [address]
else:
address_array = []
for i in range(num):
address = input('\nInput')
address_array.append(address)
for address in address_array:
r = requests.post('https://sci-hub.st/', data={'request': address})
print('\nResponse', r)
print('Address', r.url)
soup = BeautifulSoup(r.text, features='lxml')
pdf_URL = soup.iframe['src']
if re.search(re.compile('^https:'), pdf_URL):
pass
else:
pdf_URL = 'https:'+pdf_URL
print('PDF address', pdf_URL)
name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
print('PDF name', name)
print('Directory', os.getcwd())
print('\nDownloading...')
r = requests.get(pdf_URL, stream=True)
with open(name, 'wb') as f:
for chunk in r.iter_content(chunk_size=32):
f.write(chunk)
print('Completed!\n')
if num != 1:
print('All completed!\n')
## audio
def str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0):
import pyttsx3
if print_text==1:
print(str)
engine = pyttsx3.init()
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[voice].id)
engine.setProperty("rate", rate)
if save==1:
engine.save_to_file(str, 'str.mp3')
engine.runAndWait()
print('MP3 file saved!')
if read==1:
engine.say(str)
engine.runAndWait()
def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, print_text=0):
import pyttsx3
f = open(txt_path, 'r', encoding ='utf-8')
text = f.read()
if print_text==1:
print(text)
engine = pyttsx3.init()
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[voice].id)
engine.setProperty("rate", rate)
if save==1:
import re
file_name = re.split('[/,\\\]', txt_path)[-1][:-4]
engine.save_to_file(text, file_name+'.mp3')
engine.runAndWait()
print('MP3 file saved!')
if read==1:
engine.say(text)
engine.runAndWait()
def pdf_to_text(pdf_path):
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams, LTTextBox
from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
import logging
logging.Logger.propagate = False
logging.getLogger().setLevel(logging.ERROR)
praser = PDFParser(open(pdf_path, 'rb'))
doc = PDFDocument()
praser.set_document(doc)
doc.set_parser(praser)
doc.initialize()
if not doc.is_extractable:
raise PDFTextExtractionNotAllowed
else:
rsrcmgr = PDFResourceManager()
laparams = LAParams()
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
interpreter = PDFPageInterpreter(rsrcmgr, device)
content = ''
for page in doc.get_pages():
interpreter.process_page(page)
layout = device.get_result()
for x in layout:
if isinstance(x, LTTextBox):
content = content + x.get_text().strip()
return content
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0):
import pyttsx3
text = guan.pdf_to_text(pdf_path)
text = text.replace('\n', ' ')
if print_text==1:
print(text)
engine = pyttsx3.init()
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[voice].id)
engine.setProperty("rate", rate)
if save==1:
import re
file_name = re.split('[/,\\\]', pdf_path)[-1][:-4]
engine.save_to_file(text, file_name+'.mp3')
engine.runAndWait()
print('MP3 file saved!')
if read==1:
engine.say(text)
engine.runAndWait()