# Guan is an open-source python package developed and maintained by https://www.guanjihuan.com. The primary location of this package is on website https://py.guanjihuan.com.

# others

## download

def _pdf_name_from_url(pdf_url):
    """Return the file name ending in 'pdf' that is embedded in *pdf_url*.

    The name runs from just after the last '/' preceding the final
    occurrence of 'pdf' up to and including that occurrence, so anything
    after it (for example a '#view=FitH' fragment) is dropped.

    Raises:
        ValueError: if *pdf_url* contains no 'pdf' preceded by a '/'.
    """
    end = pdf_url.rfind('pdf')
    start = pdf_url.rfind('/', 0, end) if end != -1 else -1
    if start == -1:
        raise ValueError('Cannot derive a PDF file name from: ' + pdf_url)
    return pdf_url[start + 1:end + len('pdf')]


def download_with_scihub(address=None, num=1):
    """Download one or more papers as PDF files through sci-hub.st.

    Args:
        address: Value posted as the 'request' form field. Only used when
            ``num == 1``; otherwise every address is read with ``input()``.
        num: Number of addresses to process. When it is not 1 (or when
            *address* is None) the user is prompted *num* times.

    Each PDF is written under the name taken from its URL, as a relative
    path (the current working directory is printed for reference).

    Raises:
        ValueError: if the response page has no ``<iframe>`` or the iframe
            URL does not contain a recognisable PDF file name.
    """
    from contextlib import closing
    import os

    from bs4 import BeautifulSoup
    import requests

    if num == 1 and address is not None:
        address_array = [address]
    else:
        address_array = [input('\nInput:') for _ in range(num)]

    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        iframe = soup.iframe
        if iframe is None:
            # Without this check the lookup below fails with an opaque
            # "'NoneType' object is not subscriptable".
            raise ValueError('No PDF iframe found in the response for: ' + str(address))
        pdf_url = iframe['src']
        # The iframe source may omit the scheme; prepend it in that case.
        if not pdf_url.startswith('https:'):
            pdf_url = 'https:' + pdf_url
        print('PDF address:', pdf_url)
        name = _pdf_name_from_url(pdf_url)
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        # closing() releases the streamed connection even if writing fails.
        with closing(requests.get(pdf_url, stream=True)) as response, open(name, 'wb') as f:
            # A few KiB per chunk avoids thousands of tiny writes; the bytes
            # written are the same whatever the chunk size.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')


## audio

def _audio_file_stem(path):
    """Return the last component of *path* without its extension.

    Both '/' and the backslash are treated as directory separators on every
    platform. Commas are ordinary file-name characters.
    """
    import os
    import re

    base_name = re.split(r'[/\\]', os.fspath(path))[-1]
    return os.path.splitext(base_name)[0]


def _init_tts_engine(rate, voice):
    """Create a pyttsx3 engine using voice number *voice* and speed *rate*."""
    import pyttsx3

    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    # *voice* is an index into the list of voices reported by the engine.
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    return engine


def _save_and_read(engine, text, mp3_name, read, save):
    """Save *text* to *mp3_name* if ``save == 1``, then speak it if ``read == 1``."""
    if save == 1:
        engine.save_to_file(text, mp3_name)
        engine.runAndWait()
        print('MP3 file saved!')
    if read == 1:
        engine.say(text)
        engine.runAndWait()


def str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak a string and/or save it as 'str.mp3'.

    Args:
        str: Text to convert. The name shadows the builtin but is kept
            because callers may pass it by keyword.
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: Speak the text when equal to 1.
        save: Save the text to 'str.mp3' when equal to 1.
        print_text: Print the text first when equal to 1.
    """
    if print_text == 1:
        print(str)
    engine = _init_tts_engine(rate, voice)
    _save_and_read(engine, str, 'str.mp3', read, save)


def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak the contents of a UTF-8 text file and/or save them as MP3.

    Args:
        txt_path: Path of the text file to read.
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: Speak the text when equal to 1.
        save: When equal to 1, save to '<file name without extension>.mp3'.
        print_text: Print the text first when equal to 1.
    """
    with open(txt_path, 'r', encoding='utf-8') as f:
        text = f.read()
    if print_text == 1:
        print(text)
    engine = _init_tts_engine(rate, voice)
    mp3_name = _audio_file_stem(txt_path) + '.mp3' if save == 1 else None
    _save_and_read(engine, text, mp3_name, read, save)


def pdf_to_text(pdf_path):
    """Return the text of all text boxes of a PDF, concatenated.

    Each text box is stripped of surrounding whitespace and the pieces are
    joined without a separator, page after page.

    Raises:
        PDFTextExtractionNotAllowed: if the document is not extractable.
    """
    # NOTE(review): PDFDocument imported from pdfminer.pdfparser looks like
    # the pdfminer3k API rather than pdfminer.six; confirm the dependency.
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging

    # These two settings are process-wide: the first sets an attribute on the
    # Logger class itself, the second raises the root logger's level.
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)

    # Keep the file open for the whole extraction: pages are read through
    # the parser while iterating, and the handle is closed afterwards.
    with open(pdf_path, 'rb') as pdf_file:
        parser = PDFParser(pdf_file)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            pieces.extend(
                item.get_text().strip()
                for item in layout
                if isinstance(item, LTTextBox)
            )
    return ''.join(pieces)


def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0):
    """Speak the text of a PDF file and/or save it as MP3.

    The text comes from ``pdf_to_text`` with newlines replaced by spaces.

    Args:
        pdf_path: Path of the PDF file to read.
        rate: Value assigned to the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: Speak the text when equal to 1.
        save: When equal to 1, save to '<file name without extension>.mp3'.
        print_text: Print the text first when equal to 1.
    """
    text = pdf_to_text(pdf_path).replace('\n', ' ')
    if print_text == 1:
        print(text)
    engine = _init_tts_engine(rate, voice)
    mp3_name = _audio_file_stem(pdf_path) + '.mp3' if save == 1 else None
    _save_and_read(engine, text, mp3_name, read, save)