This commit is contained in:
guanjihuan 2022-06-29 14:57:30 +08:00
parent baf1fac3aa
commit 6d559e0bb2
2 changed files with 85 additions and 39 deletions

View File

@ -1,7 +1,7 @@
[metadata] [metadata]
# replace with your username: # replace with your username:
name = guan name = guan
version = 0.0.94 version = 0.0.95
author = guanjihuan author = guanjihuan
author_email = guanjihuan@163.com author_email = guanjihuan@163.com
description = An open source python package description = An open source python package

View File

@ -2,11 +2,12 @@
# With this package, you can calculate band structures, density of states, quantum transport and topological invariants of tight-binding models by invoking the functions you need. Other frequently used functions are also integrated in this package, such as file reading/writing, figure plotting, and data processing. # With this package, you can calculate band structures, density of states, quantum transport and topological invariants of tight-binding models by invoking the functions you need. Other frequently used functions are also integrated in this package, such as file reading/writing, figure plotting, and data processing.
# The current version is guan-0.0.94, updated on June 24, 2022. # The current version is guan-0.0.95, updated on June 29, 2022.
# Installation: pip install --upgrade guan # Installation: pip install --upgrade guan
# Modules: # Modules:
# # Module 1: basic functions # # Module 1: basic functions
# # Module 2: Fourier transform # # Module 2: Fourier transform
# # Module 3: Hamiltonian of finite size systems # # Module 3: Hamiltonian of finite size systems
@ -22,8 +23,6 @@
# # Module 13: others # # Module 13: others
''' '''
API Reference API Reference
@ -281,9 +280,8 @@ guan.write_one_dimensional_data(x_array, y_array, filename='a', format='txt')
guan.write_two_dimensional_data(x_array, y_array, matrix, filename='a', format='txt') guan.write_two_dimensional_data(x_array, y_array, matrix, filename='a', format='txt')
hex = guan.rgb_to_hex(rgb, pound=1) guan.print_array(array, show_index=0, index_type=0)
rgb = guan.hex_to_rgb(hex)
@ -315,18 +313,24 @@ guan.change_directory_by_replacement(current_key_word='code', new_key_word='data
guan.batch_reading_and_plotting(directory, xlabel='x', ylabel='y') guan.batch_reading_and_plotting(directory, xlabel='x', ylabel='y')
hex = guan.rgb_to_hex(rgb, pound=1)
rgb = guan.hex_to_rgb(hex)
# Module 13: others # Module 13: others
guan.download_with_scihub(address=None, num=1) guan.download_with_scihub(address=None, num=1)
links = guan.get_links_from_pdf(pdf_path, link_starting_form='')
content = guan.pdf_to_text(pdf_path)
guan.str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0) guan.str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0)
guan.txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, print_text=0) guan.txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, print_text=0)
content = guan.pdf_to_text(pdf_path)
guan.pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0) guan.pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0)
guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1) guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)
@ -1968,7 +1972,21 @@ def write_two_dimensional_data(x_array, y_array, matrix, filename='a', format='t
f.write('\n') f.write('\n')
i0 += 1 i0 += 1
def print_array(array, show_index=0, index_type=0):
    """Print every element of ``array`` on its own line.

    Parameters
    ----------
    array : iterable
        Elements to print, in iteration order.
    show_index : int, optional
        When equal to 0 (default) only the elements are printed. Any other
        value prefixes each element with a running index.
    index_type : int, optional
        Only consulted when ``show_index`` is not 0. When equal to 0
        (default) the index starts at 0; any other value makes it start at 1.

    Returns
    -------
    None
        The function only writes to standard output.
    """
    if show_index == 0:
        for item in array:
            print(item)
        return
    # enumerate replaces the two hand-maintained counters; only the starting
    # value differs between the zero-based and one-based numbering.
    first_index = 0 if index_type == 0 else 1
    for index, item in enumerate(array, start=first_index):
        print(index, item)
@ -2279,6 +2297,62 @@ def download_with_scihub(address=None, num=1):
if num != 1: if num != 1:
print('All completed!\n') print('All completed!\n')
# PDF
def get_links_from_pdf(pdf_path, link_starting_form=''):
    """Collect the URI targets of the link annotations in a PDF file.

    Every page is scanned for an '/Annots' entry; for each annotation that
    carries an '/A' action with a '/URI' entry, the URI is kept when it
    matches ``link_starting_form`` at its beginning. A URI equal to the one
    appended just before it on the same page is not appended again.

    Parameters
    ----------
    pdf_path : str
        Path of the PDF file, passed unchanged to ``PyPDF2.PdfFileReader``.
    link_starting_form : str, optional
        Prefix the URI has to start with. It is inserted into a regular
        expression as-is ('^' + link_starting_form), so regex metacharacters
        keep their special meaning (e.g. '.' matches any character). The
        default '' accepts every URI.

    Returns
    -------
    list
        The matching URIs in page/annotation order.
    """
    # Example: link_starting_form='https://doi.org'
    import PyPDF2
    import re
    # Compiled once up front instead of once per annotation.
    link_pattern = re.compile('^'+link_starting_form)
    pdf_reader = PyPDF2.PdfFileReader(pdf_path)
    links = []
    for page_number in range(pdf_reader.getNumPages()):
        page_object = pdf_reader.getPage(page_number).getObject()
        if '/Annots' not in page_object.keys():
            continue
        # Reset on every page: only immediate repeats within one page are dropped.
        previous_uri = ''
        for annotation in page_object['/Annots']:
            annotation_object = annotation.getObject()
            if '/A' not in annotation_object.keys():
                continue
            action = annotation_object['/A']
            # An action without a '/URI' entry (e.g. a jump inside the
            # document) is skipped instead of raising KeyError.
            if '/URI' not in action:
                continue
            uri = action['/URI']
            if re.search(link_pattern, uri) and uri != previous_uri:
                links.append(uri)
                previous_uri = uri
    return links
def pdf_to_text(pdf_path):
    """Extract the text of a PDF file and return it as one string.

    The text of every ``LTTextBox`` found in the layout of each page is
    stripped of leading/trailing whitespace and concatenated without any
    separator.

    Parameters
    ----------
    pdf_path : str
        Path of the PDF file; it is opened in binary mode.

    Returns
    -------
    str
        The concatenated text of all text boxes, in page order.

    Raises
    ------
    PDFTextExtractionNotAllowed
        If the document reports ``is_extractable`` as false.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    # NOTE(review): both statements below change process-wide logging state
    # (a class attribute of Logger and the root logger level), not only the
    # logging of this call - presumably to silence pdfminer; confirm intent.
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    # The whole extraction stays inside the with-block so the file is still
    # open while pages are processed and is closed afterwards, including
    # when an exception is raised.
    with open(pdf_path, 'rb') as pdf_file:
        parser = PDFParser(pdf_file)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # Pieces are gathered in a list and joined once instead of growing a
        # string with repeated concatenation.
        text_pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for element in layout:
                if isinstance(element, LTTextBox):
                    text_pieces.append(element.get_text().strip())
    return ''.join(text_pieces)
## audio ## audio
def str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0): def str_to_audio(str='hello world', rate=125, voice=1, read=1, save=0, print_text=0):
@ -2317,36 +2391,6 @@ def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, print_text=0):
engine.say(text) engine.say(text)
engine.runAndWait() engine.runAndWait()
def pdf_to_text(pdf_path):
    """Extract the text of a PDF file and return it as one string.

    The text of every ``LTTextBox`` found in the layout of each page is
    stripped of leading/trailing whitespace and concatenated without any
    separator.

    Parameters
    ----------
    pdf_path : str
        Path of the PDF file; it is opened in binary mode.

    Returns
    -------
    str
        The concatenated text of all text boxes, in page order.

    Raises
    ------
    PDFTextExtractionNotAllowed
        If the document reports ``is_extractable`` as false.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    # NOTE(review): both statements below change process-wide logging state
    # (a class attribute of Logger and the root logger level), not only the
    # logging of this call - presumably to silence pdfminer; confirm intent.
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    # The whole extraction stays inside the with-block so the file is still
    # open while pages are processed and is closed afterwards, including
    # when an exception is raised.
    with open(pdf_path, 'rb') as pdf_file:
        parser = PDFParser(pdf_file)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # Pieces are gathered in a list and joined once instead of growing a
        # string with repeated concatenation.
        text_pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for element in layout:
                if isinstance(element, LTTextBox):
                    text_pieces.append(element.get_text().strip())
    return ''.join(text_pieces)
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0): def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0):
import pyttsx3 import pyttsx3
text = guan.pdf_to_text(pdf_path) text = guan.pdf_to_text(pdf_path)
@ -2367,6 +2411,8 @@ def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, print_text=0):
engine.say(text) engine.say(text)
engine.runAndWait() engine.runAndWait()
## words
def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1): def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re