diff --git a/API_Reference/API_Reference.py b/API_Reference/API_Reference.py
index b50d464..7823061 100644
--- a/API_Reference/API_Reference.py
+++ b/API_Reference/API_Reference.py
@@ -14,6 +14,8 @@ import guan
 
+
+
 # Module 1: basic functions
 
 guan.test()
@@ -75,6 +77,12 @@ sigma_zz = guan.sigma_zz()
 
+
+
+
+
+
+
@@ -453,6 +461,8 @@ local_dos = guan.local_density_of_states_for_square_lattice_with_self_energy_usi
 
+
+
@@ -610,6 +620,8 @@ wilson_loop_array = guan.calculate_wilson_loop(hamiltonian_function, k_min=-math
 
+
+
@@ -793,7 +805,32 @@ hashed_password = guan.encryption_MD5(password, salt='')
 
 # Hash with SHA-256
 hashed_password = guan.encryption_SHA_256(password, salt='')
 
+# Get the current date string
+datetime_date = guan.get_date(bar=True)
+
+# Get the current time string
+datetime_time = guan.get_time()
+
+# Get all stocks
+title, stock_data = guan.all_stocks()
+
+# Get the symbols of all stocks
+stock_symbols = guan.all_stock_symbols()
+
+# Find a stock name from its symbol
+stock_name = guan.find_stock_name_from_symbol(symbol='000002')
+
+# Get the historical data of one stock
+title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101')
+
+# Play academic words
+guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)
+
+# Play selected academic words
+guan.play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3)
+
+# Play words from the periodic table of elements
+guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)
@@ -824,6 +861,18 @@ guan.make_directory(directory='./test')
 
 # Copy a file
 guan.copy_file(file1='./a.txt', file2='./b.txt')
 
+# Combine two PDF files
+guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')
+
+# Convert a PDF file to text
+content = guan.pdf_to_text(pdf_path)
+
+# Get the links in a PDF. For example: link_starting_form='https://doi.org'
+links = guan.get_links_from_pdf(pdf_path, link_starting_form='')
+
+# Download papers through the Sci-Hub website
+guan.download_with_scihub(address=None, num=1)
+
 # Write the file directory structure into a Markdown file
 guan.write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None)
@@ -845,76 +894,9 @@ guan.move_all_files_to_root_directory(directory)
 
 # Change the current working directory
 guan.change_directory_by_replacement(current_key_word='code', new_key_word='data')
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# Module 14: others
-
-# time
-
-# Get the current date string
-datetime_date = guan.get_date(bar=True)
-
-# Get the current time string
-datetime_time = guan.get_time()
-
-# stocks
-
-# Get all stocks
-title, stock_data = guan.all_stocks()
-
-# Get the symbols of all stocks
-stock_symbols = guan.all_stock_symbols()
-
-# Find a stock name from its symbol
-stock_name = guan.find_stock_name_from_symbol(symbol='000002')
-
-# Get the historical data of one stock
-title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101')
-
-# Combine two PDF files
-guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')
-
-# download
-
-# Download papers through the Sci-Hub website
-guan.download_with_scihub(address=None, num=1)
-
-# PDF
-
-# Get the links in a PDF. For example: link_starting_form='https://doi.org'
-links = guan.get_links_from_pdf(pdf_path, link_starting_form='')
-
-# Convert a PDF file to text
-content = guan.pdf_to_text(pdf_path)
-
-# image
-
 # Generate a QR code
 guan.creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png')
 
-# audio
-
 # Convert text to audio
 guan.str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0)
@@ -926,14 +908,3 @@ guan.pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, compress=0, bitra
 
 # Compress a WAV audio file into an MP3 audio file
 guan.compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k')
-
-# words
-
-# Play academic words
-guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)
-
-# Play selected academic words
-guan.play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3)
-
-# Play words from the periodic table of elements
-guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)
\ No newline at end of file
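
For orientation between the two patched files: the reference above documents call signatures only, and the relocated helpers are invoked exactly as before the move. A minimal runnable sketch, assuming the guan package and its akshare dependency are installed:

import guan

# Date and time helpers, now grouped under Module 12
print(guan.get_date(bar=True), guan.get_time())

# Stock lookup as documented above (fetches live data through akshare)
stock_name = guan.find_stock_name_from_symbol(symbol='000002')
print(stock_name)
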
diff --git a/Source_Code/PyPI/src/guan/__init__.py b/Source_Code/PyPI/src/guan/__init__.py
index e9a7de9..8520aeb 100644
--- a/Source_Code/PyPI/src/guan/__init__.py
+++ b/Source_Code/PyPI/src/guan/__init__.py
@@ -50,6 +50,9 @@
 
+
+
+
@@ -904,6 +907,10 @@ def hamiltonian_of_kagome_lattice(kx, ky, t=1):
 
+
+
+
+
@@ -2040,6 +2047,18 @@ def calculate_scattering_matrix_with_disorder_and_get_averaged_information(fermi
 
+
+
+
+
+
+
+
+
+
+
+
+
@@ -3010,6 +3029,16 @@ def color_matplotlib():
 
+
+
+
+
+
+
+
+
+
+
@@ -3240,6 +3269,40 @@ def print_array_with_index(array, show_index=1, index_type=0):
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 # Module 12: data processing
 
 # Preprocessing before parallel computation: divide the parameters into several parts
@@ -3321,6 +3384,228 @@ def encryption_SHA_256(password, salt=''):
     hashed_password = hashlib.sha256(password.encode()).hexdigest()
     return hashed_password
 
+# Get the current date string
+def get_date(bar=True):
+    import datetime
+    datetime_date = str(datetime.date.today())
+    if bar==False:
+        datetime_date = datetime_date.replace('-', '')
+    return datetime_date
+
+# Get the current time string
+def get_time():
+    import datetime
+    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
+    return datetime_time
+
+# Get all stocks
+def all_stocks():
+    import akshare as ak
+    stocks = ak.stock_zh_a_spot_em()
+    title = np.array(stocks.columns)
+    stock_data = stocks.values
+    return title, stock_data
+
+# Get the symbols of all stocks
+def all_stock_symbols():
+    title, stock_data = guan.all_stocks()
+    stock_symbols = stock_data[:, 1]
+    return stock_symbols
+
+# Find a stock name from its symbol
+def find_stock_name_from_symbol(symbol='000002'):
+    title, stock_data = guan.all_stocks()
+    for stock in stock_data:
+        if symbol in stock:
+            stock_name = stock[2]
+            return stock_name
+
+# Get the historical data of one stock
+def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
+    # period = 'daily'
+    # period = 'weekly'
+    # period = 'monthly'
+    import akshare as ak
+    stock = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
+    title = np.array(stock.columns)
+    stock_data = stock.values[::-1]
+    return title, stock_data
+
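
all_stocks() splits the akshare DataFrame into a header array and a value matrix, and all_stock_symbols() then reads column 1. A minimal sketch of that split with a stand-in DataFrame (hypothetical rows) in place of the live ak.stock_zh_a_spot_em() feed:

import numpy as np
import pandas as pd

# Stand-in for the akshare result; the column layout mimics the real feed,
# where the second column ('代码') holds the stock symbols.
stocks = pd.DataFrame({'序号': [1, 2], '代码': ['000001', '000002'], '名称': ['平安银行', '万科A']})
title = np.array(stocks.columns)   # returned as 'title'
stock_data = stocks.values         # returned as 'stock_data'
print(stock_data[:, 1])            # -> ['000001' '000002'], as used by all_stock_symbols()
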
+# Play academic words
+def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
+    from bs4 import BeautifulSoup
+    import re
+    import urllib.request
+    import requests
+    import os
+    import pygame
+    import time
+    import ssl
+    import random
+    ssl._create_default_https_context = ssl._create_unverified_context
+    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
+    if bre_or_ame == 'ame':
+        directory = 'words_mp3_ameProns/'
+    elif bre_or_ame == 'bre':
+        directory = 'words_mp3_breProns/'
+    exist_directory = os.path.exists(directory)
+    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
+    if exist_directory == 0:
+        os.makedirs(directory)
+    soup = BeautifulSoup(html, features='lxml')
+    contents = re.findall('<h2.*?</h2>', html, re.S)
+    for content in contents:
+        # ... (the word download and playback steps were lost when this diff was rendered to text) ...
+        translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
+        if show_translation==1:
+            time.sleep(translation_time)
+            print(translation)
+        time.sleep(rest_time)
+        pygame.mixer.music.stop()
+        print()
+
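
The regular expressions in the word-playing functions were partially destroyed when this diff passed through an HTML renderer. The translation pattern is reconstructed above as '<p>.*?</p>' because the surviving [0][3:-4] slice strips exactly a 3-character opening tag and a 4-character closing tag; the '<h2.*?</h2>' pattern for the word list is a matching assumption. A standalone sketch of the slice:

import re

content = '<p>sample translation</p>'   # one matched block from the page
translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
print(translation)                      # -> sample translation
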
+# Play selected academic words
+def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
+    from bs4 import BeautifulSoup
+    import re
+    import urllib.request
+    import requests
+    import os
+    import pygame
+    import time
+    import ssl
+    import random
+    ssl._create_default_https_context = ssl._create_unverified_context
+    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
+    if bre_or_ame == 'ame':
+        directory = 'words_mp3_ameProns/'
+    elif bre_or_ame == 'bre':
+        directory = 'words_mp3_breProns/'
+    exist_directory = os.path.exists(directory)
+    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
+    if exist_directory == 0:
+        os.makedirs(directory)
+    soup = BeautifulSoup(html, features='lxml')
+    contents = re.findall('<h2.*?</h2>', html, re.S)
+    # ... (the playback loop of this function and the beginning of play_element_words were lost when this diff was rendered to text) ...
+
+# Play words from the periodic table of elements
+def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
+    # ... (imports, page fetch, and the enclosing word loop were lost when this diff was rendered to text) ...
+    translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
+    if show_translation==1:
+        time.sleep(translation_time)
+        print(translation)
+    time.sleep(rest_time)
+    pygame.mixer.music.stop()
+    print()
+
+
+
+
@@ -3369,6 +3654,113 @@ def copy_file(file1='./a.txt', file2='./b.txt'):
     import shutil
     shutil.copy(file1, file2)
 
+# Combine two PDF files
+def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
+    import PyPDF2
+    output_pdf = PyPDF2.PdfWriter()
+    with open(input_file_1, 'rb') as file1:
+        pdf1 = PyPDF2.PdfReader(file1)
+        for page in range(len(pdf1.pages)):
+            output_pdf.add_page(pdf1.pages[page])
+    with open(input_file_2, 'rb') as file2:
+        pdf2 = PyPDF2.PdfReader(file2)
+        for page in range(len(pdf2.pages)):
+            output_pdf.add_page(pdf2.pages[page])
+    with open(output_file, 'wb') as combined_file:
+        output_pdf.write(combined_file)
+
+# Convert a PDF file to text
+def pdf_to_text(pdf_path):
+    from pdfminer.pdfparser import PDFParser, PDFDocument
+    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+    from pdfminer.converter import PDFPageAggregator
+    from pdfminer.layout import LAParams, LTTextBox
+    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
+    import logging
+    logging.Logger.propagate = False
+    logging.getLogger().setLevel(logging.ERROR)
+    praser = PDFParser(open(pdf_path, 'rb'))
+    doc = PDFDocument()
+    praser.set_document(doc)
+    doc.set_parser(praser)
+    doc.initialize()
+    if not doc.is_extractable:
+        raise PDFTextExtractionNotAllowed
+    else:
+        rsrcmgr = PDFResourceManager()
+        laparams = LAParams()
+        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
+        interpreter = PDFPageInterpreter(rsrcmgr, device)
+        content = ''
+        for page in doc.get_pages():
+            interpreter.process_page(page)
+            layout = device.get_result()
+            for x in layout:
+                if isinstance(x, LTTextBox):
+                    content = content + x.get_text().strip()
+    return content
+
+# Get the links in a PDF. For example: link_starting_form='https://doi.org'
+def get_links_from_pdf(pdf_path, link_starting_form=''):
+    import PyPDF2
+    import re
+    pdfReader = PyPDF2.PdfFileReader(pdf_path)
+    pages = pdfReader.getNumPages()
+    i0 = 0
+    links = []
+    for page in range(pages):
+        pageSliced = pdfReader.getPage(page)
+        pageObject = pageSliced.getObject()
+        if '/Annots' in pageObject.keys():
+            ann = pageObject['/Annots']
+            old = ''
+            for a in ann:
+                u = a.getObject()
+                if '/A' in u.keys():
+                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
+                        if u['/A']['/URI'] != old:
+                            links.append(u['/A']['/URI'])
+                            i0 += 1
+                            old = u['/A']['/URI']
+    return links
+
+# Download papers through the Sci-Hub website
+def download_with_scihub(address=None, num=1):
+    from bs4 import BeautifulSoup
+    import re
+    import requests
+    import os
+    if num==1 and address!=None:
+        address_array = [address]
+    else:
+        address_array = []
+        for i in range(num):
+            address = input('\nInput:')
+            address_array.append(address)
+    for address in address_array:
+        r = requests.post('https://sci-hub.st/', data={'request': address})
+        print('\nResponse:', r)
+        print('Address:', r.url)
+        soup = BeautifulSoup(r.text, features='lxml')
+        pdf_URL = soup.embed['src']
+        # pdf_URL = soup.iframe['src'] # Line from an older version; it no longer retrieves the PDF URL.
+        if re.search(re.compile('^https:'), pdf_URL):
+            pass
+        else:
+            pdf_URL = 'https:'+pdf_URL
+        print('PDF address:', pdf_URL)
+        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
+        print('PDF name:', name)
+        print('Directory:', os.getcwd())
+        print('\nDownloading...')
+        r = requests.get(pdf_URL, stream=True)
+        with open(name, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=32):
+                f.write(chunk)
+        print('Completed!\n')
+    if num != 1:
+        print('All completed!\n')
+
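
The file name in download_with_scihub() is recovered by searching the reversed URL: 'fdp' is 'pdf' reversed, so the non-greedy match grabs everything from the '.pdf' suffix back to the last '/'. A worked example with an illustrative URL:

import re

pdf_URL = 'https://example.org/papers/some-paper.pdf'   # illustrative URL
name = re.search(re.compile('fdp.*?/'), pdf_URL[::-1]).group()[::-1][1::]
print(name)   # -> some-paper.pdf
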
 # Write the file directory structure into a Markdown file
 def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
     import os
@@ -3582,235 +3974,12 @@ def change_directory_by_replacement(current_key_word='code', new_key_word='data'
         os.makedirs(data_path)
     os.chdir(data_path)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# Module 14: others
-
-## time
-
-# Get the current date string
-def get_date(bar=True):
-    import datetime
-    datetime_date = str(datetime.date.today())
-    if bar==False:
-        datetime_date = datetime_date.replace('-', '')
-    return datetime_date
-
-# Get the current time string
-def get_time():
-    import datetime
-    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
-    return datetime_time
-
-## stocks
-
-# Get all stocks
-def all_stocks():
-    import akshare as ak
-    stocks = ak.stock_zh_a_spot_em()
-    title = np.array(stocks.columns)
-    stock_data = stocks.values
-    return title, stock_data
-
-# Get the symbols of all stocks
-def all_stock_symbols():
-    title, stock_data = guan.all_stocks()
-    stock_symbols = stock_data[:, 1]
-    return stock_symbols
-
-# Find a stock name from its symbol
-def find_stock_name_from_symbol(symbol='000002'):
-    title, stock_data = guan.all_stocks()
-    for stock in stock_data:
-        if symbol in stock:
-            stock_name = stock[2]
-            return stock_name
-
-# Get the historical data of one stock
-def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
-    # period = 'daily'
-    # period = 'weekly'
-    # period = 'monthly'
-    import akshare as ak
-    stock = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
-    title = np.array(stock.columns)
-    stock_data = stock.values[::-1]
-    return title, stock_data
-
-## download
-
-# Download papers through the Sci-Hub website
-def download_with_scihub(address=None, num=1):
-    from bs4 import BeautifulSoup
-    import re
-    import requests
-    import os
-    if num==1 and address!=None:
-        address_array = [address]
-    else:
-        address_array = []
-        for i in range(num):
-            address = input('\nInput:')
-            address_array.append(address)
-    for address in address_array:
-        r = requests.post('https://sci-hub.st/', data={'request': address})
-        print('\nResponse:', r)
-        print('Address:', r.url)
-        soup = BeautifulSoup(r.text, features='lxml')
-        pdf_URL = soup.embed['src']
-        # pdf_URL = soup.iframe['src'] # Line from an older version; it no longer retrieves the PDF URL.
-        if re.search(re.compile('^https:'), pdf_URL):
-            pass
-        else:
-            pdf_URL = 'https:'+pdf_URL
-        print('PDF address:', pdf_URL)
-        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
-        print('PDF name:', name)
-        print('Directory:', os.getcwd())
-        print('\nDownloading...')
-        r = requests.get(pdf_URL, stream=True)
-        with open(name, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=32):
-                f.write(chunk)
-        print('Completed!\n')
-    if num != 1:
-        print('All completed!\n')
-
-## PDF
-
-# Combine two PDF files
-def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
-    import PyPDF2
-    output_pdf = PyPDF2.PdfWriter()
-    with open(input_file_1, 'rb') as file1:
-        pdf1 = PyPDF2.PdfReader(file1)
-        for page in range(len(pdf1.pages)):
-            output_pdf.add_page(pdf1.pages[page])
-    with open(input_file_2, 'rb') as file2:
-        pdf2 = PyPDF2.PdfReader(file2)
-        for page in range(len(pdf2.pages)):
-            output_pdf.add_page(pdf2.pages[page])
-    with open(output_file, 'wb') as combined_file:
-        output_pdf.write(combined_file)
-
-# Get the links in a PDF. For example: link_starting_form='https://doi.org'
-def get_links_from_pdf(pdf_path, link_starting_form=''):
-    import PyPDF2
-    import re
-    pdfReader = PyPDF2.PdfFileReader(pdf_path)
-    pages = pdfReader.getNumPages()
-    i0 = 0
-    links = []
-    for page in range(pages):
-        pageSliced = pdfReader.getPage(page)
-        pageObject = pageSliced.getObject()
-        if '/Annots' in pageObject.keys():
-            ann = pageObject['/Annots']
-            old = ''
-            for a in ann:
-                u = a.getObject()
-                if '/A' in u.keys():
-                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
-                        if u['/A']['/URI'] != old:
-                            links.append(u['/A']['/URI'])
-                            i0 += 1
-                            old = u['/A']['/URI']
-    return links
-
-# Convert a PDF file to text
-def pdf_to_text(pdf_path):
-    from pdfminer.pdfparser import PDFParser, PDFDocument
-    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
-    from pdfminer.converter import PDFPageAggregator
-    from pdfminer.layout import LAParams, LTTextBox
-    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
-    import logging
-    logging.Logger.propagate = False
-    logging.getLogger().setLevel(logging.ERROR)
-    praser = PDFParser(open(pdf_path, 'rb'))
-    doc = PDFDocument()
-    praser.set_document(doc)
-    doc.set_parser(praser)
-    doc.initialize()
-    if not doc.is_extractable:
-        raise PDFTextExtractionNotAllowed
-    else:
-        rsrcmgr = PDFResourceManager()
-        laparams = LAParams()
-        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
-        interpreter = PDFPageInterpreter(rsrcmgr, device)
-        content = ''
-        for page in doc.get_pages():
-            interpreter.process_page(page)
-            layout = device.get_result()
-            for x in layout:
-                if isinstance(x, LTTextBox):
-                    content = content + x.get_text().strip()
-    return content
-
-## image
-
 # Generate a QR code
 def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
     import qrcode
     img = qrcode.make(data)
     img.save(filename+file_format)
 
-## audio
-
 # Convert text to audio
 def str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
     import pyttsx3
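
The body of str_to_audio() is not shown in this hunk beyond its pyttsx3 import, so the following is only a plausible sketch of the standard pyttsx3 flow it presumably wraps, using the documented defaults rate=125 and voice index 1:

import pyttsx3

engine = pyttsx3.init()
engine.setProperty('rate', 125)                # documented default rate
voices = engine.getProperty('voices')
if len(voices) > 1:
    engine.setProperty('voice', voices[1].id)  # documented default voice index
engine.say('hello world')
engine.runAndWait()
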
@@ -3891,176 +4060,3 @@ def compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k'):
     from pydub import AudioSegment
     sound = AudioSegment.from_mp3(wav_path)
     sound.export(output_filename,format="mp3",bitrate=bitrate)
-
-## words
-
-# Play academic words
-def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
-    from bs4 import BeautifulSoup
-    import re
-    import urllib.request
-    import requests
-    import os
-    import pygame
-    import time
-    import ssl
-    import random
-    ssl._create_default_https_context = ssl._create_unverified_context
-    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
-    if bre_or_ame == 'ame':
-        directory = 'words_mp3_ameProns/'
-    elif bre_or_ame == 'bre':
-        directory = 'words_mp3_breProns/'
-    exist_directory = os.path.exists(directory)
-    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
-    if exist_directory == 0:
-        os.makedirs(directory)
-    soup = BeautifulSoup(html, features='lxml')
-    contents = re.findall('<h2.*?</h2>', html, re.S)
-    for content in contents:
-        # ... (the word download and playback steps were lost when this diff was rendered to text) ...
-        translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
-        if show_translation==1:
-            time.sleep(translation_time)
-            print(translation)
-        time.sleep(rest_time)
-        pygame.mixer.music.stop()
-        print()
-
-# Play selected academic words
-def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
-    from bs4 import BeautifulSoup
-    import re
-    import urllib.request
-    import requests
-    import os
-    import pygame
-    import time
-    import ssl
-    import random
-    ssl._create_default_https_context = ssl._create_unverified_context
-    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
-    if bre_or_ame == 'ame':
-        directory = 'words_mp3_ameProns/'
-    elif bre_or_ame == 'bre':
-        directory = 'words_mp3_breProns/'
-    exist_directory = os.path.exists(directory)
-    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
-    if exist_directory == 0:
-        os.makedirs(directory)
-    soup = BeautifulSoup(html, features='lxml')
-    contents = re.findall('<h2.*?</h2>', html, re.S)
-    # ... (the playback loop of this function and the beginning of play_element_words were lost when this diff was rendered to text) ...
-
-# Play words from the periodic table of elements
-def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
-    # ... (imports, page fetch, and the enclosing word loop were lost when this diff was rendered to text) ...
-    translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
-    if show_translation==1:
-        time.sleep(translation_time)
-        print(translation)
-    time.sleep(rest_time)
-    pygame.mixer.music.stop()
-    print()
\ No newline at end of file
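
For reference, the playback pattern shared by the three play_* helpers, reduced to a minimal sketch; 'word.mp3' stands for a hypothetical file already downloaded into the words directory:

import time
import pygame

pygame.mixer.init()
pygame.mixer.music.load('word.mp3')   # hypothetical local file
pygame.mixer.music.play()
time.sleep(1)                         # corresponds to rest_time in the helpers
pygame.mixer.music.stop()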