update
This commit is contained in:
parent ea023e26b3
commit 2d36804856
@@ -14,6 +14,8 @@ import guan

# Module 1: basic functions

guan.test()

@@ -75,6 +77,12 @@ sigma_zz = guan.sigma_zz()

@@ -453,6 +461,8 @@ local_dos = guan.local_density_of_states_for_square_lattice_with_self_energy_usi

@@ -610,6 +620,8 @@ wilson_loop_array = guan.calculate_wilson_loop(hamiltonian_function, k_min=-math

@@ -793,7 +805,32 @@ hashed_password = guan.encryption_MD5(password, salt='')

# Hash with SHA-256
hashed_password = guan.encryption_SHA_256(password, salt='')

# Get the current date string
datetime_date = guan.get_date(bar=True)

# Get the current time string
datetime_time = guan.get_time()

# Get all stocks
title, stock_data = guan.all_stocks()

# Get the symbols of all stocks
stock_symbols = guan.all_stock_symbols()

# Find the stock name from the stock symbol
stock_name = guan.find_stock_name_from_symbol(symbol='000002')

# Get the historical data of a single stock
title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101')

# Play academic words
guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)

# Play selected academic words
guan.play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3)

# Play the words on the periodic table of elements
guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)

@@ -824,6 +861,18 @@ guan.make_directory(directory='./test')

# Copy a file
guan.copy_file(file1='./a.txt', file2='./b.txt')

# Combine two PDF files
guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')

# Convert a PDF file to text
content = guan.pdf_to_text(pdf_path)

# Get the links in a PDF document. For example: link_starting_form='https://doi.org'
links = guan.get_links_from_pdf(pdf_path, link_starting_form='')

# Download papers through the Sci-Hub website
guan.download_with_scihub(address=None, num=1)

# Write the file directory structure into a Markdown file
guan.write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None)

@@ -845,76 +894,9 @@ guan.move_all_files_to_root_directory(directory)

# Change the current directory
guan.change_directory_by_replacement(current_key_word='code', new_key_word='data')
# Module 14: others

# time

# Get the current date string
datetime_date = guan.get_date(bar=True)

# Get the current time string
datetime_time = guan.get_time()

# stocks

# Get all stocks
title, stock_data = guan.all_stocks()

# Get the symbols of all stocks
stock_symbols = guan.all_stock_symbols()

# Find the stock name from the stock symbol
stock_name = guan.find_stock_name_from_symbol(symbol='000002')

# Get the historical data of a single stock
title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101')

# Combine two PDF files
guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')

# download

# Download papers through the Sci-Hub website
guan.download_with_scihub(address=None, num=1)

# PDF

# Get the links in a PDF document. For example: link_starting_form='https://doi.org'
links = guan.get_links_from_pdf(pdf_path, link_starting_form='')

# Convert a PDF file to text
content = guan.pdf_to_text(pdf_path)

# image

# Generate a QR code
guan.creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png')

# audio

# Convert text to audio
guan.str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0)

@@ -926,14 +908,3 @@ guan.pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, compress=0, bitra

# Compress a wav audio file into an MP3 audio file
guan.compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k')

# words

# Play academic words
guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)

# Play selected academic words
guan.play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3)

# Play the words on the periodic table of elements
guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)
@@ -50,6 +50,9 @@

@@ -904,6 +907,10 @@ def hamiltonian_of_kagome_lattice(kx, ky, t=1):

@@ -2040,6 +2047,18 @@ def calculate_scattering_matrix_with_disorder_and_get_averaged_information(fermi

@@ -3010,6 +3029,16 @@ def color_matplotlib():

@@ -3240,6 +3269,40 @@ def print_array_with_index(array, show_index=1, index_type=0):
# Module 12: data processing

# Preprocessing before parallel computation: split the parameters into several parts

@@ -3321,6 +3384,228 @@ def encryption_SHA_256(password, salt=''):
    hashed_password = hashlib.sha256(password.encode()).hexdigest()
    return hashed_password
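# Illustrative sketch only: with the default salt='', the call below should
# reproduce a plain SHA-256 hex digest (64 hex characters). How a non-empty
# salt is mixed into the digest is not visible in this hunk.
import guan
hashed_password = guan.encryption_SHA_256('my_password')
print(hashed_password)
print(len(hashed_password))  # 64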
# Get the current date string
def get_date(bar=True):
    import datetime
    datetime_date = str(datetime.date.today())
    if bar==False:
        datetime_date = datetime_date.replace('-', '')
    return datetime_date

# Get the current time string
def get_time():
    import datetime
    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
    return datetime_time
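# Usage sketch for the two time helpers above; the printed values are examples.
import guan
print(guan.get_date())           # e.g. '2024-01-31' (ISO date with dashes)
print(guan.get_date(bar=False))  # e.g. '20240131' (dashes removed)
print(guan.get_time())           # e.g. '14:05:09' (%H:%M:%S)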
# Get all stocks
def all_stocks():
    import akshare as ak
    stocks = ak.stock_zh_a_spot_em()
    title = np.array(stocks.columns)
    stock_data = stocks.values
    return title, stock_data

# Get the symbols of all stocks
def all_stock_symbols():
    title, stock_data = guan.all_stocks()
    stock_symbols = stock_data[:, 1]
    return stock_symbols

# Find the stock name from the stock symbol
def find_stock_name_from_symbol(symbol='000002'):
    title, stock_data = guan.all_stocks()
    for stock in stock_data:
        if symbol in stock:
            stock_name = stock[2]
    return stock_name

# Get the historical data of a single stock
def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
    # period = 'daily'
    # period = 'weekly'
    # period = 'monthly'
    import akshare as ak
    stock = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
    title = np.array(stock.columns)
    stock_data = stock.values[::-1]
    return title, stock_data
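# Usage sketch for the stock helpers above. It assumes akshare is installed and
# its data service is reachable; the column names come from akshare, so they are
# read from the returned title array instead of being hard-coded here.
import guan
stock_symbols = guan.all_stock_symbols()
print(len(stock_symbols), stock_symbols[:5])
print(guan.find_stock_name_from_symbol(symbol='000002'))
title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily')
print(title)          # column names as returned by akshare
print(stock_data[0])  # first row; rows are reversed relative to the akshare output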
# Play academic words
def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
    if bre_or_ame == 'ame':
        directory = 'words_mp3_ameProns/'
    elif bre_or_ame == 'bre':
        directory = 'words_mp3_breProns/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<h2.*?</a></p>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    if reverse==1:
        contents.reverse()
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_h2 = soup2.find_all('h2')
        for h2 in all_h2:
            if re.search('\d*. ', h2.get_text()):
                word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(h2.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.ldoceonline.com/dictionary/'+word)
                except:
                    pass
                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
                if show_translation==1:
                    time.sleep(translation_time)
                    print(translation)
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()

# Play selected academic words
def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
    if bre_or_ame == 'ame':
        directory = 'words_mp3_ameProns/'
    elif bre_or_ame == 'bre':
        directory = 'words_mp3_breProns/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<li>\d.*?</li>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    if reverse==1:
        contents.reverse()
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_li = soup2.find_all('li')
        for li in all_li:
            if re.search('\d*. ', li.get_text()):
                word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(li.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.ldoceonline.com/dictionary/'+word)
                except:
                    pass
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()

# Play the words on the periodic table of elements
def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8')
    directory = 'prons/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<h2.*?</a></p>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_h2 = soup2.find_all('h2')
        for h2 in all_h2:
            if re.search('\d*. ', h2.get_text()):
                word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(h2.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.merriam-webster.com/dictionary/'+word)
                except:
                    pass
                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
                if show_translation==1:
                    time.sleep(translation_time)
                    print(translation)
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()
@@ -3369,6 +3654,113 @@ def copy_file(file1='./a.txt', file2='./b.txt'):
    import shutil
    shutil.copy(file1, file2)

# Combine two PDF files
def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
    import PyPDF2
    output_pdf = PyPDF2.PdfWriter()
    with open(input_file_1, 'rb') as file1:
        pdf1 = PyPDF2.PdfReader(file1)
        for page in range(len(pdf1.pages)):
            output_pdf.add_page(pdf1.pages[page])
    with open(input_file_2, 'rb') as file2:
        pdf2 = PyPDF2.PdfReader(file2)
        for page in range(len(pdf2.pages)):
            output_pdf.add_page(pdf2.pages[page])
    with open(output_file, 'wb') as combined_file:
        output_pdf.write(combined_file)
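# Usage sketch: merge two existing PDF files and check the page count of the
# result with PyPDF2 (a.pdf and b.pdf are placeholder file names).
import PyPDF2
import guan
guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')
with open('combined_file.pdf', 'rb') as f:
    print(len(PyPDF2.PdfReader(f).pages))  # pages of a.pdf + pages of b.pdf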
# Convert a PDF file to text
def pdf_to_text(pdf_path):
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    praser = PDFParser(open(pdf_path, 'rb'))
    doc = PDFDocument()
    praser.set_document(doc)
    doc.set_parser(praser)
    doc.initialize()
    if not doc.is_extractable:
        raise PDFTextExtractionNotAllowed
    else:
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        content = ''
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    content = content + x.get_text().strip()
    return content
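# Usage sketch for the text extraction above ('a.pdf' is a placeholder path).
# The imports in pdf_to_text follow the legacy pdfminer interface, so the
# original pdfminer package rather than pdfminer.six appears to be assumed.
import guan
content = guan.pdf_to_text('a.pdf')
print(content[:200])  # first 200 extracted characters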
# Get the links in a PDF document. For example: link_starting_form='https://doi.org'
def get_links_from_pdf(pdf_path, link_starting_form=''):
    import PyPDF2
    import re
    pdfReader = PyPDF2.PdfFileReader(pdf_path)
    pages = pdfReader.getNumPages()
    i0 = 0
    links = []
    for page in range(pages):
        pageSliced = pdfReader.getPage(page)
        pageObject = pageSliced.getObject()
        if '/Annots' in pageObject.keys():
            ann = pageObject['/Annots']
            old = ''
            for a in ann:
                u = a.getObject()
                if '/A' in u.keys():
                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
                        if u['/A']['/URI'] != old:
                            links.append(u['/A']['/URI'])
                            i0 += 1
                            old = u['/A']['/URI']
    return links
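# Usage sketch: collect only the DOI links of a paper ('paper.pdf' is a
# placeholder path; the function relies on the PyPDF2 1.x method names above).
import guan
doi_links = guan.get_links_from_pdf('paper.pdf', link_starting_form='https://doi.org')
for link in doi_links:
    print(link)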
# Download papers through the Sci-Hub website
def download_with_scihub(address=None, num=1):
    from bs4 import BeautifulSoup
    import re
    import requests
    import os
    if num==1 and address!=None:
        address_array = [address]
    else:
        address_array = []
        for i in range(num):
            address = input('\nInput:')
            address_array.append(address)
    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        pdf_URL = soup.embed['src']
        # pdf_URL = soup.iframe['src'] # This line is from an older version and fails to get the pdf URL.
        if re.search(re.compile('^https:'), pdf_URL):
            pass
        else:
            pdf_URL = 'https:'+pdf_URL
        print('PDF address:', pdf_URL)
        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=32):
                f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
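# Usage sketch for the Sci-Hub downloader above. The DOI is a placeholder and a
# download only succeeds if the site is reachable and actually has the paper.
import guan
guan.download_with_scihub(address='10.1000/xyz123', num=1)  # single address passed directly
guan.download_with_scihub(num=3)  # with address=None, prompts for one address per paper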
# Write the file directory structure into a Markdown file
def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
    import os

@@ -3582,235 +3974,12 @@ def change_directory_by_replacement(current_key_word='code', new_key_word='data'
        os.makedirs(data_path)
    os.chdir(data_path)
# Module 14: others

## time

# Get the current date string
def get_date(bar=True):
    import datetime
    datetime_date = str(datetime.date.today())
    if bar==False:
        datetime_date = datetime_date.replace('-', '')
    return datetime_date

# Get the current time string
def get_time():
    import datetime
    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
    return datetime_time

## stocks

# Get all stocks
def all_stocks():
    import akshare as ak
    stocks = ak.stock_zh_a_spot_em()
    title = np.array(stocks.columns)
    stock_data = stocks.values
    return title, stock_data

# Get the symbols of all stocks
def all_stock_symbols():
    title, stock_data = guan.all_stocks()
    stock_symbols = stock_data[:, 1]
    return stock_symbols

# Find the stock name from the stock symbol
def find_stock_name_from_symbol(symbol='000002'):
    title, stock_data = guan.all_stocks()
    for stock in stock_data:
        if symbol in stock:
            stock_name = stock[2]
    return stock_name

# Get the historical data of a single stock
def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
    # period = 'daily'
    # period = 'weekly'
    # period = 'monthly'
    import akshare as ak
    stock = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
    title = np.array(stock.columns)
    stock_data = stock.values[::-1]
    return title, stock_data
## download

# Download papers through the Sci-Hub website
def download_with_scihub(address=None, num=1):
    from bs4 import BeautifulSoup
    import re
    import requests
    import os
    if num==1 and address!=None:
        address_array = [address]
    else:
        address_array = []
        for i in range(num):
            address = input('\nInput:')
            address_array.append(address)
    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        pdf_URL = soup.embed['src']
        # pdf_URL = soup.iframe['src'] # This line is from an older version and fails to get the pdf URL.
        if re.search(re.compile('^https:'), pdf_URL):
            pass
        else:
            pdf_URL = 'https:'+pdf_URL
        print('PDF address:', pdf_URL)
        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=32):
                f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
## PDF

# Combine two PDF files
def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
    import PyPDF2
    output_pdf = PyPDF2.PdfWriter()
    with open(input_file_1, 'rb') as file1:
        pdf1 = PyPDF2.PdfReader(file1)
        for page in range(len(pdf1.pages)):
            output_pdf.add_page(pdf1.pages[page])
    with open(input_file_2, 'rb') as file2:
        pdf2 = PyPDF2.PdfReader(file2)
        for page in range(len(pdf2.pages)):
            output_pdf.add_page(pdf2.pages[page])
    with open(output_file, 'wb') as combined_file:
        output_pdf.write(combined_file)

# Get the links in a PDF document. For example: link_starting_form='https://doi.org'
def get_links_from_pdf(pdf_path, link_starting_form=''):
    import PyPDF2
    import re
    pdfReader = PyPDF2.PdfFileReader(pdf_path)
    pages = pdfReader.getNumPages()
    i0 = 0
    links = []
    for page in range(pages):
        pageSliced = pdfReader.getPage(page)
        pageObject = pageSliced.getObject()
        if '/Annots' in pageObject.keys():
            ann = pageObject['/Annots']
            old = ''
            for a in ann:
                u = a.getObject()
                if '/A' in u.keys():
                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
                        if u['/A']['/URI'] != old:
                            links.append(u['/A']['/URI'])
                            i0 += 1
                            old = u['/A']['/URI']
    return links

# Convert a PDF file to text
def pdf_to_text(pdf_path):
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    praser = PDFParser(open(pdf_path, 'rb'))
    doc = PDFDocument()
    praser.set_document(doc)
    doc.set_parser(praser)
    doc.initialize()
    if not doc.is_extractable:
        raise PDFTextExtractionNotAllowed
    else:
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        content = ''
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    content = content + x.get_text().strip()
    return content
## image

# Generate a QR code
def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
    import qrcode
    img = qrcode.make(data)
    img.save(filename+file_format)

## audio

# Convert text to audio
def str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    import pyttsx3

@@ -3891,176 +4060,3 @@ def compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k'):
    from pydub import AudioSegment
    sound = AudioSegment.from_mp3(wav_path)
    sound.export(output_filename,format="mp3",bitrate=bitrate)

## words
# Play academic words
def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
    if bre_or_ame == 'ame':
        directory = 'words_mp3_ameProns/'
    elif bre_or_ame == 'bre':
        directory = 'words_mp3_breProns/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<h2.*?</a></p>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    if reverse==1:
        contents.reverse()
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_h2 = soup2.find_all('h2')
        for h2 in all_h2:
            if re.search('\d*. ', h2.get_text()):
                word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(h2.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.ldoceonline.com/dictionary/'+word)
                except:
                    pass
                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
                if show_translation==1:
                    time.sleep(translation_time)
                    print(translation)
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()

# Play selected academic words
def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
    if bre_or_ame == 'ame':
        directory = 'words_mp3_ameProns/'
    elif bre_or_ame == 'bre':
        directory = 'words_mp3_breProns/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<li>\d.*?</li>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    if reverse==1:
        contents.reverse()
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_li = soup2.find_all('li')
        for li in all_li:
            if re.search('\d*. ', li.get_text()):
                word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(li.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.ldoceonline.com/dictionary/'+word)
                except:
                    pass
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()

# Play the words on the periodic table of elements
def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8')
    directory = 'prons/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<h2.*?</a></p>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_h2 = soup2.find_all('h2')
        for h2 in all_h2:
            if re.search('\d*. ', h2.get_text()):
                word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(h2.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.merriam-webster.com/dictionary/'+word)
                except:
                    pass
                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
                if show_translation==1:
                    time.sleep(translation_time)
                    print(translation)
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()