From 2d36804856523ff8bf048cba6bf15b31a1225ecc Mon Sep 17 00:00:00 2001
From: guanjihuan
Date: Tue, 3 Oct 2023 09:15:25 +0800
Subject: [PATCH] update
---
API_Reference/API_Reference.py | 127 ++---
Source_Code/PyPI/src/guan/__init__.py | 788 +++++++++++++-------------
2 files changed, 441 insertions(+), 474 deletions(-)
diff --git a/API_Reference/API_Reference.py b/API_Reference/API_Reference.py
index b50d464..7823061 100644
--- a/API_Reference/API_Reference.py
+++ b/API_Reference/API_Reference.py
@@ -14,6 +14,8 @@ import guan
+
+
# Module 1: basic functions
guan.test()
@@ -75,6 +77,12 @@ sigma_zz = guan.sigma_zz()
+
+
+
+
+
+
@@ -453,6 +461,8 @@ local_dos = guan.local_density_of_states_for_square_lattice_with_self_energy_usi
+
+
@@ -610,6 +620,8 @@ wilson_loop_array = guan.calculate_wilson_loop(hamiltonian_function, k_min=-math
+
+
@@ -793,7 +805,32 @@ hashed_password = guan.encryption_MD5(password, salt='')
# Hash a password with SHA-256
hashed_password = guan.encryption_SHA_256(password, salt='')
+# Get the current date as a string
+datetime_date = guan.get_date(bar=True)
+# Get the current time as a string
+datetime_time = guan.get_time()
+
+# Get all stocks
+title, stock_data = guan.all_stocks()
+
+# Get the symbols of all stocks
+stock_symbols = guan.all_stock_symbols()
+
+# Get a stock name from its symbol
+stock_name = guan.find_stock_name_from_symbol(symbol='000002')
+
+# Get historical data of a single stock
+title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101')
+
+# Play academic words
+guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)
+
+# Play selected academic words
+guan.play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3)
+
+# Play words from the periodic table of elements
+guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)
@@ -824,6 +861,18 @@ guan.make_directory(directory='./test')
# Copy a file
guan.copy_file(file1='./a.txt', file2='./b.txt')
+# Combine two PDF files
+guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')
+
+# Convert a PDF file to text
+content = guan.pdf_to_text(pdf_path)
+
+# Get links from a PDF document. For example: link_starting_form='https://doi.org'
+links = guan.get_links_from_pdf(pdf_path, link_starting_form='')
+
+# Download papers via the Sci-Hub website
+guan.download_with_scihub(address=None, num=1)
+
# Write the file directory structure into a Markdown file
guan.write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None)
@@ -845,76 +894,9 @@ guan.move_all_files_to_root_directory(directory)
# Change the current directory location
guan.change_directory_by_replacement(current_key_word='code', new_key_word='data')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# Module 14: others
-
-# time
-
-# Get the current date as a string
-datetime_date = guan.get_date(bar=True)
-
-# Get the current time as a string
-datetime_time = guan.get_time()
-
-# stocks
-
-# Get all stocks
-title, stock_data = guan.all_stocks()
-
-# Get the symbols of all stocks
-stock_symbols = guan.all_stock_symbols()
-
-# Get a stock name from its symbol
-stock_name = guan.find_stock_name_from_symbol(symbol='000002')
-
-# Get historical data of a single stock
-title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101')
-
-# Combine two PDF files
-guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')
-
-# download
-
-# Download papers via the Sci-Hub website
-guan.download_with_scihub(address=None, num=1)
-
-# PDF
-
-# Get links from a PDF document. For example: link_starting_form='https://doi.org'
-links = guan.get_links_from_pdf(pdf_path, link_starting_form='')
-
-# Convert a PDF file to text
-content = guan.pdf_to_text(pdf_path)
-
-# image
-
# Generate a QR code
guan.creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png')
-# audio
-
# Convert text to audio
guan.str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0)
@@ -926,14 +908,3 @@ guan.pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, compress=0, bitra
# Compress a WAV audio file into an MP3 audio file
guan.compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k')
-
-# words
-
-# Play academic words
-guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)
-
-# Play selected academic words
-guan.play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3)
-
-# Play words from the periodic table of elements
-guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)
\ No newline at end of file
diff --git a/Source_Code/PyPI/src/guan/__init__.py b/Source_Code/PyPI/src/guan/__init__.py
index e9a7de9..8520aeb 100644
--- a/Source_Code/PyPI/src/guan/__init__.py
+++ b/Source_Code/PyPI/src/guan/__init__.py
@@ -50,6 +50,9 @@
+
+
+
@@ -904,6 +907,10 @@ def hamiltonian_of_kagome_lattice(kx, ky, t=1):
+
+
+
+
@@ -2040,6 +2047,18 @@ def calculate_scattering_matrix_with_disorder_and_get_averaged_information(fermi
+
+
+
+
+
+
+
+
+
+
+
+
@@ -3010,6 +3029,16 @@ def color_matplotlib():
+
+
+
+
+
+
+
+
+
+
@@ -3240,6 +3269,40 @@ def print_array_with_index(array, show_index=1, index_type=0):
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# Module 12: data processing
# Preprocessing before parallel computation: split the parameters into several parts
@@ -3321,6 +3384,228 @@ def encryption_SHA_256(password, salt=''):
    hashed_password = hashlib.sha256(password.encode()).hexdigest()
    return hashed_password
+# Get the current date as a string
+def get_date(bar=True):
+    import datetime
+    datetime_date = str(datetime.date.today())
+    if bar==False:
+        datetime_date = datetime_date.replace('-', '')
+    return datetime_date
+
+# Get the current time as a string
+def get_time():
+    import datetime
+    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
+    return datetime_time
+
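+# A minimal usage sketch for the two helpers above (from the code: get_date returns
+# 'YYYY-MM-DD' with bar=True and 'YYYYMMDD' with bar=False; get_time returns 'HH:MM:SS');
+# the sketch function itself is illustrative and not part of the library API:
+def _usage_sketch_date_and_time():
+    print(get_date(bar=True))   # e.g. 2023-10-03
+    print(get_date(bar=False))  # e.g. 20231003
+    print(get_time())           # e.g. 09:15:25
+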
+# Get all stocks
+def all_stocks():
+    import akshare as ak
+    stocks = ak.stock_zh_a_spot_em()
+    title = np.array(stocks.columns)
+    stock_data = stocks.values
+    return title, stock_data
+
+# Get the symbols of all stocks
+def all_stock_symbols():
+    title, stock_data = guan.all_stocks()
+    stock_symbols = stock_data[:, 1]
+    return stock_symbols
+
+# Get a stock name from its symbol
+def find_stock_name_from_symbol(symbol='000002'):
+    title, stock_data = guan.all_stocks()
+    for stock in stock_data:
+        if symbol in stock:
+            stock_name = stock[2]
+    return stock_name
+
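+# A minimal usage sketch for the stock helpers above, assuming akshare is installed
+# and network access is available (the sketch function is illustrative only):
+def _usage_sketch_stock_lookup():
+    symbols = all_stock_symbols()
+    print(len(symbols))                           # number of listed stocks
+    print(find_stock_name_from_symbol('000002'))  # name for one symbol
+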
+# Get historical data of a single stock
+def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
+    # period = 'daily'
+    # period = 'weekly'
+    # period = 'monthly'
+    import akshare as ak
+    stock = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
+    title = np.array(stock.columns)
+    stock_data = stock.values[::-1]
+    return title, stock_data
+
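+# A minimal usage sketch, assuming akshare is installed; stock_data is reversed from
+# the akshare result, so row 0 is the most recent record (illustrative only):
+def _usage_sketch_history_data():
+    title, stock_data = history_data_of_one_stock(symbol='000002', period='weekly')
+    print(title)          # column names of the akshare table
+    print(stock_data[0])  # most recent record
+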
+# Play academic words
+def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
+    from bs4 import BeautifulSoup
+    import re
+    import urllib.request
+    import requests
+    import os
+    import pygame
+    import time
+    import ssl
+    import random
+    ssl._create_default_https_context = ssl._create_unverified_context
+    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
+    if bre_or_ame == 'ame':
+        directory = 'words_mp3_ameProns/'
+    elif bre_or_ame == 'bre':
+        directory = 'words_mp3_breProns/'
+    exist_directory = os.path.exists(directory)
+    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
+    if exist_directory == 0:
+        os.makedirs(directory)
+    soup = BeautifulSoup(html, features='lxml')
+    contents = re.findall('<h2.*?</p>', html, re.S)
+    if random_on==1:
+        random.shuffle(contents)
+    if reverse==1:
+        contents.reverse()
+    for content in contents:
+        soup2 = BeautifulSoup(content, features='lxml')
+        all_h2 = soup2.find_all('h2')
+        for h2 in all_h2:
+            if re.search('\d*. ', h2.get_text()):
+                word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0]
+                exist = os.path.exists(directory+word+'.mp3')
+                if not exist:
+                    try:
+                        if re.search(word, html_file):
+                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
+                            with open(directory+word+'.mp3', 'wb') as f:
+                                for chunk in r.iter_content(chunk_size=32):
+                                    f.write(chunk)
+                    except:
+                        pass
+                print(h2.get_text())
+                try:
+                    pygame.mixer.init()
+                    track = pygame.mixer.music.load(directory+word+'.mp3')
+                    pygame.mixer.music.play()
+                    if show_link==1:
+                        print('https://www.ldoceonline.com/dictionary/'+word)
+                except:
+                    pass
+                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
+                if show_translation==1:
+                    time.sleep(translation_time)
+                    print(translation)
+                time.sleep(rest_time)
+                pygame.mixer.music.stop()
+                print()
+
+# Play selected academic words
+def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
+    from bs4 import BeautifulSoup
+    import re
+    import urllib.request
+    import requests
+    import os
+    import pygame
+    import time
+    import ssl
+    import random
+    ssl._create_default_https_context = ssl._create_unverified_context
+    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
+    if bre_or_ame == 'ame':
+        directory = 'words_mp3_ameProns/'
+    elif bre_or_ame == 'bre':
+        directory = 'words_mp3_breProns/'
+    exist_directory = os.path.exists(directory)
+    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
+    if exist_directory == 0:
+        os.makedirs(directory)
+    soup = BeautifulSoup(html, features='lxml')
+    contents = re.findall('<li>\d.*?</li>', html, re.S)
+    if random_on==1:
+        random.shuffle(contents)
+    if reverse==1:
+        contents.reverse()
+    for content in contents:
+        soup2 = BeautifulSoup(content, features='lxml')
+        all_li = soup2.find_all('li')
+        for li in all_li:
+            if re.search('\d*. ', li.get_text()):
+                word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1]
+                exist = os.path.exists(directory+word+'.mp3')
+                if not exist:
+                    try:
+                        if re.search(word, html_file):
+                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
+                            with open(directory+word+'.mp3', 'wb') as f:
+                                for chunk in r.iter_content(chunk_size=32):
+                                    f.write(chunk)
+                    except:
+                        pass
+                print(li.get_text())
+                try:
+                    pygame.mixer.init()
+                    track = pygame.mixer.music.load(directory+word+'.mp3')
+                    pygame.mixer.music.play()
+                    if show_link==1:
+                        print('https://www.ldoceonline.com/dictionary/'+word)
+                except:
+                    pass
+                time.sleep(rest_time)
+                pygame.mixer.music.stop()
+                print()
+
+# Play words from the periodic table of elements
+def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
+    from bs4 import BeautifulSoup
+    import re
+    import urllib.request
+    import requests
+    import os
+    import pygame
+    import time
+    import ssl
+    import random
+    ssl._create_default_https_context = ssl._create_unverified_context
+    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8')
+    directory = 'prons/'
+    exist_directory = os.path.exists(directory)
+    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8')
+    if exist_directory == 0:
+        os.makedirs(directory)
+    soup = BeautifulSoup(html, features='lxml')
+    contents = re.findall('<h2.*?</p>', html, re.S)
+    if random_on==1:
+        random.shuffle(contents)
+    for content in contents:
+        soup2 = BeautifulSoup(content, features='lxml')
+        all_h2 = soup2.find_all('h2')
+        for h2 in all_h2:
+            if re.search('\d*. ', h2.get_text()):
+                word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2]
+                exist = os.path.exists(directory+word+'.mp3')
+                if not exist:
+                    try:
+                        if re.search(word, html_file):
+                            r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True)
+                            with open(directory+word+'.mp3', 'wb') as f:
+                                for chunk in r.iter_content(chunk_size=32):
+                                    f.write(chunk)
+                    except:
+                        pass
+                print(h2.get_text())
+                try:
+                    pygame.mixer.init()
+                    track = pygame.mixer.music.load(directory+word+'.mp3')
+                    pygame.mixer.music.play()
+                    if show_link==1:
+                        print('https://www.merriam-webster.com/dictionary/'+word)
+                except:
+                    pass
+                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
+                if show_translation==1:
+                    time.sleep(translation_time)
+                    print(translation)
+                time.sleep(rest_time)
+                pygame.mixer.music.stop()
+                print()
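+# Design note on the three play_* functions above: each pronunciation mp3 is fetched
+# from file.guanjihuan.com only when it is not already cached in the local directory,
+# so repeated runs need the network only for words that have not been played before.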
+
+
+
+
+
@@ -3369,6 +3654,113 @@ def copy_file(file1='./a.txt', file2='./b.txt'):
    import shutil
    shutil.copy(file1, file2)
+# Combine two PDF files
+def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
+    import PyPDF2
+    output_pdf = PyPDF2.PdfWriter()
+    with open(input_file_1, 'rb') as file1:
+        pdf1 = PyPDF2.PdfReader(file1)
+        for page in range(len(pdf1.pages)):
+            output_pdf.add_page(pdf1.pages[page])
+    with open(input_file_2, 'rb') as file2:
+        pdf2 = PyPDF2.PdfReader(file2)
+        for page in range(len(pdf2.pages)):
+            output_pdf.add_page(pdf2.pages[page])
+    with open(output_file, 'wb') as combined_file:
+        output_pdf.write(combined_file)
+
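+# A minimal usage sketch, assuming a PyPDF2 release that provides PdfWriter/PdfReader
+# and that the two hypothetical input files exist in the working directory:
+def _usage_sketch_combine_pdfs():
+    combine_two_pdf_files(input_file_1='chapter_1.pdf', input_file_2='chapter_2.pdf', output_file='book.pdf')
+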
+# Convert a PDF file to text
+def pdf_to_text(pdf_path):
+    from pdfminer.pdfparser import PDFParser, PDFDocument
+    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+    from pdfminer.converter import PDFPageAggregator
+    from pdfminer.layout import LAParams, LTTextBox
+    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
+    import logging
+    logging.Logger.propagate = False
+    logging.getLogger().setLevel(logging.ERROR)
+    praser = PDFParser(open(pdf_path, 'rb'))
+    doc = PDFDocument()
+    praser.set_document(doc)
+    doc.set_parser(praser)
+    doc.initialize()
+    if not doc.is_extractable:
+        raise PDFTextExtractionNotAllowed
+    else:
+        rsrcmgr = PDFResourceManager()
+        laparams = LAParams()
+        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
+        interpreter = PDFPageInterpreter(rsrcmgr, device)
+        content = ''
+        for page in doc.get_pages():
+            interpreter.process_page(page)
+            layout = device.get_result()
+            for x in layout:
+                if isinstance(x, LTTextBox):
+                    content = content + x.get_text().strip()
+    return content
+
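+# A minimal usage sketch; note that pdf_to_text uses the legacy pdfminer API
+# (PDFDocument imported from pdfminer.pdfparser), which belongs to the original
+# pdfminer package rather than pdfminer.six ('a.pdf' is a hypothetical path):
+def _usage_sketch_pdf_to_text():
+    content = pdf_to_text('a.pdf')
+    print(content[:200])
+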
+# Get links from a PDF document. For example: link_starting_form='https://doi.org'
+def get_links_from_pdf(pdf_path, link_starting_form=''):
+    import PyPDF2
+    import re
+    pdfReader = PyPDF2.PdfFileReader(pdf_path)
+    pages = pdfReader.getNumPages()
+    i0 = 0
+    links = []
+    for page in range(pages):
+        pageSliced = pdfReader.getPage(page)
+        pageObject = pageSliced.getObject()
+        if '/Annots' in pageObject.keys():
+            ann = pageObject['/Annots']
+            old = ''
+            for a in ann:
+                u = a.getObject()
+                if '/A' in u.keys():
+                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
+                        if u['/A']['/URI'] != old:
+                            links.append(u['/A']['/URI'])
+                            i0 += 1
+                            old = u['/A']['/URI']
+    return links
+
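+# A minimal usage sketch; get_links_from_pdf relies on the legacy PyPDF2
+# PdfFileReader/getNumPages interface, which was removed in PyPDF2 3.x, so it
+# needs an older PyPDF2 release ('a.pdf' is a hypothetical path):
+def _usage_sketch_get_links():
+    links = get_links_from_pdf('a.pdf', link_starting_form='https://doi.org')
+    for link in links:
+        print(link)
+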
+# Download papers via the Sci-Hub website
+def download_with_scihub(address=None, num=1):
+    from bs4 import BeautifulSoup
+    import re
+    import requests
+    import os
+    if num==1 and address!=None:
+        address_array = [address]
+    else:
+        address_array = []
+        for i in range(num):
+            address = input('\nInput:')
+            address_array.append(address)
+    for address in address_array:
+        r = requests.post('https://sci-hub.st/', data={'request': address})
+        print('\nResponse:', r)
+        print('Address:', r.url)
+        soup = BeautifulSoup(r.text, features='lxml')
+        pdf_URL = soup.embed['src']
+        # pdf_URL = soup.iframe['src'] # This is a code line of history version which fails to get pdf URL.
+        if re.search(re.compile('^https:'), pdf_URL):
+            pass
+        else:
+            pdf_URL = 'https:'+pdf_URL
+        print('PDF address:', pdf_URL)
+        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
+        print('PDF name:', name)
+        print('Directory:', os.getcwd())
+        print('\nDownloading...')
+        r = requests.get(pdf_URL, stream=True)
+        with open(name, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=32):
+                f.write(chunk)
+        print('Completed!\n')
+    if num != 1:
+        print('All completed!\n')
+
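+# A minimal usage sketch, assuming sci-hub.st is reachable (the DOI below is a
+# hypothetical placeholder, not a real reference):
+def _usage_sketch_download():
+    download_with_scihub(address='https://doi.org/10.1000/example', num=1)
+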
# Write the file directory structure into a Markdown file
def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
    import os
@@ -3582,235 +3974,12 @@ def change_directory_by_replacement(current_key_word='code', new_key_word='data'
        os.makedirs(data_path)
    os.chdir(data_path)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# Module 14: others
-
-## time
-
-# Get the current date as a string
-def get_date(bar=True):
-    import datetime
-    datetime_date = str(datetime.date.today())
-    if bar==False:
-        datetime_date = datetime_date.replace('-', '')
-    return datetime_date
-
-# Get the current time as a string
-def get_time():
-    import datetime
-    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
-    return datetime_time
-
-## stocks
-
-# Get all stocks
-def all_stocks():
-    import akshare as ak
-    stocks = ak.stock_zh_a_spot_em()
-    title = np.array(stocks.columns)
-    stock_data = stocks.values
-    return title, stock_data
-
-# Get the symbols of all stocks
-def all_stock_symbols():
-    title, stock_data = guan.all_stocks()
-    stock_symbols = stock_data[:, 1]
-    return stock_symbols
-
-# Get a stock name from its symbol
-def find_stock_name_from_symbol(symbol='000002'):
-    title, stock_data = guan.all_stocks()
-    for stock in stock_data:
-        if symbol in stock:
-            stock_name = stock[2]
-    return stock_name
-
-# Get historical data of a single stock
-def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
-    # period = 'daily'
-    # period = 'weekly'
-    # period = 'monthly'
-    import akshare as ak
-    stock = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
-    title = np.array(stock.columns)
-    stock_data = stock.values[::-1]
-    return title, stock_data
-
-## download
-
-# Download papers via the Sci-Hub website
-def download_with_scihub(address=None, num=1):
-    from bs4 import BeautifulSoup
-    import re
-    import requests
-    import os
-    if num==1 and address!=None:
-        address_array = [address]
-    else:
-        address_array = []
-        for i in range(num):
-            address = input('\nInput:')
-            address_array.append(address)
-    for address in address_array:
-        r = requests.post('https://sci-hub.st/', data={'request': address})
-        print('\nResponse:', r)
-        print('Address:', r.url)
-        soup = BeautifulSoup(r.text, features='lxml')
-        pdf_URL = soup.embed['src']
-        # pdf_URL = soup.iframe['src'] # This is a code line of history version which fails to get pdf URL.
-        if re.search(re.compile('^https:'), pdf_URL):
-            pass
-        else:
-            pdf_URL = 'https:'+pdf_URL
-        print('PDF address:', pdf_URL)
-        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
-        print('PDF name:', name)
-        print('Directory:', os.getcwd())
-        print('\nDownloading...')
-        r = requests.get(pdf_URL, stream=True)
-        with open(name, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=32):
-                f.write(chunk)
-        print('Completed!\n')
-    if num != 1:
-        print('All completed!\n')
-
-## PDF
-
-# Combine two PDF files
-def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
-    import PyPDF2
-    output_pdf = PyPDF2.PdfWriter()
-    with open(input_file_1, 'rb') as file1:
-        pdf1 = PyPDF2.PdfReader(file1)
-        for page in range(len(pdf1.pages)):
-            output_pdf.add_page(pdf1.pages[page])
-    with open(input_file_2, 'rb') as file2:
-        pdf2 = PyPDF2.PdfReader(file2)
-        for page in range(len(pdf2.pages)):
-            output_pdf.add_page(pdf2.pages[page])
-    with open(output_file, 'wb') as combined_file:
-        output_pdf.write(combined_file)
-
-
-# Get links from a PDF document. For example: link_starting_form='https://doi.org'
-def get_links_from_pdf(pdf_path, link_starting_form=''):
-    import PyPDF2
-    import re
-    pdfReader = PyPDF2.PdfFileReader(pdf_path)
-    pages = pdfReader.getNumPages()
-    i0 = 0
-    links = []
-    for page in range(pages):
-        pageSliced = pdfReader.getPage(page)
-        pageObject = pageSliced.getObject()
-        if '/Annots' in pageObject.keys():
-            ann = pageObject['/Annots']
-            old = ''
-            for a in ann:
-                u = a.getObject()
-                if '/A' in u.keys():
-                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
-                        if u['/A']['/URI'] != old:
-                            links.append(u['/A']['/URI'])
-                            i0 += 1
-                            old = u['/A']['/URI']
-    return links
-
-# Convert a PDF file to text
-def pdf_to_text(pdf_path):
-    from pdfminer.pdfparser import PDFParser, PDFDocument
-    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
-    from pdfminer.converter import PDFPageAggregator
-    from pdfminer.layout import LAParams, LTTextBox
-    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
-    import logging
-    logging.Logger.propagate = False
-    logging.getLogger().setLevel(logging.ERROR)
-    praser = PDFParser(open(pdf_path, 'rb'))
-    doc = PDFDocument()
-    praser.set_document(doc)
-    doc.set_parser(praser)
-    doc.initialize()
-    if not doc.is_extractable:
-        raise PDFTextExtractionNotAllowed
-    else:
-        rsrcmgr = PDFResourceManager()
-        laparams = LAParams()
-        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
-        interpreter = PDFPageInterpreter(rsrcmgr, device)
-        content = ''
-        for page in doc.get_pages():
-            interpreter.process_page(page)
-            layout = device.get_result()
-            for x in layout:
-                if isinstance(x, LTTextBox):
-                    content = content + x.get_text().strip()
-    return content
-
-
-## image
-
# Generate a QR code
def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
    import qrcode
    img = qrcode.make(data)
    img.save(filename+file_format)
-
-## audio
-
# Convert text to audio
def str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    import pyttsx3
@@ -3891,176 +4060,3 @@ def compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k'):
    from pydub import AudioSegment
    sound = AudioSegment.from_mp3(wav_path)
    sound.export(output_filename,format="mp3",bitrate=bitrate)
-
-## words
-
-# Play academic words
-def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
-    from bs4 import BeautifulSoup
-    import re
-    import urllib.request
-    import requests
-    import os
-    import pygame
-    import time
-    import ssl
-    import random
-    ssl._create_default_https_context = ssl._create_unverified_context
-    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
-    if bre_or_ame == 'ame':
-        directory = 'words_mp3_ameProns/'
-    elif bre_or_ame == 'bre':
-        directory = 'words_mp3_breProns/'
-    exist_directory = os.path.exists(directory)
-    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
-    if exist_directory == 0:
-        os.makedirs(directory)
-    soup = BeautifulSoup(html, features='lxml')
-    contents = re.findall('<h2.*?</p>', html, re.S)
-    if random_on==1:
-        random.shuffle(contents)
-    if reverse==1:
-        contents.reverse()
-    for content in contents:
-        soup2 = BeautifulSoup(content, features='lxml')
-        all_h2 = soup2.find_all('h2')
-        for h2 in all_h2:
-            if re.search('\d*. ', h2.get_text()):
-                word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0]
-                exist = os.path.exists(directory+word+'.mp3')
-                if not exist:
-                    try:
-                        if re.search(word, html_file):
-                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
-                            with open(directory+word+'.mp3', 'wb') as f:
-                                for chunk in r.iter_content(chunk_size=32):
-                                    f.write(chunk)
-                    except:
-                        pass
-                print(h2.get_text())
-                try:
-                    pygame.mixer.init()
-                    track = pygame.mixer.music.load(directory+word+'.mp3')
-                    pygame.mixer.music.play()
-                    if show_link==1:
-                        print('https://www.ldoceonline.com/dictionary/'+word)
-                except:
-                    pass
-                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
-                if show_translation==1:
-                    time.sleep(translation_time)
-                    print(translation)
-                time.sleep(rest_time)
-                pygame.mixer.music.stop()
-                print()
-
-# Play selected academic words
-def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
-    from bs4 import BeautifulSoup
-    import re
-    import urllib.request
-    import requests
-    import os
-    import pygame
-    import time
-    import ssl
-    import random
-    ssl._create_default_https_context = ssl._create_unverified_context
-    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
-    if bre_or_ame == 'ame':
-        directory = 'words_mp3_ameProns/'
-    elif bre_or_ame == 'bre':
-        directory = 'words_mp3_breProns/'
-    exist_directory = os.path.exists(directory)
-    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
-    if exist_directory == 0:
-        os.makedirs(directory)
-    soup = BeautifulSoup(html, features='lxml')
-    contents = re.findall('<li>\d.*?</li>', html, re.S)
-    if random_on==1:
-        random.shuffle(contents)
-    if reverse==1:
-        contents.reverse()
-    for content in contents:
-        soup2 = BeautifulSoup(content, features='lxml')
-        all_li = soup2.find_all('li')
-        for li in all_li:
-            if re.search('\d*. ', li.get_text()):
-                word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1]
-                exist = os.path.exists(directory+word+'.mp3')
-                if not exist:
-                    try:
-                        if re.search(word, html_file):
-                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
-                            with open(directory+word+'.mp3', 'wb') as f:
-                                for chunk in r.iter_content(chunk_size=32):
-                                    f.write(chunk)
-                    except:
-                        pass
-                print(li.get_text())
-                try:
-                    pygame.mixer.init()
-                    track = pygame.mixer.music.load(directory+word+'.mp3')
-                    pygame.mixer.music.play()
-                    if show_link==1:
-                        print('https://www.ldoceonline.com/dictionary/'+word)
-                except:
-                    pass
-                time.sleep(rest_time)
-                pygame.mixer.music.stop()
-                print()
-
-# Play words from the periodic table of elements
-def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
-    from bs4 import BeautifulSoup
-    import re
-    import urllib.request
-    import requests
-    import os
-    import pygame
-    import time
-    import ssl
-    import random
-    ssl._create_default_https_context = ssl._create_unverified_context
-    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8')
-    directory = 'prons/'
-    exist_directory = os.path.exists(directory)
-    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8')
-    if exist_directory == 0:
-        os.makedirs(directory)
-    soup = BeautifulSoup(html, features='lxml')
-    contents = re.findall('<h2.*?</p>', html, re.S)
-    if random_on==1:
-        random.shuffle(contents)
-    for content in contents:
-        soup2 = BeautifulSoup(content, features='lxml')
-        all_h2 = soup2.find_all('h2')
-        for h2 in all_h2:
-            if re.search('\d*. ', h2.get_text()):
-                word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2]
-                exist = os.path.exists(directory+word+'.mp3')
-                if not exist:
-                    try:
-                        if re.search(word, html_file):
-                            r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True)
-                            with open(directory+word+'.mp3', 'wb') as f:
-                                for chunk in r.iter_content(chunk_size=32):
-                                    f.write(chunk)
-                    except:
-                        pass
-                print(h2.get_text())
-                try:
-                    pygame.mixer.init()
-                    track = pygame.mixer.music.load(directory+word+'.mp3')
-                    pygame.mixer.music.play()
-                    if show_link==1:
-                        print('https://www.merriam-webster.com/dictionary/'+word)
-                except:
-                    pass
-                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
-                if show_translation==1:
-                    time.sleep(translation_time)
-                    print(translation)
-                time.sleep(rest_time)
-                pygame.mixer.music.stop()
-                print()
\ No newline at end of file