update
This commit is contained in:
parent ea023e26b3
commit 2d36804856
@@ -14,6 +14,8 @@ import guan

# Module 1: basic functions

guan.test()

@@ -75,6 +77,12 @@ sigma_zz = guan.sigma_zz()

@@ -453,6 +461,8 @@ local_dos = guan.local_density_of_states_for_square_lattice_with_self_energy_usi

@@ -610,6 +620,8 @@ wilson_loop_array = guan.calculate_wilson_loop(hamiltonian_function, k_min=-math

@@ -793,7 +805,32 @@ hashed_password = guan.encryption_MD5(password, salt='')

# Hash with SHA-256
hashed_password = guan.encryption_SHA_256(password, salt='')

# Get the current date string
datetime_date = guan.get_date(bar=True)

# Get the current time string
datetime_time = guan.get_time()

# Get all stocks
title, stock_data = guan.all_stocks()

# Get the symbols of all stocks
stock_symbols = guan.all_stock_symbols()

# Find the stock name from the stock symbol
stock_name = guan.find_stock_name_from_symbol(symbol='000002')

# Get the historical data of a single stock
title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101')

# Play academic words
guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)

# Play selected academic words
guan.play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3)

# Play the words on the periodic table of elements
guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)

@@ -824,6 +861,18 @@ guan.make_directory(directory='./test')

# Copy a file
guan.copy_file(file1='./a.txt', file2='./b.txt')

# Combine two PDF files
guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')

# Convert a PDF file to text
content = guan.pdf_to_text(pdf_path)

# Get the links in a PDF document. For example: link_starting_form='https://doi.org'
links = guan.get_links_from_pdf(pdf_path, link_starting_form='')

# Download papers through the Sci-Hub website
guan.download_with_scihub(address=None, num=1)

# Write the file directory structure into a Markdown file
guan.write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None)

@@ -845,76 +894,9 @@ guan.move_all_files_to_root_directory(directory)

# Change the current directory
guan.change_directory_by_replacement(current_key_word='code', new_key_word='data')
# Module 14: others

# time

# Get the current date string
datetime_date = guan.get_date(bar=True)

# Get the current time string
datetime_time = guan.get_time()

# stocks

# Get all stocks
title, stock_data = guan.all_stocks()

# Get the symbols of all stocks
stock_symbols = guan.all_stock_symbols()

# Find the stock name from the stock symbol
stock_name = guan.find_stock_name_from_symbol(symbol='000002')

# Get the historical data of a single stock
title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101')

# Combine two PDF files
guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')

# download

# Download papers through the Sci-Hub website
guan.download_with_scihub(address=None, num=1)

# PDF

# Get the links in a PDF document. For example: link_starting_form='https://doi.org'
links = guan.get_links_from_pdf(pdf_path, link_starting_form='')

# Convert a PDF file to text
content = guan.pdf_to_text(pdf_path)

# image

# Generate a QR code
guan.creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png')

# audio

# Convert text to audio
guan.str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0)

@@ -926,14 +908,3 @@ guan.pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, compress=0, bitra

# Compress a wav audio file into an MP3 audio file
guan.compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k')

# words

# Play academic words
guan.play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1)

# Play selected academic words
guan.play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3)

# Play the words on the periodic table of elements
guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)
@@ -50,6 +50,9 @@

@@ -904,6 +907,10 @@ def hamiltonian_of_kagome_lattice(kx, ky, t=1):

@@ -2040,6 +2047,18 @@ def calculate_scattering_matrix_with_disorder_and_get_averaged_information(fermi

@@ -3010,6 +3029,16 @@ def color_matplotlib():

@@ -3240,6 +3269,40 @@ def print_array_with_index(array, show_index=1, index_type=0):
# Module 12: data processing

# Preprocessing before parallel computation: split the parameters into several parts

@@ -3321,6 +3384,228 @@ def encryption_SHA_256(password, salt=''):
    hashed_password = hashlib.sha256(password.encode()).hexdigest()
    return hashed_password
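# Illustrative sketch only: with the default salt='', the call below should
# reproduce a plain SHA-256 hex digest (64 hex characters). How a non-empty
# salt is mixed into the digest is not visible in this hunk.
import guan
hashed_password = guan.encryption_SHA_256('my_password')
print(hashed_password)
print(len(hashed_password))  # 64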
# Get the current date string
def get_date(bar=True):
    import datetime
    datetime_date = str(datetime.date.today())
    if bar==False:
        datetime_date = datetime_date.replace('-', '')
    return datetime_date

# Get the current time string
def get_time():
    import datetime
    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
    return datetime_time
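# Usage sketch for the two time helpers above; the printed values are examples.
import guan
print(guan.get_date())           # e.g. '2024-01-31' (ISO date with dashes)
print(guan.get_date(bar=False))  # e.g. '20240131' (dashes removed)
print(guan.get_time())           # e.g. '14:05:09' (%H:%M:%S)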
# Get all stocks
def all_stocks():
    import akshare as ak
    stocks = ak.stock_zh_a_spot_em()
    title = np.array(stocks.columns)
    stock_data = stocks.values
    return title, stock_data

# Get the symbols of all stocks
def all_stock_symbols():
    title, stock_data = guan.all_stocks()
    stock_symbols = stock_data[:, 1]
    return stock_symbols

# Find the stock name from the stock symbol
def find_stock_name_from_symbol(symbol='000002'):
    title, stock_data = guan.all_stocks()
    for stock in stock_data:
        if symbol in stock:
            stock_name = stock[2]
    return stock_name

# Get the historical data of a single stock
def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
    # period = 'daily'
    # period = 'weekly'
    # period = 'monthly'
    import akshare as ak
    stock = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
    title = np.array(stock.columns)
    stock_data = stock.values[::-1]
    return title, stock_data
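# Usage sketch for the stock helpers above. It assumes akshare is installed and
# its data service is reachable; the column names come from akshare, so they are
# read from the returned title array instead of being hard-coded here.
import guan
stock_symbols = guan.all_stock_symbols()
print(len(stock_symbols), stock_symbols[:5])
print(guan.find_stock_name_from_symbol(symbol='000002'))
title, stock_data = guan.history_data_of_one_stock(symbol='000002', period='daily')
print(title)          # column names as returned by akshare
print(stock_data[0])  # first row; rows are reversed relative to the akshare output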
# Play academic words
def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
    if bre_or_ame == 'ame':
        directory = 'words_mp3_ameProns/'
    elif bre_or_ame == 'bre':
        directory = 'words_mp3_breProns/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<h2.*?</a></p>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    if reverse==1:
        contents.reverse()
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_h2 = soup2.find_all('h2')
        for h2 in all_h2:
            if re.search('\d*. ', h2.get_text()):
                word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(h2.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.ldoceonline.com/dictionary/'+word)
                except:
                    pass
                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
                if show_translation==1:
                    time.sleep(translation_time)
                    print(translation)
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()

# Play selected academic words
def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
    if bre_or_ame == 'ame':
        directory = 'words_mp3_ameProns/'
    elif bre_or_ame == 'bre':
        directory = 'words_mp3_breProns/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<li>\d.*?</li>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    if reverse==1:
        contents.reverse()
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_li = soup2.find_all('li')
        for li in all_li:
            if re.search('\d*. ', li.get_text()):
                word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(li.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.ldoceonline.com/dictionary/'+word)
                except:
                    pass
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()

# Play the words on the periodic table of elements
def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8')
    directory = 'prons/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<h2.*?</a></p>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_h2 = soup2.find_all('h2')
        for h2 in all_h2:
            if re.search('\d*. ', h2.get_text()):
                word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(h2.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.merriam-webster.com/dictionary/'+word)
                except:
                    pass
                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
                if show_translation==1:
                    time.sleep(translation_time)
                    print(translation)
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()
@@ -3369,6 +3654,113 @@ def copy_file(file1='./a.txt', file2='./b.txt'):
    import shutil
    shutil.copy(file1, file2)

# Combine two PDF files
def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
    import PyPDF2
    output_pdf = PyPDF2.PdfWriter()
    with open(input_file_1, 'rb') as file1:
        pdf1 = PyPDF2.PdfReader(file1)
        for page in range(len(pdf1.pages)):
            output_pdf.add_page(pdf1.pages[page])
    with open(input_file_2, 'rb') as file2:
        pdf2 = PyPDF2.PdfReader(file2)
        for page in range(len(pdf2.pages)):
            output_pdf.add_page(pdf2.pages[page])
    with open(output_file, 'wb') as combined_file:
        output_pdf.write(combined_file)
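# Usage sketch: merge two existing PDF files and check the page count of the
# result with PyPDF2 (a.pdf and b.pdf are placeholder file names).
import PyPDF2
import guan
guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')
with open('combined_file.pdf', 'rb') as f:
    print(len(PyPDF2.PdfReader(f).pages))  # pages of a.pdf + pages of b.pdf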
# Convert a PDF file to text
def pdf_to_text(pdf_path):
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    praser = PDFParser(open(pdf_path, 'rb'))
    doc = PDFDocument()
    praser.set_document(doc)
    doc.set_parser(praser)
    doc.initialize()
    if not doc.is_extractable:
        raise PDFTextExtractionNotAllowed
    else:
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        content = ''
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    content = content + x.get_text().strip()
    return content
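# Usage sketch for the text extraction above ('a.pdf' is a placeholder path).
# The imports in pdf_to_text follow the legacy pdfminer interface, so the
# original pdfminer package rather than pdfminer.six appears to be assumed.
import guan
content = guan.pdf_to_text('a.pdf')
print(content[:200])  # first 200 extracted characters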
# Get the links in a PDF document. For example: link_starting_form='https://doi.org'
def get_links_from_pdf(pdf_path, link_starting_form=''):
    import PyPDF2
    import re
    pdfReader = PyPDF2.PdfFileReader(pdf_path)
    pages = pdfReader.getNumPages()
    i0 = 0
    links = []
    for page in range(pages):
        pageSliced = pdfReader.getPage(page)
        pageObject = pageSliced.getObject()
        if '/Annots' in pageObject.keys():
            ann = pageObject['/Annots']
            old = ''
            for a in ann:
                u = a.getObject()
                if '/A' in u.keys():
                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
                        if u['/A']['/URI'] != old:
                            links.append(u['/A']['/URI'])
                            i0 += 1
                            old = u['/A']['/URI']
    return links
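# Usage sketch: collect only the DOI links of a paper ('paper.pdf' is a
# placeholder path; the function relies on the PyPDF2 1.x method names above).
import guan
doi_links = guan.get_links_from_pdf('paper.pdf', link_starting_form='https://doi.org')
for link in doi_links:
    print(link)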
# Download papers through the Sci-Hub website
def download_with_scihub(address=None, num=1):
    from bs4 import BeautifulSoup
    import re
    import requests
    import os
    if num==1 and address!=None:
        address_array = [address]
    else:
        address_array = []
        for i in range(num):
            address = input('\nInput:')
            address_array.append(address)
    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        pdf_URL = soup.embed['src']
        # pdf_URL = soup.iframe['src'] # This line is from an older version and fails to get the pdf URL.
        if re.search(re.compile('^https:'), pdf_URL):
            pass
        else:
            pdf_URL = 'https:'+pdf_URL
        print('PDF address:', pdf_URL)
        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=32):
                f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
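# Usage sketch for the Sci-Hub downloader above. The DOI is a placeholder and a
# download only succeeds if the site is reachable and actually has the paper.
import guan
guan.download_with_scihub(address='10.1000/xyz123', num=1)  # single address passed directly
guan.download_with_scihub(num=3)  # with address=None, prompts for one address per paper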
# Write the file directory structure into a Markdown file
def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
    import os

@@ -3582,235 +3974,12 @@ def change_directory_by_replacement(current_key_word='code', new_key_word='data'
        os.makedirs(data_path)
    os.chdir(data_path)
# Module 14: others

## time

# Get the current date string
def get_date(bar=True):
    import datetime
    datetime_date = str(datetime.date.today())
    if bar==False:
        datetime_date = datetime_date.replace('-', '')
    return datetime_date

# Get the current time string
def get_time():
    import datetime
    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
    return datetime_time

## stocks

# Get all stocks
def all_stocks():
    import akshare as ak
    stocks = ak.stock_zh_a_spot_em()
    title = np.array(stocks.columns)
    stock_data = stocks.values
    return title, stock_data

# Get the symbols of all stocks
def all_stock_symbols():
    title, stock_data = guan.all_stocks()
    stock_symbols = stock_data[:, 1]
    return stock_symbols

# Find the stock name from the stock symbol
def find_stock_name_from_symbol(symbol='000002'):
    title, stock_data = guan.all_stocks()
    for stock in stock_data:
        if symbol in stock:
            stock_name = stock[2]
    return stock_name

# Get the historical data of a single stock
def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
    # period = 'daily'
    # period = 'weekly'
    # period = 'monthly'
    import akshare as ak
    stock = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
    title = np.array(stock.columns)
    stock_data = stock.values[::-1]
    return title, stock_data
## download

# Download papers through the Sci-Hub website
def download_with_scihub(address=None, num=1):
    from bs4 import BeautifulSoup
    import re
    import requests
    import os
    if num==1 and address!=None:
        address_array = [address]
    else:
        address_array = []
        for i in range(num):
            address = input('\nInput:')
            address_array.append(address)
    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        pdf_URL = soup.embed['src']
        # pdf_URL = soup.iframe['src'] # This line is from an older version and fails to get the pdf URL.
        if re.search(re.compile('^https:'), pdf_URL):
            pass
        else:
            pdf_URL = 'https:'+pdf_URL
        print('PDF address:', pdf_URL)
        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=32):
                f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
## PDF

# Combine two PDF files
def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
    import PyPDF2
    output_pdf = PyPDF2.PdfWriter()
    with open(input_file_1, 'rb') as file1:
        pdf1 = PyPDF2.PdfReader(file1)
        for page in range(len(pdf1.pages)):
            output_pdf.add_page(pdf1.pages[page])
    with open(input_file_2, 'rb') as file2:
        pdf2 = PyPDF2.PdfReader(file2)
        for page in range(len(pdf2.pages)):
            output_pdf.add_page(pdf2.pages[page])
    with open(output_file, 'wb') as combined_file:
        output_pdf.write(combined_file)

# Get the links in a PDF document. For example: link_starting_form='https://doi.org'
def get_links_from_pdf(pdf_path, link_starting_form=''):
    import PyPDF2
    import re
    pdfReader = PyPDF2.PdfFileReader(pdf_path)
    pages = pdfReader.getNumPages()
    i0 = 0
    links = []
    for page in range(pages):
        pageSliced = pdfReader.getPage(page)
        pageObject = pageSliced.getObject()
        if '/Annots' in pageObject.keys():
            ann = pageObject['/Annots']
            old = ''
            for a in ann:
                u = a.getObject()
                if '/A' in u.keys():
                    if re.search(re.compile('^'+link_starting_form), u['/A']['/URI']):
                        if u['/A']['/URI'] != old:
                            links.append(u['/A']['/URI'])
                            i0 += 1
                            old = u['/A']['/URI']
    return links

# Convert a PDF file to text
def pdf_to_text(pdf_path):
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    praser = PDFParser(open(pdf_path, 'rb'))
    doc = PDFDocument()
    praser.set_document(doc)
    doc.set_parser(praser)
    doc.initialize()
    if not doc.is_extractable:
        raise PDFTextExtractionNotAllowed
    else:
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        content = ''
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    content = content + x.get_text().strip()
    return content
## image

# Generate a QR code
def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
    import qrcode
    img = qrcode.make(data)
    img.save(filename+file_format)

## audio

# Convert text to audio
def str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    import pyttsx3

@@ -3891,176 +4060,3 @@ def compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k'):
    from pydub import AudioSegment
    sound = AudioSegment.from_mp3(wav_path)
    sound.export(output_filename,format="mp3",bitrate=bitrate)

## words
# Play academic words
def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
    if bre_or_ame == 'ame':
        directory = 'words_mp3_ameProns/'
    elif bre_or_ame == 'bre':
        directory = 'words_mp3_breProns/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<h2.*?</a></p>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    if reverse==1:
        contents.reverse()
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_h2 = soup2.find_all('h2')
        for h2 in all_h2:
            if re.search('\d*. ', h2.get_text()):
                word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(h2.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.ldoceonline.com/dictionary/'+word)
                except:
                    pass
                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
                if show_translation==1:
                    time.sleep(translation_time)
                    print(translation)
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()

# Play selected academic words
def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
    if bre_or_ame == 'ame':
        directory = 'words_mp3_ameProns/'
    elif bre_or_ame == 'bre':
        directory = 'words_mp3_breProns/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<li>\d.*?</li>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    if reverse==1:
        contents.reverse()
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_li = soup2.find_all('li')
        for li in all_li:
            if re.search('\d*. ', li.get_text()):
                word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(li.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.ldoceonline.com/dictionary/'+word)
                except:
                    pass
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()

# Play the words on the periodic table of elements
def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
    from bs4 import BeautifulSoup
    import re
    import urllib.request
    import requests
    import os
    import pygame
    import time
    import ssl
    import random
    ssl._create_default_https_context = ssl._create_unverified_context
    html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8')
    directory = 'prons/'
    exist_directory = os.path.exists(directory)
    html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8')
    if exist_directory == 0:
        os.makedirs(directory)
    soup = BeautifulSoup(html, features='lxml')
    contents = re.findall('<h2.*?</a></p>', html, re.S)
    if random_on==1:
        random.shuffle(contents)
    for content in contents:
        soup2 = BeautifulSoup(content, features='lxml')
        all_h2 = soup2.find_all('h2')
        for h2 in all_h2:
            if re.search('\d*. ', h2.get_text()):
                word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2]
                exist = os.path.exists(directory+word+'.mp3')
                if not exist:
                    try:
                        if re.search(word, html_file):
                            r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True)
                            with open(directory+word+'.mp3', 'wb') as f:
                                for chunk in r.iter_content(chunk_size=32):
                                    f.write(chunk)
                    except:
                        pass
                print(h2.get_text())
                try:
                    pygame.mixer.init()
                    track = pygame.mixer.music.load(directory+word+'.mp3')
                    pygame.mixer.music.play()
                    if show_link==1:
                        print('https://www.merriam-webster.com/dictionary/'+word)
                except:
                    pass
                translation = re.findall('<p>.*?</p>', content, re.S)[0][3:-4]
                if show_translation==1:
                    time.sleep(translation_time)
                    print(translation)
                time.sleep(rest_time)
                pygame.mixer.music.stop()
                print()