diff --git a/PyPI/setup.cfg b/PyPI/setup.cfg index 2c5ee10..f3a9e1f 100644 --- a/PyPI/setup.cfg +++ b/PyPI/setup.cfg @@ -1,7 +1,7 @@ [metadata] # replace with your username: name = guan -version = 0.1.18 +version = 0.1.19 author = guanjihuan author_email = guanjihuan@163.com description = An open source python package diff --git a/PyPI/src/guan.egg-info/PKG-INFO b/PyPI/src/guan.egg-info/PKG-INFO index 65ad05e..a8f13c5 100644 --- a/PyPI/src/guan.egg-info/PKG-INFO +++ b/PyPI/src/guan.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: guan -Version: 0.1.18 +Version: 0.1.19 Summary: An open source python package Home-page: https://py.guanjihuan.com Author: guanjihuan diff --git a/PyPI/src/guan/data_processing.py b/PyPI/src/guan/data_processing.py index 00c2595..eb89c63 100644 --- a/PyPI/src/guan/data_processing.py +++ b/PyPI/src/guan/data_processing.py @@ -140,26 +140,6 @@ def split_text(text, wrap_width=3000): guan.statistics_of_guan_package() return split_text_list -# 从网页的标签中获取内容 -def get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a']): - from bs4 import BeautifulSoup - import urllib.request - import ssl - ssl._create_default_https_context = ssl._create_unverified_context - html = urllib.request.urlopen(link).read().decode('utf-8') - soup = BeautifulSoup(html, features="lxml") - all_tags = soup.find_all(tags) - content = '' - for tag in all_tags: - text = tag.get_text().replace('\n', '') - if content == '': - content = text - else: - content = content + '\n\n' + text - import guan - guan.statistics_of_guan_package() - return content - # 将RGB转成HEX def rgb_to_hex(rgb, pound=1): import guan @@ -195,14 +175,6 @@ def encryption_SHA_256(password, salt=''): guan.statistics_of_guan_package() return hashed_password -# 生成二维码 -def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'): - import qrcode - img = qrcode.make(data) - img.save(filename+file_format) - import guan - guan.statistics_of_guan_package() - # 获取CPU使用率 def get_cpu_usage(interval=1): import psutil @@ -352,183 +324,6 @@ def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000 guan.statistics_of_guan_package() return title, stock_data -# 播放学术单词 -def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1): - from bs4 import BeautifulSoup - import re - import urllib.request - import requests - import os - import pygame - import time - import ssl - import random - ssl._create_default_https_context = ssl._create_unverified_context - html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8') - if bre_or_ame == 'ame': - directory = 'words_mp3_ameProns/' - elif bre_or_ame == 'bre': - directory = 'words_mp3_breProns/' - exist_directory = os.path.exists(directory) - html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8') - if exist_directory == 0: - os.makedirs(directory) - soup = BeautifulSoup(html, features='lxml') - contents = re.findall('

', html, re.S) - if random_on==1: - random.shuffle(contents) - if reverse==1: - contents.reverse() - for content in contents: - soup2 = BeautifulSoup(content, features='lxml') - all_h2 = soup2.find_all('h2') - for h2 in all_h2: - if re.search('\d*. ', h2.get_text()): - word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0] - exist = os.path.exists(directory+word+'.mp3') - if not exist: - try: - if re.search(word, html_file): - r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True) - with open(directory+word+'.mp3', 'wb') as f: - for chunk in r.iter_content(chunk_size=32): - f.write(chunk) - except: - pass - print(h2.get_text()) - try: - pygame.mixer.init() - track = pygame.mixer.music.load(directory+word+'.mp3') - pygame.mixer.music.play() - if show_link==1: - print('https://www.ldoceonline.com/dictionary/'+word) - except: - pass - translation = re.findall('

.*?

', content, re.S)[0][3:-4] - if show_translation==1: - time.sleep(translation_time) - print(translation) - time.sleep(rest_time) - pygame.mixer.music.stop() - print() - import guan - guan.statistics_of_guan_package() - -# 播放挑选过后的学术单词 -def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3): - from bs4 import BeautifulSoup - import re - import urllib.request - import requests - import os - import pygame - import time - import ssl - import random - ssl._create_default_https_context = ssl._create_unverified_context - html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8') - if bre_or_ame == 'ame': - directory = 'words_mp3_ameProns/' - elif bre_or_ame == 'bre': - directory = 'words_mp3_breProns/' - exist_directory = os.path.exists(directory) - html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8') - if exist_directory == 0: - os.makedirs(directory) - soup = BeautifulSoup(html, features='lxml') - contents = re.findall('
  • \d.*?
  • ', html, re.S) - if random_on==1: - random.shuffle(contents) - if reverse==1: - contents.reverse() - for content in contents: - soup2 = BeautifulSoup(content, features='lxml') - all_li = soup2.find_all('li') - for li in all_li: - if re.search('\d*. ', li.get_text()): - word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1] - exist = os.path.exists(directory+word+'.mp3') - if not exist: - try: - if re.search(word, html_file): - r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True) - with open(directory+word+'.mp3', 'wb') as f: - for chunk in r.iter_content(chunk_size=32): - f.write(chunk) - except: - pass - print(li.get_text()) - try: - pygame.mixer.init() - track = pygame.mixer.music.load(directory+word+'.mp3') - pygame.mixer.music.play() - if show_link==1: - print('https://www.ldoceonline.com/dictionary/'+word) - except: - pass - time.sleep(rest_time) - pygame.mixer.music.stop() - print() - import guan - guan.statistics_of_guan_package() - -# 播放元素周期表上的单词 -def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1): - from bs4 import BeautifulSoup - import re - import urllib.request - import requests - import os - import pygame - import time - import ssl - import random - ssl._create_default_https_context = ssl._create_unverified_context - html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8') - directory = 'prons/' - exist_directory = os.path.exists(directory) - html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8') - if exist_directory == 0: - os.makedirs(directory) - soup = BeautifulSoup(html, features='lxml') - contents = re.findall('

    ', html, re.S) - if random_on==1: - random.shuffle(contents) - for content in contents: - soup2 = BeautifulSoup(content, features='lxml') - all_h2 = soup2.find_all('h2') - for h2 in all_h2: - if re.search('\d*. ', h2.get_text()): - word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2] - exist = os.path.exists(directory+word+'.mp3') - if not exist: - try: - if re.search(word, html_file): - r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True) - with open(directory+word+'.mp3', 'wb') as f: - for chunk in r.iter_content(chunk_size=32): - f.write(chunk) - except: - pass - print(h2.get_text()) - try: - pygame.mixer.init() - track = pygame.mixer.music.load(directory+word+'.mp3') - pygame.mixer.music.play() - if show_link==1: - print('https://www.merriam-webster.com/dictionary/'+word) - except: - pass - translation = re.findall('

    .*?

    ', content, re.S)[0][3:-4] - if show_translation==1: - time.sleep(translation_time) - print(translation) - time.sleep(rest_time) - pygame.mixer.music.stop() - print() - import guan - guan.statistics_of_guan_package() - # 获取Guan软件包当前模块的所有函数名 def get_all_function_names_in_current_module(): import inspect diff --git a/PyPI/src/guan/file_processing.py b/PyPI/src/guan/file_processing.py index 2860754..f945e23 100644 --- a/PyPI/src/guan/file_processing.py +++ b/PyPI/src/guan/file_processing.py @@ -53,6 +53,290 @@ def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fil import guan guan.statistics_of_guan_package() +# 查找文件名相同的文件 +def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000): + import os + from collections import Counter + file_list = [] + for root, dirs, files in os.walk(directory): + for i0 in range(len(files)): + file_list.append(files[i0]) + for word in ignored_directory_with_words: + if word in root: + file_list.remove(files[i0]) + for word in ignored_file_with_words: + if word in files[i0]: + try: + file_list.remove(files[i0]) + except: + pass + count_file = Counter(file_list).most_common(num) + repeated_file = [] + for item in count_file: + if item[1]>1: + repeated_file.append(item) + import guan + guan.statistics_of_guan_package() + return repeated_file + +# 统计各个子文件夹中的文件数量 +def count_file_in_sub_directory(directory='./', sort=0, reverse=1, print_show=1, smaller_than_num=None): + import os + import numpy as np + dirs_list = [] + for root, dirs, files in os.walk(directory): + if dirs != []: + for i0 in range(len(dirs)): + dirs_list.append(root+'/'+dirs[i0]) + count_file_array = [] + for sub_dir in dirs_list: + file_list = [] + for root, dirs, files in os.walk(sub_dir): + for i0 in range(len(files)): + file_list.append(files[i0]) + count_file = len(file_list) + count_file_array.append(count_file) + if sort == 0: + if print_show == 1: + if smaller_than_num == None: + print(sub_dir) + print(count_file) + print() + else: + if count_file1: - repeated_file.append(item) - import guan - guan.statistics_of_guan_package() - return repeated_file - -# 统计各个子文件夹中的文件数量 -def count_file_in_sub_directory(directory='./', sort=0, reverse=1, print_show=1, smaller_than_num=None): - import os - import numpy as np - dirs_list = [] - for root, dirs, files in os.walk(directory): - if dirs != []: - for i0 in range(len(dirs)): - dirs_list.append(root+'/'+dirs[i0]) - count_file_array = [] - for sub_dir in dirs_list: - file_list = [] - for root, dirs, files in os.walk(sub_dir): - for i0 in range(len(files)): - file_list.append(files[i0]) - count_file = len(file_list) - count_file_array.append(count_file) - if sort == 0: - if print_show == 1: - if smaller_than_num == None: - print(sub_dir) - print(count_file) - print() - else: - if count_file

    ', html, re.S) + if random_on==1: + random.shuffle(contents) + if reverse==1: + contents.reverse() + for content in contents: + soup2 = BeautifulSoup(content, features='lxml') + all_h2 = soup2.find_all('h2') + for h2 in all_h2: + if re.search('\d*. ', h2.get_text()): + word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0] + exist = os.path.exists(directory+word+'.mp3') + if not exist: + try: + if re.search(word, html_file): + r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True) + with open(directory+word+'.mp3', 'wb') as f: + for chunk in r.iter_content(chunk_size=32): + f.write(chunk) + except: + pass + print(h2.get_text()) + try: + pygame.mixer.init() + track = pygame.mixer.music.load(directory+word+'.mp3') + pygame.mixer.music.play() + if show_link==1: + print('https://www.ldoceonline.com/dictionary/'+word) + except: + pass + translation = re.findall('

    .*?

    ', content, re.S)[0][3:-4] + if show_translation==1: + time.sleep(translation_time) + print(translation) + time.sleep(rest_time) + pygame.mixer.music.stop() + print() + import guan + guan.statistics_of_guan_package() + +# 播放挑选过后的学术单词 +def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3): + from bs4 import BeautifulSoup + import re + import urllib.request + import requests + import os + import pygame + import time + import ssl + import random + ssl._create_default_https_context = ssl._create_unverified_context + html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8') + if bre_or_ame == 'ame': + directory = 'words_mp3_ameProns/' + elif bre_or_ame == 'bre': + directory = 'words_mp3_breProns/' + exist_directory = os.path.exists(directory) + html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8') + if exist_directory == 0: + os.makedirs(directory) + soup = BeautifulSoup(html, features='lxml') + contents = re.findall('
  • \d.*?
  • ', html, re.S) + if random_on==1: + random.shuffle(contents) + if reverse==1: + contents.reverse() + for content in contents: + soup2 = BeautifulSoup(content, features='lxml') + all_li = soup2.find_all('li') + for li in all_li: + if re.search('\d*. ', li.get_text()): + word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1] + exist = os.path.exists(directory+word+'.mp3') + if not exist: + try: + if re.search(word, html_file): + r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True) + with open(directory+word+'.mp3', 'wb') as f: + for chunk in r.iter_content(chunk_size=32): + f.write(chunk) + except: + pass + print(li.get_text()) + try: + pygame.mixer.init() + track = pygame.mixer.music.load(directory+word+'.mp3') + pygame.mixer.music.play() + if show_link==1: + print('https://www.ldoceonline.com/dictionary/'+word) + except: + pass + time.sleep(rest_time) + pygame.mixer.music.stop() + print() + import guan + guan.statistics_of_guan_package() + +# 播放元素周期表上的单词 +def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1): + from bs4 import BeautifulSoup + import re + import urllib.request + import requests + import os + import pygame + import time + import ssl + import random + ssl._create_default_https_context = ssl._create_unverified_context + html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8') + directory = 'prons/' + exist_directory = os.path.exists(directory) + html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8') + if exist_directory == 0: + os.makedirs(directory) + soup = BeautifulSoup(html, features='lxml') + contents = re.findall('

    ', html, re.S) + if random_on==1: + random.shuffle(contents) + for content in contents: + soup2 = BeautifulSoup(content, features='lxml') + all_h2 = soup2.find_all('h2') + for h2 in all_h2: + if re.search('\d*. ', h2.get_text()): + word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2] + exist = os.path.exists(directory+word+'.mp3') + if not exist: + try: + if re.search(word, html_file): + r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True) + with open(directory+word+'.mp3', 'wb') as f: + for chunk in r.iter_content(chunk_size=32): + f.write(chunk) + except: + pass + print(h2.get_text()) + try: + pygame.mixer.init() + track = pygame.mixer.music.load(directory+word+'.mp3') + pygame.mixer.music.play() + if show_link==1: + print('https://www.merriam-webster.com/dictionary/'+word) + except: + pass + translation = re.findall('

    .*?

    ', content, re.S)[0][3:-4] + if show_translation==1: + time.sleep(translation_time) + print(translation) + time.sleep(rest_time) + pygame.mixer.music.stop() + print() + import guan + guan.statistics_of_guan_package() \ No newline at end of file