From 00495bcf84c51e44681170988febfe2934dcbd19 Mon Sep 17 00:00:00 2001
From: guanjihuan
Date: Mon, 6 Nov 2023 05:24:58 +0800
Subject: [PATCH] 0.1.19
---
PyPI/setup.cfg | 2 +-
PyPI/src/guan.egg-info/PKG-INFO | 2 +-
PyPI/src/guan/data_processing.py | 205 ---------
PyPI/src/guan/file_processing.py | 717 ++++++++++++++++++++-----------
4 files changed, 463 insertions(+), 463 deletions(-)
diff --git a/PyPI/setup.cfg b/PyPI/setup.cfg
index 2c5ee10..f3a9e1f 100644
--- a/PyPI/setup.cfg
+++ b/PyPI/setup.cfg
@@ -1,7 +1,7 @@
[metadata]
# replace with your username:
name = guan
-version = 0.1.18
+version = 0.1.19
author = guanjihuan
author_email = guanjihuan@163.com
description = An open source python package
diff --git a/PyPI/src/guan.egg-info/PKG-INFO b/PyPI/src/guan.egg-info/PKG-INFO
index 65ad05e..a8f13c5 100644
--- a/PyPI/src/guan.egg-info/PKG-INFO
+++ b/PyPI/src/guan.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: guan
-Version: 0.1.18
+Version: 0.1.19
Summary: An open source python package
Home-page: https://py.guanjihuan.com
Author: guanjihuan
diff --git a/PyPI/src/guan/data_processing.py b/PyPI/src/guan/data_processing.py
index 00c2595..eb89c63 100644
--- a/PyPI/src/guan/data_processing.py
+++ b/PyPI/src/guan/data_processing.py
@@ -140,26 +140,6 @@ def split_text(text, wrap_width=3000):
guan.statistics_of_guan_package()
return split_text_list
-# 从网页的标签中获取内容
-def get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a']):
- from bs4 import BeautifulSoup
- import urllib.request
- import ssl
- ssl._create_default_https_context = ssl._create_unverified_context
- html = urllib.request.urlopen(link).read().decode('utf-8')
- soup = BeautifulSoup(html, features="lxml")
- all_tags = soup.find_all(tags)
- content = ''
- for tag in all_tags:
- text = tag.get_text().replace('\n', '')
- if content == '':
- content = text
- else:
- content = content + '\n\n' + text
- import guan
- guan.statistics_of_guan_package()
- return content
-
# 将RGB转成HEX
def rgb_to_hex(rgb, pound=1):
import guan
@@ -195,14 +175,6 @@ def encryption_SHA_256(password, salt=''):
guan.statistics_of_guan_package()
return hashed_password
-# 生成二维码
-def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
- import qrcode
- img = qrcode.make(data)
- img.save(filename+file_format)
- import guan
- guan.statistics_of_guan_package()
-
# 获取CPU使用率
def get_cpu_usage(interval=1):
import psutil
@@ -352,183 +324,6 @@ def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000
guan.statistics_of_guan_package()
return title, stock_data
-# 播放学术单词
-def play_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_translation=1, show_link=1, translation_time=2, rest_time=1):
- from bs4 import BeautifulSoup
- import re
- import urllib.request
- import requests
- import os
- import pygame
- import time
- import ssl
- import random
- ssl._create_default_https_context = ssl._create_unverified_context
- html = urllib.request.urlopen("https://www.guanjihuan.com/archives/4418").read().decode('utf-8')
- if bre_or_ame == 'ame':
- directory = 'words_mp3_ameProns/'
- elif bre_or_ame == 'bre':
- directory = 'words_mp3_breProns/'
- exist_directory = os.path.exists(directory)
- html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
- if exist_directory == 0:
- os.makedirs(directory)
- soup = BeautifulSoup(html, features='lxml')
- contents = re.findall('
', html, re.S)
- if random_on==1:
- random.shuffle(contents)
- if reverse==1:
- contents.reverse()
- for content in contents:
- soup2 = BeautifulSoup(content, features='lxml')
- all_h2 = soup2.find_all('h2')
- for h2 in all_h2:
- if re.search('\d*. ', h2.get_text()):
- word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0]
- exist = os.path.exists(directory+word+'.mp3')
- if not exist:
- try:
- if re.search(word, html_file):
- r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
- with open(directory+word+'.mp3', 'wb') as f:
- for chunk in r.iter_content(chunk_size=32):
- f.write(chunk)
- except:
- pass
- print(h2.get_text())
- try:
- pygame.mixer.init()
- track = pygame.mixer.music.load(directory+word+'.mp3')
- pygame.mixer.music.play()
- if show_link==1:
- print('https://www.ldoceonline.com/dictionary/'+word)
- except:
- pass
- translation = re.findall('.*?
', content, re.S)[0][3:-4]
- if show_translation==1:
- time.sleep(translation_time)
- print(translation)
- time.sleep(rest_time)
- pygame.mixer.music.stop()
- print()
- import guan
- guan.statistics_of_guan_package()
-
-# 播放挑选过后的学术单词
-def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
- from bs4 import BeautifulSoup
- import re
- import urllib.request
- import requests
- import os
- import pygame
- import time
- import ssl
- import random
- ssl._create_default_https_context = ssl._create_unverified_context
- html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
- if bre_or_ame == 'ame':
- directory = 'words_mp3_ameProns/'
- elif bre_or_ame == 'bre':
- directory = 'words_mp3_breProns/'
- exist_directory = os.path.exists(directory)
- html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
- if exist_directory == 0:
- os.makedirs(directory)
- soup = BeautifulSoup(html, features='lxml')
- contents = re.findall('\d.*?', html, re.S)
- if random_on==1:
- random.shuffle(contents)
- if reverse==1:
- contents.reverse()
- for content in contents:
- soup2 = BeautifulSoup(content, features='lxml')
- all_li = soup2.find_all('li')
- for li in all_li:
- if re.search('\d*. ', li.get_text()):
- word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1]
- exist = os.path.exists(directory+word+'.mp3')
- if not exist:
- try:
- if re.search(word, html_file):
- r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
- with open(directory+word+'.mp3', 'wb') as f:
- for chunk in r.iter_content(chunk_size=32):
- f.write(chunk)
- except:
- pass
- print(li.get_text())
- try:
- pygame.mixer.init()
- track = pygame.mixer.music.load(directory+word+'.mp3')
- pygame.mixer.music.play()
- if show_link==1:
- print('https://www.ldoceonline.com/dictionary/'+word)
- except:
- pass
- time.sleep(rest_time)
- pygame.mixer.music.stop()
- print()
- import guan
- guan.statistics_of_guan_package()
-
-# 播放元素周期表上的单词
-def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
- from bs4 import BeautifulSoup
- import re
- import urllib.request
- import requests
- import os
- import pygame
- import time
- import ssl
- import random
- ssl._create_default_https_context = ssl._create_unverified_context
- html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8')
- directory = 'prons/'
- exist_directory = os.path.exists(directory)
- html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8')
- if exist_directory == 0:
- os.makedirs(directory)
- soup = BeautifulSoup(html, features='lxml')
- contents = re.findall('', html, re.S)
- if random_on==1:
- random.shuffle(contents)
- for content in contents:
- soup2 = BeautifulSoup(content, features='lxml')
- all_h2 = soup2.find_all('h2')
- for h2 in all_h2:
- if re.search('\d*. ', h2.get_text()):
- word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2]
- exist = os.path.exists(directory+word+'.mp3')
- if not exist:
- try:
- if re.search(word, html_file):
- r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True)
- with open(directory+word+'.mp3', 'wb') as f:
- for chunk in r.iter_content(chunk_size=32):
- f.write(chunk)
- except:
- pass
- print(h2.get_text())
- try:
- pygame.mixer.init()
- track = pygame.mixer.music.load(directory+word+'.mp3')
- pygame.mixer.music.play()
- if show_link==1:
- print('https://www.merriam-webster.com/dictionary/'+word)
- except:
- pass
- translation = re.findall('.*?
', content, re.S)[0][3:-4]
- if show_translation==1:
- time.sleep(translation_time)
- print(translation)
- time.sleep(rest_time)
- pygame.mixer.music.stop()
- print()
- import guan
- guan.statistics_of_guan_package()
-
# 获取Guan软件包当前模块的所有函数名
def get_all_function_names_in_current_module():
import inspect
diff --git a/PyPI/src/guan/file_processing.py b/PyPI/src/guan/file_processing.py
index 2860754..f945e23 100644
--- a/PyPI/src/guan/file_processing.py
+++ b/PyPI/src/guan/file_processing.py
@@ -53,6 +53,290 @@ def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fil
import guan
guan.statistics_of_guan_package()
+# 查找文件名相同的文件
+def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000):
+ import os
+ from collections import Counter
+ file_list = []
+ for root, dirs, files in os.walk(directory):
+ for i0 in range(len(files)):
+ file_list.append(files[i0])
+ for word in ignored_directory_with_words:
+ if word in root:
+ file_list.remove(files[i0])
+ for word in ignored_file_with_words:
+ if word in files[i0]:
+ try:
+ file_list.remove(files[i0])
+ except:
+ pass
+ count_file = Counter(file_list).most_common(num)
+ repeated_file = []
+ for item in count_file:
+ if item[1]>1:
+ repeated_file.append(item)
+ import guan
+ guan.statistics_of_guan_package()
+ return repeated_file
+
+# 统计各个子文件夹中的文件数量
+def count_file_in_sub_directory(directory='./', sort=0, reverse=1, print_show=1, smaller_than_num=None):
+ import os
+ import numpy as np
+ dirs_list = []
+ for root, dirs, files in os.walk(directory):
+ if dirs != []:
+ for i0 in range(len(dirs)):
+ dirs_list.append(root+'/'+dirs[i0])
+ count_file_array = []
+ for sub_dir in dirs_list:
+ file_list = []
+ for root, dirs, files in os.walk(sub_dir):
+ for i0 in range(len(files)):
+ file_list.append(files[i0])
+ count_file = len(file_list)
+ count_file_array.append(count_file)
+ if sort == 0:
+ if print_show == 1:
+ if smaller_than_num == None:
+ print(sub_dir)
+ print(count_file)
+ print()
+ else:
+ if count_file1:
- repeated_file.append(item)
- import guan
- guan.statistics_of_guan_package()
- return repeated_file
-
-# 统计各个子文件夹中的文件数量
-def count_file_in_sub_directory(directory='./', sort=0, reverse=1, print_show=1, smaller_than_num=None):
- import os
- import numpy as np
- dirs_list = []
- for root, dirs, files in os.walk(directory):
- if dirs != []:
- for i0 in range(len(dirs)):
- dirs_list.append(root+'/'+dirs[i0])
- count_file_array = []
- for sub_dir in dirs_list:
- file_list = []
- for root, dirs, files in os.walk(sub_dir):
- for i0 in range(len(files)):
- file_list.append(files[i0])
- count_file = len(file_list)
- count_file_array.append(count_file)
- if sort == 0:
- if print_show == 1:
- if smaller_than_num == None:
- print(sub_dir)
- print(count_file)
- print()
- else:
- if count_file', html, re.S)
+ if random_on==1:
+ random.shuffle(contents)
+ if reverse==1:
+ contents.reverse()
+ for content in contents:
+ soup2 = BeautifulSoup(content, features='lxml')
+ all_h2 = soup2.find_all('h2')
+ for h2 in all_h2:
+ if re.search('\d*. ', h2.get_text()):
+ word = re.findall('[a-zA-Z].*', h2.get_text(), re.S)[0]
+ exist = os.path.exists(directory+word+'.mp3')
+ if not exist:
+ try:
+ if re.search(word, html_file):
+ r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
+ with open(directory+word+'.mp3', 'wb') as f:
+ for chunk in r.iter_content(chunk_size=32):
+ f.write(chunk)
+ except:
+ pass
+ print(h2.get_text())
+ try:
+ pygame.mixer.init()
+ track = pygame.mixer.music.load(directory+word+'.mp3')
+ pygame.mixer.music.play()
+ if show_link==1:
+ print('https://www.ldoceonline.com/dictionary/'+word)
+ except:
+ pass
+ translation = re.findall('.*?
', content, re.S)[0][3:-4]
+ if show_translation==1:
+ time.sleep(translation_time)
+ print(translation)
+ time.sleep(rest_time)
+ pygame.mixer.music.stop()
+ print()
+ import guan
+ guan.statistics_of_guan_package()
+
+# 播放挑选过后的学术单词
+def play_selected_academic_words(reverse=0, random_on=0, bre_or_ame='ame', show_link=1, rest_time=3):
+ from bs4 import BeautifulSoup
+ import re
+ import urllib.request
+ import requests
+ import os
+ import pygame
+ import time
+ import ssl
+ import random
+ ssl._create_default_https_context = ssl._create_unverified_context
+ html = urllib.request.urlopen("https://www.guanjihuan.com/archives/24732").read().decode('utf-8')
+ if bre_or_ame == 'ame':
+ directory = 'words_mp3_ameProns/'
+ elif bre_or_ame == 'bre':
+ directory = 'words_mp3_breProns/'
+ exist_directory = os.path.exists(directory)
+ html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/"+directory).read().decode('utf-8')
+ if exist_directory == 0:
+ os.makedirs(directory)
+ soup = BeautifulSoup(html, features='lxml')
+ contents = re.findall('\d.*?', html, re.S)
+ if random_on==1:
+ random.shuffle(contents)
+ if reverse==1:
+ contents.reverse()
+ for content in contents:
+ soup2 = BeautifulSoup(content, features='lxml')
+ all_li = soup2.find_all('li')
+ for li in all_li:
+ if re.search('\d*. ', li.get_text()):
+ word = re.findall('\s[a-zA-Z].*?\s', li.get_text(), re.S)[0][1:-1]
+ exist = os.path.exists(directory+word+'.mp3')
+ if not exist:
+ try:
+ if re.search(word, html_file):
+ r = requests.get("https://file.guanjihuan.com/words/"+directory+word+".mp3", stream=True)
+ with open(directory+word+'.mp3', 'wb') as f:
+ for chunk in r.iter_content(chunk_size=32):
+ f.write(chunk)
+ except:
+ pass
+ print(li.get_text())
+ try:
+ pygame.mixer.init()
+ track = pygame.mixer.music.load(directory+word+'.mp3')
+ pygame.mixer.music.play()
+ if show_link==1:
+ print('https://www.ldoceonline.com/dictionary/'+word)
+ except:
+ pass
+ time.sleep(rest_time)
+ pygame.mixer.music.stop()
+ print()
+ import guan
+ guan.statistics_of_guan_package()
+
+# 播放元素周期表上的单词
+def play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1):
+ from bs4 import BeautifulSoup
+ import re
+ import urllib.request
+ import requests
+ import os
+ import pygame
+ import time
+ import ssl
+ import random
+ ssl._create_default_https_context = ssl._create_unverified_context
+ html = urllib.request.urlopen("https://www.guanjihuan.com/archives/10897").read().decode('utf-8')
+ directory = 'prons/'
+ exist_directory = os.path.exists(directory)
+ html_file = urllib.request.urlopen("https://file.guanjihuan.com/words/periodic_table_of_elements/"+directory).read().decode('utf-8')
+ if exist_directory == 0:
+ os.makedirs(directory)
+ soup = BeautifulSoup(html, features='lxml')
+ contents = re.findall('', html, re.S)
+ if random_on==1:
+ random.shuffle(contents)
+ for content in contents:
+ soup2 = BeautifulSoup(content, features='lxml')
+ all_h2 = soup2.find_all('h2')
+ for h2 in all_h2:
+ if re.search('\d*. ', h2.get_text()):
+ word = re.findall('[a-zA-Z].* \(', h2.get_text(), re.S)[0][:-2]
+ exist = os.path.exists(directory+word+'.mp3')
+ if not exist:
+ try:
+ if re.search(word, html_file):
+ r = requests.get("https://file.guanjihuan.com/words/periodic_table_of_elements/prons/"+word+".mp3", stream=True)
+ with open(directory+word+'.mp3', 'wb') as f:
+ for chunk in r.iter_content(chunk_size=32):
+ f.write(chunk)
+ except:
+ pass
+ print(h2.get_text())
+ try:
+ pygame.mixer.init()
+ track = pygame.mixer.music.load(directory+word+'.mp3')
+ pygame.mixer.music.play()
+ if show_link==1:
+ print('https://www.merriam-webster.com/dictionary/'+word)
+ except:
+ pass
+ translation = re.findall('.*?
', content, re.S)[0][3:-4]
+ if show_translation==1:
+ time.sleep(translation_time)
+ print(translation)
+ time.sleep(rest_time)
+ pygame.mixer.music.stop()
+ print()
+ import guan
+ guan.statistics_of_guan_package()
\ No newline at end of file