0.1.127
This commit is contained in:
parent
5b9a2d7267
commit
4982967bd6
@ -1,7 +1,7 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
# replace with your username:
|
# replace with your username:
|
||||||
name = guan
|
name = guan
|
||||||
version = 0.1.126
|
version = 0.1.127
|
||||||
author = guanjihuan
|
author = guanjihuan
|
||||||
author_email = guanjihuan@163.com
|
author_email = guanjihuan@163.com
|
||||||
description = An open source python package
|
description = An open source python package
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
Metadata-Version: 2.1
|
Metadata-Version: 2.1
|
||||||
Name: guan
|
Name: guan
|
||||||
Version: 0.1.126
|
Version: 0.1.127
|
||||||
Summary: An open source python package
|
Summary: An open source python package
|
||||||
Home-page: https://py.guanjihuan.com
|
Home-page: https://py.guanjihuan.com
|
||||||
Author: guanjihuan
|
Author: guanjihuan
|
||||||
|
@ -15,7 +15,6 @@ src/guan/deprecated.py
|
|||||||
src/guan/figure_plotting.py
|
src/guan/figure_plotting.py
|
||||||
src/guan/file_reading_and_writing.py
|
src/guan/file_reading_and_writing.py
|
||||||
src/guan/machine_learning.py
|
src/guan/machine_learning.py
|
||||||
src/guan/others.py
|
|
||||||
src/guan/quantum_transport.py
|
src/guan/quantum_transport.py
|
||||||
src/guan/topological_invariant.py
|
src/guan/topological_invariant.py
|
||||||
src/guan.egg-info/PKG-INFO
|
src/guan.egg-info/PKG-INFO
|
||||||
|
@ -12,7 +12,6 @@ from .machine_learning import *
|
|||||||
from .file_reading_and_writing import *
|
from .file_reading_and_writing import *
|
||||||
from .figure_plotting import *
|
from .figure_plotting import *
|
||||||
from .data_processing import *
|
from .data_processing import *
|
||||||
from .others import *
|
|
||||||
from .decorators import *
|
from .decorators import *
|
||||||
from .deprecated import *
|
from .deprecated import *
|
||||||
statistics_of_guan_package()
|
statistics_of_guan_package()
|
@ -40,6 +40,40 @@ def chat(prompt='你好', model=1, stream=0, top_p=0.8, temperature=0.85):
|
|||||||
print('\n--- End Stream Message ---\n')
|
print('\n--- End Stream Message ---\n')
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
# 将XYZ数据转成矩阵数据(说明:x_array/y_array的输入和输出不一样。要求z_array数据中y对应的数据为小循环,x对应的数据为大循环)
|
||||||
|
def convert_xyz_data_into_matrix_data(x_array, y_array, z_array):
    """Reshape flat XYZ samples into a 2-D matrix.

    The z values must be ordered with y as the inner (fast) loop and x as the
    outer (slow) loop.

    Args:
        x_array: 1-D sequence of x values, one per sample (repeats allowed).
        y_array: 1-D sequence of y values, one per sample (repeats allowed).
        z_array: 1-D sequence of z values, one per sample.

    Returns:
        (x_unique, y_unique, matrix): the distinct x and y values in order of
        first appearance, and a float matrix of shape
        (len(x_unique), len(y_unique)) with matrix[ix, iy] = z[ix*len_y+iy].

    Raises:
        IndexError: if z_array has fewer than len(x_unique)*len(y_unique) values.
    """
    import numpy as np
    # dict.fromkeys keeps first-appearance order. A plain set() has arbitrary
    # ordering, which could make the returned axes disagree with the matrix.
    x_array = np.array(list(dict.fromkeys(np.array(x_array).tolist())))
    y_array = np.array(list(dict.fromkeys(np.array(y_array).tolist())))
    z_array = np.array(z_array).reshape(-1)
    len_x = len(x_array)
    len_y = len(y_array)
    if z_array.size < len_x*len_y:
        raise IndexError('z_array has fewer elements than len(x)*len(y)')
    matrix = np.zeros((len_x, len_y))
    # Extra trailing z values are ignored, as in the element-by-element version.
    matrix[:, :] = z_array[:len_x*len_y].reshape(len_x, len_y)
    return x_array, y_array, matrix
|
||||||
|
|
||||||
|
# 将矩阵数据转成XYZ数据(说明:x_array/y_array的输入和输出不一样。生成的z_array数据中y对应的数据为小循环,x对应的数据为大循环)
|
||||||
|
def convert_matrix_data_into_xyz_data(x_array, y_array, matrix):
    """Flatten a matrix sampled on an (x, y) grid into three XYZ columns.

    The output is ordered with y as the inner (fast) loop and x as the outer
    (slow) loop, i.e. element ix*len_y+iy corresponds to matrix[ix, iy].

    Args:
        x_array: 1-D sequence of x grid values (rows of the matrix).
        y_array: 1-D sequence of y grid values (columns of the matrix).
        matrix: 2-D array-like of shape at least (len(x_array), len(y_array)).

    Returns:
        (x_out, y_out, z_out): three float arrays of length len_x*len_y.

    Raises:
        IndexError: if the matrix is smaller than the grid.
    """
    import numpy as np
    x_array_input = np.array(x_array)
    y_array_input = np.array(y_array)
    matrix = np.array(matrix)
    len_x = len(x_array_input)
    len_y = len(y_array_input)
    total = len_x*len_y
    x_array = np.zeros(total)
    y_array = np.zeros(total)
    z_array = np.zeros(total)
    if total > 0:
        if matrix.ndim != 2 or matrix.shape[0] < len_x or matrix.shape[1] < len_y:
            raise IndexError('matrix is smaller than (len(x_array), len(y_array))')
        # Vectorized equivalent of the nested ix/iy loops.
        x_array[:] = np.repeat(x_array_input, len_y)
        y_array[:] = np.tile(y_array_input, len_x)
        z_array[:] = matrix[:len_x, :len_y].reshape(-1)
    return x_array, y_array, z_array
|
||||||
|
|
||||||
# 从列表中删除某个匹配的元素
|
# 从列表中删除某个匹配的元素
|
||||||
def remove_item_in_one_array(array, item):
|
def remove_item_in_one_array(array, item):
|
||||||
new_array = [x for x in array if x != item]
|
new_array = [x for x in array if x != item]
|
||||||
@ -178,62 +212,6 @@ def run_programs_sequentially(program_files=['./a.py', './b.py'], execute='pytho
|
|||||||
end = time.time()
|
end = time.time()
|
||||||
print('Total running time = '+str((end-start)/60)+' min')
|
print('Total running time = '+str((end-start)/60)+' min')
|
||||||
|
|
||||||
# 将XYZ数据转成矩阵数据(说明:x_array/y_array的输入和输出不一样。要求z_array数据中y对应的数据为小循环,x对应的数据为大循环)
|
|
||||||
def convert_xyz_data_into_matrix_data(x_array, y_array, z_array):
    """Reshape flat XYZ samples into a 2-D matrix.

    The z values must be ordered with y as the inner (fast) loop and x as the
    outer (slow) loop.

    Args:
        x_array: 1-D sequence of x values, one per sample (repeats allowed).
        y_array: 1-D sequence of y values, one per sample (repeats allowed).
        z_array: 1-D sequence of z values, one per sample.

    Returns:
        (x_unique, y_unique, matrix): the distinct x and y values in order of
        first appearance, and a float matrix of shape
        (len(x_unique), len(y_unique)) with matrix[ix, iy] = z[ix*len_y+iy].

    Raises:
        IndexError: if z_array has fewer than len(x_unique)*len(y_unique) values.
    """
    import numpy as np
    # dict.fromkeys keeps first-appearance order. A plain set() has arbitrary
    # ordering, which could make the returned axes disagree with the matrix.
    x_array = np.array(list(dict.fromkeys(np.array(x_array).tolist())))
    y_array = np.array(list(dict.fromkeys(np.array(y_array).tolist())))
    z_array = np.array(z_array).reshape(-1)
    len_x = len(x_array)
    len_y = len(y_array)
    if z_array.size < len_x*len_y:
        raise IndexError('z_array has fewer elements than len(x)*len(y)')
    matrix = np.zeros((len_x, len_y))
    # Extra trailing z values are ignored, as in the element-by-element version.
    matrix[:, :] = z_array[:len_x*len_y].reshape(len_x, len_y)
    return x_array, y_array, matrix
|
|
||||||
|
|
||||||
# 将矩阵数据转成XYZ数据(说明:x_array/y_array的输入和输出不一样。生成的z_array数据中y对应的数据为小循环,x对应的数据为大循环)
|
|
||||||
def convert_matrix_data_into_xyz_data(x_array, y_array, matrix):
    """Flatten a matrix sampled on an (x, y) grid into three XYZ columns.

    The output is ordered with y as the inner (fast) loop and x as the outer
    (slow) loop, i.e. element ix*len_y+iy corresponds to matrix[ix, iy].

    Args:
        x_array: 1-D sequence of x grid values (rows of the matrix).
        y_array: 1-D sequence of y grid values (columns of the matrix).
        matrix: 2-D array-like of shape at least (len(x_array), len(y_array)).

    Returns:
        (x_out, y_out, z_out): three float arrays of length len_x*len_y.

    Raises:
        IndexError: if the matrix is smaller than the grid.
    """
    import numpy as np
    x_array_input = np.array(x_array)
    y_array_input = np.array(y_array)
    matrix = np.array(matrix)
    len_x = len(x_array_input)
    len_y = len(y_array_input)
    total = len_x*len_y
    x_array = np.zeros(total)
    y_array = np.zeros(total)
    z_array = np.zeros(total)
    if total > 0:
        if matrix.ndim != 2 or matrix.shape[0] < len_x or matrix.shape[1] < len_y:
            raise IndexError('matrix is smaller than (len(x_array), len(y_array))')
        # Vectorized equivalent of the nested ix/iy loops.
        x_array[:] = np.repeat(x_array_input, len_y)
        y_array[:] = np.tile(y_array_input, len_x)
        z_array[:] = matrix[:len_x, :len_y].reshape(-1)
    return x_array, y_array, z_array
|
|
||||||
|
|
||||||
# 通过定义计算R^2(基于实际值和预测值,数值有可能小于0)
|
|
||||||
def calculate_R2_with_definition(y_true_array, y_pred_array):
    """Compute the coefficient of determination R^2 from its definition.

    R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
    and SS_tot is the total sum of squares about the mean of the true values.
    The result can be negative when the prediction is worse than the mean.

    Args:
        y_true_array: array-like of observed values.
        y_pred_array: array-like of predicted values, same length.

    Returns:
        The R^2 value as a NumPy float.
    """
    import numpy as np
    # Convert first so plain Python lists work; list - float raises TypeError.
    y_true = np.asarray(y_true_array)
    y_pred = np.asarray(y_pred_array)
    y_mean = np.mean(y_true)
    SS_tot = np.sum((y_true - y_mean) ** 2)
    SS_res = np.sum((y_true - y_pred) ** 2)
    R2 = 1 - (SS_res / SS_tot)
    return R2
|
|
||||||
|
|
||||||
# 通过sklearn计算R^2,和上面定义的计算结果一致
|
|
||||||
def calculate_R2_with_sklearn(y_true_array, y_pred_array):
    """Return R^2 between observed and predicted values via scikit-learn's r2_score."""
    from sklearn.metrics import r2_score
    return r2_score(y_true_array, y_pred_array)
|
|
||||||
|
|
||||||
# 通过scipy计算线性回归后的R^2(基于线性回归模型,范围在0和1之间)
|
|
||||||
def calculate_R2_after_linear_regression_with_scipy(y_true_array, y_pred_array):
    """Return the squared correlation coefficient of a linear fit between the two arrays.

    A straight line is fitted with scipy.stats.linregress (first argument as
    the abscissa, second as the ordinate) and the square of its r value is
    returned.
    """
    from scipy import stats
    fit = stats.linregress(y_true_array, y_pred_array)
    correlation = fit[2]  # third field of the result is the r value
    return correlation**2
|
|
||||||
|
|
||||||
# 判断一个数是否接近于整数
|
# 判断一个数是否接近于整数
|
||||||
def close_to_integer(value, abs_tol=1e-3):
|
def close_to_integer(value, abs_tol=1e-3):
|
||||||
import math
|
import math
|
||||||
@ -356,6 +334,98 @@ def hex_to_rgb(hex):
|
|||||||
length = len(hex)
|
length = len(hex)
|
||||||
return tuple(int(hex[i:i+length//3], 16) for i in range(0, length, length//3))
|
return tuple(int(hex[i:i+length//3], 16) for i in range(0, length, length//3))
|
||||||
|
|
||||||
|
# 拼接两个PDF文件
|
||||||
|
def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
    """Concatenate two PDF files into a single output file.

    Args:
        input_file_1: path of the PDF whose pages come first.
        input_file_2: path of the PDF whose pages are appended.
        output_file: path of the combined PDF to write.
    """
    import PyPDF2
    output_pdf = PyPDF2.PdfWriter()
    # Keep both inputs open until the output is written: depending on the
    # PyPDF2 version, page content may be read from the source stream only
    # when the writer serializes.
    with open(input_file_1, 'rb') as file1, open(input_file_2, 'rb') as file2:
        for source in (file1, file2):
            for page in PyPDF2.PdfReader(source).pages:
                output_pdf.add_page(page)
        with open(output_file, 'wb') as combined_file:
            output_pdf.write(combined_file)
|
||||||
|
|
||||||
|
# 将PDF文件转成文本
|
||||||
|
def pdf_to_text(pdf_path):
    """Extract the text of every text box in a PDF file.

    Args:
        pdf_path: path to the PDF file.

    Returns:
        The stripped text of all text boxes, concatenated in page order with
        no separator.

    Raises:
        PDFTextExtractionNotAllowed: if the document forbids text extraction.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    # NOTE(review): these two lines change logging behavior for the whole
    # process, presumably to silence pdfminer; kept as in the original.
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    # The context manager closes the file even when parsing fails.
    with open(pdf_path, 'rb') as pdf_file:
        parser = PDFParser(pdf_file)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    return ''.join(pieces)
|
||||||
|
|
||||||
|
# 获取PDF文件页数
|
||||||
|
def get_pdf_page_number(pdf_path):
    """Return the number of pages in the PDF file at pdf_path."""
    import PyPDF2
    # The context manager closes the file; it was previously left open.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
    return num_pages
|
||||||
|
|
||||||
|
# 获取PDF文件指定页面的内容
|
||||||
|
def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1):
    """Return the extracted text of one page of a PDF file.

    Args:
        pdf_path: path to the PDF file.
        page_num: 1-based page number.

    Returns:
        The text of the requested page as produced by PyPDF2's extract_text().

    Raises:
        IndexError: if page_num is outside 1..number of pages. Previously this
            surfaced as an UnboundLocalError on the return statement.
    """
    import PyPDF2
    # The context manager closes the file even if extraction raises.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
        if not 1 <= page_num <= num_pages:
            raise IndexError('page_num '+str(page_num)+' is out of range (1-'+str(num_pages)+')')
        page_text = pdf_reader.pages[page_num-1].extract_text()
    return page_text
|
||||||
|
|
||||||
|
# 获取PDF文献中的链接。例如: link_starting_form='https://doi.org'
|
||||||
|
def get_links_from_pdf(pdf_path, link_starting_form=''):
    """Collect the URI link annotations found in a PDF file.

    Args:
        pdf_path: path to the PDF file.
        link_starting_form: pattern the link must start with, for example
            'https://doi.org'. It is used as a regular expression anchored at
            the start of the URI, so regex metacharacters keep their meaning.

    Returns:
        List of URIs in page order. A URI equal to the previously collected
        one on the same page is skipped.
    """
    import PyPDF2
    import re
    reader = PyPDF2.PdfReader(pdf_path)
    # Compiled once instead of once per annotation.
    prefix_pattern = re.compile('^'+link_starting_form)
    links = []
    for page in reader.pages:
        page_object = page.get_object()
        if '/Annots' not in page_object.keys():
            continue
        previous_uri = ''  # de-duplication restarts on every page
        for annotation in page_object['/Annots']:
            entry = annotation.get_object()
            if '/A' not in entry.keys() or '/URI' not in entry['/A']:
                continue
            uri = entry['/A']['/URI']
            if re.search(prefix_pattern, uri) and uri != previous_uri:
                links.append(uri)
                previous_uri = uri
    return links
|
||||||
|
|
||||||
# 使用MD5进行散列加密
|
# 使用MD5进行散列加密
|
||||||
def encryption_MD5(password, salt=''):
|
def encryption_MD5(password, salt=''):
|
||||||
import hashlib
|
import hashlib
|
||||||
@ -713,130 +783,635 @@ def count_number_of_import_statements(filename, file_format='.py', num=1000):
|
|||||||
import_statement_counter = Counter(import_array).most_common(num)
|
import_statement_counter = Counter(import_array).most_common(num)
|
||||||
return import_statement_counter
|
return import_statement_counter
|
||||||
|
|
||||||
# 查找文件名相同的文件
|
# 获取软件包的本机版本
|
||||||
def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000):
|
def get_current_version(package_name='guan'):
|
||||||
import os
|
import importlib.metadata
|
||||||
from collections import Counter
|
try:
|
||||||
file_list = []
|
current_version = importlib.metadata.version(package_name)
|
||||||
for root, dirs, files in os.walk(directory):
|
return current_version
|
||||||
for i0 in range(len(files)):
|
except:
|
||||||
file_list.append(files[i0])
|
return None
|
||||||
for word in ignored_directory_with_words:
|
|
||||||
if word in root:
|
|
||||||
file_list.remove(files[i0])
|
|
||||||
for word in ignored_file_with_words:
|
|
||||||
if word in files[i0]:
|
|
||||||
try:
|
|
||||||
file_list.remove(files[i0])
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
count_file = Counter(file_list).most_common(num)
|
|
||||||
repeated_file = []
|
|
||||||
for item in count_file:
|
|
||||||
if item[1]>1:
|
|
||||||
repeated_file.append(item)
|
|
||||||
return repeated_file
|
|
||||||
|
|
||||||
# 统计各个子文件夹中的文件数量
|
# 获取Python软件包的最新版本
|
||||||
def count_file_in_sub_directory(directory='./', sort=0, reverse=1, print_show=1, smaller_than_num=None):
|
def get_latest_version(package_name='guan', timeout=5):
    """Look up the latest released version of a package on PyPI.

    Args:
        package_name: name of the package on PyPI.
        timeout: request timeout in seconds.

    Returns:
        The version string from the PyPI JSON API, or None if the request
        fails or the response status is not 200.
    """
    import requests
    url = f"https://pypi.org/pypi/{package_name}/json"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Only network/HTTP failures are swallowed; a bare except would also
        # hide KeyboardInterrupt and programming errors.
        return None
    if response.status_code != 200:
        return None
    data = response.json()
    return data["info"]["version"]
|
||||||
|
|
||||||
|
# 获取包含某个字符的进程PID值
|
||||||
|
def get_PID_array(name):
    """Return the PIDs of processes whose `ps -ef` line matches name.

    Args:
        name: text passed to `grep` to filter the `ps -ef` output.

    Returns:
        List of PID strings (second column of each matching line). An empty
        list is returned when the command exits with a non-zero status, so the
        return type is always a list.
    """
    import re
    import subprocess
    # NOTE(review): name is concatenated into a shell command line. Do not
    # pass untrusted input here; quoting was not added because callers may
    # rely on passing extra grep options or pipes.
    command = "ps -ef | grep "+name
    result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        return []
    id_running_array = []
    for ps_ef_item in re.split(r'\n', result.stdout):
        if ps_ef_item != '':
            id_running_array.append(re.split(r'\s+', ps_ef_item)[1])
    return id_running_array
|
||||||
|
|
||||||
|
# 每日git commit次数的统计
|
||||||
|
def statistics_of_git_commits(print_show=0, str_or_datetime='str'):
    """Count git commits per day for the repository in the current directory.

    Args:
        print_show: when 1, print one "date: count commits" line per day.
        str_or_datetime: 'str' returns dates as 'YYYY-MM-DD' strings,
            'datetime' returns datetime.datetime objects.

    Returns:
        (date_array, commit_count_array): dates in ascending order and the
        number of commits on each date. Both are empty when git prints no
        commit dates.
    """
    import subprocess
    import collections
    import datetime
    since_date = '100 year ago'
    result = subprocess.run(
        ['git', 'log', f'--since={since_date}', '--pretty=format:%ad', '--date=short'],
        stdout=subprocess.PIPE,
        text=True)
    # Drop empty lines: ''.split('\n') yields [''], which used to be counted
    # as one commit on the date '' and broke strptime in 'datetime' mode.
    commits = [line for line in result.stdout.strip().split('\n') if line]
    counter = collections.Counter(commits)
    daily_commit_counts = dict(sorted(counter.items()))
    date_array = []
    commit_count_array = []
    for date, count in daily_commit_counts.items():
        if print_show == 1:
            print(f"{date}: {count} commits")
        if str_or_datetime=='datetime':
            date_array.append(datetime.datetime.strptime(date, "%Y-%m-%d"))
        elif str_or_datetime=='str':
            date_array.append(date)
        commit_count_array.append(count)
    return date_array, commit_count_array
|
||||||
|
|
||||||
|
# 将文件目录结构写入Markdown文件
|
||||||
|
def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
|
||||||
import os
|
import os
|
||||||
import numpy as np
|
f = open(filename+'.md', 'w', encoding="utf-8")
|
||||||
dirs_list = []
|
filenames1 = os.listdir(directory)
|
||||||
for root, dirs, files in os.walk(directory):
|
u0 = 0
|
||||||
if dirs != []:
|
for filename1 in filenames1[::reverse_positive_or_negative]:
|
||||||
for i0 in range(len(dirs)):
|
filename1_with_path = os.path.join(directory,filename1)
|
||||||
dirs_list.append(root+'/'+dirs[i0])
|
if os.path.isfile(filename1_with_path):
|
||||||
count_file_array = []
|
if os.path.splitext(filename1)[1] not in banned_file_format:
|
||||||
for sub_dir in dirs_list:
|
if hide_file_format == None:
|
||||||
file_list = []
|
f.write('+ '+str(filename1)+'\n\n')
|
||||||
for root, dirs, files in os.walk(sub_dir):
|
|
||||||
for i0 in range(len(files)):
|
|
||||||
file_list.append(files[i0])
|
|
||||||
count_file = len(file_list)
|
|
||||||
count_file_array.append(count_file)
|
|
||||||
if sort == 0:
|
|
||||||
if print_show == 1:
|
|
||||||
if smaller_than_num == None:
|
|
||||||
print(sub_dir)
|
|
||||||
print(count_file)
|
|
||||||
print()
|
|
||||||
else:
|
else:
|
||||||
if count_file<smaller_than_num:
|
f.write('+ '+str(os.path.splitext(filename1)[0])+'\n\n')
|
||||||
print(sub_dir)
|
|
||||||
print(count_file)
|
|
||||||
print()
|
|
||||||
if sort == 0:
|
|
||||||
sub_directory = dirs_list
|
|
||||||
num_in_sub_directory = count_file_array
|
|
||||||
if sort == 1:
|
|
||||||
sub_directory = []
|
|
||||||
num_in_sub_directory = []
|
|
||||||
if reverse == 1:
|
|
||||||
index_array = np.argsort(count_file_array)[::-1]
|
|
||||||
else:
|
else:
|
||||||
index_array = np.argsort(count_file_array)
|
u0 += 1
|
||||||
for i0 in index_array:
|
if divided_line != None and u0 != 1:
|
||||||
sub_directory.append(dirs_list[i0])
|
f.write('--------\n\n')
|
||||||
num_in_sub_directory.append(count_file_array[i0])
|
if starting_from_h1 == None:
|
||||||
if print_show == 1:
|
f.write('#')
|
||||||
if smaller_than_num == None:
|
f.write('# '+str(filename1)+'\n\n')
|
||||||
print(dirs_list[i0])
|
|
||||||
print(count_file_array[i0])
|
filenames2 = os.listdir(filename1_with_path)
|
||||||
print()
|
i0 = 0
|
||||||
|
for filename2 in filenames2[::reverse_positive_or_negative]:
|
||||||
|
filename2_with_path = os.path.join(directory, filename1, filename2)
|
||||||
|
if os.path.isfile(filename2_with_path):
|
||||||
|
if os.path.splitext(filename2)[1] not in banned_file_format:
|
||||||
|
if hide_file_format == None:
|
||||||
|
f.write('+ '+str(filename2)+'\n\n')
|
||||||
|
else:
|
||||||
|
f.write('+ '+str(os.path.splitext(filename2)[0])+'\n\n')
|
||||||
else:
|
else:
|
||||||
if count_file_array[i0]<smaller_than_num:
|
i0 += 1
|
||||||
print(dirs_list[i0])
|
if starting_from_h1 == None:
|
||||||
print(count_file_array[i0])
|
f.write('#')
|
||||||
print()
|
if show_second_number != None:
|
||||||
return sub_directory, num_in_sub_directory
|
f.write('## '+str(i0)+'. '+str(filename2)+'\n\n')
|
||||||
|
else:
|
||||||
|
f.write('## '+str(filename2)+'\n\n')
|
||||||
|
|
||||||
# 在多个子文件夹中产生必要的文件,例如 readme.md
|
j0 = 0
|
||||||
def creat_necessary_file(directory, filename='readme', file_format='.md', content='', overwrite=None, ignored_directory_with_words=[]):
|
filenames3 = os.listdir(filename2_with_path)
|
||||||
import os
|
for filename3 in filenames3[::reverse_positive_or_negative]:
|
||||||
directory_with_file = []
|
filename3_with_path = os.path.join(directory, filename1, filename2, filename3)
|
||||||
ignored_directory = []
|
if os.path.isfile(filename3_with_path):
|
||||||
for root, dirs, files in os.walk(directory):
|
if os.path.splitext(filename3)[1] not in banned_file_format:
|
||||||
for i0 in range(len(files)):
|
if hide_file_format == None:
|
||||||
if root not in directory_with_file:
|
f.write('+ '+str(filename3)+'\n\n')
|
||||||
directory_with_file.append(root)
|
else:
|
||||||
if files[i0] == filename+file_format:
|
f.write('+ '+str(os.path.splitext(filename3)[0])+'\n\n')
|
||||||
if root not in ignored_directory:
|
else:
|
||||||
ignored_directory.append(root)
|
j0 += 1
|
||||||
if overwrite == None:
|
if starting_from_h1 == None:
|
||||||
for root in ignored_directory:
|
f.write('#')
|
||||||
directory_with_file.remove(root)
|
if show_third_number != None:
|
||||||
ignored_directory_more =[]
|
f.write('### ('+str(j0)+') '+str(filename3)+'\n\n')
|
||||||
for root in directory_with_file:
|
else:
|
||||||
for word in ignored_directory_with_words:
|
f.write('### '+str(filename3)+'\n\n')
|
||||||
if word in root:
|
|
||||||
if root not in ignored_directory_more:
|
|
||||||
ignored_directory_more.append(root)
|
|
||||||
for root in ignored_directory_more:
|
|
||||||
directory_with_file.remove(root)
|
|
||||||
for root in directory_with_file:
|
|
||||||
os.chdir(root)
|
|
||||||
f = open(filename+file_format, 'w', encoding="utf-8")
|
|
||||||
f.write(content)
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
# 删除特定文件名的文件(谨慎使用)
|
filenames4 = os.listdir(filename3_with_path)
|
||||||
def delete_file_with_specific_name(directory, filename='readme', file_format='.md'):
|
for filename4 in filenames4[::reverse_positive_or_negative]:
|
||||||
import os
|
filename4_with_path = os.path.join(directory, filename1, filename2, filename3, filename4)
|
||||||
for root, dirs, files in os.walk(directory):
|
if os.path.isfile(filename4_with_path):
|
||||||
for i0 in range(len(files)):
|
if os.path.splitext(filename4)[1] not in banned_file_format:
|
||||||
if files[i0] == filename+file_format:
|
if hide_file_format == None:
|
||||||
os.remove(root+'/'+files[i0])
|
f.write('+ '+str(filename4)+'\n\n')
|
||||||
|
else:
|
||||||
|
f.write('+ '+str(os.path.splitext(filename4)[0])+'\n\n')
|
||||||
|
else:
|
||||||
|
if starting_from_h1 == None:
|
||||||
|
f.write('#')
|
||||||
|
f.write('#### '+str(filename4)+'\n\n')
|
||||||
|
|
||||||
# 将所有文件移到根目录(谨慎使用)
|
filenames5 = os.listdir(filename4_with_path)
|
||||||
def move_all_files_to_root_directory(directory):
|
for filename5 in filenames5[::reverse_positive_or_negative]:
|
||||||
import os
|
filename5_with_path = os.path.join(directory, filename1, filename2, filename3, filename4, filename5)
|
||||||
import shutil
|
if os.path.isfile(filename5_with_path):
|
||||||
for root, dirs, files in os.walk(directory):
|
if os.path.splitext(filename5)[1] not in banned_file_format:
|
||||||
for i0 in range(len(files)):
|
if hide_file_format == None:
|
||||||
shutil.move(root+'/'+files[i0], directory+'/'+files[i0])
|
f.write('+ '+str(filename5)+'\n\n')
|
||||||
for i0 in range(100):
|
else:
|
||||||
for root, dirs, files in os.walk(directory):
|
f.write('+ '+str(os.path.splitext(filename5)[0])+'\n\n')
|
||||||
try:
|
else:
|
||||||
os.rmdir(root)
|
if starting_from_h1 == None:
|
||||||
except:
|
f.write('#')
|
||||||
|
f.write('##### '+str(filename5)+'\n\n')
|
||||||
|
|
||||||
|
filenames6 = os.listdir(filename5_with_path)
|
||||||
|
for filename6 in filenames6[::reverse_positive_or_negative]:
|
||||||
|
filename6_with_path = os.path.join(directory, filename1, filename2, filename3, filename4, filename5, filename6)
|
||||||
|
if os.path.isfile(filename6_with_path):
|
||||||
|
if os.path.splitext(filename6)[1] not in banned_file_format:
|
||||||
|
if hide_file_format == None:
|
||||||
|
f.write('+ '+str(filename6)+'\n\n')
|
||||||
|
else:
|
||||||
|
f.write('+ '+str(os.path.splitext(filename6)[0])+'\n\n')
|
||||||
|
else:
|
||||||
|
if starting_from_h1 == None:
|
||||||
|
f.write('#')
|
||||||
|
f.write('###### '+str(filename6)+'\n\n')
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
# 从网页的标签中获取内容
|
||||||
|
def get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a']):
    """Download a web page and return the text of the selected tags.

    Args:
        link: URL of the page; the body is decoded as UTF-8.
        tags: tag names to collect (the default list is never mutated).

    Returns:
        The text of each matching tag with newlines removed, joined by blank
        lines. Empty texts at the very beginning are skipped.
    """
    from bs4 import BeautifulSoup
    import urllib.request
    import ssl
    # Certificate verification is disabled for this request only. Replacing
    # ssl._create_default_https_context would silently disable verification
    # for every later HTTPS request in the process.
    unverified_context = ssl._create_unverified_context()
    with urllib.request.urlopen(link, context=unverified_context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    all_tags = soup.find_all(tags)
    content = ''
    for tag in all_tags:
        text = tag.get_text().replace('\n', '')
        if content == '':
            content = text
        else:
            content = content + '\n\n' + text
    return content
|
||||||
|
|
||||||
|
# 从HTML中获取所有的链接
|
||||||
|
def get_links_from_html(html_link, links_with_text=0):
    """Download a web page and return the targets of its <a> tags.

    Args:
        html_link: URL of the page; the body is decoded as UTF-8.
        links_with_text: 0 returns a list of href strings; any other value
            returns a list of (href, link text) tuples.

    Returns:
        The list described above; <a> tags with no or empty href are skipped.
    """
    from bs4 import BeautifulSoup
    import urllib.request
    import ssl
    # Certificate verification is disabled for this request only, instead of
    # patching ssl._create_default_https_context for the whole process.
    unverified_context = ssl._create_unverified_context()
    with urllib.request.urlopen(html_link, context=unverified_context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    a_tags = soup.find_all('a')
    if links_with_text == 0:
        link_array = [tag.get('href') for tag in a_tags if tag.get('href')]
        return link_array
    else:
        link_array_with_text = [(tag.get('href'), tag.text) for tag in a_tags if tag.get('href')]
        return link_array_with_text
|
||||||
|
|
||||||
|
# 检查链接的有效性
|
||||||
|
def check_link(url, timeout=3, allow_redirects=True):
    """Return True when a HEAD request to url answers with status 200.

    Any requests.exceptions.RequestException (timeout, connection error,
    invalid URL, ...) is reported as False.
    """
    import requests
    try:
        reply = requests.head(url, timeout=timeout, allow_redirects=allow_redirects)
    except requests.exceptions.RequestException:
        return False
    return reply.status_code == 200
|
||||||
|
|
||||||
|
# 检查链接数组中链接的有效性
|
||||||
|
def check_link_array(link_array, timeout=3, allow_redirects=True, try_again=0, print_show=1):
    """Check a list of links and return the ones that failed.

    Args:
        link_array: iterable of URLs; the placeholder '#' is always accepted.
        timeout: per-request timeout forwarded to guan.check_link.
        allow_redirects: forwarded to guan.check_link.
        try_again: when truthy, links that failed are checked a second time
            and only those failing again are returned.
        print_show: when truthy, print each failed link as it is found.

    Returns:
        List of links that did not pass the check.
    """
    import guan

    def _collect_failures(candidates):
        # One checking pass: gather (and optionally print) unreachable links.
        broken = []
        for candidate in candidates:
            reachable = candidate=='#' or guan.check_link(candidate, timeout=timeout, allow_redirects=allow_redirects)
            if not reachable:
                broken.append(candidate)
                if print_show:
                    print(candidate)
        return broken

    first_pass_failures = _collect_failures(link_array)
    if not try_again:
        return first_pass_failures
    if print_show:
        print('\nTry again:\n')
    return _collect_failures(first_pass_failures)
|
||||||
|
|
||||||
|
# 生成二维码
|
||||||
|
def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
    """Encode data as a QR code image and save it as filename+file_format."""
    import qrcode
    qrcode.make(data).save(filename+file_format)
|
||||||
|
|
||||||
|
# 通过Sci-Hub网站下载文献
|
||||||
|
def download_with_scihub(address=None, num=1):
    """Download one or more papers through the Sci-Hub website.

    Args:
        address: identifier to request (sent as the 'request' form field).
            Used only when num == 1; otherwise the identifiers are read
            interactively.
        num: number of papers to download. When num != 1, or when address is
            None, each identifier is asked for with input().

    Each PDF is saved in the current working directory under the file name
    taken from the end of its URL. Progress is printed to stdout.
    """
    from bs4 import BeautifulSoup
    import re
    import requests
    import os
    if num==1 and address is not None:
        address_array = [address]
    else:
        address_array = []
        for i in range(num):
            address = input('\nInput:')
            address_array.append(address)
    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        # The PDF location is read from the <embed> tag; an earlier version
        # used soup.iframe['src'], which no longer finds the PDF URL.
        pdf_URL = soup.embed['src']
        if not pdf_URL.startswith('https:'):
            pdf_URL = 'https:'+pdf_URL
        print('PDF address:', pdf_URL)
        # Search the reversed URL so the LAST '/...pdf' segment is matched,
        # then reverse back and drop the leading '/'.
        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            # 8 KiB chunks; 32-byte chunks meant tens of thousands of tiny writes.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
|
||||||
|
|
||||||
|
# 将字符串转成音频
|
||||||
|
def str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak a string aloud and/or save it as an audio file with pyttsx3.

    Args:
        str: text to convert (the name shadows the builtin; kept for callers).
        filename: base name of the output file, without extension.
        rate: speech rate passed to the engine.
        voice: index into the engine's list of installed voices.
        read: when 1, speak the text aloud.
        save: when 1, write filename+'.wav'.
        compress: when 1 (and save is 1), convert the wav to filename+'.mp3'
            via guan.compress_wav_to_mp3, using 'temp.wav' as a scratch file.
        bitrate: mp3 bitrate used when compressing.
        print_text: when 1, print the text first.
    """
    import pyttsx3
    import guan
    if print_text==1:
        print(str)
    speaker = pyttsx3.init()
    installed_voices = speaker.getProperty('voices')
    speaker.setProperty('voice', installed_voices[voice].id)
    speaker.setProperty("rate", rate)
    if save==1:
        wav_name = filename+'.wav'
        speaker.save_to_file(str, wav_name)
        speaker.runAndWait()
        print('Wav file saved!')
        if compress==1:
            import os
            os.rename(wav_name, 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read==1:
        speaker.say(str)
        speaker.runAndWait()
|
||||||
|
|
||||||
|
# 将txt文件转成音频
|
||||||
|
def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak a UTF-8 text file aloud and/or save it as an audio file.

    Args:
        txt_path: path of the text file to read.
        rate: speech rate passed to the engine.
        voice: index into the engine's list of installed voices.
        read: when 1, speak the text aloud.
        save: when 1, write '<name>.wav' in the current directory, where
            <name> is the last path component with its final 4 characters
            removed (the '.txt' suffix).
        compress: when 1 (and save is 1), convert the wav to '<name>.mp3' via
            guan.compress_wav_to_mp3, using 'temp.wav' as a scratch file.
        bitrate: mp3 bitrate used when compressing.
        print_text: when 1, print the text first.
    """
    import pyttsx3
    import guan
    # The context manager closes the file; it was previously left open.
    with open(txt_path, 'r', encoding ='utf-8') as f:
        text = f.read()
    if print_text==1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save==1:
        import re
        # Raw string for the same pattern (split on '/', ',' or backslash);
        # the non-raw literal contained the invalid escape sequence '\]'.
        filename = re.split(r'[/,\\]', txt_path)[-1][:-4]
        engine.save_to_file(text, filename+'.wav')
        engine.runAndWait()
        print('Wav file saved!')
        if compress==1:
            import os
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read==1:
        engine.say(text)
        engine.runAndWait()
|
||||||
|
|
||||||
|
# 将PDF文件转成音频
|
||||||
|
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak the text of a PDF file aloud and/or save it as an audio file.

    The text is obtained with guan.pdf_to_text and newlines are replaced by
    spaces before synthesis.

    Args:
        pdf_path: path of the PDF file.
        rate: speech rate passed to the engine.
        voice: index into the engine's list of installed voices.
        read: when 1, speak the text aloud.
        save: when 1, write '<name>.wav' in the current directory, where
            <name> is the last path component with its final 4 characters
            removed (the '.pdf' suffix).
        compress: when 1 (and save is 1), convert the wav to '<name>.mp3' via
            guan.compress_wav_to_mp3, using 'temp.wav' as a scratch file.
        bitrate: mp3 bitrate used when compressing.
        print_text: when 1, print the text first.
    """
    import pyttsx3
    import guan
    text = guan.pdf_to_text(pdf_path)
    text = text.replace('\n', ' ')
    if print_text==1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save==1:
        import re
        # Raw string for the same pattern (split on '/', ',' or backslash);
        # the non-raw literal contained the invalid escape sequence '\]'.
        filename = re.split(r'[/,\\]', pdf_path)[-1][:-4]
        engine.save_to_file(text, filename+'.wav')
        engine.runAndWait()
        print('Wav file saved!')
        if compress==1:
            import os
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read==1:
        engine.say(text)
        engine.runAndWait()
|
||||||
|
|
||||||
|
# 将wav音频文件压缩成MP3音频文件
|
||||||
|
def compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k'):
    """Convert a WAV audio file to an MP3 file at the given bitrate.

    Note: besides installing pydub, FFmpeg may also be needed. Download it
    from http://www.ffmpeg.org/download.html and add its bin directory to the
    PATH environment variable.

    Args:
        wav_path: path of the input WAV file.
        output_filename: path of the MP3 file to write.
        bitrate: target bitrate string, for example '16k'.
    """
    from pydub import AudioSegment
    # The input is a WAV file, so load it with the WAV loader; from_mp3 asked
    # the decoder to treat the WAV data as MP3.
    sound = AudioSegment.from_wav(wav_path)
    sound.export(output_filename,format="mp3",bitrate=bitrate)
|
||||||
|
|
||||||
|
# 将WordPress导出的XML格式文件转换成多个MarkDown格式的文件
|
||||||
|
def convert_wordpress_xml_to_markdown(xml_file='./a.xml', convert_content=1, replace_more=[]):
    """Convert a WordPress XML export into one Markdown file per item.

    Each <item> becomes '<title>.md' in the current working directory, with
    characters that are invalid in file names replaced by spaces. The file
    starts with '# <title>' followed by the item's content.

    Args:
        xml_file: path of the exported XML file.
        convert_content: when 1, strip HTML comments and common tags
            (p, ol, ul, strong, li), turn h2/h3/h4 into Markdown headings and
            collapse runs of blank lines; otherwise write the content as is.
        replace_more: extra substrings to delete from the content (only used
            when convert_content is 1; the default list is never mutated).
    """
    import xml.etree.ElementTree as ET
    import re
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for item in root.findall('.//item'):
        title = item.find('title').text
        content = item.find('.//content:encoded', namespaces={'content': 'http://purl.org/rss/1.0/modules/content/'}).text
        if content is None:
            # An empty <content:encoded/> yields None, which made re.sub raise.
            content = ''
        if convert_content == 1:
            content = re.sub(r'<!--.*?-->', '', content)
            for plain_tag in ('<p>', '</p>', '<ol>', '</ol>', '<ul>', '</ul>', '<strong>', '</strong>', '</li>'):
                content = content.replace(plain_tag, '')
            content = content.replace('<li>', '+ ')
            # Remove closing tags for every heading level converted below;
            # previously only </h3> was removed and </h2>, </h4> leaked out.
            content = re.sub(r'</h[2-4]>', '', content)
            content = re.sub(r'<h2.*?>', '## ', content)
            content = re.sub(r'<h3.*?>', '### ', content)
            content = re.sub(r'<h4.*?>', '#### ', content)
            for replace_item in replace_more:
                content = content.replace(replace_item, '')
            # Collapse any run of 3+ newlines to exactly one blank line.
            content = re.sub(r'\n{3,}', '\n\n', content)
        markdown_content = f"# {title}\n{content}"
        markdown_file_path = f"{title}.md"
        cleaned_filename = re.sub(r'[/:*?"<>|\'\\]', ' ', markdown_file_path)
        with open(cleaned_filename, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_content)
|
||||||
|
|
||||||
|
# 凯利公式
|
||||||
|
def kelly_formula(p, b, a=1):
    """Return the Kelly fraction ``p/a - (1-p)/b``.

    NOTE(review): p is presumably the winning probability, b the gain ratio and
    a the loss ratio -- confirm against the callers.
    """
    gain_term = p / a
    loss_term = (1 - p) / b
    return gain_term - loss_term
|
||||||
|
|
||||||
|
# 获取所有股票
|
||||||
|
def all_stocks():
    """Fetch the table returned by ``akshare.stock_zh_a_spot_em()``.

    Returns:
        (title, stock_data): the table's column labels as a NumPy array and the
        table's ``.values``.
    """
    import numpy as np
    import akshare as ak
    spot_table = ak.stock_zh_a_spot_em()
    return np.array(spot_table.columns), spot_table.values
|
||||||
|
|
||||||
|
# 获取所有股票的代码
|
||||||
|
def all_stock_symbols():
    """Return column 1 (the stock codes) of the table from ``guan.all_stocks()``."""
    import guan
    _, rows = guan.all_stocks()
    return rows[:, 1]
|
||||||
|
|
||||||
|
# 股票代码的分类
|
||||||
|
def stock_symbols_classification():
    """Group all stock codes from ``guan.all_stock_symbols()`` by leading digits.

    Returns:
        A 6-tuple of lists, in this order:
        Shanghai main board (600/601/603/605),
        Shenzhen main board (000/001/002/003),
        ChiNext (300/301),
        STAR market (688/689),
        NEEQ (82/83/87/88/430/420/400),
        and every code that matches none of the prefixes above.
    """
    import guan
    # One tuple of prefixes per board, in the order of the returned tuple.
    # The prefix sets are disjoint, so a code belongs to at most one board.
    board_prefixes = (
        ('600', '601', '603', '605'),
        ('000', '001', '002', '003'),
        ('300', '301'),
        ('688', '689'),
        ('82', '83', '87', '88', '430', '420', '400'),
    )
    boards = [[] for _ in board_prefixes]
    stock_symbols_others = []
    for stock_symbol in guan.all_stock_symbols():
        for board, prefixes in zip(boards, board_prefixes):
            if stock_symbol.startswith(prefixes):
                board.append(stock_symbol)
                break
        else:
            # No prefix matched: keep the code so nothing is silently dropped.
            stock_symbols_others.append(stock_symbol)
    stock_symbols_60, stock_symbols_00, stock_symbols_30, stock_symbols_68, stock_symbols_8_4 = boards
    return stock_symbols_60, stock_symbols_00, stock_symbols_30, stock_symbols_68, stock_symbols_8_4, stock_symbols_others
|
||||||
|
|
||||||
|
# 股票代码各个分类的数量
|
||||||
|
def statistics_of_stock_symbols_classification():
    """Return the size of each of the six groups from ``guan.stock_symbols_classification()``.

    The six counts are returned in the same order as the groups.
    """
    import guan
    group_60, group_00, group_30, group_68, group_8_4, group_others = guan.stock_symbols_classification()
    return (
        len(group_60),
        len(group_00),
        len(group_30),
        len(group_68),
        len(group_8_4),
        len(group_others),
    )
|
||||||
|
|
||||||
|
# 从股票代码获取股票名称
|
||||||
|
def find_stock_name_from_symbol(symbol='000002'):
    """Look up the name (column 2) of the stock whose code (column 1) equals ``symbol``.

    Returns:
        The stock name, or None when no row of ``guan.all_stocks()`` carries
        that code.
    """
    import guan
    _, stock_data = guan.all_stocks()
    for stock in stock_data:
        # Compare against the code column only, so a coincidental match in
        # another column cannot select the wrong row.
        if stock[1] == symbol:
            return stock[2]
    return None
|
||||||
|
|
||||||
|
# 市值排序
|
||||||
|
def sorted_market_capitalization(num=10):
    """Rank the rows of ``guan.all_stocks()`` by column 17, largest first.

    NOTE(review): column 17 is presumably the total market value and column 9 a
    price field -- confirm against the akshare column order.

    Args:
        num: Number of entries to return; None returns all of them. Values
            larger than the number of usable rows are clamped.

    Returns:
        A list of ``[rank, symbol, name, column_17 / 1e8]`` entries, where symbol
        is column 1 and name is column 2.
    """
    import numpy as np
    import guan
    _, stock_data = guan.all_stocks()
    # Rows with NaN in column 9 were always skipped; rows with NaN in the sort
    # column are skipped as well because NaN has no defined sort position.
    usable_rows = [
        stock for stock in stock_data
        if not np.isnan(float(stock[9])) and not np.isnan(float(stock[17]))
    ]
    if not usable_rows:
        return []
    new_stock_data = np.array(usable_rows)
    list_index = np.argsort(new_stock_data[:, 17])[::-1]
    count = len(list_index) if num is None else min(num, len(list_index))
    sorted_array = []
    for i0 in range(count):
        row = new_stock_data[list_index[i0]]
        sorted_array.append([i0+1, row[1], row[2], row[17]/1e8])
    return sorted_array
|
||||||
|
|
||||||
|
# 美股市值排序
|
||||||
|
def sorted_market_capitalization_us(num=10):
    """Rank the rows of ``akshare.stock_us_spot_em()`` by column 9, largest first.

    NOTE(review): column 9 is presumably the total market value -- confirm
    against the akshare column order.

    Args:
        num: Number of entries to return; None returns all of them. Values
            larger than the number of usable rows are clamped.

    Returns:
        A list of ``[rank, symbol, name, column_9 / 1e8]`` entries, where symbol
        is column 15 and name is column 1.
    """
    import akshare as ak
    import numpy as np
    stock_data = ak.stock_us_spot_em().values
    # Rows whose sort column is NaN are dropped.
    usable_rows = [stock for stock in stock_data if not np.isnan(float(stock[9]))]
    if not usable_rows:
        return []
    new_stock_data = np.array(usable_rows)
    list_index = np.argsort(new_stock_data[:, 9])[::-1]
    count = len(list_index) if num is None else min(num, len(list_index))
    sorted_array = []
    for i0 in range(count):
        row = new_stock_data[list_index[i0]]
        sorted_array.append([i0+1, row[15], row[1], row[9]/1e8])
    return sorted_array
|
||||||
|
|
||||||
|
# 获取单个股票的历史数据
|
||||||
|
def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
    """Fetch the history table of one stock via ``akshare.stock_zh_a_hist``.

    Args:
        symbol: Stock code.
        period: 'daily', 'weekly' or 'monthly'.
        start_date, end_date: Date bounds forwarded unchanged to akshare.

    Returns:
        (title, stock_data): the column labels as a NumPy array and the table's
        ``.values`` in reversed row order.
    """
    import numpy as np
    import akshare as ak
    history = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
    reversed_rows = history.values[::-1]
    return np.array(history.columns), reversed_rows
|
||||||
|
|
||||||
|
# 绘制股票图
|
||||||
|
def plot_stock_line(date_array, opening_array, closing_array, high_array, low_array, lw_open_close=6, lw_high_low=2, xlabel='date', ylabel='price', title='', fontsize=20, labelsize=20, adjust_bottom=0.2, adjust_left=0.2, fontfamily='Times New Roman'):
    """Draw a candlestick-like chart from vertical lines and show it.

    For every date a thick line spans open..close and a thin line spans
    low..high. Both are red when open <= close and green otherwise.
    """
    import guan
    plt, fig, ax = guan.import_plt_and_start_fig_ax(adjust_bottom=adjust_bottom, adjust_left=adjust_left, labelsize=labelsize, fontfamily=fontfamily)
    # The font family is only passed explicitly for 'Times New Roman'.
    text_options = {'fontsize': fontsize}
    if fontfamily == 'Times New Roman':
        text_options['fontfamily'] = 'Times New Roman'
    ax.set_title(title, **text_options)
    ax.set_xlabel(xlabel, **text_options)
    ax.set_ylabel(ylabel, **text_options)
    for i0 in range(len(date_array)):
        bar_color = 'red' if opening_array[i0] <= closing_array[i0] else 'green'
        ax.vlines(date_array[i0], opening_array[i0], closing_array[i0], linestyle='-', color=bar_color, lw=lw_open_close)
        ax.vlines(date_array[i0], low_array[i0], high_array[i0], color=bar_color, linestyle='-', lw=lw_high_low)
    plt.show()
    plt.close('all')
|
||||||
|
|
||||||
|
# Guan软件包的使用统计(仅仅统计装机数和import次数)
|
||||||
|
def statistics_of_guan_package(function_name=None):
    """Send one best-effort usage record to socket.guanjihuan.com:12345.

    The JSON record holds the date, time, installed guan version and the MAC
    address, plus ``function_name`` when it is given. Any failure is ignored so
    that it never disturbs the caller.

    Args:
        function_name: Optional name to include in the record.
    """
    import guan
    try:
        import socket
        import json
        message = {
            'server': 'py.guanjihuan.com',
            'date': guan.get_date(),
            'time': guan.get_time(),
            'version': guan.get_current_version('guan'),
        }
        client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            client_socket.settimeout(0.5)
            client_socket.connect(('socket.guanjihuan.com', 12345))
            # The MAC address is only looked up once the connection succeeded.
            message['MAC_address'] = guan.get_mac_address()
            if function_name is not None:
                message['function_name'] = function_name
            client_socket.sendall(json.dumps(message).encode())
        finally:
            # Always release the socket, also when connect or send fails.
            client_socket.close()
    except Exception:
        # Deliberately silent: statistics must never break the package.
        pass
|
||||||
|
|
||||||
|
# Guan软件包升级检查和提示(如果无法连接或者版本为最新,那么均没有提示)
|
||||||
|
def notification_of_upgrade(timeout=5):
    """Print an upgrade hint when the latest guan version differs from the installed one.

    Nothing is printed when either version is unknown, when both are equal, or
    when any error occurs.

    Args:
        timeout: Forwarded to ``guan.get_latest_version``.
    """
    try:
        import guan
        latest_version = guan.get_latest_version(package_name='guan', timeout=timeout)
        current_version = guan.get_current_version('guan')
        if None in (latest_version, current_version):
            return
        if latest_version == current_version:
            return
        print('升级提示:您当前使用的版本是 guan-'+current_version+',目前已经有最新版本 guan-'+latest_version+'。您可以通过以下命令对软件包进行升级:pip install --upgrade guan -i https://pypi.python.org/simple 或 pip install --upgrade guan')
    except:
        pass
|
@ -35,138 +35,6 @@ def load_txt_data(filename):
|
|||||||
data = np.loadtxt(filename+'.txt')
|
data = np.loadtxt(filename+'.txt')
|
||||||
return data
|
return data
|
||||||
|
|
||||||
# 如果不存在文件夹,则新建文件夹
|
|
||||||
def make_directory(directory='./test'):
    """Create ``directory`` (including missing parents) unless the path already exists."""
    import os
    if os.path.exists(directory):
        return
    os.makedirs(directory)
|
|
||||||
|
|
||||||
# 如果不存在文件,则新建空文件
|
|
||||||
def make_file(file_path='./a.txt'):
    """Create an empty file at ``file_path`` unless the path already exists."""
    import os
    if os.path.exists(file_path):
        return
    open(file_path, 'w').close()
|
|
||||||
|
|
||||||
# 打开文件用于写入,默认为新增内容
|
|
||||||
def open_file(filename='a', file_format='.txt', mode='add'):
    """Open ``filename + file_format`` for UTF-8 text writing and return the handle.

    Args:
        filename: Path without the extension.
        file_format: Extension appended to ``filename``.
        mode: 'add' appends to the file, 'overwrite' truncates it.

    Returns:
        The open file object; the caller is responsible for closing it.

    Raises:
        ValueError: If ``mode`` is neither 'add' nor 'overwrite'.
    """
    open_modes = {'add': 'a', 'overwrite': 'w'}
    if mode not in open_modes:
        raise ValueError("mode must be 'add' or 'overwrite', got "+repr(mode))
    return open(filename+file_format, open_modes[mode], encoding='UTF-8')
|
|
||||||
|
|
||||||
def print_to_file(*args, filename='print_result', file_format='.txt', print_on=True):
    """Append the arguments as one line to ``filename + file_format``.

    Every argument is converted with ``str`` and followed by a single space;
    the line ends with a newline. The same line is echoed to standard output
    when ``print_on`` equals True.

    Args:
        *args: Values to record.
        filename: Path without the extension.
        file_format: Extension appended to ``filename``.
        print_on: Echo the line to standard output as well.
    """
    line = ''.join(str(arg)+' ' for arg in args)
    if print_on == True:
        print(line)
    # The context manager closes the file even if the write fails.
    with open(filename+file_format, 'a', encoding='UTF-8') as f:
        f.write(line+'\n')
|
|
||||||
|
|
||||||
# 读取文本文件内容。如果文件不存在,返回空字符串
|
|
||||||
def read_text_file(file_path='./a.txt', make_file=None):
    """Return the UTF-8 text content of ``file_path``, or '' when it does not exist.

    Args:
        file_path: Path of the file to read.
        make_file: When not None and the file is missing, an empty file is
            created at ``file_path``.

    Returns:
        The file content as a string ('' for a missing file).
    """
    import os
    if not os.path.exists(file_path):
        if make_file is not None:
            with open(file_path, 'w', encoding='UTF-8'):
                pass
        return ''
    # Read as UTF-8 to match the encoding used by the writers in this module.
    with open(file_path, 'r', encoding='UTF-8') as f:
        return f.read()
|
|
||||||
|
|
||||||
# 获取目录中的所有文件名
|
|
||||||
def get_all_filenames_in_directory(directory='./', file_format=None, show_root_path=0, sort=1, include_subdirectory=1):
    """List the file names found under ``directory``.

    Args:
        directory: Directory to walk.
        file_format: When given, only names containing this substring are kept.
        show_root_path: 0 returns bare file names; any other value prefixes
            each name with its directory and '/'.
        sort: 1 returns the list sorted.
        include_subdirectory: 1 descends into sub-directories; any other value
            stops after the top-level directory.

    Returns:
        The list of (optionally prefixed) file names.
    """
    import os
    collected = []
    for root, dirs, files in os.walk(directory):
        for name in files:
            if file_format == None or file_format in name:
                collected.append(name if show_root_path == 0 else root+'/'+name)
        if include_subdirectory != 1:
            break
    return sorted(collected) if sort == 1 else collected
|
|
||||||
|
|
||||||
# 获取文件夹中某种文本类型的文件以及读取所有内容
|
|
||||||
def read_text_files_in_directory(directory='./', file_format='.md'):
    """Collect the files under ``directory`` whose name contains ``file_format`` and read them.

    Returns:
        (file_list, content_array): the matching paths (directory + '/' + name)
        and, in the same order, their UTF-8 text content.
    """
    import os
    file_list = [
        root+'/'+name
        for root, dirs, files in os.walk(directory)
        for name in files
        if file_format in name
    ]
    content_array = []
    for path in file_list:
        with open(path, 'r', encoding='UTF-8') as f:
            content_array.append(f.read())
    return file_list, content_array
|
|
||||||
|
|
||||||
# 在多个文本文件中查找关键词
|
|
||||||
def find_words_in_multiple_files(words, directory='./', file_format='.md'):
    """Return the paths of the files under ``directory`` whose content contains ``words``.

    The candidate files and their content come from
    ``guan.read_text_files_in_directory``.
    """
    import guan
    file_list, content_array = guan.read_text_files_in_directory(directory=directory, file_format=file_format)
    return [path for path, text in zip(file_list, content_array) if words in text]
|
|
||||||
|
|
||||||
# 复制一份文件
|
|
||||||
def copy_file(old_file='./a.txt', new_file='./b.txt'):
    """Copy ``old_file`` to ``new_file`` with ``shutil.copy``."""
    from shutil import copy
    copy(old_file, new_file)
|
|
||||||
|
|
||||||
# 打开文件,替代某字符串
|
|
||||||
def open_file_and_replace_str(file_path='./a.txt', old_str='', new_str=''):
    """Replace every occurrence of ``old_str`` by ``new_str`` inside ``file_path``.

    The content is read with ``guan.read_text_file`` and written back through
    ``guan.open_file`` in 'overwrite' mode.
    """
    import guan
    updated = guan.read_text_file(file_path=file_path).replace(old_str, new_str)
    handle = guan.open_file(filename=file_path, file_format='', mode='overwrite')
    handle.write(updated)
    handle.close()
|
|
||||||
|
|
||||||
# 复制一份文件,然后再替代某字符串
|
|
||||||
def copy_file_and_replace_str(old_file='./a.txt', new_file='./b.txt', old_str='', new_str=''):
    """Copy ``old_file`` to ``new_file`` and replace ``old_str`` by ``new_str`` in the copy.

    The copy is rewritten through ``guan.open_file`` in 'overwrite' mode; the
    source file is left untouched.
    """
    import guan
    guan.copy_file(old_file=old_file, new_file=new_file)
    updated = guan.read_text_file(file_path=new_file).replace(old_str, new_str)
    handle = guan.open_file(filename=new_file, file_format='', mode='overwrite')
    handle.write(updated)
    handle.close()
|
|
||||||
|
|
||||||
# 拼接两个PDF文件
|
|
||||||
def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
    """Write the pages of ``input_file_1`` followed by those of ``input_file_2`` to ``output_file``.

    Uses PyPDF2's ``PdfReader`` and ``PdfWriter``.
    """
    import PyPDF2
    writer = PyPDF2.PdfWriter()
    for input_path in (input_file_1, input_file_2):
        with open(input_path, 'rb') as source:
            reader = PyPDF2.PdfReader(source)
            for page_index in range(len(reader.pages)):
                writer.add_page(reader.pages[page_index])
    with open(output_file, 'wb') as combined_file:
        writer.write(combined_file)
|
|
||||||
|
|
||||||
# 读取文件中的一维数据(一行一组x和y)
|
# 读取文件中的一维数据(一行一组x和y)
|
||||||
def read_one_dimensional_data(filename='a', file_format='.txt'):
|
def read_one_dimensional_data(filename='a', file_format='.txt'):
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -375,6 +243,124 @@ def write_two_dimensional_data_without_xy_array_and_without_opening_file(matrix,
|
|||||||
f.write(str(element)+' ')
|
f.write(str(element)+' ')
|
||||||
f.write('\n')
|
f.write('\n')
|
||||||
|
|
||||||
|
# 如果不存在文件夹,则新建文件夹
|
||||||
|
def make_directory(directory='./test'):
    """Create ``directory`` (including missing parents) unless the path already exists."""
    import os
    if os.path.exists(directory):
        return
    os.makedirs(directory)
|
||||||
|
|
||||||
|
# 如果不存在文件,则新建空文件
|
||||||
|
def make_file(file_path='./a.txt'):
    """Create an empty file at ``file_path`` unless the path already exists."""
    import os
    if os.path.exists(file_path):
        return
    open(file_path, 'w').close()
|
||||||
|
|
||||||
|
# 打开文件用于写入,默认为新增内容
|
||||||
|
def open_file(filename='a', file_format='.txt', mode='add'):
    """Open ``filename + file_format`` for UTF-8 text writing and return the handle.

    Args:
        filename: Path without the extension.
        file_format: Extension appended to ``filename``.
        mode: 'add' appends to the file, 'overwrite' truncates it.

    Returns:
        The open file object; the caller is responsible for closing it.

    Raises:
        ValueError: If ``mode`` is neither 'add' nor 'overwrite'.
    """
    open_modes = {'add': 'a', 'overwrite': 'w'}
    if mode not in open_modes:
        raise ValueError("mode must be 'add' or 'overwrite', got "+repr(mode))
    return open(filename+file_format, open_modes[mode], encoding='UTF-8')
|
||||||
|
|
||||||
|
# 打印到TXT文件
|
||||||
|
def print_to_file(*args, filename='print_result', file_format='.txt', print_on=True):
    """Append the arguments as one line to ``filename + file_format``.

    Every argument is converted with ``str`` and followed by a single space;
    the line ends with a newline. The same line is echoed to standard output
    when ``print_on`` equals True.

    Args:
        *args: Values to record.
        filename: Path without the extension.
        file_format: Extension appended to ``filename``.
        print_on: Echo the line to standard output as well.
    """
    line = ''.join(str(arg)+' ' for arg in args)
    if print_on == True:
        print(line)
    # The context manager closes the file even if the write fails.
    with open(filename+file_format, 'a', encoding='UTF-8') as f:
        f.write(line+'\n')
|
||||||
|
|
||||||
|
# 读取文本文件内容。如果文件不存在,返回空字符串
|
||||||
|
def read_text_file(file_path='./a.txt', make_file=None):
    """Return the UTF-8 text content of ``file_path``, or '' when it does not exist.

    Args:
        file_path: Path of the file to read.
        make_file: When not None and the file is missing, an empty file is
            created at ``file_path``.

    Returns:
        The file content as a string ('' for a missing file).
    """
    import os
    if not os.path.exists(file_path):
        if make_file is not None:
            with open(file_path, 'w', encoding='UTF-8'):
                pass
        return ''
    # Read as UTF-8 to match the encoding used by the writers in this module.
    with open(file_path, 'r', encoding='UTF-8') as f:
        return f.read()
|
||||||
|
|
||||||
|
# 获取目录中的所有文件名
|
||||||
|
def get_all_filenames_in_directory(directory='./', file_format=None, show_root_path=0, sort=1, include_subdirectory=1):
    """List the file names found under ``directory``.

    Args:
        directory: Directory to walk.
        file_format: When given, only names containing this substring are kept.
        show_root_path: 0 returns bare file names; any other value prefixes
            each name with its directory and '/'.
        sort: 1 returns the list sorted.
        include_subdirectory: 1 descends into sub-directories; any other value
            stops after the top-level directory.

    Returns:
        The list of (optionally prefixed) file names.
    """
    import os
    collected = []
    for root, dirs, files in os.walk(directory):
        for name in files:
            if file_format == None or file_format in name:
                collected.append(name if show_root_path == 0 else root+'/'+name)
        if include_subdirectory != 1:
            break
    return sorted(collected) if sort == 1 else collected
|
||||||
|
|
||||||
|
# 获取文件夹中某种文本类型的文件以及读取所有内容
|
||||||
|
def read_text_files_in_directory(directory='./', file_format='.md'):
    """Collect the files under ``directory`` whose name contains ``file_format`` and read them.

    Returns:
        (file_list, content_array): the matching paths (directory + '/' + name)
        and, in the same order, their UTF-8 text content.
    """
    import os
    file_list = [
        root+'/'+name
        for root, dirs, files in os.walk(directory)
        for name in files
        if file_format in name
    ]
    content_array = []
    for path in file_list:
        with open(path, 'r', encoding='UTF-8') as f:
            content_array.append(f.read())
    return file_list, content_array
|
||||||
|
|
||||||
|
# 在多个文本文件中查找关键词
|
||||||
|
def find_words_in_multiple_files(words, directory='./', file_format='.md'):
    """Return the paths of the files under ``directory`` whose content contains ``words``.

    The candidate files and their content come from
    ``guan.read_text_files_in_directory``.
    """
    import guan
    file_list, content_array = guan.read_text_files_in_directory(directory=directory, file_format=file_format)
    return [path for path, text in zip(file_list, content_array) if words in text]
|
||||||
|
|
||||||
|
# 复制一份文件
|
||||||
|
def copy_file(old_file='./a.txt', new_file='./b.txt'):
    """Copy ``old_file`` to ``new_file`` with ``shutil.copy``."""
    from shutil import copy
    copy(old_file, new_file)
|
||||||
|
|
||||||
|
# 打开文件,替代某字符串
|
||||||
|
def open_file_and_replace_str(file_path='./a.txt', old_str='', new_str=''):
    """Replace every occurrence of ``old_str`` by ``new_str`` inside ``file_path``.

    The content is read with ``guan.read_text_file`` and written back through
    ``guan.open_file`` in 'overwrite' mode.
    """
    import guan
    updated = guan.read_text_file(file_path=file_path).replace(old_str, new_str)
    handle = guan.open_file(filename=file_path, file_format='', mode='overwrite')
    handle.write(updated)
    handle.close()
|
||||||
|
|
||||||
|
# 复制一份文件,然后再替代某字符串
|
||||||
|
def copy_file_and_replace_str(old_file='./a.txt', new_file='./b.txt', old_str='', new_str=''):
    """Copy ``old_file`` to ``new_file`` and replace ``old_str`` by ``new_str`` in the copy.

    The copy is rewritten through ``guan.open_file`` in 'overwrite' mode; the
    source file is left untouched.
    """
    import guan
    guan.copy_file(old_file=old_file, new_file=new_file)
    updated = guan.read_text_file(file_path=new_file).replace(old_str, new_str)
    handle = guan.open_file(filename=new_file, file_format='', mode='overwrite')
    handle.write(updated)
    handle.close()
|
||||||
|
|
||||||
# 改变当前的目录位置
|
# 改变当前的目录位置
|
||||||
def change_directory_by_replacement(current_key_word='code', new_key_word='data'):
|
def change_directory_by_replacement(current_key_word='code', new_key_word='data'):
|
||||||
import os
|
import os
|
||||||
@ -384,3 +370,131 @@ def change_directory_by_replacement(current_key_word='code', new_key_word='data'
|
|||||||
if os.path.exists(data_path) == False:
|
if os.path.exists(data_path) == False:
|
||||||
os.makedirs(data_path)
|
os.makedirs(data_path)
|
||||||
os.chdir(data_path)
|
os.chdir(data_path)
|
||||||
|
|
||||||
|
# 查找文件名相同的文件
|
||||||
|
def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000):
    """Find file names that occur more than once under ``directory``.

    Args:
        directory: Directory to walk.
        ignored_directory_with_words: Files are skipped when the path of their
            directory contains any of these substrings (read only).
        ignored_file_with_words: Files are skipped when their name contains any
            of these substrings (read only).
        num: Only the ``num`` most common names are considered.

    Returns:
        A list of ``(file_name, count)`` tuples with count > 1, most common first.
    """
    import os
    from collections import Counter
    name_counter = Counter()
    for root, dirs, files in os.walk(directory):
        # Decide before counting, so a file is excluded exactly once no matter
        # how many ignore words match it.
        if any(word in root for word in ignored_directory_with_words):
            continue
        for name in files:
            if not any(word in name for word in ignored_file_with_words):
                name_counter[name] += 1
    return [item for item in name_counter.most_common(num) if item[1] > 1]
|
||||||
|
|
||||||
|
# 统计各个子文件夹中的文件数量
|
||||||
|
def count_file_in_sub_directory(directory='./', sort=0, reverse=1, print_show=1, smaller_than_num=None):
    """Count the files contained (recursively) in every sub-directory of ``directory``.

    Args:
        directory: Directory to walk; sub-directories at every depth are listed.
        sort: 0 keeps the walk order, 1 orders the result by file count.
        reverse: With sort == 1, 1 puts the largest count first; any other
            value puts the smallest first.
        print_show: 1 prints each directory followed by its count.
        smaller_than_num: When given, only entries with a count below this
            value are printed (the returned lists are not filtered).

    Returns:
        (sub_directory, num_in_sub_directory): the directory paths and their
        file counts, in matching order.
    """
    import os
    import numpy as np

    def report(path, file_count):
        # Print one entry, honouring print_show and the optional upper bound.
        if print_show != 1:
            return
        if smaller_than_num == None or file_count < smaller_than_num:
            print(path)
            print(file_count)
            print()

    dirs_list = [
        root+'/'+name
        for root, dirs, files in os.walk(directory)
        for name in dirs
    ]
    count_file_array = []
    for sub_dir in dirs_list:
        count_file = sum(len(files) for root, dirs, files in os.walk(sub_dir))
        count_file_array.append(count_file)
        if sort == 0:
            report(sub_dir, count_file)
    if sort == 0:
        sub_directory = dirs_list
        num_in_sub_directory = count_file_array
    if sort == 1:
        index_array = np.argsort(count_file_array)
        if reverse == 1:
            index_array = index_array[::-1]
        sub_directory = []
        num_in_sub_directory = []
        for position in index_array:
            sub_directory.append(dirs_list[position])
            num_in_sub_directory.append(count_file_array[position])
            report(dirs_list[position], count_file_array[position])
    return sub_directory, num_in_sub_directory
|
||||||
|
|
||||||
|
# 在多个子文件夹中产生必要的文件,例如 readme.md
|
||||||
|
def creat_necessary_file(directory, filename='readme', file_format='.md', content='', overwrite=None, ignored_directory_with_words=[]):
    """Write ``filename + file_format`` into every directory under ``directory`` that holds files.

    Directories without any file are left alone. The current working directory
    is not changed.

    Args:
        directory: Root directory to walk (absolute or relative).
        filename: Name of the file to create, without extension.
        file_format: Extension appended to ``filename``.
        content: Text written to each created file (UTF-8).
        overwrite: None keeps existing files with that name; any other value
            rewrites them.
        ignored_directory_with_words: Directories whose path contains any of
            these substrings are skipped (read only).
    """
    import os
    target_name = filename+file_format
    # Collect the targets first so that files created below never influence
    # the directory walk.
    target_directories = []
    for root, dirs, files in os.walk(directory):
        if not files:
            continue
        if overwrite is None and target_name in files:
            continue
        if any(word in root for word in ignored_directory_with_words):
            continue
        target_directories.append(root)
    for root in target_directories:
        # Write through the full path instead of os.chdir(root): relative roots
        # stay valid and the caller's working directory is untouched.
        with open(os.path.join(root, target_name), 'w', encoding="utf-8") as f:
            f.write(content)
|
||||||
|
|
||||||
|
# 删除特定文件名的文件(谨慎使用)
|
||||||
|
def delete_file_with_specific_name(directory, filename='readme', file_format='.md'):
    """Delete every file named exactly ``filename + file_format`` under ``directory``.

    Use with care: the files are removed permanently.
    """
    import os
    target_name = filename+file_format
    for root, dirs, files in os.walk(directory):
        for name in files:
            if name == target_name:
                os.remove(root+'/'+name)
|
||||||
|
|
||||||
|
# 将所有文件移到根目录(谨慎使用)
|
||||||
|
def move_all_files_to_root_directory(directory):
    """Move every file below ``directory`` into ``directory`` and remove the emptied folders.

    Use with care: a file moved from a sub-directory replaces a file with the
    same name that is already in ``directory`` (where the platform allows the
    overwrite). Directories that cannot be removed, e.g. because they are not
    empty, are left in place. If ``directory`` itself ends up empty it is
    removed as well.

    Args:
        directory: Root directory that receives all files.
    """
    import os
    import shutil
    for root, dirs, files in os.walk(directory):
        if root == directory:
            # Files already at the top level stay where they are.
            continue
        for name in files:
            shutil.move(root+'/'+name, directory+'/'+name)
    # Walk bottom-up so children are removed before their parents; a single
    # pass then clears nested empty folders of any depth.
    for root, dirs, files in os.walk(directory, topdown=False):
        try:
            os.rmdir(root)
        except OSError:
            pass
|
@ -382,3 +382,25 @@ def pca_of_data(data, n_components=None, standard=1):
|
|||||||
data_transformed = pca.fit_transform(data_scaled)
|
data_transformed = pca.fit_transform(data_scaled)
|
||||||
explained_variance_ratio = pca.explained_variance_ratio_
|
explained_variance_ratio = pca.explained_variance_ratio_
|
||||||
return data_transformed, explained_variance_ratio
|
return data_transformed, explained_variance_ratio
|
||||||
|
|
||||||
|
# 通过定义计算R^2(基于实际值和预测值,数值有可能小于0)
|
||||||
|
def calculate_R2_with_definition(y_true_array, y_pred_array):
    """Compute R^2 = 1 - SS_res / SS_tot from true and predicted values.

    The result can be negative when the predictions fit worse than the mean of
    the true values.

    Args:
        y_true_array: True values (NumPy array or any array-like such as a list).
        y_pred_array: Predicted values with the same shape.

    Returns:
        The coefficient of determination.
    """
    import numpy as np
    # Accept plain sequences as well as arrays.
    y_true = np.asarray(y_true_array)
    y_pred = np.asarray(y_pred_array)
    y_mean = np.mean(y_true)
    SS_tot = np.sum((y_true - y_mean) ** 2)
    SS_res = np.sum((y_true - y_pred) ** 2)
    R2 = 1 - (SS_res / SS_tot)
    return R2
|
||||||
|
|
||||||
|
# 通过sklearn计算R^2,和上面定义的计算结果一致
|
||||||
|
def calculate_R2_with_sklearn(y_true_array, y_pred_array):
    """Return R^2 of the predictions as computed by ``sklearn.metrics.r2_score``."""
    from sklearn.metrics import r2_score
    return r2_score(y_true_array, y_pred_array)
|
||||||
|
|
||||||
|
# 通过scipy计算线性回归后的R^2(基于线性回归模型,范围在0和1之间)
|
||||||
|
def calculate_R2_after_linear_regression_with_scipy(y_true_array, y_pred_array):
    """Return the squared correlation coefficient of a linear regression between the two arrays.

    The regression is done by ``scipy.stats.linregress``; the result lies
    between 0 and 1.
    """
    from scipy import stats
    regression = stats.linregress(y_true_array, y_pred_array)
    return regression.rvalue**2
|
@ -1,711 +0,0 @@
|
|||||||
# Module: others
|
|
||||||
|
|
||||||
# 获取软件包的本机版本
|
|
||||||
def get_current_version(package_name='guan'):
    """Return the installed version of ``package_name``, or None when the lookup fails."""
    from importlib import metadata
    try:
        installed_version = metadata.version(package_name)
    except:
        return None
    else:
        return installed_version
|
|
||||||
|
|
||||||
# 获取Python软件包的最新版本
|
|
||||||
def get_latest_version(package_name='guan', timeout=5):
    """Query https://pypi.org/pypi/<package_name>/json for the latest released version.

    Args:
        package_name: Name of the package on PyPI.
        timeout: Timeout passed to ``requests.get``.

    Returns:
        The version string from the "info" section, or None when the request
        fails or does not answer with status 200.
    """
    import requests
    url = f"https://pypi.org/pypi/{package_name}/json"
    try:
        response = requests.get(url, timeout=timeout)
    except:
        return None
    if response.status_code != 200:
        return None
    return response.json()["info"]["version"]
|
|
||||||
|
|
||||||
# 获取包含某个字符的进程PID值
|
|
||||||
def get_PID_array(name):
    """Return the second column of every ``ps -ef`` line that matches ``name``.

    NOTE(review): the second column is the PID for the usual procps ``ps -ef``
    layout; other ``ps`` implementations may order the columns differently. The
    ``grep`` process itself normally shows up in the result too.

    Args:
        name: Pattern handed to ``grep``; it is shell-quoted so that spaces or
            shell metacharacters cannot alter the command.

    Returns:
        A list of strings; empty when the pipeline fails or nothing matches.
    """
    import re
    import shlex
    import subprocess
    command = "ps -ef | grep "+shlex.quote(name)
    result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    id_running_array = []
    if result.returncode == 0:
        for ps_ef_item in result.stdout.split('\n'):
            if ps_ef_item != '':
                fields = re.split(r'\s+', ps_ef_item)
                if len(fields) > 1:
                    id_running_array.append(fields[1])
    return id_running_array
|
|
||||||
|
|
||||||
# 每日git commit次数的统计
|
|
||||||
def statistics_of_git_commits(print_show=0, str_or_datetime='str'):
    """Count the git commits per day in the repository of the current directory.

    Runs ``git log --since='100 year ago' --pretty=format:%ad --date=short``
    and tallies the dates it prints.

    Args:
        print_show: 1 prints "<date>: <count> commits" for every day.
        str_or_datetime: 'str' returns the dates as 'YYYY-MM-DD' strings,
            'datetime' as ``datetime.datetime`` objects. For any other value no
            dates are collected and only the counts are filled in.

    Returns:
        (date_array, commit_count_array): the days in ascending order and the
        number of commits on each of them. Both are empty when git prints
        nothing (e.g. outside a repository).
    """
    import subprocess
    import collections
    import datetime
    since_date = '100 year ago'
    result = subprocess.run(
        ['git', 'log', f'--since={since_date}', '--pretty=format:%ad', '--date=short'],
        stdout=subprocess.PIPE,
        text=True)
    # Drop blank lines so empty output is not counted as a commit on date ''.
    commits = [line for line in result.stdout.strip().split('\n') if line.strip() != '']
    daily_commit_counts = dict(sorted(collections.Counter(commits).items()))
    date_array = []
    commit_count_array = []
    for date, count in daily_commit_counts.items():
        if print_show == 1:
            print(f"{date}: {count} commits")
        if str_or_datetime=='datetime':
            date_array.append(datetime.datetime.strptime(date, "%Y-%m-%d"))
        elif str_or_datetime=='str':
            date_array.append(date)
        commit_count_array.append(count)
    return date_array, commit_count_array
|
|
||||||
|
|
||||||
# 将文件目录结构写入Markdown文件
|
|
||||||
def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=None, hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
    """Write the structure of *directory* into ``<filename>.md``.

    Files become ``+ name`` bullet items and directories become headings whose
    level follows their depth. Directories are followed down to six levels;
    a directory at the sixth level gets a heading but its contents are not
    listed.

    Args:
        directory: Root directory to list.
        filename: Output path without the ``.md`` suffix.
        reverse_positive_or_negative: Slice step applied to every directory
            listing (1 keeps the ``os.listdir`` order, -1 reverses it).
        starting_from_h1: When None, every heading gets one extra ``#`` so the
            top level starts at ``##``; otherwise it starts at ``#``.
        banned_file_format: Extensions (with the leading dot) of files to
            leave out. None means no extension is banned.
        hide_file_format: When not None, file names are written without
            their extension.
        divided_line: When not None, a ``--------`` rule is written before
            every top-level directory except the first.
        show_second_number: When not None, second-level directories are
            numbered as ``1. name``.
        show_third_number: When not None, third-level directories are
            numbered as ``(1) name``.
    """
    import os
    if banned_file_format is None:
        banned_file_format = []
    deepest_level = 6

    def write_level(f, path, level):
        # Write the entries of one directory, then descend into its subdirectories.
        directory_count = 0
        for name in os.listdir(path)[::reverse_positive_or_negative]:
            name_with_path = os.path.join(path, name)
            if os.path.isfile(name_with_path):
                stem, extension = os.path.splitext(name)
                if extension not in banned_file_format:
                    shown_name = name if hide_file_format is None else stem
                    f.write('+ '+str(shown_name)+'\n\n')
                continue
            # Everything that is not a regular file is treated as a directory.
            directory_count += 1
            if level == 1 and divided_line is not None and directory_count != 1:
                f.write('--------\n\n')
            if starting_from_h1 is None:
                f.write('#')
            if level == 2 and show_second_number is not None:
                label = str(directory_count)+'. '+str(name)
            elif level == 3 and show_third_number is not None:
                label = '('+str(directory_count)+') '+str(name)
            else:
                label = str(name)
            f.write('#'*level+' '+label+'\n\n')
            if level < deepest_level:
                write_level(f, name_with_path, level+1)

    # The context manager closes the output file even if listing a directory fails.
    with open(filename+'.md', 'w', encoding="utf-8") as f:
        write_level(f, directory, 1)
|
|
||||||
|
|
||||||
# 从网页的标签中获取内容
|
|
||||||
def get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a']):
    """Download a web page and return the text found inside the given tags.

    Args:
        link: URL of the page; the response is decoded as UTF-8.
        tags: Tag names whose text is collected, in document order.

    Returns:
        The tag texts with their line breaks removed, joined by blank lines.
    """
    from bs4 import BeautifulSoup
    import urllib.request
    import ssl
    # NOTE: certificate verification stays disabled, as before, but only for
    # this request; the process-wide default HTTPS context is no longer replaced.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    with urllib.request.urlopen(link, context=context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    content = ''
    for tag in soup.find_all(tags):
        text = tag.get_text().replace('\n', '')
        # While nothing has been collected yet, the text simply becomes the
        # content, so leading empty texts add no separators.
        if content == '':
            content = text
        else:
            content = content + '\n\n' + text
    return content
|
|
||||||
|
|
||||||
# 从HTML中获取所有的链接
|
|
||||||
def get_links_from_html(html_link, links_with_text=0):
    """Download a web page and return the targets of its ``<a>`` tags.

    Args:
        html_link: URL of the page; the response is decoded as UTF-8.
        links_with_text: When 0, return only the ``href`` values; otherwise
            return ``(href, text)`` tuples.

    Returns:
        A list with one entry per ``<a>`` tag that has a non-empty ``href``.
    """
    from bs4 import BeautifulSoup
    import urllib.request
    import ssl
    # NOTE: certificate verification stays disabled, as before, but only for
    # this request; the process-wide default HTTPS context is no longer replaced.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    with urllib.request.urlopen(html_link, context=context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    a_tags = soup.find_all('a')
    if links_with_text == 0:
        return [tag.get('href') for tag in a_tags if tag.get('href')]
    return [(tag.get('href'), tag.text) for tag in a_tags if tag.get('href')]
|
|
||||||
|
|
||||||
# 检查链接的有效性
|
|
||||||
def check_link(url, timeout=3, allow_redirects=True):
    """Report whether a HEAD request to *url* answers with status 200.

    Args:
        url: Address to probe.
        timeout: Timeout forwarded to ``requests.head``.
        allow_redirects: Whether redirects are followed.

    Returns:
        True when the status code is 200; False for any other status or when
        the request raises a ``requests`` exception.
    """
    import requests
    try:
        status_code = requests.head(url, timeout=timeout, allow_redirects=allow_redirects).status_code
    except requests.exceptions.RequestException:
        return False
    return status_code == 200
|
|
||||||
|
|
||||||
# 检查链接数组中链接的有效性
|
|
||||||
def check_link_array(link_array, timeout=3, allow_redirects=True, try_again=0, print_show=1):
    """Return the links of *link_array* that fail ``guan.check_link``.

    The placeholder link ``'#'`` is never checked and never reported.

    Args:
        link_array: Links to check.
        timeout: Timeout forwarded to ``guan.check_link``.
        allow_redirects: Forwarded to ``guan.check_link``.
        try_again: When truthy, the failed links are checked a second time and
            only those failing again are returned.
        print_show: When truthy, print every failing link as it is found.

    Returns:
        The list of failed links.
    """
    import guan

    def collect_failures(candidates):
        # One checking pass: keep (and optionally print) every link that fails.
        failures = []
        for candidate in candidates:
            if candidate != '#' and not guan.check_link(candidate, timeout=timeout, allow_redirects=allow_redirects):
                failures.append(candidate)
                if print_show:
                    print(candidate)
        return failures

    first_pass_failures = collect_failures(link_array)
    if not try_again:
        return first_pass_failures
    if print_show:
        print('\nTry again:\n')
    return collect_failures(first_pass_failures)
|
|
||||||
|
|
||||||
# 生成二维码
|
|
||||||
def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
    """Create a QR code for *data* and save it as ``filename + file_format``."""
    import qrcode
    qr_image = qrcode.make(data)
    output_path = filename+file_format
    qr_image.save(output_path)
|
|
||||||
|
|
||||||
# 将PDF文件转成文本
|
|
||||||
def pdf_to_text(pdf_path):
    """Extract the text of a PDF file with pdfminer.

    Args:
        pdf_path: Path of the PDF file.

    Returns:
        The stripped text of every text box on every page, concatenated
        without separators.

    Raises:
        PDFTextExtractionNotAllowed: If the document is not extractable.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    # Silence log output below ERROR while parsing.
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    # Pages are read while iterating, so all processing stays inside the
    # ``with`` block, which also closes the file on every exit path.
    with open(pdf_path, 'rb') as pdf_file:
        praser = PDFParser(pdf_file)
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        text_pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    text_pieces.append(x.get_text().strip())
    return ''.join(text_pieces)
|
|
||||||
|
|
||||||
# 获取PDF文件页数
|
|
||||||
def get_pdf_page_number(pdf_path):
    """Return the number of pages of the PDF file at *pdf_path*."""
    import PyPDF2
    # The context manager closes the file once the page count is known.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
    return num_pages
|
|
||||||
|
|
||||||
# 获取PDF文件指定页面的内容
|
|
||||||
def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1):
    """Return the text of one page of a PDF file.

    Args:
        pdf_path: Path of the PDF file.
        page_num: 1-based page number.

    Returns:
        The text extracted from the requested page.

    Raises:
        IndexError: If *page_num* is not between 1 and the number of pages.
    """
    import PyPDF2
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
        # Check the range explicitly: a page number of 0 or below must not
        # wrap around to the end of the document through negative indexing.
        if not 1 <= page_num <= num_pages:
            raise IndexError('page_num must be between 1 and '+str(num_pages)+', got '+str(page_num))
        page_text = pdf_reader.pages[page_num-1].extract_text()
    return page_text
|
|
||||||
|
|
||||||
# 获取PDF文献中的链接。例如: link_starting_form='https://doi.org'
|
|
||||||
def get_links_from_pdf(pdf_path, link_starting_form=''):
    """Collect the URI links stored in the annotations of a PDF file.

    Args:
        pdf_path: Path of the PDF file.
        link_starting_form: Required start of a link, for example
            ``'https://doi.org'``. It is used as a regular expression
            anchored at the beginning of the URI.

    Returns:
        The matching URIs in page order. On each page, a URI equal to the
        previously collected one on that page is skipped.
    """
    import PyPDF2
    import re
    reader = PyPDF2.PdfReader(pdf_path)
    links = []
    for page_index in range(len(reader.pages)):
        page_object = reader.pages[page_index].get_object()
        if '/Annots' not in page_object.keys():
            continue
        # The duplicate filter starts afresh on every page.
        previous_uri = ''
        for annotation in page_object['/Annots']:
            annotation_object = annotation.get_object()
            if '/A' not in annotation_object.keys():
                continue
            if '/URI' not in annotation_object['/A']:
                continue
            if not re.search(re.compile('^'+link_starting_form), annotation_object['/A']['/URI']):
                continue
            if annotation_object['/A']['/URI'] != previous_uri:
                links.append(annotation_object['/A']['/URI'])
                previous_uri = annotation_object['/A']['/URI']
    return links
|
|
||||||
|
|
||||||
# 通过Sci-Hub网站下载文献
|
|
||||||
def download_with_scihub(address=None, num=1):
    """Download papers through the Sci-Hub website into the working directory.

    Args:
        address: Request string posted to Sci-Hub. It is used only when
            ``num`` is 1 and it is not None; otherwise the requests are
            read interactively.
        num: Number of papers; that many requests are prompted for on the
            console unless a single *address* was given.
    """
    from bs4 import BeautifulSoup
    import re
    import requests
    import os
    if num == 1 and address != None:
        address_array = [address]
    else:
        address_array = [input('\nInput:') for _ in range(num)]
    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        pdf_URL = soup.embed['src']
        # pdf_URL = soup.iframe['src'] # This is a code line of history version which fails to get pdf URL.
        if not re.search(re.compile('^https:'), pdf_URL):
            pdf_URL = 'https:'+pdf_URL
        print('PDF address:', pdf_URL)
        # Search the reversed URL so the match runs from the last 'pdf' back to
        # the nearest '/', then drop that slash to obtain the file name.
        reversed_match = re.search(re.compile('fdp.*?/'), pdf_URL[::-1]).group()
        name = reversed_match[::-1][1::]
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=32):
                f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
|
|
||||||
|
|
||||||
# 将字符串转成音频
|
|
||||||
def str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak a string with pyttsx3 and optionally save it as an audio file.

    Args:
        str: Text to speak.
        filename: Output file name without extension.
        rate: Value for the engine's ``rate`` property.
        voice: Index into the engine's list of voices.
        read: When 1, read the text aloud.
        save: When 1, save the speech to ``<filename>.wav``.
        compress: When 1 (and ``save`` is 1), convert the wav file to
            ``<filename>.mp3`` and delete the wav file.
        bitrate: Bitrate forwarded to ``guan.compress_wav_to_mp3``.
        print_text: When 1, print the text first.
    """
    import pyttsx3
    import guan
    if print_text == 1:
        print(str)
    engine = pyttsx3.init()
    available_voices = engine.getProperty('voices')
    engine.setProperty('voice', available_voices[voice].id)
    engine.setProperty("rate", rate)
    if save == 1:
        wav_name = filename+'.wav'
        engine.save_to_file(str, wav_name)
        engine.runAndWait()
        print('Wav file saved!')
        if compress == 1:
            import os
            # Compress from a temporary copy, then delete that copy.
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read == 1:
        engine.say(str)
        engine.runAndWait()
|
|
||||||
|
|
||||||
# 将txt文件转成音频
|
|
||||||
def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak the content of a UTF-8 text file and optionally save it as audio.

    Args:
        txt_path: Path of the text file.
        rate: Value for the engine's ``rate`` property.
        voice: Index into the engine's list of voices.
        read: When 1, read the text aloud.
        save: When 1, save the speech to a wav file in the working directory.
            Its name is the last path component of *txt_path* with the final
            four characters removed.
        compress: When 1 (and ``save`` is 1), convert the wav file to mp3 and
            delete the wav file.
        bitrate: Bitrate forwarded to ``guan.compress_wav_to_mp3``.
        print_text: When 1, print the text first.
    """
    import pyttsx3
    import guan
    # Read inside a context manager so the file handle is always closed.
    with open(txt_path, 'r', encoding='utf-8') as f:
        text = f.read()
    if print_text==1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save==1:
        import re
        # Raw string: same pattern as before, without the invalid '\]' escape.
        # NOTE(review): the character class also splits on ',' - confirm that is intended.
        filename = re.split(r'[/,\\]', txt_path)[-1][:-4]
        engine.save_to_file(text, filename+'.wav')
        engine.runAndWait()
        print('Wav file saved!')
        if compress==1:
            import os
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read==1:
        engine.say(text)
        engine.runAndWait()
|
|
||||||
|
|
||||||
# 将PDF文件转成音频
|
|
||||||
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak the text of a PDF file and optionally save it as audio.

    The text comes from ``guan.pdf_to_text`` with line breaks replaced by
    spaces.

    Args:
        pdf_path: Path of the PDF file.
        rate: Value for the engine's ``rate`` property.
        voice: Index into the engine's list of voices.
        read: When 1, read the text aloud.
        save: When 1, save the speech to a wav file in the working directory.
            Its name is the last path component of *pdf_path* with the final
            four characters removed.
        compress: When 1 (and ``save`` is 1), convert the wav file to mp3 and
            delete the wav file.
        bitrate: Bitrate forwarded to ``guan.compress_wav_to_mp3``.
        print_text: When 1, print the text first.
    """
    import pyttsx3
    import guan
    text = guan.pdf_to_text(pdf_path)
    text = text.replace('\n', ' ')
    if print_text==1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save==1:
        import re
        # Raw string: same pattern as before, without the invalid '\]' escape.
        # NOTE(review): the character class also splits on ',' - confirm that is intended.
        filename = re.split(r'[/,\\]', pdf_path)[-1][:-4]
        engine.save_to_file(text, filename+'.wav')
        engine.runAndWait()
        print('Wav file saved!')
        if compress==1:
            import os
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read==1:
        engine.say(text)
        engine.runAndWait()
|
|
||||||
|
|
||||||
# 将wav音频文件压缩成MP3音频文件
|
|
||||||
def compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k'):
    """Convert a wav audio file into an mp3 file.

    Args:
        wav_path: Path of the input wav file.
        output_filename: Path of the mp3 file to write.
        bitrate: Bitrate passed to the exporter, for example ``'16k'``.
    """
    # Note: Beside the installation of pydub, you may also need download FFmpeg on http://www.ffmpeg.org/download.html and add the bin path to the environment variable.
    from pydub import AudioSegment
    # The input is a wav file, so load it with the wav reader, not the mp3 reader.
    sound = AudioSegment.from_wav(wav_path)
    sound.export(output_filename,format="mp3",bitrate=bitrate)
|
|
||||||
|
|
||||||
# 将WordPress导出的XML格式文件转换成多个MarkDown格式的文件
|
|
||||||
def convert_wordpress_xml_to_markdown(xml_file='./a.xml', convert_content=1, replace_more=None):
    """Convert a WordPress XML export into one Markdown file per item.

    Each ``<item>`` is written to ``<title>.md`` in the working directory.
    Characters ``/ : * ? " < > | ' \\`` in the file name are replaced by spaces.

    Args:
        xml_file: Path of the exported XML file.
        convert_content: When 1, strip HTML comments and common tags from the
            content and turn ``h2``/``h3``/``h4`` and ``li`` tags into Markdown
            markers; otherwise the content is written unchanged.
        replace_more: Additional substrings to remove from the content when
            ``convert_content`` is 1. None means nothing extra.
    """
    import xml.etree.ElementTree as ET
    import re
    if replace_more is None:
        replace_more = []
    content_namespace = {'content': 'http://purl.org/rss/1.0/modules/content/'}
    # Tags that are dropped without a replacement. The closing h2/h4 tags are
    # listed together with h3 so that no stray closing tag is left behind.
    removed_tags = ['<p>', '</p>', '<ol>', '</ol>', '<ul>', '</ul>', '<strong>', '</strong>', '</li>', '</h2>', '</h3>', '</h4>']
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for item in root.findall('.//item'):
        title = item.find('title').text
        content = item.find('.//content:encoded', namespaces=content_namespace).text
        # An empty <content:encoded/> element has no text; treat it as empty content.
        if content is None:
            content = ''
        if convert_content == 1:
            content = re.sub(r'<!--.*?-->', '', content)
            for removed_tag in removed_tags:
                content = content.replace(removed_tag, '')
            content = content.replace('<li>', '+ ')
            content = re.sub(r'<h2.*?>', '## ', content)
            content = re.sub(r'<h3.*?>', '### ', content)
            content = re.sub(r'<h4.*?>', '#### ', content)
            for replace_item in replace_more:
                content = content.replace(replace_item, '')
            # Collapse every run of three or more line breaks into one blank line.
            content = re.sub(r'\n{3,}', '\n\n', content)
        markdown_content = f"# {title}\n{content}"
        markdown_file_path = f"{title}.md"
        cleaned_filename = re.sub(r'[/:*?"<>|\'\\]', ' ', markdown_file_path)
        with open(cleaned_filename, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_content)
|
|
||||||
|
|
||||||
# 凯利公式
|
|
||||||
def kelly_formula(p, b, a=1):
    """Evaluate the Kelly formula ``p/a - (1-p)/b``.

    Args:
        p: Probability of winning.
        b: Divisor applied to the losing probability ``1-p``.
        a: Divisor applied to the winning probability ``p``.

    Returns:
        The value of ``p/a - (1-p)/b``.
    """
    win_term = p/a
    loss_term = (1-p)/b
    return win_term-loss_term
|
|
||||||
|
|
||||||
# 获取所有股票
|
|
||||||
def all_stocks():
    """Fetch the table returned by ``akshare.stock_zh_a_spot_em``.

    Returns:
        ``(title, stock_data)``: the column names as a NumPy array and the
        table values.
    """
    import numpy as np
    import akshare as ak
    spot_table = ak.stock_zh_a_spot_em()
    column_names = np.array(spot_table.columns)
    return column_names, spot_table.values
|
|
||||||
|
|
||||||
# 获取所有股票的代码
|
|
||||||
def all_stock_symbols():
    """Return column 1 of the table from ``guan.all_stocks`` (the stock symbols)."""
    import guan
    _, stock_table = guan.all_stocks()
    return stock_table[:, 1]
|
|
||||||
|
|
||||||
# 股票代码的分类
|
|
||||||
def stock_symbols_classification():
    """Group all stock symbols by their leading digits.

    Returns:
        Six lists, in this order: symbols starting with 600/601/603/605
        (Shanghai main board), 000/001/002/003 (Shenzhen main board),
        300/301 (ChiNext), 688/689 (STAR Market), 82/83/87/88/430/420/400
        (NEEQ), and every symbol that matches none of these prefixes.
    """
    import guan
    stock_symbols = guan.all_stock_symbols()
    # Each tuple lists the prefixes of one category; the prefix sets are
    # disjoint, so one pass with an if/elif chain assigns every symbol once.
    prefixes_60 = ('600', '601', '603', '605')
    prefixes_00 = ('000', '001', '002', '003')
    prefixes_30 = ('300', '301')
    prefixes_68 = ('688', '689')
    prefixes_8_4 = ('82', '83', '87', '88', '430', '420', '400')
    stock_symbols_60 = []
    stock_symbols_00 = []
    stock_symbols_30 = []
    stock_symbols_68 = []
    stock_symbols_8_4 = []
    stock_symbols_others = []
    for stock_symbol in stock_symbols:
        if stock_symbol.startswith(prefixes_60):
            stock_symbols_60.append(stock_symbol)
        elif stock_symbol.startswith(prefixes_00):
            stock_symbols_00.append(stock_symbol)
        elif stock_symbol.startswith(prefixes_30):
            stock_symbols_30.append(stock_symbol)
        elif stock_symbol.startswith(prefixes_68):
            stock_symbols_68.append(stock_symbol)
        elif stock_symbol.startswith(prefixes_8_4):
            stock_symbols_8_4.append(stock_symbol)
        else:
            # Symbols not covered by any known prefix are collected here.
            stock_symbols_others.append(stock_symbol)
    return stock_symbols_60, stock_symbols_00, stock_symbols_30, stock_symbols_68, stock_symbols_8_4, stock_symbols_others
|
|
||||||
|
|
||||||
# 股票代码各个分类的数量
|
|
||||||
def statistics_of_stock_symbols_classification():
    """Return the size of each group from ``guan.stock_symbols_classification``.

    Returns:
        A tuple of six counts, in the same order as the groups.
    """
    import guan
    groups_60, groups_00, groups_30, groups_68, groups_8_4, groups_others = guan.stock_symbols_classification()
    return (
        len(groups_60),
        len(groups_00),
        len(groups_30),
        len(groups_68),
        len(groups_8_4),
        len(groups_others),
    )
|
|
||||||
|
|
||||||
# 从股票代码获取股票名称
|
|
||||||
def find_stock_name_from_symbol(symbol='000002'):
    """Look up the stock name that belongs to *symbol*.

    Args:
        symbol: Stock symbol to search for in the rows of ``guan.all_stocks``.

    Returns:
        Column 2 of the last row that contains *symbol*, or None when no row
        contains it.
    """
    import guan
    title, stock_data = guan.all_stocks()
    # Start from None so that an unknown symbol returns None instead of
    # failing on an unassigned variable.
    stock_name = None
    for stock in stock_data:
        if symbol in stock:
            stock_name = stock[2]
    return stock_name
|
|
||||||
|
|
||||||
# 市值排序
|
|
||||||
def sorted_market_capitalization(num=10):
    """Rank the stocks from ``guan.all_stocks`` by descending column 17.

    Args:
        num: Number of entries to return; None returns all of them. A value
            larger than the number of available rows is reduced to that number.

    Returns:
        A list of ``[rank, symbol, name, market_capitalization]`` entries,
        where the last value is column 17 divided by 1e8.
    """
    import numpy as np
    import guan
    title, stock_data = guan.all_stocks()
    # NOTE(review): rows are filtered on NaN in column 9 but ranked by
    # column 17 - confirm that column 9 is the intended column to test.
    new_stock_data = [stock for stock in stock_data if not np.isnan(float(stock[9]))]
    if len(new_stock_data) == 0:
        # Nothing left to rank; an empty array could not be indexed by column.
        return []
    new_stock_data = np.array(new_stock_data)
    list_index = np.argsort(new_stock_data[:, 17])[::-1]
    if num is None or num > len(list_index):
        num = len(list_index)
    sorted_array = []
    for i0 in range(num):
        row = new_stock_data[list_index[i0]]
        stock_symbol = row[1]
        stock_name = row[2]
        market_capitalization = row[17]/1e8
        sorted_array.append([i0+1, stock_symbol, stock_name, market_capitalization])
    return sorted_array
|
|
||||||
|
|
||||||
# 美股市值排序
|
|
||||||
def sorted_market_capitalization_us(num=10):
    """Rank the stocks from ``akshare.stock_us_spot_em`` by descending column 9.

    Args:
        num: Number of entries to return; None returns all of them. A value
            larger than the number of available rows is reduced to that number.

    Returns:
        A list of ``[rank, symbol, name, market_capitalization]`` entries,
        taken from columns 15, 1 and 9 (the latter divided by 1e8).
    """
    import akshare as ak
    import numpy as np
    stocks = ak.stock_us_spot_em()
    stock_data = stocks.values
    # Skip rows whose ranking column is NaN.
    new_stock_data = [stock for stock in stock_data if not np.isnan(float(stock[9]))]
    if len(new_stock_data) == 0:
        # Nothing left to rank; an empty array could not be indexed by column.
        return []
    new_stock_data = np.array(new_stock_data)
    list_index = np.argsort(new_stock_data[:, 9])[::-1]
    if num is None or num > len(list_index):
        num = len(list_index)
    sorted_array = []
    for i0 in range(num):
        row = new_stock_data[list_index[i0]]
        stock_symbol = row[15]
        stock_name = row[1]
        market_capitalization = row[9]/1e8
        sorted_array.append([i0+1, stock_symbol, stock_name, market_capitalization])
    return sorted_array
|
|
||||||
|
|
||||||
# 获取单个股票的历史数据
|
|
||||||
def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
    """Fetch the history of one stock through ``akshare.stock_zh_a_hist``.

    Args:
        symbol: Stock symbol.
        period: ``'daily'``, ``'weekly'`` or ``'monthly'``.
        start_date: First date, formatted as ``YYYYMMDD``.
        end_date: Last date, formatted as ``YYYYMMDD``.

    Returns:
        ``(title, stock_data)``: the column names as a NumPy array and the
        table values in reversed row order.
    """
    import numpy as np
    import akshare as ak
    history = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
    column_names = np.array(history.columns)
    reversed_rows = history.values[::-1]
    return column_names, reversed_rows
|
|
||||||
|
|
||||||
# 绘制股票图
|
|
||||||
def plot_stock_line(date_array, opening_array, closing_array, high_array, low_array, lw_open_close=6, lw_high_low=2, xlabel='date', ylabel='price', title='', fontsize=20, labelsize=20, adjust_bottom=0.2, adjust_left=0.2, fontfamily='Times New Roman'):
    """Draw a candlestick-style chart from vertical lines and show it.

    For each date a thick line spans the opening and closing prices and a thin
    line spans the low and high prices. Both are red when the opening price is
    not above the closing price, and green otherwise.

    Args:
        date_array: Positions on the x axis.
        opening_array, closing_array, high_array, low_array: Prices per date.
        lw_open_close: Line width of the open-close line.
        lw_high_low: Line width of the low-high line.
        xlabel, ylabel, title: Axis labels and figure title.
        fontsize: Font size of the labels and title.
        labelsize, adjust_bottom, adjust_left, fontfamily: Forwarded to
            ``guan.import_plt_and_start_fig_ax``.
    """
    import guan
    plt, fig, ax = guan.import_plt_and_start_fig_ax(adjust_bottom=adjust_bottom, adjust_left=adjust_left, labelsize=labelsize, fontfamily=fontfamily)
    # The font family is passed explicitly only for 'Times New Roman'.
    text_options = {'fontsize': fontsize}
    if fontfamily == 'Times New Roman':
        text_options['fontfamily'] = 'Times New Roman'
    ax.set_title(title, **text_options)
    ax.set_xlabel(xlabel, **text_options)
    ax.set_ylabel(ylabel, **text_options)
    for index, date in enumerate(date_array):
        color = 'red' if opening_array[index] <= closing_array[index] else 'green'
        ax.vlines(date, opening_array[index], closing_array[index], linestyle='-', color=color, lw=lw_open_close)
        ax.vlines(date, low_array[index], high_array[index], color=color, linestyle='-', lw=lw_high_low)
    plt.show()
    plt.close('all')
|
|
||||||
|
|
||||||
# Guan软件包的使用统计(仅仅统计装机数和import次数)
|
|
||||||
def statistics_of_guan_package(function_name=None):
    """Send a usage record of the guan package to the statistics server.

    The record is a JSON object with the date, the time, the installed guan
    version and the MAC address, sent over a TCP connection with a timeout of
    0.5 seconds. This is best effort: any failure is ignored.

    Args:
        function_name: Optional function name added to the record.
    """
    import guan
    try:
        import json
        import socket
        message = {
            'server': 'py.guanjihuan.com',
            'date': guan.get_date(),
            'time': guan.get_time(),
            'version': guan.get_current_version('guan'),
        }
        # The context manager closes the socket even when connecting or sending fails.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
            client_socket.settimeout(0.5)
            client_socket.connect(('socket.guanjihuan.com', 12345))
            message['MAC_address'] = guan.get_mac_address()
            if function_name is not None:
                message['function_name'] = function_name
            client_socket.send(json.dumps(message).encode())
    except Exception:
        # Statistics must never break the package, so ordinary errors are
        # swallowed on purpose; KeyboardInterrupt and SystemExit still propagate.
        pass
|
|
||||||
|
|
||||||
# # Guan软件包升级检查和提示(如果无法连接或者版本为最新,那么均没有提示)
|
|
||||||
# def notification_of_upgrade(timeout=5):
|
|
||||||
# try:
|
|
||||||
# import guan
|
|
||||||
# latest_version = guan.get_latest_version(package_name='guan', timeout=timeout)
|
|
||||||
# current_version = guan.get_current_version('guan')
|
|
||||||
# if latest_version != None and current_version != None:
|
|
||||||
# if latest_version != current_version:
|
|
||||||
# print('升级提示:您当前使用的版本是 guan-'+current_version+',目前已经有最新版本 guan-'+latest_version+'。您可以通过以下命令对软件包进行升级:pip install --upgrade guan -i https://pypi.python.org/simple 或 pip install --upgrade guan')
|
|
||||||
# except:
|
|
||||||
# pass
|
|
Loading…
x
Reference in New Issue
Block a user