0.1.127
This commit is contained in:
		| @@ -1,7 +1,7 @@ | ||||
| [metadata] | ||||
| # replace with your username: | ||||
| name = guan | ||||
| version = 0.1.126 | ||||
| version = 0.1.127 | ||||
| author = guanjihuan | ||||
| author_email = guanjihuan@163.com | ||||
| description = An open source python package | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| Metadata-Version: 2.1 | ||||
| Name: guan | ||||
| Version: 0.1.126 | ||||
| Version: 0.1.127 | ||||
| Summary: An open source python package | ||||
| Home-page: https://py.guanjihuan.com | ||||
| Author: guanjihuan | ||||
|   | ||||
| @@ -15,7 +15,6 @@ src/guan/deprecated.py | ||||
| src/guan/figure_plotting.py | ||||
| src/guan/file_reading_and_writing.py | ||||
| src/guan/machine_learning.py | ||||
| src/guan/others.py | ||||
| src/guan/quantum_transport.py | ||||
| src/guan/topological_invariant.py | ||||
| src/guan.egg-info/PKG-INFO | ||||
|   | ||||
| @@ -12,7 +12,6 @@ from .machine_learning import * | ||||
| from .file_reading_and_writing import * | ||||
| from .figure_plotting import * | ||||
| from .data_processing import * | ||||
| from .others import * | ||||
| from .decorators import * | ||||
| from .deprecated import * | ||||
| statistics_of_guan_package() | ||||
| @@ -40,6 +40,40 @@ def chat(prompt='你好', model=1, stream=0, top_p=0.8, temperature=0.85): | ||||
|             print('\n--- End Stream Message ---\n') | ||||
|     return response | ||||
|  | ||||
| # 将XYZ数据转成矩阵数据(说明:x_array/y_array的输入和输出不一样。要求z_array数据中y对应的数据为小循环,x对应的数据为大循环) | ||||
def convert_xyz_data_into_matrix_data(x_array, y_array, z_array):
    """Convert flat XYZ samples into two axis arrays and a 2-D matrix.

    The flat ``z_array`` must be ordered with x as the outer (slow) loop and
    y as the inner (fast) loop.

    Args:
        x_array: x coordinate of every sample (values repeat).
        y_array: y coordinate of every sample (values repeat).
        z_array: z value of every sample.

    Returns:
        ``(x_array, y_array, matrix)`` where the axis arrays hold the distinct
        coordinates in order of first appearance and ``matrix[ix, iy]`` is the
        z value at ``(x_array[ix], y_array[iy])``.

    Raises:
        IndexError: if ``z_array`` has fewer than ``len_x * len_y`` entries.
    """
    import numpy as np
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    # set() gives an arbitrary order that can disagree with the matrix layout.
    x_unique = np.array(list(dict.fromkeys(np.array(x_array).tolist())))
    y_unique = np.array(list(dict.fromkeys(np.array(y_array).tolist())))
    z_flat = np.array(z_array)
    len_x = len(x_unique)
    len_y = len(y_unique)
    if z_flat.size < len_x*len_y:
        raise IndexError('z_array is shorter than len(x)*len(y)')
    matrix = np.zeros((len_x, len_y))
    # Row-major reshape matches "x outer loop, y inner loop".
    matrix[:, :] = z_flat[:len_x*len_y].reshape(len_x, len_y)
    return x_unique, y_unique, matrix
|  | ||||
| # 将矩阵数据转成XYZ数据(说明:x_array/y_array的输入和输出不一样。生成的z_array数据中y对应的数据为小循环,x对应的数据为大循环) | ||||
def convert_matrix_data_into_xyz_data(x_array, y_array, matrix):
    """Flatten axis arrays and a 2-D matrix into three flat XYZ arrays.

    The output is ordered with x as the outer (slow) loop and y as the inner
    (fast) loop, i.e. entry ``ix*len_y + iy`` describes ``matrix[ix, iy]``.

    Args:
        x_array: axis values for the matrix rows.
        y_array: axis values for the matrix columns.
        matrix: 2-D data with at least ``len(x_array)`` rows and
            ``len(y_array)`` columns.

    Returns:
        ``(x_array, y_array, z_array)``: three float arrays of length
        ``len(x_array) * len(y_array)``.

    Raises:
        IndexError: if ``matrix`` is too small for the given axes.
    """
    import numpy as np
    x_axis = np.array(x_array)
    y_axis = np.array(y_array)
    grid = np.array(matrix)
    len_x = len(x_axis)
    len_y = len(y_axis)
    if len_x*len_y == 0:
        # Nothing to emit; matches the empty result of an empty loop.
        return np.zeros(0), np.zeros(0), np.zeros(0)
    if grid.ndim != 2 or grid.shape[0] < len_x or grid.shape[1] < len_y:
        raise IndexError('matrix is too small for the given x_array/y_array')
    # repeat -> x changes slowly, tile -> y changes quickly.
    x_flat = np.repeat(x_axis, len_y).astype(float)
    y_flat = np.tile(y_axis, len_x).astype(float)
    z_flat = grid[:len_x, :len_y].reshape(-1).astype(float)
    return x_flat, y_flat, z_flat
|  | ||||
| # 从列表中删除某个匹配的元素 | ||||
| def remove_item_in_one_array(array, item): | ||||
|     new_array = [x for x in array if x != item] | ||||
| @@ -178,62 +212,6 @@ def run_programs_sequentially(program_files=['./a.py', './b.py'], execute='pytho | ||||
|         end = time.time() | ||||
|         print('Total running time = '+str((end-start)/60)+' min') | ||||
|  | ||||
| # 将XYZ数据转成矩阵数据(说明:x_array/y_array的输入和输出不一样。要求z_array数据中y对应的数据为小循环,x对应的数据为大循环) | ||||
def convert_xyz_data_into_matrix_data(x_array, y_array, z_array):
    """Convert flat XYZ samples into two axis arrays and a 2-D matrix.

    The flat ``z_array`` must be ordered with x as the outer (slow) loop and
    y as the inner (fast) loop.

    Args:
        x_array: x coordinate of every sample (values repeat).
        y_array: y coordinate of every sample (values repeat).
        z_array: z value of every sample.

    Returns:
        ``(x_array, y_array, matrix)`` where the axis arrays hold the distinct
        coordinates in order of first appearance and ``matrix[ix, iy]`` is the
        z value at ``(x_array[ix], y_array[iy])``.

    Raises:
        IndexError: if ``z_array`` has fewer than ``len_x * len_y`` entries.
    """
    import numpy as np
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    # set() gives an arbitrary order that can disagree with the matrix layout.
    x_unique = np.array(list(dict.fromkeys(np.array(x_array).tolist())))
    y_unique = np.array(list(dict.fromkeys(np.array(y_array).tolist())))
    z_flat = np.array(z_array)
    len_x = len(x_unique)
    len_y = len(y_unique)
    if z_flat.size < len_x*len_y:
        raise IndexError('z_array is shorter than len(x)*len(y)')
    matrix = np.zeros((len_x, len_y))
    # Row-major reshape matches "x outer loop, y inner loop".
    matrix[:, :] = z_flat[:len_x*len_y].reshape(len_x, len_y)
    return x_unique, y_unique, matrix
|  | ||||
| # 将矩阵数据转成XYZ数据(说明:x_array/y_array的输入和输出不一样。生成的z_array数据中y对应的数据为小循环,x对应的数据为大循环) | ||||
def convert_matrix_data_into_xyz_data(x_array, y_array, matrix):
    """Flatten axis arrays and a 2-D matrix into three flat XYZ arrays.

    The output is ordered with x as the outer (slow) loop and y as the inner
    (fast) loop, i.e. entry ``ix*len_y + iy`` describes ``matrix[ix, iy]``.

    Args:
        x_array: axis values for the matrix rows.
        y_array: axis values for the matrix columns.
        matrix: 2-D data with at least ``len(x_array)`` rows and
            ``len(y_array)`` columns.

    Returns:
        ``(x_array, y_array, z_array)``: three float arrays of length
        ``len(x_array) * len(y_array)``.

    Raises:
        IndexError: if ``matrix`` is too small for the given axes.
    """
    import numpy as np
    x_axis = np.array(x_array)
    y_axis = np.array(y_array)
    grid = np.array(matrix)
    len_x = len(x_axis)
    len_y = len(y_axis)
    if len_x*len_y == 0:
        # Nothing to emit; matches the empty result of an empty loop.
        return np.zeros(0), np.zeros(0), np.zeros(0)
    if grid.ndim != 2 or grid.shape[0] < len_x or grid.shape[1] < len_y:
        raise IndexError('matrix is too small for the given x_array/y_array')
    # repeat -> x changes slowly, tile -> y changes quickly.
    x_flat = np.repeat(x_axis, len_y).astype(float)
    y_flat = np.tile(y_axis, len_x).astype(float)
    z_flat = grid[:len_x, :len_y].reshape(-1).astype(float)
    return x_flat, y_flat, z_flat
|  | ||||
| # 通过定义计算R^2(基于实际值和预测值,数值有可能小于0) | ||||
def calculate_R2_with_definition(y_true_array, y_pred_array):
    """Compute R^2 from its definition, ``1 - SS_res / SS_tot``.

    The value is based on true versus predicted values and can be negative
    when the prediction is worse than predicting the mean.

    Args:
        y_true_array: observed values (list or array).
        y_pred_array: predicted values, same length as ``y_true_array``.

    Returns:
        The coefficient of determination.
    """
    import numpy as np
    # Coerce so that plain Python lists support element-wise arithmetic.
    y_true = np.asarray(y_true_array)
    y_pred = np.asarray(y_pred_array)
    y_mean = np.mean(y_true)
    SS_tot = np.sum((y_true - y_mean) ** 2)
    SS_res = np.sum((y_true - y_pred) ** 2)
    R2 = 1 - (SS_res / SS_tot)
    return R2
|  | ||||
| # 通过sklearn计算R^2,和上面定义的计算结果一致 | ||||
def calculate_R2_with_sklearn(y_true_array, y_pred_array):
    """Compute R^2 by delegating to ``sklearn.metrics.r2_score``."""
    from sklearn import metrics
    return metrics.r2_score(y_true_array, y_pred_array)
|  | ||||
| # 通过scipy计算线性回归后的R^2(基于线性回归模型,范围在0和1之间) | ||||
def calculate_R2_after_linear_regression_with_scipy(y_true_array, y_pred_array):
    """Return the squared correlation coefficient of a linear fit.

    Fits ``y_pred_array`` against ``y_true_array`` with
    ``scipy.stats.linregress`` and squares the r-value, so the result lies
    between 0 and 1.
    """
    from scipy import stats
    # Index 2 of the linregress result is the correlation coefficient r.
    correlation = stats.linregress(y_true_array, y_pred_array)[2]
    return correlation**2
|  | ||||
| # 判断一个数是否接近于整数 | ||||
| def close_to_integer(value, abs_tol=1e-3): | ||||
|     import math | ||||
| @@ -356,6 +334,98 @@ def hex_to_rgb(hex): | ||||
|     length = len(hex) | ||||
|     return tuple(int(hex[i:i+length//3], 16) for i in range(0, length, length//3)) | ||||
|  | ||||
| # 拼接两个PDF文件 | ||||
def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
    """Concatenate two PDF files into one output file.

    Args:
        input_file_1: path of the PDF whose pages come first.
        input_file_2: path of the PDF whose pages come second.
        output_file: path of the combined PDF to write.
    """
    import PyPDF2
    output_pdf = PyPDF2.PdfWriter()
    # Keep both inputs open until the writer has serialised the pages:
    # page content may still be read from the source streams at write time.
    with open(input_file_1, 'rb') as file1, open(input_file_2, 'rb') as file2:
        readers = [PyPDF2.PdfReader(file1), PyPDF2.PdfReader(file2)]
        for reader in readers:
            for page in reader.pages:
                output_pdf.add_page(page)
        with open(output_file, 'wb') as combined_file:
            output_pdf.write(combined_file)
|  | ||||
| # 将PDF文件转成文本 | ||||
def pdf_to_text(pdf_path):
    """Extract the text of every text box in a PDF, using pdfminer.

    Args:
        pdf_path: path of the PDF file.

    Returns:
        The stripped text of all text boxes, concatenated without separators.

    Raises:
        PDFTextExtractionNotAllowed: if the document forbids text extraction.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    # NOTE(review): both lines change logging configuration process-wide.
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    # The with-block guarantees the handle is closed, also on errors.
    with open(pdf_path, 'rb') as pdf_file:
        parser = PDFParser(pdf_file)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    return ''.join(pieces)
|  | ||||
| # 获取PDF文件页数 | ||||
def get_pdf_page_number(pdf_path):
    """Return the number of pages in a PDF file.

    Args:
        pdf_path: path of the PDF file.

    Returns:
        The page count as an int.
    """
    import PyPDF2
    # Context manager closes the handle, which was previously leaked.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
    return num_pages
|  | ||||
| # 获取PDF文件指定页面的内容 | ||||
def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1):
    """Extract the text of one page of a PDF file.

    Args:
        pdf_path: path of the PDF file.
        page_num: 1-based page number.

    Returns:
        The text of the requested page.

    Raises:
        IndexError: if ``page_num`` is not between 1 and the page count.
    """
    import PyPDF2
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
        # Explicit range check: avoids an UnboundLocalError for a missing page
        # and stops page_num=0 from silently selecting the last page.
        if page_num-1 not in range(num_pages):
            raise IndexError('page_num must be between 1 and '+str(num_pages))
        page_text = pdf_reader.pages[int(page_num)-1].extract_text()
    return page_text
|  | ||||
| # 获取PDF文献中的链接。例如: link_starting_form='https://doi.org' | ||||
def get_links_from_pdf(pdf_path, link_starting_form=''):
    """Collect the URI links stored in a PDF's page annotations.

    Args:
        pdf_path: path of the PDF file.
        link_starting_form: literal prefix a link must start with, e.g.
            ``'https://doi.org'``. The empty string accepts every link.

    Returns:
        A list of link URIs in page order. Consecutive duplicates within one
        page are collapsed.
    """
    import PyPDF2
    reader = PyPDF2.PdfReader(pdf_path)
    links = []
    for page in reader.pages:
        pageObject = page.get_object()
        if '/Annots' not in pageObject.keys():
            continue
        old = ''  # last accepted link on this page
        for a in pageObject['/Annots']:
            u = a.get_object()
            if '/A' not in u.keys() or '/URI' not in u['/A']:
                continue
            uri = u['/A']['/URI']
            # Literal prefix test: characters such as '.' or '?' in the prefix
            # must not be interpreted as regex metacharacters.
            if uri.startswith(link_starting_form) and uri != old:
                links.append(uri)
                old = uri
    return links
|  | ||||
| # 使用MD5进行散列加密 | ||||
| def encryption_MD5(password, salt=''): | ||||
|     import hashlib | ||||
| @@ -713,130 +783,635 @@ def count_number_of_import_statements(filename, file_format='.py', num=1000): | ||||
|     import_statement_counter = Counter(import_array).most_common(num) | ||||
|     return import_statement_counter | ||||
|  | ||||
| # 查找文件名相同的文件 | ||||
def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000):
    """Find file names that occur more than once below a directory.

    Args:
        directory: root directory to walk.
        ignored_directory_with_words: a directory is skipped when its path
            contains any of these substrings (the list is only read).
        ignored_file_with_words: a file is skipped when its name contains any
            of these substrings (the list is only read).
        num: only the ``num`` most common names are considered.

    Returns:
        A list of ``(filename, count)`` tuples with ``count > 1``, most
        common first.
    """
    import os
    from collections import Counter
    file_list = []
    for root, dirs, files in os.walk(directory):
        # Decide once per directory. Removing the name once per matching word
        # would drop entries from other directories or raise ValueError.
        if any(word in root for word in ignored_directory_with_words):
            continue
        for name in files:
            if not any(word in name for word in ignored_file_with_words):
                file_list.append(name)
    count_file = Counter(file_list).most_common(num)
    return [item for item in count_file if item[1] > 1]
| # 获取软件包的本机版本 | ||||
def get_current_version(package_name='guan'):
    """Return the locally installed version of a package.

    Args:
        package_name: distribution name to look up.

    Returns:
        The version string, or ``None`` when the package is not installed.
    """
    import importlib.metadata
    try:
        return importlib.metadata.version(package_name)
    except importlib.metadata.PackageNotFoundError:
        # Only "not installed" maps to None; other errors should surface.
        return None
|  | ||||
| # 统计各个子文件夹中的文件数量 | ||||
| def count_file_in_sub_directory(directory='./', sort=0, reverse=1, print_show=1, smaller_than_num=None): | ||||
| # 获取Python软件包的最新版本 | ||||
def get_latest_version(package_name='guan', timeout=5):
    """Query PyPI for the latest released version of a package.

    Args:
        package_name: distribution name on PyPI.
        timeout: request timeout in seconds.

    Returns:
        The version string, or ``None`` when the request fails, the status is
        not 200, or the response does not have the expected JSON shape.
    """
    import requests
    url = f"https://pypi.org/pypi/{package_name}/json"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Network problems are an expected failure mode.
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()["info"]["version"]
    except (ValueError, KeyError, TypeError):
        # Malformed or unexpected payload: report "unknown", do not crash.
        return None
|  | ||||
| # 获取包含某个字符的进程PID值 | ||||
def get_PID_array(name):
    """Return the PIDs of processes whose ``ps -ef`` line contains ``name``.

    Args:
        name: substring to look for in each process line.

    Returns:
        A list of PID strings. Empty when nothing matches or ``ps`` fails.
    """
    import subprocess
    # Run ps directly and filter in Python. Interpolating ``name`` into a
    # shell pipeline allows command injection and adds the grep and sh helper
    # processes to the result.
    result = subprocess.run(['ps', '-ef'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        return []
    id_running_array = []
    # The first line of the output is the column header, not a process.
    for ps_line in result.stdout.splitlines()[1:]:
        if name in ps_line:
            fields = ps_line.split()
            if len(fields) > 1:
                id_running_array.append(fields[1])  # second column is the PID
    return id_running_array
|  | ||||
| # 每日git commit次数的统计 | ||||
def statistics_of_git_commits(print_show=0, str_or_datetime='str'):
    """Count git commits per day for the repository in the working directory.

    Args:
        print_show: when 1, print one ``"<date>: <count> commits"`` line per
            day.
        str_or_datetime: ``'str'`` returns dates as ``YYYY-MM-DD`` strings and
            ``'datetime'`` returns ``datetime.datetime`` objects. For any
            other value no dates are collected.

    Returns:
        ``(date_array, commit_count_array)`` sorted by date. Both are empty
        when git reports no commits.
    """
    import subprocess
    import collections
    import datetime
    since_date = '100 year ago'
    result = subprocess.run(
        ['git', 'log', f'--since={since_date}', '--pretty=format:%ad', '--date=short'],
        stdout=subprocess.PIPE,
        text=True)
    # Drop blank lines: empty output would otherwise be counted as one commit
    # on the date '' and break strptime in datetime mode.
    commits = [line for line in result.stdout.strip().split('\n') if line]
    counter = collections.Counter(commits)
    daily_commit_counts = dict(sorted(counter.items()))
    date_array = []
    commit_count_array = []
    for date, count in daily_commit_counts.items():
        if print_show == 1:
            print(f"{date}: {count} commits")
        if str_or_datetime=='datetime':
            date_array.append(datetime.datetime.strptime(date, "%Y-%m-%d"))
        elif str_or_datetime=='str':
            date_array.append(date)
        commit_count_array.append(count)
    return date_array, commit_count_array
|  | ||||
| # 将文件目录结构写入Markdown文件 | ||||
| def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):  | ||||
|     import os | ||||
|     import numpy as np | ||||
|     dirs_list = [] | ||||
|     for root, dirs, files in os.walk(directory): | ||||
|         if dirs != []: | ||||
|             for i0 in range(len(dirs)): | ||||
|                 dirs_list.append(root+'/'+dirs[i0]) | ||||
|     count_file_array = [] | ||||
|     for sub_dir in dirs_list: | ||||
|         file_list = [] | ||||
|         for root, dirs, files in os.walk(sub_dir): | ||||
|             for i0 in range(len(files)): | ||||
|                 file_list.append(files[i0]) | ||||
|         count_file = len(file_list) | ||||
|         count_file_array.append(count_file) | ||||
|         if sort == 0: | ||||
|             if print_show == 1: | ||||
|                 if smaller_than_num == None: | ||||
|                     print(sub_dir) | ||||
|                     print(count_file) | ||||
|                     print() | ||||
|     f = open(filename+'.md', 'w', encoding="utf-8") | ||||
|     filenames1 = os.listdir(directory) | ||||
|     u0 = 0 | ||||
|     for filename1 in filenames1[::reverse_positive_or_negative]: | ||||
|         filename1_with_path = os.path.join(directory,filename1)  | ||||
|         if os.path.isfile(filename1_with_path): | ||||
|             if os.path.splitext(filename1)[1] not in banned_file_format: | ||||
|                 if hide_file_format == None: | ||||
|                     f.write('+ '+str(filename1)+'\n\n') | ||||
|                 else: | ||||
|                     if count_file<smaller_than_num: | ||||
|                         print(sub_dir) | ||||
|                         print(count_file) | ||||
|                         print() | ||||
|     if sort == 0: | ||||
|         sub_directory = dirs_list | ||||
|         num_in_sub_directory = count_file_array | ||||
|     if sort == 1: | ||||
|         sub_directory = [] | ||||
|         num_in_sub_directory = [] | ||||
|         if reverse == 1: | ||||
|             index_array = np.argsort(count_file_array)[::-1] | ||||
|                     f.write('+ '+str(os.path.splitext(filename1)[0])+'\n\n') | ||||
|         else: | ||||
|             index_array = np.argsort(count_file_array) | ||||
|         for i0 in index_array: | ||||
|             sub_directory.append(dirs_list[i0]) | ||||
|             num_in_sub_directory.append(count_file_array[i0]) | ||||
|             if print_show == 1: | ||||
|                 if smaller_than_num == None: | ||||
|                     print(dirs_list[i0]) | ||||
|                     print(count_file_array[i0]) | ||||
|                     print() | ||||
|                 else: | ||||
|                     if count_file_array[i0]<smaller_than_num: | ||||
|                         print(dirs_list[i0]) | ||||
|                         print(count_file_array[i0]) | ||||
|                         print() | ||||
|     return sub_directory, num_in_sub_directory | ||||
|             u0 += 1 | ||||
|             if divided_line != None and u0 != 1: | ||||
|                 f.write('--------\n\n') | ||||
|             if starting_from_h1 == None: | ||||
|                 f.write('#') | ||||
|             f.write('# '+str(filename1)+'\n\n') | ||||
|  | ||||
| # 在多个子文件夹中产生必要的文件,例如 readme.md | ||||
| def creat_necessary_file(directory, filename='readme', file_format='.md', content='', overwrite=None, ignored_directory_with_words=[]): | ||||
|     import os | ||||
|     directory_with_file = [] | ||||
|     ignored_directory = [] | ||||
|     for root, dirs, files in os.walk(directory): | ||||
|         for i0 in range(len(files)): | ||||
|             if root not in directory_with_file: | ||||
|                 directory_with_file.append(root) | ||||
|             if files[i0] == filename+file_format: | ||||
|                 if root not in ignored_directory: | ||||
|                     ignored_directory.append(root) | ||||
|     if overwrite == None: | ||||
|         for root in ignored_directory: | ||||
|             directory_with_file.remove(root) | ||||
|     ignored_directory_more =[] | ||||
|     for root in directory_with_file:  | ||||
|         for word in ignored_directory_with_words: | ||||
|             if word in root: | ||||
|                 if root not in ignored_directory_more: | ||||
|                     ignored_directory_more.append(root) | ||||
|     for root in ignored_directory_more: | ||||
|         directory_with_file.remove(root)  | ||||
|     for root in directory_with_file: | ||||
|         os.chdir(root) | ||||
|         f = open(filename+file_format, 'w', encoding="utf-8") | ||||
|         f.write(content) | ||||
|         f.close() | ||||
|             filenames2 = os.listdir(filename1_with_path)  | ||||
|             i0 = 0      | ||||
|             for filename2 in filenames2[::reverse_positive_or_negative]: | ||||
|                 filename2_with_path = os.path.join(directory, filename1, filename2)  | ||||
|                 if os.path.isfile(filename2_with_path): | ||||
|                     if os.path.splitext(filename2)[1] not in banned_file_format: | ||||
|                         if hide_file_format == None: | ||||
|                             f.write('+ '+str(filename2)+'\n\n') | ||||
|                         else: | ||||
|                             f.write('+ '+str(os.path.splitext(filename2)[0])+'\n\n') | ||||
|                 else:  | ||||
|                     i0 += 1 | ||||
|                     if starting_from_h1 == None: | ||||
|                         f.write('#') | ||||
|                     if show_second_number != None: | ||||
|                         f.write('## '+str(i0)+'. '+str(filename2)+'\n\n') | ||||
|                     else: | ||||
|                         f.write('## '+str(filename2)+'\n\n') | ||||
|                      | ||||
|                     j0 = 0 | ||||
|                     filenames3 = os.listdir(filename2_with_path) | ||||
|                     for filename3 in filenames3[::reverse_positive_or_negative]: | ||||
|                         filename3_with_path = os.path.join(directory, filename1, filename2, filename3)  | ||||
|                         if os.path.isfile(filename3_with_path):  | ||||
|                             if os.path.splitext(filename3)[1] not in banned_file_format: | ||||
|                                 if hide_file_format == None: | ||||
|                                     f.write('+ '+str(filename3)+'\n\n') | ||||
|                                 else: | ||||
|                                     f.write('+ '+str(os.path.splitext(filename3)[0])+'\n\n') | ||||
|                         else: | ||||
|                             j0 += 1 | ||||
|                             if starting_from_h1 == None: | ||||
|                                 f.write('#') | ||||
|                             if show_third_number != None: | ||||
|                                 f.write('### ('+str(j0)+') '+str(filename3)+'\n\n') | ||||
|                             else: | ||||
|                                 f.write('### '+str(filename3)+'\n\n') | ||||
|  | ||||
| # 删除特定文件名的文件(谨慎使用) | ||||
| def delete_file_with_specific_name(directory, filename='readme', file_format='.md'): | ||||
|     import os | ||||
|     for root, dirs, files in os.walk(directory): | ||||
|         for i0 in range(len(files)): | ||||
|             if files[i0] == filename+file_format: | ||||
|                 os.remove(root+'/'+files[i0]) | ||||
|                             filenames4 = os.listdir(filename3_with_path) | ||||
|                             for filename4 in filenames4[::reverse_positive_or_negative]: | ||||
|                                 filename4_with_path = os.path.join(directory, filename1, filename2, filename3, filename4)  | ||||
|                                 if os.path.isfile(filename4_with_path): | ||||
|                                     if os.path.splitext(filename4)[1] not in banned_file_format: | ||||
|                                         if hide_file_format == None: | ||||
|                                             f.write('+ '+str(filename4)+'\n\n') | ||||
|                                         else: | ||||
|                                             f.write('+ '+str(os.path.splitext(filename4)[0])+'\n\n') | ||||
|                                 else:  | ||||
|                                     if starting_from_h1 == None: | ||||
|                                         f.write('#') | ||||
|                                     f.write('#### '+str(filename4)+'\n\n') | ||||
|  | ||||
| # 将所有文件移到根目录(谨慎使用) | ||||
| def move_all_files_to_root_directory(directory): | ||||
|                                     filenames5 = os.listdir(filename4_with_path) | ||||
|                                     for filename5 in filenames5[::reverse_positive_or_negative]: | ||||
|                                         filename5_with_path = os.path.join(directory, filename1, filename2, filename3, filename4, filename5)  | ||||
|                                         if os.path.isfile(filename5_with_path):  | ||||
|                                             if os.path.splitext(filename5)[1] not in banned_file_format: | ||||
|                                                 if hide_file_format == None: | ||||
|                                                     f.write('+ '+str(filename5)+'\n\n') | ||||
|                                                 else: | ||||
|                                                     f.write('+ '+str(os.path.splitext(filename5)[0])+'\n\n') | ||||
|                                         else: | ||||
|                                             if starting_from_h1 == None: | ||||
|                                                 f.write('#') | ||||
|                                             f.write('##### '+str(filename5)+'\n\n') | ||||
|  | ||||
|                                             filenames6 = os.listdir(filename5_with_path) | ||||
|                                             for filename6 in filenames6[::reverse_positive_or_negative]: | ||||
|                                                 filename6_with_path = os.path.join(directory, filename1, filename2, filename3, filename4, filename5, filename6)  | ||||
|                                                 if os.path.isfile(filename6_with_path):  | ||||
|                                                     if os.path.splitext(filename6)[1] not in banned_file_format: | ||||
|                                                         if hide_file_format == None: | ||||
|                                                             f.write('+ '+str(filename6)+'\n\n') | ||||
|                                                         else: | ||||
|                                                             f.write('+ '+str(os.path.splitext(filename6)[0])+'\n\n') | ||||
|                                                 else: | ||||
|                                                     if starting_from_h1 == None: | ||||
|                                                         f.write('#') | ||||
|                                                     f.write('###### '+str(filename6)+'\n\n') | ||||
|     f.close() | ||||
|  | ||||
| # 从网页的标签中获取内容 | ||||
def get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a']):
    """Download a web page and return the text found inside selected tags.

    Args:
        link: URL of the page.
        tags: tag names whose text is collected (the list is only read).

    Returns:
        The tag texts with internal newlines removed, joined by blank lines.
    """
    from bs4 import BeautifulSoup
    from itertools import dropwhile
    import urllib.request
    import ssl
    # SECURITY: certificate verification is disabled for this request. The
    # context is passed locally instead of patching the ssl module, so other
    # HTTPS calls in the process keep their normal verification.
    unverified_context = ssl._create_unverified_context()
    with urllib.request.urlopen(link, context=unverified_context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    texts = [tag.get_text().replace('\n', '') for tag in soup.find_all(tags)]
    # Leading empty texts contribute nothing; later empty ones still add a
    # separator.
    content = '\n\n'.join(dropwhile(lambda text: text == '', texts))
    return content
|  | ||||
| # 从HTML中获取所有的链接 | ||||
def get_links_from_html(html_link, links_with_text=0):
    """Download a web page and return the targets of its ``<a>`` tags.

    Args:
        html_link: URL of the page.
        links_with_text: when 0, return only the href values. Otherwise
            return ``(href, link text)`` tuples.

    Returns:
        A list of hrefs or of ``(href, text)`` tuples. Tags without an href
        are skipped.
    """
    from bs4 import BeautifulSoup
    import urllib.request
    import ssl
    # SECURITY: certificate verification is disabled for this request. The
    # context is passed locally instead of patching the ssl module, so other
    # HTTPS calls in the process keep their normal verification.
    unverified_context = ssl._create_unverified_context()
    with urllib.request.urlopen(html_link, context=unverified_context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    a_tags = soup.find_all('a')
    if links_with_text == 0:
        return [tag.get('href') for tag in a_tags if tag.get('href')]
    return [(tag.get('href'), tag.text) for tag in a_tags if tag.get('href')]
|  | ||||
| # 检查链接的有效性 | ||||
def check_link(url, timeout=3, allow_redirects=True):
    """Report whether a HEAD request to ``url`` answers with status 200.

    Any ``requests`` exception (timeout, connection error, invalid URL, ...)
    counts as an invalid link.
    """
    import requests
    try:
        reply = requests.head(url, timeout=timeout, allow_redirects=allow_redirects)
    except requests.exceptions.RequestException:
        return False
    return reply.status_code == 200
|  | ||||
| # 检查链接数组中链接的有效性 | ||||
def check_link_array(link_array, timeout=3, allow_redirects=True, try_again=0, print_show=1):
    """Check a list of links and return those that fail.

    The placeholder link ``'#'`` is always treated as valid. With
    ``try_again`` set, the links that failed are checked a second time and
    only those failing again are returned. With ``print_show`` set, each
    failing link is printed as it is found.
    """
    import guan

    def _failed_links(candidates):
        # One pass: collect (and optionally print) every link that fails.
        failures = []
        for candidate in candidates:
            if candidate == '#' or guan.check_link(candidate, timeout=timeout, allow_redirects=allow_redirects):
                continue
            failures.append(candidate)
            if print_show:
                print(candidate)
        return failures

    first_pass = _failed_links(link_array)
    if not try_again:
        return first_pass
    if print_show:
        print('\nTry again:\n')
    return _failed_links(first_pass)
|  | ||||
| # 生成二维码 | ||||
def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
    """Encode ``data`` as a QR code image saved to ``filename + file_format``."""
    import qrcode
    output_path = filename+file_format
    qrcode.make(data).save(output_path)
|  | ||||
| # 通过Sci-Hub网站下载文献 | ||||
| def download_with_scihub(address=None, num=1): | ||||
|     from bs4 import BeautifulSoup | ||||
|     import re | ||||
|     import requests | ||||
|     import os | ||||
|     import shutil | ||||
|     for root, dirs, files in os.walk(directory): | ||||
|         for i0 in range(len(files)): | ||||
|             shutil.move(root+'/'+files[i0], directory+'/'+files[i0]) | ||||
|     for i0 in range(100): | ||||
|         for root, dirs, files in os.walk(directory): | ||||
|             try: | ||||
|                 os.rmdir(root)  | ||||
|             except: | ||||
|                 pass | ||||
|     if num==1 and address!=None: | ||||
|         address_array = [address] | ||||
|     else: | ||||
|         address_array = [] | ||||
|         for i in range(num): | ||||
|             address = input('\nInput:') | ||||
|             address_array.append(address) | ||||
|     for address in address_array: | ||||
|         r = requests.post('https://sci-hub.st/', data={'request': address}) | ||||
|         print('\nResponse:', r) | ||||
|         print('Address:', r.url) | ||||
|         soup = BeautifulSoup(r.text, features='lxml') | ||||
|         pdf_URL = soup.embed['src'] | ||||
|         # pdf_URL = soup.iframe['src'] # This is a code line of history version which fails to get pdf URL. | ||||
|         if re.search(re.compile('^https:'), pdf_URL): | ||||
|             pass | ||||
|         else: | ||||
|             pdf_URL = 'https:'+pdf_URL | ||||
|         print('PDF address:', pdf_URL) | ||||
|         name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::] | ||||
|         print('PDF name:', name) | ||||
|         print('Directory:', os.getcwd()) | ||||
|         print('\nDownloading...') | ||||
|         r = requests.get(pdf_URL, stream=True) | ||||
|         with open(name, 'wb') as f: | ||||
|             for chunk in r.iter_content(chunk_size=32): | ||||
|                 f.write(chunk) | ||||
|         print('Completed!\n') | ||||
|     if num != 1: | ||||
|         print('All completed!\n') | ||||
|  | ||||
| # 将字符串转成音频 | ||||
def str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak a string with pyttsx3 and/or save it as an audio file.

    With ``save == 1`` the speech is written to ``filename + '.wav'``. With
    ``compress == 1`` as well, that file is converted to
    ``filename + '.mp3'`` through ``guan.compress_wav_to_mp3`` and the wav is
    removed. With ``read == 1`` the text is spoken aloud. ``voice`` indexes
    the engine's voice list and ``rate`` is passed to the engine's "rate"
    property.
    """
    import pyttsx3
    import guan
    if print_text==1:
        print(str)
    tts = pyttsx3.init()
    available_voices = tts.getProperty('voices')
    tts.setProperty('voice', available_voices[voice].id)
    tts.setProperty("rate", rate)
    if save==1:
        wav_name = filename+'.wav'
        tts.save_to_file(str, wav_name)
        tts.runAndWait()
        print('Wav file saved!')
        if compress==1:
            import os
            # Convert via a temporary name, then discard the wav.
            os.rename(wav_name, 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read==1:
        tts.say(str)
        tts.runAndWait()
|  | ||||
# Convert a txt file to audio
def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak and/or save the content of a UTF-8 text file with pyttsx3.

    Parameters:
        txt_path: path of the text file to read.
        rate: value passed to the engine's "rate" property.
        voice: index into the engine's list of voices.
        read: when 1, the text is spoken aloud.
        save: when 1, the text is saved to a wav file in the current directory.
            The base name is the last component of txt_path (split on '/',
            ',' or backslash) with its last four characters removed.
        compress: when 1 (and save is 1), the wav file is converted to mp3 via
            guan.compress_wav_to_mp3 and the wav removed.
        bitrate: bitrate forwarded to guan.compress_wav_to_mp3.
        print_text: when 1, the text is printed first.
    """
    import pyttsx3
    import guan
    # Use a context manager so the file handle is always released.
    with open(txt_path, 'r', encoding='utf-8') as f:
        text = f.read()
    if print_text == 1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save == 1:
        import re
        # Raw string: same character class as before, without the invalid '\]' escape.
        filename = re.split(r'[/,\\]', txt_path)[-1][:-4]
        engine.save_to_file(text, filename+'.wav')
        engine.runAndWait()
        print('Wav file saved!')
        if compress == 1:
            import os
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read == 1:
        engine.say(text)
        engine.runAndWait()
|  | ||||
# Convert a PDF file to audio
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak and/or save the text of a PDF file with pyttsx3.

    The text is obtained from guan.pdf_to_text and every newline is replaced
    by a space before synthesis.

    Parameters:
        pdf_path: path of the PDF file.
        rate: value passed to the engine's "rate" property.
        voice: index into the engine's list of voices.
        read: when 1, the text is spoken aloud.
        save: when 1, the text is saved to a wav file in the current directory.
            The base name is the last component of pdf_path (split on '/',
            ',' or backslash) with its last four characters removed.
        compress: when 1 (and save is 1), the wav file is converted to mp3 via
            guan.compress_wav_to_mp3 and the wav removed.
        bitrate: bitrate forwarded to guan.compress_wav_to_mp3.
        print_text: when 1, the text is printed first.
    """
    import pyttsx3
    import guan
    text = guan.pdf_to_text(pdf_path)
    text = text.replace('\n', ' ')
    if print_text == 1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save == 1:
        import re
        # Raw string: same character class as before, without the invalid '\]' escape.
        filename = re.split(r'[/,\\]', pdf_path)[-1][:-4]
        engine.save_to_file(text, filename+'.wav')
        engine.runAndWait()
        print('Wav file saved!')
        if compress == 1:
            import os
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read == 1:
        engine.say(text)
        engine.runAndWait()
|  | ||||
# Compress a wav audio file into an MP3 audio file
def compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k'):
    """Re-encode the wav file at wav_path as an MP3 file.

    Parameters:
        wav_path: path of the input wav file.
        output_filename: path of the MP3 file to write.
        bitrate: bitrate string passed to the exporter (e.g. '16k').

    Note: besides installing pydub, you may also need to download FFmpeg from
    http://www.ffmpeg.org/download.html and add its bin path to the
    environment variable.
    """
    from pydub import AudioSegment
    # The input is a wav file, so load it with the wav loader rather than from_mp3.
    sound = AudioSegment.from_wav(wav_path)
    sound.export(output_filename, format="mp3", bitrate=bitrate)
|  | ||||
# Convert a WordPress XML export into multiple Markdown files
def convert_wordpress_xml_to_markdown(xml_file='./a.xml', convert_content=1, replace_more=None):
    """Write one Markdown file per <item> of a WordPress XML export.

    Each file is written to the current directory, named after the item title
    (characters / : * ? " < > | ' and backslash replaced by spaces) and
    contains '# <title>' followed by the item content.

    Parameters:
        xml_file: path of the exported XML file.
        convert_content: when 1, HTML comments and a fixed set of tags are
            stripped or converted to Markdown markers; otherwise the content
            is written unchanged.
        replace_more: optional iterable of extra substrings to delete from the
            content (only applied when convert_content is 1).
    """
    import xml.etree.ElementTree as ET
    import re
    namespaces = {'content': 'http://purl.org/rss/1.0/modules/content/'}
    # Tags that are simply dropped; closing heading tags are included so that
    # no stray '</h2>' or '</h4>' is left behind the Markdown heading.
    dropped_tags = (
        '<p>', '</p>', '<ol>', '</ol>', '<ul>', '</ul>',
        '<strong>', '</strong>', '</li>', '</h2>', '</h3>', '</h4>',
    )
    extra_removals = () if replace_more is None else replace_more
    root = ET.parse(xml_file).getroot()
    for item in root.findall('.//item'):
        title = item.find('title').text
        content_element = item.find('.//content:encoded', namespaces=namespaces)
        # An item without content (missing or empty element) yields an empty body
        # instead of crashing in the string processing below.
        content = ''
        if content_element is not None and content_element.text is not None:
            content = content_element.text
        if convert_content == 1:
            # DOTALL so that comments spanning several lines are removed as well.
            content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)
            for tag in dropped_tags:
                content = content.replace(tag, '')
            content = content.replace('<li>', '+ ')
            content = re.sub(r'<h2.*?>', '## ', content)
            content = re.sub(r'<h3.*?>', '### ', content)
            content = re.sub(r'<h4.*?>', '#### ', content)
            for replace_item in extra_removals:
                content = content.replace(replace_item, '')
            # Collapse any run of three or more newlines into exactly two.
            content = re.sub(r'\n{3,}', '\n\n', content)
        markdown_content = f"# {title}\n{content}"
        markdown_file_path = f"{title}.md"
        cleaned_filename = re.sub(r'[/:*?"<>|\'\\]', ' ', markdown_file_path)
        with open(cleaned_filename, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_content)
|  | ||||
# Kelly formula
def kelly_formula(p, b, a=1):
    """Return f = p/a - (1-p)/b.

    Presumably p is the winning probability, b the gain ratio on a win and a
    the loss ratio on a loss (standard Kelly criterion) -- confirm with callers.
    """
    q = 1-p
    win_term = p/a
    loss_term = q/b
    return win_term-loss_term
|  | ||||
# Get all stocks
def all_stocks():
    """Fetch the table returned by akshare's stock_zh_a_spot_em.

    Returns:
        (title, stock_data): the column labels as a numpy array and the table
        values as returned by the DataFrame's `.values`.
    """
    import numpy as np
    import akshare as ak
    spot_table = ak.stock_zh_a_spot_em()
    column_names = np.array(spot_table.columns)
    rows = spot_table.values
    return column_names, rows
|  | ||||
# Get the symbols of all stocks
def all_stock_symbols():
    """Return column 1 of the data from guan.all_stocks() (used as the stock symbol)."""
    import guan
    _, rows = guan.all_stocks()
    return rows[:, 1]
|  | ||||
# Classification of stock symbols
def stock_symbols_classification():
    """Group the symbols from guan.all_stock_symbols() by their leading digits.

    Returns:
        A tuple of six lists, in this order:
        stock_symbols_60   -- prefixes 600/601/603/605 (Shanghai main board),
        stock_symbols_00   -- prefixes 000/001/002/003 (Shenzhen main board),
        stock_symbols_30   -- prefixes 300/301 (ChiNext),
        stock_symbols_68   -- prefixes 688/689 (STAR market),
        stock_symbols_8_4  -- prefixes 82/83/87/88/430/420/400 (NEEQ),
        stock_symbols_others -- every symbol matching none of the above.
        Each list keeps the order of the input symbols.
    """
    import guan
    prefix_groups = (
        ('600', '601', '603', '605'),
        ('000', '001', '002', '003'),
        ('300', '301'),
        ('688', '689'),
        ('82', '83', '87', '88', '430', '420', '400'),
    )
    classified = [[] for _ in prefix_groups]
    stock_symbols_others = []
    # Single pass: the prefix groups are mutually exclusive, so the first match decides.
    for stock_symbol in guan.all_stock_symbols():
        for bucket, prefixes in zip(classified, prefix_groups):
            if stock_symbol.startswith(prefixes):
                bucket.append(stock_symbol)
                break
        else:
            # Symbols that fit no known prefix are collected for inspection.
            stock_symbols_others.append(stock_symbol)
    stock_symbols_60, stock_symbols_00, stock_symbols_30, stock_symbols_68, stock_symbols_8_4 = classified
    return stock_symbols_60, stock_symbols_00, stock_symbols_30, stock_symbols_68, stock_symbols_8_4, stock_symbols_others
|  | ||||
# Number of stock symbols in each class
def statistics_of_stock_symbols_classification():
    """Return the length of each of the six lists from guan.stock_symbols_classification().

    Returns:
        (num_stocks_60, num_stocks_00, num_stocks_30, num_stocks_68,
        num_stocks_8_4, num_stocks_others)
    """
    import guan
    group_60, group_00, group_30, group_68, group_8_4, group_others = guan.stock_symbols_classification()
    groups = (group_60, group_00, group_30, group_68, group_8_4, group_others)
    return tuple(len(group) for group in groups)
|  | ||||
# Get the stock name from the stock symbol
def find_stock_name_from_symbol(symbol='000002'):
    """Look up a stock name in the data returned by guan.all_stocks().

    Parameters:
        symbol: value searched for in each row of the stock data.

    Returns:
        Column 2 of the last row containing `symbol`, or None when no row
        contains it (previously this case raised UnboundLocalError).
    """
    import guan
    title, stock_data = guan.all_stocks()
    stock_name = None
    for stock in stock_data:
        if symbol in stock:
            stock_name = stock[2]
    return stock_name
|  | ||||
# Sort by market capitalization
def sorted_market_capitalization(num=10):
    """Rank the rows of guan.all_stocks() by column 17 in descending order.

    Rows whose column 9 is NaN are dropped before sorting.
    NOTE(review): column 17 is presumably the total market value and column 9
    a price field of the akshare table -- confirm against the akshare layout.

    Parameters:
        num: number of rows to return; None returns all rows. Values larger
            than the number of available rows are clamped.

    Returns:
        A list of [rank, column 1 (symbol), column 2 (name), column 17 / 1e8].
    """
    import numpy as np
    import guan
    title, stock_data = guan.all_stocks()
    valid_rows = [stock for stock in stock_data if not np.isnan(float(stock[9]))]
    if not valid_rows:
        # Nothing to rank; avoids indexing a 1-D empty array below.
        return []
    new_stock_data = np.array(valid_rows)
    list_index = np.argsort(new_stock_data[:, 17])[::-1]
    if num is None:
        num = len(list_index)
    else:
        # Never ask for more rows than exist.
        num = min(num, len(list_index))
    sorted_array = []
    for i0 in range(num):
        row = new_stock_data[list_index[i0]]
        sorted_array.append([i0+1, row[1], row[2], row[17]/1e8])
    return sorted_array
|  | ||||
# Sort US stocks by market capitalization
def sorted_market_capitalization_us(num=10):
    """Rank the rows of akshare's stock_us_spot_em table by column 9, descending.

    Rows whose column 9 is NaN are dropped before sorting.
    NOTE(review): column 9 is presumably the market value, column 15 the
    symbol and column 1 the name -- confirm against the akshare layout.

    Parameters:
        num: number of rows to return; None returns all rows. Values larger
            than the number of available rows are clamped.

    Returns:
        A list of [rank, column 15 (symbol), column 1 (name), column 9 / 1e8].
    """
    import akshare as ak
    import numpy as np
    stocks = ak.stock_us_spot_em()
    stock_data = stocks.values
    valid_rows = [stock for stock in stock_data if not np.isnan(float(stock[9]))]
    if not valid_rows:
        # Nothing to rank; avoids indexing a 1-D empty array below.
        return []
    new_stock_data = np.array(valid_rows)
    list_index = np.argsort(new_stock_data[:, 9])[::-1]
    if num is None:
        num = len(list_index)
    else:
        # Never ask for more rows than exist.
        num = min(num, len(list_index))
    sorted_array = []
    for i0 in range(num):
        row = new_stock_data[list_index[i0]]
        sorted_array.append([i0+1, row[15], row[1], row[9]/1e8])
    return sorted_array
|  | ||||
# Get the historical data of a single stock
def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
    """Fetch history for one symbol through akshare's stock_zh_a_hist.

    Parameters:
        symbol: stock symbol forwarded to akshare.
        period: 'daily', 'weekly' or 'monthly'.
        start_date, end_date: date strings forwarded to akshare.

    Returns:
        (title, stock_data): the column labels as a numpy array and the table
        values with the row order reversed.
    """
    import numpy as np
    import akshare as ak
    history = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
    column_names = np.array(history.columns)
    reversed_rows = history.values[::-1]
    return column_names, reversed_rows
|  | ||||
# Plot a stock chart
def plot_stock_line(date_array, opening_array, closing_array, high_array, low_array, lw_open_close=6, lw_high_low=2, xlabel='date', ylabel='price', title='', fontsize=20, labelsize=20, adjust_bottom=0.2, adjust_left=0.2, fontfamily='Times New Roman'):
    """Draw one pair of vertical lines per date: open-close (thick) and low-high (thin).

    A date is drawn in red when opening <= closing and in green otherwise.
    The figure and axes come from guan.import_plt_and_start_fig_ax; the plot is
    shown and then all figures are closed.

    Parameters:
        date_array, opening_array, closing_array, high_array, low_array:
            sequences indexed in parallel.
        lw_open_close, lw_high_low: line widths of the two line kinds.
        xlabel, ylabel, title: axis labels and title text.
        fontsize: font size of labels and title.
        labelsize, adjust_bottom, adjust_left, fontfamily: forwarded to
            guan.import_plt_and_start_fig_ax.
    """
    import guan
    plt, fig, ax = guan.import_plt_and_start_fig_ax(adjust_bottom=adjust_bottom, adjust_left=adjust_left, labelsize=labelsize, fontfamily=fontfamily)
    # The font family is only set explicitly for the Times New Roman case.
    text_options = {'fontsize': fontsize}
    if fontfamily == 'Times New Roman':
        text_options['fontfamily'] = 'Times New Roman'
    ax.set_title(title, **text_options)
    ax.set_xlabel(xlabel, **text_options)
    ax.set_ylabel(ylabel, **text_options)
    for idx in range(len(date_array)):
        if opening_array[idx] <= closing_array[idx]:
            colour = 'red'
        else:
            colour = 'green'
        ax.vlines(date_array[idx], opening_array[idx], closing_array[idx], linestyle='-', color=colour, lw=lw_open_close)
        ax.vlines(date_array[idx], low_array[idx], high_array[idx], color=colour, linestyle='-', lw=lw_high_low)
    plt.show()
    plt.close('all')
|  | ||||
# Usage statistics of the Guan package (only counts installations and imports)
def statistics_of_guan_package(function_name=None):
    """Send a small JSON usage record to socket.guanjihuan.com:12345.

    The record contains the date, time, package version and MAC address
    obtained from the guan helpers, plus `function_name` when it is given.
    This is best effort: any error (no network, timeout, ...) is ignored.

    Parameters:
        function_name: optional name added to the record under 'function_name'.
    """
    import guan
    try:
        import json
        import socket
        message = {
            'server': 'py.guanjihuan.com',
            'date': guan.get_date(),
            'time': guan.get_time(),
            'version': guan.get_current_version('guan'),
        }
        # The context manager closes the socket even when connect/send fails.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
            client_socket.settimeout(0.5)
            client_socket.connect(('socket.guanjihuan.com', 12345))
            message['MAC_address'] = guan.get_mac_address()
            if function_name is not None:
                message['function_name'] = function_name
            client_socket.send(json.dumps(message).encode())
    except Exception:
        # Statistics must never interfere with using the package.
        pass
|  | ||||
# Upgrade check and notification for the Guan package (nothing is shown if the connection fails or the version is already the latest)
def notification_of_upgrade(timeout=5):
    """Print an upgrade hint when the latest and current guan versions differ.

    Parameters:
        timeout: forwarded to guan.get_latest_version.

    Nothing is printed when either version is None, when both are equal, or
    when any error occurs.
    """
    try:
        import guan
        latest_version = guan.get_latest_version(package_name='guan', timeout=timeout)
        current_version = guan.get_current_version('guan')
        if latest_version == None or current_version == None:
            return
        if not (latest_version != current_version):
            return
        notice = '升级提示:您当前使用的版本是 guan-'+current_version
        notice = notice+',目前已经有最新版本 guan-'+latest_version
        notice = notice+'。您可以通过以下命令对软件包进行升级:pip install --upgrade guan -i https://pypi.python.org/simple 或 pip install --upgrade guan'
        print(notice)
    except:
        pass
| @@ -35,138 +35,6 @@ def load_txt_data(filename): | ||||
|     data = np.loadtxt(filename+'.txt') | ||||
|     return data | ||||
|  | ||||
# Create the directory if it does not exist
def make_directory(directory='./test'):
    """Create `directory` (including missing parents) unless the path already exists."""
    import os
    if os.path.exists(directory):
        return
    os.makedirs(directory)
|  | ||||
# Create an empty file if the file does not exist
def make_file(file_path='./a.txt'):
    """Create an empty file at `file_path` unless the path already exists."""
    import os
    if os.path.exists(file_path):
        return
    open(file_path, 'w').close()
|  | ||||
# Open a file for writing; by default new content is appended
def open_file(filename='a', file_format='.txt', mode='add'):
    """Open filename + file_format for writing with UTF-8 encoding.

    Parameters:
        filename: path without extension.
        file_format: extension appended to `filename`.
        mode: 'add' appends to the file, 'overwrite' truncates it.

    Returns:
        The opened file object; the caller is responsible for closing it.

    Raises:
        ValueError: if `mode` is neither 'add' nor 'overwrite'.
    """
    open_modes = {'add': 'a', 'overwrite': 'w'}
    if mode not in open_modes:
        # Fail with a clear message instead of an UnboundLocalError on return.
        raise ValueError("mode must be 'add' or 'overwrite', got "+repr(mode))
    return open(filename+file_format, open_modes[mode], encoding='UTF-8')
|  | ||||
# Print to a text file
def print_to_file(*args, filename='print_result', file_format='.txt', print_on=True):
    """Append the arguments as one line to filename + file_format (UTF-8).

    Every argument is converted with str() and followed by a single space;
    the line ends with a newline. When `print_on` equals True the same line
    is also printed to standard output.
    """
    line = ''.join(str(arg)+' ' for arg in args)
    if print_on == True:
        print(line)
    f = open(filename+file_format, 'a', encoding='UTF-8')
    f.write(line+'\n')
    f.close()
|  | ||||
# Read the content of a text file; return an empty string if the file does not exist
def read_text_file(file_path='./a.txt', make_file=None):
    """Return the content of `file_path`, or '' when the path does not exist.

    Parameters:
        file_path: path of the file to read (opened with the default encoding).
        make_file: when not None and the path is missing, an empty file is
            created at `file_path` before '' is returned.
    """
    import os
    if os.path.exists(file_path):
        with open(file_path, 'r') as handle:
            return handle.read()
    if make_file != None:
        open(file_path, 'w').close()
    return ''
|  | ||||
# Get all file names in a directory
def get_all_filenames_in_directory(directory='./', file_format=None, show_root_path=0, sort=1, include_subdirectory=1):
    """Collect file names found by walking `directory`.

    Parameters:
        directory: directory to walk.
        file_format: when not None, only names containing this substring are kept.
        show_root_path: 0 returns bare file names; any other value returns
            root + '/' + name.
        sort: when 1, the result is sorted.
        include_subdirectory: when not 1, only the first directory yielded by
            the walk (the top directory) is inspected.

    Returns:
        A list of file names or paths.
    """
    import os
    collected = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            if file_format is not None and file_format not in name:
                continue
            collected.append(name if show_root_path == 0 else root+'/'+name)
        if include_subdirectory != 1:
            break
    if sort == 1:
        collected = sorted(collected)
    return collected
|  | ||||
# Find the files of a given text type in a directory and read all of their content
def read_text_files_in_directory(directory='./', file_format='.md'):
    """Read every file under `directory` whose name contains `file_format`.

    Returns:
        (file_list, content_array): the matching paths (root + '/' + name, in
        walk order) and their UTF-8 decoded contents in the same order.
    """
    import os
    matched_paths = [
        root+'/'+name
        for root, _dirs, files in os.walk(directory)
        for name in files
        if file_format in name
    ]
    contents = []
    for path in matched_paths:
        with open(path, 'r', encoding='UTF-8') as handle:
            contents.append(handle.read())
    return matched_paths, contents
|  | ||||
# Search for a keyword in multiple text files
def find_words_in_multiple_files(words, directory='./', file_format='.md'):
    """Return the paths of the files whose content contains `words`.

    The candidate files and their contents come from
    guan.read_text_files_in_directory(directory, file_format).
    """
    import guan
    paths, texts = guan.read_text_files_in_directory(directory=directory, file_format=file_format)
    return [path for path, text in zip(paths, texts) if words in text]
|  | ||||
# Make a copy of a file
def copy_file(old_file='./a.txt', new_file='./b.txt'):
    """Copy `old_file` to `new_file` using shutil.copy."""
    from shutil import copy as copy_path
    copy_path(old_file, new_file)
|  | ||||
# Open a file and replace a string in it
def open_file_and_replace_str(file_path='./a.txt', old_str='', new_str=''):
    """Replace every occurrence of `old_str` by `new_str` inside `file_path`.

    The content is read with guan.read_text_file and written back through
    guan.open_file in 'overwrite' mode.
    """
    import guan
    updated = guan.read_text_file(file_path=file_path).replace(old_str, new_str)
    handle = guan.open_file(filename=file_path, file_format='', mode='overwrite')
    handle.write(updated)
    handle.close()
|  | ||||
# Make a copy of a file and then replace a string in the copy
def copy_file_and_replace_str(old_file='./a.txt', new_file='./b.txt', old_str='', new_str=''):
    """Copy `old_file` to `new_file`, then replace `old_str` by `new_str` in the copy.

    Copying, reading and writing are delegated to guan.copy_file,
    guan.read_text_file and guan.open_file ('overwrite' mode).
    """
    import guan
    guan.copy_file(old_file=old_file, new_file=new_file)
    updated = guan.read_text_file(file_path=new_file).replace(old_str, new_str)
    handle = guan.open_file(filename=new_file, file_format='', mode='overwrite')
    handle.write(updated)
    handle.close()
|  | ||||
# Concatenate two PDF files
def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf'):
    """Write a PDF containing all pages of `input_file_1` followed by those of `input_file_2`."""
    import PyPDF2
    writer = PyPDF2.PdfWriter()
    # Each input is opened, appended page by page, and closed before the next one.
    for source_path in (input_file_1, input_file_2):
        with open(source_path, 'rb') as source:
            reader = PyPDF2.PdfReader(source)
            for page_index in range(len(reader.pages)):
                writer.add_page(reader.pages[page_index])
    with open(output_file, 'wb') as combined_file:
        writer.write(combined_file)
|  | ||||
| # 读取文件中的一维数据(一行一组x和y) | ||||
| def read_one_dimensional_data(filename='a', file_format='.txt'):  | ||||
|     import numpy as np | ||||
| @@ -375,6 +243,124 @@ def write_two_dimensional_data_without_xy_array_and_without_opening_file(matrix, | ||||
|             f.write(str(element)+'   ') | ||||
|         f.write('\n') | ||||
|  | ||||
# Create the directory if it does not exist
def make_directory(directory='./test'):
    """Create `directory` (including missing parents) unless the path already exists."""
    import os
    if os.path.exists(directory):
        return
    os.makedirs(directory)
|  | ||||
# Create an empty file if the file does not exist
def make_file(file_path='./a.txt'):
    """Create an empty file at `file_path` unless the path already exists."""
    import os
    if os.path.exists(file_path):
        return
    open(file_path, 'w').close()
|  | ||||
# Open a file for writing; by default new content is appended
def open_file(filename='a', file_format='.txt', mode='add'):
    """Open filename + file_format for writing with UTF-8 encoding.

    Parameters:
        filename: path without extension.
        file_format: extension appended to `filename`.
        mode: 'add' appends to the file, 'overwrite' truncates it.

    Returns:
        The opened file object; the caller is responsible for closing it.

    Raises:
        ValueError: if `mode` is neither 'add' nor 'overwrite'.
    """
    open_modes = {'add': 'a', 'overwrite': 'w'}
    if mode not in open_modes:
        # Fail with a clear message instead of an UnboundLocalError on return.
        raise ValueError("mode must be 'add' or 'overwrite', got "+repr(mode))
    return open(filename+file_format, open_modes[mode], encoding='UTF-8')
|  | ||||
# Print to a text file
def print_to_file(*args, filename='print_result', file_format='.txt', print_on=True):
    """Append the arguments as one line to filename + file_format (UTF-8).

    Every argument is converted with str() and followed by a single space;
    the line ends with a newline. When `print_on` equals True the same line
    is also printed to standard output.
    """
    line = ''.join(str(arg)+' ' for arg in args)
    if print_on == True:
        print(line)
    f = open(filename+file_format, 'a', encoding='UTF-8')
    f.write(line+'\n')
    f.close()
|  | ||||
# Read the content of a text file; return an empty string if the file does not exist
def read_text_file(file_path='./a.txt', make_file=None):
    """Return the content of `file_path`, or '' when the path does not exist.

    Parameters:
        file_path: path of the file to read (opened with the default encoding).
        make_file: when not None and the path is missing, an empty file is
            created at `file_path` before '' is returned.
    """
    import os
    if os.path.exists(file_path):
        with open(file_path, 'r') as handle:
            return handle.read()
    if make_file != None:
        open(file_path, 'w').close()
    return ''
|  | ||||
# Get all file names in a directory
def get_all_filenames_in_directory(directory='./', file_format=None, show_root_path=0, sort=1, include_subdirectory=1):
    """Collect file names found by walking `directory`.

    Parameters:
        directory: directory to walk.
        file_format: when not None, only names containing this substring are kept.
        show_root_path: 0 returns bare file names; any other value returns
            root + '/' + name.
        sort: when 1, the result is sorted.
        include_subdirectory: when not 1, only the first directory yielded by
            the walk (the top directory) is inspected.

    Returns:
        A list of file names or paths.
    """
    import os
    collected = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            if file_format is not None and file_format not in name:
                continue
            collected.append(name if show_root_path == 0 else root+'/'+name)
        if include_subdirectory != 1:
            break
    if sort == 1:
        collected = sorted(collected)
    return collected
|  | ||||
# Find the files of a given text type in a directory and read all of their content
def read_text_files_in_directory(directory='./', file_format='.md'):
    """Read every file under `directory` whose name contains `file_format`.

    Returns:
        (file_list, content_array): the matching paths (root + '/' + name, in
        walk order) and their UTF-8 decoded contents in the same order.
    """
    import os
    matched_paths = [
        root+'/'+name
        for root, _dirs, files in os.walk(directory)
        for name in files
        if file_format in name
    ]
    contents = []
    for path in matched_paths:
        with open(path, 'r', encoding='UTF-8') as handle:
            contents.append(handle.read())
    return matched_paths, contents
|  | ||||
# Search for a keyword in multiple text files
def find_words_in_multiple_files(words, directory='./', file_format='.md'):
    """Return the paths of the files whose content contains `words`.

    The candidate files and their contents come from
    guan.read_text_files_in_directory(directory, file_format).
    """
    import guan
    paths, texts = guan.read_text_files_in_directory(directory=directory, file_format=file_format)
    return [path for path, text in zip(paths, texts) if words in text]
|  | ||||
# Make a copy of a file
def copy_file(old_file='./a.txt', new_file='./b.txt'):
    """Copy `old_file` to `new_file` using shutil.copy."""
    from shutil import copy as copy_path
    copy_path(old_file, new_file)
|  | ||||
# Open a file and replace a string in it
def open_file_and_replace_str(file_path='./a.txt', old_str='', new_str=''):
    """Replace every occurrence of `old_str` by `new_str` inside `file_path`.

    The content is read with guan.read_text_file and written back through
    guan.open_file in 'overwrite' mode.
    """
    import guan
    updated = guan.read_text_file(file_path=file_path).replace(old_str, new_str)
    handle = guan.open_file(filename=file_path, file_format='', mode='overwrite')
    handle.write(updated)
    handle.close()
|  | ||||
# Make a copy of a file and then replace a string in the copy
def copy_file_and_replace_str(old_file='./a.txt', new_file='./b.txt', old_str='', new_str=''):
    """Copy `old_file` to `new_file`, then replace `old_str` by `new_str` in the copy.

    Copying, reading and writing are delegated to guan.copy_file,
    guan.read_text_file and guan.open_file ('overwrite' mode).
    """
    import guan
    guan.copy_file(old_file=old_file, new_file=new_file)
    updated = guan.read_text_file(file_path=new_file).replace(old_str, new_str)
    handle = guan.open_file(filename=new_file, file_format='', mode='overwrite')
    handle.write(updated)
    handle.close()
|  | ||||
| # 改变当前的目录位置 | ||||
| def change_directory_by_replacement(current_key_word='code', new_key_word='data'): | ||||
|     import os | ||||
| @@ -383,4 +369,132 @@ def change_directory_by_replacement(current_key_word='code', new_key_word='data' | ||||
|     data_path = data_path.replace(current_key_word, new_key_word)  | ||||
|     if os.path.exists(data_path) == False: | ||||
|         os.makedirs(data_path) | ||||
|     os.chdir(data_path) | ||||
|     os.chdir(data_path) | ||||
|  | ||||
# Find files that share the same file name
def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=None, ignored_file_with_words=None, num=1000):
    """Report file names that occur more than once under `directory`.

    Parameters:
        directory: directory to walk.
        ignored_directory_with_words: optional iterable of substrings; files
            located in a directory whose path contains any of them are skipped.
        ignored_file_with_words: optional iterable of substrings; files whose
            name contains any of them are skipped.
        num: number of most common names inspected (Counter.most_common(num)).

    Returns:
        A list of (file name, count) tuples with count > 1, most frequent first.
    """
    import os
    from collections import Counter
    directory_words = ignored_directory_with_words or ()
    file_words = ignored_file_with_words or ()
    file_list = []
    for root, dirs, files in os.walk(directory):
        # Decide once per directory; a file is skipped exactly once no matter
        # how many ignore words match (repeated list.remove calls used to drop
        # unrelated entries or raise ValueError).
        if any(word in root for word in directory_words):
            continue
        for name in files:
            if any(word in name for word in file_words):
                continue
            file_list.append(name)
    return [item for item in Counter(file_list).most_common(num) if item[1] > 1]
|  | ||||
# Count the number of files in each sub-directory
def count_file_in_sub_directory(directory='./', sort=0, reverse=1, print_show=1, smaller_than_num=None):
    """Count the files (recursively) inside every sub-directory of `directory`.

    Parameters:
        directory: directory to walk; every directory found below it is counted.
        sort: 0 keeps walk order; 1 orders the result by file count.
        reverse: with sort == 1, 1 gives descending order, anything else ascending.
        print_show: when 1, each reported directory and its count are printed.
        smaller_than_num: when not None, only directories with fewer files
            than this number are printed (the returned lists are unaffected).

    Returns:
        (sub_directory, num_in_sub_directory): parallel lists of directory
        paths (root + '/' + name) and file counts.
    """
    import os
    import numpy as np

    def report(path, count):
        # Printing is optional and can be limited to small directories.
        if print_show != 1:
            return
        if smaller_than_num == None or count < smaller_than_num:
            print(path)
            print(count)
            print()

    dirs_list = [
        root+'/'+name
        for root, dirs, _files in os.walk(directory)
        for name in dirs
    ]
    count_file_array = []
    for sub_dir in dirs_list:
        count_file = sum(len(files) for _root, _dirs, files in os.walk(sub_dir))
        count_file_array.append(count_file)
        if sort == 0:
            report(sub_dir, count_file)
    if sort == 0:
        sub_directory = dirs_list
        num_in_sub_directory = count_file_array
    if sort == 1:
        index_array = np.argsort(count_file_array)
        if reverse == 1:
            index_array = index_array[::-1]
        sub_directory = []
        num_in_sub_directory = []
        for position in index_array:
            sub_directory.append(dirs_list[position])
            num_in_sub_directory.append(count_file_array[position])
            report(dirs_list[position], count_file_array[position])
    return sub_directory, num_in_sub_directory
|  | ||||
| # 在多个子文件夹中产生必要的文件,例如 readme.md | ||||
def creat_necessary_file(directory, filename='readme', file_format='.md', content='', overwrite=None, ignored_directory_with_words=None):
    """Create ``filename + file_format`` in every folder under *directory* that holds files.

    Args:
        directory: Root of the tree to scan (the root itself is included).
        filename: Base name of the file to create.
        file_format: Extension appended to *filename* (including the dot).
        content: Text written into each created file.
        overwrite: When ``None``, folders that already contain the target
            file are skipped; any other value rewrites the existing file.
        ignored_directory_with_words: Folders whose path contains any of
            these substrings are skipped. ``None`` means no exclusions.

    Folders without any file are never touched. The current working
    directory is left unchanged.
    """
    import os
    target_name = filename + file_format
    ignored_words = ignored_directory_with_words or ()
    # Collect the targets first so that the files created below cannot
    # influence the directory scan.
    target_directories = []
    for root, _dirs, files in os.walk(directory):
        if not files:
            continue
        if overwrite is None and target_name in files:
            continue
        if any(word in root for word in ignored_words):
            continue
        target_directories.append(root)
    for root in target_directories:
        # Build the full path instead of os.chdir(root): with a relative
        # *directory* a chdir would invalidate every subsequent relative root.
        with open(os.path.join(root, target_name), 'w', encoding="utf-8") as f:
            f.write(content)
|  | ||||
| # 删除特定文件名的文件(谨慎使用) | ||||
def delete_file_with_specific_name(directory, filename='readme', file_format='.md'):
    """Delete every file named ``filename + file_format`` under *directory*.

    The whole tree is walked, including *directory* itself. Use with care:
    deletions are permanent.
    """
    import os
    target_name = filename + file_format
    for current_dir, _subdirs, file_names in os.walk(directory):
        for file_name in file_names:
            if file_name != target_name:
                continue
            os.remove(current_dir + '/' + file_name)
|  | ||||
| # 将所有文件移到根目录(谨慎使用) | ||||
def move_all_files_to_root_directory(directory):
    """Flatten a tree: move every file below *directory* into *directory*.

    After the files are moved, the sub-folders that became empty are
    removed. *directory* itself is never removed, even when it ends up empty.

    Note: files are moved by name only, so two files with the same name in
    different sub-folders collide at the destination. Use with care.
    """
    import os
    import shutil
    for root, _dirs, files in os.walk(directory):
        if root == directory:
            # Files already located in the root need no move.
            continue
        for name in files:
            shutil.move(os.path.join(root, name), os.path.join(directory, name))
    # Bottom-up traversal removes children before their parents, so a single
    # pass is enough regardless of nesting depth.
    for root, _dirs, _files in os.walk(directory, topdown=False):
        if root == directory:
            continue
        try:
            os.rmdir(root)
        except OSError:
            # Folder is not empty (or not removable); leave it in place.
            pass
| @@ -381,4 +381,26 @@ def pca_of_data(data, n_components=None, standard=1): | ||||
|         pca = PCA(n_components=n_components) | ||||
|     data_transformed = pca.fit_transform(data_scaled) | ||||
|     explained_variance_ratio = pca.explained_variance_ratio_ | ||||
|     return data_transformed, explained_variance_ratio | ||||
|     return data_transformed, explained_variance_ratio | ||||
|  | ||||
| # 通过定义计算R^2(基于实际值和预测值,数值有可能小于0) | ||||
def calculate_R2_with_definition(y_true_array, y_pred_array):
    """Compute R^2 from its definition, ``1 - SS_res / SS_tot``.

    Args:
        y_true_array: Observed values (any array-like, including lists).
        y_pred_array: Predicted values, same length as *y_true_array*.

    Returns:
        The coefficient of determination. It can be negative when the
        predictions fit worse than the mean of the observed values.
    """
    import numpy as np
    # Convert up front so plain Python lists support element-wise arithmetic.
    y_true = np.asarray(y_true_array)
    y_pred = np.asarray(y_pred_array)
    y_mean = np.mean(y_true)
    SS_tot = np.sum((y_true - y_mean) ** 2)
    SS_res = np.sum((y_true - y_pred) ** 2)
    R2 = 1 - (SS_res / SS_tot)
    return R2
|  | ||||
| # 通过sklearn计算R^2,和上面定义的计算结果一致 | ||||
def calculate_R2_with_sklearn(y_true_array, y_pred_array):
    """Return R^2 of the predictions as computed by ``sklearn.metrics.r2_score``."""
    from sklearn import metrics
    return metrics.r2_score(y_true_array, y_pred_array)
|  | ||||
| # 通过scipy计算线性回归后的R^2(基于线性回归模型,范围在0和1之间) | ||||
def calculate_R2_after_linear_regression_with_scipy(y_true_array, y_pred_array):
    """Return the squared correlation coefficient of a linear fit between the two arrays.

    ``scipy.stats.linregress`` is run with *y_true_array* as x and
    *y_pred_array* as y; the result is the square of its r-value.
    """
    from scipy.stats import linregress
    regression = linregress(y_true_array, y_pred_array)
    # Result layout: (slope, intercept, rvalue, pvalue, stderr).
    correlation = regression[2]
    return correlation ** 2
| @@ -1,711 +0,0 @@ | ||||
| # Module: others | ||||
|  | ||||
| # 获取软件包的本机版本 | ||||
def get_current_version(package_name='guan'):
    """Return the locally installed version of *package_name*, or ``None``.

    ``None`` is returned when the version cannot be determined (for example
    when the package is not installed).
    """
    import importlib.metadata
    try:
        return importlib.metadata.version(package_name)
    except Exception:
        # Best-effort lookup: any lookup failure means "unknown", but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
|  | ||||
| # 获取Python软件包的最新版本 | ||||
def get_latest_version(package_name='guan', timeout=5):
    """Return the latest version of *package_name* published on PyPI, or ``None``.

    Args:
        package_name: Project name as used on pypi.org.
        timeout: Request timeout in seconds.

    Returns:
        The version string from the PyPI JSON API, or ``None`` when the
        request fails, the status code is not 200, or the payload does not
        have the expected shape.
    """
    import requests
    url = f"https://pypi.org/pypi/{package_name}/json"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()["info"]["version"]
    except (ValueError, KeyError, TypeError):
        # Body was not valid JSON or lacked the "info"/"version" entries.
        return None
|  | ||||
| # 获取包含某个字符的进程PID值 | ||||
def get_PID_array(name):
    """Return the second column of every ``ps -ef`` line matching *name*.

    The match is done by ``grep``, so the grep process itself (and the shell
    that runs the pipeline) usually appear in the result as well.

    Args:
        name: Pattern handed to ``grep``.

    Returns:
        A list of strings (the PID column on systems where ``ps -ef`` prints
        ``UID PID ...``). Empty when the pipeline fails or nothing matches.
    """
    import shlex
    import subprocess
    # Quote the pattern so it cannot be interpreted as shell syntax; "--"
    # keeps a pattern starting with "-" from being read as a grep option.
    command = "ps -ef | grep -- " + shlex.quote(name)
    result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        return []
    id_running_array = []
    for line in result.stdout.split('\n'):
        # str.split() ignores leading whitespace, so the column index stays
        # correct even when the first column is right-aligned.
        fields = line.split()
        if len(fields) > 1:
            id_running_array.append(fields[1])
    return id_running_array
|  | ||||
| # 每日git commit次数的统计 | ||||
def statistics_of_git_commits(print_show=0, str_or_datetime='str'):
    """Count git commits per day for the repository in the current directory.

    Args:
        print_show: When 1, print one ``date: count commits`` line per day.
        str_or_datetime: ``'str'`` returns dates as ``YYYY-MM-DD`` strings,
            ``'datetime'`` returns ``datetime.datetime`` objects.

    Returns:
        ``(date_array, commit_count_array)`` sorted by date. Both are empty
        when git reports no commits (e.g. outside a repository).
    """
    import collections
    import datetime
    import subprocess
    since_date = '100 year ago'
    result = subprocess.run(
        ['git', 'log', f'--since={since_date}', '--pretty=format:%ad', '--date=short'],
        stdout=subprocess.PIPE,
        text=True)
    # Drop blank lines: empty output would otherwise be counted as one
    # commit on the date ''.
    commits = [line.strip() for line in result.stdout.split('\n') if line.strip()]
    daily_commit_counts = dict(sorted(collections.Counter(commits).items()))
    date_array = []
    commit_count_array = []
    for date, count in daily_commit_counts.items():
        if print_show == 1:
            print(f"{date}: {count} commits")
        if str_or_datetime == 'datetime':
            date_array.append(datetime.datetime.strptime(date, "%Y-%m-%d"))
        elif str_or_datetime == 'str':
            date_array.append(date)
        commit_count_array.append(count)
    return date_array, commit_count_array
|  | ||||
| # 将文件目录结构写入Markdown文件 | ||||
def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=None, hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
    """Write the folder structure of *directory* to ``filename + '.md'``.

    Files become ``+ name`` list items and folders become Markdown headings
    whose level follows the nesting depth. Folders are descended up to six
    levels; the content of sixth-level folders is not listed.

    Args:
        directory: Folder to describe.
        filename: Output path without the ``.md`` extension.
        reverse_positive_or_negative: Slice step applied to each
            ``os.listdir`` result (1 keeps the order, -1 reverses it).
        starting_from_h1: When ``None`` every heading gets one extra ``#``
            (top level starts at ``##``); otherwise top level starts at ``#``.
        banned_file_format: Extensions (with dot) of files to omit.
            ``None`` means nothing is omitted.
        hide_file_format: When not ``None``, file names are written without
            their extension.
        divided_line: When not ``None``, a ``--------`` rule is written
            before every top-level folder except the first.
        show_second_number: When not ``None``, second-level folders are
            numbered as ``1. name``.
        show_third_number: When not ``None``, third-level folders are
            numbered as ``(1) name``.
    """
    import os
    banned = () if banned_file_format is None else banned_file_format
    extra_hash = '#' if starting_from_h1 is None else ''
    deepest_level = 6

    def write_level(out, path, level):
        # Write the entries of *path*; folders found here get a heading of
        # the given level and are then descended into.
        directory_count = 0
        for name in os.listdir(path)[::reverse_positive_or_negative]:
            full_path = os.path.join(path, name)
            if os.path.isfile(full_path):
                stem, extension = os.path.splitext(name)
                if extension not in banned:
                    shown = name if hide_file_format is None else stem
                    out.write('+ ' + str(shown) + '\n\n')
                continue
            directory_count += 1
            if level == 1 and divided_line is not None and directory_count != 1:
                out.write('--------\n\n')
            if level == 2 and show_second_number is not None:
                title = str(directory_count) + '. ' + str(name)
            elif level == 3 and show_third_number is not None:
                title = '(' + str(directory_count) + ') ' + str(name)
            else:
                title = str(name)
            out.write(extra_hash + '#' * level + ' ' + title + '\n\n')
            if level < deepest_level:
                write_level(out, full_path, level + 1)

    # The output file is opened before listing, as before, so it shows up in
    # its own listing when it lives inside *directory*.
    with open(filename + '.md', 'w', encoding="utf-8") as out:
        write_level(out, directory, 1)
|  | ||||
| # 从网页的标签中获取内容 | ||||
def get_html_from_tags(link, tags=None):
    """Download *link* and return the text of the selected HTML tags.

    Args:
        link: URL of the page (decoded as UTF-8).
        tags: Tag names to extract. Defaults to the title, h1-h6, p, li and
            a tags.

    Returns:
        The tag texts (newlines inside a tag removed) joined by blank lines.
        Empty texts that precede the first non-empty one are dropped.

    Note: TLS certificates are NOT verified for this request.
    """
    from bs4 import BeautifulSoup
    import itertools
    import ssl
    import urllib.request
    if tags is None:
        tags = ['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a']
    # Disable certificate checks for this request only, instead of replacing
    # the process-wide default HTTPS context.
    unverified_context = ssl.create_default_context()
    unverified_context.check_hostname = False
    unverified_context.verify_mode = ssl.CERT_NONE
    with urllib.request.urlopen(link, context=unverified_context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    texts = (tag.get_text().replace('\n', '') for tag in soup.find_all(tags))
    texts = itertools.dropwhile(lambda text: text == '', texts)
    return '\n\n'.join(texts)
|  | ||||
| # 从HTML中获取所有的链接 | ||||
def get_links_from_html(html_link, links_with_text=0):
    """Download *html_link* and return the targets of all its ``<a>`` tags.

    Args:
        html_link: URL of the page (decoded as UTF-8).
        links_with_text: When 0, return a list of ``href`` strings;
            otherwise return a list of ``(href, text)`` tuples.

    ``<a>`` tags without a non-empty ``href`` are skipped.

    Note: TLS certificates are NOT verified for this request.
    """
    from bs4 import BeautifulSoup
    import ssl
    import urllib.request
    # Disable certificate checks for this request only, instead of replacing
    # the process-wide default HTTPS context.
    unverified_context = ssl.create_default_context()
    unverified_context.check_hostname = False
    unverified_context.verify_mode = ssl.CERT_NONE
    with urllib.request.urlopen(html_link, context=unverified_context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    a_tags = soup.find_all('a')
    if links_with_text == 0:
        return [tag.get('href') for tag in a_tags if tag.get('href')]
    return [(tag.get('href'), tag.text) for tag in a_tags if tag.get('href')]
|  | ||||
| # 检查链接的有效性 | ||||
def check_link(url, timeout=3, allow_redirects=True):
    """Return True when a HEAD request to *url* answers with status 200.

    Any ``requests`` error (connection failure, timeout, invalid URL, ...)
    counts as an invalid link and yields False.
    """
    import requests
    try:
        status = requests.head(url, timeout=timeout, allow_redirects=allow_redirects).status_code
    except requests.exceptions.RequestException:
        return False
    return status == 200
|  | ||||
| # 检查链接数组中链接的有效性 | ||||
def check_link_array(link_array, timeout=3, allow_redirects=True, try_again=0, print_show=1):
    """Check every link in *link_array* and return the ones that failed.

    Args:
        link_array: Links to check; the placeholder ``'#'`` always passes.
        timeout: Forwarded to ``guan.check_link``.
        allow_redirects: Forwarded to ``guan.check_link``.
        try_again: When truthy, the failed links are checked a second time
            and only those failing twice are returned.
        print_show: When truthy, every failing link is printed.
    """
    import guan

    def collect_failures(candidates):
        # One checking pass: return (and optionally print) the bad links.
        failures = []
        for candidate in candidates:
            if candidate != '#' and not guan.check_link(candidate, timeout=timeout, allow_redirects=allow_redirects):
                failures.append(candidate)
                if print_show:
                    print(candidate)
        return failures

    first_pass_failures = collect_failures(link_array)
    if not try_again:
        return first_pass_failures
    if print_show:
        print('\nTry again:\n')
    return collect_failures(first_pass_failures)
|  | ||||
| # 生成二维码 | ||||
def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'):
    """Encode *data* as a QR code and save the image to ``filename + file_format``."""
    import qrcode
    output_path = filename + file_format
    qrcode.make(data).save(output_path)
|  | ||||
| # 将PDF文件转成文本 | ||||
def pdf_to_text(pdf_path):
    """Extract the text of a PDF file with pdfminer.

    Args:
        pdf_path: Path of the PDF file.

    Returns:
        The stripped text of every text box of every page, concatenated
        without separators.

    Raises:
        PDFTextExtractionNotAllowed: If the document forbids extraction.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    import logging
    # Quieten logging while parsing (note: this changes global logging state).
    logging.Logger.propagate = False
    logging.getLogger().setLevel(logging.ERROR)
    # Keep the file open for the whole extraction and close it afterwards,
    # including when an error is raised.
    with open(pdf_path, 'rb') as pdf_file:
        praser = PDFParser(pdf_file)
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)
        doc.initialize()
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    return ''.join(pieces)
|  | ||||
| # 获取PDF文件页数 | ||||
def get_pdf_page_number(pdf_path):
    """Return the number of pages of the PDF file at *pdf_path*."""
    import PyPDF2
    # The context manager closes the file, which was previously left open.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
    return num_pages
|  | ||||
| # 获取PDF文件指定页面的内容 | ||||
def pdf_to_txt_for_a_specific_page(pdf_path, page_num=1):
    """Return the extracted text of one page of a PDF file.

    Args:
        pdf_path: Path of the PDF file.
        page_num: 1-based page number.

    Raises:
        IndexError: If *page_num* is outside ``1..number_of_pages``.
    """
    import PyPDF2
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)
        if not 1 <= page_num <= num_pages:
            raise IndexError(f"page_num {page_num} is out of range (1..{num_pages})")
        page_text = pdf_reader.pages[page_num - 1].extract_text()
    return page_text
|  | ||||
| # 获取PDF文献中的链接。例如: link_starting_form='https://doi.org' | ||||
def get_links_from_pdf(pdf_path, link_starting_form=''):
    """Collect the URI link annotations of a PDF file.

    Args:
        pdf_path: Path of the PDF file.
        link_starting_form: Only links matching this at their start are
            kept, e.g. ``'https://doi.org'``. The value is used as a regular
            expression anchored with ``^``.

    Returns:
        The links in page order. Within a page, a link identical to the one
        kept immediately before it is skipped.
    """
    import PyPDF2
    import re
    reader = PyPDF2.PdfReader(pdf_path)
    # Compile once instead of once per annotation.
    prefix_pattern = re.compile('^' + link_starting_form)
    links = []
    for page in reader.pages:
        page_object = page.get_object()
        if '/Annots' not in page_object.keys():
            continue
        previous_uri = ''
        for annotation in page_object['/Annots']:
            annotation_object = annotation.get_object()
            if '/A' not in annotation_object.keys():
                continue
            action = annotation_object['/A']
            if '/URI' not in action:
                continue
            uri = action['/URI']
            if prefix_pattern.search(uri) and uri != previous_uri:
                links.append(uri)
                previous_uri = uri
    return links
|  | ||||
| # 通过Sci-Hub网站下载文献 | ||||
def download_with_scihub(address=None, num=1):
    """Download papers through the Sci-Hub website into the current directory.

    Args:
        address: Identifier submitted to Sci-Hub. Used only when ``num`` is
            1; otherwise the identifiers are read interactively.
        num: Number of papers to download. When it is not 1 (or *address*
            is ``None``), *num* identifiers are requested via ``input``.

    Progress information is printed; nothing is returned.
    """
    from bs4 import BeautifulSoup
    import re
    import requests
    import os
    if num == 1 and address is not None:
        address_array = [address]
    else:
        address_array = [input('\nInput:') for _ in range(num)]
    for address in address_array:
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\nResponse:', r)
        print('Address:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        # The PDF location is read from the <embed> tag (an earlier version
        # read soup.iframe['src'], which failed to get the PDF URL).
        pdf_URL = soup.embed['src']
        if not pdf_URL.startswith('https:'):
            pdf_URL = 'https:'+pdf_URL
        print('PDF address:', pdf_URL)
        # Search the reversed URL so the match is the path segment that ends
        # with the last "pdf" in the URL.
        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
        print('PDF name:', name)
        print('Directory:', os.getcwd())
        print('\nDownloading...')
        # Stream in reasonably sized chunks and release the connection.
        with requests.get(pdf_URL, stream=True) as r, open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
        print('Completed!\n')
    if num != 1:
        print('All completed!\n')
|  | ||||
| # 将字符串转成音频 | ||||
def str_to_audio(str='hello world', filename='str', rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak a string with pyttsx3 and/or save it as an audio file.

    Args:
        str: Text to convert.
        filename: Output file name without extension.
        rate: Value set as the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: When 1, the text is spoken aloud.
        save: When 1, the text is saved to ``filename + '.wav'``.
        compress: When 1 (and *save* is 1), the wav file is converted to
            ``filename + '.mp3'`` and the wav file is removed.
        bitrate: Bitrate forwarded to ``guan.compress_wav_to_mp3``.
        print_text: When 1, the text is printed first.
    """
    import pyttsx3
    import guan
    if print_text == 1:
        print(str)
    engine = pyttsx3.init()
    available_voices = engine.getProperty('voices')
    engine.setProperty('voice', available_voices[voice].id)
    engine.setProperty("rate", rate)
    if save == 1:
        wav_name = filename + '.wav'
        engine.save_to_file(str, wav_name)
        engine.runAndWait()
        print('Wav file saved!')
        if compress == 1:
            import os
            # Convert via a temporary name, then drop the uncompressed file.
            os.rename(wav_name, 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename + '.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read == 1:
        engine.say(str)
        engine.runAndWait()
|  | ||||
| # 将txt文件转成音频 | ||||
def txt_to_audio(txt_path, rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak the content of a UTF-8 text file and/or save it as an audio file.

    Args:
        txt_path: Path of the text file.
        rate: Value set as the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: When 1, the text is spoken aloud.
        save: When 1, the audio is saved as a wav file in the current
            directory, named after the last path component of *txt_path*
            with its final four characters removed.
        compress: When 1 (and *save* is 1), the wav file is converted to mp3
            and the wav file is removed.
        bitrate: Bitrate forwarded to ``guan.compress_wav_to_mp3``.
        print_text: When 1, the text is printed first.
    """
    import pyttsx3
    import guan
    with open(txt_path, 'r', encoding='utf-8') as f:
        text = f.read()
    if print_text==1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save==1:
        import re
        # Raw string: same pattern as before (split on "/", "," and "\"),
        # without the invalid "\]" escape sequence.
        filename = re.split(r'[/,\\]', txt_path)[-1][:-4]
        engine.save_to_file(text, filename+'.wav')
        engine.runAndWait()
        print('Wav file saved!')
        if compress==1:
            import os
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read==1:
        engine.say(text)
        engine.runAndWait()
|  | ||||
| # 将PDF文件转成音频 | ||||
def pdf_to_audio(pdf_path, rate=125, voice=1, read=1, save=0, compress=0, bitrate='16k', print_text=0):
    """Speak the text of a PDF file and/or save it as an audio file.

    The text is obtained with ``guan.pdf_to_text`` and its newlines are
    replaced by spaces.

    Args:
        pdf_path: Path of the PDF file.
        rate: Value set as the engine's "rate" property.
        voice: Index into the engine's list of voices.
        read: When 1, the text is spoken aloud.
        save: When 1, the audio is saved as a wav file in the current
            directory, named after the last path component of *pdf_path*
            with its final four characters removed.
        compress: When 1 (and *save* is 1), the wav file is converted to mp3
            and the wav file is removed.
        bitrate: Bitrate forwarded to ``guan.compress_wav_to_mp3``.
        print_text: When 1, the text is printed first.
    """
    import pyttsx3
    import guan
    text = guan.pdf_to_text(pdf_path)
    text = text.replace('\n', ' ')
    if print_text==1:
        print(text)
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[voice].id)
    engine.setProperty("rate", rate)
    if save==1:
        import re
        # Raw string: same pattern as before (split on "/", "," and "\"),
        # without the invalid "\]" escape sequence.
        filename = re.split(r'[/,\\]', pdf_path)[-1][:-4]
        engine.save_to_file(text, filename+'.wav')
        engine.runAndWait()
        print('Wav file saved!')
        if compress==1:
            import os
            os.rename(filename+'.wav', 'temp.wav')
            guan.compress_wav_to_mp3('temp.wav', output_filename=filename+'.mp3', bitrate=bitrate)
            os.remove('temp.wav')
    if read==1:
        engine.say(text)
        engine.runAndWait()
|  | ||||
| # 将wav音频文件压缩成MP3音频文件 | ||||
def compress_wav_to_mp3(wav_path, output_filename='a.mp3', bitrate='16k'):
    """Convert the wav file at *wav_path* into an mp3 file.

    Args:
        wav_path: Path of the input wav file.
        output_filename: Path of the mp3 file to write.
        bitrate: Bitrate passed to the pydub exporter.

    Note: besides installing pydub, you may also need to download FFmpeg
    from http://www.ffmpeg.org/download.html and add its bin path to the
    environment variable.
    """
    from pydub import AudioSegment
    # The input is a wav file, so load it with the wav loader (it was
    # previously loaded with from_mp3).
    sound = AudioSegment.from_wav(wav_path)
    sound.export(output_filename,format="mp3",bitrate=bitrate)
|  | ||||
| # 将WordPress导出的XML格式文件转换成多个MarkDown格式的文件 | ||||
def convert_wordpress_xml_to_markdown(xml_file='./a.xml', convert_content=1, replace_more=None):
    """Split a WordPress XML export into one Markdown file per ``<item>``.

    Each file is written to the current directory as ``<title>.md`` (with
    characters that are invalid in file names replaced by spaces) and starts
    with ``# <title>``.

    Args:
        xml_file: Path of the exported XML file.
        convert_content: When 1, a small set of HTML tags is stripped or
            turned into Markdown (comments, p/ol/ul/strong/li, h2-h4) and
            runs of blank lines are collapsed; otherwise the content is
            written unchanged.
        replace_more: Extra substrings to delete from the content when
            *convert_content* is 1. ``None`` means none.
    """
    import re
    import xml.etree.ElementTree as ET
    namespaces = {'content': 'http://purl.org/rss/1.0/modules/content/'}
    removed_tags = ('<p>', '</p>', '<ol>', '</ol>', '<ul>', '</ul>', '<strong>', '</strong>', '</li>')
    extra_removals = replace_more or ()
    root = ET.parse(xml_file).getroot()
    for item in root.findall('.//item'):
        title = item.find('title').text
        # An empty <content:encoded/> element has text None; treat as ''.
        content = item.find('.//content:encoded', namespaces=namespaces).text or ''
        if convert_content == 1:
            content = re.sub(r'<!--.*?-->', '', content)
            for tag in removed_tags:
                content = content.replace(tag, '')
            content = content.replace('<li>', '+ ')
            # Closing tags of every heading level that is converted below.
            for closing_tag in ('</h2>', '</h3>', '</h4>'):
                content = content.replace(closing_tag, '')
            content = re.sub(r'<h2.*?>', '## ', content)
            content = re.sub(r'<h3.*?>', '### ', content)
            content = re.sub(r'<h4.*?>', '#### ', content)
            for replace_item in extra_removals:
                content = content.replace(replace_item, '')
            # Collapse any run of three or more newlines into one blank line.
            content = re.sub(r'\n{3,}', '\n\n', content)
        markdown_content = f"# {title}\n{content}"
        markdown_file_path = f"{title}.md"
        cleaned_filename = re.sub(r'[/:*?"<>|\'\\]', ' ', markdown_file_path)
        with open(cleaned_filename, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_content)
|  | ||||
| # 凯利公式 | ||||
def kelly_formula(p, b, a=1):
    """Evaluate the Kelly formula ``f = p/a - (1-p)/b``.

    Args:
        p: First numerator; the second term uses ``1 - p``.
        b: Divisor of the ``(1 - p)`` term.
        a: Divisor of the ``p`` term.
    """
    win_term = p / a
    loss_term = (1 - p) / b
    return win_term - loss_term
|  | ||||
| # 获取所有股票 | ||||
def all_stocks():
    """Fetch the table returned by ``akshare.stock_zh_a_spot_em``.

    Returns:
        ``(title, stock_data)``: the column names as a numpy array and the
        table values (one row per stock).
    """
    import numpy as np
    import akshare as ak
    spot_table = ak.stock_zh_a_spot_em()
    column_names = np.array(spot_table.columns)
    return column_names, spot_table.values
|  | ||||
| # 获取所有股票的代码 | ||||
def all_stock_symbols():
    """Return column 1 of the table from ``guan.all_stocks`` (the stock symbols)."""
    import guan
    _title, stock_data = guan.all_stocks()
    return stock_data[:, 1]
|  | ||||
| # 股票代码的分类 | ||||
def stock_symbols_classification():
    """Group all stock symbols by their leading digits.

    Returns:
        Six lists, in this order:
        symbols starting with 600/601/603/605 (Shanghai main board),
        000/001/002/003 (Shenzhen main board),
        300/301 (ChiNext),
        688/689 (STAR Market),
        82/83/87/88/430/420/400 (NEEQ),
        and every symbol that matches none of the prefixes above.
    """
    import guan
    prefix_groups = (
        ('600', '601', '603', '605'),                       # Shanghai main board
        ('000', '001', '002', '003'),                       # Shenzhen main board
        ('300', '301'),                                     # ChiNext
        ('688', '689'),                                     # STAR Market
        ('82', '83', '87', '88', '430', '420', '400'),      # NEEQ
    )
    stock_symbols = guan.all_stock_symbols()
    classified = [[] for _ in prefix_groups]
    stock_symbols_others = []
    for stock_symbol in stock_symbols:
        for bucket, prefixes in zip(classified, prefix_groups):
            if stock_symbol.startswith(prefixes):
                bucket.append(stock_symbol)
                break
        else:
            # No prefix matched. (The previous code called
            # ``stock_symbols_others.others.append`` and crashed here.)
            stock_symbols_others.append(stock_symbol)
    stock_symbols_60, stock_symbols_00, stock_symbols_30, stock_symbols_68, stock_symbols_8_4 = classified
    return stock_symbols_60, stock_symbols_00, stock_symbols_30, stock_symbols_68, stock_symbols_8_4, stock_symbols_others
|  | ||||
# Number of stock symbols in each classification group
def statistics_of_stock_symbols_classification():
    """Count the stock symbols in every group of ``guan.stock_symbols_classification()``.

    Returns:
        A 6-tuple of ints, in the same order as the classifier's output:
        (count_60, count_00, count_30, count_68, count_8_4, count_others).
    """
    import guan
    # Unpack explicitly so an unexpected number of groups still fails loudly.
    group_60, group_00, group_30, group_68, group_8_4, group_others = guan.stock_symbols_classification()
    all_groups = (group_60, group_00, group_30, group_68, group_8_4, group_others)
    return tuple(len(group) for group in all_groups)
|  | ||||
# Look up a stock name from its symbol
def find_stock_name_from_symbol(symbol='000002'):
    """Return the name stored in the data row that contains ``symbol``.

    Every row returned by ``guan.all_stocks()`` is scanned; when several rows
    contain ``symbol`` the last one wins.

    Args:
        symbol: Stock symbol to search for (matched with ``in`` against each row).

    Returns:
        Element 2 of the matching row, or ``None`` when no row contains
        ``symbol``.
    """
    import guan
    _, stock_data = guan.all_stocks()
    # Start from None so an unknown symbol yields None instead of raising
    # UnboundLocalError at the return statement.
    stock_name = None
    for stock in stock_data:
        if symbol in stock:
            stock_name = stock[2]
    return stock_name
|  | ||||
# Ranking by market capitalization
def sorted_market_capitalization(num=10):
    """Rank the stocks from ``guan.all_stocks()`` by column 17, largest first.

    Rows whose column 9 is NaN are dropped before ranking.
    NOTE(review): the filter uses column 9 while the ranking uses column 17;
    confirm against the ``guan.all_stocks()`` column layout that this is intended.

    Args:
        num: Number of entries to return. ``None`` (or a value larger than the
            number of usable rows) returns all of them.

    Returns:
        A list of ``[rank, symbol, name, value]`` entries where ``rank`` starts
        at 1, ``symbol`` is column 1, ``name`` is column 2 and ``value`` is
        column 17 divided by 1e8. Empty when no usable rows exist.
    """
    import numpy as np
    import guan
    _, stock_data = guan.all_stocks()
    valid_rows = [stock for stock in stock_data if not np.isnan(float(stock[9]))]
    if not valid_rows:
        # np.array([]) is 1-D, so the column slice below would fail.
        return []
    new_stock_data = np.array(valid_rows)
    # argsort is ascending; reverse it to get the largest values first.
    list_index = np.argsort(new_stock_data[:, 17])[::-1]
    if num is None or num > len(list_index):
        # Clamp so that asking for more rows than exist does not raise IndexError.
        num = len(list_index)
    sorted_array = []
    for rank, row_index in enumerate(list_index[:max(num, 0)], start=1):
        row = new_stock_data[row_index]
        sorted_array.append([rank, row[1], row[2], row[17]/1e8])
    return sorted_array
|  | ||||
# Ranking of US stocks by market capitalization
def sorted_market_capitalization_us(num=10):
    """Rank the rows of ``akshare.stock_us_spot_em()`` by column 9, largest first.

    Rows whose column 9 is NaN are dropped before ranking.

    Args:
        num: Number of entries to return. ``None`` (or a value larger than the
            number of usable rows) returns all of them.

    Returns:
        A list of ``[rank, symbol, name, value]`` entries where ``rank`` starts
        at 1, ``symbol`` is column 15, ``name`` is column 1 and ``value`` is
        column 9 divided by 1e8. Empty when no usable rows exist.
    """
    import akshare as ak
    import numpy as np
    stock_data = ak.stock_us_spot_em().values
    valid_rows = [stock for stock in stock_data if not np.isnan(float(stock[9]))]
    if not valid_rows:
        # np.array([]) is 1-D, so the column slice below would fail.
        return []
    new_stock_data = np.array(valid_rows)
    # argsort is ascending; reverse it to get the largest values first.
    list_index = np.argsort(new_stock_data[:, 9])[::-1]
    if num is None or num > len(list_index):
        # Clamp so that asking for more rows than exist does not raise IndexError.
        num = len(list_index)
    sorted_array = []
    for rank, row_index in enumerate(list_index[:max(num, 0)], start=1):
        row = new_stock_data[row_index]
        sorted_array.append([rank, row[15], row[1], row[9]/1e8])
    return sorted_array
|  | ||||
# Fetch the historical data of a single stock
def history_data_of_one_stock(symbol='000002', period='daily', start_date="19000101", end_date='21000101'):
    """Fetch one stock's history through ``akshare.stock_zh_a_hist``.

    Args:
        symbol: Stock symbol passed straight to akshare.
        period: Sampling period; the values listed by the original author are
            'daily', 'weekly' and 'monthly'.
        start_date: Start date string forwarded to akshare.
        end_date: End date string forwarded to akshare.

    Returns:
        ``(title, stock_data)`` where ``title`` is a NumPy array of the column
        names and ``stock_data`` holds the rows in the reverse of the order
        akshare returned them.
    """
    import numpy as np
    import akshare as ak
    history = ak.stock_zh_a_hist(symbol=symbol, period=period, start_date=start_date, end_date=end_date)
    column_names = np.array(history.columns)
    reversed_rows = history.values[::-1]
    return column_names, reversed_rows
|  | ||||
# Draw a stock chart
def plot_stock_line(date_array, opening_array, closing_array, high_array, low_array, lw_open_close=6, lw_high_low=2, xlabel='date', ylabel='price', title='', fontsize=20, labelsize=20, adjust_bottom=0.2, adjust_left=0.2, fontfamily='Times New Roman'):
    """Draw one vertical open/close bar and one high/low bar per date, then show the figure.

    A date whose opening value is less than or equal to its closing value is
    drawn in red; any other date is drawn in green.

    Args:
        date_array: x positions, one per bar.
        opening_array, closing_array: Ends of the thick bar for each date.
        high_array, low_array: Ends of the thin bar for each date.
        lw_open_close: Line width of the open/close bar.
        lw_high_low: Line width of the high/low bar.
        xlabel, ylabel, title: Axis labels and figure title.
        fontsize: Font size of the title and axis labels.
        labelsize, adjust_bottom, adjust_left: Forwarded to
            ``guan.import_plt_and_start_fig_ax``.
        fontfamily: Forwarded to ``guan.import_plt_and_start_fig_ax``; the
            title and labels only get an explicit font family when this is
            'Times New Roman'.
    """
    import guan
    plt, fig, ax = guan.import_plt_and_start_fig_ax(adjust_bottom=adjust_bottom, adjust_left=adjust_left, labelsize=labelsize, fontfamily=fontfamily)
    text_options = {'fontsize': fontsize}
    if fontfamily=='Times New Roman':
        text_options['fontfamily'] = 'Times New Roman'
    ax.set_title(title, **text_options)
    ax.set_xlabel(xlabel, **text_options)
    ax.set_ylabel(ylabel, **text_options)
    for index, date in enumerate(date_array):
        open_price = opening_array[index]
        close_price = closing_array[index]
        bar_color = 'red' if open_price <= close_price else 'green'
        # Thick bar spans open..close, thin bar spans low..high.
        ax.vlines(date, open_price, close_price, linestyle='-', color=bar_color, lw=lw_open_close)
        ax.vlines(date, low_array[index], high_array[index], color=bar_color, linestyle='-', lw=lw_high_low)
    plt.show()
    plt.close('all')
|  | ||||
# Usage statistics of the Guan package (counts installations and imports only)
def statistics_of_guan_package(function_name=None):
    """Send a best-effort usage record to the statistics server.

    The record is a JSON object with the server name, the current date and
    time, the installed ``guan`` version and the machine's MAC address, plus
    ``function_name`` when one is given. Any ordinary failure (no network,
    timeout, helper error) is silently ignored so callers are never disturbed.

    Args:
        function_name: Optional name of the function being reported; when
            ``None`` the ``'function_name'`` key is omitted from the record.

    Returns:
        None.
    """
    import guan
    try:
        import json
        import socket
        datetime_date = guan.get_date()
        datetime_time = guan.get_time()
        current_version = guan.get_current_version('guan')
        # The context manager closes the socket even when connect/send fails.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
            client_socket.settimeout(0.5)
            client_socket.connect(('socket.guanjihuan.com', 12345))
            message = {
                'server': 'py.guanjihuan.com',
                'date': datetime_date,
                'time': datetime_time,
                'version': current_version,
                'MAC_address': guan.get_mac_address(),
            }
            if function_name is not None:
                message['function_name'] = function_name
            # sendall keeps writing until the whole payload is sent.
            client_socket.sendall(json.dumps(message).encode())
    except Exception:
        # Statistics are optional: swallow ordinary errors, but let
        # KeyboardInterrupt/SystemExit propagate.
        pass
|  | ||||
| # # Guan软件包升级检查和提示(如果无法连接或者版本为最新,那么均没有提示) | ||||
| # def notification_of_upgrade(timeout=5): | ||||
| #     try: | ||||
| #         import guan | ||||
| #         latest_version = guan.get_latest_version(package_name='guan', timeout=timeout) | ||||
| #         current_version = guan.get_current_version('guan') | ||||
| #         if latest_version != None and current_version != None: | ||||
| #             if latest_version != current_version: | ||||
| #                 print('升级提示:您当前使用的版本是 guan-'+current_version+',目前已经有最新版本 guan-'+latest_version+'。您可以通过以下命令对软件包进行升级:pip install --upgrade guan -i https://pypi.python.org/simple 或 pip install --upgrade guan') | ||||
| #     except: | ||||
| #         pass | ||||
		Reference in New Issue
	
	Block a user