Compare commits

..

7 Commits

Author SHA1 Message Date
4986a0d617 0.1.200 2026-01-11 03:45:44 +08:00
a65f06332e 0.1.199 2026-01-08 14:19:45 +08:00
b8fea19fc3 0.1.198 2026-01-08 12:19:44 +08:00
fd43e303fa 0.1.196 2026-01-05 16:08:34 +08:00
e75bf4d5f4 0.1.195 2025-12-23 15:42:55 +08:00
2a65f6f639 0.1.194 2025-12-19 12:15:54 +08:00
579a575087 0.1.194 2025-12-19 12:15:03 +08:00
5 changed files with 101 additions and 8 deletions

View File

@@ -1,7 +1,7 @@
[metadata] [metadata]
# replace with your username: # replace with your username:
name = guan name = guan
version = 0.1.193 version = 0.1.200
author = guanjihuan author = guanjihuan
author_email = guanjihuan@163.com author_email = guanjihuan@163.com
description = An open source python package description = An open source python package

View File

@@ -1,6 +1,6 @@
Metadata-Version: 2.4 Metadata-Version: 2.4
Name: guan Name: guan
Version: 0.1.193 Version: 0.1.200
Summary: An open source python package Summary: An open source python package
Home-page: https://py.guanjihuan.com Home-page: https://py.guanjihuan.com
Author: guanjihuan Author: guanjihuan

View File

@@ -11,6 +11,16 @@ def logging_with_day_and_time(content='', filename='time_logging', file_format='
else: else:
f2.write(datetime_today+' '+datetime_time+' '+str(content)+'\n') f2.write(datetime_today+' '+datetime_time+' '+str(content)+'\n')
# 获取当前位置的 Unix 时间戳,并打印某段程序的运行时间
def record_time_and_print_running_time(start_time=None):
    """Return the current Unix timestamp and print the time elapsed since ``start_time``.

    Typical use is to chain calls, feeding each returned value into the next
    call so that every call reports the duration of the preceding section.

    Parameters:
        start_time: Timestamp returned by a previous call (seconds, as given
            by ``time.time()``), or ``None`` to mark the first checkpoint.

    Returns:
        The current ``time.time()`` value taken at the start of this call.
    """
    import time
    current_time = time.time()
    # Identity test instead of ``== None``: the equality operator dispatches
    # to the argument's ``__eq__``, which is not a reliable "no value" check.
    if start_time is None:
        print("\n--- 开始计时(第一个记录点)---\n")
    else:
        print(f"\n--- 自上一个记录点已运行: {current_time - start_time:.2f} 秒 ---\n")
    return current_time
# 使用该函数运行某个函数并获取函数计算时间(秒) # 使用该函数运行某个函数并获取函数计算时间(秒)
def timer(function_name, *args, **kwargs): def timer(function_name, *args, **kwargs):
import time import time
@@ -88,7 +98,7 @@ def word_diff(a, b, print_show=1):
jieba.setLogLevel(logging.ERROR) jieba.setLogLevel(logging.ERROR)
a_words = jieba.lcut(a) a_words = jieba.lcut(a)
b_words = jieba.lcut(b) b_words = jieba.lcut(b)
sm = difflib.SequenceMatcher(None, a_words, b_words) sm = difflib.SequenceMatcher(None, a_words, b_words, autojunk=False)
result = [] result = []
for tag, i1, i2, j1, j2 in sm.get_opcodes(): for tag, i1, i2, j1, j2 in sm.get_opcodes():
if tag == "equal": if tag == "equal":
@@ -115,7 +125,7 @@ def word_diff_to_html(a, b, filename='diff_result', write_file=1):
jieba.setLogLevel(logging.ERROR) jieba.setLogLevel(logging.ERROR)
a_words = jieba.lcut(a) a_words = jieba.lcut(a)
b_words = jieba.lcut(b) b_words = jieba.lcut(b)
sm = difflib.SequenceMatcher(None, a_words, b_words) sm = difflib.SequenceMatcher(None, a_words, b_words, autojunk=False)
html_parts = [] html_parts = []
for tag, i1, i2, j1, j2 in sm.get_opcodes(): for tag, i1, i2, j1, j2 in sm.get_opcodes():
if tag == "equal": if tag == "equal":
@@ -231,6 +241,42 @@ def standard_deviation_with_formula(data_array):
std_result = np.sqrt(averaged_squared_data-averaged_data**2) std_result = np.sqrt(averaged_squared_data-averaged_data**2)
return std_result return std_result
# 使用公式计算皮尔逊相关系数
def calculate_pearson_correlation(x_array, y_array):
    """Compute the Pearson correlation coefficient directly from its formula.

    r = sum((x - mean_x) * (y - mean_y))
        / sqrt(sum((x - mean_x)**2) * sum((y - mean_y)**2))

    Parameters:
        x_array: Sequence or numpy array of numbers.
        y_array: Sequence or numpy array of numbers, same length as ``x_array``.

    Returns:
        The correlation coefficient. If either input has zero variance the
        denominator is zero and numpy yields ``nan`` (with a RuntimeWarning).
    """
    import numpy as np
    # Coerce to arrays so that plain lists/tuples work too; ``list - float``
    # would otherwise raise TypeError.
    x_values = np.asarray(x_array)
    y_values = np.asarray(y_array)
    x_deviation = x_values - np.mean(x_values)
    y_deviation = y_values - np.mean(y_values)
    numerator = np.sum(x_deviation * y_deviation)
    sum_sq_x = np.sum(x_deviation ** 2)
    sum_sq_y = np.sum(y_deviation ** 2)
    denominator = np.sqrt(sum_sq_x * sum_sq_y)
    correlation = numerator / denominator
    return correlation
# 使用 scipy 计算皮尔逊相关系数和 p 值
def calculate_pearson_correlation_with_scipy(x_array, y_array):
    """Compute the Pearson correlation coefficient and its p-value with scipy.

    Parameters:
        x_array: First sequence of numbers.
        y_array: Second sequence of numbers, same length as ``x_array``.

    Returns:
        A ``(correlation, p_value)`` tuple as produced by
        ``scipy.stats.pearsonr``.
    """
    import scipy.stats
    pearson_result = scipy.stats.pearsonr(x_array, y_array)
    coefficient, significance = pearson_result
    return coefficient, significance
# 使用 scipy 计算多个数组的皮尔逊相关系数和 p 值的矩阵
def calculate_correlation_matrix_for_multiple_arrays(multiple_arrays):
    """Build pairwise Pearson correlation and p-value matrices with scipy.

    Parameters:
        multiple_arrays: Sized collection of equal-length numeric sequences.

    Returns:
        ``(correlation_matrix, p_value_matrix)``: two square numpy arrays in
        which entry ``[i, j]`` is the result of ``scipy.stats.pearsonr`` for
        the i-th and j-th input sequences.
    """
    import numpy as np
    import scipy.stats
    size = len(multiple_arrays)
    p_value_matrix = np.zeros((size, size))
    correlation_matrix = np.zeros((size, size))
    # Every ordered pair is evaluated, including each array with itself.
    for i, first in enumerate(multiple_arrays):
        for j, second in enumerate(multiple_arrays):
            coefficient, significance = scipy.stats.pearsonr(first, second)
            correlation_matrix[i, j] = coefficient
            p_value_matrix[i, j] = significance
    return correlation_matrix, p_value_matrix
# 获取两个模式之间的字符串 # 获取两个模式之间的字符串
def get_string_between_two_patterns(original_string, start, end, include_start_and_end=0): def get_string_between_two_patterns(original_string, start, end, include_start_and_end=0):
import re import re
@@ -337,6 +383,14 @@ def run_programs_sequentially(program_files=['./a.py', './b.py'], execute='pytho
end = time.time() end = time.time()
print('Total running time = '+str((end-start)/60)+' min') print('Total running time = '+str((end-start)/60)+' min')
# 根据 “.” 和 “。” 符号进行分句
def split_text_into_sentences(text):
    """Split text into sentences at "。" and at "." followed by whitespace or end of text.

    A "." that is directly followed by a non-space character (for example in
    "3.14") is not a boundary. Each sentence keeps its terminating mark, is
    stripped of surrounding whitespace, and empty pieces are discarded.

    Parameters:
        text: The string to split.

    Returns:
        A list of sentence strings.
    """
    import re
    # Zero-width boundaries: after a Chinese full stop, or after a period
    # when the next character is whitespace or the text ends.
    boundary = re.compile(r'(?<=[。])|(?<=\.)(?=\s|$)')
    sentence_array = []
    for piece in boundary.split(text):
        trimmed = piece.strip()
        if trimmed:
            sentence_array.append(trimmed)
    return sentence_array
# 根据一定的字符长度来分割文本 # 根据一定的字符长度来分割文本
def split_text(text, width=100): def split_text(text, width=100):
split_text_list = [text[i:i+width] for i in range(0, len(text), width)] split_text_list = [text[i:i+width] for i in range(0, len(text), width)]

View File

@@ -471,7 +471,7 @@ def open_file(filename='a', file_format='.txt', mode='add'):
f = open(filename+file_format, 'w', encoding='UTF-8') f = open(filename+file_format, 'w', encoding='UTF-8')
return f return f
# 打印到TXT文件 # 打印到TXT文件(补充内容)
def print_to_file(*args, filename='print_result', file_format='.txt', print_on=True): def print_to_file(*args, filename='print_result', file_format='.txt', print_on=True):
if print_on==True: if print_on==True:
for arg in args: for arg in args:
@@ -483,7 +483,12 @@ def print_to_file(*args, filename='print_result', file_format='.txt', print_on=T
f.write('\n') f.write('\n')
f.close() f.close()
# 读取文本文件内容。如果文件不存在,返回空字符串 # 写入到文本文件(覆盖内容)
def write_text_file(content, filename='a', file_format='.txt'):
    """Write ``content`` to a UTF-8 text file, replacing any existing content.

    Parameters:
        content: The string to write.
        filename: Path of the file without its extension.
        file_format: Extension appended to ``filename``.
    """
    target_path = filename + file_format
    with open(target_path, mode='w', encoding='UTF-8') as output_file:
        output_file.write(content)
# 读取文本文件内容(如果文件不存在,返回空字符串)
def read_text_file(file_path='./a.txt', make_file=None): def read_text_file(file_path='./a.txt', make_file=None):
import os import os
if not os.path.exists(file_path): if not os.path.exists(file_path):

View File

@@ -28,7 +28,18 @@ def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fil
with open(output_file, 'wb') as combined_file: with open(output_file, 'wb') as combined_file:
output_pdf.write(combined_file) output_pdf.write(combined_file)
# 使用pdfminer3k将PDF文件转成文本 # 使用pdfplumber将PDF文件转成文本
def pdf_to_text_with_pdfplumber(pdf_path):
    """Extract the text of every page of a PDF with pdfplumber.

    Parameters:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined by a blank line (``"\\n\\n"``). A page
        with no extractable text contributes an empty string.
    """
    import pdfplumber
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may return None for a page without text (reported for
        # older pdfplumber releases); normalise to '' so the join cannot fail.
        all_text = [page.extract_text() or '' for page in pdf.pages]
    content = "\n\n".join(all_text)
    return content
# 使用pdfminer3k将PDF文件转成文本仅仅支持旧版本的 pdfminer3k
def pdf_to_text_with_pdfminer3k(pdf_path): def pdf_to_text_with_pdfminer3k(pdf_path):
from pdfminer.pdfparser import PDFParser, PDFDocument from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
@@ -115,6 +126,29 @@ def get_links_from_pdf(pdf_path, link_starting_form=''):
old = u['/A']['/URI'] old = u['/A']['/URI']
return links return links
# 将某个文件夹中的某个类型的文本文件全部修改为另外一个编码,其他文件不变
def convert_file_encoding_for_one_directory(source_directory, target_directory, file_formats=['.m'], src_encoding='utf-8', dst_encoding='gb18030'):
    """Mirror a directory tree, re-encoding text files with the given extensions.

    Files whose name ends with one of ``file_formats`` (case-insensitive) are
    decoded with ``src_encoding`` and written with ``dst_encoding``. All other
    files, and any matching file that cannot be converted, are copied
    unchanged with ``shutil.copy2``.

    Parameters:
        source_directory: Root of the tree to read.
        target_directory: Root of the tree to create or fill.
        file_formats: Iterable of file-name suffixes to convert. It is only
            read, never modified.
        src_encoding: Encoding used to decode the matching files.
        dst_encoding: Encoding used to write the converted files.
    """
    import os
    import shutil
    # Lower-case the suffixes once; str.endswith accepts a tuple of options.
    suffixes = tuple(ext.lower() for ext in file_formats)
    os.makedirs(target_directory, exist_ok=True)
    for root, dirs, files in os.walk(source_directory):
        rel_path = os.path.relpath(root, source_directory)
        target_subdir = os.path.join(target_directory, rel_path) if rel_path != '.' else target_directory
        os.makedirs(target_subdir, exist_ok=True)
        for file in files:
            src_file = os.path.join(root, file)
            dst_file = os.path.join(target_subdir, file)
            if not file.lower().endswith(suffixes):
                shutil.copy2(src_file, dst_file)
                continue
            try:
                # newline='' disables newline translation in both directions,
                # so only the encoding changes and CRLF/LF are kept as-is.
                with open(src_file, 'r', encoding=src_encoding, newline='') as f:
                    content = f.read()
                with open(dst_file, 'w', encoding=dst_encoding, newline='') as f:
                    f.write(content)
            except Exception:
                # Best effort: a file that cannot be decoded/encoded is
                # copied byte-for-byte (overwriting any partial output).
                shutil.copy2(src_file, dst_file)
# 获取当前日期字符串 # 获取当前日期字符串
def get_date(bar=True): def get_date(bar=True):
import datetime import datetime
@@ -770,7 +804,7 @@ def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.
img = qrcode.make(data) img = qrcode.make(data)
img.save(filename+file_format) img.save(filename+file_format)
# 通过Sci-Hub网站下载文献 # 通过Sci-Hub网站下载文献(该方法可能失效)
def download_with_scihub(address=None, num=1): def download_with_scihub(address=None, num=1):
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re