删除一些不常用的文件

This commit is contained in:
guanjihuan 2023-12-05 15:54:45 +08:00
parent 817938eac1
commit f4b24eddf1
16 changed files with 0 additions and 1015 deletions

View File

@ -1,7 +1,4 @@
import imageio
import numpy as np
import os
# os.chdir('D:/data') # 设置文件读取和保存的位置
images = []
for i in range(1000):

View File

@ -1,78 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
"""
import PyPDF2
import os
import re
from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests
def main():
    """Prompt for a PDF name, collect its DOI links and download the references.

    The PDF is read from ``D:/`` and the downloads are saved to
    ``D:/Reference``; both locations are hard-coded below.
    """
    os.chdir('D:/')  # directory that holds the PDF file
    filename = input('输入PDF文件名')
    # The context manager closes the PDF even when link extraction raises.
    with open(filename+'.pdf', 'rb') as pdfFile:
        links = all_links_in_pdf(pdfFile)  # DOI links found in the PDF
    os.chdir('D:/Reference')  # directory where the references are saved
    download(links)  # download the references
def all_links_in_pdf(pdfFile):
    """Return the DOI links stored in the link annotations of an open PDF.

    Args:
        pdfFile: Binary file object of the PDF; handed to
            ``PyPDF2.PdfFileReader``.

    Returns:
        list: URIs that start with ``https://doi.org``, in page order. A URI
        equal to the previously kept one on the same page is skipped. Every
        kept link is also printed together with its running index.
    """
    # NOTE(review): PdfFileReader/getNumPages/getPage/getObject are the legacy
    # PyPDF2 names; confirm the installed PyPDF2 version still provides them.
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    doi_prefix = re.compile(r'^https://doi\.org')  # compiled once, dots escaped
    links = []
    print()
    for page_number in range(pdfReader.getNumPages()):
        page_object = pdfReader.getPage(page_number).getObject()
        if '/Annots' not in page_object.keys():
            continue
        previous = ''  # last DOI link kept on this page
        for annotation in page_object['/Annots']:
            entry = annotation.getObject()
            if '/A' not in entry.keys():
                continue
            action = entry['/A']
            # Actions without a URI (e.g. jumps inside the document) carry no
            # '/URI' key; skip them instead of raising KeyError.
            if '/URI' not in action:
                continue
            uri = action['/URI']
            if not doi_prefix.search(uri):  # ignore other kinds of links
                continue
            if uri != previous:  # ignore repeated links
                print(len(links), uri)
                links.append(uri)
            previous = uri
    return links
def download(links):
    """Download the selected references into the current working directory.

    Each selected link is posted to ``https://sci-hub.st/``; the PDF address is
    taken from the ``<embed>`` tag of the response page and the file is saved
    under the name found in that address.

    Args:
        links: List of DOI links, e.g. as returned by ``all_links_in_pdf``.
    """
    # Indices of the references to download; use range(len(links)) for all.
    for i0 in [0, 1, 3]:
        if i0 >= len(links):
            # The selection is hard-coded; skip indices the list does not have.
            print('Skipped index', i0, '- only', len(links), 'links available')
            continue
        address = links[i0]
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\n响应结果是:', r)
        print('访问的地址是:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        if soup.embed is None:
            # No embedded PDF on the response page; nothing to download.
            print('Skipped index', i0, '- no embedded PDF found')
            continue
        pdf_URL = soup.embed['src']
        # pdf_URL = soup.iframe['src'] # This is a code line of history version which fails to get pdf URL.
        if not pdf_URL.startswith('https:'):
            pdf_URL = 'https:'+pdf_URL
        print('PDF的地址是', pdf_URL)
        # Search the reversed URL so the match runs from 'pdf' back to the
        # nearest '/', then drop that leading slash.
        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
        print('PDF文件名是', name)
        print('保存的位置在:', os.getcwd())
        print('\n正在下载第',i0,'')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            # 8 KiB chunks write the same bytes with far fewer iterations
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
        print('',i0,'篇下载完成!')
    print('\n全部下载完成!')
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,30 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
"""
import PyPDF2
import os
import re
os.chdir('D:/')  # directory that holds the PDF file
filename = input('输入PDF文件名')
doi_prefix = re.compile(r'^https://doi\.org')  # compiled once, dots escaped
i0 = 0  # running index of the printed links
# The context manager closes the PDF even when parsing raises.
with open(filename+'.pdf','rb') as pdfFile:
    # NOTE(review): PdfFileReader/getNumPages/getPage/getObject are the legacy
    # PyPDF2 names; confirm the installed PyPDF2 version still provides them.
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    pages = pdfReader.getNumPages()
    for page in range(pages):
        pageSliced = pdfReader.getPage(page)
        pageObject = pageSliced.getObject()
        if '/Annots' not in pageObject.keys():
            continue
        ann = pageObject['/Annots']
        old = ''  # last DOI link printed on this page
        for a in ann:
            u = a.getObject()
            if '/A' not in u.keys():
                continue
            action = u['/A']
            # Actions without a URI carry no '/URI' key; skip them instead of
            # raising KeyError.
            if '/URI' not in action:
                continue
            uri = action['/URI']
            if doi_prefix.search(uri):  # ignore other kinds of links
                if uri != old:  # ignore repeated links
                    print(i0 , uri)
                    i0 += 1
                old = uri

View File

@ -1,322 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/8734
函数调用目录
1. x, y = read_one_dimensional_data(filename='a')
2. x, y, matrix = read_two_dimensional_data(filename='a')
3. write_one_dimensional_data(x, y, filename='a')
4. write_two_dimensional_data(x, y, matrix, filename='a')
5. plot(x, y, xlabel='x', ylabel='y', title='', filename='a')
6. plot_3d_surface(x, y, matrix, xlabel='x', ylabel='y', zlabel='z', title='', filename='a')
7. plot_contour(x, y, matrix, xlabel='x', ylabel='y', title='', filename='a')
8. plot_2d_scatter(x, y, value, xlabel='x', ylabel='y', title='', filename='a')
9. plot_3d_scatter(x, y, z, value, xlabel='x', ylabel='y', zlabel='z', title='', filename='a')
10. creat_animation(image_names, duration_time=0.5, filename='a')
11. eigenvalue_array = calculate_eigenvalue_with_one_parameter(x, matrix)
12. eigenvalue_array = calculate_eigenvalue_with_two_parameters(x, y, matrix)
函数对应的功能
1. 读取filename.txt文件中的一维数据y(x)
2. 读取filename.txt文件中的二维数据matrix(x,y)
3. 把一维数据y(x)写入filename.txt文件
4. 把二维数据matrix(x,y)写入filename.txt文件
5. 画y(x)并保存到filename.jpg文件具体画图格式可在函数中修改
6. 画3d_surface图并保存到filename.jpg文件具体画图格式可在函数中修改
7. 画contour图并保存到filename.jpg文件具体画图格式可在函数中修改
8. 画2d_scatter图并保存到filename.jpg文件具体画图格式可在函数中修改
9. 画3d_scatter图并保存到filename.jpg文件具体画图格式可在函数中修改
10. 制作动画
11. 在参数x下计算matrix函数的本征值eigenvalue_array[:, index]
12. 在参数(x,y)计算matrix函数的本征值eigenvalue_array[:, :, index]
"""
import numpy as np
# import os
# os.chdir('D:/data')
def main():
    """Placeholder entry point; currently does nothing."""
    pass  # read data + process data + save the new data
# 1. 读取filename.txt文件中的一维数据y(x)
def read_one_dimensional_data(filename='a'):
    """Read whitespace-separated columns y(x) from ``filename + '.txt'``.

    The first column of every non-empty line is x; the remaining columns are
    the y values. The number of columns is taken from the first line.

    Args:
        filename: Path of the text file without the ``.txt`` extension.

    Returns:
        tuple: ``(x, y)`` as NumPy arrays. ``x`` has one entry per data line
        and ``y`` has shape ``(lines, columns - 1)``. Both are empty arrays
        when the file holds no data lines.
    """
    with open(filename+'.txt', 'r') as f:
        text = f.read()
    row_list = text.split('\n')
    dim_column = len(row_list[0].split())
    x_values = []
    y_rows = []
    for row in row_list:
        column = row.split()
        if len(column) != 0:  # skip blank lines
            x_values.append(float(column[0]))
            y_rows.append([float(column[dim0+1]) for dim0 in range(dim_column-1)])
    # Build the arrays once at the end, so y is always an ndarray (also for a
    # single data line) and no array is re-allocated per row.
    x = np.array(x_values)
    y = np.array(y_rows)
    return x, y
# 2. 读取filename.txt文件中的二维数据matrix(x,y)
def read_two_dimensional_data(filename='a'):
    """Read a table matrix(x, y) from ``filename + '.txt'``.

    The first line holds a placeholder followed by the x values. Every later
    non-empty line holds one y value followed by that row of the matrix.

    Args:
        filename: Path of the text file without the ``.txt`` extension.

    Returns:
        tuple: ``(x, y, matrix)`` as NumPy arrays; ``matrix`` has shape
        ``(len(y), columns - 1)``. ``y`` and ``matrix`` are empty arrays when
        the file has no data lines.
    """
    with open(filename+'.txt', 'r') as f:
        text = f.read()
    row_list = text.split('\n')
    header = row_list[0].split()
    dim_column = len(header)
    x = np.array([float(value) for value in header[1:]])
    y_values = []
    matrix_rows = []
    for row in row_list[1:]:
        column = row.split()
        if len(column) != 0:  # skip blank lines
            y_values.append(float(column[0]))
            matrix_rows.append([float(column[dim0+1]) for dim0 in range(dim_column-1)])
    # Build the arrays once at the end, so matrix is always an ndarray (also
    # for a single data line).
    y = np.array(y_values)
    matrix = np.array(matrix_rows)
    return x, y, matrix
# 3. 把一维数据y(x)写入filename.txt文件
def write_one_dimensional_data(x, y, filename='a'):
    """Write y(x) to ``filename + '.txt'``, one line per x value.

    Args:
        x: Sequence of x values.
        y: 1-D data (one value per x) or 2-D data (one row per x); plain
            sequences are accepted as well as NumPy arrays.
        filename: Path of the output file without the ``.txt`` extension.
    """
    y = np.asarray(y)  # lets lists and tuples use the array indexing below
    with open(filename+'.txt', 'w') as f:
        for i0, x0 in enumerate(x):
            f.write(str(x0)+' ')
            if y.ndim == 1:
                f.write(str(y[i0])+'\n')
            elif y.ndim == 2:
                for j0 in range(y.shape[1]):
                    f.write(str(y[i0, j0])+' ')
                f.write('\n')
# 4. 把二维数据matrix(x,y)写入filename.txt文件
def write_two_dimensional_data(x, y, matrix, filename='a'):
    """Write matrix(x, y) as a table to ``filename + '.txt'``.

    The first line is a ``0`` placeholder followed by the x values; every
    later line is one y value followed by the matching matrix row.

    Args:
        x: Sequence of x values (column labels).
        y: Sequence of y values (row labels).
        matrix: 2-D data indexed as ``matrix[row, column]``; nested sequences
            are accepted as well as NumPy arrays.
        filename: Path of the output file without the ``.txt`` extension.
    """
    matrix = np.asarray(matrix)  # lets nested lists use [i, j] indexing
    with open(filename+'.txt', 'w') as f:
        f.write('0 ')
        for x0 in x:
            f.write(str(x0)+' ')
        f.write('\n')
        for i0, y0 in enumerate(y):
            f.write(str(y0))
            for j0, _ in enumerate(x):
                f.write(' '+str(matrix[i0, j0])+' ')
            f.write('\n')
# 5. 画y(x)图并保存到filename.jpg文件。具体画图格式可在函数中修改。
def plot(x, y, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw y(x) as a line plot with markers.

    The figure is saved to ``filename + '.jpg'`` when ``save == 1`` and shown
    when ``show == 1``; all figures are closed afterwards.
    """
    import matplotlib.pyplot as plt
    font = 'Times New Roman'
    figure, axes = plt.subplots()
    plt.subplots_adjust(bottom=0.20, left=0.18)
    axes.plot(x, y, '-o')
    axes.grid()
    axes.set_title(title, fontsize=20, fontfamily=font)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font)
    axes.tick_params(labelsize=20)
    # apply the font to the tick labels of both axes
    for tick_label in axes.get_xticklabels() + axes.get_yticklabels():
        tick_label.set_fontname(font)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
# 6. 画3d_surface图并保存到filename.jpg文件。具体画图格式可在函数中修改。
def plot_3d_surface(x, y, matrix, xlabel='x', ylabel='y', zlabel='z', title='', filename='a', show=1, save=0):
    """Draw matrix over the (x, y) grid as a 3-D surface with a colour bar.

    A 2-D ``matrix`` gives one surface; a 3-D ``matrix`` gives one surface per
    slice along its last axis. The figure is saved to ``filename + '.jpg'``
    when ``save == 1`` and shown when ``show == 1``.
    """
    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator
    font = 'Times New Roman'
    figure, axes = plt.subplots(subplot_kw={"projection": "3d"})
    plt.subplots_adjust(bottom=0.1, right=0.65)
    x, y = np.meshgrid(x, y)
    rank = len(matrix.shape)
    if rank == 2:
        surf = axes.plot_surface(x, y, matrix, cmap=cm.coolwarm, linewidth=0, antialiased=False)
    elif rank == 3:
        for layer in range(matrix.shape[2]):
            surf = axes.plot_surface(x, y, matrix[:,:,layer], cmap=cm.coolwarm, linewidth=0, antialiased=False)
    axes.set_title(title, fontsize=20, fontfamily=font)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font)
    axes.set_zlabel(zlabel, fontsize=20, fontfamily=font)
    axes.zaxis.set_major_locator(LinearLocator(5))
    axes.zaxis.set_major_formatter('{x:.2f}')
    axes.tick_params(labelsize=15)
    tick_labels = axes.get_xticklabels() + axes.get_yticklabels() + axes.get_zticklabels()
    for tick_label in tick_labels:
        tick_label.set_fontname(font)
    # separate axes for the colour bar, which uses the last surface drawn
    colorbar_axes = plt.axes([0.80, 0.15, 0.05, 0.75])
    colorbar = figure.colorbar(surf, cax=colorbar_axes)
    colorbar.ax.tick_params(labelsize=15)
    for bar_label in colorbar.ax.yaxis.get_ticklabels():
        bar_label.set_family(font)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
# 7. 画plot_contour图并保存到filename.jpg文件。具体画图格式可在函数中修改。
def plot_contour(x, y, matrix, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw matrix over the (x, y) grid as a filled contour plot with a colour bar.

    The figure is saved to ``filename + '.jpg'`` when ``save == 1`` and shown
    when ``show == 1``; all figures are closed afterwards.
    """
    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator
    font = 'Times New Roman'
    figure, axes = plt.subplots()
    plt.subplots_adjust(bottom=0.2, right=0.75, left = 0.16)
    x, y = np.meshgrid(x, y)
    filled = axes.contourf(x,y,matrix,cmap='jet')
    axes.set_title(title, fontsize=20, fontfamily=font)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font)
    axes.tick_params(labelsize=15)
    for tick_label in axes.get_xticklabels() + axes.get_yticklabels():
        tick_label.set_fontname(font)
    # separate axes for the colour bar
    colorbar_axes = plt.axes([0.78, 0.17, 0.08, 0.71])
    colorbar = figure.colorbar(filled, cax=colorbar_axes)
    colorbar.ax.tick_params(labelsize=15)
    for bar_label in colorbar.ax.yaxis.get_ticklabels():
        bar_label.set_family(font)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
# 8. 画2d_scatter图并保存到filename.jpg文件。具体画图格式可在函数中修改
def plot_2d_scatter(x, y, value, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw red markers at (x[i], y[i]) with marker size ``100*value[i]``.

    The figure is saved to ``filename + '.jpg'`` when ``save == 1`` and shown
    when ``show == 1``; all figures are closed afterwards.
    """
    import matplotlib.pyplot as plt
    from matplotlib.axes._axes import _log as matplotlib_axes_logger
    # raise matplotlib's axes logger to ERROR so lower-level messages are hidden
    matplotlib_axes_logger.setLevel('ERROR')
    font = 'Times New Roman'
    figure = plt.figure()
    axes = figure.add_subplot(111)
    plt.subplots_adjust(bottom=0.2, right=0.8, left=0.2)
    point_count = np.array(x).shape[0]
    for index in range(point_count):
        axes.scatter(x[index], y[index], marker='o', s=100*value[index], c=(1,0,0))
    axes.set_title(title, fontsize=20, fontfamily=font)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font)
    axes.tick_params(labelsize=15)
    for tick_label in axes.get_xticklabels() + axes.get_yticklabels():
        tick_label.set_fontname(font)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
# 9. 画3d_scatter图并保存到filename.jpg文件。具体画图格式可在函数中修改
def plot_3d_scatter(x, y, z, value, xlabel='x', ylabel='y', zlabel='z', title='', filename='a', show=1, save=0):
    """Draw red markers at (x[i], y[i], z[i]) with marker size ``int(100*value[i])``.

    The figure is saved to ``filename + '.jpg'`` when ``save == 1`` and shown
    when ``show == 1``; all figures are closed afterwards.
    """
    import matplotlib.pyplot as plt
    from matplotlib.ticker import LinearLocator
    from matplotlib.axes._axes import _log as matplotlib_axes_logger
    # raise matplotlib's axes logger to ERROR so lower-level messages are hidden
    matplotlib_axes_logger.setLevel('ERROR')
    font = 'Times New Roman'
    figure = plt.figure()
    axes = figure.add_subplot(111, projection='3d')
    plt.subplots_adjust(bottom=0.1, right=0.8)
    point_count = np.array(x).shape[0]
    for index in range(point_count):
        axes.scatter(x[index], y[index], z[index], marker='o', s=int(100*value[index]), c=(1,0,0))
    axes.set_title(title, fontsize=20, fontfamily=font)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font)
    axes.set_zlabel(zlabel, fontsize=20, fontfamily=font)
    axes.tick_params(labelsize=15)
    tick_labels = axes.get_xticklabels() + axes.get_yticklabels() + axes.get_zticklabels()
    for tick_label in tick_labels:
        tick_label.set_fontname(font)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
# 10. 制作动画
def creat_animation(image_names, duration_time=0.5, filename='a'):
    """Combine ``name + '.jpg'`` images into the GIF ``filename + '.gif'``.

    Args:
        image_names: Image paths without the ``.jpg`` extension, in frame order.
        duration_time: Passed to ``imageio.mimsave`` as ``duration``, the delay
            between frames.
        filename: Output path without the ``.gif`` extension.
    """
    import imageio
    frames = [imageio.imread(name+'.jpg') for name in image_names]
    imageio.mimsave(filename+'.gif', frames, 'GIF', duration=duration_time)
# 11. 在参数x下计算matrix函数的本征值eigenvalue_array[:, index]
def calculate_eigenvalue_with_one_parameter(x, matrix):
    """Evaluate the eigenvalues of ``matrix(x0)`` for every x0 in x.

    Args:
        x: Sequence of parameter values.
        matrix: Callable returning a scalar or a square matrix for one
            parameter value; it is also called with 0 to find the output shape.

    Returns:
        numpy.ndarray: Shape ``(len(x), dim)``. Row i holds the real parts of
        the eigenvalues at ``x[i]`` in ascending order; ``dim`` is 1 when the
        callable returns a scalar, whose real part is stored directly.
    """
    point_count = np.array(x).shape[0]
    probe = np.array(matrix(0))  # evaluated at 0 only to learn the shape
    if probe.shape == ():
        eigenvalue_array = np.zeros((point_count, 1))
        for row, x0 in enumerate(x):
            eigenvalue_array[row, 0] = np.real(matrix(x0))
    else:
        eigenvalue_array = np.zeros((point_count, probe.shape[0]))
        for row, x0 in enumerate(x):
            values = np.linalg.eig(matrix(x0))[0]
            eigenvalue_array[row, :] = np.sort(np.real(values[:]))
    return eigenvalue_array
# 12. 在参数(x,y)下计算matrix函数的本征值eigenvalue_array[:, :, index]
def calculate_eigenvalue_with_two_parameters(x, y, matrix):
    """Evaluate the eigenvalues of ``matrix(x0, y0)`` on the (x, y) grid.

    Args:
        x: Sequence of values for the first parameter.
        y: Sequence of values for the second parameter.
        matrix: Callable returning a scalar or a square matrix for one
            ``(x0, y0)`` pair; it is also called with ``(0, 0)`` to find the
            output shape.

    Returns:
        numpy.ndarray: Shape ``(len(y), len(x), dim)``. Entry ``[i, j, :]``
        holds the real parts of the eigenvalues at ``(x[j], y[i])`` in
        ascending order; ``dim`` is 1 when the callable returns a scalar.
    """
    count_x = np.array(x).shape[0]
    count_y = np.array(y).shape[0]
    probe = np.array(matrix(0, 0))  # evaluated at (0, 0) only to learn the shape
    if probe.shape == ():
        eigenvalue_array = np.zeros((count_y, count_x, 1))
        for row, y0 in enumerate(y):
            for col, x0 in enumerate(x):
                eigenvalue_array[row, col, 0] = np.real(matrix(x0, y0))
    else:
        eigenvalue_array = np.zeros((count_y, count_x, probe.shape[0]))
        for row, y0 in enumerate(y):
            for col, x0 in enumerate(x):
                values = np.linalg.eig(matrix(x0, y0))[0]
                eigenvalue_array[row, col, :] = np.sort(np.real(values[:]))
    return eigenvalue_array
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -1,137 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/9129
"""
import os
import re
import time
import logging
# NOTE(review): this assigns `propagate` on the Logger class itself, not on a
# single logger instance; confirm that is intended.
logging.Logger.propagate = False
logging.getLogger().setLevel(logging.ERROR)  # only show ERROR-level messages
def main():
    """Search every PDF below a hard-coded folder for a list of key words.

    For each key word a ``<key word>.txt`` report is written to the current
    directory, listing the PDFs whose text matches it. PDFs that cannot be
    processed are listed in ``error.txt``. Progress and timing are printed.
    """
    # parameters
    key_word_array = ['photonic', 'Berry phase']
    original_path = 'D:\\文献'
    # collect the paths of all PDF files
    pdf_file_all = find_files_pdf(original_path)
    print('\n该文件夹下总共有', len(pdf_file_all), '个PDF文件。\n')
    with open('error.txt','w',encoding='utf-8'):
        pass  # create or truncate the error report
    for key_word in key_word_array:
        with open(str(key_word)+'.txt','w',encoding='utf-8') as f:
            f.write('该文件夹下总共有'+str(len(pdf_file_all))+'个PDF文件。\n')
    # look for the key words in every PDF
    begin = time.time()
    for i0, pdf_file in enumerate(pdf_file_all, start=1):
        print('查找第', i0, '个文件,', end='')
        begin0 = time.time()
        try:
            content = get_text_from_pdf(pdf_file)
            for key_word in key_word_array:
                # the key word is used as a regular expression
                if re.search(re.compile(key_word),content):
                    print('发现文件!关键词', key_word, '对应的文件位置在:\n\n', pdf_file, '\n')
                    with open(str(key_word)+'.txt','a',encoding='utf-8') as f:
                        f.write('\n查找第'+str(i0)+'个文件时发现文件!位置在:\n'+pdf_file+'\n')
        except Exception:
            # Best effort: record the failing file and continue. Exception
            # (not a bare except) keeps Ctrl-C able to stop the search.
            print('出现异常!位置在:\n\n', pdf_file, '\n')
            with open('error.txt','a',encoding='utf-8') as f:
                f.write('\n解析第'+str(i0)+'个文件时出现异常!位置在:\n'+pdf_file+'\n')
        end0 = time.time()
        print('用时', end0-begin0, '')
    print('\n全部搜索结束!')
    end = time.time()
    print('\n总共用时:', (end-begin)/60, '')
def find_files_pdf(path):
    """Return the paths of all files below *path* whose name ends in ``.pdf``.

    The extension is compared case-insensitively, so ``.PDF`` also counts, and
    the dot must be a literal dot.

    Args:
        path: Folder to search; passed to ``find_files``.

    Returns:
        list: Matching file paths, in the order ``find_files`` returned them.
    """
    return [file0 for file0 in find_files(path) if file0.lower().endswith('.pdf')]
def find_files(path):
    """Return every file below *path*, collected one folder level at a time.

    Files directly inside *path* come first, then files one level deeper, and
    so on. Any entry that is not a regular file is treated as a folder to scan
    on the next level.
    """
    file_all = []
    current_level = [path]
    for _depth in range(10000):  # at most 10000 folder levels are scanned
        next_level = []
        for folder in current_level:
            for entry in os.listdir(folder):
                entry_path = os.path.join(folder, entry)
                if os.path.isfile(entry_path):
                    file_all.append(entry_path)
                else:
                    next_level.append(entry_path)
        current_level = next_level
        if current_level == []:
            break
    return file_all
def find_files_loop_module(path_all):
    """Scan one folder level.

    Args:
        path_all: Folders to list.

    Returns:
        tuple: ``(files, others)``, the full paths of the regular files found
        directly in those folders and the full paths of every other entry
        (treated as folders by the caller).
    """
    files_found = []
    others_found = []
    for folder in path_all:
        for entry in os.listdir(folder):
            candidate = os.path.join(folder, entry)
            bucket = files_found if os.path.isfile(candidate) else others_found
            bucket.append(candidate)
    return files_found, others_found
def get_text_from_pdf(file_path):
    """Return the text of all text boxes in a PDF, concatenated.

    Args:
        file_path: Path of the PDF file.

    Returns:
        str: Stripped text of every ``LTTextBox`` in page order, joined
        without a separator.

    Raises:
        PDFTextExtractionNotAllowed: If the document does not allow text
            extraction.
    """
    # NOTE(review): importing PDFDocument from pdfminer.pdfparser and the
    # set_document/set_parser/initialize/get_pages calls follow an older
    # pdfminer API; confirm which pdfminer distribution is installed.
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    # Parse inside the context manager so the handle is released for every
    # PDF, including those that raise while being processed.
    with open(file_path, 'rb') as pdf_file:
        # create a parser from the file object
        praser = PDFParser(pdf_file)
        # create the PDF document and connect it with the parser
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)
        # initialise without a password
        doc.initialize()
        # stop when the document does not allow text extraction
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        # resource manager, layout device and page interpreter
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        text_parts = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # layout is the LTPage of this page; it holds the parsed objects
            # (LTTextBox, LTFigure, LTImage, LTTextBoxHorizontal, ...)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    text_parts.append(x.get_text().strip())
    # join once instead of growing a string piece by piece
    return ''.join(text_parts)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -1,63 +0,0 @@
import os
os.chdir('D:/')  # directory that holds the PDF file
import logging
# NOTE(review): this assigns `propagate` on the Logger class itself, not on a
# single logger instance; confirm that is intended.
logging.Logger.propagate = False
logging.getLogger().setLevel(logging.ERROR)  # only show ERROR-level messages
def main():
    """Extract the text of ``a.pdf`` and write it to ``a.txt`` as UTF-8."""
    content = get_text_from_pdf('a')
    with open('a.txt', 'w', encoding='utf-8') as f:
        f.write(content)
def get_text_from_pdf(filename):
    """Return the text of all text boxes in ``filename + '.pdf'``, concatenated.

    Args:
        filename: Path of the PDF file without the ``.pdf`` extension.

    Returns:
        str: Stripped text of every ``LTTextBox`` in page order, joined
        without a separator.

    Raises:
        PDFTextExtractionNotAllowed: If the document does not allow text
            extraction.
    """
    # NOTE(review): importing PDFDocument from pdfminer.pdfparser and the
    # set_document/set_parser/initialize/get_pages calls follow an older
    # pdfminer API; confirm which pdfminer distribution is installed.
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    path = filename+".pdf"
    # Parse inside the context manager so the handle is always released.
    with open(path, 'rb') as pdf_file:
        # create a parser from the file object
        praser = PDFParser(pdf_file)
        # create the PDF document and connect it with the parser
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)
        # initialise without a password
        doc.initialize()
        # stop when the document does not allow text extraction
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        # resource manager, layout device and page interpreter
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        text_parts = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # layout is the LTPage of this page; it holds the parsed objects
            # (LTTextBox, LTFigure, LTImage, LTTextBoxHorizontal, ...)
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    text_parts.append(x.get_text().strip())
    # join once instead of growing a string piece by piece
    return ''.join(text_parts)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -1,4 +0,0 @@
import guan
# Presumably switches the working directory to the path obtained by replacing
# 'code' with 'data' in the current one; verify against the guan package.
guan.change_directory_by_replacement(current_key_word='code', new_key_word='data')
with open('data.txt', 'w') as f:  # save the data
    f.write('Hello world')

View File

@ -1,4 +0,0 @@
import guan
# Presumably switches the working directory to the path obtained by replacing
# 'working/code' with 'local/data' in the current one; verify against the
# guan package.
guan.change_directory_by_replacement(current_key_word='working/code', new_key_word='local/data')
with open('data.txt', 'w') as f:  # save the data
    f.write('Hello world')

View File

@ -1,9 +0,0 @@
import os
code_path = os.getcwd()  # current working directory
data_path = code_path.replace('\\', '/')  # use '/' separators to avoid path errors
data_path = data_path.replace('code', 'data')  # swap every 'code' in the path for 'data'
# exist_ok creates the folder only when it is missing, without a separate
# existence check that could race with another process.
os.makedirs(data_path, exist_ok=True)
os.chdir(data_path)  # switch to the data folder
with open('data.txt', 'w') as f:  # save the data
    f.write('Hello world')

View File

@ -1,41 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25453
"""
import os
# Directory whose files are scanned by main()
directory = 'E:/'
def main():
    """Print the old and the proposed new name of every matching file.

    All files below ``directory`` whose name contains ``pdf`` or ``djvu`` are
    listed. Nothing is renamed: the rename itself is left commented out.
    """
    for root, dirs, files in os.walk(directory):
        for name in files:
            if not ('pdf' in name or 'djvu' in name):  # only files matching the condition
                continue
            print(name)  # old file name
            new_name = modify_name(name)
            print(new_name)  # new file name
            print()
            # # Rename the file. Check the code above first, in particular the
            # # rule in modify_name, before enabling these lines; otherwise
            # # files may be missed or renamed wrongly.
            # if new_name != None:
            #     os.rename(root+'/'+name, root+'/'+new_name)
def modify_name(name):
    """Swap the first two ``' - '``-separated parts of a file name.

    Args:
        name: File name expected to consist of exactly three parts joined by
            ``' - '``.

    Returns:
        The name with parts one and two exchanged, or ``None`` (after printing
        ``Miss:`` and the name) when the name does not have three parts.
    """
    parts = name.split(' - ')
    if len(parts) == 3:
        first, second, third = parts
        return second+' - '+first+' - '+third
    print('Miss:', name)
    return None  # rule not satisfied, so no new name
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,35 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25685
"""
# 注意这个程序请小心使用防止误操作把系统文件或个人文件破坏。在选取好directory目录后请经过再三确认无误后再运行尤其是directory的层级不能太高。
def main():
    """Move all files below the hard-coded directory to its top level."""
    # directory to process
    directory = 'E:/test/all_files'
    move_all_files_to_root_directory(directory)
    # import guan
    # guan.move_all_files_to_root_directory(directory)
def move_all_files_to_root_directory(directory):
    """Move every file below *directory* to its top level and drop empty folders.

    Args:
        directory: Folder to flatten. It is removed as well if it ends up
            empty.
    """
    import os
    import shutil
    # NOTE(review): files that share a name in different sub-folders end up at
    # the same destination path; confirm how that case should be handled.
    for root, dirs, files in os.walk(directory):
        for name in files:
            shutil.move(root+'/'+name, directory+'/'+name)  # move the file to the top level
    # A bottom-up walk visits children before their parent, so arbitrarily
    # deep chains of empty folders are removed in a single pass.
    for root, dirs, files in os.walk(directory, topdown=False):
        try:
            os.rmdir(root)  # only succeeds for empty folders
        except OSError:
            pass  # folder is not empty or cannot be removed; leave it
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,116 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25699
"""
def main():
    """Write the file list of the hard-coded directory with default options."""
    directory = 'E:/literature'
    write_file_list_in_markdown(directory)
def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
    """Write the folder tree below *directory* as a Markdown outline.

    Files become ``+ name`` list items and folders become headings whose level
    follows the nesting depth. Six levels are listed; a folder on the sixth
    level gets a heading but its contents are not listed.

    Args:
        directory: Folder whose contents are listed.
        filename: Output path without the ``.md`` extension.
        reverse_positive_or_negative: Slice step applied to every folder
            listing; ``1`` keeps the ``os.listdir`` order, ``-1`` reverses it.
        starting_from_h1: When ``None``, every heading gets one extra ``#``,
            so top-level folders start at ``##``.
        banned_file_format: File extensions (with the dot) to leave out. It is
            only read, never modified.
        hide_file_format: When not ``None``, files are listed without their
            extension.
        divided_line: When not ``None``, a ``--------`` line is written before
            every top-level folder except the first.
        show_second_number: When not ``None``, second-level folder headings
            are numbered ``1. ``, ``2. ``, ...
        show_third_number: When not ``None``, third-level folder headings are
            numbered ``(1) ``, ``(2) ``, ...
    """
    import os
    max_depth = 6  # deepest level that is listed

    def write_level(f, path, depth):
        # Write the entries of one folder, then recurse into its sub-folders.
        folder_count = 0  # folders seen so far in this folder
        for name in os.listdir(path)[::reverse_positive_or_negative]:
            name_with_path = os.path.join(path, name)
            if os.path.isfile(name_with_path):  # file
                stem, extension = os.path.splitext(name)
                if extension not in banned_file_format:
                    shown = name if hide_file_format is None else stem
                    f.write('+ '+str(shown)+'\n\n')
                continue
            # folder
            folder_count += 1
            if depth == 1 and divided_line is not None and folder_count != 1:
                f.write('--------\n\n')
            if starting_from_h1 is None:
                f.write('#')
            if depth == 2 and show_second_number is not None:
                number = str(folder_count)+'. '
            elif depth == 3 and show_third_number is not None:
                number = '('+str(folder_count)+') '
            else:
                number = ''
            f.write('#'*depth+' '+number+str(name)+'\n\n')
            if depth < max_depth:
                write_level(f, name_with_path, depth+1)

    # The output file is opened before the first listing, as before, and the
    # context manager closes it even when a listing fails.
    with open(filename+'.md', 'w', encoding="utf-8") as f:
        write_level(f, directory, 1)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,55 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25943
"""
def main():
    """Create the default readme file in the folders of the hard-coded directory."""
    directory = 'E:/test'
    creat_necessary_file(directory)
    # delete_file_with_specific_name(directory)
    # import guan
    # guan.creat_necessary_file(directory)
    # guan.delete_file_with_specific_name(directory)
def creat_necessary_file(directory, filename='readme', file_format='.md', content='', overwrite=None, ignored_directory_with_words=[]):
    """Create ``filename + file_format`` in every folder that contains files.

    Args:
        directory: Top folder; it and all of its sub-folders are considered.
        filename: Name of the file to create, without extension.
        file_format: Extension of the file to create, including the dot.
        content: Text written to each created file.
        overwrite: When ``None``, folders that already hold the file are left
            untouched; any other value rewrites the file there.
        ignored_directory_with_words: Folders whose path contains one of these
            words are skipped. It is only read, never modified.
    """
    import os
    target = filename+file_format
    directory_with_file = []
    for root, dirs, files in os.walk(directory):
        if not files:
            continue  # folders without any file get no new file
        if overwrite is None and target in files:
            continue  # keep the existing file
        if any(word in root for word in ignored_directory_with_words):
            continue
        directory_with_file.append(root)
    for root in directory_with_file:
        # Write through the joined path instead of changing the working
        # directory: relative `directory` arguments keep working for every
        # folder and the caller's working directory is left alone.
        with open(os.path.join(root, target), 'w', encoding="utf-8") as f:
            f.write(content)
def delete_file_with_specific_name(directory, filename='readme', file_format='.md'):
    """Delete every file named ``filename + file_format`` below *directory*.

    Args:
        directory: Top folder; it and all of its sub-folders are searched.
        filename: Name of the file to delete, without extension.
        file_format: Extension of the file to delete, including the dot.
    """
    import os
    target = filename+file_format
    for root, dirs, files in os.walk(directory):
        for name in files:
            if name == target:
                os.remove(root+'/'+name)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,45 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/26113
"""
# 仅支持文件名判断是否重复,不支持对文件内容的判断。
# 如需对文件名和内容都判断,需要计算文件的哈希值。这里暂时不考虑。
def main():
    """Print the repeated file names found below the hard-coded directory."""
    directory = 'E:/test'
    repeated_file = find_repeated_file_with_same_filename(directory)
    print(repeated_file)
    # import guan
    # repeated_file = guan.find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000)
    # print(repeated_file)
def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000):
    """Find file names that occur more than once below *directory*.

    Only names are compared, not file contents.

    Args:
        directory: Top folder; it and all of its sub-folders are searched.
        ignored_directory_with_words: Files in folders whose path contains one
            of these words are not counted. Only read, never modified.
        ignored_file_with_words: Files whose name contains one of these words
            are not counted. Only read, never modified.
        num: Only the *num* most common names are considered.

    Returns:
        list: ``(name, count)`` tuples with ``count > 1``, most frequent first.
    """
    import os
    from collections import Counter
    file_list = []
    for root, dirs, files in os.walk(directory):
        # Decide before counting: each file is counted at most once and is
        # dropped once, however many ignore words match it.
        if any(word in root for word in ignored_directory_with_words):
            continue
        for name in files:
            if any(word in name for word in ignored_file_with_words):
                continue
            file_list.append(name)
    count_file = Counter(file_list).most_common(num)
    return [item for item in count_file if item[1] > 1]
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,41 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/26536
"""
def main():
    """Report the sub-folders of the current directory that hold few files."""
    # Print the path of every sub-folder that holds fewer than 5 files in total.
    count_file_in_sub_directory(directory='./', smaller_than_num=5)
    # import guan
    # guan.count_file_in_sub_directory(directory='./', smaller_than_num=5)
def count_file_in_sub_directory(directory='./', smaller_than_num=None):
    """Print the number of files held by every sub-folder of *directory*.

    The count of a sub-folder includes the files of its own sub-folders. For
    each reported folder the path, the count and an empty line are printed.

    Args:
        directory: Top folder whose sub-folders (at any depth) are examined.
        smaller_than_num: When ``None`` every sub-folder is reported;
            otherwise only those holding fewer files than this number.
    """
    import os
    from collections import Counter
    sub_directories = [
        root+'/'+name
        for root, dirs, files in os.walk(directory)
        for name in dirs
    ]
    for sub_dir in sub_directories:
        count_file = sum(len(files) for root, dirs, files in os.walk(sub_dir))
        if smaller_than_num == None or count_file<smaller_than_num:
            print(sub_dir)
            print(count_file)
            print()
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,32 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/34649
"""
import PyPDF2
# Writer that collects the pages of both input files
output_pdf = PyPDF2.PdfWriter()
# Append all pages of a.pdf, then all pages of b.pdf
for input_name in ('a.pdf', 'b.pdf'):
    with open(input_name, 'rb') as input_file:
        for page in PyPDF2.PdfReader(input_file).pages:
            output_pdf.add_page(page)
# Save the combined PDF
with open('combined_file.pdf', 'wb') as combined_file:
    output_pdf.write(combined_file)
# import guan
# guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')