删除一些不常用的文件
This commit is contained in:
parent
817938eac1
commit
f4b24eddf1
@ -1,7 +1,4 @@
|
|||||||
import imageio
|
import imageio
|
||||||
import numpy as np
|
|
||||||
import os
|
|
||||||
# os.chdir('D:/data') # 设置文件读取和保存的位置
|
|
||||||
|
|
||||||
images = []
|
images = []
|
||||||
for i in range(1000):
|
for i in range(1000):
|
||||||
|
@ -1,78 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
|
|
||||||
"""
|
|
||||||
|
|
||||||
import PyPDF2
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from urllib.request import urlopen
|
|
||||||
import requests
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Ask for a PDF name, collect its DOI links and download the references.

    The PDF is read from ``D:/`` and the downloads are written to
    ``D:/Reference``; the working directory is changed to each in turn.
    """
    os.chdir('D:/')  # directory holding the PDF file
    filename = input('输入PDF文件名:')
    # The context manager closes the PDF even if link extraction raises.
    with open(filename+'.pdf', 'rb') as pdfFile:
        links = all_links_in_pdf(pdfFile)
    os.chdir('D:/Reference')  # directory where the references are saved
    download(links)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def all_links_in_pdf(pdfFile):
    """Return the ``https://doi.org`` links found in a PDF's annotations.

    Parameters:
        pdfFile: binary file object of an opened PDF; it is handed to
            ``PyPDF2.PdfFileReader``.

    Returns:
        list of the accepted URI values in page order. A link equal to the
        one accepted immediately before it on the same page is skipped.
        Every accepted link is also printed with its running index.
    """
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    links = []
    print()
    for page_index in range(pdfReader.getNumPages()):
        pageObject = pdfReader.getPage(page_index).getObject()
        if '/Annots' not in pageObject:
            continue
        previous = ''  # last link accepted on this page
        for annotation in pageObject['/Annots']:
            entry = annotation.getObject()
            if '/A' not in entry:
                continue
            action = entry['/A']
            # Not every action carries a '/URI' entry; skip instead of
            # raising KeyError.
            if '/URI' not in action:
                continue
            uri = action['/URI']
            # Literal prefix test: the previous regex left '.' unescaped.
            if uri.startswith('https://doi.org') and uri != previous:
                print(len(links), uri)
                links.append(uri)
                previous = uri
    return links
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download(links):
|
|
||||||
for i0 in [0, 1, 3]: # 指定参考文献下载,如需全部下载用for i0 in range(links.shape[0]):
|
|
||||||
address = links[i0]
|
|
||||||
r = requests.post('https://sci-hub.st/', data={'request': address})
|
|
||||||
print('\n响应结果是:', r)
|
|
||||||
print('访问的地址是:', r.url)
|
|
||||||
soup = BeautifulSoup(r.text, features='lxml')
|
|
||||||
pdf_URL = soup.embed['src']
|
|
||||||
# pdf_URL = soup.iframe['src'] # This is a code line of history version which fails to get pdf URL.
|
|
||||||
if re.search(re.compile('^https:'), pdf_URL):
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
pdf_URL = 'https:'+pdf_URL
|
|
||||||
print('PDF的地址是:', pdf_URL)
|
|
||||||
name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
|
|
||||||
print('PDF文件名是:', name)
|
|
||||||
print('保存的位置在:', os.getcwd())
|
|
||||||
print('\n正在下载第',i0,'篇')
|
|
||||||
r = requests.get(pdf_URL, stream=True)
|
|
||||||
with open(name, 'wb') as f:
|
|
||||||
for chunk in r.iter_content(chunk_size=32):
|
|
||||||
f.write(chunk)
|
|
||||||
print('第',i0,'篇下载完成!')
|
|
||||||
print('\n全部下载完成!')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
@ -1,30 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
|
|
||||||
"""
|
|
||||||
|
|
||||||
import PyPDF2
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
|
|
||||||
# Directory holding the PDF file.
os.chdir('D:/')
filename = input('输入PDF文件名:')
# The context manager closes the PDF even if parsing raises.
with open(filename+'.pdf', 'rb') as pdfFile:
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    pages = pdfReader.getNumPages()
    i0 = 0  # running index of the printed links
    for page in range(pages):
        pageObject = pdfReader.getPage(page).getObject()
        if '/Annots' not in pageObject:
            continue
        old = ''  # last link printed on this page
        for a in pageObject['/Annots']:
            u = a.getObject()
            # Skip annotations without an action, and actions without a
            # '/URI' entry (these used to raise KeyError).
            if '/A' not in u or '/URI' not in u['/A']:
                continue
            uri = u['/A']['/URI']
            # Keep only DOI links and drop immediate repeats.
            if uri.startswith('https://doi.org') and uri != old:
                print(i0, uri)
                i0 += 1
                old = uri
|
|
@ -1,322 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/8734
|
|
||||||
|
|
||||||
函数调用目录:
|
|
||||||
1. x, y = read_one_dimensional_data(filename='a')
|
|
||||||
2. x, y, matrix = read_two_dimensional_data(filename='a')
|
|
||||||
3. write_one_dimensional_data(x, y, filename='a')
|
|
||||||
4. write_two_dimensional_data(x, y, matrix, filename='a')
|
|
||||||
5. plot(x, y, xlabel='x', ylabel='y', title='', filename='a')
|
|
||||||
6. plot_3d_surface(x, y, matrix, xlabel='x', ylabel='y', zlabel='z', title='', filename='a')
|
|
||||||
7. plot_contour(x, y, matrix, xlabel='x', ylabel='y', title='', filename='a')
|
|
||||||
8. plot_2d_scatter(x, y, value, xlabel='x', ylabel='y', title='', filename='a')
|
|
||||||
9. plot_3d_scatter(x, y, z, value, xlabel='x', ylabel='y', zlabel='z', title='', filename='a')
|
|
||||||
10. creat_animation(image_names, duration_time=0.5, filename='a')
|
|
||||||
11. eigenvalue_array = calculate_eigenvalue_with_one_parameter(x, matrix)
|
|
||||||
12. eigenvalue_array = calculate_eigenvalue_with_two_parameters(x, y, matrix)
|
|
||||||
|
|
||||||
函数对应的功能:
|
|
||||||
1. 读取filename.txt文件中的一维数据y(x)
|
|
||||||
2. 读取filename.txt文件中的二维数据matrix(x,y)
|
|
||||||
3. 把一维数据y(x)写入filename.txt文件
|
|
||||||
4. 把二维数据matrix(x,y)写入filename.txt文件
|
|
||||||
5. 画y(x)图,并保存到filename.jpg文件。具体画图格式可在函数中修改!
|
|
||||||
6. 画3d_surface图,并保存到filename.jpg文件。具体画图格式可在函数中修改!
|
|
||||||
7. 画contour图,并保存到filename.jpg文件。具体画图格式可在函数中修改!
|
|
||||||
8. 画2d_scatter图,并保存到filename.jpg文件。具体画图格式可在函数中修改!
|
|
||||||
9. 画3d_scatter图,并保存到filename.jpg文件。具体画图格式可在函数中修改!
|
|
||||||
10. 制作动画
|
|
||||||
11. 在参数x下,计算matrix函数的本征值eigenvalue_array[:, index]
|
|
||||||
12. 在参数(x,y)下,计算matrix函数的本征值eigenvalue_array[:, :, index]
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
# import os
|
|
||||||
# os.chdir('D:/data')
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
pass # 读取数据 + 数据处理 + 保存新数据
|
|
||||||
|
|
||||||
|
|
||||||
# 1. 读取filename.txt文件中的一维数据y(x)
|
|
||||||
def read_one_dimensional_data(filename='a'):
    """Read y(x) data from ``filename + '.txt'``.

    Every non-blank line holds whitespace-separated numbers: the first is
    an x value, the remaining ones are the y values for that x. The number
    of y columns is taken from the first non-blank line; a later line with
    fewer columns raises ``IndexError`` and extra columns are ignored.

    Returns:
        (x, y): ``x`` is a 1-D float array and ``y`` a 2-D float array with
        one row per data line. Both are empty arrays when the file holds
        no data.
    """
    with open(filename+'.txt', 'r') as f:
        rows = [line.split() for line in f.read().split('\n')]
    rows = [columns for columns in rows if columns]  # drop blank lines
    if not rows:
        return np.array([]), np.array([])
    n_y = len(rows[0]) - 1
    x = np.array([float(columns[0]) for columns in rows])
    # Build the table once instead of growing it with np.append per row;
    # this also makes a single-row file return an array rather than a list.
    y = np.array([[float(columns[k+1]) for k in range(n_y)] for columns in rows])
    return x, y
|
|
||||||
|
|
||||||
|
|
||||||
# 2. 读取filename.txt文件中的二维数据matrix(x,y)
|
|
||||||
def read_two_dimensional_data(filename='a'):
    """Read matrix(x, y) data from ``filename + '.txt'``.

    The first line is a header: one placeholder column followed by the x
    values. Every later non-blank line holds a y value followed by one
    matrix entry per x value; a line with too few entries raises
    ``IndexError``.

    Returns:
        (x, y, matrix): ``x`` and ``y`` are 1-D float arrays and ``matrix``
        is a 2-D float array with one row per y value. ``y`` and
        ``matrix`` are empty arrays when the file has no data lines.
    """
    with open(filename+'.txt', 'r') as f:
        rows = [line.split() for line in f.read().split('\n')]
    header = rows[0]
    x = np.array([float(value) for value in header[1:]])
    n_x = max(len(header) - 1, 0)
    y_values = []
    matrix_rows = []
    for columns in rows[1:]:
        if not columns:  # skip blank lines
            continue
        y_values.append(float(columns[0]))
        matrix_rows.append([float(columns[k+1]) for k in range(n_x)])
    # Convert once at the end instead of calling np.append per row; this
    # also makes a single data line return an array rather than a list.
    y = np.array(y_values, dtype=float)
    matrix = np.array(matrix_rows, dtype=float)
    return x, y, matrix
|
|
||||||
|
|
||||||
|
|
||||||
# 3. 把一维数据y(x)写入filename.txt文件
|
|
||||||
def write_one_dimensional_data(x, y, filename='a'):
    """Write y(x) data to ``filename + '.txt'``, one line per x value.

    Parameters:
        x: iterable of x values.
        y: 1-D sequence (one y per x) or 2-D array-like (one row of y
            values per x). Plain lists are accepted as well as arrays.
        filename: output path without the ``.txt`` extension.

    A 1-D ``y`` produces ``"x y"`` lines; a 2-D ``y`` produces
    ``"x y0 y1 ... "`` lines, each y value followed by a space.
    """
    y = np.asarray(y)  # lets callers pass lists, which have no .shape
    with open(filename+'.txt', 'w') as f:
        for i0, x0 in enumerate(x):
            f.write(str(x0)+' ')
            if y.ndim == 1:
                f.write(str(y[i0])+'\n')
            elif y.ndim == 2:
                for j0 in range(y.shape[1]):
                    f.write(str(y[i0, j0])+' ')
                f.write('\n')
|
|
||||||
|
|
||||||
|
|
||||||
# 4. 把二维数据matrix(x,y)写入filename.txt文件
|
|
||||||
def write_two_dimensional_data(x, y, matrix, filename='a'):
    """Write matrix(x, y) data to ``filename + '.txt'``.

    The first line is ``"0 "`` followed by the x values. Every later line
    starts with a y value followed by ``matrix[i, j]`` for each x, where
    ``i`` indexes ``y`` and ``j`` indexes ``x``.

    Parameters:
        x, y: iterables of axis values.
        matrix: 2-D array-like indexed as ``[y index, x index]``; nested
            lists are accepted as well as arrays.
        filename: output path without the ``.txt`` extension.
    """
    matrix = np.asarray(matrix)  # nested lists do not support [i, j]
    with open(filename+'.txt', 'w') as f:
        f.write('0 ')
        for x0 in x:
            f.write(str(x0)+' ')
        f.write('\n')
        for i0, y0 in enumerate(y):
            f.write(str(y0))
            for j0, _ in enumerate(x):
                f.write(' '+str(matrix[i0, j0])+' ')
            f.write('\n')
|
|
||||||
|
|
||||||
|
|
||||||
# 5. 画y(x)图,并保存到filename.jpg文件。具体画图格式可在函数中修改。
|
|
||||||
def plot(x, y, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw y(x) as a line with circle markers.

    Parameters:
        x, y: data passed to ``Axes.plot``.
        xlabel, ylabel, title: axis labels and title text.
        filename: output path without extension; used only when saving.
        show: display the figure when equal to 1.
        save: write ``filename + '.jpg'`` at 300 dpi when equal to 1.

    All open figures are closed before returning.
    """
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    plt.subplots_adjust(bottom=0.20, left=0.18)
    ax.plot(x, y, '-o')
    ax.grid()
    ax.set_title(title, fontsize=20, fontfamily='Times New Roman')
    ax.set_xlabel(xlabel, fontsize=20, fontfamily='Times New Roman')
    ax.set_ylabel(ylabel, fontsize=20, fontfamily='Times New Roman')
    ax.tick_params(labelsize=20)
    # Plain loop: the calls are made for their side effect only.
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontname('Times New Roman')
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# 6. 画3d_surface图,并保存到filename.jpg文件。具体画图格式可在函数中修改。
|
|
||||||
def plot_3d_surface(x, y, matrix, xlabel='x', ylabel='y', zlabel='z', title='', filename='a', show=1, save=0):
    """Draw one or several 3-D surfaces over the (x, y) grid.

    Parameters:
        x, y: 1-D axis values; they are expanded with ``np.meshgrid``.
        matrix: 2-D array-like (one surface) or 3-D array-like (one
            surface per index of the last axis).
        xlabel, ylabel, zlabel, title: label and title text.
        filename: output path without extension; used only when saving.
        show: display the figure when equal to 1.
        save: write ``filename + '.jpg'`` at 300 dpi when equal to 1.

    Raises:
        ValueError: if ``matrix`` is not 2-D or 3-D, or has no surface to
            draw (previously this failed later with an unbound ``surf``).

    All open figures are closed before returning.
    """
    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator
    matrix = np.asarray(matrix)  # accept nested lists as well as arrays
    if matrix.ndim == 2:
        layers = [matrix]
    elif matrix.ndim == 3:
        layers = [matrix[:, :, i0] for i0 in range(matrix.shape[2])]
    else:
        raise ValueError('matrix must be 2-D or 3-D')
    if not layers:
        raise ValueError('matrix holds no surface to draw')
    fig, ax = plt.subplots(subplot_kw={"projection": "3d"})
    plt.subplots_adjust(bottom=0.1, right=0.65)
    x, y = np.meshgrid(x, y)
    for layer in layers:
        surf = ax.plot_surface(x, y, layer, cmap=cm.coolwarm, linewidth=0, antialiased=False)
    ax.set_title(title, fontsize=20, fontfamily='Times New Roman')
    ax.set_xlabel(xlabel, fontsize=20, fontfamily='Times New Roman')
    ax.set_ylabel(ylabel, fontsize=20, fontfamily='Times New Roman')
    ax.set_zlabel(zlabel, fontsize=20, fontfamily='Times New Roman')
    ax.zaxis.set_major_locator(LinearLocator(5))
    ax.zaxis.set_major_formatter('{x:.2f}')
    ax.tick_params(labelsize=15)
    for label in ax.get_xticklabels() + ax.get_yticklabels() + ax.get_zticklabels():
        label.set_fontname('Times New Roman')
    # The colour bar describes the surface drawn last.
    cax = plt.axes([0.80, 0.15, 0.05, 0.75])
    cbar = fig.colorbar(surf, cax=cax)
    cbar.ax.tick_params(labelsize=15)
    for tick_label in cbar.ax.yaxis.get_ticklabels():
        tick_label.set_family('Times New Roman')
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# 7. 画plot_contour图,并保存到filename.jpg文件。具体画图格式可在函数中修改。
|
|
||||||
def plot_contour(x, y, matrix, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw a filled contour plot of matrix over the (x, y) grid.

    Parameters:
        x, y: 1-D axis values; they are expanded with ``np.meshgrid``.
        matrix: values passed to ``Axes.contourf`` together with the grid.
        xlabel, ylabel, title: label and title text.
        filename: output path without extension; used only when saving.
        show: display the figure when equal to 1.
        save: write ``filename + '.jpg'`` at 300 dpi when equal to 1.

    All open figures are closed before returning.
    """
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    plt.subplots_adjust(bottom=0.2, right=0.75, left=0.16)
    x, y = np.meshgrid(x, y)
    contour = ax.contourf(x, y, matrix, cmap='jet')
    ax.set_title(title, fontsize=20, fontfamily='Times New Roman')
    ax.set_xlabel(xlabel, fontsize=20, fontfamily='Times New Roman')
    ax.set_ylabel(ylabel, fontsize=20, fontfamily='Times New Roman')
    ax.tick_params(labelsize=15)
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontname('Times New Roman')
    # Separate axes on the right hold the colour bar.
    cax = plt.axes([0.78, 0.17, 0.08, 0.71])
    cbar = fig.colorbar(contour, cax=cax)
    cbar.ax.tick_params(labelsize=15)
    for tick_label in cbar.ax.yaxis.get_ticklabels():
        tick_label.set_family('Times New Roman')
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
|
|
||||||
|
|
||||||
|
|
||||||
# 8. 画2d_scatter图,并保存到filename.jpg文件。具体画图格式可在函数中修改!
|
|
||||||
def plot_2d_scatter(x, y, value, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw red scatter points whose marker area is ``100 * value[i]``.

    Parameters:
        x, y: point coordinates, indexed in parallel.
        value: per-point weights controlling the marker size.
        xlabel, ylabel, title: label and title text.
        filename: output path without extension; used only when saving.
        show: display the figure when equal to 1.
        save: write ``filename + '.jpg'`` at 300 dpi when equal to 1.

    All open figures are closed before returning.
    """
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.subplots_adjust(bottom=0.2, right=0.8, left=0.2)
    for i in range(np.array(x).shape[0]):
        # ``color=`` states unambiguously that the tuple is one RGB colour,
        # so the warning that ``c=(1, 0, 0)`` triggers is never emitted and
        # the private matplotlib logger no longer has to be silenced.
        ax.scatter(x[i], y[i], marker='o', s=100*value[i], color=(1, 0, 0))
    ax.set_title(title, fontsize=20, fontfamily='Times New Roman')
    ax.set_xlabel(xlabel, fontsize=20, fontfamily='Times New Roman')
    ax.set_ylabel(ylabel, fontsize=20, fontfamily='Times New Roman')
    ax.tick_params(labelsize=15)
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontname('Times New Roman')
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
|
|
||||||
|
|
||||||
|
|
||||||
# 9. 画3d_scatter图,并保存到filename.jpg文件。具体画图格式可在函数中修改!
|
|
||||||
def plot_3d_scatter(x, y, z, value, xlabel='x', ylabel='y', zlabel='z', title='', filename='a', show=1, save=0):
    """Draw red 3-D scatter points whose marker area is ``int(100 * value[i])``.

    Parameters:
        x, y, z: point coordinates, indexed in parallel.
        value: per-point weights controlling the marker size.
        xlabel, ylabel, zlabel, title: label and title text.
        filename: output path without extension; used only when saving.
        show: display the figure when equal to 1.
        save: write ``filename + '.jpg'`` at 300 dpi when equal to 1.

    All open figures are closed before returning.
    """
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    plt.subplots_adjust(bottom=0.1, right=0.8)
    for i in range(np.array(x).shape[0]):
        # ``color=`` states unambiguously that the tuple is one RGB colour,
        # so the warning that ``c=(1, 0, 0)`` triggers is never emitted and
        # the private matplotlib logger no longer has to be silenced.
        ax.scatter(x[i], y[i], z[i], marker='o', s=int(100*value[i]), color=(1, 0, 0))
    ax.set_title(title, fontsize=20, fontfamily='Times New Roman')
    ax.set_xlabel(xlabel, fontsize=20, fontfamily='Times New Roman')
    ax.set_ylabel(ylabel, fontsize=20, fontfamily='Times New Roman')
    ax.set_zlabel(zlabel, fontsize=20, fontfamily='Times New Roman')
    ax.tick_params(labelsize=15)
    for label in ax.get_xticklabels() + ax.get_yticklabels() + ax.get_zticklabels():
        label.set_fontname('Times New Roman')
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
|
|
||||||
|
|
||||||
|
|
||||||
# 10. 制作动画
|
|
||||||
def creat_animation(image_names, duration_time=0.5, filename='a'):
    """Combine JPEG frames into an animated GIF.

    Parameters:
        image_names: frame paths without the ``.jpg`` extension, in
            playback order.
        duration_time: value passed to ``imageio.mimsave`` as ``duration``.
        filename: output path without the ``.gif`` extension.
    """
    import imageio
    frames = [imageio.imread(name+'.jpg') for name in image_names]
    imageio.mimsave(filename+'.gif', frames, 'GIF', duration=duration_time)
|
|
||||||
|
|
||||||
|
|
||||||
# 11. 在参数x下,计算matrix函数的本征值eigenvalue_array[:, index]
|
|
||||||
def calculate_eigenvalue_with_one_parameter(x, matrix):
    """Evaluate the eigenvalues of ``matrix(x0)`` for every ``x0`` in ``x``.

    Parameters:
        x: sequence of parameter values.
        matrix: callable returning either a scalar or a square matrix.

    Returns:
        Float array ``eigenvalue_array[x index, eigenvalue index]`` holding
        the real parts of the eigenvalues in ascending order. A scalar
        result is stored as a single column.
    """
    dim_x = np.array(x).shape[0]
    # Probe the result shape at a value that really belongs to x; probing
    # at 0 fails for functions that are undefined there. Fall back to 0
    # only when x is empty.
    sample = np.array(matrix(x[0] if dim_x else 0))
    if sample.shape == ():
        eigenvalue_array = np.zeros((dim_x, 1))
        for i0, x0 in enumerate(x):
            eigenvalue_array[i0, 0] = np.real(matrix(x0))
    else:
        eigenvalue_array = np.zeros((dim_x, sample.shape[0]))
        for i0, x0 in enumerate(x):
            # Only eigenvalues are needed, so skip the eigenvector work.
            eigenvalue = np.linalg.eigvals(matrix(x0))
            eigenvalue_array[i0, :] = np.sort(np.real(eigenvalue))
    return eigenvalue_array
|
|
||||||
|
|
||||||
|
|
||||||
# 12. 在参数(x,y)下,计算matrix函数的本征值eigenvalue_array[:, :, index]
|
|
||||||
def calculate_eigenvalue_with_two_parameters(x, y, matrix):
    """Evaluate the eigenvalues of ``matrix(x0, y0)`` on the (x, y) grid.

    Parameters:
        x, y: sequences of parameter values.
        matrix: callable of two arguments returning either a scalar or a
            square matrix.

    Returns:
        Float array ``eigenvalue_array[y index, x index, eigenvalue index]``
        holding the real parts of the eigenvalues in ascending order. A
        scalar result is stored with a last axis of length one.
    """
    dim_x = np.array(x).shape[0]
    dim_y = np.array(y).shape[0]
    # Probe the result shape at a grid point when one exists; probing at
    # (0, 0) fails for functions that are undefined there.
    sample = np.array(matrix(x[0], y[0]) if dim_x and dim_y else matrix(0, 0))
    if sample.shape == ():
        eigenvalue_array = np.zeros((dim_y, dim_x, 1))
        for i0, y0 in enumerate(y):
            for j0, x0 in enumerate(x):
                eigenvalue_array[i0, j0, 0] = np.real(matrix(x0, y0))
    else:
        eigenvalue_array = np.zeros((dim_y, dim_x, sample.shape[0]))
        for i0, y0 in enumerate(y):
            for j0, x0 in enumerate(x):
                # Only eigenvalues are needed, so skip the eigenvector work.
                eigenvalue = np.linalg.eigvals(matrix(x0, y0))
                eigenvalue_array[i0, j0, :] = np.sort(np.real(eigenvalue))
    return eigenvalue_array
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
@ -1,137 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/9129
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import logging
|
|
||||||
logging.Logger.propagate = False
|
|
||||||
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Search every PDF below a folder for a list of key words.

    For each key word a ``<key word>.txt`` report is written to the
    current directory, listing the PDFs whose extracted text contains it.
    PDFs that cannot be processed are listed in ``error.txt``.
    """
    # Parameters
    key_word_array = ['photonic', 'Berry phase']
    original_path = 'D:\\文献'

    # Collect the paths of all PDF files.
    pdf_file_all = find_files_pdf(original_path)
    print('\n该文件夹下总共有', len(pdf_file_all), '个PDF文件。\n')

    # Start from an empty error log and a fresh report per key word.
    with open('error.txt','w',encoding='utf-8'):
        pass
    for key_word in key_word_array:
        with open(str(key_word)+'.txt','w',encoding='utf-8') as f:
            f.write('该文件夹下总共有'+str(len(pdf_file_all))+'个PDF文件。\n')

    # Look for the key words in each PDF.
    begin = time.time()
    for i0, pdf_file in enumerate(pdf_file_all, start=1):
        print('查找第', i0, '个文件,', end='')
        begin0 = time.time()
        try:
            content = get_text_from_pdf(pdf_file)
            for key_word in key_word_array:
                # Literal substring test: a key word is plain text, not a
                # regular expression.
                if key_word in content:
                    print('发现文件!关键词', key_word, '对应的文件位置在:\n\n', pdf_file, '\n')
                    with open(str(key_word)+'.txt','a',encoding='utf-8') as f:
                        f.write('\n查找第'+str(i0)+'个文件时发现文件!位置在:\n'+pdf_file+'\n')
        except Exception:
            # Best effort: record the failing file and carry on. Unlike a
            # bare ``except`` this lets Ctrl+C stop the run.
            print('出现异常!位置在:\n\n', pdf_file, '\n')
            with open('error.txt','a',encoding='utf-8') as f:
                f.write('\n解析第'+str(i0)+'个文件时出现异常!位置在:\n'+pdf_file+'\n')
        end0 = time.time()
        print('用时', end0-begin0, '秒')
    print('\n全部搜索结束!')
    end = time.time()
    print('\n总共用时:', (end-begin)/60, '分')
|
|
||||||
|
|
||||||
|
|
||||||
def find_files_pdf(path):
    """Return the paths of all files below ``path`` that end in ``.pdf``.

    The suffix test is literal and case-sensitive. The previous pattern
    ``'^fdp.'`` on the reversed name left the dot unescaped, so it also
    accepted names such as ``report_pdf``.
    """
    return [file0 for file0 in find_files(path) if file0.endswith('.pdf')]
|
|
||||||
|
|
||||||
|
|
||||||
def find_files(path):
    """Return the paths of all files below ``path``, shallowest first.

    The tree is processed one depth level per iteration by
    ``find_files_loop_module`` until no folders remain, so there is no
    fixed limit on the nesting depth.
    """
    file_all = []
    path_next_loop = [path]
    while path_next_loop:
        file_all_in_one_loop, path_next_loop = find_files_loop_module(path_next_loop)
        file_all.extend(file_all_in_one_loop)
    return file_all
|
|
||||||
|
|
||||||
|
|
||||||
def find_files_loop_module(path_all):
    """List the direct contents of every folder in ``path_all``.

    Parameters:
        path_all: iterable of folder paths.

    Returns:
        (files, folders): the full paths of the regular files found, and
        of the sub-folders to visit in the next round. Entries that are
        neither (for example dangling symbolic links) are skipped; they
        used to be queued as folders and made the next ``os.listdir``
        call fail.
    """
    file_all_in_one_loop = []
    path_next_loop = []
    for path in path_all:
        for entry in os.listdir(path):
            full_path = os.path.join(path, entry)
            if os.path.isfile(full_path):
                file_all_in_one_loop.append(full_path)
            elif os.path.isdir(full_path):
                path_next_loop.append(full_path)
    return file_all_in_one_loop, path_next_loop
|
|
||||||
|
|
||||||
|
|
||||||
def get_text_from_pdf(file_path):
    """Return the text of all text boxes in a PDF as one string.

    Parameters:
        file_path: path of the PDF file.

    Returns:
        The stripped text of every ``LTTextBox`` in page order, joined
        without separators.

    Raises:
        PDFTextExtractionNotAllowed: if the document forbids extraction.
    """
    # NOTE(review): importing PDFDocument from pdfminer.pdfparser looks like
    # the pdfminer3k layout - confirm against the installed package.
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed

    # Keep the file open for the whole extraction and close it afterwards;
    # the handle used to be left open.
    with open(file_path, 'rb') as pdf_file:
        # Create the parser and the document, then link them to each other.
        praser = PDFParser(pdf_file)
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)

        # Initialise without a password.
        doc.initialize()

        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        # Resource manager, layout-aggregating device and page interpreter.
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # The layout holds the objects parsed from this page; only
            # text boxes are kept.
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    # One join instead of repeated string concatenation.
    return ''.join(pieces)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
@ -1,63 +0,0 @@
|
|||||||
import os
|
|
||||||
os.chdir('D:/') # PDF文件存放的位置
|
|
||||||
import logging
|
|
||||||
logging.Logger.propagate = False
|
|
||||||
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
content = get_text_from_pdf('a')
|
|
||||||
with open('a.txt', 'w', encoding='utf-8') as f:
|
|
||||||
f.write(content)
|
|
||||||
|
|
||||||
|
|
||||||
def get_text_from_pdf(filename):
    """Return the text of all text boxes in ``filename + '.pdf'``.

    Parameters:
        filename: path of the PDF file without the ``.pdf`` extension.

    Returns:
        The stripped text of every ``LTTextBox`` in page order, joined
        without separators.

    Raises:
        PDFTextExtractionNotAllowed: if the document forbids extraction.
    """
    # NOTE(review): importing PDFDocument from pdfminer.pdfparser looks like
    # the pdfminer3k layout - confirm against the installed package.
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed

    path = filename+".pdf"

    # Keep the file open for the whole extraction and close it afterwards;
    # the handle used to be left open.
    with open(path, 'rb') as pdf_file:
        # Create the parser and the document, then link them to each other.
        praser = PDFParser(pdf_file)
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)

        # Initialise without a password.
        doc.initialize()

        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        # Resource manager, layout-aggregating device and page interpreter.
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # The layout holds the objects parsed from this page; only
            # text boxes are kept.
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    # One join instead of repeated string concatenation.
    return ''.join(pieces)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
@ -1,4 +0,0 @@
|
|||||||
import guan
|
|
||||||
guan.change_directory_by_replacement(current_key_word='code', new_key_word='data')
|
|
||||||
with open('data.txt', 'w') as f: # 保存数据
|
|
||||||
f.write('Hello world')
|
|
@ -1,4 +0,0 @@
|
|||||||
import guan
|
|
||||||
guan.change_directory_by_replacement(current_key_word='working/code', new_key_word='local/data')
|
|
||||||
with open('data.txt', 'w') as f: # 保存数据
|
|
||||||
f.write('Hello world')
|
|
@ -1,9 +0,0 @@
|
|||||||
import os
|
|
||||||
# Working directory of the running script.
code_path = os.getcwd()
# Use forward slashes so the path is handled uniformly.
data_path = code_path.replace('\\', '/')
# Derive the data folder by replacing 'code' with 'data'.
# NOTE: every occurrence of 'code' in the path is replaced.
data_path = data_path.replace('code', 'data')
# Create the folder when missing; exist_ok avoids the check-then-create race.
os.makedirs(data_path, exist_ok=True)
# Switch to the data folder and save the data there.
os.chdir(data_path)
with open('data.txt', 'w') as f:
    f.write('Hello world')
|
|
@ -1,41 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25453
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
# 选取某个目录
|
|
||||||
directory = 'E:/'
|
|
||||||
|
|
||||||
def main():
    """Preview the renaming of the matching files below ``directory``.

    For every file whose name contains 'pdf' or 'djvu', the old name and
    the name proposed by ``modify_name`` are printed. Nothing is renamed
    unless the commented-out block at the end is enabled.
    """
    for root, dirs, files in os.walk(directory):
        for name in files:
            if 'pdf' in name or 'djvu' in name:  # files matching the condition
                print(name)  # old file name
                new_name = modify_name(name)
                print(new_name)  # proposed file name
                print()

                # Renaming is disabled on purpose. Check the output above,
                # in particular the modify_name rule, before enabling it;
                # otherwise files may be missed or renamed inconsistently.
                # if new_name is not None:
                #     os.rename(root+'/'+name, root+'/'+new_name)
|
|
||||||
|
|
||||||
|
|
||||||
def modify_name(name):
    """Swap the first two ``' - '``-separated parts of a file name.

    Parameters:
        name: file name expected to look like ``'A - B - C'``.

    Returns:
        ``'B - A - C'``, or ``None`` (after printing a ``Miss:`` line) when
        the name does not consist of exactly three parts.
    """
    parts = name.split(' - ')
    if len(parts) != 3:
        print('Miss:', name)
        return None  # rule not satisfied: leave the name alone
    first, second, rest = parts
    return second+' - '+first+' - '+rest
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
@ -1,35 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25685
|
|
||||||
"""
|
|
||||||
|
|
||||||
# 注意:这个程序请小心使用,防止误操作把系统文件或个人文件破坏。在选取好directory目录后,请经过再三确认无误后再运行,尤其是directory的层级不能太高。
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
# 选取某个目录
|
|
||||||
directory = 'E:/test/all_files'
|
|
||||||
move_all_files_to_root_directory(directory)
|
|
||||||
|
|
||||||
# import guan
|
|
||||||
# guan.move_all_files_to_root_directory(directory)
|
|
||||||
|
|
||||||
|
|
||||||
def move_all_files_to_root_directory(directory):
    """Move every file below ``directory`` into it, then drop empty folders.

    A file is left where it is when ``directory`` already contains an entry
    with the same name, so nothing is overwritten; its folder then stays
    because it is not empty. Afterwards every empty folder is removed,
    deepest first. ``directory`` itself is removed too if it ends up empty.

    Parameters:
        directory: path of the folder that receives the files.
    """
    import os
    import shutil
    for root, dirs, files in os.walk(directory):
        if root == directory:
            continue  # these files are already in place
        for name in files:
            target = os.path.join(directory, name)
            if os.path.lexists(target):
                continue  # never overwrite an existing entry
            shutil.move(os.path.join(root, name), target)
    # One bottom-up pass reaches the leaves first, so chains of empty
    # folders of any depth disappear without repeated walks.
    for root, dirs, files in os.walk(directory, topdown=False):
        try:
            os.rmdir(root)
        except OSError:
            pass  # best effort: a folder that cannot be removed (e.g. not empty) stays
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
@ -1,116 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25699
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
directory = 'E:/literature'
|
|
||||||
write_file_list_in_markdown(directory)
|
|
||||||
|
|
||||||
|
|
||||||
def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=None, hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
    """Write the folder tree below ``directory`` to ``filename + '.md'``.

    Files become ``+ name`` list items and folders become headings whose
    level follows the nesting depth. Folders are followed down to six
    levels; a folder on the sixth level gets a heading but its contents
    are not listed.

    Parameters:
        directory: folder to list.
        filename: output path without the ``.md`` extension.
        reverse_positive_or_negative: slice step applied to each
            ``os.listdir`` result (1 keeps the order, -1 reverses it).
        starting_from_h1: when None, one extra ``#`` is put in front of
            every heading, so the top level starts at ``##``.
        banned_file_format: extensions (with the dot) to leave out;
            None means no extension is banned.
        hide_file_format: when not None, file names are written without
            their extension.
        divided_line: when not None, a ``--------`` line precedes every
            top-level folder except the first.
        show_second_number: when not None, second-level folders are
            numbered ``1. name``.
        show_third_number: when not None, third-level folders are
            numbered ``(1) name``.
    """
    import os
    banned = [] if banned_file_format is None else banned_file_format
    max_depth = 6

    def write_level(f, path, depth):
        """Write the entries of ``path``, which lies at nesting ``depth``."""
        folder_count = 0  # folders seen so far in this directory
        for name in os.listdir(path)[::reverse_positive_or_negative]:
            full_path = os.path.join(path, name)
            if os.path.isfile(full_path):  # file
                stem, extension = os.path.splitext(name)
                if extension not in banned:
                    shown = name if hide_file_format is None else stem
                    f.write('+ '+str(shown)+'\n\n')
                continue
            # folder
            folder_count += 1
            if depth == 1 and divided_line is not None and folder_count != 1:
                f.write('--------\n\n')
            if starting_from_h1 is None:
                f.write('#')
            if depth == 2 and show_second_number is not None:
                label = str(folder_count)+'. '+str(name)
            elif depth == 3 and show_third_number is not None:
                label = '('+str(folder_count)+') '+str(name)
            else:
                label = str(name)
            f.write('#'*depth+' '+label+'\n\n')
            if depth < max_depth:
                write_level(f, full_path, depth+1)

    # The context manager closes the output even if a listing fails.
    with open(filename+'.md', 'w', encoding="utf-8") as f:
        write_level(f, directory, 1)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
@ -1,55 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25943
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run creat_necessary_file on the example directory E:/test."""
    creat_necessary_file('E:/test')
    # To remove the generated files again:
    #     delete_file_with_specific_name('E:/test')
    #
    # Both helpers can also be called through the guan package:
    #     import guan
    #     guan.creat_necessary_file(directory)
    #     guan.delete_file_with_specific_name(directory)
|
|
||||||
|
|
||||||
|
|
||||||
def creat_necessary_file(directory, filename='readme', file_format='.md', content='', overwrite=None, ignored_directory_with_words=[]):
    """Create ``filename + file_format`` in every directory under *directory* that contains at least one file.

    Args:
        directory: Root of the tree to walk (absolute or relative path).
        filename: Base name of the file to create.
        file_format: Extension, including the dot, appended to *filename*.
        content: Text written into each created file (UTF-8).
        overwrite: When None (default), directories that already contain the
            target file are left untouched; any other value rewrites the file
            in those directories as well.
        ignored_directory_with_words: Substrings; a directory whose path
            contains any of them is skipped. The list is only read.

    Directories that hold no files (only sub-directories, or nothing) are
    never given a file. The current working directory is not changed.
    """
    import os
    target_name = filename + file_format

    # First pass: decide which directories receive the file, so that files
    # created below can never influence the selection.
    selected_directories = []
    for root, _dirs, files in os.walk(directory):
        if not files:
            continue
        if overwrite is None and target_name in files:
            continue
        if any(word in root for word in ignored_directory_with_words):
            continue
        selected_directories.append(root)

    # Second pass: write through a joined path instead of os.chdir(), so that
    # relative roots stay valid and the caller's working directory is kept.
    for root in selected_directories:
        with open(os.path.join(root, target_name), 'w', encoding="utf-8") as f:
            f.write(content)
|
|
||||||
|
|
||||||
|
|
||||||
def delete_file_with_specific_name(directory, filename='readme', file_format='.md'):
    """Delete every file named ``filename + file_format`` found anywhere under *directory*.

    Args:
        directory: Root of the tree to walk.
        filename: Base name of the files to delete.
        file_format: Extension, including the dot, appended to *filename*.
    """
    import os
    unwanted = filename + file_format
    for folder, _subfolders, names in os.walk(directory):
        for name in names:
            if name != unwanted:
                continue
            os.remove(folder + '/' + name)
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: run main() only when this file is executed directly.
if __name__ == '__main__':
    main()
|
|
@ -1,45 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/26113
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Duplicates are detected by file name only; file contents are not compared.
# Checking both name and content would require hashing each file, which is not done here.
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Print the result of find_repeated_file_with_same_filename for E:/test."""
    print(find_repeated_file_with_same_filename('E:/test'))
    # The function can also be called through the guan package:
    #     import guan
    #     repeated_file = guan.find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000)
    #     print(repeated_file)
|
|
||||||
|
|
||||||
|
|
||||||
def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000):
    """Return the file names that occur more than once under *directory*.

    Only names are compared; file contents are never read.

    Args:
        directory: Root of the tree to walk.
        ignored_directory_with_words: Substrings; files located in a directory
            whose path contains any of them are not counted.
        ignored_file_with_words: Substrings; files whose name contains any of
            them are not counted.
        num: Upper bound on how many of the most frequent names are examined
            (passed to ``Counter.most_common``).

    Returns:
        A list of ``(file_name, count)`` tuples with ``count > 1``, most
        frequent first.
    """
    import os
    from collections import Counter
    name_counts = Counter()
    for root, _dirs, files in os.walk(directory):
        # Decide once per directory; each file is then either counted or not,
        # so several matching ignore-words can never discard an unrelated
        # earlier entry that happens to share the name.
        if any(word in root for word in ignored_directory_with_words):
            continue
        for name in files:
            if any(word in name for word in ignored_file_with_words):
                continue
            name_counts[name] += 1
    return [item for item in name_counts.most_common(num) if item[1] > 1]
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: run main() only when this file is executed directly.
if __name__ == '__main__':
    main()
|
|
@ -1,41 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/26536
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Report the sub-directories of the current directory that hold fewer than five files."""
    threshold = 5
    count_file_in_sub_directory(directory='./', smaller_than_num=threshold)
    # The function can also be called through the guan package:
    #     import guan
    #     guan.count_file_in_sub_directory(directory='./', smaller_than_num=5)
|
|
||||||
|
|
||||||
|
|
||||||
def count_file_in_sub_directory(directory='./', smaller_than_num=None):
    """Print every sub-directory under *directory* together with its file count.

    The count for a sub-directory covers all files below it, at any depth.
    For each reported sub-directory three lines are printed: its path, the
    count, and an empty line.

    Args:
        directory: Root of the tree to walk.
        smaller_than_num: When None, every sub-directory is reported;
            otherwise only those whose count is below this number.
    """
    import os
    sub_directories = []
    for parent, children, _files in os.walk(directory):
        sub_directories.extend(parent + '/' + child for child in children)
    for sub_dir in sub_directories:
        count_file = sum(len(names) for _root, _dirs, names in os.walk(sub_dir))
        if smaller_than_num is None or count_file < smaller_than_num:
            print(sub_dir)
            print(count_file)
            print()
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: run main() only when this file is executed directly.
if __name__ == '__main__':
    main()
|
|
@ -1,32 +0,0 @@
|
|||||||
"""
|
|
||||||
This code is supported by the website: https://www.guanjihuan.com
|
|
||||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/34649
|
|
||||||
"""
|
|
||||||
|
|
||||||
import PyPDF2
|
|
||||||
|
|
||||||
# Merge a.pdf and b.pdf (in that order) into combined_file.pdf.

# Writer that collects the pages of both inputs.
output_pdf = PyPDF2.PdfWriter()

# Both inputs stay open until the merged file has been written: depending on
# the PyPDF2 release, page contents may only be read from the source stream
# when the writer serialises them, which fails on an already-closed file.
with open('a.pdf', 'rb') as file1, open('b.pdf', 'rb') as file2:
    pdf1 = PyPDF2.PdfReader(file1)
    pdf2 = PyPDF2.PdfReader(file2)

    # Append every page of the first file, then every page of the second.
    for pdf in (pdf1, pdf2):
        for page in range(len(pdf.pages)):
            output_pdf.add_page(pdf.pages[page])

    # Save the merged document.
    with open('combined_file.pdf', 'wb') as combined_file:
        output_pdf.write(combined_file)
|
|
||||||
|
|
||||||
# import guan
|
|
||||||
# guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')
|
|
Loading…
x
Reference in New Issue
Block a user