删除一些不常用的文件

2023-12-05 15:54:45 +08:00
parent 817938eac1
commit f4b24eddf1
16 changed files with 0 additions and 1015 deletions
--- a/2019.12.03_create_GIF_with_python/create_GIF_with_python.py
+++ b/2019.12.03_create_GIF_with_python/create_GIF_with_python.py
@@ -1,7 +1,4 @@
 import imageio
 import numpy as np
 import os
 # os.chdir('D:/data')  # 设置文件读取和保存的位置
 images = []
 for i in range(1000):
--- a/2020.10.31_download_references_in_a_pdf_file_with_python/download_references_in_a_pdf_file_with_python.py
+++ b/2020.10.31_download_references_in_a_pdf_file_with_python/download_references_in_a_pdf_file_with_python.py
@@ -1,78 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
 """
 import PyPDF2
 import os
 import re 
 from bs4 import BeautifulSoup
 from urllib.request import urlopen
 import requests
 def main():
    """Ask for a PDF name, list its DOI links, and download the selected references.

    The PDF ``<name>.pdf`` is read from ``D:/`` and the downloaded files are
    written to ``D:/Reference``.
    """
    os.chdir('D:/')  # directory that holds the PDF file
    filename = input('输入PDF文件名：')
    # The context manager closes the PDF even when link extraction raises.
    with open(filename+'.pdf', 'rb') as pdfFile:
        links = all_links_in_pdf(pdfFile)  # DOI links found in the PDF
    os.chdir('D:/Reference')  # directory where the references are saved
    download(links)  # download the references
 def all_links_in_pdf(pdfFile):
    """Collect the ``https://doi.org`` links stored in a PDF's annotations.

    Each kept link is printed together with a running index.

    Args:
        pdfFile: binary file object of the PDF, opened for reading.

    Returns:
        list: the link URIs in page order. A link is skipped when it equals
        the link kept immediately before it on the same page.
    """
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    doi_pattern = re.compile('^https://doi.org')  # compiled once instead of per annotation
    i0 = 0
    links = []
    print()
    for page in range(pdfReader.getNumPages()):
        pageObject = pdfReader.getPage(page).getObject()
        if '/Annots' not in pageObject.keys():
            continue
        old = ''  # last link kept on this page
        for a in pageObject['/Annots']:
            u = a.getObject()
            if '/A' not in u.keys():
                continue
            action = u['/A']
            # Actions without a '/URI' entry (presumably in-document jumps) are
            # skipped; indexing them directly raised KeyError.
            if '/URI' not in action:
                continue
            uri = action['/URI']
            if not re.search(doi_pattern, uri):  # ignore other kinds of links
                continue
            if uri != old:  # drop consecutive duplicates
                print(i0 , uri)
                links.append(uri)
                i0 += 1
                old = uri
    return links
 def download(links):
    """Request selected links through https://sci-hub.st/ and save the returned PDFs.

    For each selected index the link is posted to the site, the ``src`` of the
    first ``<embed>`` tag in the response is taken as the PDF URL, and the PDF
    is streamed into the current working directory.

    Args:
        links: indexable collection of link strings; indices 0, 1 and 3 are used.
    """
    for i0 in [0, 1, 3]:  # download only the selected references; to download all of them loop over every index, e.g. range(len(links))
        address = links[i0]
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\n响应结果是：', r)
        print('访问的地址是：', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        pdf_URL = soup.embed['src']
        # pdf_URL = soup.iframe['src'] # This is a code line of history version which fails to get pdf URL.
        # Prefix the scheme when the embedded URL does not start with 'https:'.
        if re.search(re.compile('^https:'), pdf_URL):
            pass
        else:
            pdf_URL = 'https:'+pdf_URL
        print('PDF的地址是：', pdf_URL)
        # Search the reversed URL so the match ends at the last 'pdf'; after
        # reversing back, [1::] drops the leading '/'.
        name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
        print('PDF文件名是：', name)
        print('保存的位置在：', os.getcwd())
        print('\n正在下载第',i0,'篇')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=32):
                f.write(chunk)
        print('第',i0,'篇下载完成！')
    print('\n全部下载完成！')
 # Run main() only when this file is executed as a script.
 if __name__ == '__main__':
    main()
--- a/2020.10.31_download_references_in_a_pdf_file_with_python/get_links_from_a_pdf_file.py
+++ b/2020.10.31_download_references_in_a_pdf_file_with_python/get_links_from_a_pdf_file.py
@@ -1,30 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
 """
 import PyPDF2
 import os
 import re 
 # Print every https://doi.org link stored in the annotations of a PDF in D:/.
 os.chdir('D:/')  # directory that holds the PDF file
 filename = input('输入PDF文件名：')
 doi_pattern = re.compile('^https://doi.org')  # compiled once instead of per annotation
 # The with-block closes the PDF even if parsing fails part-way.
 with open(filename+'.pdf','rb') as pdfFile:
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    pages = pdfReader.getNumPages()
    i0 = 0
    for page in range(pages):
        pageSliced = pdfReader.getPage(page)
        pageObject = pageSliced.getObject()
        if '/Annots' in pageObject.keys():
            ann = pageObject['/Annots']
            old = ''  # last link printed for this page
            for a in ann:
                u = a.getObject()
                # Actions without a '/URI' entry (presumably in-document jumps)
                # are skipped; indexing them directly raised KeyError.
                if '/A' in u.keys() and '/URI' in u['/A']:
                    uri = u['/A']['/URI']
                    if re.search(doi_pattern, uri):  # ignore other kinds of links
                        if uri != old:  # drop consecutive duplicates
                            print(i0 , uri)
                            i0 += 1
                            old = uri
--- a/2021.01.13_python_code_for_data_processing/python_code_for_data_processing.py
+++ b/2021.01.13_python_code_for_data_processing/python_code_for_data_processing.py
@@ -1,322 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/8734
 函数调用目录：
 1. x, y = read_one_dimensional_data(filename='a')
 2. x, y, matrix = read_two_dimensional_data(filename='a')
 3. write_one_dimensional_data(x, y, filename='a')
 4. write_two_dimensional_data(x, y, matrix, filename='a')
 5. plot(x, y, xlabel='x', ylabel='y', title='', filename='a')
 6. plot_3d_surface(x, y, matrix, xlabel='x', ylabel='y', zlabel='z', title='', filename='a')
 7. plot_contour(x, y, matrix, xlabel='x', ylabel='y', title='', filename='a')
 8. plot_2d_scatter(x, y, value, xlabel='x', ylabel='y', title='', filename='a')
 9. plot_3d_scatter(x, y, z, value, xlabel='x', ylabel='y', zlabel='z', title='', filename='a')
 10. creat_animation(image_names, duration_time=0.5, filename='a')
 11. eigenvalue_array = calculate_eigenvalue_with_one_parameter(x, matrix)
 12. eigenvalue_array = calculate_eigenvalue_with_two_parameters(x, y, matrix)
 函数对应的功能：
 1. 读取filename.txt文件中的一维数据y(x)
 2. 读取filename.txt文件中的二维数据matrix(x,y)
 3. 把一维数据y(x)写入filename.txt文件  
 4. 把二维数据matrix(x,y)写入filename.txt文件
 5. 画y(x)图，并保存到filename.jpg文件。具体画图格式可在函数中修改！
 6. 画3d_surface图，并保存到filename.jpg文件。具体画图格式可在函数中修改！
 7. 画contour图，并保存到filename.jpg文件。具体画图格式可在函数中修改！
 8. 画2d_scatter图，并保存到filename.jpg文件。具体画图格式可在函数中修改！
 9. 画3d_scatter图，并保存到filename.jpg文件。具体画图格式可在函数中修改！
 10. 制作动画
 11. 在参数x下，计算matrix函数的本征值eigenvalue_array[:, index]
 12. 在参数(x,y)下，计算matrix函数的本征值eigenvalue_array[:, :, index]
 """
 import numpy as np
 # import os
 # os.chdir('D:/data')
 def main():
    """Placeholder entry point; currently does nothing."""
    pass  # read data + process data + save new data
 # 1. Read the one-dimensional data y(x) stored in filename.txt
 def read_one_dimensional_data(filename='a'):
    """Read whitespace-separated columns from ``filename + '.txt'``.

    The first column is x; the remaining columns are y. Blank lines are
    ignored. The number of y columns is taken from the first non-blank row.

    Args:
        filename: path of the text file without the ``.txt`` suffix.

    Returns:
        tuple: ``(x, y)`` where x is a 1-D float array and y is a 2-D float
        array with one row per data line. Both are empty 1-D arrays when the
        file holds no data.

    Raises:
        IndexError: if a later row has fewer columns than the first one.
    """
    with open(filename+'.txt', 'r') as f:
        text = f.read()
    rows = [line.split() for line in text.split('\n')]
    rows = [row for row in rows if row]  # drop blank lines
    if not rows:
        return np.array([]), np.array([])
    dim_y = len(rows[0]) - 1
    x = np.array([float(row[0]) for row in rows])
    # Build the array in one go: repeated np.append copies everything each time
    # and the first row used to be returned as a plain list.
    y = np.array([[float(row[k+1]) for k in range(dim_y)] for row in rows])
    return x, y
 # 2. Read the two-dimensional data matrix(x,y) stored in filename.txt
 def read_two_dimensional_data(filename='a'):
    """Read a table whose first line holds x and whose first column holds y.

    The first entry of the first line is a placeholder and is ignored. Every
    following non-blank line holds one y value followed by one matrix row.

    Args:
        filename: path of the text file without the ``.txt`` suffix.

    Returns:
        tuple: ``(x, y, matrix)`` as float arrays; ``matrix`` has one row per
        y value and one column per x value. ``y`` and ``matrix`` are empty 1-D
        arrays when the file has no data lines.

    Raises:
        ValueError: if data lines follow a blank first line.
        IndexError: if a data line has fewer columns than the first line.
    """
    with open(filename+'.txt', 'r') as f:
        lines = f.read().split('\n')
    header = lines[0].split()
    dim_x = len(header) - 1
    x = np.array([float(value) for value in header[1:]])
    rows = [line.split() for line in lines[1:]]
    rows = [row for row in rows if row]  # drop blank lines
    if not rows:
        return x, np.array([]), np.array([])
    if dim_x < 0:
        raise ValueError('the first line must hold the x values')
    y = np.array([float(row[0]) for row in rows])
    # Build the array in one go: repeated np.append copies everything each time
    # and the first row used to be returned as a plain list.
    matrix = np.array([[float(row[k+1]) for k in range(dim_x)] for row in rows])
    return x, y, matrix
 # 3. Write the one-dimensional data y(x) to filename.txt
 def write_one_dimensional_data(x, y, filename='a'):
    """Write x and y as columns separated by three spaces.

    Args:
        x: iterable of x values, one per output line.
        y: array-like; 1-D (one value per x) or 2-D (one row of values per x).
            Lists are accepted as well as arrays.
        filename: path of the output file without the ``.txt`` suffix.
    """
    y = np.asarray(y)  # lets plain (nested) lists through; the code below needs .ndim and indexing
    with open(filename+'.txt', 'w') as f:
        for i0, x0 in enumerate(x):
            f.write(str(x0)+'   ')
            if y.ndim == 1:
                f.write(str(y[i0])+'\n')
            elif y.ndim == 2:
                for y0 in y[i0]:
                    f.write(str(y0)+'   ')
                f.write('\n')
 # 4. Write the two-dimensional data matrix(x,y) to filename.txt
 def write_two_dimensional_data(x, y, matrix, filename='a'):
    """Write a table with x in the first line and y in the first column.

    The first line starts with the placeholder ``0``. Every following line
    holds one y value and the matching matrix row.

    Args:
        x: iterable of x values (columns).
        y: iterable of y values (rows).
        matrix: array-like indexed as ``matrix[row, column]``; nested lists
            are accepted as well as arrays.
        filename: path of the output file without the ``.txt`` suffix.
    """
    matrix = np.asarray(matrix)  # lets nested lists through; tuple indexing needs an array
    with open(filename+'.txt', 'w') as f:
        f.write('0   ')
        for x0 in x:
            f.write(str(x0)+'   ')
        f.write('\n')
        for i0, y0 in enumerate(y):
            f.write(str(y0))
            for j0, _ in enumerate(x):
                f.write('   '+str(matrix[i0, j0])+'   ')
            f.write('\n')
 # 5. Plot y(x) and optionally save it to filename.jpg. The figure format can be changed inside the function.
 def plot(x, y, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw y against x as a line with circle markers on a grid.

    The figure is written to ``filename + '.jpg'`` when ``save == 1`` and
    displayed when ``show == 1``; all figures are closed afterwards.
    """
    import matplotlib.pyplot as plt
    font_name = 'Times New Roman'
    figure, axes = plt.subplots()
    plt.subplots_adjust(bottom=0.20, left=0.18)
    axes.plot(x, y, '-o')
    axes.grid()
    axes.set_title(title, fontsize=20, fontfamily=font_name)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font_name)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font_name)
    axes.tick_params(labelsize=20)
    # Apply the font to every tick label on both axes.
    for tick_label in axes.get_xticklabels() + axes.get_yticklabels():
        tick_label.set_fontname(font_name)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
 # 6. Plot a 3d_surface figure and optionally save it to filename.jpg. The figure format can be changed inside the function.
 def plot_3d_surface(x, y, matrix, xlabel='x', ylabel='y', zlabel='z', title='', filename='a', show=1, save=0): 
    """Draw ``matrix`` as a 3-D surface over the mesh grid of x and y, with a colour bar.

    A 2-D ``matrix`` gives one surface; a 3-D ``matrix`` gives one surface per
    index of its last axis. The figure is written to ``filename + '.jpg'`` when
    ``save == 1`` and displayed when ``show == 1``; all figures are closed
    afterwards.
    """
    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator
    fig, ax = plt.subplots(subplot_kw={"projection": "3d"})
    plt.subplots_adjust(bottom=0.1, right=0.65) 
    x, y = np.meshgrid(x, y)
    # NOTE(review): `surf` stays unbound when matrix is neither 2-D nor 3-D, so
    # the colorbar call below would raise NameError — confirm callers never do that.
    if len(matrix.shape) == 2:
        surf = ax.plot_surface(x, y, matrix, cmap=cm.coolwarm, linewidth=0, antialiased=False) 
    elif len(matrix.shape) == 3:
        for i0 in range(matrix.shape[2]):
            surf = ax.plot_surface(x, y, matrix[:,:,i0], cmap=cm.coolwarm, linewidth=0, antialiased=False) 
    ax.set_title(title, fontsize=20, fontfamily='Times New Roman')
    ax.set_xlabel(xlabel, fontsize=20, fontfamily='Times New Roman') 
    ax.set_ylabel(ylabel, fontsize=20, fontfamily='Times New Roman') 
    ax.set_zlabel(zlabel, fontsize=20, fontfamily='Times New Roman') 
    ax.zaxis.set_major_locator(LinearLocator(5)) 
    ax.zaxis.set_major_formatter('{x:.2f}')   
    ax.tick_params(labelsize=15) 
    labels = ax.get_xticklabels() + ax.get_yticklabels() + ax.get_zticklabels()
    [label.set_fontname('Times New Roman') for label in labels] 
    # Separate axes for the colour bar; it uses the last surface that was drawn.
    cax = plt.axes([0.80, 0.15, 0.05, 0.75]) 
    cbar = fig.colorbar(surf, cax=cax)  
    cbar.ax.tick_params(labelsize=15)
    for l in cbar.ax.yaxis.get_ticklabels():
        l.set_family('Times New Roman')
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300) 
    if show == 1:
        plt.show()
    plt.close('all')
 # 7. Plot a contour figure and optionally save it to filename.jpg. The figure format can be changed inside the function.
 def plot_contour(x, y, matrix, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw ``matrix`` as filled contours over the mesh grid of x and y, with a colour bar.

    The figure is written to ``filename + '.jpg'`` when ``save == 1`` and
    displayed when ``show == 1``; all figures are closed afterwards.
    """
    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator
    font_name = 'Times New Roman'
    figure, axes = plt.subplots()
    plt.subplots_adjust(bottom=0.2, right=0.75, left = 0.16)
    x, y = np.meshgrid(x, y)
    filled = axes.contourf(x,y,matrix,cmap='jet')
    axes.set_title(title, fontsize=20, fontfamily=font_name)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font_name)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font_name)
    axes.tick_params(labelsize=15)
    for tick_label in axes.get_xticklabels() + axes.get_yticklabels():
        tick_label.set_fontname(font_name)
    # Separate axes for the colour bar.
    colorbar_axes = plt.axes([0.78, 0.17, 0.08, 0.71])
    colorbar = figure.colorbar(filled, cax=colorbar_axes)
    colorbar.ax.tick_params(labelsize=15)
    for tick_label in colorbar.ax.yaxis.get_ticklabels():
        tick_label.set_family(font_name)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
 # 8. Plot a 2d_scatter figure and optionally save it to filename.jpg. The figure format can be changed inside the function.
 def plot_2d_scatter(x, y, value, xlabel='x', ylabel='y', title='', filename='a', show=1, save=0):
    """Draw one red circle per point, with marker area ``100*value[i]``.

    The figure is written to ``filename + '.jpg'`` when ``save == 1`` and
    displayed when ``show == 1``; all figures are closed afterwards.
    """
    import matplotlib.pyplot as plt
    from matplotlib.axes._axes import _log as matplotlib_axes_logger
    matplotlib_axes_logger.setLevel('ERROR')  # only ERROR and above from this matplotlib logger
    font_name = 'Times New Roman'
    figure = plt.figure()
    axes = figure.add_subplot(111)
    plt.subplots_adjust(bottom=0.2, right=0.8, left=0.2)
    point_count = np.array(x).shape[0]
    for index in range(point_count):
        axes.scatter(x[index], y[index], marker='o', s=100*value[index], c=(1,0,0))
    axes.set_title(title, fontsize=20, fontfamily=font_name)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font_name)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font_name)
    axes.tick_params(labelsize=15)
    for tick_label in axes.get_xticklabels() + axes.get_yticklabels():
        tick_label.set_fontname(font_name)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
 # 9. Plot a 3d_scatter figure and optionally save it to filename.jpg. The figure format can be changed inside the function.
 def plot_3d_scatter(x, y, z, value, xlabel='x', ylabel='y', zlabel='z', title='', filename='a', show=1, save=0):
    """Draw one red circle per 3-D point, with marker area ``int(100*value[i])``.

    The figure is written to ``filename + '.jpg'`` when ``save == 1`` and
    displayed when ``show == 1``; all figures are closed afterwards.
    """
    import matplotlib.pyplot as plt
    from matplotlib.ticker import LinearLocator
    from matplotlib.axes._axes import _log as matplotlib_axes_logger
    matplotlib_axes_logger.setLevel('ERROR')  # only ERROR and above from this matplotlib logger
    font_name = 'Times New Roman'
    figure = plt.figure()
    axes = figure.add_subplot(111, projection='3d')
    plt.subplots_adjust(bottom=0.1, right=0.8)
    point_count = np.array(x).shape[0]
    for index in range(point_count):
        axes.scatter(x[index], y[index], z[index], marker='o', s=int(100*value[index]), c=(1,0,0))
    axes.set_title(title, fontsize=20, fontfamily=font_name)
    axes.set_xlabel(xlabel, fontsize=20, fontfamily=font_name)
    axes.set_ylabel(ylabel, fontsize=20, fontfamily=font_name)
    axes.set_zlabel(zlabel, fontsize=20, fontfamily=font_name)
    axes.tick_params(labelsize=15)
    for tick_label in axes.get_xticklabels() + axes.get_yticklabels() + axes.get_zticklabels():
        tick_label.set_fontname(font_name)
    if save == 1:
        plt.savefig(filename+'.jpg', dpi=300)
    if show == 1:
        plt.show()
    plt.close('all')
 # 10. Create an animation
 def creat_animation(image_names, duration_time=0.5, filename='a'):
    """Combine ``<name>.jpg`` images into ``filename + '.gif'``.

    Args:
        image_names: iterable of image paths without the ``.jpg`` suffix.
        duration_time: value passed to imageio as ``duration`` (the frame delay).
        filename: path of the output file without the ``.gif`` suffix.
    """
    import imageio
    frames = [imageio.imread(name+'.jpg') for name in image_names]
    imageio.mimsave(filename+'.gif', frames, 'GIF', duration=duration_time)  # duration is the delay time
 # 11. For each parameter x, compute the eigenvalues of matrix(x): eigenvalue_array[:, index]
 def calculate_eigenvalue_with_one_parameter(x, matrix):
    """Evaluate ``matrix`` at every x and collect the sorted real parts of its eigenvalues.

    Args:
        x: sequence of parameter values.
        matrix: callable returning a scalar or a square matrix for one
            parameter value.

    Returns:
        numpy.ndarray: shape ``(len(x), dim)``; row i holds the ascending real
        parts of the eigenvalues of ``matrix(x[i])``. ``dim`` is 1 when
        ``matrix`` returns a scalar.
    """
    dim_x = np.array(x).shape[0]
    if dim_x == 0:
        # Nothing to evaluate: probe at 0 only to learn the eigenvalue count.
        probe = np.array(matrix(0))
        dim = 1 if probe.shape == () else probe.shape[0]
        return np.zeros((0, dim))
    eigenvalue_array = None
    for i0, x0 in enumerate(x):
        matrix0 = matrix(x0)
        if np.array(matrix0).shape == ():
            values = np.array([np.real(matrix0)])
        else:
            # eigvals skips the eigenvectors, which were never used.
            values = np.sort(np.real(np.linalg.eigvals(matrix0)))
        if eigenvalue_array is None:
            # Size the result from the first real evaluation instead of
            # matrix(0), which may be undefined (e.g. 1/x) or outside x.
            eigenvalue_array = np.zeros((dim_x, values.shape[0]))
        eigenvalue_array[i0, :] = values
    return eigenvalue_array
 # 12. For each parameter pair (x,y), compute the eigenvalues of matrix(x,y): eigenvalue_array[:, :, index]
 def calculate_eigenvalue_with_two_parameters(x, y, matrix):
    """Evaluate ``matrix`` on the (x, y) grid and collect the sorted real parts of its eigenvalues.

    Args:
        x: sequence of values for the first parameter.
        y: sequence of values for the second parameter.
        matrix: callable ``matrix(x0, y0)`` returning a scalar or a square matrix.

    Returns:
        numpy.ndarray: shape ``(len(y), len(x), dim)``; entry ``[i, j, :]``
        holds the ascending real parts of the eigenvalues of
        ``matrix(x[j], y[i])``. ``dim`` is 1 when ``matrix`` returns a scalar.
    """
    dim_x = np.array(x).shape[0]
    dim_y = np.array(y).shape[0]
    if dim_x == 0 or dim_y == 0:
        # Nothing to evaluate: probe at (0, 0) only to learn the eigenvalue count.
        probe = np.array(matrix(0, 0))
        dim = 1 if probe.shape == () else probe.shape[0]
        return np.zeros((dim_y, dim_x, dim))
    eigenvalue_array = None
    for i0, y0 in enumerate(y):
        for j0, x0 in enumerate(x):
            matrix0 = matrix(x0, y0)
            if np.array(matrix0).shape == ():
                values = np.array([np.real(matrix0)])
            else:
                # eigvals skips the eigenvectors, which were never used.
                values = np.sort(np.real(np.linalg.eigvals(matrix0)))
            if eigenvalue_array is None:
                # Size the result from the first real evaluation instead of
                # matrix(0, 0), which may be undefined or outside the grid.
                eigenvalue_array = np.zeros((dim_y, dim_x, values.shape[0]))
            eigenvalue_array[i0, j0, :] = values
    return eigenvalue_array
 # Run main() only when this file is executed as a script.
 if __name__ == "__main__":
    main()
--- a/2021.01.23_find_key_words_in_pdf_files_with_pdfminer/find_key_words_in_pdf_files.py
+++ b/2021.01.23_find_key_words_in_pdf_files_with_pdfminer/find_key_words_in_pdf_files.py
@@ -1,137 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/9129
 """
 import os
 import re 
 import time
 import logging 
 # NOTE(review): this sets a class attribute; Logger instances normally carry
 # their own `propagate` attribute, so this line may have no effect — confirm.
 logging.Logger.propagate = False 
 logging.getLogger().setLevel(logging.ERROR)  # show only messages of level ERROR and above
 def main():
    """Search every PDF below a folder for key words and record the hits.

    One ``<key word>.txt`` report is written per key word, plus ``error.txt``
    for the PDFs that could not be parsed. Each key word is used as a regular
    expression. Progress and timing are printed.
    """
    # parameters
    key_word_array = ['photonic', 'Berry phase']
    original_path = 'D:\\文献'
    # find the paths of all PDF files
    pdf_file_all = find_files_pdf(original_path)
    print('\n该文件夹下总共有', len(pdf_file_all), '个PDF文件。\n')
    # start with an empty error report
    with open('error.txt','w',encoding='utf-8'):
        pass
    for key_word in key_word_array:
        with open(str(key_word)+'.txt','w',encoding='utf-8') as f:
            f.write('该文件夹下总共有'+str(len(pdf_file_all))+'个PDF文件。\n')
    # find the PDF files that contain the key words
    begin = time.time()
    for i0, pdf_file in enumerate(pdf_file_all, start=1):
        print('查找第', i0, '个文件，', end='')
        begin0 = time.time()
        try:
            content = get_text_from_pdf(pdf_file)
            for key_word in key_word_array:
                if re.search(re.compile(key_word),content):
                    print('发现文件！关键词', key_word, '对应的文件位置在：\n\n', pdf_file, '\n')
                    with open(str(key_word)+'.txt','a',encoding='utf-8') as f:
                        f.write('\n查找第'+str(i0)+'个文件时发现文件！位置在：\n'+pdf_file+'\n')
        except Exception:
            # A bare `except:` also swallowed KeyboardInterrupt/SystemExit, so a
            # long scan could not be stopped; only ordinary errors are logged.
            print('出现异常！位置在：\n\n', pdf_file, '\n')
            with open('error.txt','a',encoding='utf-8') as f:
                f.write('\n解析第'+str(i0)+'个文件时出现异常！位置在：\n'+pdf_file+'\n')
        end0 = time.time()
        print('用时', end0-begin0, '秒')
    print('\n全部搜索结束！')
    end = time.time()
    print('\n总共用时：', (end-begin)/60, '分')
 def find_files_pdf(path):  # find all PDF files
    """Return the paths of all files below ``path`` whose names end with '.pdf'.

    The previous reversed-regex test ('^fdp.') also accepted names such as
    'xpdf', because '.' matches any character.
    """
    return [file0 for file0 in find_files(path) if file0.endswith('.pdf')]
 def find_files(path):  # find all files
    """Return the paths of all files below ``path``, level by level.

    Files directly inside ``path`` come first, then the files one level
    deeper, and so on until no sub-directories are left.
    """
    file_all = []
    path_next_loop = [path]
    while path_next_loop:
        file_all_in_one_loop, path_next_loop = find_files_loop_module(path_next_loop)
        file_all.extend(file_all_in_one_loop)
    return file_all
 def find_files_loop_module(path_all): # one level of the file search
    """List the entries of each directory in ``path_all``.

    Returns:
        tuple: ``(files, directories)`` holding the full paths of the files
        and of the sub-directories found. Entries that are neither (e.g.
        broken links) are ignored instead of being listed as directories.
    """
    file_all_in_one_loop = []
    path_next_loop = []
    for path in path_all:
        for filename in os.listdir(path):
            filename = os.path.join(path,filename)
            if os.path.isfile(filename): # a file
                file_all_in_one_loop.append(filename)
            elif os.path.isdir(filename):  # a directory
                path_next_loop.append(filename)
    return file_all_in_one_loop, path_next_loop
 def get_text_from_pdf(file_path):  # get the text from a PDF
    """Return the text of all text boxes in a PDF, stripped and concatenated.

    Args:
        file_path: path of the PDF file.

    Returns:
        str: the stripped text of every LTTextBox, page by page, joined
        without a separator.

    Raises:
        PDFTextExtractionNotAllowed: if the document reports that it is not
            extractable.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    # The with-block closes the file; it used to stay open for every PDF scanned.
    with open(file_path, 'rb') as pdf_stream:
        # create a PDF parser from the file object
        praser = PDFParser(pdf_stream)
        # create a PDF document
        doc = PDFDocument()
        # connect the parser and the document
        praser.set_document(doc)
        doc.set_parser(praser)
        # initialise the document; no password is passed
        doc.initialize()
        # stop when the document does not allow text extraction
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        # resource manager for shared resources
        rsrcmgr = PDFResourceManager()
        # PDF device object
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        # PDF interpreter object
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # process one page at a time
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # layout holds the objects parsed from this page
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    # Joining once avoids rebuilding the whole string for every text box.
    return ''.join(pieces)
 # Run main() only when this file is executed as a script.
 if __name__ == "__main__":
    main()
--- a/2021.01.23_find_key_words_in_pdf_files_with_pdfminer/get_content_in_a_pdf_file.py
+++ b/2021.01.23_find_key_words_in_pdf_files_with_pdfminer/get_content_in_a_pdf_file.py
@@ -1,63 +0,0 @@
 import os
 os.chdir('D:/')  # directory that holds the PDF file
 import logging 
 # NOTE(review): this sets a class attribute; Logger instances normally carry
 # their own `propagate` attribute, so this line may have no effect — confirm.
 logging.Logger.propagate = False 
 logging.getLogger().setLevel(logging.ERROR)  # show only messages of level ERROR and above
 def main():
    """Extract the text of a.pdf and store it in a.txt (UTF-8)."""
    extracted_text = get_text_from_pdf('a')
    with open('a.txt', 'w', encoding='utf-8') as output_file:
        output_file.write(extracted_text)
 def get_text_from_pdf(filename):
    """Return the text of all text boxes in ``filename + '.pdf'``, stripped and concatenated.

    Args:
        filename: path of the PDF file without the ``.pdf`` suffix.

    Returns:
        str: the stripped text of every LTTextBox, page by page, joined
        without a separator.

    Raises:
        PDFTextExtractionNotAllowed: if the document reports that it is not
            extractable.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    path = filename+".pdf"
    # The with-block closes the file; it used to be left open.
    with open(path, 'rb') as pdf_stream:
        # create a PDF parser from the file object
        praser = PDFParser(pdf_stream)
        # create a PDF document
        doc = PDFDocument()
        # connect the parser and the document
        praser.set_document(doc)
        doc.set_parser(praser)
        # initialise the document; no password is passed
        doc.initialize()
        # stop when the document does not allow text extraction
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        # resource manager for shared resources
        rsrcmgr = PDFResourceManager()
        # PDF device object
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        # PDF interpreter object
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # process one page at a time
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # layout holds the objects parsed from this page
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    # Joining once avoids rebuilding the whole string for every text box.
    return ''.join(pieces)
 # Run main() only when this file is executed as a script.
 if __name__ == "__main__":
    main()
--- a/2022.02.16_change_directory_by_replacement/change_directory_by_replacement
+++ b/2022.02.16_change_directory_by_replacement/change_directory_by_replacement
@@ -1,4 +0,0 @@
 import guan
 # Switch the working directory via the guan helper, passing 'code' and 'data'
 # as the current and new key words, then write a small data file there.
 guan.change_directory_by_replacement(current_key_word='code', new_key_word='data')
 with open('data.txt', 'w') as f: # save the data
    f.write('Hello world') 
--- a/2022.02.16_change_directory_by_replacement/change_directory_by_replacement
+++ b/2022.02.16_change_directory_by_replacement/change_directory_by_replacement
@@ -1,4 +0,0 @@
 import guan
 # Switch the working directory via the guan helper, passing 'working/code' and
 # 'local/data' as the current and new key words, then write a small data file there.
 guan.change_directory_by_replacement(current_key_word='working/code', new_key_word='local/data')
 with open('data.txt', 'w') as f: # save the data
    f.write('Hello world') 
--- a/2022.02.16_change_directory_by_replacement/change_directory_by_replacement.py
+++ b/2022.02.16_change_directory_by_replacement/change_directory_by_replacement.py
@@ -1,9 +0,0 @@
 import os
 # Derive a data directory from the current working directory by replacing
 # 'code' with 'data' in its path, create it if needed, and write a file there.
 code_path = os.getcwd()  # current working directory (where the code is run)
 data_path = code_path.replace('\\', '/')  # use '/' instead of '\' to avoid path errors
 data_path = data_path.replace('code', 'data')  # replaces every 'code' in the path with 'data'
 # exist_ok=True replaces the separate "exists?" check and its race window.
 os.makedirs(data_path, exist_ok=True)
 os.chdir(data_path)  # switch to the data directory
 with open('data.txt', 'w') as f:  # save the data
    f.write('Hello world')
--- a/2022.08.31_batch_modify_file_name/batch_modify_file_name.py
+++ b/2022.08.31_batch_modify_file_name/batch_modify_file_name.py
@@ -1,41 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25453
 """
 import os
 # directory to scan
 directory = 'E:/'
 def main():
    """Print the old and the proposed new name of every pdf/djvu file below ``directory``.

    Nothing is renamed: the rename call is kept commented out on purpose.
    """
    for root, dirs, files in os.walk(directory):
        for name in files:
            if not ('pdf' in name or 'djvu' in name):  # only files matching the condition
                continue
            print(name)  # old file name
            new_name = modify_name(name)
            print(new_name)  # new file name
            print()
            # # Rename the file. Check the code above first, in particular that the
            # # rule in modify_name fits every file, before enabling the lines below;
            # # otherwise files may be missed or names mixed up.
            # if new_name != None:
            #     os.rename(root+'/'+name, root+'/'+new_name)
 def modify_name(name):  # change the file name by a fixed rule
    """Swap the first two of three ' - '-separated parts of ``name``.

    Returns:
        The new name, or None (after printing 'Miss:' and the name) when
        ``name`` does not consist of exactly three parts.
    """
    parts = name.split(' - ')  # cut names of this kind at ' - '
    if len(parts) == 3:
        first, second, third = parts
        return second+' - '+first+' - '+third  # swap the first two parts
    print('Miss:', name)
    return None  # the rule does not apply, so no new name
 # Run main() only when this file is executed as a script.
 if __name__ == '__main__':
    main()
--- a/2022.09.07_move_all_files_to_root_directory/move_all_files_to_root_directory.py
+++ b/2022.09.07_move_all_files_to_root_directory/move_all_files_to_root_directory.py
@@ -1,35 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25685
 """
 # WARNING: use this program with care so that system or personal files are not damaged by mistake. Double-check the chosen directory before running; in particular it must not be too high up in the file tree.
 def main():
    """Flatten the hard-coded folder 'E:/test/all_files'."""
    target_directory = 'E:/test/all_files'  # directory to flatten
    move_all_files_to_root_directory(target_directory)
    # import guan
    # guan.move_all_files_to_root_directory(target_directory)
 def move_all_files_to_root_directory(directory):
    """Move every file below ``directory`` into ``directory`` and remove the emptied folders.

    A file is left where it is when a file of the same name already exists in
    ``directory``; a notice is printed and its folder is kept. Previously such
    a file could overwrite the existing one.

    Args:
        directory: root folder that receives all files.
    """
    import os
    import shutil
    for root, dirs, files in os.walk(directory):
        for name in files:
            target = os.path.join(directory, name)
            if os.path.exists(target):
                # Either the file is already in the root, or another file
                # with this name is: never overwrite.
                if os.path.abspath(root) != os.path.abspath(directory):
                    print('Skipped (name already exists in root directory):', os.path.join(root, name))
                continue
            shutil.move(os.path.join(root, name), target)  # move the file to the root directory
    # Walking bottom-up visits children before parents, so one pass removes
    # nested empty folders of any depth.
    for root, dirs, files in os.walk(directory, topdown=False):
        try:
            os.rmdir(root)  # succeeds only for empty folders
        except OSError:
            pass  # folder is not empty (or cannot be removed): keep it
 # Run main() only when this file is executed as a script.
 if __name__ == '__main__':
    main()
--- a/2022.09.08_get_file_list_and_write_in_markdown/get_file_list_and_write_in_markdown.py
+++ b/2022.09.08_get_file_list_and_write_in_markdown/get_file_list_and_write_in_markdown.py
@@ -1,116 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25699
 """
 def main():
    """Write the file list of 'E:/literature' to a markdown file with the default options."""
    write_file_list_in_markdown('E:/literature')
 def write_file_list_in_markdown(directory='./', filename='a', reverse_positive_or_negative=1, starting_from_h1=None, banned_file_format=[], hide_file_format=None, divided_line=None, show_second_number=None, show_third_number=None):
    """Write the folder tree below ``directory`` to ``filename + '.md'``.

    Files become ``+ name`` list items and folders become headings whose level
    follows the nesting depth. Folders are followed down to six levels; a
    folder on the sixth level gets a heading but its content is not listed.

    Args:
        directory: folder whose content is listed.
        filename: path of the markdown file without the ``.md`` suffix.
        reverse_positive_or_negative: slice step applied to every directory
            listing (1 keeps the order, -1 reverses it).
        starting_from_h1: if None, top-level folders start at ``##``;
            otherwise at ``#``.
        banned_file_format: file extensions (with dot) that are not listed.
            Only read, never modified, so the shared default list is safe.
        hide_file_format: if not None, file names are written without extension.
        divided_line: if not None, a ``--------`` line separates top-level folders.
        show_second_number: if not None, second-level folders are numbered ``1. ``.
        show_third_number: if not None, third-level folders are numbered ``(1) ``.
    """
    import os
    deepest_level = 6
    extra_hash = '#' if starting_from_h1 is None else ''

    def write_level(f, path, level):
        # Write the entries of one folder; recurse into its sub-folders.
        folder_count = 0
        for name in os.listdir(path)[::reverse_positive_or_negative]:
            name_with_path = os.path.join(path, name)
            if os.path.isfile(name_with_path):  # file
                if os.path.splitext(name)[1] not in banned_file_format:
                    shown = name if hide_file_format is None else os.path.splitext(name)[0]
                    f.write('+ '+str(shown)+'\n\n')
                continue
            # folder
            folder_count += 1
            if level == 1 and divided_line is not None and folder_count != 1:
                f.write('--------\n\n')
            if level == 2 and show_second_number is not None:
                label = str(folder_count)+'. '+str(name)
            elif level == 3 and show_third_number is not None:
                label = '('+str(folder_count)+') '+str(name)
            else:
                label = str(name)
            f.write(extra_hash+'#'*level+' '+label+'\n\n')
            if level < deepest_level:
                write_level(f, name_with_path, level+1)

    # The with-block closes the output file even if listing a folder fails.
    with open(filename+'.md', 'w', encoding="utf-8") as f:
        write_level(f, directory, 1)
 # Run main() only when this file is executed as a script.
 if __name__ == '__main__':
    main()
--- a/2022.09.12_creat_necessary_file_or_delete_file_with_specific_name/creat_necessary_file_or_delete_file_with_specific_name.py
+++ b/2022.09.12_creat_necessary_file_or_delete_file_with_specific_name/creat_necessary_file_or_delete_file_with_specific_name.py
@@ -1,55 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/25943
 """
 def main():
     """Run the example: call creat_necessary_file on the sample directory E:/test."""
     creat_necessary_file('E:/test')
     # Disabled alternatives, kept as usage hints:
     #   delete_file_with_specific_name('E:/test')
     #   import guan
     #   guan.creat_necessary_file('E:/test')
     #   guan.delete_file_with_specific_name('E:/test')
 def creat_necessary_file(directory, filename='readme', file_format='.md', content='', overwrite=None, ignored_directory_with_words=[]):
     """Create ``filename + file_format`` in every directory under ``directory`` that contains files.

     Args:
         directory: Root of the tree to walk. May be absolute or relative.
         filename: Base name of the file to create.
         file_format: Extension (including the dot) appended to ``filename``.
         content: Text written into each created file (UTF-8).
         overwrite: When ``None`` (default), directories that already hold the
             target file are skipped; any other value rewrites the existing file.
         ignored_directory_with_words: Directories whose path contains any of
             these substrings are skipped. The list is only read, never mutated.

     Directories that contain no files at all are never touched. The current
     working directory of the process is left unchanged.
     """
     import os
     target_name = filename + file_format
     # Collect all destinations before writing, so files created here cannot
     # influence which directories are selected.
     target_directories = []
     for root, dirs, files in os.walk(directory):
         if not files:
             continue  # only directories that already hold at least one file qualify
         if overwrite is None and target_name in files:
             continue  # keep the existing file unless overwriting was requested
         if any(word in root for word in ignored_directory_with_words):
             continue
         target_directories.append(root)
     for root in target_directories:
         # Build the full path instead of os.chdir(root): changing the working
         # directory breaks relative roots after the first one and leaks a
         # changed cwd to the caller.
         with open(os.path.join(root, target_name), 'w', encoding="utf-8") as f:
             f.write(content)
 def delete_file_with_specific_name(directory, filename='readme', file_format='.md'):
     """Delete every file named ``filename + file_format`` found anywhere under ``directory``."""
     import os
     unwanted = filename+file_format
     for folder, _subfolders, names in os.walk(directory):
         for name in names:
             if name != unwanted:
                 continue
             os.remove(folder+'/'+name)
 # Run main() only when this file is executed as a script, not when it is imported.
 if __name__ == '__main__':
    main()
--- a/2022.09.14_find_repeated_file_with_same_filename/find_repeated_file_with_same_filename.py
+++ b/2022.09.14_find_repeated_file_with_same_filename/find_repeated_file_with_same_filename.py
@@ -1,45 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/26113
 """
 # 仅支持文件名判断是否重复，不支持对文件内容的判断。
 # 如需对文件名和内容都判断，需要计算文件的哈希值。这里暂时不考虑。
 def main():
     """Run the example: print the repeated file names found under E:/test."""
     print(find_repeated_file_with_same_filename('E:/test'))
     # Equivalent call through the guan package, kept as a usage hint:
     #   import guan
     #   repeated_file = guan.find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000)
     #   print(repeated_file)
 def find_repeated_file_with_same_filename(directory='./', ignored_directory_with_words=[], ignored_file_with_words=[], num=1000):
     """Find file names that occur more than once anywhere under ``directory``.

     Only names are compared; file contents are not inspected.

     Args:
         directory: Root of the tree to walk.
         ignored_directory_with_words: Files located in a directory whose path
             contains any of these substrings are not counted. Read only.
         ignored_file_with_words: Files whose name contains any of these
             substrings are not counted. Read only.
         num: Only the ``num`` most common names are considered.

     Returns:
         A list of ``(file_name, count)`` tuples with ``count > 1``, most
         frequent first.
     """
     import os
     from collections import Counter
     file_list = []
     for root, dirs, files in os.walk(directory):
         # Decide inclusion up front. Appending and then removing once per
         # matching word removed the wrong entry (or raised ValueError) as soon
         # as two ignore words matched the same path.
         if any(word in root for word in ignored_directory_with_words):
             continue
         for name in files:
             if any(word in name for word in ignored_file_with_words):
                 continue
             file_list.append(name)
     return [item for item in Counter(file_list).most_common(num) if item[1] > 1]
 # Run main() only when this file is executed as a script, not when it is imported.
 if __name__ == '__main__':
    main()
--- a/2022.09.30_count_file_in_sub_directory/count_file_in_sub_directory.py
+++ b/2022.09.30_count_file_in_sub_directory/count_file_in_sub_directory.py
@@ -1,41 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/26536
 """
 def main():
     """Run the example on the current directory with a threshold of 5 files."""
     # Print the path of every sub-directory whose total number of files is below 5.
     threshold = 5
     count_file_in_sub_directory(directory='./', smaller_than_num=threshold)
     # Equivalent call through the guan package, kept as a usage hint:
     #   import guan
     #   guan.count_file_in_sub_directory(directory='./', smaller_than_num=5)
 def count_file_in_sub_directory(directory='./', smaller_than_num=None):
     """Print each sub-directory of ``directory`` together with its recursive file count.

     Every directory at any depth below ``directory`` is reported as its path,
     its total number of files (nested directories included) and a blank line.
     When ``smaller_than_num`` is given, only sub-directories holding fewer
     files than that number are printed.
     """
     import os
     # Gather every sub-directory path first, in os.walk order.
     sub_directories = [
         parent+'/'+child
         for parent, children, _files in os.walk(directory)
         for child in children
     ]
     for sub_dir in sub_directories:
         count_file = sum(len(names) for _root, _dirs, names in os.walk(sub_dir))
         if smaller_than_num is not None and not count_file<smaller_than_num:
             continue  # a threshold is set and this directory reaches it
         print(sub_dir)
         print(count_file)
         print()
 # Run main() only when this file is executed as a script, not when it is imported.
 if __name__ == '__main__':
    main()
--- a/2023.06.28_combine_two_pdf_files/combine_two_pdf_files.py
+++ b/2023.06.28_combine_two_pdf_files/combine_two_pdf_files.py
@@ -1,32 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/34649
 """
 import PyPDF2
 # Writer that accumulates the pages of both input files.
 output_pdf = PyPDF2.PdfWriter()
 # Append every page of a.pdf, then every page of b.pdf, in that order.
 for input_name in ('a.pdf', 'b.pdf'):
     with open(input_name, 'rb') as input_file:
         for page in PyPDF2.PdfReader(input_file).pages:
             output_pdf.add_page(page)
 # Save the merged document.
 with open('combined_file.pdf', 'wb') as combined_file:
     output_pdf.write(combined_file)
 # Equivalent call through the guan package, kept as a usage hint:
 # import guan
 # guan.combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_file='combined_file.pdf')