0.1.161

2025-03-06 05:51:11 +08:00 · 2025-03-06 05:51:11 +08:00 · 7f0a36220a
commit 7f0a36220a
parent 9ddb69dc61
5 changed files with 141 additions and 115 deletions
--- a/PyPI/setup.cfg
+++ b/PyPI/setup.cfg
@@ -1,7 +1,7 @@
 [metadata]
 # replace with your username:
 name = guan
-version = 0.1.160
+version = 0.1.161
 author = guanjihuan
 author_email = guanjihuan@163.com
 description = An open source python package
--- a/PyPI/src/guan.egg-info/PKG-INFO
+++ b/PyPI/src/guan.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: guan
-Version: 0.1.160
+Version: 0.1.161
 Summary: An open source python package
 Home-page: https://py.guanjihuan.com
 Author: guanjihuan
--- a/PyPI/src/guan/data_processing.py
+++ b/PyPI/src/guan/data_processing.py
@@ -86,6 +86,116 @@ def run(function_name, *args, **kwargs):
        pass
    return return_data

+# 获取运行的日期和时间并写入文件
+def statistics_with_day_and_time(content='', filename='time_logging', file_format='.txt'):
+    import datetime
+    datetime_today = str(datetime.date.today())
+    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
+    with open(filename+file_format, 'a', encoding="utf-8") as f2:
+       if content == '':
+           f2.write(datetime_today+' '+datetime_time+'\n')
+       else:
+           f2.write(datetime_today+' '+datetime_time+' '+content+'\n')
+
+# 创建一个sh文件用于提交任务（PBS）
+def make_sh_file_for_qsub(sh_filename='a', command_line='python a.py', cpu_num=1, task_name='task', cd_dir=0):
+    sh_content = \
+        '#!/bin/sh\n' \
+        +'#PBS -N '+task_name+'\n' \
+        +'#PBS -l nodes=1:ppn='+str(cpu_num)+'\n'
+    if cd_dir==1:
+        sh_content += 'cd $PBS_O_WORKDIR\n'
+    sh_content += command_line
+    with open(sh_filename+'.sh', 'w') as f:
+        f.write(sh_content)
+
+# 创建一个sh文件用于提交任务（LSF）
+def make_sh_file_for_bsub(sh_filename='a', command_line='python a.py', cpu_num=1, task_name='task', queue_name='score', cd_dir=0):
+    sh_content = \
+        '#!/bin/sh\n' \
+        +'#BSUB -J '+task_name+'\n' \
+        +'#BSUB -q '+queue_name+'\n' \
+        +'#BSUB -n '+str(cpu_num)+'\n'
+    if cd_dir==1:
+        sh_content += 'cd $PBS_O_WORKDIR\n'
+    sh_content += command_line
+    with open(sh_filename+'.sh', 'w') as f:
+        f.write(sh_content)
+
+# 复制.py和.sh文件，然后提交任务，实现半手动并行（PBS）
+def copy_py_sh_file_and_qsub_task(parameter_array, py_filename='a', old_str_in_py='parameter = 0', new_str_in_py='parameter = ', sh_filename='a', qsub_task_name='task'):
+    import os
+    parameter_str_array = []
+    for i0 in parameter_array:
+        parameter_str_array.append(str(i0))
+    index = 0
+    for parameter_str in parameter_str_array:
+        index += 1
+        # copy python file
+        old_file = py_filename+'.py'
+        new_file = py_filename+'_'+str(index)+'.py'
+        os.system('cp '+old_file+' '+new_file)
+        with open(new_file, 'r') as f:
+            content  = f.read()
+        old_str = old_str_in_py
+        new_str = new_str_in_py+parameter_str
+        content = content.replace(old_str, new_str)
+        with open(py_filename+'_'+str(index)+'.py', 'w') as f:
+            f.write(content)
+        # copy sh file
+        old_file = sh_filename+'.sh'
+        new_file = sh_filename+'_'+str(index)+'.sh'
+        os.system('cp '+old_file+' '+new_file)
+        with open(new_file, 'r') as f:
+            content  = f.read()
+        old_str = 'python '+py_filename+'.py'
+        new_str = 'python '+py_filename+'_'+str(index)+'.py'
+        content = content.replace(old_str, new_str)
+        old_str = '#PBS -N '+qsub_task_name
+        new_str = '#PBS -N '+qsub_task_name+'_'+str(index)
+        content = content.replace(old_str, new_str)
+        with open(sh_filename+'_'+str(index)+'.sh', 'w') as f: 
+            f.write(content)
+        # qsub task
+        os.system('qsub '+new_file)
+
+# 复制.py和.sh文件，然后提交任务，实现半手动并行（LSF）
+def copy_py_sh_file_and_bsub_task(parameter_array, py_filename='a', old_str_in_py='parameter = 0', new_str_in_py='parameter = ', sh_filename='a', bsub_task_name='task'):
+    import os
+    parameter_str_array = []
+    for i0 in parameter_array:
+        parameter_str_array.append(str(i0))
+    index = 0
+    for parameter_str in parameter_str_array:
+        index += 1
+        # copy python file
+        old_file = py_filename+'.py'
+        new_file = py_filename+'_'+str(index)+'.py'
+        os.system('cp '+old_file+' '+new_file)
+        with open(new_file, 'r') as f:
+            content  = f.read()
+        old_str = old_str_in_py
+        new_str = new_str_in_py+parameter_str
+        content = content.replace(old_str, new_str)
+        with open(py_filename+'_'+str(index)+'.py', 'w') as f:
+            f.write(content)
+        # copy sh file
+        old_file = sh_filename+'.sh'
+        new_file = sh_filename+'_'+str(index)+'.sh'
+        os.system('cp '+old_file+' '+new_file)
+        with open(new_file, 'r') as f:
+            content  = f.read()
+        old_str = 'python '+py_filename+'.py'
+        new_str = 'python '+py_filename+'_'+str(index)+'.py'
+        content = content.replace(old_str, new_str)
+        old_str = '#BSUB -J '+bsub_task_name
+        new_str = '#BSUB -J '+bsub_task_name+'_'+str(index)
+        content = content.replace(old_str, new_str)
+        with open(sh_filename+'_'+str(index)+'.sh', 'w') as f: 
+            f.write(content)
+        # bsub task
+        os.system('bsub < '+new_file)
+
 # 获取矩阵的维度（考虑单一数值的矩阵维度为1）
 def dimension_of_array(array):
    import numpy as np
@@ -180,105 +290,6 @@ def preprocess_for_parallel_calculations(parameter_array_all, task_num=1, task_i
            parameter_array = parameter_array_all[task_index*num_parameter:num_all]
    return parameter_array

-# 创建一个sh文件用于提交任务（PBS）
-def make_sh_file_for_qsub(sh_filename='a', command_line='python a.py', cpu_num=1, task_name='task', cd_dir=0):
-    sh_content = \
-        '#!/bin/sh\n' \
-        +'#PBS -N '+task_name+'\n' \
-        +'#PBS -l nodes=1:ppn='+str(cpu_num)+'\n'
-    if cd_dir==1:
-        sh_content += 'cd $PBS_O_WORKDIR\n'
-    sh_content += command_line
-    with open(sh_filename+'.sh', 'w') as f:
-        f.write(sh_content)
-
-# 创建一个sh文件用于提交任务（LSF）
-def make_sh_file_for_bsub(sh_filename='a', command_line='python a.py', cpu_num=1, task_name='task', queue_name='score', cd_dir=0):
-    sh_content = \
-        '#!/bin/sh\n' \
-        +'#BSUB -J '+task_name+'\n' \
-        +'#BSUB -q '+queue_name+'\n' \
-        +'#BSUB -n '+str(cpu_num)+'\n'
-    if cd_dir==1:
-        sh_content += 'cd $PBS_O_WORKDIR\n'
-    sh_content += command_line
-    with open(sh_filename+'.sh', 'w') as f:
-        f.write(sh_content)
-
-# 复制.py和.sh文件，然后提交任务，实现半手动并行（PBS）
-def copy_py_sh_file_and_qsub_task(parameter_array, py_filename='a', old_str_in_py='parameter=0', new_str_in_py='parameter=', sh_filename='a', qsub_task_name='task'):
-    import os
-    parameter_str_array = []
-    for i0 in parameter_array:
-        parameter_str_array.append(str(i0))
-    index = 0
-    for parameter_str in parameter_str_array:
-        index += 1
-        # copy python file
-        old_file = py_filename+'.py'
-        new_file = py_filename+'_'+str(index)+'.py'
-        os.system('cp '+old_file+' '+new_file)
-        with open(new_file, 'r') as f:
-            content  = f.read()
-        old_str = old_str_in_py
-        new_str = new_str_in_py+parameter_str
-        content = content.replace(old_str, new_str)
-        with open(py_filename+'_'+str(index)+'.py', 'w') as f:
-            f.write(content)
-        # copy sh file
-        old_file = sh_filename+'.sh'
-        new_file = sh_filename+'_'+str(index)+'.sh'
-        os.system('cp '+old_file+' '+new_file)
-        with open(new_file, 'r') as f:
-            content  = f.read()
-        old_str = 'python '+py_filename+'.py'
-        new_str = 'python '+py_filename+'_'+str(index)+'.py'
-        content = content.replace(old_str, new_str)
-        old_str = qsub_task_name
-        new_str = qsub_task_name+'_'+str(index)
-        content = content.replace(old_str, new_str)
-        with open(sh_filename+'_'+str(index)+'.sh', 'w') as f: 
-            f.write(content)
-        # qsub task
-        os.system('qsub '+new_file)
-
-# 复制.py和.sh文件，然后提交任务，实现半手动并行（LSF）
-def copy_py_sh_file_and_bsub_task(parameter_array, py_filename='a', old_str_in_py='parameter=0', new_str_in_py='parameter=', sh_filename='a', bsub_task_name='task'):
-    import os
-    parameter_str_array = []
-    for i0 in parameter_array:
-        parameter_str_array.append(str(i0))
-    index = 0
-    for parameter_str in parameter_str_array:
-        index += 1
-        # copy python file
-        old_file = py_filename+'.py'
-        new_file = py_filename+'_'+str(index)+'.py'
-        os.system('cp '+old_file+' '+new_file)
-        with open(new_file, 'r') as f:
-            content  = f.read()
-        old_str = old_str_in_py
-        new_str = new_str_in_py+parameter_str
-        content = content.replace(old_str, new_str)
-        with open(py_filename+'_'+str(index)+'.py', 'w') as f:
-            f.write(content)
-        # copy sh file
-        old_file = sh_filename+'.sh'
-        new_file = sh_filename+'_'+str(index)+'.sh'
-        os.system('cp '+old_file+' '+new_file)
-        with open(new_file, 'r') as f:
-            content  = f.read()
-        old_str = 'python '+py_filename+'.py'
-        new_str = 'python '+py_filename+'_'+str(index)+'.py'
-        content = content.replace(old_str, new_str)
-        old_str = bsub_task_name
-        new_str = bsub_task_name+'_'+str(index)
-        content = content.replace(old_str, new_str)
-        with open(sh_filename+'_'+str(index)+'.sh', 'w') as f: 
-            f.write(content)
-        # bsub task
-        os.system('bsub < '+new_file)
-
 # 自动先后运行程序
 def run_programs_sequentially(program_files=['./a.py', './b.py'], execute='python ', show_time=0):
    import os
@@ -457,8 +468,8 @@ def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fil
    with open(output_file, 'wb') as combined_file:
        output_pdf.write(combined_file)

-# 将PDF文件转成文本
-def pdf_to_text(pdf_path):
+# 使用pdfminer3k将PDF文件转成文本
+def pdf_to_text_with_pdfminer3k(pdf_path):
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
@@ -488,6 +499,16 @@ def pdf_to_text(pdf_path):
                    content  = content + x.get_text().strip()
    return content

+# 使用PyPDF2将PDF文件转成文本
+def pdf_to_text_with_PyPDF2_for_all_pages(pdf_path):
+    import guan
+    num_pages = guan.get_pdf_page_number(pdf_path)
+    content = ''
+    for i0 in range(num_pages):
+        page_text = guan.pdf_to_txt_for_a_specific_page(pdf_path, page_num=i0+1)
+        content += page_text + '\n\n'
+    return content
+
 # 获取PDF文件页数
 def get_pdf_page_number(pdf_path):
    import PyPDF2
@@ -576,17 +597,6 @@ def get_time(colon=True):
        datetime_time = datetime_time.replace(':', '')
    return datetime_time

-# 获取运行的日期和时间并写入文件
-def statistics_with_day_and_time(content='', filename='a', file_format='.txt'):
-    import datetime
-    datetime_today = str(datetime.date.today())
-    datetime_time = datetime.datetime.now().strftime('%H:%M:%S')
-    with open(filename+file_format, 'a', encoding="utf-8") as f2:
-       if content == '':
-           f2.write(datetime_today+' '+datetime_time+'\n')
-       else:
-           f2.write(datetime_today+' '+datetime_time+' '+content+'\n')
-
 # 获取本月的所有日期
 def get_date_array_of_the_current_month(str_or_datetime='str'):
    import datetime
--- a/PyPI/src/guan/deprecated.py
+++ b/PyPI/src/guan/deprecated.py
@@ -31,4 +31,10 @@ def get_days_of_the_month_before_last(str_or_datetime='str'):
    import guan
    print('Warning: The current function name has been deprecated, which will be deleted in the future version. Please change it into guan.get_date_array_of_the_month_before_last().')
    date_array = guan.get_date_array_of_the_month_before_last(str_or_datetime=str_or_datetime)
-    return date_array
+    return date_array
+
+def pdf_to_text(pdf_path):
+    import guan
+    print('Warning: The current function name has been deprecated, which will be deleted in the future version. Please change it into guan.pdf_to_text_with_pdfminer3k().')
+    content = guan.pdf_to_text_with_pdfminer3k(pdf_path)
+    return content
--- a/PyPI/src/guan/functions_using_objects_of_custom_classes.py
+++ b/PyPI/src/guan/functions_using_objects_of_custom_classes.py
@@ -25,6 +25,16 @@ def get_max_min_x_y_from_atom_object_list(atom_object_list):
    min_y = min(y_array)
    return max_x, min_x, max_y, min_y

+# 从原子对象列表中获取满足坐标条件的索引
+def get_index_via_coordinate_from_atom_object_list(atom_object_list, x=0, y=0, z=0, eta=1e-3):
+    for atom in atom_object_list:
+        x_i = atom.x
+        y_i = atom.y
+        z_i = atom.z
+        index = atom.index
+        if abs(x-x_i)<eta and abs(y-y_i)<eta and abs(z-z_i)<eta:
+            return index
+
 # 根据原子对象列表来初始化哈密顿量
 def initialize_hamiltonian_from_atom_object_list(atom_object_list):
    import numpy as np