diff --git a/PyPI/setup.cfg b/PyPI/setup.cfg index 510432c..0d32f42 100644 --- a/PyPI/setup.cfg +++ b/PyPI/setup.cfg @@ -1,7 +1,7 @@ [metadata] # replace with your username: name = guan -version = 0.1.160 +version = 0.1.161 author = guanjihuan author_email = guanjihuan@163.com description = An open source python package diff --git a/PyPI/src/guan.egg-info/PKG-INFO b/PyPI/src/guan.egg-info/PKG-INFO index 1ec2a49..fcfd5cb 100644 --- a/PyPI/src/guan.egg-info/PKG-INFO +++ b/PyPI/src/guan.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.2 Name: guan -Version: 0.1.160 +Version: 0.1.161 Summary: An open source python package Home-page: https://py.guanjihuan.com Author: guanjihuan diff --git a/PyPI/src/guan/data_processing.py b/PyPI/src/guan/data_processing.py index cbfe026..94d9dbd 100644 --- a/PyPI/src/guan/data_processing.py +++ b/PyPI/src/guan/data_processing.py @@ -86,6 +86,116 @@ def run(function_name, *args, **kwargs): pass return return_data +# 获取运行的日期和时间并写入文件 +def statistics_with_day_and_time(content='', filename='time_logging', file_format='.txt'): + import datetime + datetime_today = str(datetime.date.today()) + datetime_time = datetime.datetime.now().strftime('%H:%M:%S') + with open(filename+file_format, 'a', encoding="utf-8") as f2: + if content == '': + f2.write(datetime_today+' '+datetime_time+'\n') + else: + f2.write(datetime_today+' '+datetime_time+' '+content+'\n') + +# 创建一个sh文件用于提交任务(PBS) +def make_sh_file_for_qsub(sh_filename='a', command_line='python a.py', cpu_num=1, task_name='task', cd_dir=0): + sh_content = \ + '#!/bin/sh\n' \ + +'#PBS -N '+task_name+'\n' \ + +'#PBS -l nodes=1:ppn='+str(cpu_num)+'\n' + if cd_dir==1: + sh_content += 'cd $PBS_O_WORKDIR\n' + sh_content += command_line + with open(sh_filename+'.sh', 'w') as f: + f.write(sh_content) + +# 创建一个sh文件用于提交任务(LSF) +def make_sh_file_for_bsub(sh_filename='a', command_line='python a.py', cpu_num=1, task_name='task', queue_name='score', cd_dir=0): + sh_content = \ + '#!/bin/sh\n' \ + +'#BSUB -J '+task_name+'\n' \ + +'#BSUB -q '+queue_name+'\n' \ + +'#BSUB -n '+str(cpu_num)+'\n' + if cd_dir==1: + sh_content += 'cd $PBS_O_WORKDIR\n' + sh_content += command_line + with open(sh_filename+'.sh', 'w') as f: + f.write(sh_content) + +# 复制.py和.sh文件,然后提交任务,实现半手动并行(PBS) +def copy_py_sh_file_and_qsub_task(parameter_array, py_filename='a', old_str_in_py='parameter = 0', new_str_in_py='parameter = ', sh_filename='a', qsub_task_name='task'): + import os + parameter_str_array = [] + for i0 in parameter_array: + parameter_str_array.append(str(i0)) + index = 0 + for parameter_str in parameter_str_array: + index += 1 + # copy python file + old_file = py_filename+'.py' + new_file = py_filename+'_'+str(index)+'.py' + os.system('cp '+old_file+' '+new_file) + with open(new_file, 'r') as f: + content = f.read() + old_str = old_str_in_py + new_str = new_str_in_py+parameter_str + content = content.replace(old_str, new_str) + with open(py_filename+'_'+str(index)+'.py', 'w') as f: + f.write(content) + # copy sh file + old_file = sh_filename+'.sh' + new_file = sh_filename+'_'+str(index)+'.sh' + os.system('cp '+old_file+' '+new_file) + with open(new_file, 'r') as f: + content = f.read() + old_str = 'python '+py_filename+'.py' + new_str = 'python '+py_filename+'_'+str(index)+'.py' + content = content.replace(old_str, new_str) + old_str = '#PBS -N '+qsub_task_name + new_str = '#PBS -N '+qsub_task_name+'_'+str(index) + content = content.replace(old_str, new_str) + with open(sh_filename+'_'+str(index)+'.sh', 'w') as f: + f.write(content) + # qsub task + os.system('qsub '+new_file) + +# 复制.py和.sh文件,然后提交任务,实现半手动并行(LSF) +def copy_py_sh_file_and_bsub_task(parameter_array, py_filename='a', old_str_in_py='parameter = 0', new_str_in_py='parameter = ', sh_filename='a', bsub_task_name='task'): + import os + parameter_str_array = [] + for i0 in parameter_array: + parameter_str_array.append(str(i0)) + index = 0 + for parameter_str in parameter_str_array: + index += 1 + # copy python file + old_file = py_filename+'.py' + new_file = py_filename+'_'+str(index)+'.py' + os.system('cp '+old_file+' '+new_file) + with open(new_file, 'r') as f: + content = f.read() + old_str = old_str_in_py + new_str = new_str_in_py+parameter_str + content = content.replace(old_str, new_str) + with open(py_filename+'_'+str(index)+'.py', 'w') as f: + f.write(content) + # copy sh file + old_file = sh_filename+'.sh' + new_file = sh_filename+'_'+str(index)+'.sh' + os.system('cp '+old_file+' '+new_file) + with open(new_file, 'r') as f: + content = f.read() + old_str = 'python '+py_filename+'.py' + new_str = 'python '+py_filename+'_'+str(index)+'.py' + content = content.replace(old_str, new_str) + old_str = '#BSUB -J '+bsub_task_name + new_str = '#BSUB -J '+bsub_task_name+'_'+str(index) + content = content.replace(old_str, new_str) + with open(sh_filename+'_'+str(index)+'.sh', 'w') as f: + f.write(content) + # bsub task + os.system('bsub < '+new_file) + # 获取矩阵的维度(考虑单一数值的矩阵维度为1) def dimension_of_array(array): import numpy as np @@ -180,105 +290,6 @@ def preprocess_for_parallel_calculations(parameter_array_all, task_num=1, task_i parameter_array = parameter_array_all[task_index*num_parameter:num_all] return parameter_array -# 创建一个sh文件用于提交任务(PBS) -def make_sh_file_for_qsub(sh_filename='a', command_line='python a.py', cpu_num=1, task_name='task', cd_dir=0): - sh_content = \ - '#!/bin/sh\n' \ - +'#PBS -N '+task_name+'\n' \ - +'#PBS -l nodes=1:ppn='+str(cpu_num)+'\n' - if cd_dir==1: - sh_content += 'cd $PBS_O_WORKDIR\n' - sh_content += command_line - with open(sh_filename+'.sh', 'w') as f: - f.write(sh_content) - -# 创建一个sh文件用于提交任务(LSF) -def make_sh_file_for_bsub(sh_filename='a', command_line='python a.py', cpu_num=1, task_name='task', queue_name='score', cd_dir=0): - sh_content = \ - '#!/bin/sh\n' \ - +'#BSUB -J '+task_name+'\n' \ - +'#BSUB -q '+queue_name+'\n' \ - +'#BSUB -n '+str(cpu_num)+'\n' - if cd_dir==1: - sh_content += 'cd $PBS_O_WORKDIR\n' - sh_content += command_line - with open(sh_filename+'.sh', 'w') as f: - f.write(sh_content) - -# 复制.py和.sh文件,然后提交任务,实现半手动并行(PBS) -def copy_py_sh_file_and_qsub_task(parameter_array, py_filename='a', old_str_in_py='parameter=0', new_str_in_py='parameter=', sh_filename='a', qsub_task_name='task'): - import os - parameter_str_array = [] - for i0 in parameter_array: - parameter_str_array.append(str(i0)) - index = 0 - for parameter_str in parameter_str_array: - index += 1 - # copy python file - old_file = py_filename+'.py' - new_file = py_filename+'_'+str(index)+'.py' - os.system('cp '+old_file+' '+new_file) - with open(new_file, 'r') as f: - content = f.read() - old_str = old_str_in_py - new_str = new_str_in_py+parameter_str - content = content.replace(old_str, new_str) - with open(py_filename+'_'+str(index)+'.py', 'w') as f: - f.write(content) - # copy sh file - old_file = sh_filename+'.sh' - new_file = sh_filename+'_'+str(index)+'.sh' - os.system('cp '+old_file+' '+new_file) - with open(new_file, 'r') as f: - content = f.read() - old_str = 'python '+py_filename+'.py' - new_str = 'python '+py_filename+'_'+str(index)+'.py' - content = content.replace(old_str, new_str) - old_str = qsub_task_name - new_str = qsub_task_name+'_'+str(index) - content = content.replace(old_str, new_str) - with open(sh_filename+'_'+str(index)+'.sh', 'w') as f: - f.write(content) - # qsub task - os.system('qsub '+new_file) - -# 复制.py和.sh文件,然后提交任务,实现半手动并行(LSF) -def copy_py_sh_file_and_bsub_task(parameter_array, py_filename='a', old_str_in_py='parameter=0', new_str_in_py='parameter=', sh_filename='a', bsub_task_name='task'): - import os - parameter_str_array = [] - for i0 in parameter_array: - parameter_str_array.append(str(i0)) - index = 0 - for parameter_str in parameter_str_array: - index += 1 - # copy python file - old_file = py_filename+'.py' - new_file = py_filename+'_'+str(index)+'.py' - os.system('cp '+old_file+' '+new_file) - with open(new_file, 'r') as f: - content = f.read() - old_str = old_str_in_py - new_str = new_str_in_py+parameter_str - content = content.replace(old_str, new_str) - with open(py_filename+'_'+str(index)+'.py', 'w') as f: - f.write(content) - # copy sh file - old_file = sh_filename+'.sh' - new_file = sh_filename+'_'+str(index)+'.sh' - os.system('cp '+old_file+' '+new_file) - with open(new_file, 'r') as f: - content = f.read() - old_str = 'python '+py_filename+'.py' - new_str = 'python '+py_filename+'_'+str(index)+'.py' - content = content.replace(old_str, new_str) - old_str = bsub_task_name - new_str = bsub_task_name+'_'+str(index) - content = content.replace(old_str, new_str) - with open(sh_filename+'_'+str(index)+'.sh', 'w') as f: - f.write(content) - # bsub task - os.system('bsub < '+new_file) - # 自动先后运行程序 def run_programs_sequentially(program_files=['./a.py', './b.py'], execute='python ', show_time=0): import os @@ -457,8 +468,8 @@ def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fil with open(output_file, 'wb') as combined_file: output_pdf.write(combined_file) -# 将PDF文件转成文本 -def pdf_to_text(pdf_path): +# 使用pdfminer3k将PDF文件转成文本 +def pdf_to_text_with_pdfminer3k(pdf_path): from pdfminer.pdfparser import PDFParser, PDFDocument from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter from pdfminer.converter import PDFPageAggregator @@ -488,6 +499,16 @@ def pdf_to_text(pdf_path): content = content + x.get_text().strip() return content +# 使用PyPDF2将PDF文件转成文本 +def pdf_to_text_with_PyPDF2_for_all_pages(pdf_path): + import guan + num_pages = guan.get_pdf_page_number(pdf_path) + content = '' + for i0 in range(num_pages): + page_text = guan.pdf_to_txt_for_a_specific_page(pdf_path, page_num=i0+1) + content += page_text + '\n\n' + return content + # 获取PDF文件页数 def get_pdf_page_number(pdf_path): import PyPDF2 @@ -576,17 +597,6 @@ def get_time(colon=True): datetime_time = datetime_time.replace(':', '') return datetime_time -# 获取运行的日期和时间并写入文件 -def statistics_with_day_and_time(content='', filename='a', file_format='.txt'): - import datetime - datetime_today = str(datetime.date.today()) - datetime_time = datetime.datetime.now().strftime('%H:%M:%S') - with open(filename+file_format, 'a', encoding="utf-8") as f2: - if content == '': - f2.write(datetime_today+' '+datetime_time+'\n') - else: - f2.write(datetime_today+' '+datetime_time+' '+content+'\n') - # 获取本月的所有日期 def get_date_array_of_the_current_month(str_or_datetime='str'): import datetime diff --git a/PyPI/src/guan/deprecated.py b/PyPI/src/guan/deprecated.py index 277d298..317704d 100644 --- a/PyPI/src/guan/deprecated.py +++ b/PyPI/src/guan/deprecated.py @@ -31,4 +31,10 @@ def get_days_of_the_month_before_last(str_or_datetime='str'): import guan print('Warning: The current function name has been deprecated, which will be deleted in the future version. Please change it into guan.get_date_array_of_the_month_before_last().') date_array = guan.get_date_array_of_the_month_before_last(str_or_datetime=str_or_datetime) - return date_array \ No newline at end of file + return date_array + +def pdf_to_text(pdf_path): + import guan + print('Warning: The current function name has been deprecated, which will be deleted in the future version. Please change it into guan.pdf_to_text_with_pdfminer3k().') + content = guan.pdf_to_text_with_pdfminer3k(pdf_path) + return content \ No newline at end of file diff --git a/PyPI/src/guan/functions_using_objects_of_custom_classes.py b/PyPI/src/guan/functions_using_objects_of_custom_classes.py index 412a1a5..ccbf80b 100644 --- a/PyPI/src/guan/functions_using_objects_of_custom_classes.py +++ b/PyPI/src/guan/functions_using_objects_of_custom_classes.py @@ -25,6 +25,16 @@ def get_max_min_x_y_from_atom_object_list(atom_object_list): min_y = min(y_array) return max_x, min_x, max_y, min_y +# 从原子对象列表中获取满足坐标条件的索引 +def get_index_via_coordinate_from_atom_object_list(atom_object_list, x=0, y=0, z=0, eta=1e-3): + for atom in atom_object_list: + x_i = atom.x + y_i = atom.y + z_i = atom.z + index = atom.index + if abs(x-x_i)