127 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			127 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Module: data_processing
 | ||
| 
 | ||
| # 并行计算前的预处理,把参数分成多份
 | ||
def preprocess_for_parallel_calculations(parameter_array_all, cpus=1, task_index=0):
    """Return the slice of parameters that one parallel task should process.

    Args:
        parameter_array_all: Sequence of all parameters; its length is taken
            from the first dimension of ``np.array(parameter_array_all)``.
        cpus: Number of tasks the parameters are divided among.
        task_index: Zero-based index of the task requesting its share.

    Returns:
        A slice of ``parameter_array_all`` (same type as slicing the input
        yields).

    When the total is divisible by ``cpus`` every task gets an equal share.
    Otherwise the first ``cpus - 1`` tasks each get
    ``int(total / (cpus - 1))`` items and the last task gets whatever is
    left, which may be empty.
    """
    import numpy as np
    total = np.array(parameter_array_all).shape[0]
    is_last_task = task_index == cpus - 1
    if total % cpus == 0:
        chunk = int(total / cpus)
        stop = (task_index + 1) * chunk
    else:
        # Uneven case: size the chunks for cpus-1 tasks, the last one takes the rest.
        chunk = int(total / (cpus - 1))
        stop = total if is_last_task else (task_index + 1) * chunk
    start = task_index * chunk
    return parameter_array_all[start:stop]
 | ||
| 
 | ||
| # 根据子数组的第index个元素对子数组进行排序(index从0开始)
 | ||
def sort_array_by_index_element(original_array, index):
    """Return a new list of the sub-arrays ordered by their ``index``-th element.

    Args:
        original_array: Iterable of indexable items (e.g. lists or tuples).
        index: Zero-based position inside each item used as the sort key.

    Returns:
        A new list sorted in ascending order of ``item[index]``; the input
        is not modified and items with equal keys keep their relative order.
    """
    def key_of(item):
        return item[index]

    ordered = list(original_array)
    ordered.sort(key=key_of)
    return ordered
 | ||
| 
 | ||
| # 随机获得一个整数,左闭右闭
 | ||
def get_random_number(start=0, end=1):
    """Return a random integer drawn from the closed interval [start, end].

    Args:
        start: Lower bound (inclusive).
        end: Upper bound (inclusive).

    Returns:
        The integer produced by ``random.randint(start, end)``.
    """
    import random
    # randint includes both endpoints.
    return random.randint(start, end)
 | ||
| 
 | ||
| # 选取一个种子生成固定的随机整数,左闭右开
 | ||
def generate_random_int_number_for_a_specific_seed(seed=0, x_min=0, x_max=10):
    """Return a reproducible random integer from the half-open range [x_min, x_max).

    Args:
        seed: Value passed to ``np.random.seed``.
        x_min: Lower bound (inclusive).
        x_max: Upper bound (exclusive).

    Returns:
        The integer drawn by ``np.random.randint`` right after seeding, so
        the same arguments always yield the same value.

    Note:
        This reseeds NumPy's global ``np.random`` state as a side effect.
    """
    import numpy as np
    np.random.seed(seed)
    # randint excludes the upper bound.
    return np.random.randint(low=x_min, high=x_max)
 | ||
| 
 | ||
| # 以显示编号的样式,打印数组
 | ||
def print_array_with_index(array, show_index=1, index_type=0):
    """Print each element of ``array`` on its own line, optionally numbered.

    Args:
        array: Iterable whose elements are printed.
        show_index: ``0`` prints the bare elements; any other value prefixes
            each element with its running number.
        index_type: ``0`` numbers from 0; any other value numbers from 1.
            Ignored when ``show_index`` is ``0``.

    Returns:
        None. Output goes to standard output.
    """
    if show_index == 0:
        for element in array:
            print(element)
        return
    first_number = 0 if index_type == 0 else 1
    for number, element in enumerate(array, start=first_number):
        print(number, element)
 | ||
| 
 | ||
| # 使用jieba软件包进行分词
 | ||
def divide_text_into_words(text):
    """Segment ``text`` into words with the jieba package.

    Args:
        text: The string to segment.

    Returns:
        The result of ``jieba.lcut(text)``.
    """
    import jieba
    return jieba.lcut(text)
 | ||
| 
 | ||
| # 根据一定的字符长度来分割文本
 | ||
def split_text(text, wrap_width=3000):
    """Split ``text`` into pieces no longer than ``wrap_width`` characters.

    Args:
        text: The string to split.
        wrap_width: Maximum length of each piece.

    Returns:
        A list of strings produced by the standard ``textwrap`` wrapper
        with default settings (an empty list for empty input).
    """
    import textwrap
    wrapper = textwrap.TextWrapper(width=wrap_width)
    return wrapper.wrap(text)
 | ||
| 
 | ||
| # 判断某个字符是中文还是英文或其他
 | ||
def check_Chinese_or_English(a):
    """Classify a character as 'Chinese', 'English' or 'Others'.

    Args:
        a: The string to classify; the range checks are plain string
            comparisons, so this is meant for a single character.

    Returns:
        'Chinese' if ``a`` lies in '\\u4e00'..'\\u9fff', 'English' if it
        lies in '\\x00'..'\\xff', otherwise 'Others'.
    """
    if '\u4e00' <= a <= '\u9fff':
        return 'Chinese'
    if '\x00' <= a <= '\xff':
        return 'English'
    return 'Others'
 | ||
| 
 | ||
| # 统计中英文文本的字数,默认不包括空格
 | ||
def count_words(text, include_space=0, show_words=0):
    """Count the words in mixed Chinese/English text.

    The text is tokenized with ``jieba.lcut``. A token whose first character
    is classified as 'Chinese' contributes one count per character; any
    other token counts as a single word.

    Args:
        text: The string to analyse.
        include_space: ``0`` (default) drops tokens equal to a single space
            before counting; any other value keeps every token.
        show_words: ``1`` prints the list of counted units; any other value
            prints nothing.

    Returns:
        The total number of counted units.
    """
    import jieba
    import guan
    words = jieba.lcut(text)
    if include_space == 0:
        # Only single-space tokens are filtered; other whitespace is counted.
        counted_tokens = [word for word in words if word != ' ']
    else:
        counted_tokens = words
    num_words = 0
    counted_units = []
    for word in counted_tokens:
        # The token is classified by its first character only.
        # NOTE(review): presumably guan.check_Chinese_or_English is the same
        # classifier as check_Chinese_or_English in this module - confirm.
        word_type = guan.check_Chinese_or_English(word[0])
        if word_type == 'Chinese':
            num_words += len(word)
            counted_units.extend(word)
        else:
            # The original condition `word_type == 'English' or 'Others'`
            # was always true, so a plain else states the same behaviour.
            num_words += 1
            counted_units.append(word)
    if show_words == 1:
        print(counted_units)
    return num_words
 | ||
| 
 | ||
| # 将RGB转成HEX
 | ||
def rgb_to_hex(rgb, pound=1):
    """Convert an RGB triple to a lowercase hexadecimal colour string.

    Args:
        rgb: Three integer channel values as a tuple, list or other
            iterable. Lists are accepted as well as tuples.
        pound: ``0`` returns the bare digits; any other value prefixes the
            result with '#'.

    Returns:
        A string such as '#ff0080' (or 'ff0080' when ``pound`` is ``0``),
        each channel zero-padded to at least two hex digits.

    Raises:
        TypeError: If ``rgb`` does not hold exactly three values that can be
            formatted with ``%x``.
    """
    # The % operator only unpacks tuples, so normalise lists and other iterables.
    hex_digits = '%02x%02x%02x' % tuple(rgb)
    if pound == 0:
        return hex_digits
    return '#' + hex_digits
 | ||
| 
 | ||
| # 将HEX转成RGB
 | ||
def hex_to_rgb(hex):
    """Convert a hexadecimal colour string to a tuple of integer channels.

    Args:
        hex: Colour string such as '#ff0080'; leading '#' characters are
            stripped before parsing.

    Returns:
        A tuple of integers, one per chunk of ``len(digits) // 3`` hex
        digits. A six-digit input yields the usual (r, g, b) triple.

    Raises:
        ValueError: If fewer than three digits remain after stripping, or a
            chunk is not valid hexadecimal.
    """
    digits = hex.lstrip('#')
    total = len(digits)
    width = total // 3
    channels = []
    for start in range(0, total, width):
        channels.append(int(digits[start:start + width], 16))
    return tuple(channels)
 | ||
| 
 | ||
| # 使用MD5进行散列加密
 | ||
def encryption_MD5(password, salt=''):
    """Return the MD5 hex digest of ``salt + password``.

    Args:
        password: The string to hash.
        salt: Optional string prepended to ``password`` before hashing.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest of the encoded
        (default encoding of ``str.encode``) salted string.

    Note:
        MD5 is a fast, broken hash and is not suitable for storing real
        passwords.
    """
    import hashlib
    salted = salt + password
    digest = hashlib.md5()
    digest.update(salted.encode())
    return digest.hexdigest()
 | ||
| 
 | ||
| # 使用SHA-256进行散列加密
 | ||
def encryption_SHA_256(password, salt=''):
    """Return the SHA-256 hex digest of ``salt + password``.

    Args:
        password: The string to hash.
        salt: Optional string prepended to ``password`` before hashing.

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest of the encoded
        (default encoding of ``str.encode``) salted string.
    """
    import hashlib
    salted = salt + password
    digest = hashlib.sha256()
    digest.update(salted.encode())
    return digest.hexdigest()