# Module: data_processing


def preprocess_for_parallel_calculations(parameter_array_all, cpus=1, task_index=0):
    """Return the share of ``parameter_array_all`` assigned to one parallel task.

    When the number of parameters is divisible by ``cpus``, every task gets an
    equally sized chunk. Otherwise the first ``cpus - 1`` tasks each get
    ``num_all // (cpus - 1)`` items and the last task gets whatever is left
    (possibly nothing).

    Args:
        parameter_array_all: Sliceable sequence holding all parameters.
        cpus: Total number of tasks the parameters are split across.
        task_index: Zero-based index of the task whose share is returned.

    Returns:
        The slice of ``parameter_array_all`` belonging to ``task_index``.
    """
    num_all = len(parameter_array_all)
    if num_all % cpus == 0:
        chunk_size = num_all // cpus
        return parameter_array_all[task_index * chunk_size:(task_index + 1) * chunk_size]
    # Not evenly divisible: size chunks for cpus - 1 tasks, remainder goes last.
    chunk_size = num_all // (cpus - 1)
    begin = task_index * chunk_size
    if task_index != cpus - 1:
        return parameter_array_all[begin:begin + chunk_size]
    return parameter_array_all[begin:num_all]


def sort_array_by_index_element(original_array, index):
    """Return a new list of the sub-arrays sorted by their ``index``-th element.

    ``index`` is zero-based. The input is not modified.
    """
    return sorted(original_array, key=lambda sub_array: sub_array[index])


def get_random_number(start=0, end=1):
    """Return a random integer from the closed interval ``[start, end]``."""
    import random
    return random.randint(start, end)  # both endpoints are inclusive


def generate_random_int_number_for_a_specific_seed(seed=0, x_min=0, x_max=10):
    """Return a reproducible random integer from ``[x_min, x_max)`` for ``seed``.

    Note:
        This reseeds NumPy's global random state via ``np.random.seed``, so it
        also affects later ``np.random`` calls made elsewhere in the process.
    """
    import numpy as np
    np.random.seed(seed)
    return np.random.randint(x_min, x_max)  # half-open: x_max is excluded


def print_array_with_index(array, show_index=1, index_type=0):
    """Print each element of ``array`` on its own line, optionally numbered.

    Args:
        array: Iterable of items to print.
        show_index: ``0`` prints the items only; any other value prefixes each
            item with its number.
        index_type: ``0`` numbers from 0; any other value numbers from 1.
    """
    if show_index == 0:
        for item in array:
            print(item)
        return
    first_number = 0 if index_type == 0 else 1
    for number, item in enumerate(array, start=first_number):
        print(number, item)


def divide_text_into_words(text):
    """Segment ``text`` into a list of words using the ``jieba`` package."""
    import jieba
    return jieba.lcut(text)


def split_text(text, wrap_width=3000):
    """Split ``text`` into a list of pieces of at most ``wrap_width`` characters.

    The split is done by ``textwrap.wrap``, so its whitespace handling applies.
    """
    import textwrap
    return textwrap.wrap(text, wrap_width)


def check_Chinese_or_English(a):
    """Classify a character as ``'Chinese'``, ``'English'`` or ``'Others'``.

    ``'Chinese'`` means U+4E00..U+9FFF and ``'English'`` means U+0000..U+00FF;
    everything else is ``'Others'``.
    """
    if '\u4e00' <= a <= '\u9fff':
        return 'Chinese'
    if '\x00' <= a <= '\xff':
        return 'English'
    return 'Others'


def count_words(text, include_space=0, show_words=0):
    """Count the words in mixed Chinese/English text.

    The text is segmented with ``jieba``. A token whose first character is
    Chinese contributes one count per character; any other token counts as a
    single word.

    Args:
        text: Text to count.
        include_space: ``0`` (default) drops tokens equal to a single space;
            any other value keeps them.
        show_words: ``1`` prints the list of counted units.

    Returns:
        The number of counted units.
    """
    import jieba
    tokens = jieba.lcut(text)
    if include_space == 0:
        tokens = [token for token in tokens if token != ' ']
    num_words = 0
    counted_units = []
    for token in tokens:
        if not token:
            # Defensive: an empty token has no first character to classify.
            continue
        if check_Chinese_or_English(token[0]) == 'Chinese':
            # Chinese tokens are counted character by character.
            num_words += len(token)
            counted_units.extend(token)
        else:
            # 'English' and 'Others' tokens both count as one word.
            num_words += 1
            counted_units.append(token)
    if show_words == 1:
        print(counted_units)
    return num_words


def rgb_to_hex(rgb, pound=1):
    """Convert an RGB triple of integers to a lowercase hex colour string.

    Args:
        rgb: Sequence of three integers (tuple, list, ...).
        pound: ``0`` omits the leading ``'#'``; any other value includes it.

    Returns:
        A string such as ``'#ff0080'`` or ``'ff0080'``.
    """
    # %-formatting only unpacks real tuples, so normalise other sequences.
    components = tuple(rgb)
    if pound == 0:
        return '%02x%02x%02x' % components
    return '#%02x%02x%02x' % components


def hex_to_rgb(hex):
    """Convert a hex colour string to a tuple of integer components.

    Leading ``'#'`` characters are stripped and the remaining digits are cut
    into pieces of ``len // 3`` digits, each parsed as base 16.

    Note:
        Three-digit input such as ``'#fff'`` yields one digit per component,
        i.e. ``(15, 15, 15)``; it is not expanded to ``'#ffffff'``.

    Raises:
        ValueError: If fewer than three digits remain or a piece is not
            valid hexadecimal.
    """
    digits = hex.lstrip('#')
    length = len(digits)
    width = length // 3
    return tuple(int(digits[i:i + width], 16) for i in range(0, length, width))


def encryption_MD5(password, salt=''):
    """Return the hexadecimal MD5 digest of ``salt + password``.

    NOTE(review): MD5 is a fast, broken hash; do not rely on it for storing
    real passwords.
    """
    import hashlib
    salted = salt + password
    return hashlib.md5(salted.encode()).hexdigest()


def encryption_SHA_256(password, salt=''):
    """Return the hexadecimal SHA-256 digest of ``salt + password``.

    NOTE(review): a single fast hash is weak for password storage; consider a
    key-derivation function such as ``hashlib.scrypt`` for that purpose.
    """
    import hashlib
    salted = salt + password
    return hashlib.sha256(salted.encode()).hexdigest()