From 3eb199cbf002eb4b11dde8fc86d9e0b729735d39 Mon Sep 17 00:00:00 2001 From: guanjihuan Date: Thu, 12 Oct 2023 21:11:42 +0800 Subject: [PATCH] 0.0.192 --- API_Reference/API_Reference.py | 10 ++++- Source_Code/PyPI/setup.cfg | 2 +- Source_Code/PyPI/src/guan.egg-info/PKG-INFO | 2 +- Source_Code/PyPI/src/guan/__init__.py | 43 +++++++++++++++++++-- 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/API_Reference/API_Reference.py b/API_Reference/API_Reference.py index d6341d1..7e56ccd 100644 --- a/API_Reference/API_Reference.py +++ b/API_Reference/API_Reference.py @@ -1,4 +1,4 @@ -# API Reference shows all functions in Guan package. The current version is guan-0.0.191, updated on December 12, 2023. +# API Reference shows all functions in Guan package. The current version is guan-0.0.192, updated on December 12, 2023. import guan @@ -818,8 +818,14 @@ degenerate_k_array, degenerate_eigenvalue_array = guan.find_degenerate_points(k_ # 选取一个种子生成固定的随机整数 rand_num = guan.generate_random_int_number_for_a_specific_seed(seed=0, x_min=0, x_max=10) +# 使用jieba分词 +words = guan.divide_text_into_words(text) + +# 判断某个字符是中文还是英文或其他 +word_type = guan.check_Chinese_or_English(a) + # 统计中英文文本的字数 -num_words = guan.count_words(text) +num_words = guan.count_words(text, include_space=0, show_words=0) # 统计运行的日期和时间,写进文件 guan.statistics_with_day_and_time(content='', filename='a', file_format='.txt') diff --git a/Source_Code/PyPI/setup.cfg b/Source_Code/PyPI/setup.cfg index 2df3c4c..5dd3cd2 100644 --- a/Source_Code/PyPI/setup.cfg +++ b/Source_Code/PyPI/setup.cfg @@ -1,7 +1,7 @@ [metadata] # replace with your username: name = guan -version = 0.0.191 +version = 0.0.192 author = guanjihuan author_email = guanjihuan@163.com description = An open source python package diff --git a/Source_Code/PyPI/src/guan.egg-info/PKG-INFO b/Source_Code/PyPI/src/guan.egg-info/PKG-INFO index b3a7bff..a04404a 100644 --- a/Source_Code/PyPI/src/guan.egg-info/PKG-INFO +++ b/Source_Code/PyPI/src/guan.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: guan -Version: 0.0.191 +Version: 0.0.192 Summary: An open source python package Home-page: https://py.guanjihuan.com Author: guanjihuan diff --git a/Source_Code/PyPI/src/guan/__init__.py b/Source_Code/PyPI/src/guan/__init__.py index 1dfa14b..dd8cf8f 100644 --- a/Source_Code/PyPI/src/guan/__init__.py +++ b/Source_Code/PyPI/src/guan/__init__.py @@ -1,6 +1,6 @@ # Guan is an open-source python package developed and maintained by https://www.guanjihuan.com/about (Ji-Huan Guan, 关济寰). The primary location of this package is on website https://py.guanjihuan.com. GitHub link: https://github.com/guanjihuan/py.guanjihuan.com. -# The current version is guan-0.0.191, updated on December 12, 2023. +# The current version is guan-0.0.192, updated on December 12, 2023. # Installation: pip install --upgrade guan @@ -3576,11 +3576,46 @@ def generate_random_int_number_for_a_specific_seed(seed=0, x_min=0, x_max=10): rand_num = np.random.randint(x_min, x_max) # 左闭右开[x_min, x_max) return rand_num -# 统计中英文文本的字数 -def count_words(text): +# 使用jieba分词 +def divide_text_into_words(text): import jieba words = jieba.lcut(text) - num_words = len(words) + return words + +# 判断某个字符是中文还是英文或其他 +def check_Chinese_or_English(a): + if '\u4e00' <= a <= '\u9fff' : + word_type = 'Chinese' + elif '\x00' <= a <= '\xff': + word_type = 'English' + else: + word_type = 'Others' + return word_type + +# 统计中英文文本的字数,默认不包括空格 +def count_words(text, include_space=0, show_words=0): + import jieba + words = jieba.lcut(text) + new_words = [] + if include_space == 0: + for word in words: + if word != ' ': + new_words.append(word) + else: + new_words = words + num_words = 0 + new_words_2 = [] + for word in new_words: + word_type = guan.check_Chinese_or_English(word[0]) + if word_type == 'Chinese': + num_words += len(word) + for one_word in word: + new_words_2.append(one_word) + elif word_type == 'English' or 'Others': + num_words += 1 + new_words_2.append(word) + if show_words == 1: + print(new_words_2) return num_words # 统计运行的日期和时间,写进文件