0.0.192

2023-10-12 21:11:42 +08:00
parent ca2fd59f2b
commit 3eb199cbf0
4 changed files with 49 additions and 8 deletions
--- a/API_Reference/API_Reference.py
+++ b/API_Reference/API_Reference.py
@@ -1,4 +1,4 @@
-# API Reference shows all functions in Guan package. The current version is guan-0.0.191, updated on December 12, 2023.
+# API Reference shows all functions in the Guan package. The current version is guan-0.0.192, updated on October 12, 2023.

 import guan

@@ -818,8 +818,14 @@ degenerate_k_array, degenerate_eigenvalue_array = guan.find_degenerate_points(k_
 # Generate a fixed random integer from a given seed
 rand_num = guan.generate_random_int_number_for_a_specific_seed(seed=0, x_min=0, x_max=10)

+# Tokenize text with jieba
+words = guan.divide_text_into_words(text)
+
+# Determine whether a character is Chinese, English, or something else
+word_type = guan.check_Chinese_or_English(a)
+
 # Count the words in Chinese/English text
-num_words = guan.count_words(text)
+num_words = guan.count_words(text, include_space=0, show_words=0)

 # Record the date and time of a run and write them to a file
 guan.statistics_with_day_and_time(content='', filename='a', file_format='.txt')
--- a/Source_Code/PyPI/setup.cfg
+++ b/Source_Code/PyPI/setup.cfg
@@ -1,7 +1,7 @@
 [metadata]
 # replace with your username:
 name = guan
-version = 0.0.191
+version = 0.0.192
 author = guanjihuan
 author_email = guanjihuan@163.com
 description = An open source python package
--- a/Source_Code/PyPI/src/guan.egg-info/PKG-INFO
+++ b/Source_Code/PyPI/src/guan.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: guan
-Version: 0.0.191
+Version: 0.0.192
 Summary: An open source python package
 Home-page: https://py.guanjihuan.com
 Author: guanjihuan
--- a/Source_Code/PyPI/src/guan/init.py
+++ b/Source_Code/PyPI/src/guan/init.py
@@ -1,6 +1,6 @@
 # Guan is an open-source python package developed and maintained by https://www.guanjihuan.com/about (Ji-Huan Guan, 关济寰). The primary location of this package is on website https://py.guanjihuan.com. GitHub link: https://github.com/guanjihuan/py.guanjihuan.com.

-# The current version is guan-0.0.191, updated on December 12, 2023.
+# The current version is guan-0.0.192, updated on October 12, 2023.

 # Installation: pip install --upgrade guan

@@ -3576,11 +3576,46 @@ def generate_random_int_number_for_a_specific_seed(seed=0, x_min=0, x_max=10):
    rand_num = np.random.randint(x_min, x_max) # 左闭右开[x_min, x_max)
    return rand_num

-# 统计中英文文本的字数
-def count_words(text):
+# Tokenize text with jieba
+def divide_text_into_words(text):
+    """Return the result of ``jieba.lcut(text)``, i.e. ``text`` split into tokens."""
    import jieba
    words = jieba.lcut(text)
-    num_words = len(words)
+    return words
+
+# Classify a character as Chinese, English, or something else
+def check_Chinese_or_English(a):
+    """Return 'Chinese', 'English' or 'Others' for the string ``a``.
+
+    ``a`` is compared (as a string) against two ranges: U+4E00..U+9FFF
+    yields 'Chinese', U+0000..U+00FF yields 'English' (this range also
+    contains digits and punctuation), and anything else yields 'Others'.
+    """
+    # The Chinese range is tested first, then the one-byte range.
+    if '\u4e00' <= a <= '\u9fff':
+        return 'Chinese'
+    if '\x00' <= a <= '\xff':
+        return 'English'
+    return 'Others'
+
+# Count the words in mixed Chinese/English text; spaces are excluded by default
+def count_words(text, include_space=0, show_words=0):
+    """Count the words in ``text`` after segmenting it with ``jieba.lcut``.
+
+    A token whose first character is classified as 'Chinese' by
+    ``guan.check_Chinese_or_English`` contributes one word per character;
+    every other token counts as exactly one word.
+
+    Args:
+        text: The string to analyse.
+        include_space: If 0 (default), tokens equal to a single space ' '
+            are dropped before counting; any other value keeps them.
+        show_words: If 1, print the list of counted units (single characters
+            for Chinese tokens, whole tokens otherwise).
+
+    Returns:
+        The total number of counted words.
+    """
+    import jieba
+    tokens = jieba.lcut(text)
+    if include_space == 0:
+        # Only tokens that are exactly ' ' are removed here.
+        tokens = [token for token in tokens if token != ' ']
+    num_words = 0
+    counted_units = []
+    for token in tokens:
+        # Only the first character decides how the whole token is counted.
+        if guan.check_Chinese_or_English(token[0]) == 'Chinese':
+            # Each Chinese character is one word.
+            num_words += len(token)
+            counted_units.extend(token)
+        else:
+            # Every non-Chinese token ('English' or 'Others') is a single word.
+            num_words += 1
+            counted_units.append(token)
+    if show_words == 1:
+        print(counted_units)
    return num_words

 # 统计运行的日期和时间，写进文件