From 7bcf9f0c87a78306021e7d8f74039c81081f9aa0 Mon Sep 17 00:00:00 2001 From: guanjihuan Date: Mon, 8 Dec 2025 12:46:30 +0800 Subject: [PATCH] 0.1.192 --- PyPI/setup.cfg | 2 +- PyPI/src/guan.egg-info/PKG-INFO | 2 +- PyPI/src/guan/data_processing.py | 52 +++++++++++++++++--------------- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/PyPI/setup.cfg b/PyPI/setup.cfg index 996aefa..c222038 100644 --- a/PyPI/setup.cfg +++ b/PyPI/setup.cfg @@ -1,7 +1,7 @@ [metadata] # replace with your username: name = guan -version = 0.1.191 +version = 0.1.192 author = guanjihuan author_email = guanjihuan@163.com description = An open source python package diff --git a/PyPI/src/guan.egg-info/PKG-INFO b/PyPI/src/guan.egg-info/PKG-INFO index 09cc134..eda4a8e 100644 --- a/PyPI/src/guan.egg-info/PKG-INFO +++ b/PyPI/src/guan.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: guan -Version: 0.1.191 +Version: 0.1.192 Summary: An open source python package Home-page: https://py.guanjihuan.com Author: guanjihuan diff --git a/PyPI/src/guan/data_processing.py b/PyPI/src/guan/data_processing.py index ed78292..3d378e9 100644 --- a/PyPI/src/guan/data_processing.py +++ b/PyPI/src/guan/data_processing.py @@ -83,24 +83,25 @@ def loop_calculation_with_three_parameters(function_name, parameter_array_1, par # 文本对比 def word_diff(a, b, print_show=1): import difflib - import re - import guan - a_words = guan.divide_text_into_words(a) - b_words = guan.divide_text_into_words(b) + import jieba + import logging + jieba.setLogLevel(logging.ERROR) + a_words = jieba.lcut(a) + b_words = jieba.lcut(b) sm = difflib.SequenceMatcher(None, a_words, b_words) result = [] for tag, i1, i2, j1, j2 in sm.get_opcodes(): if tag == "equal": result.extend(a_words[i1:i2]) elif tag == "delete": - result.append("\033[91m" + " ".join(a_words[i1:i2]) + "\033[0m") + result.append("\033[9;91m" + "".join(a_words[i1:i2]) + "\033[0m") elif tag == "insert": - result.append("\033[92m" + " ".join(b_words[j1:j2]) + "\033[0m") + result.append("\033[92m" + "".join(b_words[j1:j2]) + "\033[0m") elif tag == "replace": - result.append("\033[91m" + " ".join(a_words[i1:i2]) + "\033[0m") - result.append("\033[92m" + " ".join(b_words[j1:j2]) + "\033[0m") - diff_result = " ".join(result) - diff_result = re.sub(r' +', ' ', diff_result) + result.append("\033[9;91m" + "".join(a_words[i1:i2]) + "\033[0m") + result.append(" ") + result.append("\033[92m" + "".join(b_words[j1:j2]) + "\033[0m") + diff_result = "".join(result) if print_show: print(diff_result) return diff_result @@ -109,33 +110,34 @@ def word_diff(a, b, print_show=1): def word_diff_to_html(a, b, filename='diff_result', write_file=1): import difflib from html import escape - import re - import guan - a_words = guan.divide_text_into_words(a) - b_words = guan.divide_text_into_words(b) + import jieba + import logging + jieba.setLogLevel(logging.ERROR) + a_words = jieba.lcut(a) + b_words = jieba.lcut(b) sm = difflib.SequenceMatcher(None, a_words, b_words) html_parts = [] for tag, i1, i2, j1, j2 in sm.get_opcodes(): if tag == "equal": - html_parts.append(" ".join(map(escape, a_words[i1:i2]))) + html_parts.append("".join(map(escape, a_words[i1:i2]))) elif tag == "delete": - html_parts.append(f"" - + " ".join(map(escape, a_words[i1:i2])) + html_parts.append(f"" + + "".join(map(escape, a_words[i1:i2])) + "") elif tag == "insert": - html_parts.append(f"" - + " ".join(map(escape, b_words[j1:j2])) + html_parts.append(f"" + + "".join(map(escape, b_words[j1:j2])) + "") elif tag == "replace": - html_parts.append(f"" - + " ".join(map(escape, a_words[i1:i2])) + html_parts.append(f"" + + "".join(map(escape, a_words[i1:i2])) + "") - html_parts.append(f"" - + " ".join(map(escape, b_words[j1:j2])) + html_parts.append(" ") + html_parts.append(f"" + + "".join(map(escape, b_words[j1:j2])) + "") - diff_result = " ".join(html_parts) + diff_result = "".join(html_parts) diff_result = diff_result.replace("\n", "
") - diff_result = re.sub(r' +', ' ', diff_result) if write_file: with open(filename+'.html', 'w', encoding='UTF-8') as f: f.write(diff_result)