# NOTE: extracted from a patch against PyPI/src/guan/data_processing.py. The
# same patch bumps the package version from 0.1.190 to 0.1.191 in
# PyPI/setup.cfg and PyPI/src/guan.egg-info/PKG-INFO. The surrounding patch
# context (the tail of loop_calculation_with_three_parameters above and the
# head of print_array(array, line_break=0) below) is only partially visible
# and is therefore not reproduced here.


# Text comparison
def word_diff(a, b, print_show=1):
    """Return a word-level diff of two texts, highlighted with ANSI colors.

    Both texts are tokenized with ``guan.divide_text_into_words`` (defined
    elsewhere in the package; presumably it returns a list of string tokens --
    confirm against its implementation) and aligned with
    ``difflib.SequenceMatcher``.

    Args:
        a: The original text.
        b: The revised text.
        print_show: If truthy, the diff is also printed to stdout.

    Returns:
        A single string in which tokens only present in ``a`` are wrapped in
        the ANSI bright-red escape sequence and tokens only present in ``b``
        in the bright-green one; unchanged tokens are emitted as-is.
    """
    import difflib
    import re
    import guan

    red, green, reset = "\033[91m", "\033[92m", "\033[0m"

    a_words = guan.divide_text_into_words(a)
    b_words = guan.divide_text_into_words(b)
    matcher = difflib.SequenceMatcher(None, a_words, b_words)

    result = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            result.extend(a_words[i1:i2])
            continue
        # A "replace" opcode is rendered as a deletion followed by an insertion.
        if tag in ("delete", "replace"):
            result.append(red + " ".join(a_words[i1:i2]) + reset)
        if tag in ("insert", "replace"):
            result.append(green + " ".join(b_words[j1:j2]) + reset)

    diff_result = " ".join(result)
    # Joining tokens can produce runs of spaces; collapse them to one.
    diff_result = re.sub(r' +', ' ', diff_result)
    if print_show:
        print(diff_result)
    return diff_result


# Text comparison (write to an HTML file)
def word_diff_to_html(a, b, filename='diff_result', write_file=1):
    """Return a word-level diff of two texts as an HTML fragment.

    Tokenization and alignment are the same as in ``word_diff``. Every token
    is HTML-escaped before being emitted, so markup contained in the inputs is
    shown literally instead of being interpreted by the browser.

    Args:
        a: The original text.
        b: The revised text.
        filename: Output path without extension; ``'.html'`` is appended.
        write_file: If truthy, the fragment is written to ``filename + '.html'``
            using UTF-8.

    Returns:
        The HTML fragment. Removed tokens are wrapped in a red ``<del>``
        element, added tokens in a green ``<ins>`` element (the same colors
        ``word_diff`` uses on the terminal), and newline characters are turned
        into ``<br>`` so line structure survives HTML whitespace collapsing.
    """
    import difflib
    from html import escape
    import re
    import guan

    # Keep the markup free of double spaces and newlines: the post-processing
    # below rewrites both and must not alter the tags themselves.
    del_open, del_close = '<del style="color:red;">', '</del>'
    ins_open, ins_close = '<ins style="color:green;">', '</ins>'

    a_words = guan.divide_text_into_words(a)
    b_words = guan.divide_text_into_words(b)
    matcher = difflib.SequenceMatcher(None, a_words, b_words)

    html_parts = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            html_parts.append(" ".join(map(escape, a_words[i1:i2])))
            continue
        # A "replace" opcode is rendered as a deletion followed by an insertion.
        if tag in ("delete", "replace"):
            removed = " ".join(map(escape, a_words[i1:i2]))
            html_parts.append(del_open + removed + del_close)
        if tag in ("insert", "replace"):
            added = " ".join(map(escape, b_words[j1:j2]))
            html_parts.append(ins_open + added + ins_close)

    diff_result = " ".join(html_parts)
    diff_result = diff_result.replace("\n", "<br>")
    diff_result = re.sub(r' +', ' ', diff_result)
    if write_file:
        with open(filename+'.html', 'w', encoding='UTF-8') as f:
            f.write(diff_result)
    return diff_result