# Provenance: reconstructed from commit 24e3a2c8bfd458d163976ae254daca3384b75cf9
# (author: guanjihuan, date: Wed, 14 Sep 2022 07:41:37 +0800, subject: "update"),
# which created
# language_learning/2022.09.14_find_repeated_file_with_same_filename/find_repeated_file_with_same_filename.py.
# The same commit also prepended an analogous header docstring (archive page
# https://www.guanjihuan.com/archives/25943) to
# language_learning/2022.09.12_creat_necessary_file_or_delete_file_with_specific_name/creat_necessary_file_or_delete_file_with_specific_name.py.
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/26113
"""

# Duplicates are detected by file name only; file contents are not compared.
# Checking both the name and the content would require computing file hashes,
# which is not considered here for now.


def main():
    """Print the repeated file names found under a hard-coded directory."""
    directory = 'E:/test'
    repeated_file = find_repeated_file_with_same_filename(directory)
    print(repeated_file)

    # Alternative left by the original author: the same call through the
    # `guan` package.
    # import guan
    # repeated_file = guan.find_repeated_file_with_same_filename(directory)
    # print(repeated_file)


def find_repeated_file_with_same_filename(directory, num=1000):
    """Return the file names that occur more than once under *directory*.

    The directory tree is walked recursively and only the base file names are
    compared; the containing folder and the file contents are ignored.

    Args:
        directory: Root directory to search. ``os.walk`` yields nothing for a
            path it cannot list, so such a path produces an empty result.
        num: How many of the most frequent distinct file names to examine
            before filtering. Repeated names ranked beyond this limit are not
            reported. ``None`` examines every name.

    Returns:
        A list of ``(filename, count)`` tuples with ``count > 1``, ordered from
        the most to the least frequent name.
    """
    # Imports stay local so the function remains self-contained.
    import os
    from collections import Counter

    name_counts = Counter()
    for _root, _dirs, files in os.walk(directory):
        # Counter.update tallies every name of this folder in one call.
        name_counts.update(files)

    return [
        (name, count)
        for name, count in name_counts.most_common(num)
        if count > 1
    ]


if __name__ == '__main__':
    main()