# -*- coding: utf-8 -*-
import sys
import os
import re
import hashlib
class CompareDir(object):
    """Compare two directory trees and report added, modified and deleted entries.

    ``res_dir`` is the baseline tree and ``dst_dir`` is the tree compared
    against it: entries found only under ``dst_dir`` are reported as "add",
    files present in both trees whose MD5 digests differ as "modify", and
    entries found only under ``res_dir`` as "delete".
    """

    def __init__(self, res_dir, dst_dir):
        self.res_dir = res_dir
        self.dst_dir = dst_dir

    def compute_file_md5(self, file):
        """Return the lowercase hexadecimal MD5 digest of a file's contents.

        :param file: path of the file to hash
        :return: the digest as a lowercase hex string
        """
        md5_obj = hashlib.md5()
        # ``with`` closes the handle even when a read raises.
        with open(file, 'rb') as f:
            while True:
                chunk = f.read(8096)
                if not chunk:
                    break
                md5_obj.update(chunk)
        return str(md5_obj.hexdigest()).lower()

    def dir_file_add_modify(self, invert=False):
        """Collect entries that exist in only one tree, plus changed files.

        Walks ``dst_dir`` and looks every directory and file up under
        ``res_dir``. With ``invert=True`` the roles are swapped (``res_dir``
        is walked and looked up under ``dst_dir``) and file contents are not
        compared, so the "modify" list stays empty.

        :param invert: swap the roles of the two directories
        :return: {"add": {"dirs": [], "files": []}, "modify": {"files": []}}
            holding paths inside the walked tree
        """
        # Local names instead of swapping the attributes in place, so the
        # instance is left untouched even if the walk raises.
        if invert:
            walk_dir, ref_dir = self.res_dir, self.dst_dir
        else:
            walk_dir, ref_dir = self.dst_dir, self.res_dir

        added_dirs = []
        added_files = []
        modified_files = []
        for walk_root, _, filenames in os.walk(walk_dir):
            # Map the walked directory onto the other tree through its
            # relative path; plain string slicing breaks when ``walk_dir``
            # ends with a path separator.
            rel_root = os.path.relpath(walk_root, walk_dir)
            if rel_root == os.curdir:
                ref_root = ref_dir
            else:
                ref_root = os.path.join(ref_dir, rel_root)

            if not os.path.isdir(ref_root):
                added_dirs.append(walk_root)

            for filename in filenames:
                walk_file = os.path.join(walk_root, filename)
                ref_file = os.path.join(ref_root, filename)
                # Also true for every file of a directory that is missing
                # from the other tree, whether or not it has subdirectories.
                if not os.path.isfile(ref_file):
                    added_files.append(walk_file)
                elif not invert:
                    if self.compute_file_md5(ref_file) != self.compute_file_md5(walk_file):
                        modified_files.append(walk_file)

        return {
            "add": {"dirs": added_dirs, "files": added_files},
            "modify": {"files": modified_files},
        }

    def ordinary_diff_dict(self):
        """Return the differences between the two trees as flat path lists.

        :return: {'add': {'dirs': [], 'files': []}, 'modify': {'files': []},
            'delete': {'dirs': [], 'files': []}}
        """
        diff = self.dir_file_add_modify()
        # Whatever the inverted pass sees as "added" exists only in res_dir,
        # i.e. it was deleted from dst_dir.
        diff["delete"] = self.dir_file_add_modify(invert=True).get("add")
        return diff

    def rebuild_diff_dict(self, index, relative_file_list, child_list):
        """Insert one relative path into a nested directory tree, in place.

        A directory is represented as ``{name: [children]}`` and a file as a
        plain string. Components before the last one are treated as
        directories (reused when already present, created otherwise); the
        last component is appended as a file.

        :param index: position in ``relative_file_list`` to start from
        :param relative_file_list: path components, e.g. ["a", "b", "f.txt"]
        :param child_list: children list of the directory to insert into
        """
        last = len(relative_file_list) - 1
        current = child_list
        while index < last:
            name = relative_file_list[index]
            for child in current:
                if isinstance(child, dict) and name in child:
                    current = child[name]
                    break
            else:
                # No directory entry with this name yet: create it.
                new_children = list()
                current.append({name: new_children})
                current = new_children
            index += 1
        if index == last:
            current.append(relative_file_list[index])

    def sort_childs_list(self, child_list, recursive=False):
        """Return a sorted copy of a children list: directories first, then files.

        Directories are ordered by name and files alphabetically. The input
        list is not modified.

        :param child_list: mix of ``{name: [children]}`` dicts and file names
        :param recursive: also sort the children of every nested directory
            (nested directories are then returned as new dicts)
        :return: the new, sorted list
        """
        dir_child_list = [child for child in child_list if isinstance(child, dict)]
        file_child_list = [child for child in child_list if not isinstance(child, dict)]
        if recursive:
            dir_child_list = [
                dict(
                    (name, self.sort_childs_list(children, recursive=True))
                    for name, children in dir_child.items()
                )
                for dir_child in dir_child_list
            ]
        dir_child_list.sort(key=lambda dir_child: list(dir_child.keys()).pop())
        file_child_list.sort()
        return dir_child_list + file_child_list

    def _root_name(self, path):
        """Return the last component of ``path``, ignoring trailing separators."""
        return os.path.basename(path.rstrip("\\/"))

    def format_diff_dict(self, diff_dict):
        """Turn the flat diff into nested, sorted directory trees.

        Only the "files" lists are used. "add" and "modify" paths are taken
        relative to ``dst_dir``, "delete" paths relative to ``res_dir``.

        :param diff_dict: {"add": {"dirs": [], "files": []},
            "modify": {"dirs": [], "files": []},
            "delete": {"dirs": [], "files": []}}
        :return: {"add": {root_dir: []}, "modify": {root_dir: []},
            "delete": {root_dir: []}}; an operation without files maps to
            an empty dict
        """
        dst_root = self._root_name(self.dst_dir)
        res_root = self._root_name(self.res_dir)
        trees = {"add": list(), "modify": list(), "delete": list()}
        for key in diff_dict:
            base_dir = self.dst_dir if key in ("add", "modify") else self.res_dir
            for path in diff_dict[key]["files"]:
                relative_file = path[len(base_dir):].strip("\\/")
                if not relative_file:
                    continue
                # Split on either separator. "|" must not be in the class:
                # inside [...] it is a literal character, not alternation.
                relative_file_list = re.split(r'[\\/]+', relative_file)
                self.rebuild_diff_dict(0, relative_file_list, trees[key])

        formatted = dict()
        for key, root in (("add", dst_root), ("modify", dst_root), ("delete", res_root)):
            children = trees[key]
            if children:
                # Sort every level so the report does not depend on the
                # order in which the filesystem listed the entries.
                formatted[key] = {root: self.sort_childs_list(children, recursive=True)}
            else:
                formatted[key] = dict()
        return formatted

    def write_line_to_file(self, path, line):
        """Append ``line`` followed by a newline to the file at ``path``."""
        with open(path, "a+") as f:
            f.write(line + "\n")

    def _write_report(self, filename, lines):
        """Replace the report ``filename`` next to this script with ``lines``."""
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        # Mode "w" truncates an existing report; one open for all lines.
        with open(os.path.join(cur_dir, filename), "w") as f:
            f.writelines(line + "\n" for line in lines)

    def write_to_ordinary_file(self, ordinary_diff_dict):
        """Write the flat diff to ``ordinary_diff_dst2res.txt`` next to this script.

        Every operation that has files gets a ``<operation>:`` header, one
        line per file path, and two blank lines. Any existing report is
        replaced.

        :param ordinary_diff_dict: {"add": {"dirs": [], "files": []},
            "modify": {"dirs": [], "files": []},
            "delete": {"dirs": [], "files": []}}
        """
        lines = []
        for key in ordinary_diff_dict:
            files_list = ordinary_diff_dict[key]["files"]
            if not files_list:
                continue
            lines.append(key + ":")
            lines.extend(files_list)
            lines.extend(["", ""])
        self._write_report("ordinary_diff_dst2res.txt", lines)

    def _tree_lines(self, node, depth):
        """Return the report lines for directory ``node`` at nesting ``depth``."""
        name = list(node.keys()).pop()
        # "d" marks a directory line, "f" a file line; the repeated
        # "++|" / "--|" segments encode the depth (trailing "|" dropped).
        lines = [("d" + "++|" * depth)[:-1] + name]
        for child in node[name]:
            if isinstance(child, dict):
                lines.extend(self._tree_lines(child, depth + 1))
            else:
                lines.append(("f" + "--|" * (depth + 1))[:-1] + child)
        return lines

    def write_to_format_file(self, format_diff_dict):
        """Write the nested diff to ``format_diff_dst2res.txt`` next to this script.

        Every non-empty operation gets a ``<operation>:`` header, its tree
        drawn one entry per line, and two blank lines. Any existing report
        is replaced.

        :param format_diff_dict: {"add": {root_dir: []},
            "modify": {root_dir: []}, "delete": {root_dir: []}}
        """
        lines = []
        for opt, tree in format_diff_dict.items():
            if not tree:
                continue
            lines.append(opt + ":")
            lines.extend(self._tree_lines(tree, 1))
            lines.extend(["", ""])
        self._write_report("format_diff_dst2res.txt", lines)
def main(res_dir, dst_dir):
    """Compare two directories and write both difference reports.

    Both paths are validated first. If either one is not an existing
    directory, a message is printed for each bad path and nothing is
    compared or written.

    :param res_dir: baseline directory
    :param dst_dir: directory compared against the baseline
    :return: True when the reports were written, False when a path was invalid
    """
    invalid_dirs = [path for path in (res_dir, dst_dir) if not os.path.isdir(path)]
    for path in invalid_dirs:
        print("目录:{dir} 错误".format(dir=path))
    if invalid_dirs:
        # Comparing against a missing tree would report everything as
        # added or deleted, so stop instead of writing misleading reports.
        return False

    compare_dir = CompareDir(res_dir, dst_dir)
    ordinary_diff_dict = compare_dir.ordinary_diff_dict()
    compare_dir.write_to_ordinary_file(ordinary_diff_dict)
    format_diff_dict = compare_dir.format_diff_dict(ordinary_diff_dict)
    compare_dir.write_to_format_file(format_diff_dict)
    return True
if __name__ == "__main__":
    # Usage: script.py <res_dir> <dst_dir>
    # Without both arguments the bundled sample directories are compared.
    if len(sys.argv) >= 3:
        res_dir = sys.argv[1]
        dst_dir = sys.argv[2]
    else:
        res_dir = r"test_dir1"
        dst_dir = r"test_dir2"
    # Only an explicit False from main() signals failure; any other return
    # value (including None) counts as success.
    if main(res_dir, dst_dir) is False:
        sys.exit(1)
    print("Success!")
# Python miscellany (6): a script that compares the differences between two similar directories
# First published 2022-02-25 18:13:51