python--杂识--6--相似目录差异对比脚本

该代码实现了一个比较两个目录差异的工具,可找出新增、修改和删除的文件。它通过计算文件的 MD5 哈希值判断文件是否被修改,能生成普通格式和树形格式化两种差异日志并写入文件,同时提供目录结构的重建和排序功能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

# -*- coding: utf-8 -*-
import sys
import os
import re
import hashlib


class CompareDir(object):
    """Compare two directory trees and report added, modified and deleted files.

    ``res_dir`` is the reference tree and ``dst_dir`` is the tree compared
    against it. Entries found only under ``dst_dir`` are reported as "add",
    files present in both trees whose MD5 digests differ as "modify", and
    entries found only under ``res_dir`` as "delete".
    """

    # Number of bytes read per iteration while hashing a file.
    _CHUNK_SIZE = 8192

    def __init__(self, res_dir, dst_dir):
        self.res_dir = res_dir
        self.dst_dir = dst_dir

    def compute_file_md5(self, file):
        """Return the lowercase hexadecimal MD5 digest of the file at ``file``.

        The file is read in chunks so large files are never loaded whole, and
        the handle is closed even if reading fails.
        """
        md5_obj = hashlib.md5()
        with open(file, 'rb') as f:
            for chunk in iter(lambda: f.read(self._CHUNK_SIZE), b""):
                md5_obj.update(chunk)
        return md5_obj.hexdigest()

    def dir_file_add_modify(self, invert=False):
        """Collect entries of one tree that are new or changed relative to the other.

        :param invert: when False, walk ``dst_dir`` and compare against
            ``res_dir``. When True the roles are swapped and no content
            comparison is made, so "modify" stays empty and "add" lists what
            exists only under ``res_dir``.
        :return: {"add": {"dirs": [], "files": []}, "modify": {"files": []}},
            holding paths as produced by ``os.walk`` on the walked tree.
        """
        # Use locals instead of temporarily swapping the instance attributes,
        # so an exception during the walk cannot leave the object inverted.
        if invert:
            base_dir, walk_dir = self.dst_dir, self.res_dir
        else:
            base_dir, walk_dir = self.res_dir, self.dst_dir

        result = {"add": {"dirs": [], "files": []}, "modify": {"files": []}}
        for walk_root, _subdirs, filenames in os.walk(walk_dir):
            # Map the walked directory onto the other tree via its relative
            # path; plain string concatenation breaks on trailing separators.
            rel_root = os.path.relpath(walk_root, walk_dir)
            if rel_root == os.curdir:
                base_root = base_dir
            else:
                base_root = os.path.join(base_dir, rel_root)

            if not os.path.isdir(base_root):
                # The whole directory is new, so every file in it is new,
                # whether or not the directory also has subdirectories.
                result["add"]["dirs"].append(walk_root)
                result["add"]["files"].extend(
                    os.path.join(walk_root, name) for name in filenames
                )
                continue

            for name in filenames:
                walk_file = os.path.join(walk_root, name)
                base_file = os.path.join(base_root, name)
                if not os.path.isfile(base_file):
                    result["add"]["files"].append(walk_file)
                elif not invert:
                    if self.compute_file_md5(base_file) != self.compute_file_md5(walk_file):
                        result["modify"]["files"].append(walk_file)
        return result

    def ordinary_diff_dict(self):
        """Return the flat difference between the two trees.

        :return: {'add': {'dirs': [], 'files': []}, 'modify': {'files': []},
            'delete': {'dirs': [], 'files': []}}
        """
        diff = self.dir_file_add_modify()
        # Whatever is "added" when the roles are swapped was deleted from dst.
        diff["delete"] = self.dir_file_add_modify(invert=True)["add"]
        return diff

    def rebuild_diff_dict(self, index, relative_file_list, child_list):
        """Insert one relative path into a nested tree of children, in place.

        A tree level is a list whose items are either file names (str) or
        single-key dicts ``{dir_name: children}``.

        :param index: position in ``relative_file_list`` to start from.
        :param relative_file_list: path components; the last one is the file name.
        :param child_list: children list of the level matching ``index``.
        """
        last = len(relative_file_list) - 1
        if index > last:
            return
        current = child_list
        for dir_name in relative_file_list[index:last]:
            for child in current:
                if isinstance(child, dict) and dir_name in child:
                    current = child[dir_name]
                    break
            else:
                # Directory not present at this level yet: create it.
                created = []
                current.append({dir_name: created})
                current = created
        current.append(relative_file_list[last])

    def sort_childs_list(self, child_list):
        """Return a sorted copy of a tree level: directories first, then files.

        Directories are ordered by name and files alphabetically. Nested
        levels are sorted the same way so the whole tree has a deterministic
        order. The input list is not modified.
        """
        dir_children = []
        file_children = []
        for child in child_list:
            if isinstance(child, dict):
                dir_children.append(
                    dict((name, self.sort_childs_list(sub)) for name, sub in child.items())
                )
            else:
                file_children.append(child)
        dir_children.sort(key=lambda entry: next(iter(entry)))
        file_children.sort()
        return dir_children + file_children

    def _root_name(self, path):
        """Return the last component of ``path``, ignoring trailing separators."""
        return os.path.basename(os.path.normpath(path))

    def _split_relative(self, path, base_dir):
        """Return the components of ``path`` relative to ``base_dir``."""
        # relpath yields a normalized path using only os.sep, so characters
        # that are legal in file names (such as "|") are never split on.
        return os.path.relpath(path, base_dir).split(os.sep)

    def format_diff_dict(self, diff_dict):
        """Turn the flat difference into nested trees keyed by root directory name.

        :param diff_dict: {"add": {"dirs": [], "files": []},
            "modify": {"files": []}, "delete": {"dirs": [], "files": []}}
        :return: {"add": {root_dir: []}, "modify": {root_dir: []},
            "delete": {root_dir: []}}; an operation without files maps to an
            empty dict.
        """
        dst_root = self._root_name(self.dst_dir)
        res_root = self._root_name(self.res_dir)
        trees = {"add": {dst_root: []}, "modify": {dst_root: []}, "delete": {res_root: []}}
        for key in diff_dict:
            # "add"/"modify" paths live under dst_dir, the rest under res_dir.
            if key in ("add", "modify"):
                base_dir, root_name = self.dst_dir, dst_root
            else:
                base_dir, root_name = self.res_dir, res_root
            children = trees[key][root_name]
            for file in diff_dict[key]["files"]:
                self.rebuild_diff_dict(0, self._split_relative(file, base_dir), children)

        formatted = {}
        for opt in ("add", "modify", "delete"):
            tree = trees[opt]
            root_name = next(iter(tree))
            children = tree[root_name]
            formatted[opt] = {root_name: self.sort_childs_list(children)} if children else {}
        return formatted

    def write_line_to_file(self, path, line):
        """Append ``line`` followed by a newline to the file at ``path``."""
        with open(path, "a+") as f:
            f.write(line + "\n")

    def _default_output_path(self, file_name):
        """Return ``file_name`` placed in the directory containing this module."""
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), file_name)

    def _write_lines(self, path, lines):
        """Create or truncate ``path`` and write ``lines``, one per line."""
        with open(path, "w") as f:
            f.writelines(line + "\n" for line in lines)

    def write_to_ordinary_file(self, ordinary_diff_dict, output_path=None):
        """Write the flat difference as a plain text report.

        Each operation that has files is written as "<operation>:" followed by
        one file path per line and two blank lines.

        :param ordinary_diff_dict: {"add": {"dirs": [], "files": []},
            "modify": {"files": []}, "delete": {"dirs": [], "files": []}}
        :param output_path: report file; defaults to "ordinary_diff_dst2res.txt"
            next to this module. An existing file is overwritten.
        """
        if output_path is None:
            output_path = self._default_output_path("ordinary_diff_dst2res.txt")

        lines = []
        for key in ordinary_diff_dict:
            files_list = ordinary_diff_dict[key]["files"]
            if not files_list:
                continue
            lines.append(key + ":")
            lines.extend(files_list)
            lines.extend(["", ""])
        self._write_lines(output_path, lines)

    def _tree_lines(self, tree, depth=1):
        """Return the report lines for a ``{dir_name: children}`` tree.

        Directory lines start with "d" and file lines with "f"; the prefix
        grows by one "++|" / "--|" segment per nesting level.
        """
        lines = []
        for dir_name, children in tree.items():
            lines.append("d" + ("++|" * depth)[:-1] + dir_name)
            for child in children:
                if isinstance(child, dict):
                    lines.extend(self._tree_lines(child, depth + 1))
                else:
                    lines.append("f" + ("--|" * (depth + 1))[:-1] + child)
        return lines

    def write_to_format_file(self, format_diff_dict, output_path=None):
        """Write the nested difference as an indented tree report.

        :param format_diff_dict: {"add": {root_dir: []}, "modify": {root_dir: []},
            "delete": {root_dir: []}}
        :param output_path: report file; defaults to "format_diff_dst2res.txt"
            next to this module. An existing file is overwritten.
        """
        if output_path is None:
            output_path = self._default_output_path("format_diff_dst2res.txt")

        lines = []
        for opt, tree in format_diff_dict.items():
            if not tree:
                continue
            lines.append(opt + ":")
            lines.extend(self._tree_lines(tree))
            lines.extend(["", ""])
        self._write_lines(output_path, lines)


def main(res_dir, dst_dir):
    """Compare ``res_dir`` with ``dst_dir`` and write both difference reports.

    Every path that is not an existing directory is reported, and the process
    then exits with status 1 instead of comparing against a missing tree.

    :param res_dir: reference directory.
    :param dst_dir: directory compared against the reference.
    """
    invalid_dirs = [path for path in (res_dir, dst_dir) if not os.path.isdir(path)]
    for path in invalid_dirs:
        print("目录:{dir} 错误".format(dir=path))
    if invalid_dirs:
        # Walking a missing directory yields nothing, which would report the
        # whole other tree as added or deleted; stop instead.
        sys.exit(1)

    compare_dir = CompareDir(res_dir, dst_dir)
    ordinary_diff_dict = compare_dir.ordinary_diff_dict()
    compare_dir.write_to_ordinary_file(ordinary_diff_dict)
    format_diff_dict = compare_dir.format_diff_dict(ordinary_diff_dict)
    compare_dir.write_to_format_file(format_diff_dict)


if __name__ == "__main__":
    # Usage: python <script> [res_dir dst_dir]
    # Without arguments the sample directories below are compared.
    if len(sys.argv) == 3:
        res_dir, dst_dir = sys.argv[1], sys.argv[2]
    elif len(sys.argv) == 1:
        res_dir = r"test_dir1"
        dst_dir = r"test_dir2"
    else:
        sys.exit("usage: {prog} [res_dir dst_dir]".format(prog=sys.argv[0]))
    main(res_dir, dst_dir)
    print("Success!")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值