Python dataclass 高阶用法与技巧


各位老板好,dataclass 是 Python 3.7 引入(3.7 及以上版本可用)的强大功能,用于简化数据容器类的创建。下面我将介绍各种高阶用法和技巧,并提供可运行的代码示例。

1 基础用法

from dataclasses import dataclass

@dataclass
class Point:
    """Container for three float coordinates; ``z`` may be omitted."""

    x: float
    y: float
    z: float = 0.0  # default value, so Point(x, y) is sufficient


p = Point(x=1.5, y=2.5)
print(p)  # Point(x=1.5, y=2.5, z=0.0)

2 高级技巧与用法

2.1 不可变数据类 (frozen=True)

from dataclasses import dataclass

@dataclass(frozen=True)
class ImmutablePoint:
    """Pair of coordinates whose fields cannot be reassigned after creation."""

    x: float
    y: float


p = ImmutablePoint(x=1.0, y=2.0)
# Assigning to a field, e.g. ``p.x = 3.0``, raises FrozenInstanceError.
print(p)  # ImmutablePoint(x=1.0, y=2.0)

2.2 后初始化处理 (__post_init__)

from dataclasses import dataclass

@dataclass
class Rectangle:
    width: float
    height: float
    area: float = None  # 计算属性
    
    def __post_init__(self):
        self.area = self.width * self.height

r = Rectangle(3.0, 4.0)
print(r)  # Rectangle(width=3.0, height=4.0, area=12.0)

2.3 字段定制化 (field)

from dataclasses import dataclass, field

@dataclass
class Product:
    """Product record demonstrating per-field customisation with ``field()``."""

    name: str
    price: float
    # The factory is called per instance, so each product gets its own list.
    tags: list[str] = field(default_factory=list)
    # Left out of both __init__ and __repr__; assigned in __post_init__.
    _id: int = field(repr=False, init=False)
    # Not an __init__ parameter; every instance starts with "1.0".
    version: str = field(default="1.0", init=False)

    def __post_init__(self) -> None:
        """Store this object's ``id()`` in the hidden ``_id`` field."""
        self._id = id(self)


p = Product("Laptop", 999.99, tags=["tech", "electronics"])
print(p)  # Product(name='Laptop', price=999.99, tags=['tech', 'electronics'], version='1.0')

2.4 排序支持 (order=True)

from dataclasses import dataclass

@dataclass(order=True)
class Employee:
    """Employee record with generated ordering methods.

    Instances compare like the tuple ``(name, salary, department)``, i.e. in
    field declaration order.
    """

    name: str
    salary: int
    department: str = "Engineering"


e1 = Employee("Alice", 85000)
e2 = Employee("Bob", 92000)
e3 = Employee("Charlie", 85000, "Marketing")

print(e1 < e2)  # True - compared in field order (name, salary, department)
# ``name`` is compared first, so sorting is alphabetical by name, not by salary.
print(sorted([e2, e1, e3]))
# [Employee(name='Alice', salary=85000, department='Engineering'),
#  Employee(name='Bob', salary=92000, department='Engineering'),
#  Employee(name='Charlie', salary=85000, department='Marketing')]

2.5 继承与字段顺序

from dataclasses import dataclass

@dataclass
class Person:
    """Base record holding a name and an age."""

    name: str
    age: int


@dataclass
class Employee(Person):
    """Person extended with employment details.

    The generated ``__init__`` takes the inherited fields first, followed by
    the fields declared here.
    """

    employee_id: str
    department: str


emp = Employee(
    name="Alice",
    age=30,
    employee_id="E12345",
    department="Engineering",
)
print(emp)  # Employee(name='Alice', age=30, employee_id='E12345', department='Engineering')

2.6 作为字典或元组使用

from dataclasses import dataclass, asdict, astuple

@dataclass
class Point3D:
    """Three float coordinates, used to show dict and tuple conversion."""

    x: float
    y: float
    z: float


p = Point3D(x=1.0, y=2.0, z=3.0)
# asdict maps field names to values; astuple keeps only the values, in order.
print(asdict(p))  # {'x': 1.0, 'y': 2.0, 'z': 3.0}
print(astuple(p))  # (1.0, 2.0, 3.0)

2.7 类型验证与转换

from dataclasses import dataclass, field
from typing import Any

@dataclass
class TypedData:
    """Wrap a value and remember its concrete type."""

    value: Any
    # Filled in by __post_init__; excluded from __init__ and __repr__.
    _type: type = field(repr=False, init=False)

    def __post_init__(self) -> None:
        """Record ``type(value)`` and reject unsupported values.

        Raises:
            TypeError: if ``value`` is not an int, float or str instance.
        """
        self._type = type(self.value)
        # Example type check: accept only these scalar types.
        accepted = (int, float, str)
        if isinstance(self.value, accepted):
            return
        raise TypeError("Value must be int, float or str")


t1 = TypedData(42)
t2 = TypedData(3.14)
# TypedData([1, 2, 3]) would raise TypeError because a list is not accepted.

2.8 模式匹配 (Python 3.10+)

from dataclasses import dataclass

@dataclass
class Circle:
    radius: float

@dataclass
class Rectangle:
    width: float
    height: float

def area(shape):
    match shape:
        case Circle(radius=r):
            return 3.14159 * r ** 2
        case Rectangle(width=w, height=h):
            return w * h
        case _:
            raise TypeError("Unknown shape")

print(area(Circle(2.0)))      # 约 12.56636
print(area(Rectangle(3, 4)))  # 12

2.9 数据类与 JSON 序列化

from dataclasses import dataclass
import json

@dataclass
class User:
    """User record that can be round-tripped through a JSON string."""

    id: int
    name: str
    email: str
    is_active: bool = True

    def to_json(self):
        """Serialize the declared fields to a JSON object string.

        Returns:
            A JSON string with one key per dataclass field.
        """
        # Function-scope import keeps this snippet self-contained.
        from dataclasses import asdict

        # asdict emits only declared fields, so ad-hoc attributes set on the
        # instance cannot leak into the JSON and break from_json; it also does
        # not depend on the instance having a __dict__.
        return json.dumps(asdict(self))

    @classmethod
    def from_json(cls, json_str):
        """Build an instance from a JSON object string.

        Args:
            json_str: JSON text whose keys match the field names.

        Returns:
            A new instance constructed from the decoded key/value pairs.
        """
        data = json.loads(json_str)
        return cls(**data)


user = User(1, "Alice", "alice@example.com")
json_str = user.to_json()
print(json_str)  # {"id": 1, "name": "Alice", "email": "alice@example.com", "is_active": true}

new_user = User.from_json(json_str)
print(new_user)  # User(id=1, name='Alice', email='alice@example.com', is_active=True)

2.10 元数据存储

from dataclasses import dataclass, field

@dataclass
class ConfigItem:
    """Key/value configuration entry with an extra per-instance dict."""

    key: str
    value: str
    # The ``metadata=`` mapping passed to field() describes the field itself;
    # it is separate from the dict each instance stores in this attribute.
    metadata: dict = field(
        default_factory=dict,
        metadata={
            "description": "Additional metadata for configuration item",
            "internal": True,
        },
    )


item = ConfigItem("timeout", "30s", metadata={"unit": "seconds", "max": 120})
print(item)

# Read the field-level metadata back from the class.
field_meta = ConfigItem.__dataclass_fields__["metadata"].metadata
print(field_meta["description"])  # Additional metadata for configuration item

2.11 仅关键字参数 (Python 3.10+)

from dataclasses import dataclass, KW_ONLY

@dataclass
class Person:
    name: str
    age: int
    _: KW_ONLY  # 后面的字段只能通过关键字参数传递
    address: str = "Unknown"
    phone: str = ""

p = Person("Alice", 30, phone="555-1234")
# p = Person("Bob", 25, "123 Main St")  # 错误:位置参数不能用于address
print(p)  # Person(name='Alice', age=30, address='Unknown', phone='555-1234')

2.12 数据类组合

from dataclasses import dataclass

@dataclass
class Address:
    """Postal address made of street, city and zip code."""

    street: str
    city: str
    zip_code: str


@dataclass
class Person:
    """Person that holds an ``Address`` instance as one of its fields."""

    name: str
    age: int
    address: Address


addr = Address(street="123 Main St", city="Springfield", zip_code="12345")
person = Person(name="Alice", age=30, address=addr)
# The nested dataclass is rendered with its own generated repr.
print(person)
# Person(name='Alice', age=30, address=Address(street='123 Main St', city='Springfield', zip_code='12345'))

2.13 自定义字符串表示

from dataclasses import dataclass

@dataclass
class Vector:
    """Three-component vector with hand-written ``__str__`` and ``__repr__``."""

    x: float
    y: float
    z: float

    def __str__(self):
        """Return the display form, e.g. ``<Vector: (1, 2, 3)>``."""
        components = f"{self.x}, {self.y}, {self.z}"
        return "<Vector: (" + components + ")>"

    def __repr__(self):
        """Return the compact form, e.g. ``Vector(1, 2, 3)``."""
        return "Vector({}, {}, {})".format(self.x, self.y, self.z)


v = Vector(1, 2, 3)
print(str(v))   # <Vector: (1, 2, 3)>
print(repr(v))  # Vector(1, 2, 3)

2.14 槽位优化 (Python 3.10+)

from dataclasses import dataclass

@dataclass(slots=True)
class SlotPoint:
    x: float
    y: float

p = SlotPoint(1.0, 2.0)
print(p.__slots__)  # ('x', 'y')
# 比普通类更节省内存,访问速度更快

2.15 数据类与属性结合

from dataclasses import dataclass

@dataclass
class Temperature:
    """Temperature stored in Celsius, also exposed in Fahrenheit."""

    _celsius: float

    @property
    def celsius(self):
        """Return the stored Celsius value (read-only)."""
        return self._celsius

    @property
    def fahrenheit(self):
        """Return the stored value converted to Fahrenheit."""
        scaled = self._celsius * 9 / 5
        return scaled + 32

    @fahrenheit.setter
    def fahrenheit(self, value):
        """Store ``value``, given in Fahrenheit, as Celsius."""
        offset = value - 32
        self._celsius = offset * 5 / 9


temp = Temperature(0)
print(f"0°C = {temp.fahrenheit}°F")  # 0°C = 32.0°F

# Writing through the Fahrenheit setter updates the Celsius value.
temp.fahrenheit = 100
print(f"100°F = {temp.celsius}°C")  # 100°F = 37.77777777777778°C

3 总结

dataclass 的高级用法包括:

  1. 不可变数据类 (frozen=True) 确保数据安全
  2. __post_init__ 用于后初始化处理
  3. field 函数定制字段行为
  4. 排序支持 (order=True)
  5. 继承时的字段顺序处理
  6. 与 JSON 等格式的转换
  7. 类型验证与转换
  8. Python 3.10+ 新特性(模式匹配、仅关键字参数、槽位优化)
  9. 自定义字符串表示
  10. 与属性 (property) 结合使用

这些技巧能帮助你创建更安全、更高效、更易维护的数据结构,特别适合在数据密集型应用、配置管理和API数据传输中使用。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

@MMiL

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值