我正在使用 Python 3.11 和 pydantic 2.7。我想从模型中收集一些特定信息,而实际的模型非常大。那么,我能否在解析/验证模型的同时把这些数据收集起来?
我尝试过使用验证器和类变量,但如果多次执行解析,类变量会一直保留之前收集到的数据并不断累积。我需要的是每次解析(每个实例)各自独立的结果。有什么建议吗?
下面是我的代码(附带一个简单的计时函数):
import copy
from time import perf_counter as time
from typing import Any, ClassVar
from pydantic import BaseModel, field_validator
class Employees(BaseModel):
    # Employee record whose 'name' validator also logs every validated name
    # into a list stored on the class.
    name: str
    # Annotated as ClassVar: a single list living on the class, not a model
    # field. Nothing in this class ever empties it.
    employees_names: ClassVar[list[str]] = []

    @field_validator('name')
    @classmethod
    def collect_employees_names(cls, name: str) -> str:
        # Side effect only: remember the name, then hand it back unchanged.
        cls.employees_names.append(name)
        return name
class EmployeesNative(BaseModel):
    # Plain employee record: just the name, no validator and no class-level
    # collection list.
    name: str
class Manager(BaseModel):
    # Manager record with nested employees; its 'name' validator also logs
    # every validated manager name into a list stored on the class.
    name: str
    employees: list[Employees]
    # Annotated as ClassVar: a single list living on the class, not a model
    # field. Nothing in this class ever empties it.
    managers_names: ClassVar[list[str]] = []

    @field_validator('name')
    @classmethod
    def collect_managers_names(cls, name: str) -> str:
        # Side effect only: remember the name, then hand it back unchanged.
        cls.managers_names.append(name)
        return name
class ManagerNative(BaseModel):
    # Plain manager record: a name plus a list of plain employee records,
    # with no validators attached.
    name: str
    employees: list[EmployeesNative]
class Results(BaseModel):
    # One result entry holding managers parsed with the name-collecting
    # Manager model.
    managers: list[Manager]
class ResultsNative(BaseModel):
    # One result entry holding managers parsed with the validator-free
    # ManagerNative model.
    managers: list[ManagerNative]
class Data(BaseModel):
    # Container for the list of Results entries.
    results: list[Results]
class DataNative(BaseModel):
    # Container for the list of ResultsNative entries.
    results: list[ResultsNative]
class Schema(BaseModel):
    # Top-level payload model for the validator-based variant: wraps a single
    # Data object under the 'data' key.
    data: Data
class SchemaNative(BaseModel):
    # Top-level payload model for the validator-free variant: wraps a single
    # DataNative object under the 'data' key.
    data: DataNative
# Sample managers; each carries a (possibly empty) list of employees.
man1 = {
    'name': 'max',
    'employees': [{'name': 'Jorge'}, {'name': 'Sasha'}],
}
man2 = {
    'name': 'kate',
    'employees': [{'name': 'Eva'}, {'name': 'Mike'}, {'name': 'Riss'}],
}
man3 = {
    'name': 'bub',
    'employees': [],
}
man4 = {
    'name': 'slava',
    'employees': [{'name': 'Martha'}],
}

# Result entries grouping the sample managers (one, two and one manager).
res1 = {'managers': [man1]}
res2 = {'managers': [man2, man3]}
res3 = {'managers': [man4]}
def parse_native(obj: dict[str, Any]) -> list[str]:
    """Validate ``obj`` with the plain models and list every person's name.

    The parsed model tree is walked after validation. Manager names come
    first, followed by employee names, both in the order they appear in the
    payload. The combined list is printed and returned.
    """
    schema = SchemaNative.model_validate(obj)
    # Flatten the managers once; both name lists are derived from it.
    managers = [
        manager
        for result in schema.data.results
        for manager in result.managers
    ]
    manager_names = [manager.name for manager in managers]
    employee_names = [
        employee.name
        for manager in managers
        for employee in manager.employees
    ]
    names = manager_names + employee_names
    print('native: ', names)
    return names
def parse_validators(obj: dict[str, Any]) -> list[str]:
    """Validate ``obj`` and return the names gathered by the field validators.

    The result is whatever the class-level lists on ``Manager`` and
    ``Employees`` hold once validation has finished, managers first. This
    function never resets those lists, so names appended during earlier
    validations are still part of the result.
    """
    Schema.model_validate(obj)
    # Unpacking builds a fresh list; the class-level lists stay untouched.
    collected = [*Manager.managers_names, *Employees.employees_names]
    print('validators: ', collected)
    return collected
def run_test(func, objs, comment='', reps=1, average=True, prec=.6):
    """Time ``func`` over every item of ``objs`` and print the result.

    Only the time spent inside ``func`` is accumulated; loop overhead between
    calls is excluded.

    Args:
        func: Callable invoked as ``func(item)`` for each item of ``objs``.
        objs: Collection of inputs; it is iterated once per repetition, so it
            must support repeated iteration when ``reps`` is greater than one.
        comment: Label printed in parentheses after the timing.
        reps: Number of passes over ``objs``.
        average: If true, print the mean time per pass; otherwise the total.
        prec: Precision part of the float format spec (``.6`` prints six
            decimal places).
    """
    total = 0.0
    for _ in range(reps):
        for item in objs:
            start = time()
            func(item)
            total += time() - start
    # With reps <= 0 nothing was executed; report the zero total instead of
    # dividing by zero.
    elapsed = total / reps if average and reps > 0 else total
    print(f'{elapsed:{prec}f} ({comment})')
# payload for tests: one schema-shaped dict wrapping the three sample results
test_obj = {'data': {'results': [res1, res2, res3]}}
# range(1) yields a single deep copy; raise the count to benchmark more inputs
payload = tuple(copy.deepcopy(test_obj) for _ in range(1))
# number of repetitions (passes over the payload) per benchmark
REPS = 1
# Benchmark the validator-free models first, then the validator-based ones.
# NOTE: parse_validators reads class-level lists that are never reset, so its
# printed names grow if REPS or the payload size is increased.
run_test(func=parse_native, objs=payload, comment='No validators', reps=REPS)
run_test(func=parse_validators, objs=payload, comment='With validators', reps=REPS)
我还尝试过在嵌套模型中设置实例变量并重写 `.model_validate()` 方法,但都没有成功。
亚历山大,
问题在于使用 ClassVar 来存储 `employees_names`。ClassVar 属于类本身,由所有实例共享,因此这个列表会一直保留之前所有验证过程中追加的值。换句话说,它会不断累积,直到您显式地将其清空为止。
例如,您可以在 `parse_validators` 函数内完成清空:
def parse_validators(obj: dict[str, Any]) -> list[str]:
    """Validate ``obj`` and return only the names collected during this call.

    The field validators on ``Manager`` and ``Employees`` append to
    class-level lists. Those lists are reset before validation and again
    afterwards, so each call reports exactly the names found in ``obj``,
    managers first. The result is printed and returned.

    Any exception raised by ``Schema.model_validate`` propagates to the
    caller; the class-level lists are still reset in that case.
    """
    # Drop anything left over from validations performed outside this
    # function, otherwise those names would show up in this call's result.
    Manager.managers_names = []
    Employees.employees_names = []
    try:
        Schema.model_validate(obj)
        names = Manager.managers_names + Employees.employees_names
    finally:
        # Reset even when validation fails midway, so partially collected
        # names cannot leak into the next call.
        Manager.managers_names = []
        Employees.employees_names = []
    print('validators: ', names)
    return names
很抱歉没能给出更好的方案,但在不大幅重构代码的前提下,我认为这是我能提供的最佳建议。
还有一点:可以考虑使用 `def model_post_init(self, __context: Any)` 来代替 field_validator。我认为它正是为此而设计的:在所有字段都完成初始化之后,再根据它们去更新其他变量。