我想为 pydantic BaseModel 生成的 JSON Schema 添加语义增强信息(semantic enrichment)。当我希望模型的某个字段是枚举类型时,问题就出现了:我找不到正确的方法,为新的枚举本身或枚举定义中的各个取值添加语义信息。下面是我临时拼凑出来的代码。它能生成我想要的输出,但绝对不是正确的解决方案。请提供更好的解决方案,或者告诉我是否有更好的方式用语义信息来描述我的数据对象。
我正在使用 Python 3.11.3 和以下软件包:
from enum import Enum
import json
from typing import Dict, Any
from pydantic import BaseModel, Field
def clean_dictionary(base: dict):
    """Strip taxonomy wrappers out of a decoded JSON structure.

    A wrapper is a dict whose only keys are ``"value"`` and ``"taxonomy"``.
    Every wrapper found is replaced by its ``"value"`` entry. Nested dicts
    and lists (including lists nested inside lists) are walked recursively.

    Args:
        base: The dict to clean. It is modified in place.

    Returns:
        ``base["value"]`` when ``base`` itself is a wrapper, otherwise the
        same ``base`` object after cleaning.
    """
    # Recursion exit: the dict is exactly a {"value", "taxonomy"} wrapper.
    if base.keys() == {"value", "taxonomy"}:
        return base["value"]
    # Only existing keys are reassigned, so the dict size never changes
    # while it is being iterated.
    for key, item in base.items():
        base[key] = _clean_node(item)
    return base


def _clean_node(node):
    """Clean one value: recurse into dicts and lists, pass others through."""
    if isinstance(node, dict):
        return clean_dictionary(node)
    if isinstance(node, list):
        # Rewrite the list in place so nested lists are handled as well.
        for index, child in enumerate(node):
            node[index] = _clean_node(child)
    return node
class OntologyModel(BaseModel):
    """A model sub-class that cleans the Enums when it generates JSON"""

    def model_dump(
        self,
        *,
        mode: str = "python",
        include=None,
        exclude=None,
        by_alias: bool = False,
        exclude_unset: bool = False,
        exclude_defaults: bool = False,
        exclude_none: bool = False,
        round_trip: bool = False,
        warnings: bool = True
    ) -> dict[str, Any]:
        """Return the model as a dict with taxonomy wrappers removed.

        The result is produced by parsing the string returned by
        ``model_dump_json``, so it only contains JSON-decoded values.
        ``mode`` is accepted in the signature but is not forwarded.
        """
        dump_options = {
            "include": include,
            "exclude": exclude,
            "by_alias": by_alias,
            "exclude_unset": exclude_unset,
            "exclude_defaults": exclude_defaults,
            "exclude_none": exclude_none,
            "round_trip": round_trip,
            "warnings": warnings,
        }
        return json.loads(self.model_dump_json(**dump_options))

    def model_dump_json(
        self,
        indent: int | None = None,
        include=None,
        exclude=None,
        by_alias: bool = False,
        exclude_unset: bool = False,
        exclude_defaults: bool = False,
        exclude_none: bool = False,
        round_trip: bool = False,
        warnings: bool = True,
    ):
        """Return the model as a JSON string with taxonomy wrappers removed.

        The parent class serializes the model first. That JSON is decoded,
        passed through ``clean_dictionary``, and then re-encoded with
        ``json.dumps`` using the requested ``indent``.
        """
        raw_json = super().model_dump_json(
            indent=indent,
            include=include,
            exclude=exclude,
            by_alias=by_alias,
            exclude_unset=exclude_unset,
            exclude_defaults=exclude_defaults,
            exclude_none=exclude_none,
            round_trip=round_trip,
            warnings=warnings,
        )
        cleaned = clean_dictionary(json.loads(raw_json))
        return json.dumps(cleaned, indent=indent)
class FlowerEnum(Enum):
    """taxonomy: //example.com/flowers/F000021"""

    # Each member's value is a dict pairing the plain value with the
    # taxonomy reference attached to it.
    DAN = {
        "value": "dandelion",
        "taxonomy": "//example.com/flowers#D00012",
    }
    ORC = {
        "value": "ochid",
        "taxonomy": "//example.com/flowers#O00032",
    }
class ColorEnum(Enum):
    """taxonomy: https://example.com/colors/C000000"""

    # Each member's value is a dict pairing the plain value with the
    # taxonomy reference attached to it.
    RED = {
        "value": "red",
        "taxonomy": "//example.com/colors#C000001",
    }
    PUR = {
        "value": "purple",
        "taxonomy": "//example.com/colors#C000002",
    }
class Flower(OntologyModel):
    """An instance of a specific flower"""

    # Model-level taxonomy reference added to the generated JSON schema.
    # Declared through ``model_config`` because the nested ``class Config``
    # form is deprecated in Pydantic v2, the API the rest of this code uses
    # (``model_dump`` / ``model_json_schema``).
    model_config = {
        "json_schema_extra": {"taxonomy": "//example.com/flowers#F000003"},
    }

    # Required field; its taxonomy reference is attached to the property
    # entry of the schema.
    variety: FlowerEnum = Field(
        ...,
        description="The type of flower",
        json_schema_extra={"taxonomy": "//example.com/flowers#F000004"},
    )
    # Required field; its taxonomy reference is attached to the property
    # entry of the schema.
    color: ColorEnum = Field(
        ...,
        description="The flower's color",
        json_schema_extra={"taxonomy": "//example.com/colors#C000005"},
    )
if __name__ == "__main__":
    from pprint import pprint

    # Demo: build one flower, then print its JSON schema and its dumped
    # form, each framed by a row of asterisks.
    separator = "*" * 80
    flower = Flower(variety=FlowerEnum.ORC, color=ColorEnum.PUR)

    print("\n", separator, "\n")
    pprint(flower.model_json_schema())
    print("\n", separator, "\n")
    pprint(flower.model_dump())
    print("\n", separator, "\n")
这段代码生成的结果勉强能满足我的需要,但我更希望创建符合最佳实践的 Schema,并且不再需要"必须以这种特殊方式指定枚举值"这一别扭的隐含要求。以下是上述代码的输出:
{'$defs': {'ColorEnum': {'description': 'taxonomy: /example.com/colors/C000000',
'enum': [{'taxonomy': '/example.com/colors#C000001',
'value': 'red'},
{'taxonomy': '/example.com/colors#C000002',
'value': 'purple'}],
'title': 'ColorEnum'},
'FlowerEnum': {'description': 'taxonomy: '
'/example.com/flowers/F000021',
'enum': [{'taxonomy': '/example.com/flowers#D00012',
'value': 'dandelion'},
{'taxonomy': '/example.com/flowers#O00032',
'value': 'ochid'}],
'title': 'FlowerEnum'}},
'description': 'An instance of a specific flower',
'properties': {'color': {'allOf': [{'$ref': '#/$defs/ColorEnum'}],
'description': "The flower's color",
'taxonomy': '/example.com/colors#C000005'},
'variety': {'allOf': [{'$ref': '#/$defs/FlowerEnum'}],
'description': 'The type of flower',
'taxonomy': '/example.com/flowers#F000004'}},
'required': ['variety', 'color'],
'taxonomy': '/example.com/flowers#F000003',
'title': 'Flower',
'type': 'object'}
********************************************************************************
{'color': 'purple', 'variety': 'ochid'}
********************************************************************************
from enum import Enum
from pydantic import BaseModel, Field
class FlowerEnum(str, Enum):
    # Members are plain strings because of the ``str`` mixin.
    # No class docstring is added on purpose, so the generated schema
    # description stays as it was.
    DANDELLION = "dandelion"
    ORCHID = "orchid"
class ColorEnum(str, Enum):
    # Members are plain strings because of the ``str`` mixin.
    # No class docstring is added on purpose, so the generated schema
    # description stays as it was.
    RED = "red"
    PURPLE = "purple"
class Flower(BaseModel):
    # Extra schema metadata is passed through ``json_schema_extra``, the
    # Pydantic v2 keyword for adding keys to a field's JSON schema;
    # ``Field`` has no ``schema`` keyword.
    # NOTE(review): the "enum" entries below are objects, while the field
    # itself serializes to the plain string values - confirm that schema
    # consumers expect this shape.
    variety: FlowerEnum = Field(
        ...,
        json_schema_extra={
            "title": "FlowerEnum",
            "description": "taxonomy: /example.com/flowers/F000021",
            "enum": [
                {"value": "dandelion", "taxonomy": "//example.com/flowers#D00012"},
                {"value": "orchid", "taxonomy": "//example.com/flowers#O00032"},
            ],
        },
    )
    color: ColorEnum = Field(
        ...,
        json_schema_extra={
            "title": "ColorEnum",
            "description": "taxonomy: /example.com/colors/C000000",
            "enum": [
                {"value": "red", "taxonomy": "//example.com/colors#C000001"},
                {"value": "purple", "taxonomy": "//example.com/colors#C000002"},
            ],
        },
    )
使用 str 作为枚举的基类,以避免自定义 JSON 序列化。
3. 整体自定义 JSON Schema:
使用 Config.schema_extra(在 Pydantic v2 中对应 model_config 里的 json_schema_extra)添加全局 Schema 元数据:
class Flower(BaseModel):
    # ...
    # Pydantic v2 form of the v1 ``Config.schema_extra`` setting: the
    # model-level extra schema keys live under ``json_schema_extra`` in
    # ``model_config``.
    model_config = {
        "json_schema_extra": {"taxonomy": "//example.com/flowers#F000003"},
    }