我正在使用 SQLAlchemy 构建 Postgres 数据库。我正在像这样创建我的架构:
import datetime
from typing import List, Optional
import sqlalchemy as sa
from sqlalchemy import orm, MetaData, create_engine, FetchedValue, ForeignKey, Column, Integer, Computed
from sqlalchemy.orm import relationship
from sqlalchemy.orm import Session
from sqlalchemy.orm import (
mapped_column,
DeclarativeBase,
Mapped,
MappedAsDataclass,
)
from sqlalchemy.dialects.postgresql import ARRAY, TEXT, JSONB
# Connection URL for the Postgres database (redacted in this listing).
dbUrl = XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# Stand-alone MetaData object; the visible code never references it again
# (tables are created through Base.metadata instead).
metadata = MetaData()


class Base(MappedAsDataclass, DeclarativeBase):
    """Shared declarative base; combines DeclarativeBase with MappedAsDataclass."""


# echo=True is passed so the engine logs the SQL it emits.
engine = create_engine(dbUrl, echo=True)
class MediaType(Base):
    """Row of the ``media_types`` table, keyed by the media type name."""

    __tablename__ = 'media_types'

    # String primary key; left out of the generated __init__ (init=False).
    media_type: Mapped[str] = mapped_column(primary_key=True, init=False)
    description: Mapped[str] = mapped_column(unique=True, default=None)
    # One-dimensional Postgres TEXT[] column, nullable and not unique.
    file_formats: Mapped[list] = mapped_column(
        ARRAY(TEXT, dimensions=1),
        default=None,
        nullable=True,
        unique=False,
    )


# Bare attribute access; the result is discarded.
MediaType.__table__
class Collection(MediaType):
    """Row of the ``collections`` table, mapped as a subclass of MediaType.

    ``media_type`` is both a foreign key into ``media_types`` and the
    polymorphic discriminator for this hierarchy.
    """

    __tablename__ = 'collections'
    __mapper_args__ = {
        'polymorphic_identity': 'collections',
        'polymorphic_on': 'media_type',
        'eager_defaults': True,
    }

    collection_id: Mapped[int] = mapped_column(
        primary_key=True,
        autoincrement=True,
        init=False,
    )
    # No default and not init=False, so the generated __init__ requires it.
    media_type: Mapped[str] = mapped_column(
        ForeignKey("media_types.media_type")
    )
    collection_name: Mapped[str] = mapped_column(unique=True, default=None)
    description: Mapped[str] = mapped_column(
        default=None, nullable=True, unique=False
    )
    tags: Mapped[list] = mapped_column(
        ARRAY(TEXT, dimensions=1),
        nullable=True,
        default=None,
    )
    date_added: Mapped[datetime.datetime] = mapped_column(
        nullable=True, default=None
    )


# Bare attribute access; the result is discarded.
Collection.__table__
class Post(Collection):
    """Row of the ``posts`` table, mapped as a subclass of Collection.

    Holds the columns shared by every kind of post; the ``Link`` and
    ``Image`` classes extend it with their own tables.
    """

    __tablename__ = 'posts'

    id: Mapped[int] = mapped_column(init=False, primary_key=True)
    collection_id: Mapped[int] = mapped_column(
        ForeignKey("collections.collection_id"), default=None)
    user: Mapped[str] = mapped_column(default=None, nullable=True)
    title: Mapped[str] = mapped_column(default=None, nullable=True)
    description: Mapped[str] = mapped_column(default=None, nullable=True)
    # Single declaration of date_modified; Optional[...] matches the None
    # default and the nullable column.
    date_modified: Mapped[Optional[datetime.datetime]] = mapped_column(
        default=None, nullable=True)
    # One-dimensional Postgres TEXT[] column.
    tags: Mapped[list] = mapped_column(ARRAY(TEXT, dimensions=1),
                                       default=None,
                                       nullable=True)
    views: Mapped[int] = mapped_column(default=0, nullable=True)
    social_media: Mapped[dict] = mapped_column(JSONB,
                                               default=None,
                                               nullable=True)
    date_added: Mapped[datetime.datetime] = mapped_column(default=None,
                                                          nullable=True)

    __mapper_args__ = {
        'polymorphic_identity': 'posts',
        'polymorphic_on': 'media_type',
        'eager_defaults': True,
    }


# Bare attribute access; the result is discarded.
Post.__table__
class Link(Post):
    """Row of the ``links`` table: a Post that carries a URL."""

    __tablename__ = 'links'
    __mapper_args__ = {
        'polymorphic_identity': 'links',
        'eager_defaults': True,
    }

    # Primary key that is also a foreign key to posts.id.
    id: Mapped[int] = mapped_column(
        ForeignKey('posts.id'),
        primary_key=True,
        init=False,
    )
    url: Mapped[str] = mapped_column(unique=True, default=None)
    other_info: Mapped[str] = mapped_column(nullable=True, default=None)
    clicks: Mapped[int] = mapped_column(nullable=True, default=0)


# Bare attribute access; the result is discarded.
Link.__table__
class Image(Post):
    """Row of the ``image`` table: a Post that carries a file path."""

    __tablename__ = 'image'
    __mapper_args__ = {
        'polymorphic_identity': 'image',
        'eager_defaults': True,
    }

    # Primary key that is also a foreign key to posts.id.
    id: Mapped[int] = mapped_column(
        ForeignKey('posts.id'),
        primary_key=True,
        init=False,
    )
    filepath: Mapped[str] = mapped_column(
        unique=True,
        nullable=True,
        default=None,
    )


# Bare attribute access; the result is discarded.
Image.__table__
# Create every table registered on Base.metadata using the engine above.
Base.metadata.create_all(engine)
这里的想法是,有不同类型的帖子(例如链接或图像),每种类型都附加有独特的数据。因此,每种数据类型都有自己的表。由于所有帖子共享一些数据(如标题、描述和日期),因此这些表使用与 posts 表相连接的表继承。我还希望能够存储有关媒体类型的信息,因此我选择为媒体类型而不是枚举数据类型创建一个表。我使用字符主键,因为永远不会超过几行,并且名称不会更改。
我遇到的问题是,当我尝试向某个孙子表插入数据时,SQLAlchemy 还会尝试把冗余的类型名称插入到祖父(grandparent)表中:
# First attempt: build a Link and pass media_type explicitly.
# NOTE(review): newCollection is not defined in the visible code, and its
# return value is passed as collection_id -- confirm it returns the integer key.
collection = newCollection("New collection", "description of collection", sesh)
link = Link(url="http://example.com",
title="Example link",
collection_id=collection,
description="My example link",
date_added=datetime.datetime.now(),
media_type='link')
psycopg2.errors.UniqueViolation: duplicate key value violates unique constraint "media_types_pkey"
DETAIL: Key (media_type)=(link) already exists.
我想也许我可以省略“media_type”,因为它应该由集合暗示,该集合已经有一个“media_type”:
# Second attempt: the same Link without media_type, then add and commit it.
link = Link(url="http://example.com",
title="Example link",
collection_id=collection,
description="My example link",
date_added=datetime.datetime.now())
sesh.add(link)
sesh.commit()
但这也不起作用:
Traceback (most recent call last):
File "main.py", line 149, in <module>
link = Link(url="http://example.com",
TypeError: __init__() missing 1 required positional argument: 'media_type'
从架构中可以看到,“media_type”并不在实际的 Link 对象中,但继承层次结构需要它(来自祖父表)。我想要的是:不要在每次创建集合时都尝试向 media_types 表插入新行。
那么,如何“告诉”数据库(顺便说一句,是 Postgres)不要尝试这种冗余插入,或者在遇到重复项时只跳过这一次插入(而不是让整个事务失败)?
编辑:我刚刚注意到在错误发生之前,我收到此警告,我怀疑这是相关的:
main.py:146: SAWarning: Flushing object <Collection at 0x7ffab9bbd5d0> with incompatible polymorphic identity 'link'; the object may not refresh and/or load correctly (this warning may be suppressed after 10 occurrences)
session.flush()
下面的写法可行。我之前没有意识到必须为 media_type 使用不同的列名称。
class MediaType(Base):
    """Row of the ``media_types`` table, keyed by the media type name."""

    __tablename__ = "media_types"

    # String primary key, accepted by the generated __init__ (init=True).
    media_type: Mapped[str] = mapped_column(primary_key=True, init=True)
    file_format: Mapped[str] = mapped_column(
        unique=True,
        nullable=True,
        default=None,
    )
class Collection(Base):
    """Row of the ``collections`` table; refers to a MediaType by foreign key."""

    __tablename__ = "collections"
    # NOTE(review): a polymorphic_identity is set here although this class
    # declares no polymorphic_on -- confirm it is still needed.
    __mapper_args__ = {
        "polymorphic_identity": "collections",
        "eager_defaults": True,
    }

    collection_id: Mapped[int] = mapped_column(
        primary_key=True,
        autoincrement=True,
        init=False,
    )
    collection_name: Mapped[str] = mapped_column(unique=True, default=None)
    collection_description: Mapped[str] = mapped_column(
        nullable=True,
        unique=False,
        default=None,
    )
    # Python attribute media_type_id is stored in the column named
    # "media_type"; the media_type attribute below is the relationship.
    media_type_id: Mapped[str] = mapped_column(
        "media_type",
        ForeignKey("media_types.media_type"),
        default=None,
    )
    media_type: Mapped[MediaType] = relationship(init=False)
class Post(Base):
    """Row of the ``posts`` table and root of the Post/Link/Image hierarchy.

    ``media_type`` is the polymorphic discriminator and also a foreign key
    into ``media_types``.
    """

    __tablename__ = "posts"
    __mapper_args__ = {
        "polymorphic_identity": "posts",
        "polymorphic_on": "media_type",
        "eager_defaults": True,
    }

    id: Mapped[int] = mapped_column(primary_key=True, init=False)
    title: Mapped[str] = mapped_column(nullable=True, default=None)
    description: Mapped[str] = mapped_column(nullable=True, default=None)
    # Foreign key column is excluded from __init__; callers pass the
    # Collection object through the keyword-only `collection` relationship.
    collection_id: Mapped[int] = mapped_column(
        ForeignKey("collections.collection_id"),
        init=False,
    )
    collection: Mapped[Collection] = relationship(kw_only=True)
    media_type: Mapped[str] = mapped_column(
        ForeignKey("media_types.media_type"),
        init=False,
    )
    media_type_reference: Mapped[MediaType] = relationship(init=False)
class Link(Post):
    """Row of the ``links`` table: a Post that carries a URL."""

    __tablename__ = "links"
    __mapper_args__ = {
        "polymorphic_identity": "links",
        "eager_defaults": True,
    }

    # Primary key that is also a foreign key to posts.id.
    id: Mapped[int] = mapped_column(
        ForeignKey("posts.id"),
        primary_key=True,
        init=False,
    )
    url: Mapped[str] = mapped_column(unique=True, default=None)
    other_info: Mapped[str] = mapped_column(nullable=True, default=None)
class Image(Post):
    """Row of the ``images`` table: a Post that carries a file path."""

    __tablename__ = "images"
    __mapper_args__ = {
        "polymorphic_identity": "images",
        "eager_defaults": True,
    }

    # Primary key that is also a foreign key to posts.id.
    id: Mapped[int] = mapped_column(
        ForeignKey("posts.id"),
        primary_key=True,
        init=False,
    )
    filepath: Mapped[str] = mapped_column(unique=True, default=None)
# Bare attribute access; the result is discarded.
Image.__table__
# Create every table registered on Base.metadata.
Base.metadata.create_all(engine)

# Each unit of work runs in its own context-managed Session so the session
# is closed when the block exits; no stand-alone Session is left open.
with Session(engine) as sesh:
    # Pre-populate all possible MediaTypes. Post.media_type is both the
    # polymorphic discriminator and a foreign key into media_types, so the
    # identity strings must exist there before any Post/Link/Image is added.
    sesh.add_all(
        [
            MediaType(media_type=mt)
            for mt in ["collections", "posts", "links", "images"]
        ]
    )
    sesh.commit()

with Session(engine) as sesh:
    collection = Collection(
        collection_name="New collection",
        collection_description="My new collection of links",
        media_type_id="links"
    )
    # The Collection is handed to Link through the `collection` relationship;
    # only the Link is passed to sesh.add().
    new = Link(
        url="http://example.com",
        collection=collection,
        title="New Link",
        description="Description of link"
    )
    sesh.add(new)
    sesh.commit()
    # A second Link attached to the same Collection object.
    new = Link(
        url="http://google.com",
        collection=collection,
        title="Google Search",
        description="Search the Internet"
    )
    sesh.add(new)
    sesh.commit()