我想要实现的功能是创建一个新类并导入用户。由于我不希望多次导入同一用户并且用户可以加入不同的类,因此我使用了 get_or_create()。但是,我注意到,当一次导入大量用户时(使用 CSV 导入用户),程序会变得很慢。
def post(self, request):
class_data = request.data
class_data["start_time"] = dateutil.parser.parse(class_data["start_time"])
class_data["end_time"] = dateutil.parser.parse(class_data["end_time"])
class_data["created_by"] = request.user
if class_data["end_time"] <= class_data["start_time"]:
return self.error("Start time must occur earlier than end time")
user_data = class_data.pop("users") # Retrieve user data and remove it from the class data
try:
with transaction.atomic():
if Class.objects.filter(title=class_data["title"]).exists():
return self.error("Class with the same title already exists")
class_obj = Class.objects.create(**class_data) # Create the class object
# Add the creator to the class members
if not class_obj.users.filter(id=request.user.id).exists():
class_obj.users.add(request.user)
for data in user_data:
if len(data) != 4 or len(data[1]) > 32:
return self.error(f"Error occurred while processing data '{data}'")
username = data[1]
user, created = User.objects.get_or_create(username=username, defaults={
"password": make_password(data[1]),
"college": data[3],
"student_number": data[1]
})
if created:
profile = UserProfile(user=user, real_name=data[2])
profile.save()
# class_obj.users.add(user)
if not class_obj.users.filter(id=user.id).exists():
class_obj.users.add(user)
return self.success(ClassAdminSerializer(class_obj).data)
except IntegrityError as e:
return self.error(str(e).split("\n")[1])
我已经确定了减慢程序速度的三个方面。首先, get_or_create() 操作需要很长时间,因为它单独检查每个用户是否存在。其次,profile.save()操作会逐条插入数据,导致时间消耗很大。最后,向 Class 和 User 模型之间的关联表插入数据的 class_obj.users.add(user) 操作也需要相当长的时间。 这些是我的用户和班级的模型:
class User(AbstractBaseUser):
id = models.BigAutoField(primary_key=True)
username = models.TextField(unique=True)
email = models.TextField(blank=True, null=True)
# ADD
college = models.TextField(default=College.COMMUNICATION_COLLEGE)
student_number = models.TextField(null=True)
major = models.TextField(null=True)
create_time = models.DateTimeField(auto_now_add=True, null=True)
# One of UserType
admin_type = models.TextField(default=AdminType.REGULAR_USER)
problem_permission = models.TextField(default=ProblemPermission.NONE)
reset_password_token = models.TextField(null=True)
reset_password_token_expire_time = models.DateTimeField(null=True)
# SSO auth token
auth_token = models.TextField(null=True)
two_factor_auth = models.BooleanField(default=False)
tfa_token = models.TextField(null=True)
session_keys = JSONField(default=list)
# open api key
open_api = models.BooleanField(default=False)
open_api_appkey = models.TextField(null=True)
is_disabled = models.BooleanField(default=False)
USERNAME_FIELD = "username"
REQUIRED_FIELDS = []
objects = UserManager()
def is_admin(self):
return self.admin_type == AdminType.ADMIN
def is_super_admin(self):
return self.admin_type == AdminType.SUPER_ADMIN
def is_admin_role(self):
return self.admin_type in [AdminType.ADMIN, AdminType.SUPER_ADMIN]
def can_mgmt_all_problem(self):
return self.problem_permission == ProblemPermission.ALL
def is_contest_admin(self, contest):
return self.is_authenticated and (contest.created_by == self or self.admin_type == AdminType.SUPER_ADMIN)
def is_Class_admin(self, class_obj):
return self.is_authenticated and (class_obj.created_by == self or self.admin_type == AdminType.SUPER_ADMIN)
class Meta:
db_table = "user"
class Class(models.Model):
title = models.TextField()
start_time = models.DateTimeField()
end_time = models.DateTimeField()
create_time = models.DateTimeField(auto_now_add=True)
# show real time rank or cached rank
real_time_rank = models.BooleanField(default=True)
created_by = models.ForeignKey(User, on_delete=models.CASCADE, related_name='created_classes')
users = models.ManyToManyField(User, related_name='enrolled_classes')
@property
def status(self):
if self.start_time > now():
return ClassStatus.CLASS_NOT_START
elif self.end_time < now():
return ClassStatus.CLASS_ENDED
else:
return ClassStatus.CLASS_UNDERWAY
def problem_details_permission(self, user):
return self.status == ClassStatus.CLASS_ENDED or \
user.is_authenticated and user.is_Class_admin(self) or \
self.real_time_rank
class Meta:
db_table = "Class"
ordering = ("-start_time",)
这是我第一次使用 Django 模型,我不知道如何解决这个问题。我想询问有关如何修改我的代码的建议。您能否建议我进行必要的更改?
有多种方法可以改进此代码,但大多数时候这都会带来一些权衡:
User.objects.filter(username__in=[data[1] for data in user_data]).in_bulk(field_name="username")
。然后,您可以通过开始查找此方法返回的字典来替换您的 get_or_create ,并且只有在字典中没有的情况下才重新创建一个新的users = User(username=...) for data in user_data; User.bulk_create(users)
,但这很危险,因为不会发送信号,因为这是单个用户创建的情况class.users.add
接受多个对象,因此您可以将所有用户存储在列表中(无论是创建的还是新的)并使用class_inst.users.add(*users)