当我使用 Python 3.9 和 pymysql 1.1.0 时，我发现从游标获取数据后内存占用会增加。我需要循环运行模型，输入数据需要从 MySQL 数据库下载。我使用 dbutils 来管理 pymysql 连接。我用 memory_profiler 检查代码后发现，执行 “for row in cursor:” 时内存会增加。我应该怎么做才能解决这个问题？谢谢！
def _parse_table_name(table):
    """Split a table name into its point id and partition start time.

    Names are expected to look like ``Data<point>y<year>m<month>d<day>`` or
    ``fwData<point>y<year>m<month>d<day>``.

    Returns:
        A ``(point, start)`` tuple, where ``start`` is midnight of the encoded
        date, or ``None`` when the name does not follow that pattern.
    """
    try:
        # The point id is whatever follows the second 'a' (the end of
        # "Data") and precedes the first 'y'.
        point = table.split('y')[0].split('a')[2]
        date_part = table.split('y')[1]
        year = date_part.split('m')[0]
        month, day = date_part.split('m')[1].split('d')[:2]
        start = datetime.datetime.strptime(
            '{}-{}-{} 00:00:00'.format(year, month, day), '%Y-%m-%d %H:%M:%S')
    except (IndexError, ValueError):
        # Unrelated tables in the same schema must not abort the lookup.
        return None
    return point, start


def databaseSelect(dataConnect, unit, kkspoint, kksname, starttime, endtimePlus):
    """Fetch rows for one point from the date-partitioned table covering *starttime*.

    The function lists the tables of the database, picks the partition whose
    start date is the latest one not after ``starttime``, and selects
    ``kksname`` from it for the window
    ``[starttime, starttime + endtimePlus minutes]``.

    Args:
        dataConnect: Object whose ``connection()`` returns a DB-API style
            connection (``cursor()`` / ``close()``), e.g. a connection pool.
        unit: ``'zj'`` (tables prefixed ``Data``) or ``'fw'`` (tables
            prefixed ``fwData``).
        kkspoint: Point identifier embedded in the table name.
        kksname: Column expression placed after ``select``.
        starttime: Window start, formatted ``'%Y-%m-%d %H:%M:%S'``.
        endtimePlus: Window length in minutes.

    Returns:
        A list with the rows yielded by the cursor.

    Raises:
        IndexError: If ``unit`` is unknown, or no partition for ``kkspoint``
            starts at or before ``starttime``.
        ValueError: If ``starttime`` or ``endtimePlus`` cannot be parsed.
    """
    unit = str(unit)
    kkspoint = str(kkspoint)
    kksname = str(kksname)
    start = datetime.datetime.strptime(str(starttime), '%Y-%m-%d %H:%M:%S')
    end = start + datetime.timedelta(minutes=int(endtimePlus))

    # Validate before touching the database so a bad unit costs no round trip.
    if unit == 'zj':
        prefix = 'Data'
    elif unit == 'fw':
        prefix = 'fwData'
    else:
        print('#' * 100 + '\nIPdownloader.databaseSelect\nwrong unit')
        raise IndexError('wrong unit: {!r}'.format(unit))

    conn = dataConnect.connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('show tables;')
            partition_starts = []
            for row in cursor:
                name = row[0]
                # Depending on driver settings the name arrives as bytes or str.
                if isinstance(name, (bytes, bytearray)):
                    name = name.decode('utf-8')
                parsed = _parse_table_name(str(name))
                # NOTE(review): tables of both prefixes are matched here
                # regardless of `unit`, as before -- confirm that both units
                # share the same partition dates.
                if parsed is not None and parsed[0] == kkspoint:
                    partition_starts.append(parsed[1])

            # The newest partition has no successor, so it is treated as
            # open-ended instead of indexing past the end of the list.
            eligible = [t for t in partition_starts if t <= start]
            if not eligible:
                raise IndexError(
                    'no table for point {!r} covers {}'.format(kkspoint, start))
            chosen = max(eligible)

            table = '{}{}y{}m{}d{}'.format(
                prefix, kkspoint, chosen.year, chosen.month, chosen.day)
            # SECURITY: kksname and the table name are interpolated as SQL
            # identifiers and cannot be bound as parameters; callers must
            # only pass trusted values.
            sql = "select {} from {} where time between '{}' and '{}';".format(
                kksname, table, start, end.strftime('%Y-%m-%d %H:%M:%S'))
            cursor.execute(sql)
            # Locals are released when the function returns; explicit `del`
            # statements do not reduce memory usage.
            return list(cursor)
        finally:
            cursor.close()
    finally:
        conn.close()
我发现问题是由 pandas 在 pymysql 返回数据之后引起的。我的代码中使用的 pandas 版本是 2.1.0，代码每运行一次都会留下一个 weakref，因此内存占用不断增加。我将 pandas 版本改为 1.5.3 后问题得到解决，现在看起来一切正常。