我有一个带类和不带类的 Pandas DataFrame。如果有类,我有错误的数据时间索引,但如果没有类,那就可以了。请帮忙。
from datetime import datetime
import pandas as pd
class Records:
def __init__(self):
columns = ['moment', 'val']
# Create an empty DataFrame with columns
self.dfv = pd.DataFrame(columns=columns)
def add(self, moment, val):
# Record
rec = {'moment': moment, 'val': val}
# Convert 'moment' to datetime
rec['moment'] = pd.to_datetime(rec['moment'], format='%Y-%m-%d %H:%M:%S')
# Add the record to DataFrame
self.dfv = self.dfv.append(rec, ignore_index=True)
self.dfv['moment'] = pd.to_datetime(self.dfv['moment'])
self.dfv.set_index('moment', inplace=True)
return self.dfv
if __name__ == '__main__':
print('=============================Class records===================')
r = Records()
df = r.add('2023-11-01 10:00:00', 100.0)
print(df)
df = r.add('2023-11-01 11:00:00', 120.0)
# Print the updated DataFrame
print(df)
df.info()
print('=============================Single records==================')
# Test first column datatime as an index
columns = ['moment', 'val']
dfv = pd.DataFrame(columns=columns)
dfv['moment'] = pd.to_datetime(dfv['moment'])
dfv.set_index('moment', inplace=True)
rec = {'moment': '2023-11-01 10:00:00', 'val': 100.0}
# Convert 'moment' to datetime
rec['moment'] = pd.to_datetime(rec['moment'], format='%Y-%m-%d %H:%M:%S')
# Add the record to DataFrame
dfv = dfv.append(rec, ignore_index=True)
# Set 'moment' as the index
rec2 = {'moment': '2023-11-01 11:00:00', 'val': 120.0}
dfv = dfv.append(rec2, ignore_index=True)
# Set 'moment' as the index
dfv.set_index('moment', inplace=True)
print(dfv)
dfv.info()
exit(0)
输出
=============================Class records===================
val
moment
2023-11-01 10:00:00 100.0
val
moment
NaT 100.0
2023-11-01 11:00:00 120.0
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2 entries, NaT to 2023-11-01 11:00:00
Data columns (total 1 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 val 2 non-null float64
dtypes: float64(1)
memory usage: 32.0 bytes
=============================Single records==================
val
moment
2023-11-01 10:00:00 100.0
2023-11-01 11:00:00 120.0
<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2023-11-01 10:00:00 to 2023-11-01 11:00:00
Data columns (total 1 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 val 2 non-null float64
dtypes: float64(1)
memory usage: 32.0+ bytes
因此,在没有类的情况下,我完全有我想要得到的东西,但如果我使用类(并且我需要使用类来达到我的目的),我将 NaT 作为第一个记录索引值。请告知如何在课堂上获得正确的代码。谢谢。
您可以使用以下方法修复代码:
from datetime import datetime
import pandas as pd
class Records:
def __init__(self):
# Create a correctly typed dataframe
self.dfv = pd.DataFrame(columns=pd.Series(name='val', dtype='float'),
index=pd.DatetimeIndex([], name='moment'))
def add(self, moment, val):
# Same remark as previous
moment = pd.to_datetime(moment, format='%Y-%m-%d %H:%M:%S')
rec = pd.DataFrame({'val': val}, index=[moment])
# Safe concat, works with most recent versions of Pandas
self.dfv = pd.concat([self.dfv, rec], axis=0)
return self.dfv
print('=============================Class records===================')
r = Records()
df = r.add('2023-11-01 10:00:00', 100.0)
print(df)
df = r.add('2023-11-01 11:00:00', 120.0)
print(df)
df.info()
输出:
=============================Class records===================
val
2023-11-01 10:00:00 100.0
val
2023-11-01 10:00:00 100.0
2023-11-01 11:00:00 120.0
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2 entries, 2023-11-01 10:00:00 to 2023-11-01 11:00:00
Data columns (total 1 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 val 2 non-null float64
dtypes: float64(1)
memory usage: 32.0 bytes
问题是,在您第一次调用
df
后,您的 r.add(...)
的形状有所不同。让我们在初始化后和第一次调用后打印 df
来清楚地看到这一点:
if __name__ == '__main__':
print('=============================Class records===================')
r = Records()
df = r.dfv
print("""df after initialization:\n""")
print(df)
df = r.add('2023-11-01 10:00:00', 100.0)
print("""df after first `add` call:\n""")
print(df)
=============================Class records===================
df after initialization:
Empty DataFrame
Columns: [moment, val]
Index: []
df after first `add` call:
val
moment
2023-11-01 10:00:00 100.0
C:\Users\bobco\AppData\Local\Temp\ipykernel_12940\2699656422.py:20: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
self.dfv = self.dfv.append(rec, ignore_index=True)
因此,您的第一个调用正在运行,因为您在有 2 列 (df.append
) 的
df
上使用 ['moment', 'val']
。然而,对于所有后续调用,您的 df
只剩下 1 列 (['val']
),moment
已成为 index
。
修复方法是使用
df
作为索引来初始化 'moment'
,然后在添加数据之前以相同的形状添加数据。请注意上面输出中的 FutureWarning
:由于自 1.4.0 版本以来 df.append
已被弃用,所以让我们使用 pd.concat
代替。
import pandas as pd
class Records:
def __init__(self):
columns = ['moment', 'val']
# Create an empty DataFrame with a `DatetimeIndex`
self.dfv = pd.DataFrame(columns=['val'],
index=pd.DatetimeIndex([]))
def add(self, moment, val):
# Record (changing to setup of your dict)
rec = {'val': {pd.to_datetime(moment): val}}
# Add the record to DataFrame (using `pd.concat`)
self.dfv = pd.concat([self.dfv, pd.DataFrame(rec)])
return self.dfv
if __name__ == '__main__':
print('=============================Class records===================')
r = Records()
df = r.dfv
print("""df after initialization:\n""")
print(df)
df = r.add('2023-11-01 10:00:00', 100.0)
print("""\ndf after first `add` call:\n""")
print(df)
df = r.add('2023-11-01 11:00:00', 120.0)
print("""\ndf after second `add` call:\n""")
print(df)
=============================Class records===================
df after initialization:
Empty DataFrame
Columns: [val]
Index: []
df after first `add` call:
val
2023-11-01 10:00:00 100.0
df after second `add` call:
val
2023-11-01 10:00:00 100.0
2023-11-01 11:00:00 120.0