当使用 pandas 读取 json 数据集时,我面临时间戳的关键错误

问题描述 投票:0回答:0
import pandas as pd
import json
import numpy as np
import datetime

# Define chunk size
chunk_size = 1000

# Create a generator to read the JSON file in chunks
data_generator = pd.read_json(
    "/home/pi/AMG8833_IR_cam/examples/combined_data.json",
    orient="records",
    lines=True,
    chunksize=chunk_size,
)

df = pd.DataFrame()

def dataframe_processed(data_generator):
    global timestamps, temperatures, hist_plot_dftemp, df

    # Initialize df_processed
    df_processed = pd.DataFrame(columns=["Timestamp", "Temperature"])

    # Iterate over the chunks and process each chunk
    for chunk in data_generator:
        # Extract data into NumPy array
        timestamps = np.array(
            [pd.to_datetime(d["Timestamp"]) for d in chunk.to_dict("records")]
        )
        temperatures = np.array(
            [d["IS_Temperature [F]"] for d in chunk.to_dict("records")]
        )

        # Create DataFrame from NumPy arrays
        chunk_df = pd.DataFrame({"Timestamp": timestamps, "Temperature": temperatures})

        # Drop NaN values and perform other data cleaning steps
        chunk_df = chunk_df.dropna()
        chunk_df = chunk_df.rename(columns={"IS_Temperature [F]": "Temperature"})

        # Append the processed data to the global variable
        df = df.append(chunk_df)

        # Append timestamps and temperatures to the global variables
        timestamps.extend(list(timestamps))
        temperatures.extend(list(temperatures))

        # Append the processed chunk to df_processed
        df_processed = df_processed.append(chunk_df)

        # Call the histogram plot function with the processed data
        hist_plot_dftemp = plot_hist_temp(df_processed)

    return df, hist_plot_dftemp, init_df, timestamps, temperatures



dftemp = pd.DataFrame(columns=["date", "hour", "mean"])

# Use the processed DataFrame for subsequent analysis
window_24 = (datetime.datetime.now() + datetime.timedelta(hours=-24))
window_24_str = window_24.strftime("%Y-%m-%d %H:%M:%S")

plotting_data = df[(df["Timestamp"] > datetime.datetime.strptime(window_24_str, "%Y-%m-%d %H:%M:%S"))]


prev_day = (datetime.datetime.now() + datetime.timedelta(days=-1))
prev_day_str = prev_day.strftime("%Y-%m-%d 00:00:00")
init_df = df[(df["Timestamp"] > datetime.datetime.strptime(prev_day_str, "%Y-%m-%d %H:%M:%S"))]
Logging Started
output : (b'2023-02-26 22:45:58\n', None)   <class 'tuple'>
uptime 1: 2023-02-26 22:45:58   <class 'str'>
uptime 2: 2023-02-26 22:45:58
uptime 3: 2023-02-26 22:45:58 PM
qt5ct: using qt5ct plugin
First Except Block: 'Timestamp'
Waiting for Process to restart
^CTraceback (most recent call last):
  File "/home/pi/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas/_libs/index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 4554, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 4562, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Timestamp'
python pandas dataframe valueerror keyerror
© www.soinside.com 2019 - 2024. All rights reserved.