这是我的代码:
def mask(mask_cols):
"""
This function masks specific columns of the pupil dataframe.
"""
masked_df = pd.read_csv(con.CLEANED_PUPIL_GAZE)
masked_df = mask_pupil_first_derivative(masked_df, threshold=3.0,
mask_cols=['diameter', 'diameter_3d'])
return masked_df
def smooth(columns_to_smooth):
"""
This function smooths two columns of the interpolated pupil dataframe.
:param columns_to_smooth: diameter and diamete 3d columns.
"""
original_df = mask(mask_cols=['diameter', 'diameter_3d'])
smooth_df = original_df
logging.basicConfig(filename='filtering_errors.log', level=logging.ERROR)
print(smooth_df)
print("Data will be now be smoothed with a low-pass BW filter.")
nyquist = 0.5 * con.sample_rate_ET
cutoff_high = 4.0 # Example: Upper bound for smoothing
cutoff_low = 0.01 # Example: Lower bound for smoothing
# Calculate normalized cutoff frequencies
normalized_cutoff_low = cutoff_low / nyquist
normalized_cutoff_high = cutoff_high / nyquist
for column in columns_to_smooth:
try:
# Check for NaNs before filtering
if smooth_df[column].isnull().any():
raise ValueError(f"NaN values detected in column {column} before filtering")
# Apply Butterworth filter to the selected column
b, a = butter(N=3, Wn=[normalized_cutoff_low, normalized_cutoff_high],
btype='bandpass', fs=con.sample_rate_ET)
smooth_values = filtfilt(b, a, smooth_df[column])
# Check for NaNs after filtering
if np.isnan(smooth_values).any():
raise ValueError(f"NaN values detected after filtering column {column}")
# Update the column in smooth_df with the filtered values
smooth_df[column] = smooth_values
print(smooth_values)
except ValueError as e:
print(f"Error: {e}")
# Handle the error, e.g., log the error, skip this column, or set NaNs as appropriate
# Save the smoothed data to a new CSV file
smooth_df.to_csv(con.SMOOTHED_PUPIL, index=False)
# Plot the frequency response of the filter for the current column
w, h = freqs(b, a)
plt.semilogx(w, 20 * np.log10(abs(h)))
plt.title(f'Butterworth filter frequency response for column {column}')
plt.xlabel('Frequency [radians / second]')
plt.ylabel('Amplitude [dB]')
plt.margins(0, 0.1)
plt.grid(which='both', axis='both')
plt.axvline(100, color='green') # cutoff frequency
plt.show()
# Assuming 'original_df' contains the original data before smoothing and 'smooth_df' contains the smoothed data
# Plot original and smoothed data for comparison
plt.plot(original_df['pupil_timestamp'], original_df['diameter'], label='Original')
plt.plot(smooth_df['pupil_timestamp'], smooth_df['diameter'], label='Smoothed')
plt.xlabel('Timestamp')
plt.ylabel('Diameter')
plt.title('Comparison of Original and Smoothed Data')
plt.legend()
plt.show()
# Calculate summary statistics
original_mean = original_df['diameter'].mean()
smoothed_mean = smooth_df['diameter'].mean()
original_std = original_df['diameter'].std()
smoothed_std = smooth_df['diameter'].std()
# Print summary statistics
print(f"Original Mean: {original_mean}, Smoothed Mean: {smoothed_mean}")
print(f"Original Standard Deviation: {original_std}, Smoothed Standard Deviation: {smoothed_std}")
我正在处理平均采样率为 120 Hz 的眼动追踪数据(但每个数据集可能有所不同)。原始数据集和平滑数据集具有相同的均值和标准差,并且图形重叠。
没有进行平滑处理。我该怎么办?
正如 @mkrieger1 指出的那样,您没有制作 pandas 数据框的深层副本。这就是您面临此问题的原因。如果您参考这篇post,您就知道您的逻辑应该是
smoothed_df = original_df.copy(deep=True)
这应该确保您的答案是不同的。否则,您将根据同一数据帧计算统计数据。