def mask(mask_cols):
    This function masks specific columns of the pupil dataframe.
    masked_df = pd.read_csv(con.CLEANED_PUPIL_GAZE)

    masked_df = mask_pupil_first_derivative(masked_df, threshold=3.0,
                                mask_cols=['diameter', 'diameter_3d'])

    return masked_df

def smooth(columns_to_smooth):
    This function smooths two columns of the interpolated pupil dataframe.
    :param columns_to_smooth: diameter and diamete 3d columns.
    original_df = mask(mask_cols=['diameter', 'diameter_3d'])

    smooth_df = original_df

    logging.basicConfig(filename='filtering_errors.log', level=logging.ERROR)


    print("Data will be now be smoothed with a low-pass BW filter.")

    nyquist = 0.5 * con.sample_rate_ET
    cutoff_high = 4.0 # Example: Upper bound for smoothing
    cutoff_low = 0.01  # Example: Lower bound for smoothing

    # Calculate normalized cutoff frequencies
    normalized_cutoff_low = cutoff_low / nyquist
    normalized_cutoff_high = cutoff_high / nyquist

    for column in columns_to_smooth:
            # Check for NaNs before filtering
            if smooth_df[column].isnull().any():
                raise ValueError(f"NaN values detected in column {column} before filtering")

            # Apply Butterworth filter to the selected column
            b, a = butter(N=3, Wn=[normalized_cutoff_low, normalized_cutoff_high],
                          btype='bandpass', fs=con.sample_rate_ET)
            smooth_values = filtfilt(b, a, smooth_df[column])

            # Check for NaNs after filtering
            if np.isnan(smooth_values).any():
                raise ValueError(f"NaN values detected after filtering column {column}")

            # Update the column in smooth_df with the filtered values
            smooth_df[column] = smooth_values


        except ValueError as e:
            print(f"Error: {e}")
            # Handle the error, e.g., log the error, skip this column, or set NaNs as appropriate

    # Save the smoothed data to a new CSV file
    smooth_df.to_csv(con.SMOOTHED_PUPIL, index=False)

    # Plot the frequency response of the filter for the current column
    w, h = freqs(b, a)
    plt.semilogx(w, 20 * np.log10(abs(h)))
    plt.title(f'Butterworth filter frequency response for column {column}')
    plt.xlabel('Frequency [radians / second]')
    plt.ylabel('Amplitude [dB]')
    plt.margins(0, 0.1)
    plt.grid(which='both', axis='both')
    plt.axvline(100, color='green')  # cutoff frequency

    # Assuming 'original_df' contains the original data before smoothing and 'smooth_df' contains the smoothed data
    # Plot original and smoothed data for comparison
    plt.plot(original_df['pupil_timestamp'], original_df['diameter'], label='Original')
    plt.plot(smooth_df['pupil_timestamp'], smooth_df['diameter'], label='Smoothed')
    plt.title('Comparison of Original and Smoothed Data')

    # Calculate summary statistics
    original_mean = original_df['diameter'].mean()
    smoothed_mean = smooth_df['diameter'].mean()

    original_std = original_df['diameter'].std()
    smoothed_std = smooth_df['diameter'].std()

    # Print summary statistics
    print(f"Original Mean: {original_mean}, Smoothed Mean: {smoothed_mean}")
    print(f"Original Standard Deviation: {original_std}, Smoothed Standard Deviation: {smoothed_std}")

我正在处理平均采样率为 120 Hz 的眼动追踪数据(但每个数据集可能有所不同)。原始数据集和平滑数据集具有相同的均值和标准差,并且图形重叠。


正如 @mkrieger1 指出的那样,您没有制作 pandas 数据框的深层副本。这就是您面临此问题的原因。如果您参考这篇post,您就知道您的逻辑应该是

smoothed_df = original_df.copy(deep=True)


