我需要处理实验数据。该实验是将一块磁铁穿过 2 个线圈,一个在下落开始时,一个在下落结束时,然后根据差异测量物体(当我使用不同线圈之间的高度差重复该过程时),为此我需要识别有趣的事情真正发生的时刻和事件结束的时刻,并将它们与事件之前/之后的噪音区分开来。我上传了数据here(那里有几个实验的几个文件)。所有文件中的所有数据通常都是相同的,按原样绘制它会生成一种“高斯/洛伦兹导数形式”。其中有三个相关列:t 代表时间,X 是第一个线圈的电压数据,Y 是第二个线圈的电压数据。 原始数据图如下所示(减去两个红点):
我正在寻找一种方法来自动从数据中提取这两个红点的位置。最初的计划是两次导出数据,二阶导数上升到零以上或下降到零的时刻就是我正在寻找的图表中的两个红点。然而,数据不干净并且有很多噪音,所以二阶导数是一团糟,并且在主要事件之前的边缘绝对不为零。
经过反复试验,这是我想出的最好的办法。它确实适用于某些 Y 向量,但不适用于其他向量,并且绝对不适用于 X。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
import scipy
from scipy.signal import argrelmax
from scipy.ndimage.filters import gaussian_filter1d
from scipy.interpolate import UnivariateSpline
from whittaker_eilers import WhittakerSmoother
from scipy.signal import savgol_filter
def importData(filename):
# use skiprows to skip the scope setup information and header
# usecols column numbers starting from 0
t, v1, v2 = np.loadtxt(filename, delimiter=',', skiprows=47, usecols=[1,2,3]).T
return t, v1, v2
#%%
t,X,Y= importData(r"C:\Users\IMOE001\Desktop\inductance\15cm_10N.csv")
whittaker_smoother =WhittakerSmoother(lmbda=30, order=2, data_length=len(t))
nX=gaussian_filter1d(Y, 2, order=0)
nX=np.array(whittaker_smoother.smooth(nX))
dX= np.gradient(nX,t)
ndX=np.array(whittaker_smoother.smooth(dX))
ndX=gaussian_filter1d(ndX,2, order=0)
c1ndX=ndX/np.abs(np.mean(ndX))
c2ndX=(c1ndX-np.mean(c1ndX))/10000
ddX=np.gradient(c2ndX,t)
c1bex=ddX/np.abs(np.mean(ddX))
c2bex=(c1bex-np.mean(c1bex))/50000
bex=gaussian_filter1d(c2bex, 2, order=0)
bex=np.array(whittaker_smoother.smooth(bex))
plt.figure(dpi=500)
index_f=np.argmin(np.abs(bex[::-1])<5e-2)
index_i=np.argmin(np.abs(bex)<5e-2)
val_i=np.abs(np.mean(bex[:index_i]))*350
val_f=np.abs(np.mean(bex[len(bex)-index_f:]))*250
a= np.argmin(np.abs(bex)<val_i)
b= len(bex)-np.argmin(np.abs(bex[::-1])<val_f)
print(a,b,index_i,index_f)
上面的构建是这样的:
pip install
它。 - 第 19-21 行
np.gradient
导出平滑数据并进一步平滑结果,这次将结果除以结果的平均值,以使事件前后的线更接近于零。 - 第 22-25 行
index_i
和
index_f
的表示与初始尾部和最终尾部相关。 - 第 36-37 行
a-
的是开始索引,
b-
是结束索引。 - 第 38-41 行
a
、
b
、index_i
、index_f
来帮助我了解谁更准确,但实际上没有一个更准确。 42号线
和我只关心事件开始时的小幅上升,并假设您确实关心外部拐点。通过使用更标准的 filtfilt
而不是 Whittaker 来简化:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal
from zipfile import ZipFile
data_list = []
archive = ZipFile('Data package from April 5th..zip')
for file_info in archive.infolist():
set_df = pd.read_csv(
filepath_or_buffer=archive.open(name=file_info.filename),
skiprows=45, usecols=['Time(s)', 'Channel1', 'Channel2'],
index_col='Time(s)',
)
# Convert to a series having a multi-index over experiment, time and channel
set_df.columns.name = 'Channel'
set_series = set_df.stack()
set_series = pd.DataFrame({
'Experiment': file_info.filename.removesuffix('.csv'),
'Voltage': set_series,
}).set_index('Experiment', append=True)['Voltage']
set_series.index = (
set_series.index
.swaplevel('Experiment', 'Time(s)')
.swaplevel('Channel', 'Time(s)')
)
data_list.append(set_series)
voltage = pd.concat(data_list)
# Used prior to differentials
filter_strength = 10
filter_b = np.full(shape=filter_strength, fill_value=1/filter_strength)
for experiment, egroup in voltage.groupby('Experiment'):
fig = plt.figure()
fig.suptitle(experiment)
# For each experiment, a figure having 2x2 axes: (volts, differential) by (channels 1, 2)
ax_volts = fig.add_subplot(2, 2, 1), fig.add_subplot(2, 2, 2)
for col in ax_volts:
col.tick_params('x', labelbottom=False)
ax_diffs = (
fig.add_subplot(2, 2, 3, sharex=ax_volts[0]),
fig.add_subplot(2, 2, 4, sharex=ax_volts[1]),
)
for row, label in zip(
(ax_volts, ax_diffs),
('Experimental data, volts', 'Second-order differential'),
):
row[0].set_ylabel(label)
cgroup: pd.Series
for ax_volt, ax_diff, (channel, cgroup) in zip(
ax_volts, ax_diffs, egroup.groupby('Channel'),
):
filtered = pd.Series(scipy.signal.filtfilt(
b=filter_b, a=1, x=cgroup,
))
time = cgroup.index.get_level_values('Time(s)')
ax_volt.plot(time, cgroup, label='Voltage')
ax_volt.plot(time, filtered, label='Filtered')
ax_volt.set_title(channel)
# Finite-differences second-order differential: https://stackoverflow.com/a/59095333/313768
diff2 = filtered - 2*filtered.shift(1) + filtered.shift(2)
ax_diff.plot(time, diff2)
'''
Assumed structure:
1. a positive peak
2. 0 (inflection point A)
3. minimum
4. 0 (inflection point B)
5. maximum
6. 0 (inflection point C)
7. a negative peak
'''
i_max = diff2.argmax()
segc = diff2.iloc[i_max:]
i_ipc = segc[segc < 0].index[0]
segb = diff2.iloc[:i_max]
i_ipb = segb[segb < 0].index[-1]
sega = segb.iloc[:i_ipb]
i_ipa = sega[sega > 0].index[-1]
for ax, series in zip(
(ax_volt, ax_diff), (cgroup, diff2)
):
ax.scatter(
time[[i_ipa, i_ipc]],
series.iloc[[i_ipa, i_ipc]],
)
ax_volts[1].legend()
plt.show()
这可以正确找到您拥有的每个数据集的拐点。然而,您(含糊地)暗示这不是您真正关心的。根据您设置为 10,000 的调整参数,您似乎希望一些外部点具有较小的斜率。这根本不需要差速器:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from zipfile import ZipFile
data_list = []
archive = ZipFile('Data package from April 5th..zip')
for file_info in archive.infolist():
set_df = pd.read_csv(
filepath_or_buffer=archive.open(name=file_info.filename),
skiprows=45, usecols=['Time(s)', 'Channel1', 'Channel2'],
index_col='Time(s)',
)
# Convert to a series having a multi-index over experiment, time and channel
set_df.columns.name = 'Channel'
set_series = set_df.stack()
set_series = pd.DataFrame({
'Experiment': file_info.filename.removesuffix('.csv'),
'Voltage': set_series,
}).set_index('Experiment', append=True)['Voltage']
set_series.index = (
set_series.index
.swaplevel('Experiment', 'Time(s)')
.swaplevel('Channel', 'Time(s)')
)
data_list.append(set_series)
voltage = pd.concat(data_list)
threshold = 0.1
for experiment, egroup in voltage.groupby('Experiment'):
fig = plt.figure()
fig.suptitle(experiment)
# For each experiment, a figure having 1x2 axes: (channels 1, 2)
ax_volts = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)
ax_volts[0].set_ylabel('Volts')
cgroup: pd.Series
for ax_volt, (channel, cgroup) in zip(
ax_volts, egroup.groupby('Channel'),
):
time = cgroup.index.get_level_values('Time(s)')
ax_volt.plot(time, cgroup, label='Voltage')
ax_volt.set_title(channel)
iv_max = cgroup.argmax()
iv_min = cgroup.argmin()
v_thresh_hi = threshold*cgroup.iloc[iv_max]
v_thresh_lo = threshold*cgroup.iloc[iv_min]
i_hi = cgroup.iloc[:iv_max].searchsorted(v_thresh_hi)
i_lo = cgroup.iloc[iv_min:].searchsorted(v_thresh_lo) + iv_min
ax_volt.scatter(
time[[i_hi, i_lo]],
cgroup.iloc[[i_hi, i_lo]],
c='red', label='threshold',
)
ax_volts[1].legend()
plt.show()