不知道这个问题是否在其他地方发布过,如果是的话请指导我,谢谢
我想计算曲线下的面积,但是由于初始点和终点不为零,我想根据开始和结束数据进行线性回归,然后计算数据下的面积,但限制在像这样的线性回归
我想选择线性回归的数据,然后根据前基线回归中的最后一个点和后基线回归中限制在范围内的第一个点来计算数据下的面积线性回归。
我用 chatGPT 玩了一下,为我提供了一些代码,而不是做我想做的事情,但它似乎不明白我只想要线性回归和数据之间的区域
def baseline_correction(data, indices):
# Calculate a unified baseline using linear regression on selected indices
slope, intercept, _, _, _ = linregress(data[indices, 0], data[indices, 1])
return slope, intercept
def calculate_area(data, start_index, end_index, slope, intercept):
# Correct the current by subtracting the baseline
corrected_current = data[start_index:end_index, 1] - (data[start_index:end_index, 0] * slope + intercept)
# Calculate the area (charge) under the corrected current curve
charge = trapz(corrected_current, x=data[start_index:end_index, 0])
return charge, corrected_current
def process_cycles(data, cycle_starts):
charges = []
for i in range(len(cycle_starts) - 1):
start_index = cycle_starts[i]
end_index = cycle_starts[i + 1]
# Selecting only the increasing potential part of the cycle
increasing_potential_indices = np.where(np.diff(data[start_index:end_index, 0]) > 0)[0] + start_index
# Combine pre-peak and post-peak data points for baseline
pre_peak_indices = increasing_potential_indices[np.where((data[increasing_potential_indices, 0] >= 0.35) & (data[increasing_potential_indices, 0] <= 0.4))]
post_peak_indices = increasing_potential_indices[np.where((data[increasing_potential_indices, 0] >= 0.525) & (data[increasing_potential_indices, 0] <= 0.55))]
combined_indices = np.concatenate((pre_peak_indices, post_peak_indices))
# Calculate unified baseline
slope, intercept = baseline_correction(data, combined_indices)
# Define peak region (between 0.35V to 0.4V)
peak_start_idx = np.min(np.where(data[:, 0] >= 0.4)[0])
peak_end_idx = np.max(np.where(data[:, 0] <= 0.525)[0])
# Calculate the charge
charge, corrected_current = calculate_area(data, peak_start_idx, peak_end_idx, slope, intercept)
charges.append(charge)
# Visualization
plt.figure(figsize=(10, 6))
plt.plot(data[start_index:end_index, 0], data[start_index:end_index, 1], label='Original Data', color='gray')
plt.plot(data[combined_indices, 0], data[combined_indices, 1], 'o', label='Baseline Points', color='red')
baseline_curve = data[peak_start_idx:peak_end_idx, 0] * slope + intercept
plt.plot(data[peak_start_idx:peak_end_idx, 0], baseline_curve, label='Baseline', color='green')
plt.plot(data[peak_start_idx:peak_end_idx, 0], corrected_current, label='Corrected Current', color='purple')
plt.fill_between(data[peak_start_idx:peak_end_idx, 0], 0, corrected_current, color='purple', alpha=0.3, label='Area Under Curve')
plt.xlabel('Potential (V)')
plt.ylabel('Current (A)')
plt.title(f'Cycle {i+1} Charge Calculation')
plt.legend()
plt.grid(True)
plt.show()
return charges
编辑:
Wetransfer 提供的数据:
https://wetransfer.com/downloads/18be5ac96b80858ee5e0f24efab826b120240503140809/d7ba45
以下代码用于数据处理:
import numpy as np
import matplotlib.pyplot as plt
def load_numeric_data(filepath):
data_start_marker = "Potential/V, Current/A"
numeric_data = []
with open(filepath, 'r') as file:
# Read through the file until the marker is found
for line in file:
if data_start_marker in line:
break
# Read the remaining lines and process them
for line in file:
parts = line.strip().split(',')
if len(parts) == 2:
try:
potential = float(parts[0].strip())
current = float(parts[1].strip())
numeric_data.append([potential, current])
except ValueError:
# This handles lines that cannot be converted to floats
continue
# Convert list to a NumPy array
return np.array(numeric_data)
def find_cycle_starts(data, start_potential):
# Finding all indices where the potential approximately matches the start_potential
potential_close_indices = np.where(np.isclose(data[:,0], start_potential, atol=0.01))[0]
# Determine the actual start of each cycle by checking the difference between consecutive matches
cycle_starts = [potential_close_indices[0]]
for i in range(1, len(potential_close_indices)):
if potential_close_indices[i] - potential_close_indices[i-1] > 10: # Ensuring a significant index gap
cycle_starts.append(potential_close_indices[i])
return cycle_starts
def plot_cycle_data(data, cycle_starts, cycle_number):
if cycle_number > len(cycle_starts):
print("Cycle number exceeds the available cycles.")
return
start_index = cycle_starts[cycle_number - 1]
end_index = cycle_starts[cycle_number] if cycle_number < len(cycle_starts) else len(data)
plt.figure(figsize=(10, 5))
plt.plot(data[start_index:end_index, 0], data[start_index:end_index, 1], label=f'Cycle {cycle_number}')
plt.xlabel('Potential (V)')
plt.ylabel('Current (A)')
plt.title(f'Cycle {cycle_number} from Potential {data[start_index, 0]}V to {data[end_index-1, 0]}V')
plt.legend()
plt.grid(True)
plt.show()
# Usage
file_path = 'path_to_your_file.txt'
data = load_numeric_data(file_path)
cycle_starts = find_cycle_starts(data, -1.3)
plot_cycle_data(data, cycle_starts, 1) # Plot the first cycle
charges = process_cycles(data, cycle_starts)
print(charges)
下面的代码查找向前扫描的曲线下方的面积。
每个周期首先分为正向电压扫描和反向电压扫描。然后,您可以将正向循环数据提供给
calculate_fwd_cycle_peak_area()
。该函数通过查看信号梯度的特征来定位峰值。该函数还绘制一个图,显示用于描绘峰边界的各种“地标”的位置。使用 numpy 的梯形积分计算面积,并返回结果。
该函数配置了各种默认值,我发现这些默认值可以很好地处理您提供的数据(示例 1、2 和 3 中的所有正向循环)。
用途:
#Load data
data = load_numeric_data(f'/mnt/c/dev/CV_sample1.txt')
#Define the start potentials of the forward and reverse sweeps
fwd_sweep_start_potential = -1.3
rev_sweep_start_potential = 0.7
#Find the start indices
fwd_cycle_starts = find_cycle_starts(data, fwd_sweep_start_potential)
rev_cycle_starts = find_cycle_starts(data, rev_sweep_start_potential)
#Get the cycles into a list
fwd_cycles_list, rev_cycles_list = split_on_cycles(data, fwd_cycle_starts, rev_cycle_starts)
#Supply each cycle in turn to the area function
for i, cycle in enumerate(fwd_cycles_list):
area = calculate_fwd_cycle_peak_area(cycle)
print('Peak area of cycle', i, 'is:', area)
完整代码如下。
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
def load_numeric_data(filepath):
data_start_marker = "Potential/V, Current/A"
numeric_data = []
with open(filepath, 'r') as file:
# Read through the file until the marker is found
for line in file:
if data_start_marker in line:
break
# Read the remaining lines and process them
for line in file:
parts = line.strip().split(',')
if len(parts) == 2:
try:
potential = float(parts[0].strip())
current = float(parts[1].strip())
numeric_data.append([potential, current])
except ValueError:
# This handles lines that cannot be converted to floats
continue
# Convert list to a NumPy array
return np.array(numeric_data)
def find_cycle_starts(data, start_potential):
# Finding all indices where the potential approximately matches the start_potential
potential_close_indices = np.where(np.isclose(data[:,0], start_potential, atol=0.01))[0]
# Determine the actual start of each cycle by checking the difference between consecutive matches
cycle_starts = [potential_close_indices[0]]
for i in range(1, len(potential_close_indices)):
if potential_close_indices[i] - potential_close_indices[i-1] > 10: # Ensuring a significant index gap
cycle_starts.append(potential_close_indices[i])
return cycle_starts
def split_on_cycles(data, fwd_cycle_starts, rev_cycle_starts):
"""Returns fwd cycles and rev cycles in two lists
"""
fwd_cycles_list = []
rev_cycles_list = []
#fwd cycles
for i, start_pt in enumerate(fwd_cycle_starts[:-1]): #last one is half cycle
cycle_period = slice(start_pt, rev_cycle_starts[i])
fwd_cycles_list.append(data[cycle_period, :])
#rev cycles
for i, start_pt in enumerate(rev_cycle_starts):
cycle_period = slice(start_pt, fwd_cycle_starts[i + 1])
rev_cycles_list.append(data[cycle_period, :])
return fwd_cycles_list, rev_cycles_list
def calculate_fwd_cycle_peak_area(
cycle,
clip_below_v=0.2,
nonzero_grad_thresh=1e-6,
grad_rise_thresh=10e-6
):
"""Locates peak and returns the area. Also renders a plot.
Parameters
----------
cycle: Array (n_samples, 2) of [voltages, currents]
clip_below: Ignore voltages below this value
nonzero_grad_thresh: Threshold for clipping the flat part before the peak
grad_rise_thresh: Used to establish the start of the peak
Returns:
area under the curve
"""
voltage, current = [cycle[:, i] for i in range(2)]
#Gradients
current_dx = pd.Series(np.gradient(current)).rolling(window=10).mean().values
abs_current_dx = np.abs(current_dx)
# current_d2x = pd.Series(np.gradient(np.gradient(current))).rolling(window=10).mean().values
f, ax = plt.subplots(figsize=(11, 4.5))
ax.plot(voltage, current, color='black', linewidth=6, label='data')
ax.set(xlabel='voltage', ylabel='current')
grad_clr = 'navy'
ax2 = ax.twinx()
ax2.plot(voltage, current_dx, color=grad_clr, linewidth=1.5, label='gradient')
ax2.axhline(0, color=grad_clr, linewidth=1, linestyle=':')
ax2.set_ylabel('gradient')
ax2.tick_params(axis='y', colors=grad_clr)
ax2.yaxis.label.set_color(grad_clr)
ax2.spines.right.set_color(grad_clr)
cmap = plt.get_cmap('tab10')
line_fmt = dict(linestyle='--', linewidth=2, alpha=1)
#"rel_start" tracks the starting point for each subsequent operation
ax.axvline(voltage[rel_start := 0], color=cmap(0), **line_fmt, label='cycle starts')
#Hard clip. Ignore voltages below clip_below_v
clip_idx = np.argwhere(voltage > clip_below_v)[0].item()
ax.axvline(voltage[rel_start + clip_idx], color=cmap(1), **line_fmt, label='hard clip ends')
rel_start += clip_idx
#Clip the flat part
flatpart_endidx = np.argwhere(abs_current_dx[rel_start:] > nonzero_grad_thresh)[0].item()
ax.axvline(voltage[rel_start + flatpart_endidx], color=cmap(2), **line_fmt, label='flat part ends')
rel_start += flatpart_endidx
#Peak starts where the gradient begins to rise
peak_start_idx = np.argwhere(abs_current_dx[rel_start:] > grad_rise_thresh)[0].item()
ax.axvline(voltage[rel_start + peak_start_idx], color=cmap(3), **line_fmt, label='peak_start_idx')
peak_x_indices = [rel_start + peak_start_idx]
rel_start += peak_start_idx
#Find the midpoint of the downwards slope
down_slope_idx = np.argmin(current_dx[rel_start:]).item()
ax.axvline(voltage[rel_start + down_slope_idx], color=cmap(5), **line_fmt, label='down_slope_idx')
rel_start += down_slope_idx
#The gradient point closest to 0 after the downwards slope is where the peak ends
peak_end_idx = np.argmin(abs_current_dx[rel_start:]).item()
peak_x_indices.append(rel_start + peak_end_idx)
ax.axvline(voltage[rel_start + peak_end_idx], color=cmap(6), **line_fmt, label='peak_end_idx')
f.legend(
fontsize=8.5, loc='upper left', ncols=10, shadow=True, framealpha=1,
bbox_to_anchor=(0.05, 0)
)
ax.set_xlim(voltage[clip_idx + flatpart_endidx - 50], voltage.max() * 0.9)
ax.set_ylim(-0.005, 0.0075)
ax2.set_ylim(ax2.get_ylim()[0] * 0.83, ax2.get_ylim()[1] * 0.8)
#Points denoting line
ax.scatter(
voltage[peak_x_indices], current[peak_x_indices],
marker='o', color='lime', s=40, zorder=3,
)
peak_indices_all = np.arange(*peak_x_indices)
peak_voltages = voltage[peak_indices_all]
peak_currents = current[peak_indices_all]
line_voltages = peak_voltages[[0, -1]]
line_currents = peak_currents[[0, -1]]
line_poly = np.polynomial.Polynomial.fit(line_voltages, line_currents, deg=1)
ax.plot(line_voltages, line_poly(line_voltages), linewidth=3, color='lime')
ax.plot(peak_voltages, peak_currents, color='lime', linewidth=1.8)
ax.fill_between(
x=peak_voltages, y1=line_poly(peak_voltages), y2=peak_currents,
hatch=None, facecolor='lime', alpha=0.25
)
peak_area = np.trapz(peak_currents, peak_voltages)
ax.set_title(f'shaded area: {peak_area:>.2e}', weight='bold')
plt.show()
return peak_area
#Load data
data = load_numeric_data(f'/mnt/c/dev/CV_sample1.txt')
#Define the start potentials of the forward and reverse sweeps
fwd_sweep_start_potential = -1.3
rev_sweep_start_potential = 0.7
#Find the start indices
fwd_cycle_starts = find_cycle_starts(data, fwd_sweep_start_potential)
rev_cycle_starts = find_cycle_starts(data, rev_sweep_start_potential)
#Get the cycles into a list
fwd_cycles_list, rev_cycles_list = split_on_cycles(data, fwd_cycle_starts, rev_cycle_starts)
for i, cycle in enumerate(fwd_cycles_list):
area = calculate_fwd_cycle_peak_area(cycle)
print('Peak area of cycle', i, 'is:', area)
#For debugging: view the data splitting results
if False:
plt.plot(data[:, 0])
[plt.gca().axvline(x, linewidth=1, linestyle=':', color='tab:red') for x in fwd_cycle_starts]
[plt.gca().axvline(x, linewidth=1, linestyle=':', color='black') for x in rev_cycle_starts]
plt.gcf().set_size_inches(8, 2)
plt.gca().set(
xlabel='index', ylabel='V',
title=f'{len(fwd_cycles_list)} fwd cycles/{len(rev_cycles_list)} rev cycles'
)
plt.show()
for cycles_list in [fwd_cycles_list, rev_cycles_list]:
#View split cycles
for c in cycles_list:
plt.plot(c[:, 0], c[:, 1])
plt.gca().set(xlim=(0.25, 0.65), ylim=(-0.0075, 0.0075))
plt.gca().set(xlabel='V', ylabel='I')
plt.gcf().set_size_inches(8, 3)
plt.show()