数据集和 2 组基线的曲线下面积

Question

不知道这个问题是否在其他地方发布过，如果是的话请指导我，谢谢

我有一些数据看起来像这样

我想计算曲线下的面积，但是由于初始点和终点不为零，我想根据开始和结束数据进行线性回归，然后计算数据下的面积，但限制在像这样的线性回归

我想选择线性回归的数据，然后根据前基线回归中的最后一个点和后基线回归中限制在范围内的第一个点来计算数据下的面积线性回归。

我用 chatGPT 玩了一下，为我提供了一些代码，而不是做我想做的事情，但它似乎不明白我只想要线性回归和数据之间的区域

def baseline_correction(data, indices):
    # Calculate a unified baseline using linear regression on selected indices
    slope, intercept, _, _, _ = linregress(data[indices, 0], data[indices, 1])
    return slope, intercept

def calculate_area(data, start_index, end_index, slope, intercept):
    # Correct the current by subtracting the baseline
    corrected_current = data[start_index:end_index, 1] - (data[start_index:end_index, 0] * slope + intercept)
    # Calculate the area (charge) under the corrected current curve
    charge = trapz(corrected_current, x=data[start_index:end_index, 0])
    return charge, corrected_current

def process_cycles(data, cycle_starts):
    charges = []
    for i in range(len(cycle_starts) - 1):
        start_index = cycle_starts[i]
        end_index = cycle_starts[i + 1]

        # Selecting only the increasing potential part of the cycle
        increasing_potential_indices = np.where(np.diff(data[start_index:end_index, 0]) > 0)[0] + start_index
        
        # Combine pre-peak and post-peak data points for baseline
        pre_peak_indices = increasing_potential_indices[np.where((data[increasing_potential_indices, 0] >= 0.35) & (data[increasing_potential_indices, 0] <= 0.4))]
        post_peak_indices = increasing_potential_indices[np.where((data[increasing_potential_indices, 0] >= 0.525) & (data[increasing_potential_indices, 0] <= 0.55))]
        combined_indices = np.concatenate((pre_peak_indices, post_peak_indices))
        
        # Calculate unified baseline
        slope, intercept = baseline_correction(data, combined_indices)
        
        # Define peak region (between 0.35V to 0.4V)
        peak_start_idx = np.min(np.where(data[:, 0] >= 0.4)[0])
        peak_end_idx = np.max(np.where(data[:, 0] <= 0.525)[0])
        
        # Calculate the charge
        charge, corrected_current = calculate_area(data, peak_start_idx, peak_end_idx, slope, intercept)
        charges.append(charge)
        
        # Visualization
        plt.figure(figsize=(10, 6))
        plt.plot(data[start_index:end_index, 0], data[start_index:end_index, 1], label='Original Data', color='gray')
        plt.plot(data[combined_indices, 0], data[combined_indices, 1], 'o', label='Baseline Points', color='red')
        baseline_curve = data[peak_start_idx:peak_end_idx, 0] * slope + intercept
        plt.plot(data[peak_start_idx:peak_end_idx, 0], baseline_curve, label='Baseline', color='green')
        plt.plot(data[peak_start_idx:peak_end_idx, 0], corrected_current, label='Corrected Current', color='purple')
        plt.fill_between(data[peak_start_idx:peak_end_idx, 0], 0, corrected_current, color='purple', alpha=0.3, label='Area Under Curve')
        plt.xlabel('Potential (V)')
        plt.ylabel('Current (A)')
        plt.title(f'Cycle {i+1} Charge Calculation')
        plt.legend()
        plt.grid(True)
        plt.show()
    
    return charges

编辑：

Wetransfer 提供的数据：

https://wetransfer.com/downloads/18be5ac96b80858ee5e0f24efab826b120240503140809/d7ba45

以下代码用于数据处理：

import numpy as np
import matplotlib.pyplot as plt

def load_numeric_data(filepath):
    data_start_marker = "Potential/V, Current/A"
    numeric_data = []
    
    with open(filepath, 'r') as file:
        # Read through the file until the marker is found
        for line in file:
            if data_start_marker in line:
                break
        
        # Read the remaining lines and process them
        for line in file:
            parts = line.strip().split(',')
            if len(parts) == 2:
                try:
                    potential = float(parts[0].strip())
                    current = float(parts[1].strip())
                    numeric_data.append([potential, current])
                except ValueError:
                    # This handles lines that cannot be converted to floats
                    continue

    # Convert list to a NumPy array
    return np.array(numeric_data)

def find_cycle_starts(data, start_potential):
    # Finding all indices where the potential approximately matches the start_potential
    potential_close_indices = np.where(np.isclose(data[:,0], start_potential, atol=0.01))[0]
    
    # Determine the actual start of each cycle by checking the difference between consecutive matches
    cycle_starts = [potential_close_indices[0]]
    for i in range(1, len(potential_close_indices)):
        if potential_close_indices[i] - potential_close_indices[i-1] > 10:  # Ensuring a significant index gap
            cycle_starts.append(potential_close_indices[i])
    return cycle_starts

def plot_cycle_data(data, cycle_starts, cycle_number):
    if cycle_number > len(cycle_starts):
        print("Cycle number exceeds the available cycles.")
        return
    
    start_index = cycle_starts[cycle_number - 1]
    end_index = cycle_starts[cycle_number] if cycle_number < len(cycle_starts) else len(data)
    
    plt.figure(figsize=(10, 5))
    plt.plot(data[start_index:end_index, 0], data[start_index:end_index, 1], label=f'Cycle {cycle_number}')
    plt.xlabel('Potential (V)')
    plt.ylabel('Current (A)')
    plt.title(f'Cycle {cycle_number} from Potential {data[start_index, 0]}V to {data[end_index-1, 0]}V')
    plt.legend()
    plt.grid(True)
    plt.show()

# Usage
file_path = 'path_to_your_file.txt'
data = load_numeric_data(file_path)
cycle_starts = find_cycle_starts(data, -1.3)
plot_cycle_data(data, cycle_starts, 1)  # Plot the first cycle
charges = process_cycles(data, cycle_starts)
print(charges)

Answer 1

下面的代码查找向前扫描的曲线下方的面积。

每个周期首先分为正向电压扫描和反向电压扫描。然后，您可以将正向循环数据提供给

calculate_fwd_cycle_peak_area()

。该函数通过查看信号梯度的特征来定位峰值。该函数还绘制一个图，显示用于描绘峰边界的各种“地标”的位置。使用 numpy 的梯形积分计算面积，并返回结果。

该函数配置了各种默认值，我发现这些默认值可以很好地处理您提供的数据（示例 1、2 和 3 中的所有正向循环）。

结果示例：

用途：

#Load data
data = load_numeric_data(f'/mnt/c/dev/CV_sample1.txt')

#Define the start potentials of the forward and reverse sweeps
fwd_sweep_start_potential = -1.3
rev_sweep_start_potential = 0.7

#Find the start indices
fwd_cycle_starts = find_cycle_starts(data, fwd_sweep_start_potential)
rev_cycle_starts = find_cycle_starts(data, rev_sweep_start_potential)

#Get the cycles into a list
fwd_cycles_list, rev_cycles_list = split_on_cycles(data, fwd_cycle_starts, rev_cycle_starts)

#Supply each cycle in turn to the area function
for i, cycle in enumerate(fwd_cycles_list):
    area = calculate_fwd_cycle_peak_area(cycle)
    print('Peak area of cycle', i, 'is:', area)

完整代码如下。

import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

def load_numeric_data(filepath):
    data_start_marker = "Potential/V, Current/A"
    numeric_data = []
    
    with open(filepath, 'r') as file:
        # Read through the file until the marker is found
        for line in file:
            if data_start_marker in line:
                break
        
        # Read the remaining lines and process them
        for line in file:
            parts = line.strip().split(',')
            if len(parts) == 2:
                try:
                    potential = float(parts[0].strip())
                    current = float(parts[1].strip())
                    numeric_data.append([potential, current])
                except ValueError:
                    # This handles lines that cannot be converted to floats
                    continue

    # Convert list to a NumPy array
    return np.array(numeric_data)

def find_cycle_starts(data, start_potential):
    # Finding all indices where the potential approximately matches the start_potential
    potential_close_indices = np.where(np.isclose(data[:,0], start_potential, atol=0.01))[0]
    
    # Determine the actual start of each cycle by checking the difference between consecutive matches
    cycle_starts = [potential_close_indices[0]]
    for i in range(1, len(potential_close_indices)):
        if potential_close_indices[i] - potential_close_indices[i-1] > 10:  # Ensuring a significant index gap
            cycle_starts.append(potential_close_indices[i])
    return cycle_starts

def split_on_cycles(data, fwd_cycle_starts, rev_cycle_starts):
    """Returns fwd cycles and rev cycles in two lists
    """
    fwd_cycles_list = []
    rev_cycles_list = []
    
    #fwd cycles
    for i, start_pt in enumerate(fwd_cycle_starts[:-1]): #last one is half cycle
        cycle_period = slice(start_pt, rev_cycle_starts[i])
        fwd_cycles_list.append(data[cycle_period, :])
    
    #rev cycles
    for i, start_pt in enumerate(rev_cycle_starts):
        cycle_period = slice(start_pt, fwd_cycle_starts[i + 1])
        rev_cycles_list.append(data[cycle_period, :])
    return fwd_cycles_list, rev_cycles_list


def calculate_fwd_cycle_peak_area(
    cycle,
    clip_below_v=0.2,
    nonzero_grad_thresh=1e-6,
    grad_rise_thresh=10e-6
):
    """Locates peak and returns the area. Also renders a plot.
    
    Parameters
    ----------
    cycle: Array (n_samples, 2) of [voltages, currents]
    clip_below: Ignore voltages below this value
    nonzero_grad_thresh: Threshold for clipping the flat part before the peak
    grad_rise_thresh: Used to establish the start of the peak
    
    Returns:
    area under the curve
    """
    voltage, current = [cycle[:, i] for i in range(2)]

    #Gradients
    current_dx = pd.Series(np.gradient(current)).rolling(window=10).mean().values
    abs_current_dx = np.abs(current_dx)
    # current_d2x = pd.Series(np.gradient(np.gradient(current))).rolling(window=10).mean().values

    f, ax = plt.subplots(figsize=(11, 4.5))
    ax.plot(voltage, current, color='black', linewidth=6, label='data')
    ax.set(xlabel='voltage', ylabel='current')

    grad_clr = 'navy'
    ax2 = ax.twinx()
    ax2.plot(voltage, current_dx, color=grad_clr, linewidth=1.5, label='gradient')
    ax2.axhline(0, color=grad_clr, linewidth=1, linestyle=':')
    ax2.set_ylabel('gradient')
    ax2.tick_params(axis='y', colors=grad_clr)
    ax2.yaxis.label.set_color(grad_clr)
    ax2.spines.right.set_color(grad_clr)

    cmap = plt.get_cmap('tab10')
    line_fmt = dict(linestyle='--', linewidth=2, alpha=1)

    #"rel_start" tracks the starting point for each subsequent operation
    ax.axvline(voltage[rel_start := 0], color=cmap(0), **line_fmt, label='cycle starts')

    #Hard clip. Ignore voltages below clip_below_v
    clip_idx = np.argwhere(voltage > clip_below_v)[0].item()
    ax.axvline(voltage[rel_start + clip_idx], color=cmap(1), **line_fmt, label='hard clip ends')
    rel_start += clip_idx

    #Clip the flat part
    flatpart_endidx = np.argwhere(abs_current_dx[rel_start:] > nonzero_grad_thresh)[0].item()
    ax.axvline(voltage[rel_start + flatpart_endidx], color=cmap(2), **line_fmt, label='flat part ends')
    rel_start += flatpart_endidx

    #Peak starts where the gradient begins to rise
    peak_start_idx = np.argwhere(abs_current_dx[rel_start:] > grad_rise_thresh)[0].item()
    ax.axvline(voltage[rel_start + peak_start_idx], color=cmap(3), **line_fmt, label='peak_start_idx')
    peak_x_indices = [rel_start + peak_start_idx]
    rel_start += peak_start_idx

    #Find the midpoint of the downwards slope
    down_slope_idx = np.argmin(current_dx[rel_start:]).item()
    ax.axvline(voltage[rel_start + down_slope_idx], color=cmap(5), **line_fmt, label='down_slope_idx')
    rel_start += down_slope_idx

    #The gradient point closest to 0 after the downwards slope is where the peak ends 
    peak_end_idx = np.argmin(abs_current_dx[rel_start:]).item()
    peak_x_indices.append(rel_start + peak_end_idx)
    ax.axvline(voltage[rel_start + peak_end_idx], color=cmap(6), **line_fmt, label='peak_end_idx')

    f.legend(
        fontsize=8.5, loc='upper left', ncols=10, shadow=True, framealpha=1,
        bbox_to_anchor=(0.05, 0)
    )
    ax.set_xlim(voltage[clip_idx + flatpart_endidx - 50], voltage.max() * 0.9)
    ax.set_ylim(-0.005, 0.0075)
    ax2.set_ylim(ax2.get_ylim()[0] * 0.83, ax2.get_ylim()[1] * 0.8)

    #Points denoting line
    ax.scatter(
        voltage[peak_x_indices], current[peak_x_indices],
        marker='o', color='lime', s=40, zorder=3,
    )

    peak_indices_all = np.arange(*peak_x_indices)
    peak_voltages = voltage[peak_indices_all]
    peak_currents = current[peak_indices_all]

    line_voltages = peak_voltages[[0, -1]]
    line_currents = peak_currents[[0, -1]]
    line_poly = np.polynomial.Polynomial.fit(line_voltages, line_currents, deg=1)
    ax.plot(line_voltages, line_poly(line_voltages), linewidth=3, color='lime')
    ax.plot(peak_voltages, peak_currents, color='lime', linewidth=1.8)
    ax.fill_between(
        x=peak_voltages, y1=line_poly(peak_voltages), y2=peak_currents,
        hatch=None, facecolor='lime', alpha=0.25
    )

    peak_area = np.trapz(peak_currents, peak_voltages)
    ax.set_title(f'shaded area: {peak_area:>.2e}', weight='bold')
    plt.show()
    
    return peak_area

#Load data
data = load_numeric_data(f'/mnt/c/dev/CV_sample1.txt')

#Define the start potentials of the forward and reverse sweeps
fwd_sweep_start_potential = -1.3
rev_sweep_start_potential = 0.7

#Find the start indices
fwd_cycle_starts = find_cycle_starts(data, fwd_sweep_start_potential)
rev_cycle_starts = find_cycle_starts(data, rev_sweep_start_potential)

#Get the cycles into a list
fwd_cycles_list, rev_cycles_list = split_on_cycles(data, fwd_cycle_starts, rev_cycle_starts)

for i, cycle in enumerate(fwd_cycles_list):
    area = calculate_fwd_cycle_peak_area(cycle)
    print('Peak area of cycle', i, 'is:', area)

#For debugging: view the data splitting results
if False:
    plt.plot(data[:, 0])
    [plt.gca().axvline(x, linewidth=1, linestyle=':', color='tab:red') for x in fwd_cycle_starts]
    [plt.gca().axvline(x, linewidth=1, linestyle=':', color='black') for x in rev_cycle_starts]
    plt.gcf().set_size_inches(8, 2)
    plt.gca().set(
        xlabel='index', ylabel='V',
        title=f'{len(fwd_cycles_list)} fwd cycles/{len(rev_cycles_list)} rev cycles'
    )
    plt.show()

    for cycles_list in [fwd_cycles_list, rev_cycles_list]:
        #View split cycles
        for c in cycles_list:
            plt.plot(c[:, 0], c[:, 1])
        plt.gca().set(xlim=(0.25, 0.65), ylim=(-0.0075, 0.0075))
        plt.gca().set(xlabel='V', ylabel='I')
        plt.gcf().set_size_inches(8, 3)
        plt.show()

数据集和 2 组基线的曲线下面积

问题描述投票：0回答：1

1个回答

最新问题

数据集和 2 组基线的曲线下面积

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1