我有这段代码,旨在适应这里的数据DriveFilelink
函数read_file用于以结构化方式提取信息。然而,我在高斯拟合方面遇到了挑战,从高斯拟合图像中观察到的差异可以明显看出。这个问题似乎源于对某些参数施加的约束,例如固定所有高斯的平均值和四个振幅中的三个。尽管存在这些限制,但它们是必要的,因为它们基于可用信息。
有谁知道我该如何解决这个问题?为了更好的贴合。
代码如下:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from scipy.optimize import curve_fit
from google.colab import drive
import os
# Mount Google Drive
drive.mount('/content/drive')
# Function to read numeric data from a file
def read_numeric_data(file_path):
with open(file_path, 'r', encoding='latin1') as f:
data = []
live_time = None
real_time = None
for line in f:
if 'LIVE_TIME' in line:
live_time = line.strip()
elif 'REAL_TIME' in line:
real_time = line.strip()
elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
row = [float(x) for x in line.split()]
data.extend(row)
return np.array(data), live_time, real_time
file_path = '/content/drive/MyDrive/ProjetoXRF_Manuel/April2024/20240402_In.mca'
data, _, _ = read_numeric_data(file_path)
a = -0.0188026396003431
b = 0.039549044037714
Data = data
# Función para convolucionar múltiples gaussianas
def multi_gaussian(x, *params):
eps = 1e-10
y = np.zeros_like(x)
amplitude_relations = [1,0.5538, 0.1673 , 0.1673*0.5185 ]
meanvalues= [24.210, 24.002 , 27.276 , 27.238]
amplitude = params[0]
for i in range(4):
sigma = params[i * 3 + 2] # Get the fitted standard deviation for this Gaussian
y += amplitude*amplitude_relations[i]* np.exp(-(x - meanvalues[i])**2 / (2 * sigma**2 + eps))
return y
sigma=[]
area=[]
# Función para graficar el espectro de energía convolucionado
def plot_convolved_spectrum(Data, a, b,i, ax=None):
maxim = np.max(Data)
workingarray = np.squeeze(Data)
# Definir puntos máximos
peaks, _ = find_peaks(workingarray / maxim, height=0.1)
peak_values = workingarray[peaks] / maxim
peak_indices = peaks
# Calcular valores de energía correspondientes a los picos
energy_spectrum = a + b * peak_indices
# Preparar datos para convolución
data = workingarray[:885] / maxim
data_y = data / data.max()
data_x = a + b * np.linspace(0, 885, num=len(data_y))
# Ajustar múltiples gaussianas al espectro de energía
peak_indices2, _ = find_peaks(data_y, height=0.1)
peak_amplitudes = [1,0.5538, 0.1673 , 0.1673*0.5185 ]
peak_means = [24.210, 24.002 , 27.276 , 27.238]
peak_sigmas = [0.1] * 4
params_init = list(zip(peak_amplitudes, peak_means, peak_sigmas))
params_init = np.concatenate(params_init)
# Ajustar curva
params, params_cov = curve_fit(multi_gaussian, data_x, data_y, p0=params_init)
# Obtener una interpolación de alta resolución del ajuste
x_fine = np.linspace(data_x.min(), data_x.max(), num=20000)
y_fit = multi_gaussian(x_fine, *params)
# Data Graphic
ax.scatter(data_x, data_y, color='black', marker='o', s=20, label = 'Data' )
y_data_error =np.sqrt(workingarray[:885])
ax.plot(data_x, data_y + y_data_error/maxim, color='black',linestyle='--')
ax.plot(data_x, data_y - y_data_error/maxim, color='black',linestyle='--')
# Fit Graphic
ax.plot(x_fine, y_fit, label="Fit", linewidth=1.5)
# Extraer desviaciones estándar y amplitudes
sigmas_array = params[2::3]
# Calcular sigma promedio
sigma.append(np.mean(sigmas_array))
# Configuración del gráfico
ax.set_xlabel('Energy (KeV)')
ax.set_ylabel('Normalized Data')
ax.legend()
ax.set_title('Convolved Energy Spectrum')
# Imprimir información
print("Standard deviations:", sigmas_array)
fig, ax = plt.subplots()
plot_convolved_spectrum(Data, a, b,1,ax=ax)
ax.set_xlim(22, 28)
plt.show()
注意 我尝试过设置界限并改进初始猜测以改进高斯拟合。但是,需要注意的是,使用边界并不等同于固定参数。我的目标是保持只有两个参数的自由度:第一个高斯的幅度和所有高斯的标准差。在探索这些调整的同时,我努力在约束和灵活性之间取得平衡,以达到所需的拟合精度。
这是一个 MCVE,展示了如何从信号的重要部分回归可变数量的峰值。
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal, stats, optimize
我们加载您的数据并对其进行后处理以获得物理序列:
data = pd.read_csv("20240402_In.mca", encoding="latin-1", names=["y"]).loc[12:2058]
data["y"] = pd.to_numeric(data["y"])
data["y"] /= data["y"].max()
data["x"] = -0.0188026396003431 + 0.039549044037714 * data.index
data = data.loc[(20. <= data["x"]) & (data["x"] <= 30.), :]
我们创建接受可变数量峰值的模型(由
find_peaks
解决方案驱动):
def peak(x, A, s, x0):
law = stats.norm(loc=x0, scale=s)
return A * law.pdf(x) / law.pdf(x0)
def model(x, *parameters):
n = len(parameters) // 3
y = np.zeros_like(x)
for i in range(n):
y += peak(x, *parameters[(i * 3):((i + 1) * 3)])
return y
我们识别峰值:
indices, bases = signal.find_peaks(data.y, prominence=0.0125)
#(array([ 67, 118, 195, 210]),
# {'prominences': array([0.01987281, 0.99920509, 0.18918919, 0.03338633]),
# 'left_bases': array([ 1, 1, 179, 205]),
# 'right_bases': array([ 76, 160, 219, 219])})
这是关键点,从识别中,我们做出有根据的猜测:
x0s = data.x.values[indices]
As = bases["prominences"]
ss = (data.x.values[bases["right_bases"]] - data.x.values[bases["left_bases"]]) / 8.
p0 = list(itertools.chain(*zip(As, ss, x0s)))
#[0.01987281399046105,
# 0.37077228785356864,
# 22.682348638047493,
# 0.9992050874403816,
# 0.7860372502495658,
# 24.699349883970907,
# 0.1891891891891892,
# 0.19774522018856988,
# 27.744626274874886,
# 0.033386327503974564,
# 0.06921082706599968,
# 28.337861935440593]
现在我们对您的数据集进行模型回归:
popt, pcov = optimize.curve_fit(model, data.x, data.y, p0=p0)
#array([1.03735804e-02, 9.61270732e-01, 2.29030214e+01, 9.92651381e-01,
# 1.59694755e-01, 2.46561911e+01, 1.85332645e-01, 1.21882422e-01,
# 2.77807838e+01, 3.67805911e-02, 1.21890416e-01, 2.83583849e+01])
我们现在可以估计模型:
yhat = model(data.x, *popt)
从图形上看,它导致:
还是比较满意的。