我正在尝试将尽可能多的洛伦兹峰拟合到用 glob 模块汇集的一组光谱上。我无法让 lmfit 正常工作,所以改用 scipy。洛伦兹函数及相关辅助函数取自另一篇帖子,但绘制出来的拟合曲线看起来不正确。我希望能汇总这些洛伦兹模型,从而得到半高全宽(FWHM)数据以及最高峰对应的波数。
这是到目前为止的代码:
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 16 11:38:14 2024
@author: tppoirier96
"""
# importing packages
import pandas as pd
import glob
import numpy as np
from scipy.optimize import leastsq
import matplotlib.pyplot as plt
# --- Load every spectrum file and assemble them column-wise -----------------
folder_path = 'insert/path/here'
# glob preserves OS listing order; sort if a strict grid order is required.
file_list = glob.glob(folder_path + "/*.txt")

# Read every file exactly once.  (The original loop seeded the frame with
# file 0, then iterated range(0, len-1): file 0 was read twice and the last
# file was silently dropped.)
frames = [pd.read_table(f, sep="\t") for f in file_list]
main_dataframe = pd.concat(frames, axis=1)

# Keep the shared wavenumber axis (column 0) plus every intensity column
# (each file contributes an x/y pair, so intensities sit at odd indices).
# Rows before 67 are assumed to be header/metadata — TODO confirm.
DataDF = pd.concat([main_dataframe.iloc[67:, 0],
                    main_dataframe.iloc[67:, 1::2]], axis=1)
SumDataDF = DataDF.describe()

DataNorm = pd.DataFrame()
DataLorentz = pd.DataFrame()
MinArray = np.zeros(len(DataDF.columns) - 1)
MaxArray = np.zeros(len(DataDF.columns) - 1)

# Min-max normalize each spectrum to [0, 1].  The original expression was
# missing parentheses: x - x.min()/(range) instead of (x - x.min())/(range).
for i in range(1, len(MinArray) + 1):
    col = DataDF.iloc[:, i]
    DataNorm = pd.concat(
        [DataNorm, (col - col.min()) / (col.max() - col.min())], axis=1)

# Invert each normalized spectrum (peaks become dips) so that the greedy
# peak-finder below can locate peaks with argmin.
DataNorm2 = DataNorm.apply(
    lambda iterator: ((iterator.max() - iterator)
                      / (iterator.max() - iterator.min())))
DataNorm2 = pd.concat([DataDF.iloc[:, 0], DataNorm2], axis=1)
def norm_lorentzian(x, x0, HWHM, gamma):
    """Amplitude-scaled Lorentzian peak.

    NOTE(review): the parameter names are historical and misleading — the
    call sites in this script build triplets of (x0, amplitude, width), so
    here `HWHM` receives the peak AMPLITUDE and `gamma` the half-width at
    half maximum.  The original body, 1/((1+(x-x0)/HWHM)**2), was not a
    Lorentzian at all (it diverges at x = x0 - HWHM and ignored `gamma`),
    which is why the plotted fits looked wrong.

    Parameters
    ----------
    x : float or array-like
        Evaluation point(s).
    x0 : float
        Peak center.
    HWHM : float
        Peak amplitude (value at x == x0); may be negative to fit dips.
    gamma : float
        Half width at half maximum.

    Returns
    -------
    float or ndarray
        HWHM * gamma**2 / (gamma**2 + (x - x0)**2)
    """
    return HWHM * gamma**2 / (gamma**2 + (x - x0)**2)
NumPeaks = int(input('How many peaks to fit?\n'))
def multi_lorentz(x, params):
    """Constant offset plus a sum of Lorentzian peaks.

    Parameters
    ----------
    x : float
        Evaluation point.
    params : sequence of float
        [offset, x0_1, amp_1, width_1, x0_2, amp_2, width_2, ...]

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If the per-peak parameters do not come in triplets.  (The original
        used `assert`, which is stripped under `python -O`.)
    """
    off = params[0]
    rest = params[1:]
    if len(rest) % 3:
        raise ValueError("params must be an offset followed by "
                         "(x0, amplitude, width) triplets")
    return off + sum(norm_lorentzian(x, *rest[i:i + 3])
                     for i in range(0, len(rest), 3))
def res_multi_lorentz(params, xData, yData):
    """Pointwise residuals (model - data) for scipy.optimize.leastsq."""
    residuals = []
    for xi, yi in zip(xData, yData):
        residuals.append(multi_lorentz(xi, params) - yi)
    return residuals
# Shared wavenumber axis (first column of the assembled frame).
xData = DataNorm2.iloc[:, 0].array

# Fit each (normalized, inverted) spectrum.  Peaks are added greedily:
# locate the deepest point of the residual, seed a Lorentzian there, refit
# all parameters, and repeat until the residual is nearly flat or the
# iteration cap is reached.
# Bug fix: the original `range(1, len(DataNorm.columns) - 1)` skipped the
# last spectrum; DataNorm2 has one extra leading x-column, so the spectra
# occupy columns 1 .. len(DataNorm2.columns) - 1 inclusive.
for i in range(1, len(DataNorm2.columns)):
    yData = DataNorm2.iloc[:, i].array
    counter = 0
    generalWidth = 10          # initial half-width guess for every new peak
    yDataLoc = yData
    startValues = [max(DataNorm2.iloc[:, i])]   # constant-offset guess
    # Bug fix: popt was referenced after the loop even when the loop body
    # never ran (already-flat spectrum) -> NameError.  Seed it here.
    popt = np.asarray(startValues)
    while max(yDataLoc) - min(yDataLoc) > .1:
        counter += 1
        print(str(counter) + " while")
        if counter > 20:
            print("Reached max iterations!")
            break
        # Deepest residual point seeds the next peak's location/amplitude.
        minP = np.argmin(yDataLoc)
        minY = yData[minP]
        x0 = xData[minP]
        # Negative amplitude: the spectra were inverted, so peaks are dips.
        startValues += [x0, minY - max(yDataLoc), generalWidth]
        popt, ier = leastsq(res_multi_lorentz, startValues,
                            args=(xData, yData))
        yDataLoc = [y - multi_lorentz(x, popt)
                    for x, y in zip(xData, yData)]
    print(str(i) + " for")
    print(popt)
    testData = [multi_lorentz(x, popt) for x in xData]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(xData, yData, label="Data")
    ax.plot(xData, testData, label="Lorentz")
    plt.xlabel("Wavenumber")
    plt.ylabel("Intensity relative to E2G")
    plt.legend(loc="lower left")
    plt.show()
# creating a new csv file with the dataframe we created
# cwd = os.getcwd()
# print(cwd)
#main_dataframe.to_excel('Mapped Data.xlsx')
我的绘图结果看起来很奇怪:
我想保持 glob 文件的顺序,因为它们对应于样本的特定位置。
总而言之,我想拟合可变数量的峰,以便对整个光谱文件夹中各峰对应的波数和半高全宽(FWHM)做矩阵可视化。我还想避免手动提供初始猜测,使这段代码对所有材料和光谱类型都足够灵活。
我有 25 个光谱文件,位于 5x5 网格中。我打算尽快收集更多。
有几个需要考虑的挑战;解决之后,再把该流程自动应用到每个文件。
以下类实现该方法:
import inspect
import itertools
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pybaselines import Baseline
from scipy import optimize, stats, signal
class SpectroscopySolver:
    """Fit a variable number of Lorentzian peaks to a spectrum.

    Workflow: estimate a baseline (pybaselines ASLS), detect peaks on the
    baseline-corrected signal (scipy prominence-based detection), build an
    initial guess and bounds from the detections, then minimize the least
    squares loss over all peak parameters at once.
    """

    @staticmethod
    def peak_model(x, loc, scale, height):
        """Lorentzian peak, normalized so its maximum equals `height`.

        Equivalent to height / (1 + ((x - loc) / scale)**2); `scale` is the
        half width at half maximum and must be strictly positive.
        """
        law = stats.cauchy(loc=loc, scale=scale)
        return height * law.pdf(x) / law.pdf(loc)

    @classmethod
    def m(cls):
        """Number of parameters per peak (loc, scale, height) = 3.

        Derived from peak_model's signature so the class stays correct if
        the peak parameterization is ever extended.
        """
        return len(inspect.signature(cls.peak_model).parameters) - 1

    @classmethod
    def model(cls, x, *p):
        """Sum of len(p) // m() peaks evaluated at x.

        Trailing parameters that do not fill a complete triplet are ignored.
        """
        m = cls.m()
        n = len(p) // m
        # Force a float accumulator: np.zeros_like(x) would inherit an
        # integer dtype from an int x and break the in-place float add.
        y = np.zeros_like(x, dtype=float)
        for i in range(n):
            y += cls.peak_model(x, *p[i * m:(i + 1) * m])
        return y

    @classmethod
    def loss_factory(cls, xdata, ydata):
        """Build a least-squares loss closure over (xdata, ydata)."""
        def wrapped(p):
            """0.5 * sum of squared residuals for parameter vector p."""
            return 0.5 * np.sum(np.power(ydata - cls.model(xdata, *p), 2))
        return wrapped

    @classmethod
    def fit(cls, xdata, ydata, prominence=400.):
        """Baseline-correct, detect peaks, fit, and plot one spectrum.

        Parameters
        ----------
        xdata, ydata : array-like
            Wavenumber axis and raw signal.
        prominence : float
            Minimum peak prominence passed to scipy.signal.find_peaks; this
            is in raw-signal units, so tune it per material/instrument.

        Returns
        -------
        matplotlib Axes with data, baseline, detected peaks and fit drawn.
        """
        # Baseline management:
        baseline_fitter = Baseline(x_data=xdata)
        baseline, params = baseline_fitter.asls(ydata)
        yb = ydata - baseline
        # Peak identification on the baseline-corrected signal:
        peaks, bases = signal.find_peaks(yb, prominence=prominence)
        lefts, rights = bases["left_bases"], bases["right_bases"]
        # Initial guess: detected location, a small width, prominence as height.
        x0 = list(itertools.chain(*[
            [xdata[i], 0.1, p] for i, p in zip(peaks, bases["prominences"])
        ]))
        # Bounds: loc confined to the detected peak's base interval; scale
        # strictly positive (scale == 0 is invalid for stats.cauchy, so use
        # a tiny epsilon rather than 0); height non-negative.
        eps = np.finfo(float).eps
        bounds = list(itertools.chain(*[
            [(xdata[lefts[i]], xdata[rights[i]]), (eps, np.inf)]
            + [(0., np.inf)] * (cls.m() - 2)
            for i in range(len(peaks))
        ]))
        # Least-squares loss minimization over all peaks simultaneously:
        loss = cls.loss_factory(xdata, yb)
        solution = optimize.minimize(loss, x0=x0, bounds=bounds)
        # Estimate (re-add the baseline for plotting against raw data):
        yhat = cls.model(xdata, *solution.x)
        ybhat = yhat + baseline
        # Plot:
        fig, axe = plt.subplots()
        axe.scatter(xdata, ydata, marker=".", color="gray", label="Data")
        axe.scatter(xdata[peaks], ydata[peaks], label="Peaks")
        axe.plot(xdata, baseline, "--", label="Baseline")
        axe.plot(xdata, ybhat, label="Fit")
        axe.set_title("Spectrogram")
        axe.set_xlabel(r"Wavenumber, $\nu$")
        axe.set_ylabel(r"Raw Signal, $g(\nu)$")
        axe.legend()
        axe.grid()
        return axe
然后,只需对每个文件自动运行该流程即可:
# Run the solver over every spectrum file in the folder.
# NOTE(review): skiprows=100 assumes a fixed 100-line header — confirm for
# your instrument's export format.
solver = SpectroscopySolver()
for file in list(pathlib.Path("raman/").glob("*.txt")):
    frame = pd.read_csv(file, sep="\t", header=None, skiprows=100,
                        names=["x", "y"])
    solver.fit(frame.x, frame.y)
对于您的数据集来说,其表现相对较好。
您可以通过以下方式控制方法: