如何在Python中拟合双高斯分布？

Question

我正在尝试使用Python获得数据的双高斯分布（link）。原始数据的形式为：

对于给定的数据，我想获得图中所示峰值的两个高斯分布。我用以下代码尝试了它（source）：

from sklearn import mixture
import matplotlib.pyplot
import matplotlib.mlab
import numpy as np
from pylab import *
data = np.genfromtxt('gaussian_fit.dat', skiprows = 1)
x = data[:, 0]
y = data[:, 1]
clf = mixture.GMM(n_components=2, covariance_type='full')
clf.fit((y, x))
m1, m2 = clf.means_
w1, w2 = clf.weights_
c1, c2 = clf.covars_
fig = plt.figure(figsize = (5, 5))
plt.subplot(111)
plotgauss1 = lambda x: plot(x,w1*matplotlib.mlab.normpdf(x,m1,np.sqrt(c1))[0], linewidth=3)
plotgauss2 = lambda x: plot(x,w2*matplotlib.mlab.normpdf(x,m2,np.sqrt(c2))[0], linewidth=3)
fig.savefig('gaussian_fit.pdf')

但是我无法获得所需的输出。那么，如何在Python中得到双高斯分布呢？

更新

我能够使用以下代码拟合单个高斯分布：

import pylab as plb
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy import asarray as ar,exp
import numpy as np

data = np.genfromtxt('gaussian_fit.dat', skiprows = 1)
x = data[:, 0]
y = data[:, 1]
n = len(x)
mean = sum(x*y)/n
sigma = sum(y*(x-mean)**2)/n


def gaus(x,a,x0,sigma):
    return a*exp(-(x-x0)**2/(2*sigma**2))


popt,pcov = curve_fit(gaus, x, y ,p0 = [1, mean, sigma])


fig = plt.figure(figsize = (5, 5))
plt.subplot(111)
plt.plot(x, y, label='Raw')
plt.plot(x, gaus(x, *popt), 'o', markersize = 4, label='Gaussian fit')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
fig.savefig('gaussian_fit.pdf')

Answer 1

您不能为此使用 scikit-learn，因为您不是在处理一组要估计其分布的样本。您当然可以将曲线转换为 PDF，对其进行采样，然后尝试使用高斯混合模型来拟合它，但这对我来说似乎有点过大了。

这是使用简单的最小二乘曲线拟合的解决方案。为了让它工作，我必须删除背景，即忽略带有

y < 5

的所有数据点，并为

leastsq

提供一个良好的起始向量，可以通过数据图进行估计。

寻找起始向量

最小二乘法求得的参数向量就是向量

params = [c1, mu1, sigma1, c2, mu2, sigma2]

这里，

c1

和

c2

是两个高斯的缩放因子，即它们的高度，

mu1

和

mu2

是平均值，即峰值的水平位置，

sigma1

和

sigma2

是标准决定高斯宽度的偏差。为了找到起始向量，我只需查看数据图并估计两个峰的高度（分别为 =

c1

、

c2

）及其水平位置（分别为 =

mu1

、

mu1

）。

sigma1

和

sigma2

只需设置为

1.0

。

代码

import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import leastsq


def double_gaussian(x, params):
    (c1, mu1, sigma1, c2, mu2, sigma2) = params
    res =   c1 * np.exp( - (x - mu1)**2.0 / (2.0 * sigma1**2.0) ) \
          + c2 * np.exp( - (x - mu2)**2.0 / (2.0 * sigma2**2.0) )
    return res

def double_gaussian_fit(params, x, y):
    fit = double_gaussian(x, params)
    return (fit - y)


data = np.genfromtxt('gaussian_fit.dat', skip_header = 1)
x = data[:, 0]
y = data[:, 1]

# Remove background.
y_proc = np.copy(y)
y_proc[y_proc < 5] = 0.0

# Least squares fit. Starting values found by inspection.
fit = leastsq(double_gaussian_fit, [13.0,-13.0,1.0,60.0,3.0,1.0], args=(x, y_proc)
plt.plot(x, y, c='b' )
plt.plot(x, double_gaussian(x, fit[0]), c='r' )

Answer 2

尝试使用以下代码进行多重高斯拟合：

# -*- coding: utf-8 -*-
"""
Created on Sat Jan 28 18:57:13 2023[enter image description here][1]

@author: Sagar Dam
"""
"""
This is the fitting function for Multi Gaussian data. The inputs are two 
same length array of datatype float.
There are 3 outputs:
1. The array after fitting the data. That can be plotted.
2. The used parameters set as the name parameters.
3. The string to describe the parameters set and the fit function.
"""

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit as fit
from decimal import Decimal
import pandas as pd 

import matplotlib
matplotlib.rcParams['figure.dpi']=300 # highres display

def Gauss1(x,b,x0):
    y=np.exp(-(x-x0)**2/(2*b**2))
    return y

def Gaussfit(w,I):
    xdata=w         #Taking the x axis data
    ydata=I         #Taking the y axis data
    
    ''' 
        here the code fits only the normalized Gaussian
        So, we first normalize the array and later multiply with the 
        amplitude factor to get the main array
    '''
    y_maxval=max(ydata)      #taking the maximum value of the y array
    ymax_index=(list(ydata)).index(y_maxval)   
    
    xmax_val=xdata[ymax_index]  #Shifting the array as a non-shifted Gausian 
    xdata=xdata-xmax_val        #Shifting the array as a non-shifted Gausian
    
    ydata=ydata/y_maxval
    
    parameters, covariance = fit(Gauss1, xdata, ydata,maxfev=100000)
    fit_y = Gauss1(xdata, *parameters)
    
    
    xdata=xdata+xmax_val
    #parameters[1]+=xmax_val
    
    fit_y=np.asarray(fit_y)
    fit_y=fit_y*y_maxval       # again multiplying the data to get the actual value
    
    string1=r"Fit: $f(x)=Ae^{-\frac{(x-x_0)^2}{2b^2}}$;"
    string2=rf"with A={Decimal(str(y_maxval)).quantize(Decimal('1.00'))}, b={Decimal(str(parameters[0])).quantize(Decimal('1.00'))}, $x_0$={Decimal(str(parameters[1])).quantize(Decimal('1.00'))}"
    string=string1+string2
    return fit_y,parameters,string


def Multi_Gaussfit(x,y):
    fit_y1,parameters1,string1=Gaussfit(x,y)

    y2=y-fit_y1
    fit_y2,parameters2,string2=Gaussfit(x,y2)
    
    y3=y-fit_y1-fit_y2
    fit_y3,parameters3,string3=Gaussfit(x,y3)
    
    y4=y-fit_y1-fit_y2-fit_y3
    fit_y4,parameters4,string4=Gaussfit(x,y4)
    
    y5=y-fit_y1-fit_y2-fit_y3-fit_y4
    fit_y5,parameters5,string5=Gaussfit(x,y5)
    
    fit_y=fit_y1+fit_y2+fit_y3+fit_y4+fit_y5
    
    parameters_data=[parameters1[0],parameters1[1],parameters2[0],parameters2[1],parameters3[0],parameters3[1],parameters4[0],parameters4[1],parameters5[0],parameters5[1]]
    parameters_name=[r"$\sigma_1$",r"$x_{01}$",r"$\sigma_2$",r"$x_{02}$",r"$\sigma_3$",r"$x_{03}$",r"$\sigma_4$",r"$x_{04}$",r"$\sigma_5$",r"$x_{05}$"]
    
    parameters=pd.Series(parameters_data,index=parameters_name)
    
    return(fit_y,parameters)    

x=np.linspace(-30,30,601)      #data along x axis
y=50*np.exp(-(x+20)**2/5)+30*np.exp(-(x+10)**2/7)+10*np.exp(-(x-20)**2/50)+40*np.exp(-(x-10)**2/0.5)+30*np.exp(-(x-0)**2/5)            #data along y axis
random_noise=np.random.uniform(low=-2,high=2,size=(len(y)))
y=y+random_noise

fit_y,parameters=Multi_Gaussfit(x, y)
print(parameters)



plt.figure()
plt.plot(x,y,color='k')
#plt.title(string)
plt.plot(x,fit_y,'b-')
plt.figtext(0.9,0.9,str(parameters))
#plt.plot(x,y2,'r-')
plt.show()

#print(*parameters)
#print('FWHM= ', 2.355*parameters[0])

如何在Python中拟合双高斯分布？

问题描述投票：0回答：2

2个回答

寻找起始向量

代码

最新问题

如何在Python中拟合双高斯分布？

问题描述 投票：0回答：2

2个回答

寻找起始向量

代码

最新问题

问题描述投票：0回答：2