Cython优化代码

Question

我正在努力用Cython提高我的python粒子跟踪代码的性能。

这是我的纯Python代码：

from scipy.integrate import odeint
import numpy as np
from numpy import sqrt, pi, sin, cos
from time import time as Time
import multiprocessing as mp
from functools import partial

# Speed of light; the value equals c expressed in m/s.
cLight = 299792458.
# Length of the derivative vector allocated in Integrator.equations.
Dim = 6

class Integrator:
    """Integrate a particle's state vector along the ring with ``odeint``.

    The state ``X`` has six components.  As used by ``equations``:
    ``X[0]`` and ``X[2]`` are the transverse coordinates handed to
    ``ring.getEMField``, ``X[1]`` and ``X[3]`` are the matching transverse
    momenta, ``X[4]`` is the time and ``X[5]`` the total energy.  The
    independent variable ``s`` is the path length along the ring.
    """

    def __init__(self, ring):
        """Store the ring that supplies geometry, particle data and fields."""
        self.ring = ring

    def equations(self, X, s):
        """Return dX/ds for state ``X`` at path length ``s``.

        This is the right-hand-side callback passed to ``odeint``; it is
        evaluated many times per trajectory, so it avoids temporary arrays
        and values that are never read.
        """
        ring = self.ring
        radius = ring.ringRadius
        mass = ring.particle.mass
        charge = ring.particle.charge

        E, B = ring.getEMField([X[0], X[2], s], X[4])

        # Curvilinear scale factor of the reference orbit.
        h = 1 + X[0]/radius
        # Longitudinal momentum from E^2 = m^2 + px^2 + py^2 + ps^2.
        p_s = np.sqrt(X[5]**2 - mass**2 - X[1]**2 - X[3]**2)
        dtds = h*X[5]/p_s

        dXds = np.zeros(Dim)
        # Transverse velocity components are momentum/energy.
        dXds[0] = dtds*(X[1]/X[5])
        dXds[2] = dtds*(X[3]/X[5])
        dXds[1] = p_s/radius + charge*(dtds*E[0] + dXds[2]*B[2] - h*B[1])
        dXds[3] = charge*(dtds*E[1] + h*B[0] - dXds[0]*B[2])
        dXds[4] = dtds
        dXds[5] = charge*(dXds[0]*E[0] + dXds[2]*E[1] + h*E[2])
        return dXds

    def odeSolve(self, X0, sRange):
        """Integrate from initial state ``X0`` over the sample points ``sRange``.

        Returns the array produced by ``odeint`` (one row per entry of
        ``sRange``).
        """
        return odeint(self.equations, X0, sRange)

class Ring:
    """Storage-ring model: fixed radius, uniform B field, sectored E field."""

    def __init__(self, particle):
        """Bind the particle and derive the field from its magic momentum."""
        self.particle = particle
        self.ringRadius = 7.112
        self.magicB0 = particle.magicMomentum/self.ringRadius

    def getEMField(self, pos, time):
        """Return ``(E, B)`` as two length-3 arrays at ``pos = [x, y, s]``.

        ``B`` is always ``[0, magicB0, 0]``.  ``E`` is non-zero only when the
        azimuth derived from ``s`` lies in one of the angular windows
        (repeated every 90 degrees) and both ``|x|`` and ``|y|`` are below
        0.05.  ``time`` is accepted but not used.
        """
        x, y, s = pos

        # Path length -> azimuth in degrees, wrapped into [0, 360).
        theta = (s/self.ringRadius*180/pi) % 360

        # Polar coordinates in the transverse plane.
        r = sqrt(x**2 + y**2)
        if r == 0:
            arg = 0
        else:
            arg = np.angle(complex(x/r, y/r))
        rn = r/0.045

        k2 = 37*24e3
        k10 = -4*24e3

        B = np.array([0, self.magicB0, 0])

        in_aperture = -0.05 < x < 0.05 and -0.05 < y < 0.05
        in_window = any(
            21.9 + start < theta < 34.9 + start
            or 38.9 + start < theta < 64.9 + start
            for start in range(0, 360, 90)
        )
        if not (in_window and in_aperture):
            return np.zeros(3), B

        E = np.array([
            k2*x/0.045 + k10*rn**9*cos(9*arg),
            -k2*y/0.045 - k10*rn**9*sin(9*arg),
            0,
        ])
        return E, B

class Particle:
    """Particle constants and the "magic" kinematic values derived from them."""

    def __init__(self):
        """Set mass, charge and gm2, then derive momentum, energy, gamma, beta."""
        rest_mass = 105.65837e6
        anomaly = 0.001165921

        # Magic momentum is mass/sqrt(gm2); energy follows from E^2 = p^2 + m^2.
        momentum = rest_mass/sqrt(anomaly)
        energy = sqrt(momentum**2 + rest_mass**2)
        lorentz = energy/rest_mass

        self.mass = rest_mass
        self.charge = 1.
        self.gm2 = anomaly

        self.magicMomentum = momentum
        self.magicEnergy = energy
        self.magicGamma = lorentz
        self.magicBeta = momentum/(lorentz*rest_mass)


def runSimulation(nParticles, tEnd):
    """Track ``nParticles`` particles in parallel and time the integration.

    Every particle starts with a uniformly random ``X[0]`` in
    ``[-45e-3, 45e-3)``, zeros in components 1-4 and the magic energy in
    ``X[5]``.  Sample points are taken every 1e-9 in time over ``[0, tEnd)``
    and scaled by ``magicBeta*cLight`` to give path length.

    Returns the elapsed wall-clock seconds (pool start-up plus integration);
    the same figure is printed.
    """
    particle = Particle()
    ring = Ring(particle)
    integrator = Integrator(ring)

    Xs = np.array([
        [45e-3*(np.random.rand()-0.5)*2, 0, 0, 0, 0, particle.magicEnergy]
        for _ in range(nParticles)
    ])
    sRange = np.arange(0, tEnd, 1e-9)*particle.magicBeta*cLight

    ode = partial(integrator.odeSolve, sRange=sRange)

    t1 = Time()

    # The context manager shuts the worker processes down once the map has
    # finished; a bare mp.Pool() would leave them running.
    with mp.Pool() as pool:
        sol = np.array(pool.map(ode, Xs))
        # Stop the clock before pool teardown so only the work is measured.
        t2 = Time()

    elapsed = t2 - t1
    print("%.3f sec" % elapsed)

    return elapsed

显然，最耗时的过程是对ODE进行积分，它在类Integrator中由odeSolve()和equations()实现。此外，在求解过程中，类Ring中的getEMField()方法被调用的次数与equations()方法一样多。我本想借助Cython获得大幅加速（至少10~20倍），但用下面的Cython脚本只得到了大约1.5倍的加速：

import cython
import numpy as np
cimport numpy as np
from libc.math cimport sqrt, pi, sin, cos

from scipy.integrate import odeint
from time import time as Time
import multiprocessing as mp
from functools import partial

# Speed of light; the value equals c expressed in m/s.
cdef double cLight = 299792458.
# Length of the derivative vector allocated in Integrator.equations.
cdef int Dim = 6

# Requests that bounds checking on indexed buffer access be disabled here.
@cython.boundscheck(False)
cdef class Integrator:
    # Extension type that holds a Ring and provides the ODE right-hand side
    # (equations) plus the odeint driver (odeSolve).
    cdef Ring ring

    def __init__(self, ring):
        self.ring = ring

    # Right-hand side dX/ds for state X at path length s; this is the
    # callback handed to odeint.  As used below: X[0] and X[2] are the
    # transverse coordinates passed to getEMField, X[1] and X[3] the matching
    # transverse momenta, X[4] the time and X[5] the total energy.
    cpdef np.ndarray[np.double_t, ndim=1, negative_indices=False, mode="c"] equations(self,
                  np.ndarray[np.double_t, ndim=1, negative_indices=False, mode="c"] X,
                  double s):
        cdef np.ndarray[np.double_t, ndim=1, negative_indices=False, mode="c"] dXds = np.zeros(Dim)
        cdef double h, p_s, dtds, gamma
        cdef np.ndarray[np.double_t, ndim=1, negative_indices=False, mode="c"] beta, E, B

        # The position is packed into a Python list; getEMField returns a
        # tuple that is unpacked into the two typed buffers.
        E, B = self.ring.getEMField( [X[0], X[2], s], X[4] )

        # Curvilinear scale factor of the reference orbit.
        h = 1 + X[0]/self.ring.ringRadius
        # Longitudinal momentum from E^2 = m^2 + px^2 + py^2 + ps^2.  This
        # calls NumPy's sqrt on a scalar rather than the cimported libc sqrt.
        p_s = np.sqrt(X[5]*X[5] - self.ring.particle.mass*self.ring.particle.mass - X[1]*X[1] - X[3]*X[3])
        dtds = h*X[5]/p_s
        # gamma is computed but not read again in this method.
        gamma = X[5]/self.ring.particle.mass
        # Velocity components as momentum/energy; only beta[0] and beta[1]
        # are used below.
        beta = np.array( [X[1], X[3], p_s] ) / X[5]

        dXds[0] = dtds*beta[0]
        dXds[2] = dtds*beta[1]
        dXds[1] = p_s/self.ring.ringRadius + self.ring.particle.charge*(dtds*E[0] + dXds[2]*B[2] - h*B[1])
        dXds[3] = self.ring.particle.charge*(dtds*E[1] + h*B[0] - dXds[0]*B[2])
        dXds[4] = dtds
        dXds[5] = self.ring.particle.charge*(dXds[0]*E[0] + dXds[2]*E[1] + h*E[2])
        return dXds

    # Integrate from initial state X0 over the sample points in sRange.
    # NOTE(review): odeint returns a 2-D array (one row per sRange entry)
    # while the declared return buffer is ndim=1 — confirm how Cython treats
    # this declaration.
    cpdef np.ndarray[np.double_t, ndim=1, negative_indices=False, mode="c"] odeSolve(self,
                 np.ndarray[np.double_t, ndim=1, negative_indices=False, mode="c"] X0,
                 np.ndarray[np.double_t, ndim=1, negative_indices=False, mode="c"] sRange):
        # self.equations is passed as a Python callable, so every
        # right-hand-side evaluation goes through a Python-level call.
        sol = odeint(self.equations, X0, sRange)
        return sol

# Requests that bounds checking on indexed buffer access be disabled here.
@cython.boundscheck(False)
cdef class Ring:
    # Extension type for the ring: fixed radius, a constant B field derived
    # from the particle's magic momentum, and a sectored E field.
    cdef Particle particle
    cdef double ringRadius
    cdef double magicB0

    def __init__(self, particle):
        self.particle = particle
        self.ringRadius = 7.112
        self.magicB0 = self.particle.magicMomentum/self.ringRadius

    # Return (E, B) as two length-3 arrays at pos = [x, y, s].
    # The time argument is accepted but not used in the body.
    cpdef tuple getEMField(self,
                   list pos,
                   double time):
        cdef double x, y, s
        cdef double theta, r, rn, arg, k2, k10
        cdef np.ndarray[np.double_t, ndim=1, negative_indices=False, mode="c"] E, B

        x, y, s = pos
        # Path length -> azimuth in degrees, wrapped by the modulo-360.
        theta = (s/self.ringRadius*180/pi) % 360
        # Polar coordinates in the transverse plane; the angle is obtained
        # through a Python complex object and a NumPy call.
        r = sqrt(x*x + y*y)
        arg = 0 if r == 0 else np.angle( complex(x/r, y/r) )
        # Radius normalised by 0.045.
        rn = r/0.045

        k2 = 37*24e3
        k10 = -4*24e3

        # Default: no electric field; B has only its index-1 component set.
        E = np.zeros(3)
        B = np.array( [ 0, self.magicB0, 0 ] )

        # Four sectors 90 degrees apart, each with two angular windows
        # (21.9-34.9 and 38.9-64.9 degrees past the sector start).  E is set
        # only inside a window and when |x| and |y| are both below 0.05.
        # The loop variable i is not declared with cdef.
        for i in range(4):
            if ((21.9+90*i < theta < 34.9+90*i or 38.9+90*i < theta < 64.9+90*i) and (-0.05 < x < 0.05 and -0.05 < y < 0.05)):
                # k2 term is linear in x and y; k10 term scales with rn**9
                # and the angle 9*arg.
                E = np.array( [ k2*x/0.045 + k10*rn**9*cos(9*arg), -k2*y/0.045 -k10*rn**9*sin(9*arg), 0] )
                # Disabled variant without the k10 term:
                #E = np.array( [ k2*x/0.045, -k2*y/0.045, 0] )
                break
        return E, B

cdef class Particle:
    # Extension type holding particle constants and the "magic" kinematic
    # values derived from them.  The attributes are plain cdef members
    # (declared neither public nor readonly).
    cdef double mass
    cdef double charge
    cdef double gm2

    cdef double magicMomentum
    cdef double magicEnergy
    cdef double magicGamma
    cdef double magicBeta

    def __init__(self):
        self.mass = 105.65837e6  # presumably the muon rest mass in eV — confirm
        self.charge = 1.
        self.gm2 = 0.001165921  # presumably the anomalous magnetic moment — confirm

        # Magic momentum is mass/sqrt(gm2); energy follows from E^2 = p^2 + m^2,
        # gamma is energy/mass and beta is momentum/(gamma*mass).
        self.magicMomentum = self.mass/sqrt(self.gm2)
        self.magicEnergy = sqrt(self.magicMomentum**2 + self.mass**2)
        self.magicGamma = self.magicEnergy/self.mass
        self.magicBeta = self.magicMomentum/(self.magicGamma*self.mass)

def runSimulation(nParticles, tEnd):
    """Track ``nParticles`` particles in parallel and time the integration.

    Every particle starts with a uniformly random ``X[0]`` in
    ``[-45e-3, 45e-3)``, zeros in components 1-4 and the magic energy in
    ``X[5]``.  Sample points are taken every 1e-9 in time over ``[0, tEnd)``
    and scaled by ``magicBeta*cLight`` to give path length.

    Returns the elapsed wall-clock seconds (pool start-up plus integration);
    the same figure is printed.
    """
    particle = Particle()
    ring = Ring(particle)
    integrator = Integrator(ring)

    Xs = np.array( [ np.array( [45e-3*(np.random.rand()-0.5)*2, 0, 0, 0, 0, particle.magicEnergy] ) for _ in range(nParticles) ] )
    sRange = np.arange(0, tEnd, 1e-9)*particle.magicBeta*cLight

    ode = partial(integrator.odeSolve, sRange=sRange)

    t1 = Time()

    # The context manager shuts the worker processes down once the map has
    # finished; a bare mp.Pool() would leave them running.
    with mp.Pool() as pool:
        sol = np.array(pool.map(ode, Xs))
        # Stop the clock before pool teardown so only the work is measured.
        t2 = Time()

    print ("%.3f sec" %(t2-t1))

    return t2-t1

我该怎么做才能让Cython发挥最大的效果？（我也试过用Numba代替Cython，实际上Numba带来的性能提升非常大（大约20倍加速）。但我很难让Numba与Python类实例配合使用，所以决定使用Cython而不是Numba。）

作为参考，以下是编译时生成的Cython注解（annotation）报告：

Answer 1

这是一个非常不完整的答案，因为我没有做任何性能分析或计时，甚至没有检查修改后的代码是否给出相同的结果。不过，这里有一些建议，可以减少Cython生成的Python调用代码量：

添加@cython.cdivision(True)编译指令。这意味着浮点除法在除数为零时不会引发ZeroDivisionError，而是得到NaN值。（只有在您不需要引发该错误时才这样做）。
将p_s = np.sqrt(...)改为p_s = sqrt(...)。这将删除仅对单个值进行操作的numpy调用。你好像在其他地方这样做过，所以我不知道你为什么错过了这一行。
在可能的情况下使用固定大小的C数组而不是numpy数组：

    cdef double beta[3]
    # ...
    beta[0] = X[1]/X[5]
    beta[1] = X[3]/X[5]
    beta[2] = p_s/X[5]

当数组大小在编译时已知（并且相当小）、并且您不需要返回它时，可以这样做。这避免了一次np.zeros调用，以及随后把结果赋给类型化numpy数组时的类型检查。我认为beta是唯一可以这样做的地方。
np.angle( complex(x/r, y/r) )可以被atan2(y/r, x/r)取代（使用来自libc.math的atan2）。你还可以省去除以r的运算。
添加cdef int i有助于让getEMField中的for循环更快（Cython通常擅长自动推断循环变量的类型，但在这里似乎没有成功）。
我猜逐个元素地给E赋值会比赋值整个数组更快：

    E[0] = k2*x/0.045 + k10*rn**9*cos(9*arg)
    E[1] = -k2*y/0.045 -k10*rn**9*sin(9*arg)
指定类型如list和tuple没有多大价值，它实际上可能会使代码稍慢（因为它会浪费时间检查类型）。
更大的变化是将E和B作为指针传递给getEMField，而不是用np.zeros分配它们。这样您就可以在equations中把它们声明为静态C数组（cdef double E[3]）。缺点是getEMField必须是cdef函数，因此不能再从Python调用（但如果你愿意，也可以另外写一个可从Python调用的包装函数）。

Cython优化代码

问题描述投票：2回答：1

1个回答

最新问题

Cython优化代码

问题描述 投票：2回答：1

1个回答

最新问题

问题描述投票：2回答：1